*********************************************************************
* Author: Isai Damier
* Title: String Search Algorithms: Find Substring
* Project: geekviewpoint
* Package: algorithm.search.string
*
* Statement:
* You are probably familiar with the figure of speech "Needle in
* a haystack." All the same, the analogy is not difficult to
* understand: it refers to the difficult task of finding something
* small in a much larger space.
*
* The string-matching problem is a formal equivalent of the
* "needle in a haystack" metaphor. Given a text T[1...n] and a
* pattern P[1...m] such that both T and P are comprised by the
* same alphabet, check whether P is a substring of T.
*
* Sample Input: Text="The universe is a software simulation"
* Pattern="verse"
* Sample Output: 7
*
* Time complexity of Solution:
* Worst: O(m*(n-m+1)) where |text|=n and |pattern|=m.
* This algorithm is inefficient because it does not skip any shift
* of the haystack but blindly examines each one of them.
*
* Approach:
*
* Restating the problem:
* Given a haystack (i.e. text) and a needle (i.e. pattern), return
* the start index of the first occurrence of the needle in the
* haystack. If the needle is not in the haystack, return -1. Throw
* an exception if either the needle or the haystack is null or empty.
*
* Preliminaries:
* Logically, any claim about an empty set is true. Therefore, if
* either the needle or the haystack is null or empty then the truth
* concerning their relationship cannot be determined. Hence, under
* such conditions, the function shall throw an exception: It would
* not be acceptable to return -1 because -1 means the proposition
* that "the given needle is in the given haystack" is false.
* On the other hand, if the haystack has fewer characters than
* the needle, then we can say with deterministic certainty that
* the haystack does not contain the needle and so return -1.
*
* Algorithm:
* This algorithm is usually referred to as the naive string-matching
* algorithm because it checks all shifts of the haystack,
* as explained below.
* 1] for each shift of the haystack, loop
* 2] for each character of the needle, loop
* 3] if there is a mismatch between corresponding needle and
* haystack characters, then break out of the needle loop;
* 4] else if all characters of needle is found in haystack
* return the start of the current shift of haystack.
* 5] return -1 (after exiting loops)
*
* NOTE: if the task is to count or print all occurrences of the
* needle, instead of returning in step 4, simply "visit"
* and keep going.
*
********************************************************************/
int findSubstring(String haystack, String needle) throws Exception {
// preliminaries
if (null == haystack || haystack.isEmpty()) {
throw new Exception("invalid haystack");
}
if (null == needle || needle.isEmpty()) {
throw new Exception("invalid needle");
}
if (haystack.length() < needle.length()) {
return -1;
}
// algorithm
for (int h = 0; h < haystack.length(); h++) {
for (int n = 0; n< needle.length() && h+n < haystack.length(); n++){
if (haystack.charAt(h + n) != needle.charAt(n)) {
break;
} else if (n == needle.length() - 1) {
return h;
}
}
}
return -1;
}
package algorithm.search.string;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;
/***
* The haystacks are packed from the works of poet
* Thomas Stearns Eliot (September 26, 1888 – January 4, 1965)
***/
public class StringSearchAlgorithmsTest {
/**
* Test of findSubstring method, of class StringSearchAlgorithms.
*/
@Test
public void testFindSubstring() throws Exception {
System.out.println("findSubstring");
String haystack = "Because I know that time is always time "
+ "And place is always and only place "
+ "And what is actual is actual ony for one time "
+ "And only for one place "
+ "I rejoice that things are as they are and "
+ "... "
+ "Consequently I rejoice, having to construct something "
+ " Upon which to rejoice";
String needle = "time is always time";
StringSearchAlgorithms stringMatcher = new StringSearchAlgorithms();
int expResult = 20;
int result = stringMatcher.findSubstring(haystack, needle);
assertEquals(expResult, result);
needle = "rejoicer";
expResult = -1;
result = stringMatcher.findSubstring(haystack, needle);
assertEquals(expResult, result);
needle = "Because";
expResult = 0;
result = stringMatcher.findSubstring(haystack, needle);
assertEquals(expResult, result);
needle = "because";
expResult = -1;
result = stringMatcher.findSubstring(haystack, needle);
assertEquals(expResult, result);
}
}