Find Substring
by Isai Damier

*********************************************************************
 * Author: Isai Damier
 * Title: String Search Algorithms: Find Substring
 * Project: geekviewpoint
 * Package: algorithm.search.string
 *
 * Statement:
 *   You are probably familiar with the figure of speech "Needle in
 *   a haystack." All the same, the analogy is not difficult to
 *   understand: it refers to the difficult task of finding something
 *   small in a much larger space.
 *
 *   The string-matching problem is a formal equivalent of the
 *   "needle in a haystack" metaphor. Given a text T[1...n] and a
 *   pattern P[1...m] such that both T and P are comprised by the
 *   same alphabet, check whether P is a substring of T.
 *
 * Sample Input: Text="The universe is a software simulation"
 *               Pattern="verse"
 * Sample Output: 7
 *
 *   Time complexity of Solution:
 *   Worst: O(m*(n-m+1)) where |text|=n and |pattern|=m.
 *   This algorithm is inefficient because it does not skip any shift
 *   of the haystack but blindly examines each one of them.
 *
 * Approach:
 *
 *   Restating the problem:
 *   Given a haystack (i.e. text) and a needle (i.e. pattern), return
 *   the start index of the first occurrence of the needle in the
 *   haystack. If the needle is not in the haystack, return -1. Throw
 *   an exception if either the needle or the haystack is null or empty.
 *
 *   Preliminaries:
 *   Logically, any claim about an empty set is true. Therefore, if
 *   either the needle or the haystack is null or empty then the truth
 *   concerning their relationship cannot be determined. Hence, under
 *   such conditions, the function shall throw an exception: It would
 *   not be acceptable to return -1 because -1 means the proposition
 *   that "the given needle is in the given haystack" is false.
 *   On the other hand, if the haystack has fewer characters than
 *   the needle, then we can say with deterministic certainty that
 *   the haystack does not contain the needle and so return -1.
 *
 *   Algorithm:
 *   This algorithm is usually referred to as the naive string-matching
 *   algorithm because it checks all shifts of the haystack,
 *   as explained below.
 *     1] for each shift of the haystack, loop
 *       2] for each character of the needle, loop
 *         3] if there is a mismatch between corresponding needle and
 *            haystack characters, then break out of the needle loop;
 *         4] else if all characters of needle is found in haystack
 *            return the start of the current shift of haystack.
 *     5] return -1 (after exiting loops)
 *
 *   NOTE: if the task is to count or print all occurrences of the
 *         needle, instead of returning in step 4, simply "visit"
 *         and keep going.
 *
 ********************************************************************/ 
 int findSubstring(String haystack, String needle) throws Exception {
  // preliminaries
  if (null == haystack || haystack.isEmpty()) {
    throw new Exception("invalid haystack");
  }
  if (null == needle || needle.isEmpty()) {
    throw new Exception("invalid needle");
  }
  if (haystack.length() < needle.length()) {
    return -1;
  }
  // algorithm
  for (int h = 0; h < haystack.length(); h++) {
    for (int n = 0; n< needle.length() && h+n < haystack.length(); n++){
      if (haystack.charAt(h + n) != needle.charAt(n)) {
        break;
      } else if (n == needle.length() - 1) {
        return h;
      }
    }
  }

  return -1;
}
package algorithm.search.string;

import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;

/***
 * The haystacks are packed from the works of poet
 * Thomas Stearns Eliot (September 26, 1888 – January 4, 1965)
 ***/
public class StringSearchAlgorithmsTest {

  /**
   * Test of findSubstring method, of class StringSearchAlgorithms.
   */
  @Test
  public void testFindSubstring() throws Exception {
    System.out.println("findSubstring");
    String haystack = "Because I know that time is always time "
            + "And place is always and only place "
            + "And what is actual is actual ony for one time "
            + "And only for one place "
            + "I rejoice that things are as they are and "
            + "... "
            + "Consequently I rejoice, having to construct something "
            + " Upon which to rejoice";

    String needle = "time is always time";
    StringSearchAlgorithms stringMatcher = new StringSearchAlgorithms();
    int expResult = 20;
    int result = stringMatcher.findSubstring(haystack, needle);
    assertEquals(expResult, result);

    needle = "rejoicer";
    expResult = -1;
    result = stringMatcher.findSubstring(haystack, needle);
    assertEquals(expResult, result);

    needle = "Because";
    expResult = 0;
    result = stringMatcher.findSubstring(haystack, needle);
    assertEquals(expResult, result);

    needle = "because";
    expResult = -1;
    result = stringMatcher.findSubstring(haystack, needle);
    assertEquals(expResult, result);
  }
}