Find All Matches of Substring
by Isai Damier, Android Engineer @ Google

/*********************************************************************
 * Author: Isai Damier
 * Title: String Search Algorithms: Find All Occurrences of Substring
 * Project: geekviewpoint
 * Package: algorithm.search.string
 *
 * Statement:
 *   You are probably familiar with the figure of speech "Needle in
 *   a haystack." All the same, the analogy is not difficult to
 *   understand: it refers to the difficult task of finding something
 *   small in a much larger space.
 *
 *   The string-matching problem is a formal equivalent of the
 *   "needle in a haystack" metaphor. Given a text T[1...n] and a
 *   pattern P[1...m] such that both T and P are comprised by the
 *   same alphabet, find all P's in T.
 *
 * Sample Input: Text="Weialala leia Wallala leialala"
 *               Pattern="eia"
 * Sample Output: 1, 10, 23
 *
 *   Time complexity of Solution:
 *   Worst: O(m*(n-m+1)) where |text|=n and |pattern|=m.
 *   This algorithm is inefficient because it does not skip any shift
 *   of the haystack but blindly examines each one of them.
 *
 * Approach:
 *
 *   Restating the problem:
 *   Given a haystack (i.e. text) and a needle (i.e. pattern), return
 *   the start index of all the occurrences of the needle in the
 *   haystack. If the needle is not in the haystack, return an empty
 *   result set. Throw an exception if either the needle or the
 *   haystack is null or empty.
 *
 *   Preliminaries:
 *   Logically, any claim about an empty set is true. Therefore, if
 *   either the needle or the haystack is null or empty then the truth
 *   concerning their relationship cannot be determined. Hence, under
 *   such conditions, the function shall throw an exception: It would
 *   not be acceptable to return an empty result set because an empty
 *   result set means the proposition that
 *
 *      "the given needle is in the given haystack"
 *
 *   is false. On the other hand, if the haystack has fewer characters
 *   than the needle, then we can say with deterministic certainty that
 *   the haystack does not contain the needle and so return an empty
 *   result set.
 *
 *   Algorithm:
 *   This algorithm is usually referred to as the naive string-matching
 *   algorithm because it checks all shifts of the haystack,
 *   as explained below.
 *     1] for each shift of the haystack, loop
 *       2] for each character of the needle, loop
 *         3] if there is a mismatch between corresponding needle and
 *            haystack characters, then break out of the needle loop;
 *         4] else if all characters of needle is found in haystack
 *            add the start of the current shift of haystack to the
 *            result set.
 *     5] return result set (after exiting loops)
 *
 *   NOTE: if the task is to find any occurrence of the needle,
 *         simply return in step 4.
 *
 ********************************************************************/ 
 List<Integer> allIndicesOf(String haystack, String needle)throws Exception{
  // preliminaries
  if (null == haystack || haystack.isEmpty()) {
    throw new Exception("invalid haystack");
  }
  if (null == needle || needle.isEmpty()) {
    throw new Exception("invalid needle");
  }
  List<Integer> resultSet = new ArrayList<Integer>();
  if (haystack.length() < needle.length()) {
    return resultSet;
  }

  // algorithm
  for (int h = 0; h < haystack.length(); h++) {
    for (int n =0; n < needle.length() && h+n < haystack.length(); n++){

      if (haystack.charAt(h + n) != needle.charAt(n)) {
        break;
      } else if (n == needle.length() - 1) {
        resultSet.add(h);
      }
    }
  }

  return resultSet;
}
package algorithm.search.string;

import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;

/***
 * The haystacks are packed from the works of poet
 * Thomas Stearns Eliot (September 26, 1888 – January 4, 1965)
 ***/
public class StringSearchAlgorithmsTest {

  /**
   * Test of allIndicesOf method, of class StringSearchAlgorithms.
   */
  @Test
  public void testAllIndicesOf() throws Exception {
    System.out.println("allIndicesOf");

    String haystack = "No! I am not Prince Hamlet, nor was meant to be; "
            + "Am an attendant lord, one that will do "
            + "To swell a progress, start a scene or two, "
            + "Advise the prince; no doubt, an easy tool, "
            + "Deferential, glad to be of use, "
            + "Politic, cautious, and meticulous; "
            + "Full of high sentence, but a bit obtuse; "
            + "At times, indeed, almost ridiculous-- "
            + "Almost, at times, the Fool.";

    String needle = "to be";
    StringSearchAlgorithms stringMatcher = new StringSearchAlgorithms();
    List<Integer> expResult = Arrays.asList(42, 192);
    List<Integer> result = stringMatcher.allIndicesOf(haystack, needle);
    assertEquals(expResult, result);

    haystack = "OOOO that Shakespeherian Rag--";
    needle = "OO";
    expResult = Arrays.asList(0, 1, 2);
    result = stringMatcher.allIndicesOf(haystack, needle);
    assertEquals(expResult, result);

  }
}