/***************************************************************************
* Author: Isai Damier
* Title: Trie.java
* Project: geekviewpoint
* Package: datastructure
*
* Description:
* A trie is a tree data-structure that stores words by compressing common
* prefixes. To illustrate, following is a word list and its resulting trie.
*
* WORDS: rat, rats, rattle, rate, rates, rating, can, cane,
* canny, cant, cans, cat, cats, cattle, cattles.
*
* TRIE:
* ___________|____________
* | |
* r c
* a ________a___________
* t~ | |
* ____|_____ n~ t~
* | | | | _____|_____ _______|______
* s~ t e~ i | | | | | t
* l s~ n e~ n t~ s~ s~ l
* e~ g~ y~ e~
* s~
*
* Each ~ in the figure indicates where a prefix is a word.
*
* Generally, a trie has all the benefits of a hash table without
* any of the disadvantages.
*
* --------------------------------------------------------------
* | HASH TABLE | TRIE | explanations
* --------------------------------------------------------------
* Memory | O(n) | < O(n) | trie uses prefix compression.
* | | | Hence it does not store each
* | | | word explicitly
* ----------------------------------------------------------------
* Search | O(1) | O(1) | trie is technically faster.
* | | pseudo- | Given a word, computing a
* | | constant | hash takes at least as long
* | | | as traversing a trie. Plus,
* | | | trie has no collision.
* ----------------------------------------------------------------
*
* Tries are particularly superior to hash tables when it comes to solving
* problems such as word puzzles like boggle. In such puzzles the objective
* is to find how many words in a given list are valid. So if for example
* at a particular instance in boggle you have a list of one billion words
* all starting with zh-, whereas the dictionary has no words starting with
* zh-; then: if the dictionary is a hash table, you must compute the entire
* hashcode for each word and do one billion look-ups; if on the other hand
* the dictionary is a trie, you only do the equivalent of partially
* computing one hashcode! That's a saving of over one billion fold!
*
* This implementations of trie uses an array to store the children
* nodes, where the numerical value of each char serves as index.
**************************************************************************/
package algorithms.trie;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
public class Trie {
//the root only serves to anchor the trie.
private TrieNode root;
public Trie() {
root = new TrieNode('\0');
}//constructor
/******************************************************************
* Function: removeWord
* @param word
* @return boolean
*
* Description: Delete the given word from the trie. If the word
* does not exist, return false.
*
* Technical Details: This algorithm essentially uses tail
* recursion to remove the word one character at a time.
* Because the relation is tail-recursive and has many
* different cases, it is more convenient to use an explicit
* stack and a for loop as opposed to recursion per se.
*
* 0] initialize an empty stack to hold visited nodes
* 1] set auxiliary node n to root and add n to the stack
* 2] for each character c in the given word
* 3] set n to the child node of n that's holding c
* 4] if n is null return false to mean word not found
* 5] add n to stack
* 6] at this point n represents the last char in word so
* if n is not marked, return false. ELSE
* 7] unmark n
* 8] if n is a prefix then return true. ELSE
* 9] pop the stack to remove the last char in word
* 10] since the word is found and it's not a prefix:
* for each node n on the stack, if n only has one child
* delete n's child
* if n is marked, return true.
* 11] return true.
****************************************************************/
public boolean removeWord(String word) {
Stack<TrieNode> stack = new Stack<TrieNode>();
TrieNode n = root;
stack.add(n);
for (char c : word.toCharArray()) {
n = n.next[c];
if (null == n)//word not found
{
return false;
}
stack.add(n);
}
if (!n.word)//word not found
{
return false;
}
n.word = false;
if (!n.nextIsEmpty())//word is a prefix
{
return true;
}
//word is not a prefix
stack.pop();
n = stack.pop();
while (!stack.isEmpty() && n.nextSize() == 1) {
n.clearNext();
if (n.word) {
return true;
}
n = stack.pop();
}
return true;
}//remove
}
package algorithms.graph;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;
/***
* The dictionary is assembled from the works of poet
* Percy Bysshe Shelley (1792-1822)
***/
public class TrieTest {
/**
* Test of removeWord method, of class Trie.
*/
@Test
public void testRemoveWord() {
System.out.println("removeWord");
Trie trie = new Trie();
String input[] = {"counts", "count"};
for (String w : input) {
trie.addWord(w);
}
assertEquals(trie.getWordList().size(), input.length);
assertTrue(trie.removeWord(input[1]));
assertEquals(trie.getWordList().size(), 1);
assertTrue(trie.removeWord(input[0]));
assertEquals(trie.getWordList().size(), 0);
List<String> dictionary = Arrays.asList("His", "soul", "had", "wedded", "Wisdom",
"and", "her", "dower", "is", "love", "justice", "clothed", "in",
"which", "he", "sate", "apart", "from", "men", "as", "a",
"lonely", "tower", "pitying", "the", "tumult", "of", "their",
"dark", "estate");
assertEquals(trie.getWordList().size(), 0);
for (String w : dictionary) {
trie.addWord(w);
}
assertEquals(trie.getWordList().size(), dictionary.size());
for (String w : dictionary) {
assertTrue(trie.removeWord(w));
}
assertEquals(trie.getWordList().size(), 0);
for (String w : input) {
trie.addWord(w);
}
String word[] = {"strictly", "meditate", "the"};
for (String w : word) {
if (trie.isWord(w)) {
fail(w + " should not be a word");
}
if (trie.removeWord(w)) {
fail(w + " should not have been in the dictionary");
}
}
assertTrue(trie.isWord("count"));
assertTrue(trie.isWord("counts"));
}
}