Wednesday, 31 August 2016

Trie vs arrayList vs LinkedList performance while inserting thousands and thousands of words and then searching a random word

Code For Trie and TrieNode Class that would be handling inserting and searching

class TrieNode {
    TrieNode[] arr;
    boolean isEnd;
    int count;
    // Initialize your data structure here.
    public TrieNode() {
        this.arr = new TrieNode[26];
    }

}

public class Trie {
    private TrieNode root;

    public Trie() {
        root = new TrieNode();
    }

    // Inserts a word into the trie.
    public void insert(String word) {
        TrieNode p = root;
        for(int i=0; i<word.length(); i++){
            char c = word.charAt(i);
            int index = c-'a';
            if(p.arr[index]==null){
                TrieNode temp = new TrieNode();
                p.arr[index]=temp;
                p = temp;
            }else{
                p=p.arr[index];
            }
        }
        p.isEnd=true;
    }

    // Returns if the word is in the trie.
    public boolean search(String word) {
        TrieNode p = searchNode(word);
        if(p==null){
            return false;
        }else{
            if(p.isEnd)
                return true;
        }

        return false;
    }

    // Returns if there is any word in the trie
    // that starts with the given prefix.
    public boolean startsWith(String prefix) {
        TrieNode p = searchNode(prefix);
        if(p==null){
            return false;
        }else{
            return true;
        }
    }

    public TrieNode searchNode(String s){
        TrieNode p = root;
        for(int i=0; i<s.length(); i++){
            char c= s.charAt(i);
            int index = c-'a';
            if(p.arr[index]!=null){
                p = p.arr[index];
            }else{
                return null;
            }
        }

        if(p==root)
            return null;

        return p;
    }
}

Test class for testing performance of Trie as compared to linked list and arrayList

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

public class TrieTest {

public static void main(String[] args) throws Exception {

Trie trie = new Trie();

List<String> arrayList = new ArrayList<String>();
List<String> linkedList = new LinkedList<String>();

// Load the words from the resource file
InputStream in = TrieTest.class.getResourceAsStream("/words_en.txt");
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
ArrayList<String> words = new ArrayList<>(150000);
do {
String line = reader.readLine();
if (line == null) {
break;
}
if (line.matches("[a-z]+")) {
words.add(line);
}
} while (true);
reader.close();


System.out.println("Inserting 109583 words");
long startForTrieInsert = System.currentTimeMillis();
words.stream().forEach(word -> trie.insert(word));
System.out.println("Completed in " + (System.currentTimeMillis() - startForTrieInsert) + " ms.");

long startForArrayListInsert = System.currentTimeMillis();
words.stream().forEach(word -> arrayList.add(word));
System.out.println("Completed in " + (System.currentTimeMillis() - startForArrayListInsert) + " ms.");

long startForLinkedListInsert = System.currentTimeMillis();
words.stream().forEach(word -> linkedList.add(word));
System.out.println("Completed in " + (System.currentTimeMillis() - startForLinkedListInsert) + " ms.");

String testWord = "zyzzyvas";
// Create the three vocabularies
long startTrieSearching = System.currentTimeMillis();
int found = 0;
if (trie.search(testWord)) {
found++;
}
System.out.println("Seraching " + testWord);
System.out.println("Completed in " + (System.currentTimeMillis() - startTrieSearching) + " ms. Found " + found);

long startArrayList = System.currentTimeMillis();
found = 0;
if (arrayList.contains(testWord)) {
found++;
}

System.out.println("Completed in " + (System.currentTimeMillis() - startArrayList) + " ms. Found " + found);

long startLinkedList = System.currentTimeMillis();
found = 0;
if (linkedList.contains(testWord)) {
found++;
}

System.out.println("Completed in " + (System.currentTimeMillis() - startLinkedList) + " ms. Found " + found);
}
}


outputs 

Order of printing is Trie, ArrayList and then LinkedList

Inserting 109583 words
Completed in 67 ms.
Completed in 0 ms.
Completed in 7 ms.
Seraching zyzzyvas
Completed in 0 ms. Found 1
Completed in 4 ms. Found 1
Completed in 4 ms. Found 1

Inserting 109583 words
Completed in 129 ms.
Completed in 0 ms.
Completed in 16 ms.
Seraching zyzzyvas
Completed in 0 ms. Found 1
Completed in 0 ms. Found 1
Completed in 15 ms. Found 1

Inserting 109583 words
Completed in 62 ms.
Completed in 8 ms.
Completed in 5 ms.
Seraching zyzzyvas
Completed in 0 ms. Found 1
Completed in 5 ms. Found 1
Completed in 4 ms. Found 1

Inserting 109583 words
Completed in 98 ms.
Completed in 15 ms.
Completed in 0 ms.
Seraching zyzzyvas
Completed in 0 ms. Found 1
Completed in 16 ms. Found 1
Completed in 0 ms. Found 1

Here we can see that while inserting, Trie takes alot of time, but when searching for a random word, it always returns the search in 0ms. While ArrayList and LinkedList seaching varies. Hence for searching Trie has the best performance.

Volatile keyword, Synchronized keyword and Lock interface

Volatile Marking a member variable as volatile, makes sure, when ever the member variable is accessed, its value is always read from the...