Trie Data Structure

Van C. Ngo · May 30, 2017

Introduction

A trie is a kind of search tree that is used to store a dynamic set where the keys are strings. A trie can be seen as a tree-shape deterministic finite automaton (DFA). Each finite language can be generated by a trie automaton, and each trie can be compressed into a acyclic DFA.

Every node of trie consists of multiple branches. Each branch represents a possible character of keys. We need to mark the last node of every key as leaf node. A trie node field value will be used to distinguish the node as leaf node (there are other uses of the value field).

Applications

A common application of tries is storing a predictive text or auto-complete dictionary. Such applications take advantage of a trie’s ability to quickly look for, insert, and delete items. Using trie, we can search for the key in \(\mathcal{O}(m)\) instead of \(\mathcal{O}(m.\text{log}n)\) if we use a binary search tree, where \(m\), \(n\) are the length of search string and the number of keys, respectively.

Tries are also well suited for implementing approximate matching algorithms including those used in spell checking and hyphenation applications.

Implementation

In this post, I will show how to implement a trie where the characters are from ‘a’ to ‘z’ (26 characters in total) using arrays in OCaml.

(************************************)
(*               Trie               *)
(*          Van Chan Ngo            *)
(************************************)

(* trie.mli *)

exception Trie_invalid of string
type t
val empty : unit -> t
val is_empty : t -> bool
val insert : string -> t -> unit
val search : string -> t -> bool

(* trie.ml *)

exception Trie_invalid of string

type t =  Empty 
        | Node of bool ref * t array (* true if a node is a leaf *)

let alphabet_size = 26 (* number of letters from 'a' to 'z' *)

let get_index c = Char.code c - Char.code 'a'

let create_node () = 
	let a = Array.make alphabet_size Empty in
	Node (ref false, a)

let set_leaf b trie = 
	match trie with
	| Empty -> ()
	| Node (rb, _) -> rb := b

let empty () = create_node ()

let is_empty trie = match trie with
	| Empty -> true
	| Node (b, a) ->
		Array.fold_left (fun res t -> match t with 
                                      | Empty -> res && true
                                      | _ -> res && false
                                 ) true a

let insert_char c trie = 
	let index = get_index c in 
	match trie with
	| Empty -> 
		raise (Trie_invalid "Invalid trie!")
	| Node (b, array_t) -> 
		let t = array_t.(index) in 
		begin
			match t with
			| Empty -> array_t.(index) <- create_node (); array_t.(index)
			| _ -> array_t.(index)
		end

let insert s trie = 
	let rec aux s t mark_leaf = 
		let n = String.length s in
		if n = 0 then 
			set_leaf mark_leaf t
		else 
			let next_node = insert_char (String.get s 0) t in 
			aux (String.sub s 1 (n - 1)) next_node true
	in match trie with
	| Empty -> aux s (create_node ()) false
	| _ -> aux s trie false

let search_char c trie =
	let index = get_index c in
	match trie with
	| Empty -> Empty
	| Node (b, a) -> match a.(index) with
		| Empty -> Empty
		| Node (_, _) as t -> t

let rec search s trie = 
	let n = String.length s in
	if n = 0 then
		begin
			match trie with
			| Empty -> false
			| Node (b, a) -> !b
		end
	else
		begin
			let t = search_char (String.get s 0) trie in
			match t with
			| Empty -> false
			| Node (b,a) as next_trie -> search (String.sub s 1 (n - 1)) next_trie
		end

(* test.ml *)

open Trie

let _ = 
	let t0 = empty () in
	insert "this" t0;
	insert "is" t0;
	insert "a" t0;
	insert "trie" t0;
	if search "this" t0 then 
		Printf.printf "\"this\" is present\n"
	else
		Printf.printf "\"this\" is not present\n";
	if search "trie" t0 then 
		Printf.printf "\"trie\" is present\n"
	else
		Printf.printf "\"trie\" is not present\n";
	if search "ben" t0 then 
		Printf.printf "\"ben\" is present\n"
	else
		Printf.printf "\"ben\" is not present\n"

The outputs should be as follows.

./test.native
"this" is present
"trie" is present
"ben" is not present