Introduction
A trie is a kind of search tree that is used to store a dynamic set where the keys are strings. A trie can be seen as a tree-shape deterministic finite automaton (DFA). Each finite language can be generated by a trie automaton, and each trie can be compressed into a acyclic DFA.
Every node of trie consists of multiple branches. Each branch represents a possible character of keys. We need to mark the last node of every key as leaf node. A trie node field value will be used to distinguish the node as leaf node (there are other uses of the value field).
Applications
A common application of tries is storing a predictive text or auto-complete dictionary. Such applications take advantage of a trie’s ability to quickly look for, insert, and delete items. Using trie, we can search for the key in \(\mathcal{O}(m)\) instead of \(\mathcal{O}(m.\text{log}n)\) if we use a binary search tree, where \(m\), \(n\) are the length of search string and the number of keys, respectively.
Tries are also well suited for implementing approximate matching algorithms including those used in spell checking and hyphenation applications.
Implementation
In this post, I will show how to implement a trie where the characters are from ‘a’ to ‘z’ (26 characters in total) using arrays in OCaml.
(************************************)
(* Trie *)
(* Van Chan Ngo *)
(************************************)
(* trie.mli *)
exception Trie_invalid of string
type t
val empty : unit -> t
val is_empty : t -> bool
val insert : string -> t -> unit
val search : string -> t -> bool
(* trie.ml *)
exception Trie_invalid of string
type t = Empty
| Node of bool ref * t array (* true if a node is a leaf *)
let alphabet_size = 26 (* number of letters from 'a' to 'z' *)
let get_index c = Char.code c - Char.code 'a'
let create_node () =
let a = Array.make alphabet_size Empty in
Node (ref false, a)
let set_leaf b trie =
match trie with
| Empty -> ()
| Node (rb, _) -> rb := b
let empty () = create_node ()
let is_empty trie = match trie with
| Empty -> true
| Node (b, a) ->
Array.fold_left (fun res t -> match t with
| Empty -> res && true
| _ -> res && false
) true a
let insert_char c trie =
let index = get_index c in
match trie with
| Empty ->
raise (Trie_invalid "Invalid trie!")
| Node (b, array_t) ->
let t = array_t.(index) in
begin
match t with
| Empty -> array_t.(index) <- create_node (); array_t.(index)
| _ -> array_t.(index)
end
let insert s trie =
let rec aux s t mark_leaf =
let n = String.length s in
if n = 0 then
set_leaf mark_leaf t
else
let next_node = insert_char (String.get s 0) t in
aux (String.sub s 1 (n - 1)) next_node true
in match trie with
| Empty -> aux s (create_node ()) false
| _ -> aux s trie false
let search_char c trie =
let index = get_index c in
match trie with
| Empty -> Empty
| Node (b, a) -> match a.(index) with
| Empty -> Empty
| Node (_, _) as t -> t
let rec search s trie =
let n = String.length s in
if n = 0 then
begin
match trie with
| Empty -> false
| Node (b, a) -> !b
end
else
begin
let t = search_char (String.get s 0) trie in
match t with
| Empty -> false
| Node (b,a) as next_trie -> search (String.sub s 1 (n - 1)) next_trie
end
(* test.ml *)
open Trie
let _ =
let t0 = empty () in
insert "this" t0;
insert "is" t0;
insert "a" t0;
insert "trie" t0;
if search "this" t0 then
Printf.printf "\"this\" is present\n"
else
Printf.printf "\"this\" is not present\n";
if search "trie" t0 then
Printf.printf "\"trie\" is present\n"
else
Printf.printf "\"trie\" is not present\n";
if search "ben" t0 then
Printf.printf "\"ben\" is present\n"
else
Printf.printf "\"ben\" is not present\n"
The outputs should be as follows.
./test.native
"this" is present
"trie" is present
"ben" is not present