字典树 | echo

Trie 字典树

Trie 字典树主要用于存储字符串，Trie 的每个 Node 保存一个字符。用链表来描述的话，一个字符串就是一个链表。每个 Node 都保存了它的所有子结点。

下图为 see、pain、panda、dog构成的字典树

结点设置

是否存在以该结点为最后字符的单词，标识位 isEnd；
当前结点的所有子节点，使用Map存储，当字符串中全为小写字母时可以用数组代替。

基本操作

插入

将一个单词的每一个字符从头到尾插入字典树，并将最后一个字符的 idEnd 标志位设为 true。

查找

从根结点开始遍历字典树，判断查找的字符串是否存在于某一条路径中，且字符串最后一个字符对应的字典树中的结点 isEnd 标志位必须为 true，代表这个结点后面没有字符。例如字典树中存在单词 panda 查找 pan 就要返回false，因为 pan 中的字符 n 对应字典树中的结点 isEnd 标志位为 false。

前缀查找

与查找类似，对最后一个字符对应字典树的结点标志位没有限制。也就是字典树中存在单词 panda，查找 pan 前缀返回true。

删除

删除操作分为三种情况：

要删除的单词位另一个单词的前缀，将最后一个字符对应字典树结点的 isEnd 标志位改为 false；
要删除的单词对应字典树中的所有结点都没有分支，直接删除整个链表。
要删除的单词对应字典树中的结点有分支，删除单词对应字典树链表上的最后一个分支结点到末尾结点。

第一张图中存在 panda、pan 两个单词，我们需要删除 pan ，直接将字母 n 的 isEnd 标志位设为false。

第二张图中存在 see 这个单词，并且其链表路径上没有任何分支，直接删除整个链表。

第三张图中存在 pain、panda、pad三个单词，这三个单词在字母 a 处分叉，要删除 pad ，直接将字符串 pad 中 a 后的字符（d）删除。

代码实现

package datastructure;

import java.util.Map;
import java.util.TreeMap;

public class Trie {

    private Node root;
    private int size;

    public Trie(){
        root = new Node();
    }
    public int size(){
        return size;
    }
    public boolean isEmpty(){
        return size == 0;
    }
    /*
    * 插入单词
    * @param word 需要插入的单词
    * */
    public void add(String word){
        Node current = root;
        char[] str = word.toCharArray();
        for(char c : str){
            //获取下一个结点
            Node next = current.next.get(c);
            if(next == null){
                //结点为空
                current.next.put(c, new Node());
            }
            //继续向下
            current = current.next.get(c);
        }
        //如果当前单词已经存在，则不需要增加size
        if(!current.isEnd){
            size++;
            current.isEnd = true;
        }
    }

    /*
    * 查询字典树中是否存在单词
    * @param word 需要查询的单词
    * @return 存在单词返回 true，否则返回 false
    * */
    public boolean contains(String word){
        Node current = root;
        for(int i = 0; i < word.length(); i++){
            char c = word.charAt(i);
            Node node = current.next.get(c);
            if(node == null){
                return false;
            }
            current = node;
        }
        // 字典树中有 panda，查询 pan 返回 false
        return current.isEnd;
    }

    /*
    * 查询字典树中是否存在某个前缀
    * @param prefix 一个字符串前缀
    * @return 存在前缀返回 true，否则返回 false
    * */
    public boolean containsPrefix(String prefix){
        Node current = root;
        for(int i = 0; i < prefix.length(); i++){
            char c = prefix.charAt(i);
            Node node = current.next.get(c);
            if(node == null){
                return false;
            }
            current = node;
        }

        // 字典树中有 panda，查询 pan这个前缀，返回 true
        return true;
    }

    public boolean remove(String word){
        Node multiChildNode = null;
        int multiChildNodeIndex = -1;
        Node current = root;
        for(int i = 0; i < word.length(); i++){
            Node child = current.next.get(word.charAt(i));
            if(child == null){
                //字典树中不存在这个单词
                return false;
            }

            //维护最后一个分叉结点
            if(child.next.size() > 1){
                multiChildNodeIndex = i;
                multiChildNode = child;
            }
            current = child;
        }

        //如果单词后面还有子节点,证明该单词是某个单词的前缀
        if(current.next.size() > 0){
            if(current.isEnd){
                //将其单词标识改为false
                current.isEnd = false;
                //单词树减一
                size-- ;
                return true;
            }
            //不存在该单词，该单词只是前缀
            return false;
        }

        //如果单词的所有字母都没有多个分支，删除整个单词
        if(multiChildNodeIndex == -1){
            //移除单词的第一个字母（移除整个单词）
            root.next.remove(word.charAt(0));
            size-- ;
            return true;
        }

        //如果单词除了最后一个字母，其他的字母有分支
        if(multiChildNodeIndex != word.length() - 1){
            //移除掉分叉结点后的所有结点
            multiChildNode.next.remove(word.charAt(multiChildNodeIndex + 1));
            size-- ;
            return true;
        }
        return false;
    }

    private static class Node{
        public boolean isEnd;//是否存在以该结点为末尾的单词
        public Map<Character, Node> next;//子节点

        public Node(){
            next = new TreeMap<>();
        }

        public Node(boolean isEnd){
            this.isEnd = isEnd;
        }

    }

    public static void main(String[] args) {
        Trie tree = new Trie();
        tree.add("panda");
        tree.add("pan");
        tree.add("pad");

        System.out.println(tree.contains("pad"));
        System.out.println(tree.contains("pand"));
        System.out.println("Size: " + tree.size());
        System.out.println(tree.remove("pad"));
        System.out.println("Size: " + tree.size());
        System.out.println(tree.remove("pan"));
        System.out.println("Size: " + tree.size());
        System.out.println(tree.remove("panz"));
        System.out.println("Size: " + tree.size());
        System.out.println(tree.remove("panda"));
        System.out.println("Size: " + tree.size());
        tree.add("panz");
        System.out.println(tree.contains("panz"));
    }
}