forked from indy256/codelibrary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SuffixTree.java
110 lines (103 loc) · 3.9 KB
/
SuffixTree.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package strings;
// https://stackoverflow.com/questions/9452701/ukkonens-suffix-tree-algorithm-in-plain-english/9513423#9513423
public class SuffixTree {
public static final String ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789\1\2";
public static class Node {
public int begin;
public int end;
public int depth; // distance in characters from root to this node
public Node parent;
public Node[] children;
public Node suffixLink;
Node(int begin, int end, int depth, Node parent) {
this.begin = begin;
this.end = end;
this.parent = parent;
this.depth = depth;
children = new Node[ALPHABET.length()];
}
}
public static Node buildSuffixTree(CharSequence s) {
int n = s.length();
byte[] a = new byte[n];
for (int i = 0; i < n; i++) a[i] = (byte) ALPHABET.indexOf(s.charAt(i));
Node root = new Node(0, 0, 0, null);
Node node = root;
for (int i = 0, tail = 0; i < n; i++, tail++) {
Node last = null;
while (tail >= 0) {
Node ch = node.children[a[i - tail]];
while (ch != null && tail >= ch.end - ch.begin) {
tail -= ch.end - ch.begin;
node = ch;
ch = ch.children[a[i - tail]];
}
if (ch == null) {
node.children[a[i]] = new Node(i, n, node.depth + node.end - node.begin, node);
if (last != null)
last.suffixLink = node;
last = null;
} else {
byte afterTail = a[ch.begin + tail];
if (afterTail == a[i]) {
if (last != null)
last.suffixLink = node;
break;
} else {
Node splitNode = new Node(ch.begin, ch.begin + tail, node.depth + node.end - node.begin, node);
splitNode.children[a[i]] = new Node(i, n, ch.depth + tail, splitNode);
splitNode.children[afterTail] = ch;
ch.begin += tail;
ch.depth += tail;
ch.parent = splitNode;
node.children[a[i - tail]] = splitNode;
if (last != null)
last.suffixLink = splitNode;
last = splitNode;
}
}
if (node == root) {
--tail;
} else {
node = node.suffixLink;
}
}
}
return root;
}
// Usage example
public static void main(String[] args) {
String s1 = "aabcc";
String s2 = "abc";
// build generalized suffix tree
String s = s1 + '\1' + s2 + '\2';
Node tree = buildSuffixTree(s);
int lcs = SuffixTree.lcs(tree, s1.length(), s1.length() + s2.length() + 1);
System.out.println(3 == lcs);
}
public static int lcsLength;
public static int lcsBeginIndex;
// traverse suffix tree to find longest common substring
public static int lcs(SuffixTree.Node node, int i1, int i2) {
if (node.begin <= i1 && i1 < node.end) {
return 1;
}
if (node.begin <= i2 && i2 < node.end) {
return 2;
}
int mask = 0;
for (char f = 0; f < SuffixTree.ALPHABET.length(); f++) {
if (node.children[f] != null) {
mask |= lcs(node.children[f], i1, i2);
}
}
if (mask == 3) {
int curLength = node.depth + node.end - node.begin;
if (lcsLength < curLength) {
lcsLength = curLength;
lcsBeginIndex = node.begin;
}
}
return mask;
}
}