-
Notifications
You must be signed in to change notification settings - Fork 0
/
Markov (1).java
140 lines (136 loc) · 3.82 KB
/
Markov (1).java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import java.util.Map.Entry;
import java.util.*;
/**
* Represents a k-character substring
* Give a pseudo-random suffix character based on probability.
*
*/
public class Markov {
String substring;
int count = 0;
TreeMap<Character, Integer> map;
/**
* Constructs a Markov object from a given substring.
* @param substring the given substring.
*/
public Markov(String substring) {
this.substring = substring;
map = new TreeMap<Character, Integer>();
add();
}
/**
* Constructs a Markov object from a given substring and suffix character.
* Suffix characters are stored in a TreeMap.
* @param substring the specified substring.
* @param suffix the specified suffix.
*/
public Markov(String substring, Character suffix) {
this.substring = substring;
map = new TreeMap<Character, Integer>();
add(suffix);
}
/**
* Increments the count of number of times the substring appears in a text.
*/
public void add() {
count++;
}
/**
* Adds a suffix character to the TreeMap.
* @param c the suffix character to be added.
*/
public void add(char c) {
add();
if (map.containsKey(c)) {
int frequency = map.get(c);
map.put(c, frequency+1);
}
else
map.put(c, 1);
}
/**
* Gives the frequency count of a suffix character
* The number of times the specified suffix follows the substring in a text.
* @param c the specified suffix.
* @return the frequency count.
*/
public int getFrequencyCount(char c) {
if (!map.containsKey(c)) {
return -1;
}
else {
return map.get(c);
}
}
/**
* Gives a percentage of frequency count / number of total suffixes.
* @param c
* @return
*/
public double getCharFrequency(char c) {
if (getFrequencyCount(c) == -1)
return -1;
else
return ((double) getFrequencyCount(c))/((double)count);
}
/**
* Finds whether or not the given suffix is in the TreeMap.
* @param c the given suffix.
* @return True if the suffix exists in the TreeMap, otherwise false
*/
public boolean containsChar(char c) {
if (!map.containsKey(c)) return false;
else return true;
}
/**
* Return the number of times this substring occurs in a text.
* @return The number of times occurs in a text
*/
public int count() {
return count;
}
/**
* Return the TreeMap.
* @return the TreeMap.
*/
public TreeMap<Character, Integer> getMap() {
return map;
}
/**
* Using random probability, returns a pseudo-random character to follow the substring.
* Character possibilities are added to an ArrayList, and a random number
* from 0 to the last index in the ArrayList is picked. Since more common suffixes
* occupy more indices in the ArrayList, they have greater probability
* @return the pseudo-random suffix.
*/
public char random() {
Character ret = null;
Set<Entry<Character, Integer>> s = map.entrySet();
Iterator<Entry<Character, Integer>> it = s.iterator();
ArrayList<Character> suffixes = new ArrayList<Character>();
while (it.hasNext()) {
Entry<Character, Integer> tmp = it.next();
for (int i=0; i<tmp.getValue(); i++) {
suffixes.add(tmp.getKey());
}
}
Random rand = new Random();
int retIndex = rand.nextInt(suffixes.size());
ret = suffixes.get(retIndex);
return ret;
}
/**
* Gives a String representation of the Markov object.
* @return said String representation.
*/
public String toString() {
String ret = "Substring: "+substring+", Count: "+count;
ret += "\n"+"Suffixes and frequency counts: ";
for (Entry<Character, Integer> entry : map.entrySet()) {
char key = entry.getKey();
int value = entry.getValue();
ret += "\n"+"Suffix: "+key+", frequency count: "+value;
}
return ret;
}
}