-
Notifications
You must be signed in to change notification settings - Fork 29
/
PatternMgr.py
296 lines (265 loc) · 12.8 KB
/
PatternMgr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# -*- coding: utf-8 -*-
# This class implements the AIML pattern-matching algorithm described by Dr. Richard Wallace at: http://www.alicebot.org/documentation/matching.html
from __future__ import print_function
import marshal
import pprint
import re
from constants import *
class PatternMgr:
    """Store AIML patterns in a word tree and match input against them.

    Each pattern is stored as a path of nested dictionaries, one level per
    word.  Wildcards and structural markers are stored under the integer
    keys defined below; ordinary words are stored under their own string.
    A complete path looks like::

        <pattern words> _THAT <that words> _TOPIC <topic words> _TEMPLATE
    """

    # Special dictionary keys.  They are integers so that they can never
    # collide with pattern words, which are always strings.
    _UNDERSCORE = 0
    _STAR = 1
    _TEMPLATE = 2
    _THAT = 3
    _TOPIC = 4
    _BOT_NAME = 5

    def __init__(self):
        """Create an empty pattern tree and compile the normalisation regexes."""
        self._root = {}
        self._templateCount = 0
        self._botName = u"Nameless"
        # The backslash is escaped explicitly: "\|" is an invalid escape
        # sequence that newer Python versions warn about.
        punctuation = "\"`~!@#$%^&*()-_=+[{]}\\|;:',<.>/?"
        self._puncStripRE = re.compile("[" + re.escape(punctuation) + "]")
        self._whitespaceRE = re.compile(r"\s+", re.UNICODE)

    def numTemplates(self):
        """Return the number of templates currently stored."""
        return self._templateCount

    def setBotName(self, name):
        """Set the bot name matched by <bot name="name"> tags in patterns.

        Runs of whitespace in `name` are collapsed to single spaces.  The
        matcher compares the name against ONE input word, so a name that
        still contains a space after collapsing can never match.
        """
        # u" ".join() yields a text string on both Python 2 and Python 3
        # without relying on a `unicode` name being in scope.
        self._botName = u" ".join(name.split())

    def dump(self):
        """Pretty-print every learned pattern (debugging aid)."""
        pprint.pprint(self._root)

    def save(self, filename):
        """Dump the current patterns to the file named `filename`.

        The template count, bot name and pattern tree are written with
        `marshal`, in that order.  Use restore() to load them back.
        Any exception is reported on stdout and then re-raised.
        """
        try:
            # `with` guarantees the handle is closed even if marshal fails.
            with open(filename, "wb") as outFile:
                marshal.dump(self._templateCount, outFile)
                marshal.dump(self._botName, outFile)
                marshal.dump(self._root, outFile)
        except Exception:
            print("Error saving PatternMgr to file %s:" % filename)
            raise

    def restore(self, filename):
        """Load a pattern set previously written by save().

        Any exception is reported on stdout and then re-raised.
        """
        # NOTE(security): marshal is not safe against malicious data; only
        # restore files that this program wrote itself.
        try:
            with open(filename, "rb") as inFile:
                self._templateCount = marshal.load(inFile)
                self._botName = marshal.load(inFile)
                self._root = marshal.load(inFile)
        except Exception:
            print("Error restoring PatternMgr from file %s:" % filename)
            raise

    def _descend(self, node, text, botNameAllowed=False):
        """Walk from `node` along the words of `text`, creating nodes as needed.

        "_" and "*" are mapped to the wildcard keys; "BOT_NAME" is mapped to
        its special key only when `botNameAllowed` is true.  Returns the node
        reached after the last word.
        """
        for word in text.split():
            if word == u"_":
                key = self._UNDERSCORE
            elif word == u"*":
                key = self._STAR
            elif botNameAllowed and word == u"BOT_NAME":
                key = self._BOT_NAME
            else:
                key = word
            node = node.setdefault(key, {})
        return node

    def add(self, data, template):
        """Add a (pattern, that, topic) tuple and its template to the tree.

        `data` is a 3-tuple of strings.  An empty `that` or `topic` adds no
        _THAT / _TOPIC level.  Adding the same path twice replaces the
        template without increasing the template count.
        """
        pattern, that, topic = data
        # TODO: make sure the words contain only legal characters
        # (alphanumerics, *, _).
        # BOT_NAME is only recognised in the main pattern, not in that/topic.
        node = self._descend(self._root, pattern, botNameAllowed=True)
        if that:
            node = self._descend(node.setdefault(self._THAT, {}), that)
        if topic:
            node = self._descend(node.setdefault(self._TOPIC, {}), topic)
        if self._TEMPLATE not in node:
            self._templateCount += 1
        node[self._TEMPLATE] = template

    def _normalize(self, text):
        """Upper-case `text`, replace punctuation by spaces, return its words."""
        text = self._puncStripRE.sub(" ", text.upper())
        return self._whitespaceRE.sub(" ", text).split()

    def match(self, pattern, that, topic):
        """Return the template that best matches the input, or None.

        `pattern` is the user input, `that` the bot's previous response and
        `topic` the current conversation topic.  All three are upper-cased
        and stripped of punctuation before matching.
        """
        if len(pattern) == 0:
            return None
        # 'that' and 'topic' must never be empty, so substitute placeholders
        # that only a wildcard can match.
        if that.strip() == u"":
            that = u"ULTRABOGUSDUMMYTHAT"
        if topic.strip() == u"":
            topic = u"ULTRABOGUSDUMMYTOPIC"
        _, template = self._match(self._normalize(pattern),
                                  self._normalize(that),
                                  self._normalize(topic),
                                  self._root)
        return template

    def _splitMatch(self, patMatch):
        """Split a matched path into its (pattern, that, topic) segments.

        A missing _THAT or _TOPIC marker (pattern added with an empty that
        or topic) yields an empty segment instead of raising ValueError.
        """
        end = len(patMatch)
        thatAt = patMatch.index(self._THAT) if self._THAT in patMatch else end
        topicAt = patMatch.index(self._TOPIC) if self._TOPIC in patMatch else end
        return (patMatch[:min(thatAt, topicAt)],
                patMatch[thatAt + 1:topicAt],
                patMatch[topicAt + 1:])

    def _alignWildcards(self, patMatch, words):
        """Return the (start, stop) word span of every wildcard in `patMatch`.

        The alignment mirrors _match(): every wildcard consumes at least one
        word and the shortest span that lets the rest of the path align is
        chosen first.  Spans are half-open indices into `words`.  Returns
        None when `words` cannot be aligned with `patMatch`.
        """
        wildcards = (self._STAR, self._UNDERSCORE)
        numPat, numWords = len(patMatch), len(words)
        deadEnds = set()    # (pattern index, word index) pairs known to fail

        def align(p, w):
            if p == numPat:
                return [] if w == numWords else None
            if (p, w) in deadEnds:
                return None
            if patMatch[p] in wildcards:
                for stop in range(w + 1, numWords + 1):
                    rest = align(p + 1, stop)
                    if rest is not None:
                        return [(w, stop)] + rest
            elif w < numWords and patMatch[p] == words[w]:
                rest = align(p + 1, w + 1)
                if rest is not None:
                    return rest
            deadEnds.add((p, w))
            return None

        return align(0, 0)

    def star(self, starType, pattern, that, topic, index):
        """Return the part of the input that was matched by a wildcard.

        `starType` selects which segment to look at:
          - 'star':      a wildcard in the main pattern.
          - 'thatstar':  a wildcard in the that pattern.
          - 'topicstar': a wildcard in the topic pattern.
        `index` is the 1-based number of the wildcard within that segment.

        Returns "" when nothing matches the input and u"" when the segment
        has no wildcard number `index`.  Raises ValueError for an unknown
        `starType` (only once a match has been found).
        """
        if that.strip() == u"":
            that = u"ULTRABOGUSDUMMYTHAT"
        if topic.strip() == u"":
            topic = u"ULTRABOGUSDUMMYTOPIC"
        inputWords = self._normalize(pattern)
        thatWords = self._normalize(that)
        topicWords = self._normalize(topic)

        patMatch, template = self._match(inputWords, thatWords, topicWords, self._root)
        if template is None:
            return ""

        patternPart, thatPart, topicPart = self._splitMatch(patMatch)
        if starType == 'star':
            segment, words, raw = patternPart, inputWords, pattern
        elif starType == 'thatstar':
            segment, words, raw = thatPart, thatWords, that
        elif starType == 'topicstar':
            segment, words, raw = topicPart, topicWords, topic
        else:
            # unknown value
            raise ValueError("starType must be in ['star', 'thatstar', 'topicstar']")

        spans = self._alignWildcards(segment, words)
        if spans is None or not 1 <= index <= len(spans):
            return u""
        start, stop = spans[index - 1]
        # Extract from the original, un-normalised text so case and
        # punctuation are preserved.
        # NOTE: the indices come from the normalised words; if punctuation
        # stripping split a raw word in two, the slice can be off.
        return ' '.join(raw.split()[start:stop])

    def _matchWildcard(self, key, suffix, thatWords, topicWords, root):
        """Try the wildcard `key` at `root`, consuming 1..len(suffix)+1 words.

        The first input word is always consumed by the wildcard; `suffix`
        holds the words after it.  Shorter spans are tried first.  An empty
        remainder must be tried too, for wildcards at the end of a pattern.
        """
        child = root[key]
        for skip in range(len(suffix) + 1):
            pattern, template = self._match(suffix[skip:], thatWords, topicWords, child)
            if template is not None:
                return ([key] + pattern, template)
        return (None, None)

    def _match(self, words, thatWords, topicWords, root):
        """Return (pat, tem) for the best match of the input below `root`.

        `pat` is the list of nodes leading from `root` to the matched
        template and `tem` is that template.  Returns (None, None) when
        nothing matches.  Candidates are tried in priority order:
        underscore, exact word, bot name, star.
        """
        # Base case: out of words, so continue into that/topic or take the
        # template stored at this node.
        if not words:
            pattern, template = [], None
            if thatWords:
                # Continue with the that words below the _THAT node.
                if self._THAT in root:
                    pattern, template = self._match(thatWords, [], topicWords, root[self._THAT])
                    if pattern is not None:
                        pattern = [self._THAT] + pattern
            elif topicWords:
                # Continue with the topic words below the _TOPIC node.
                if self._TOPIC in root:
                    pattern, template = self._match(topicWords, [], [], root[self._TOPIC])
                    if pattern is not None:
                        pattern = [self._TOPIC] + pattern
            if template is None:
                # Completely out of input: take the template at this node.
                pattern = []
                template = root.get(self._TEMPLATE)
            return (pattern, template)

        first = words[0]
        suffix = words[1:]

        # Check underscore (highest priority; this check is active).
        if self._UNDERSCORE in root:
            found = self._matchWildcard(self._UNDERSCORE, suffix, thatWords, topicWords, root)
            if found[1] is not None:
                return found

        # Check the literal first word.
        if first in root:
            pattern, template = self._match(suffix, thatWords, topicWords, root[first])
            if template is not None:
                return ([first] + pattern, template)

        # Check the bot name.  Input words are upper-cased before matching,
        # so compare against the upper-cased name.
        if self._BOT_NAME in root and first == self._botName.upper():
            pattern, template = self._match(suffix, thatWords, topicWords, root[self._BOT_NAME])
            if template is not None:
                return ([first] + pattern, template)

        # Check star (lowest priority).
        if self._STAR in root:
            found = self._matchWildcard(self._STAR, suffix, thatWords, topicWords, root)
            if found[1] is not None:
                return found

        # No match found.
        return (None, None)