-
Notifications
You must be signed in to change notification settings - Fork 9
/
match_sig.py
126 lines (102 loc) · 3.31 KB
/
match_sig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import json
from pyxdameraulevenshtein import damerau_levenshtein_distance, normalized_damerau_levenshtein_distance
def listSame(l1, l2):
    """Return True when two sequences have the same length and equal items.

    Items are compared position by position with ``!=``; the first
    mismatch makes the result False.

    Args:
        l1: First sequence (must support ``len()`` and iteration).
        l2: Second sequence (must support ``len()`` and iteration).

    Returns:
        bool: True if both sequences are element-wise equal, else False.
    """
    if len(l1) != len(l2):
        return False
    # Lengths are equal at this point, so zip() pairs every element and
    # never truncates.
    return not any(a != b for a, b in zip(l1, l2))
# Load both signature dumps. Context managers make sure the file handles
# are closed as soon as the JSON has been parsed.
with open(r"D:\desktop\j8e_match_clz\sig\sig1.json", "r", encoding="utf8") as sigFile:
    sig1 = json.load(sigFile)
with open(r"D:\desktop\j8e_match_clz\sig\sig2.json", "r", encoding="utf8") as sigFile:
    sig2 = json.load(sigFile)

# badCount accumulates the entries that could not be resolved to exactly
# one counterpart: +1 per unmatched sig1 entry, and +len(candidates) per
# ambiguous entry.
badCount = 0
match = []      # sig1 entries with at least one sig2 entry sharing "Feature"
notMatch = []   # sig1 entries with no sig2 entry sharing "Feature"

# Pair every sig1 entry with all sig2 entries whose "Feature" value is equal.
for sig1Item in sig1:
    matchItem = {
        "source": sig1Item,
        "match": []
    }
    for sig2Item in sig2:
        if sig1Item["Feature"] == sig2Item["Feature"]:
            matchItem["match"].append(sig2Item)
    if matchItem["match"]:
        match.append(matchItem)
    else:
        # NOTE(review): both statements are assumed to belong to this
        # else-branch - confirm against the original layout.
        notMatch.append(sig1Item)
        badCount += 1

# Split the matched entries into unique hits and ambiguous hits.
perfectMatch = []   # exactly one candidate
multipleMatch = []  # more than one candidate
likelyMathc = []    # (sic) filled only by the fuzzy pass for unmatched entries
for item in match:
    if len(item["match"]) > 1:
        multipleMatch.append(item)
        badCount += len(item["match"])
    else:
        perfectMatch.append(item)
def getMethodCodeFeature(item):
    """Flatten the code features of every method of one signature entry.

    For each element of ``item["Method"]`` the values of its
    ``"CodeFeature"`` sequence are appended to the result, followed by a
    ``-1`` sentinel.

    Args:
        item: Mapping with a ``"Method"`` iterable; every method is a
            mapping with a ``"CodeFeature"`` iterable.

    Returns:
        list: All code features in method order, each method's run
        terminated by ``-1``. Empty when the entry has no methods.
    """
    feature = []
    for method in item["Method"]:
        feature.extend(method["CodeFeature"])
        # -1 closes each method's run of features - presumably so method
        # boundaries stay visible to the edit-distance comparison.
        # NOTE(review): confirm the sentinel is meant to be per method.
        feature.append(-1)
    return feature
# For every ambiguous entry, rank its candidates by the Damerau-Levenshtein
# distance between the flattened method code features of the source and of
# the candidate, and keep the five closest under item["likely"].
for item in multipleMatch:
    sourceFeature = getMethodCodeFeature(item["source"])
    ranked = [
        {
            "dist": damerau_levenshtein_distance(
                sourceFeature, getMethodCodeFeature(candidate)
            ),
            "match": candidate
        }
        for candidate in item["match"]
    ]
    # list.sort() is stable, so candidates with equal distance keep their
    # original relative order.
    ranked.sort(key=lambda entry: entry["dist"])
    item["likely"] = ranked[:5]
    print("multipleMatch:" + json.dumps(item))

# Disabled fuzzy pass for entries without any exact "Feature" match: it would
# rank every sig2 entry by the distance between the "Feature" values and keep
# the five closest. While it stays commented out, likelyMathc remains empty.
# for item in notMatch:
#     dists = []
#     for sig2Item in sig2:
#         dist = damerau_levenshtein_distance(item["Feature"], sig2Item["Feature"])
#         dists.append({
#             "dist": dist,
#             "match": sig2Item
#         })
#     dists = sorted(dists, key=lambda v: v["dist"])
#     dists = dists[:5]
#     likelyItem = {
#         "source": item,
#         "likely": dists
#     }
#     likelyMathc.append(likelyItem)
#     print("likelyMathc:" + json.dumps(likelyItem))
def print_match(f, m):
    """Write one text line per (source, matched entry) pair.

    Each line has the form ``<RawName> - <NowName> -> <matched NowName>``.

    Args:
        f: Writable text stream (anything with a ``write(str)`` method).
        m: Iterable of mappings with a ``"source"`` mapping (keys
            ``"RawName"`` and ``"NowName"``) and a ``"match"`` iterable of
            mappings that each carry ``"NowName"``.

    Returns:
        None. Nothing is written when ``m`` is empty.
    """
    for item in m:
        source = item["source"]
        for matchItem in item["match"]:
            # Build the full line first so it is emitted with one write.
            line = (source["RawName"] + " - " + source["NowName"] + " -> " +
                    matchItem["NowName"] + "\n")
            f.write(line)
def print_likely_match(f, m):
    """Write one text line per (source, likely candidate) pair.

    Each line has the form
    ``<RawName> - <NowName> -> <candidate NowName>, dist:<dist>``.

    Args:
        f: Writable text stream (anything with a ``write(str)`` method).
        m: Iterable of mappings with a ``"source"`` mapping (keys
            ``"RawName"`` and ``"NowName"``) and a ``"likely"`` iterable of
            mappings holding ``"match"`` (with ``"NowName"``) and ``"dist"``.

    Returns:
        None. Nothing is written when ``m`` is empty.
    """
    for item in m:
        source = item["source"]
        for likely in item["likely"]:
            # Build the full line first so it is emitted with one write.
            line = (source["RawName"] + " - " + source["NowName"] + " -> " +
                    likely["match"]["NowName"] + ", dist:" + str(likely["dist"]) + "\n")
            f.write(line)
# Human-readable report. The context manager closes (and flushes) the file;
# previously this handle was rebound without ever being closed. UTF-8 matches
# the encoding used to read the inputs, so names containing non-ASCII
# characters cannot fail on a narrower locale encoding.
with open("./result.txt", "w", encoding="utf8") as f:
    f.write("--------perfectMatch--------\n")
    print_match(f, perfectMatch)
    f.write("--------multipleMatch--------\n")
    print_likely_match(f, multipleMatch)
    f.write("--------likelyMathc--------\n")
    print_likely_match(f, likelyMathc)

# Machine-readable dumps of each result group.
with open("./perfectMatch.json", "w", encoding="utf8") as f:
    f.write(json.dumps(perfectMatch))

with open("./multipleMatch.json", "w", encoding="utf8") as f:
    f.write(json.dumps(multipleMatch))

# The smaller the dist, the more similar the classes.
with open("./likelyMathc.json", "w", encoding="utf8") as f:
    f.write(json.dumps(likelyMathc))

print("badCount: " + str(badCount))