-
Notifications
You must be signed in to change notification settings - Fork 20
/
check_grammar.py
executable file
·63 lines (56 loc) · 1.94 KB
/
check_grammar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
from itertools import groupby
import math
import nltk
import sys
def do_check(filename):
    """Lint the context-free grammar in *filename* and print a report.

    The grammar is loaded through NLTK's ``cfg`` loader.  Three sections are
    written to standard output:

    * "Nonterminals with no productions": every right-hand-side occurrence
      of a nonterminal that has no production of its own, together with the
      label (left-hand-side symbol) of the rule that uses it.
    * "Nonterminals with duplicate productions": every production that
      repeats an earlier, identical production.
    * "Nonterminals with no uses": a ``grep -v`` command line with one
      ``-e`` pattern per left-hand-side symbol that never appears on any
      right-hand side.  The symbol ``START`` is never listed (presumably
      because it is the grammar's root -- confirm against the grammar file).

    The body only uses constructs that are valid on both Python 2 and
    Python 3 (single-argument ``print(...)``, no ``basestring`` or
    ``iteritems``).

    :param filename: path of the grammar file to check.
    :returns: None; the report is printed.
    """
    body_grammar = nltk.data.load("file:%s" % filename, 'cfg')
    productions = body_grammar.productions()

    labels = []         # left-hand-side symbols, in first-seen order
    known_labels = set()
    used = set()        # symbols of nonterminals seen on some right-hand side

    print("Nonterminals with no productions:")
    for prod in productions:
        label = prod.lhs().symbol()
        if label not in known_labels:
            known_labels.add(label)
            labels.append(label)
        for term in prod.rhs():
            # Terminals (plain strings) can be neither undefined nor unused.
            if not nltk.grammar.is_nonterminal(term):
                continue
            used.add(term.symbol())
            if not body_grammar.productions(lhs=term):
                print("* %s (label %s)" % (term, label))

    print("Nonterminals with duplicate productions:")
    # A production compares equal only when both sides match, so a single
    # grammar-wide set also catches duplicates whose copies are not adjacent
    # in the file (grouping consecutive rules by label would miss those).
    seen = set()
    for prod in productions:
        if prod in seen:
            print("* term %s: %s" % (prod.lhs().symbol(), prod))
        seen.add(prod)

    print("\nNonterminals with no uses:")
    # Assemble the whole command first and print it once.  Joining with a
    # single space keeps the spacing the former trailing-comma print
    # statements produced.
    command = ["grep -v "]
    command.extend('-e "^%s -" ' % label
                   for label in labels
                   if label not in used and label != "START")
    command.append(filename)
    print(" ".join(command))
# Run the check only when executed as a script, so importing this module
# has no side effects, and fail with a usage message instead of a bare
# IndexError when the grammar file argument is missing.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s GRAMMAR_FILE" % sys.argv[0])
    do_check(sys.argv[1])