-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspell.py
executable file
·117 lines (103 loc) · 3.49 KB
/
spell.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
import collections, os
def clean(word):
    """Return the lower-cased core of a whitespace-delimited token.

    Leading characters that are not ASCII letters are skipped, then the
    longest following run of ASCII letters, apostrophes and hyphens is
    taken.  Apostrophes and hyphens at the end of that run are trimmed so
    that quoted tokens such as ``'hello'`` and dashed tokens such as
    ``end--`` reduce to the bare word, mirroring how leading quotes and
    dashes are already discarded.  Interior ones (``don't``,
    ``well-known``) are kept.

    Returns an empty string when the token contains no ASCII letter.
    """
    # Function-scope import keeps this helper self-contained; repeated
    # imports are served from the module cache.
    import re
    # Every part of the pattern is optional, so match() always succeeds.
    core = re.match(r"[^a-zA-Z]*([a-zA-Z'-]*)", word).group(1)
    return core.rstrip("'-").lower()
#-----arguments-----#
# Parse the command line: one file to check plus zero or more ignore-group
# names.  The special single values "none" and "all" are expanded below.
import argparse
desc = '''\
a command-line spell checker for domain-specific configuration and usage
case-insensitive
By default, common English words are accepted.
"ignore groups" may be specified as options.
The following ones are provided:
- e (modern English)
- t (technical)
- html
- css
By default, e ignore group is enabled. Use
- none: to disable all ignore groups
- all: to enable all ignore groups
'''
parser = argparse.ArgumentParser(description=desc, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('file', help='file to spell check')
parser.add_argument('ignore_group', nargs='*', help='an ignore group to use', default=['e'])
args = parser.parse_args()
if args.ignore_group == ['none']:
    args.ignore_group = []
elif args.ignore_group == ['all']:
    # Look for the ignore files next to this script rather than in the
    # current working directory, so "all" works from any directory.
    ignore_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'ignore')
    # Each group is a "<name>.txt" file; skip anything else in the directory.
    args.ignore_group = [
        os.path.splitext(name)[0]
        for name in os.listdir(ignore_dir)
        if name.endswith('.txt')
    ]
#-----setup-----#
# Directory holding this script; the word list, books and ignore groups are
# all looked up relative to it.
home = os.path.dirname(os.path.realpath(__file__))
word_list_path = os.path.join(home, 'word_list.txt')

# Build the word list once, from every books/*.txt file, keeping only the
# cleaned words that occur more than once.
if not os.path.exists(word_list_path):
    import glob
    frequency = collections.defaultdict(int)
    for path in glob.glob(os.path.join(home, 'books', '*.txt')):
        with open(path) as book:
            for token in book.read().split():
                word = clean(token)
                if word:
                    frequency[word] += 1
    with open(word_list_path, 'w') as word_list:
        for word, count in frequency.items():
            if count > 1:
                word_list.write(word+'\n')

# Accepted words: everything in the (possibly just generated) word list.
with open(word_list_path) as word_list:
    words = set(word_list.read().split())

# Extra accepted words from each requested ignore group.
ignores = set()
for ignore_group in args.ignore_group:
    ignore_path = os.path.join(home, 'ignore', ignore_group+'.txt')
    with open(ignore_path) as ignore_list:
        ignores.update(ignore_list.read().split())
#-----processing-----#
# Walk the file word by word; for every word that is neither in the word
# list nor in an active ignore group, prompt the user for what to do.

# Python 2 compatibility: prefer raw_input when it exists.
try:
    input = raw_input
except NameError:
    pass

ignore_dir = os.path.join(home, 'ignore')
# Maps ignore-group name -> words the user asked to add to it in this run.
new_ignores = collections.defaultdict(list)
stop_requested = False
with open(args.file) as source:
    for line_number, line in enumerate(source, 1):
        for dirty_word in line.split():
            word = clean(dirty_word)
            if not word or word in ignores or word in words:
                continue
            # Keep prompting until the user gives an answer that settles
            # this word (skip, quit, or assign to an ignore group).
            while True:
                print('Found "{0}" on line {1}. Enter ? for help.'.format(word, line_number))
                reply = input()
                if reply == '?' or reply == '':
                    print('Enter , to continue reading through the file.')
                    print('Enter an ignore group name to add the word to that ignore group (and also start ignoring it in this run).')
                    print('Enter ig? to print out all ignore groups.')
                    print('Enter . to quit, updating ignore groups.')
                    print('Enter .. to quit, updating nothing.')
                    print('Enter ?? to print out some context.')
                    print('Enter ??? to debug.')
                    continue
                if reply == ',':
                    break
                if reply == 'ig?':
                    for name in sorted(os.listdir(ignore_dir)):
                        print(os.path.splitext(name)[0])
                    # Re-prompt; without this the listing would fall
                    # through and be treated as an ignore-group name.
                    continue
                if reply == '.':
                    stop_requested = True
                    break
                if reply == '..':
                    raise SystemExit(0)
                if reply == '??':
                    print(line.strip())
                    print(dirty_word)
                    continue
                if reply == '???':
                    import pdb; pdb.set_trace()
                    continue
                # Anything else is taken as the name of an ignore group.
                new_ignores[reply].append(word)
                ignores.add(word)
                break
            if stop_requested:
                break
        if stop_requested:
            break

# Persist the new ignore words, appending only those not already on file.
for ignore_group, additions in new_ignores.items():
    path = os.path.join(ignore_dir, ignore_group+'.txt')
    existing_text = ''
    if os.path.exists(path):
        with open(path, 'r') as f:
            existing_text = f.read()
    existing = set(entry.strip() for entry in existing_text.splitlines())
    fresh = [word for word in additions if word not in existing]
    if not fresh:
        continue
    with open(path, 'a') as ignore_list:
        # If the file does not end with a newline, add one first so the
        # first appended word is not glued onto the last existing entry.
        if existing_text and not existing_text.endswith('\n'):
            ignore_list.write('\n')
        for word in fresh:
            ignore_list.write(word+'\n')