Skip to content

Commit

Permalink
Allows multiple txt files
Browse files Browse the repository at this point in the history
  • Loading branch information
Patrick-Lapid committed Sep 17, 2021
1 parent aaf164d commit 9a05ae8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
20 changes: 14 additions & 6 deletions preprocessing/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,22 @@
# nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.util import filestring
# Collect file names from the user until an empty line is entered.
# The list starts empty so the first name is not added twice, and an
# immediate ENTER results in no files being processed.
fileNames = []
usrInput = input("Enter file name: ")
while usrInput != "":
    fileNames.append(usrInput)
    usrInput = input("Enter file name (ENTER to terminate): ")

# Tokenize each file on its own; an unreadable file is reported and
# skipped so that it does not prevent the remaining files from running.
for fileName in fileNames:
    try:
        with open(fileName, encoding='utf-8') as file:
            text = file.read()
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        continue

    words = word_tokenize(text)
    sentences = sent_tokenize(text)
    print('Words: ', words, '\nSentences: ', sentences)
Expand Down
3 changes: 3 additions & 0 deletions preprocessing/test2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
apple
orange
banana

0 comments on commit 9a05ae8

Please sign in to comment.