Tokenize several user-supplied files instead of a single one, and add a
small fruit-list fixture. File names are collected until an empty line is
entered; each name is stored once. The post-image hash on the first index
line was not regenerated after the fix to the collection loop.

diff --git a/preprocessing/test.py b/preprocessing/test.py
index a71ad99..5cf8ebd 100644
--- a/preprocessing/test.py
+++ b/preprocessing/test.py
@@ -2,14 +2,22 @@
 # nltk.download('punkt')
 from nltk.tokenize import sent_tokenize, word_tokenize
 from nltk.util import filestring
+fileNames = []
+usrInput = input("Enter file name: ")
+
+# Prompt until an empty line is entered, recording each name exactly once.
+while usrInput != "":
+    fileNames.append(usrInput)
+    usrInput = input("Enter file name (ENTER to terminate): ")
+
 
-fileName = input("Enter File Name: ")
 try:
-    with open(fileName, encoding='utf-8') as file:
-        text = file.read()
-        words = word_tokenize(text)
-        sentences = sent_tokenize(text)
-        print('Words: ', words, '\nSentences: ', sentences)
+    for fileName in fileNames:
+        with open(fileName, encoding='utf-8') as file:
+            text = file.read()
+            words = word_tokenize(text)
+            sentences = sent_tokenize(text)
+            print('Words: ', words, '\nSentences: ', sentences)
 except IOError as e:
     print("I/O error({0}): {1}".format(e.errno, e.strerror))
 
diff --git a/preprocessing/test2.txt b/preprocessing/test2.txt
new file mode 100644
index 0000000..48074ae
--- /dev/null
+++ b/preprocessing/test2.txt
@@ -0,0 +1,3 @@
+apple
+orange
+banana