From aaf164d74e26e0938ca5108196faecb5370ccd50 Mon Sep 17 00:00:00 2001
From: Patrick-Lapid
Date: Tue, 14 Sep 2021 21:20:47 -0400
Subject: [PATCH] added test file

---
Review notes. Everything between the three-dash line above and the diffstat
below is commentary; it is not part of the commit message and is not applied.

* What the patch adds: preprocessing/test.py, an interactive script that
  prompts for a file name, reads that file as UTF-8, and prints the result of
  NLTK's word_tokenize and sent_tokenize on its text; and
  preprocessing/test.txt, a small sample input for it.
* Formatting: the line breaks and the 4-space indentation of the Python body
  were restored from the hunk line counts (+1,18 and +1,6). NOTE(review):
  confirm the exact whitespace against blobs a71ad99 and c5b1d98.
* Unreachable handler: on Python 3 (the script uses an f-string, so it needs
  3.6+) IOError is an alias of OSError, so "except IOError as e" already
  catches every OSError and the following "except OSError" branch can never
  run. A follow-up should keep a single "except OSError as e" handler.
* Unused import: "filestring" is imported from nltk.util but never referenced
  in this file.
* Tokenizer data: the nltk.download('punkt') call is commented out.
  NOTE(review): the tokenizers presumably need that resource to be installed
  already; if it is missing NLTK raises LookupError, which neither handler
  catches. Confirm the intended setup step.
* Scope of try: tokenizing and printing happen inside the "with"/"try" body,
  so the file stays open longer than the read requires and the handlers also
  cover code that is not file I/O.

 preprocessing/test.py  | 18 ++++++++++++++++++
 preprocessing/test.txt |  6 ++++++
 2 files changed, 24 insertions(+)
 create mode 100644 preprocessing/test.py
 create mode 100644 preprocessing/test.txt

diff --git a/preprocessing/test.py b/preprocessing/test.py
new file mode 100644
index 0000000..a71ad99
--- /dev/null
+++ b/preprocessing/test.py
@@ -0,0 +1,18 @@
+import nltk
+# nltk.download('punkt')
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.util import filestring
+
+fileName = input("Enter File Name: ")
+try:
+    with open(fileName, encoding='utf-8') as file:
+        text = file.read()
+        words = word_tokenize(text)
+        sentences = sent_tokenize(text)
+        print('Words: ', words, '\nSentences: ', sentences)
+
+except IOError as e:
+    print("I/O error({0}): {1}".format(e.errno, e.strerror))
+except OSError:
+    print(f"OS error trying to open {fileName}")
+
diff --git a/preprocessing/test.txt b/preprocessing/test.txt
new file mode 100644
index 0000000..c5b1d98
--- /dev/null
+++ b/preprocessing/test.txt
@@ -0,0 +1,6 @@
+This is a test text file.
+
+This is a new line.
+
+
+Goodbye