Skip to content

Commit

Permalink
added test file
Browse files Browse the repository at this point in the history
  • Loading branch information
Patrick-Lapid committed Sep 15, 2021
1 parent 8f94d2c commit aaf164d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
18 changes: 18 additions & 0 deletions preprocessing/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import nltk
# nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.util import filestring

# Prompt for a file path, read the whole file as UTF-8 text, then print the
# word tokens and sentence tokens produced by NLTK.
fileName = input("Enter File Name: ")
try:
    # Guard only the file access: this is the part that raises OSError
    # (missing file, permission denied, path is a directory, ...).
    with open(fileName, encoding='utf-8') as file:
        text = file.read()
except OSError as e:
    # IOError has been an alias of OSError since Python 3.3, so a separate
    # `except OSError` clause placed after `except IOError` can never run.
    # One handler covers both names and keeps the message that was
    # actually reachable.
    print("I/O error({0}): {1}".format(e.errno, e.strerror))
else:
    # Runs only when the file was read successfully. Tokenizing sits outside
    # the try so tokenizer failures surface with their own traceback instead
    # of being reported as a file I/O error.
    words = word_tokenize(text)
    sentences = sent_tokenize(text)
    print('Words: ', words, '\nSentences: ', sentences)

6 changes: 6 additions & 0 deletions preprocessing/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
This is a test text file.

This is a new line.


Goodbye

0 comments on commit aaf164d

Please sign in to comment.