From 12efaffd449612c4d6cf91c6b6e42687b8dcbfc6 Mon Sep 17 00:00:00 2001 From: SarthakJShetty Date: Sat, 18 Dec 2021 21:25:03 -0500 Subject: [PATCH] Fixed the new spaCy bug where the entire model name had to be included while loading/downloading the model. Also added spaCy to the install_requires in setup.py. No idea how it worked earlier... --- pyResearchInsights/NLP_Engine.py | 7 ++++++- setup.py | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pyResearchInsights/NLP_Engine.py b/pyResearchInsights/NLP_Engine.py index 4ef87c7..28a00ae 100755 --- a/pyResearchInsights/NLP_Engine.py +++ b/pyResearchInsights/NLP_Engine.py @@ -149,7 +149,12 @@ def lemmatization(status_logger_name, textual_data, allowed_postags=['NOUN', 'AD status_logger(status_logger_name, lemmatization_start_status_key) texts_out = [] - nlp = spacy.load('en', disable=['parser', 'ner']) + try: + nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner']) + except OSError: + from spacy.cli import download + download('en_core_web_sm') + nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner']) for sent in textual_data: doc = nlp(" ".join(sent)) texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags]) diff --git a/setup.py b/setup.py index 1dcee3e..b13433c 100644 --- a/setup.py +++ b/setup.py @@ -2,20 +2,21 @@ setup( name = 'pyResearchInsights', # How you named your package folder (MyLib) packages = ['pyResearchInsights'], # Chose the same as "name" - version = '1.58', # Start with a small number and increase it with every change you make + version = '1.59', # Start with a small number and increase it with every change you make license='MIT', # Chose a license from here: https://help.github.com/articles/licensing-a-repository description = 'End-to-end tool for scientific literature analysis', # Give a short description about your library long_description = 'Check out the detailed README [here](https://github.com/SarthakJShetty/pyResearchInsights)!', author = 'Sarthak J. Shetty', # Type in your name author_email = 'sarthakshetty97@gmail.com', # Type in your E-Mail url = 'https://github.com/SarthakJShetty/pyResearchInsights', # Provide either the link to your github or to your website - download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_158.tar.gz', # I explain this later on + download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_159.tar.gz', # I explain this later on keywords = ['Educational Tools', 'Analysis', 'Scraper', 'Natural Language Processing'], # Keywords that define your package best install_requires=[ # I get to this in a second 'numpy', 'pandas', 'matplotlib', 'nltk', + 'spacy', 'pyLDAvis', 'gensim', 'beautifulsoup4', @@ -28,6 +29,6 @@ 'Programming Language :: Python :: 3', #Specify which pyhton versions that you want to support 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', ], ) \ No newline at end of file