From 0a74ef6a459b5e2e0eeadc1a769c2a226f009314 Mon Sep 17 00:00:00 2001 From: Sarthak J Shetty Date: Fri, 28 Jun 2024 15:59:40 -0400 Subject: [PATCH] fixed html error, creating new tag, pyRI works again! --- pyResearchInsights/Scraper.py | 11 +++++------ setup.py | 12 +++++------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pyResearchInsights/Scraper.py b/pyResearchInsights/Scraper.py index a552cea..00fd13a 100755 --- a/pyResearchInsights/Scraper.py +++ b/pyResearchInsights/Scraper.py @@ -4,8 +4,6 @@ '''Adding the libraries to be used here.''' -'''Importing urllib.request to use urlopen''' -from urllib.request import build_opener, HTTPCookieProcessor ''''Importing urllib.error to handle errors in HTTP pinging.''' import urllib.error '''BeautifulSoup is used for souping.''' @@ -20,6 +18,9 @@ import time '''Fragmenting code into different scripts. Some functions are to be used across the different sub-parts as well. Hence, shifted some of the functions to the new script.''' from pyResearchInsights.common_functions import pre_processing, argument_formatter, keyword_url_generator, abstract_id_log_name_generator, status_logger +'''Requests collects the HTML from the URL that is being pinged''' +import requests + def url_reader(url, status_logger_name): '''This keyword is supplied to the URL and is hence used for souping. @@ -28,10 +29,8 @@ def url_reader(url, status_logger_name): moves on to the next PII number''' try: '''Using the urllib function, urlopen to extract the html_code of the given page''' - open_connection = build_opener(HTTPCookieProcessor()) - html_code = open_connection.open(url) - '''Closing the abstract window after each abstract has been extracted''' - return html_code + response = requests.get(url) + return response.content except (UnboundLocalError, urllib.error.HTTPError): pass diff --git a/setup.py b/setup.py index b13433c..d600266 100644 --- a/setup.py +++ b/setup.py @@ -2,19 +2,20 @@ setup( name = 'pyResearchInsights', # How you named your package folder (MyLib) packages = ['pyResearchInsights'], # Chose the same as "name" - version = '1.59', # Start with a small number and increase it with every change you make + version = '1.60', # Start with a small number and increase it with every change you make license='MIT', # Chose a license from here: https://help.github.com/articles/licensing-a-repository description = 'End-to-end tool for scientific literature analysis', # Give a short description about your library long_description = 'Check out the detailed README [here](https://github.com/SarthakJShetty/pyResearchInsights)!', author = 'Sarthak J. Shetty', # Type in your name author_email = 'sarthakshetty97@gmail.com', # Type in your E-Mail url = 'https://github.com/SarthakJShetty/pyResearchInsights', # Provide either the link to your github or to your website - download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_159.tar.gz', # I explain this later on + download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_160.tar.gz', # I explain this later on keywords = ['Educational Tools', 'Analysis', 'Scraper', 'Natural Language Processing'], # Keywords that define your package best install_requires=[ # I get to this in a second 'numpy', 'pandas', - 'matplotlib', + 'matplotlib==3.5.2', + "scipy==1.10.1", 'nltk', 'spacy', 'pyLDAvis', @@ -26,9 +27,6 @@ 'Intended Audience :: Developers', # Define that your audience are developers 'Topic :: Software Development :: Build Tools', 'License :: OSI Approved :: MIT License', # Again, pick a license - 'Programming Language :: Python :: 3', #Specify which pyhton versions that you want to support - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.11', ], ) \ No newline at end of file