Skip to content

Commit

Permalink
fixed html error, creating new tag, pyRI works again!
Browse files Browse the repository at this point in the history
  • Loading branch information
SarthakJShetty committed Jun 28, 2024
1 parent e4b7dd4 commit 0a74ef6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 13 deletions.
11 changes: 5 additions & 6 deletions pyResearchInsights/Scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

'''Adding the libraries to be used here.'''

'''Importing urllib.request to use urlopen'''
from urllib.request import build_opener, HTTPCookieProcessor
'''Importing urllib.error to handle errors in HTTP pinging.'''
import urllib.error
'''BeautifulSoup is used for souping.'''
Expand All @@ -20,6 +18,9 @@
import time
'''Fragmenting code into different scripts. Some functions are to be used across the different sub-parts as well. Hence, shifted some of the functions to the new script.'''
from pyResearchInsights.common_functions import pre_processing, argument_formatter, keyword_url_generator, abstract_id_log_name_generator, status_logger
'''Requests collects the HTML from the URL that is being pinged'''
import requests


def url_reader(url, status_logger_name):
'''This keyword is supplied to the URL and is hence used for souping.
Expand All @@ -28,10 +29,8 @@ def url_reader(url, status_logger_name):
moves on to the next PII number'''
try:
'''Fetching the HTML code of the given page from the URL'''
open_connection = build_opener(HTTPCookieProcessor())
html_code = open_connection.open(url)
'''Closing the abstract window after each abstract has been extracted'''
return html_code
response = requests.get(url)
return response.content
except (UnboundLocalError, urllib.error.HTTPError):
pass

Expand Down
12 changes: 5 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@
setup(
name = 'pyResearchInsights', # How you named your package folder (MyLib)
packages = ['pyResearchInsights'], # Choose the same as "name"
version = '1.59', # Start with a small number and increase it with every change you make
version = '1.60', # Start with a small number and increase it with every change you make
license='MIT', # Choose a license from here: https://help.github.com/articles/licensing-a-repository
description = 'End-to-end tool for scientific literature analysis', # Give a short description about your library
long_description = 'Check out the detailed README [here](https://github.com/SarthakJShetty/pyResearchInsights)!',
author = 'Sarthak J. Shetty', # Type in your name
author_email = '[email protected]', # Type in your E-Mail
url = 'https://github.com/SarthakJShetty/pyResearchInsights', # Provide either the link to your github or to your website
download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_159.tar.gz', # I explain this later on
download_url = 'https://github.com/SarthakJShetty/pyResearchInsights/archive/v_160.tar.gz', # I explain this later on
keywords = ['Educational Tools', 'Analysis', 'Scraper', 'Natural Language Processing'], # Keywords that define your package best
install_requires=[ # I get to this in a second
'numpy',
'pandas',
'matplotlib',
'matplotlib==3.5.2',
"scipy==1.10.1",
'nltk',
'spacy',
'pyLDAvis',
Expand All @@ -26,9 +27,6 @@
'Intended Audience :: Developers', # Define that your audience are developers
'Topic :: Software Development :: Build Tools',
'License :: OSI Approved :: MIT License', # Again, pick a license
'Programming Language :: Python :: 3', #Specify which Python versions you want to support
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.11',
],
)

0 comments on commit 0a74ef6

Please sign in to comment.