Skip to content

Commit

Permalink
Merge pull request #6 from autonomio/add_arxiv_api
Browse files Browse the repository at this point in the history
add arxiv API
  • Loading branch information
mikkokotila authored Jan 29, 2022
2 parents e3ad9ca + e149758 commit 82273de
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.DS_Store
*.pyc
__pycache__
dedomena/*.pyc
dedomena/*/*.pyc
1 change: 1 addition & 0 deletions dedomena/apis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .twitter import twitter
from .pubmed import pubmed
from .arxiv import arxiv
38 changes: 38 additions & 0 deletions dedomena/apis/arxiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
def arxiv(keyword, n):

    '''Get articles with meta-data from ArXiv

    articles = arxiv('tibetan', 50)

    keyword | str | The string to be searched for. The query uses the
                    `all:` prefix, so it is not restricted to titles.
                    Characters that are unsafe in a URL (spaces, quotes,
                    `&`, `#`, ...) are percent-encoded; `+`, `%` and `:`
                    are passed through so already-encoded queries keep
                    working.
    n | int | Maximum number of articles to request.

    Returns a pandas DataFrame with the columns 'title',
    'publication_date' and 'abstract', one row per returned article.
    The DataFrame is empty (but has the same columns) when the feed
    contains no entries.
    '''

    # `import urllib` alone does not load the `request` submodule, so the
    # submodules are imported explicitly.
    import urllib.parse
    import urllib.request

    import pandas as pd
    import xmltodict

    columns = ['title', 'publication_date', 'abstract']

    url = 'http://export.arxiv.org/api/query?search_query=all:'
    encoded_keyword = urllib.parse.quote(keyword, safe='+%:')
    query = encoded_keyword + '&start=0&max_results=' + str(n)

    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url + query) as response:
        results_xml = response.read().decode('utf-8')

    feed = xmltodict.parse(results_xml)['feed']

    # No match: the feed has no 'entry' key at all.
    entries = feed.get('entry', [])

    # Exactly one match: xmltodict yields a single mapping instead of a
    # list, so normalise to a list before iterating.
    if not isinstance(entries, list):
        entries = [entries]

    rows = [[entry['title'], entry['published'], entry['summary']]
            for entry in entries]

    return pd.DataFrame(rows, columns=columns)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pandas
pymed
twintel
pmlb
pmlb
xmltodict
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
URL = 'http://autonom.io'
LICENSE = 'MIT'
DOWNLOAD_URL = 'https://github.com/autonomio/dedomena/'
VERSION = '0.0.7'
VERSION = '0.1.0'

try:
from setuptools import setup
Expand All @@ -26,7 +26,8 @@
install_requires = ['pandas',
'pymed',
'twintel',
'pmlb']
'pmlb',
'xmltodict']

if __name__ == "__main__":

Expand Down
8 changes: 7 additions & 1 deletion test_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,10 @@
_null = da.datasets.pmlb(dataset)

# test apis.twitter
_null = da.apis.twitter('cars', 200)
_null = da.apis.twitter('cars', 50)

# test apis.pubmed
_null = da.apis.pubmed('COVID', 50)

# test apis.arxiv
_null = da.apis.arxiv('nlp', 50)

0 comments on commit 82273de

Please sign in to comment.