ananelson · cameronneylon · May 28, 2014 · May 28, 2014 · May 28, 2014 · Jun 5, 2014
diff --git a/examples/pubspeed/2012/.DS_Store b/examples/pubspeed/2012/.DS_Store
diff --git a/examples/pubspeed/2012/oacensus.yaml b/examples/pubspeed/2012/oacensus.yaml
@@ -0,0 +1,40 @@
+# Get articles from Pubmed
+- pubmed:
+    search: '"journal article"[Publication Type]'
+    journals : 
+        - "Current Biology : CB"
+        - "Nature Neuroscience" 
+        - "Genes & development"
+        - "Bioinformatics"
+        - "Cell"
+        - "Cell host & microbe"
+        - "Nature genetics"
+        - "Molecular systems biology"
+        - "Nature"
+        - "PLoS neglected tropical diseases"
+        - "PLoS medicine"
+        - "PLoS biology"
+        - "PLoS computational biology"
+        - "PLoS pathogens"
+        - "The Journal of infectious diseases"
+        - "Nature medicine"
+        - "Genome research"
+        - "BMC infectious diseases"
+        - "PLoS genetics"
+        - "BMC public health"
+        - "Nature cell biology"
+        - "The American journal of tropical medicine and hygiene"
+        - "PeerJ"
+        - "SpringerPlus"
+        - "eLife"
+        - "Nature communications"
+        - "Scientific reports"
+        - "PLoS one"
+        - "BMJ open"
+        - "British medical journal"
+        - "Journal of virology"
+        - "Proceedings of the national academy of sciences of the united states of america"
+        - "Science (New York, N.Y.)"
+    start-period : 2012-01
+    end-period : 2012-12
+    ret-max : 1000
diff --git a/examples/pubspeed/2012/pubspeed2012.xls b/examples/pubspeed/2012/pubspeed2012.xls
diff --git a/examples/pubspeed/2012/run.sh b/examples/pubspeed/2012/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run oacensus using oacensus.yaml from the current directory and run the pubspeed-excel report.
+oacensus run --config oacensus.yaml --reports pubspeed-excel
diff --git a/examples/pubspeed/2013/oacensus.yaml b/examples/pubspeed/2013/oacensus.yaml
@@ -0,0 +1,40 @@
+# Get articles from Pubmed
+- pubmed:
+    search: '"journal article"[Publication Type]'
+    journals : 
+        - "Current Biology : CB"
+        - "Nature Neuroscience" 
+        - "Genes & development"
+        - "Bioinformatics"
+        - "Cell"
+        - "Cell host & microbe"
+        - "Nature genetics"
+        - "Molecular systems biology"
+        - "Nature"
+        - "PLoS neglected tropical diseases"
+        - "PLoS medicine"
+        - "PLoS biology"
+        - "PLoS computational biology"
+        - "PLoS pathogens"
+        - "The Journal of infectious diseases"
+        - "Nature medicine"
+        - "Genome research"
+        - "BMC infectious diseases"
+        - "PLoS genetics"
+        - "BMC public health"
+        - "Nature cell biology"
+        - "The American journal of tropical medicine and hygiene"
+        - "PeerJ"
+        - "SpringerPlus"
+        - "eLife"
+        - "Nature communications"
+        - "Scientific reports"
+        - "PLoS one"
+        - "BMJ open"
+        - "British medical journal"
+        - "Journal of virology"
+        - "Proceedings of the national academy of sciences of the united states of america"
+        - "Science (New York, N.Y.)"
+    start-period : 2013-01
+    end-period : 2013-12
+    ret-max : 1000
diff --git a/examples/pubspeed/2013/pubspeed2013.xls b/examples/pubspeed/2013/pubspeed2013.xls
diff --git a/examples/pubspeed/2013/run.sh b/examples/pubspeed/2013/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run oacensus using oacensus.yaml from the current directory and run the pubspeed-excel report.
+oacensus run --config oacensus.yaml --reports pubspeed-excel
diff --git a/examples/pubspeed/2014/oacensus.yaml b/examples/pubspeed/2014/oacensus.yaml
@@ -0,0 +1,40 @@
+# Get articles from Pubmed
+- pubmed:
+    search: '"journal article"[Publication Type]'
+    journals : 
+        - "Current Biology : CB"
+        - "Nature Neuroscience" 
+        - "Genes & development"
+        - "Bioinformatics"
+        - "Cell"
+        - "Cell host & microbe"
+        - "Nature genetics"
+        - "Molecular systems biology"
+        - "Nature"
+        - "PLoS neglected tropical diseases"
+        - "PLoS medicine"
+        - "PLoS biology"
+        - "PLoS computational biology"
+        - "PLoS pathogens"
+        - "The Journal of infectious diseases"
+        - "Nature medicine"
+        - "Genome research"
+        - "BMC infectious diseases"
+        - "PLoS genetics"
+        - "BMC public health"
+        - "Nature cell biology"
+        - "The American journal of tropical medicine and hygiene"
+        - "PeerJ"
+        - "SpringerPlus"
+        - "eLife"
+        - "Nature communications"
+        - "Scientific reports"
+        - "PLoS one"
+        - "BMJ open"
+        - "British medical journal"
+        - "Journal of virology"
+        - "Proceedings of the national academy of sciences of the united states of america"
+        - "Science (New York, N.Y.)"
+    start-period : 2014-01
+    end-period : 2014-06
+    ret-max : 1000
diff --git a/examples/pubspeed/2014/pubspeed2014.xls b/examples/pubspeed/2014/pubspeed2014.xls
diff --git a/examples/pubspeed/2014/run.sh b/examples/pubspeed/2014/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run oacensus using oacensus.yaml from the current directory and run the pubspeed-excel report.
+oacensus run --config oacensus.yaml --reports pubspeed-excel
diff --git a/examples/pubspeed/execute_pone.py b/examples/pubspeed/execute_pone.py
@@ -0,0 +1,39 @@
+__author__ = 'cneylon'
+
+import os
+import os.path
+import subprocess
+
+# (start, end) dates of each half-year window. Only the YYYY-MM prefix of each
+# date is passed to oacensus; the full start date names the run directory.
+DATE_PERIODS = [('2012-01-02', '2012-06-30'),
+                ('2012-07-01', '2012-12-31'),
+                ('2013-01-02', '2013-06-30'),
+                ('2013-07-01', '2013-12-31'),
+                ('2014-01-02', '2014-06-30')]
+
+# PubMed query for PLoS ONE records of publication type "journal article". The
+# outer single quotes make it a single-quoted scalar in the generated YAML.
+SEARCH_TERM = """'"PLoS one"[Journal] AND "journal article"[Publication Type]'"""
+
+YAML_TEMPLATE = """
+- pubmed:
+    search: %s
+    start-period: %s
+    end-period: %s 
+"""
+
+for period in DATE_PERIODS:
+    path = 'run/pone_' + period[0]
+    if not os.path.isdir(path):
+        os.makedirs(path)
+
+    yaml_path = os.path.join(path, 'oacensus.yaml')
+    with open(yaml_path, 'w') as f:
+        f.write(YAML_TEMPLATE % (SEARCH_TERM, period[0][0:7], period[1][0:7]))
+
+    # Run oacensus from inside the period's directory. Using cwd= leaves this
+    # script's own working directory untouched, so the relative 'run/...'
+    # path of the next period still resolves.
+    subprocess.call(['oacensus', 'run', '--config', 'oacensus.yaml', '--reports', 'pubspeed-excel'],
+                    cwd=path)
diff --git a/examples/pubspeed/run-report.sh b/examples/pubspeed/run-report.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run oacensus with its --progress option and run the pubspeed-excel report.
+oacensus run --progress --reports "pubspeed-excel"
diff --git a/examples/wellcome2013/apc-oag.xlsx b/examples/wellcome2013/apc-oag.xlsx
diff --git a/examples/wellcome2013/oacensus.yaml b/examples/wellcome2013/oacensus.yaml
@@ -0,0 +1,33 @@
+# Load standard licenses.
+- licenses
+
+# Create articles from xlsx file.
+- excelarticles: {
+    location: "wellcome20.xlsx",
+    list-name: Wellcome 2012-13,
+    source: wellcome,
+    period: 2012-13,
+    column-mapping : {
+        publisher : publisher.name,
+        journal : journal.title,
+        title : title,
+        doi : doi
+        }
+    }
+
+# Get publication date and standardize journal title (based on article DOI).
+- crossref
+
+# Get ISSNs based on journal titles.
+- crossrefjournals
+
+# Get openness information from OAG (based on article DOI).
+- oag
+
+# Check pubmed, get external identifiers including PMC (based on article DOI).
+- pubmed-update-repositories
+
+# Look up journal info in DOAJ
+- doaj
+
+# TODO look up license in PMC
diff --git a/examples/wellcome2013/run-report.sh b/examples/wellcome2013/run-report.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run oacensus with its --progress option and run the openness-excel report.
+oacensus run --progress --reports "openness-excel"
diff --git a/examples/wellcome2013/wellcome.xlsx b/examples/wellcome2013/wellcome.xlsx
diff --git a/examples/wellcome2013/wellcome20.xlsx b/examples/wellcome2013/wellcome20.xlsx
diff --git a/oacensus/licenses.yaml b/oacensus/licenses.yaml
@@ -26,7 +26,7 @@ cc-by-nd:
 cc-by-nc-sa:
     title: Creative Commons Attribution NonCommercial ShareAlike
-    url: https://creativecommons.org/licenses/by-nc-nd-sa/4.0
+    url: https://creativecommons.org/licenses/by-nc-sa/4.0
-    aliases: []
+    aliases: [cc-BY-NC-SA]
 
 cc-zero:
     title: No Rights Reserved

diff --git a/oacensus/load_plugins.py b/oacensus/load_plugins.py
@@ -14,9 +14,11 @@
 import oacensus.scrapers.rcukgtr
 import oacensus.scrapers.scimago
 import oacensus.scrapers.wiley
+import oacensus.scrapers.openaire
 
 import oacensus.reports.excel_dump
 import oacensus.reports.oa_excel
+import oacensus.reports.pubspeed_excel
 import oacensus.reports.text_dump
 import oacensus.reports.personal_openness
 import oacensus.reports.institution
diff --git a/oacensus/models.py b/oacensus/models.py
@@ -222,6 +222,12 @@ class Article(ModelBase):
         help_text="Digital object identifier for article.")
     date_published = CharField(null=True,
         help_text="When article was published, in YYYY(-MM(-DD)) format.")
+    date_submitted = CharField(null=True,
+        help_text="When article was submitted for peer review, in YYYY(-MM(-DD)) format.")
+    date_accepted = CharField(null=True,
+        help_text="When article was accepted for publication, in YYYY(-MM(-DD)) format.")
+    date_aheadofprint = CharField(null=True,
+        help_text="Ahead of print publication date, generally AOP, in YYYY(-MM(-DD)) format.")
     period = CharField(
         help_text="Name of date-based period in which this article was scraped.")
     url = CharField(null=True,

diff --git a/oacensus/reports/pubspeed_excel.py b/oacensus/reports/pubspeed_excel.py
@@ -0,0 +1,106 @@
+from oacensus.models import Article
+from oacensus.models import ModelBase
+from oacensus.report import Report
+import datetime
+import inflection
+import inspect
+import os
+import xlwt
+
+class PubspeedExcel(Report):
+    """
+    An excel-based publication speed report.
+
+    Writes one row per article to a single worksheet, using the columns
+    named in the 'fields' setting. By default these are the DOI, title,
+    published / submitted / accepted / ahead-of-print dates, and the
+    journal title and ISSN.
+    """
+
+    aliases = ['pubspeed-excel']
+
+    _settings = {
+            'filename' : ("Name of file to write excel dump to.", "pubspeed.xls"),
+            "sheet-name" : ("Name of worksheet.", "Articles"),
+            'date-format-string' : ( "Excel style date format string.", "D-MMM-YYYY"),
+            "fields" : ("Fields to include in report.", [
+                "doi", "title", "date_published",
+                "date_submitted", "date_accepted", "date_aheadofprint",
+                "journal.title", "journal.issn"
+                ])
+            }
+
+    def _field_value(self, article, key):
+        """
+        Returns the raw value of the field named by key for an article.
+
+        Keys starting with 'journal.' or 'publisher.' are read from the
+        article's journal or from that journal's publisher; the result is
+        None when that related record is missing. Any other key is read
+        from the article itself.
+        """
+        if key.startswith("journal."):
+            owner = article.journal
+            attr = key[len("journal."):]
+        elif key.startswith("publisher."):
+            journal = article.journal
+            owner = journal.publisher if journal is not None else None
+            attr = key[len("publisher."):]
+        else:
+            owner = article
+            attr = key
+
+        if owner is None:
+            return None
+        return getattr(owner, attr)
+
+    def run(self):
+        """
+        Writes the report to the file named by the 'filename' setting,
+        removing any existing file of that name first.
+        """
+        date_style = xlwt.XFStyle()
+        date_style.num_format_str = self.setting('date-format-string')
+
+        bold_font = xlwt.Font()
+        bold_font.bold = True
+
+        bold_style = xlwt.XFStyle()
+        bold_style.font = bold_font
+
+        filename = self.setting('filename')
+        if os.path.exists(filename):
+            os.remove(filename)
+
+        workbook = xlwt.Workbook()
+        # The worksheet is named by the 'sheet-name' setting.
+        ws = workbook.add_sheet(self.setting('sheet-name'))
+
+        keys = self.setting('fields')
+
+        # Write Headers
+        for j, k in enumerate(keys):
+            heading = inflection.titleize(k.replace("_id", "_identifier"))
+            ws.write(0, j, heading, bold_style)
+
+        # Row 0 holds the headers, so article i is written to row i+1.
+        for i, article in enumerate(Article.select()):
+            for j, key in enumerate(keys):
+                value = self._field_value(article, key)
+
+                fmt = None
+
+                if isinstance(value, ModelBase):
+                    value = unicode(value)
+                elif isinstance(value, datetime.date):
+                    fmt = date_style
+                elif inspect.ismethod(value):
+                    value = value()
+
+                if fmt:
+                    ws.write(i+1, j, value, fmt)
+                else:
+                    ws.write(i+1, j, value)
+
+        workbook.save(filename)
+        print "  pubspeed report written to %s" % filename
diff --git a/oacensus/scrapers/openaire.py b/oacensus/scrapers/openaire.py
@@ -0,0 +1,68 @@
+__author__ = 'cneylon'
+from oacensus.scraper import ArticleScraper
+from oacensus.models import Article
+from oacensus.models import Repository
+from oacensus.models import Instance
+import json
+import requests
+import xml.etree.ElementTree as ET
+
+class OpenAIRE(ArticleScraper):
+    """
+    Queries the OpenAIRE publications API for every article in the database
+    that has a DOI, and stores an Instance for each hosted copy it reports.
+    """
+    aliases = ['openaire']
+
+    _settings = {
+            'base-url' : ("Base url of OpenAIRE API", "http://api.openaire.eu/search/publications"),
+            }
+
+    def scrape(self):
+        # Deliberately a no-op: the queries depend on which articles are in
+        # the db, so results cached at scrape time would not be accurate.
+        pass
+
+    def process(self):
+        """
+        Creates one Instance per 'instance' element in each API response.
+        """
+        # Only the 'Open Access' classification is treated as free to read;
+        # the other listed values, and any unlisted one, are not.
+        free_to_read_by_status = {
+                'Open Access' : True,
+                'Closed Access' : False,
+                'Embargo' : False,
+                'Restricted' : False,
+                }
+
+        # Articles whose doi is not null.
+        with_doi = Article.select().where(~(Article.doi >> None))
+
+        for article in with_doi:
+            query = {'doi' : article.doi}
+            response = requests.get(self.setting('base-url'), params = query)
+            xml_bytes = response.text.encode('utf-8')
+            root = ET.fromstring(xml_bytes)
+
+            for instance_el in root.iter('instance'):
+                repo_name = instance_el.find('hostedby').get('name')
+                repo_attrs = {
+                        'name' : repo_name,
+                        'source' : 'openaire',
+                        'log' : 'Created by openaire plugin',
+                        }
+                repository = Repository.find_or_create_by_name(repo_attrs)
+
+                access_status = instance_el.find('licence').get('classname')
+                is_free = free_to_read_by_status.get(access_status, False)
+
+                info_url = instance_el.find('webresource').find('url').text
+
+                Instance.create(
+                        article = article,
+                        repository = repository,
+                        free_to_read = is_free,
+                        info_url = info_url,
+                        source = self.db_source(),
+                        log = 'OpenAIRE response obtained from %s repository' % repo_name)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		oacensus run --config oacensus.yaml --reports pubspeed-excel
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/bin/sh
		oacensus run --progress --reports "pubspeed-excel"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		oacensus run --progress --reports "openness-excel"