From 75cc606c70e005a4b7c0a78737071a7587f3d05d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Wed, 17 Jan 2018 13:39:40 +0100 Subject: [PATCH] IOP Spider: make use of NLM parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Szymon Łopaciuk --- hepcrawl/extractors/nlm.py | 158 ------------- hepcrawl/spiders/iop_spider.py | 92 +++----- t.json | 0 tests/functional/iop/fixtures/test_iop.json | 124 +++++------ tests/functional/iop/test_iop.py | 3 +- tests/unit/test_iop.py | 231 -------------------- x.json | 1 + 7 files changed, 86 insertions(+), 523 deletions(-) delete mode 100644 hepcrawl/extractors/nlm.py delete mode 100644 t.json delete mode 100644 tests/unit/test_iop.py create mode 100644 x.json diff --git a/hepcrawl/extractors/nlm.py b/hepcrawl/extractors/nlm.py deleted file mode 100644 index dddc1e13..00000000 --- a/hepcrawl/extractors/nlm.py +++ /dev/null @@ -1,158 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of hepcrawl. -# Copyright (C) 2016, 2017 CERN. -# -# hepcrawl is a free software; you can redistribute it and/or modify it -# under the terms of the Revised BSD License; see LICENSE file for -# more details. - -"""Common extraction from the NLM XML format.""" - -from __future__ import absolute_import, division, print_function - - -class NLM(object): - """Special extractions for NLM formats.""" - - @staticmethod - def get_authors(node): - """Get the authors.""" - authors = [] - for author in node.xpath("./AuthorList//Author"): - surname = author.xpath("./LastName/text()").extract_first() - firstname = author.xpath("./FirstName/text()").extract_first() - middlename = author.xpath("./MiddleName/text()").extract_first() - affiliations = author.xpath(".//Affiliation/text()").extract() - - if not surname: - surname = "" - given_names = "" - if firstname and middlename: - given_names = "{} {}".format(firstname, middlename) - elif firstname: - given_names = firstname - - auth_dict = {} - auth_dict["surname"] = surname - auth_dict["given_names"] = given_names - if affiliations: - auth_dict["affiliations"] = [ - {"value": aff} for aff in affiliations] - authors.append(auth_dict) - - return authors - - @staticmethod - def get_collections(doctype): - """Return the article's collection.""" - collections = ["HEP", "Citeable", "Published"] - if doctype: - if doctype == "Review": - collections += ["Review"] - if "conference" in doctype.lower(): - collections += ["ConferencePaper"] - return collections - - @staticmethod - def get_dois(node): - """Get DOI.""" - dois = node.xpath( - ".//ArticleIdList/ArticleId[@IdType='doi']/text()").extract() - if not dois: - dois = node.xpath( - ".//ELocationID[@EIdType='doi']/text()").extract() - - return dois - - @staticmethod - def get_date_published(node): - """Publication date.""" - year = node.xpath(".//Journal/PubDate/Year/text()").extract_first() - month = node.xpath(".//Journal/PubDate/Month/text()").extract_first() - day = node.xpath(".//Journal/PubDate/Day/text()").extract_first() - - date_published = "" - if year: - date_published = year - if month: - date_published += "-" + month - if day: - date_published += "-" + day - - return date_published - - @staticmethod - def get_pub_status(node): - """Publication status. - - .. note:: - Cases of publication status: - - * aheadofprint - * ppublish - * epublish - * received - * accepted - * revised - * ecollection - """ - pubstatus = node.xpath(".//Journal/PubDate/@PubStatus").extract_first() - - return pubstatus - - @staticmethod - def get_doctype(node): - """Publication type. - - .. note:: - Cases of publication type: - - * Addresses - * Bibliography - * Case Reports - * Classical Article - * Clinical Conference - * Clinical Trial - * Congresses - * Consensus Development Conference - * Consensus Development Conference, NIH - * Corrected and Republished Article - * Editorial - * Festschrift - * Guideline - * Interview - * Journal Article - * Lectures - * etter - * Meta-Analysis - * News - * Newspaper Article - * Observational Study - * Patient Education Handout - * Practice Guideline - * Published Erratum - * Retraction of Publication - * Review - * Video-Audio Media - * Webcasts - """ - pubtype = node.xpath(".//PublicationType/text()").extract_first() - return pubtype - - @staticmethod - def get_page_numbers(node): - """Get page numbers and number of pages.""" - - fpage = node.xpath(".//FirstPage/text()").extract_first() - lpage = node.xpath(".//LastPage/text()").extract_first() - try: - page_nr = str(int(lpage) - int(fpage) + 1) - except (ValueError, TypeError): - page_nr = None - - return ( - fpage, - lpage, - page_nr, - ) diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py index 1601b04d..91b05aa0 100644 --- a/hepcrawl/spiders/iop_spider.py +++ b/hepcrawl/spiders/iop_spider.py @@ -19,13 +19,11 @@ from scrapy.spiders import XMLFeedSpider from .common import StatefulSpider -from ..extractors.nlm import NLM -from ..items import HEPRecord -from ..loaders import HEPLoader +from ..parsers import NLMParser from ..utils import ParsedItem -class IOPSpider(StatefulSpider, XMLFeedSpider, NLM): +class IOPSpider(StatefulSpider, XMLFeedSpider): """IOPSpider crawler. This spider should first be able to harvest files from `IOP STACKS`_. @@ -152,7 +150,7 @@ def get_pdf_path(self, vol, issue, fpage): if pattern in pdf_path: return os.path.join(self.pdf_files, pdf_path) - def add_document(self, file_path, hidden, fulltext): + def make_document(self, file_path, hidden, fulltext): """Create a structured dictionary and add to 'files' item.""" file_dict = { "hidden": hidden, @@ -164,72 +162,40 @@ def add_document(self, file_path, hidden, fulltext): return file_dict def parse_node(self, response, node): - """Parse the record XML and create a HEPRecord.""" - record = HEPLoader(item=HEPRecord(), selector=node, response=response) - - pub_status = self.get_pub_status(node) - if pub_status in {"aheadofprint", "received"}: - return None - - fpage, lpage, page_nr = self.get_page_numbers(node) - volume = node.xpath(".//Journal/Volume/text()").extract_first() - issue = node.xpath(".//Journal/Issue/text()").extract_first() - - record.add_value("journal_fpage", fpage) - record.add_value("journal_lpage", lpage) - record.add_xpath('abstract', ".//Abstract") - record.add_xpath("title", ".//ArticleTitle") - record.add_value('authors', self.get_authors(node)) - journal_title = node.xpath( - ".//Journal/JournalTitle/text()").extract_first() - record.add_value("journal_title", journal_title) - record.add_value("journal_issue", issue) - record.add_value("journal_volume", volume) - record.add_xpath("journal_issn", ".//Journal/Issn/text()") - record.add_dois(dois_values=self.get_dois(node)) - - journal_year = node.xpath(".//Journal/PubDate/Year/text()").extract() - if journal_year: - record.add_value("journal_year", int(journal_year[0])) - - record.add_xpath("language", ".//Language/text()") - record.add_value("page_nr", page_nr) - record.add_value('date_published', self.get_date_published(node)) - record.add_xpath('copyright_statement', - "./CopyrightInformation/text()") - record.add_xpath('copyright_holder', "//Journal/PublisherName/text()") - record.add_xpath( - 'free_keywords', "ObjectList/Object[@Type='keyword']/Param[@Name='value']/text()") - - record.add_xpath("related_article_doi", "//Replaces[@IdType='doi']/text()") - doctype = self.get_doctype(node) # FIXME: should these be mapped? - record.add_value("journal_doctype", doctype) - record.add_value('collections', self.get_collections(doctype)) - - xml_file_path = response.url - record.add_value( - "documents", - self.add_document(xml_file_path, hidden=True, fulltext=True), + """Parse individual Article nodes to create a HEPRecord.""" + parser = NLMParser(node, source='IOP') + + xml_document = self.make_document( + file_path=response.url, + hidden=True, + fulltext=False, ) + parser.builder.add_document(**xml_document) + if self.pdf_files: - pdf_file_path = self.get_pdf_path(volume, issue, fpage) + pdf_file_path = self.get_pdf_path( + parser.journal_volume, + parser.journal_issue, + parser.page_start + ) if pdf_file_path: - if doctype and "erratum" in doctype.lower(): + if parser.material == "erratum": fulltext = False else: - fulltext = True - if journal_title in self.OPEN_ACCESS_JOURNALS: + fulltext = True # FIXME + if parser.journal_title in self.OPEN_ACCESS_JOURNALS: hidden = False else: hidden = True - record.add_value( - "documents", - self.add_document(pdf_file_path, hidden=hidden, fulltext=fulltext), + + pdf_document = self.make_document( + file_path=pdf_file_path, + hidden=hidden, + fulltext=fulltext ) + parser.builder.add_document(**pdf_document) - parsed_item = ParsedItem( - record=record.load_item(), - record_format='hepcrawl', + return ParsedItem( + record=parser.parse(), + record_format='hep', ) - - return parsed_item diff --git a/t.json b/t.json deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/functional/iop/fixtures/test_iop.json b/tests/functional/iop/fixtures/test_iop.json index 1b620dd2..c806d9a5 100644 --- a/tests/functional/iop/fixtures/test_iop.json +++ b/tests/functional/iop/fixtures/test_iop.json @@ -1,47 +1,30 @@ [ { - "refereed": true, "documents": [ { - "fulltext": true, + "fulltext": false, "description": "IOP", "url": "file:///code/tests/functional/iop/fixtures/test_iop.xml", - "source": "iop", + "source": "IOP", "key": "test_iop.xml", "hidden": true } ], - "curated": false, - "_collections": [ - "Literature" - ], - "dois": [ - { - "source": "iop", - "value": "10.1088/1742-6596/851/1/012001" - } - ], "acquisition_source": { "source": "iop", "method": "hepcrawl", "submission_number": "None", - "datetime": "2018-01-12T13:06:39.088639" + "datetime": "2018-01-17T12:25:25.086248" }, - "titles": [ - { - "source": "iop", - "title": "Response of optically stimulated luminescence dosimeters subjected to X-rays in diagnostic energy range" - } - ], - "copyright": [ - { - "holder": "Institute of Physics" - } + "_collections": [ + "Literature" ], + "citeable": true, "authors": [ { - "affiliations": [ + "raw_affiliations": [ { + "source": "IOP", "value": "Department of Physics, Universiti Teknologi Malaysia, Johor, Bahru Johor, 81300, Malaysia. Centre for Energy Research and Training, Ahmadu Bello University, Zaria, Kaduna State, P.M.B. 1014, Nigeria." } ], @@ -63,13 +46,27 @@ "full_name": "Salehhon, N." } ], + "titles": [ + { + "source": "IOP", + "title": "Response of optically stimulated luminescence dosimeters subjected to X-rays in diagnostic energy range" + } + ], + "dois": [ + { + "source": "IOP", + "material": "publication", + "value": "10.1088/1742-6596/851/1/012001" + } + ], "publication_info": [ { + "journal_title": "J. Phys.: Conf. Ser.", + "material": "publication", "journal_volume": "851", - "page_start": "012001", "year": 2017, - "journal_issue": "1", - "journal_title": "J. Phys.: Conf. Ser." + "page_start": "012001", + "journal_issue": "1" } ], "document_type": [ @@ -77,61 +74,39 @@ ], "abstracts": [ { - "source": "iop", - "value": "The use of optically stimulated luminescence (OSL) for dosimetry applications has recently increased considerably due to availability of commercial OSL dosimeters (nanoDots) for clinical use. The OSL dosimeter has a great potential to be used in clinical dosimetry because of its prevailing advantages in both handling and application. However, utilising nanoDot OSLDs for dose measurement in diagnostic radiology can only be guaranteed when the performance and characteristics of the dosimeters are apposite. In the present work, we examined the response of commercially available nanoDot OSLD (Al$_{2}$O$_{3}$:C) subjected to X-rays in general radiography. The nanoDots response with respect to reproducibility, dose linearity and signal depletion were analysed using microStar reader (Landauer, Inc., Glenwood, IL). Irradiations were performed free-in-air using 70, 80 and 120 kV tube voltages and tube currents ranging from 10 \u2013 100 mAs. The results showed that the nanoDots exhibit good linearity and reproducibility when subjected to diagnostic X-rays, with coefficient of variations (CV) ranging between 2.3% to 3.5% representing a good reproducibility. The results also indicated average of 1% signal reduction per readout. Hence, the nanoDots showed a promising potential for dose measurement in general X-ray procedure." - } - ], - "imprints": [ - { - "date": "2017" + "source": "IOP", + "value": "The use of optically stimulated luminescence (OSL) for dosimetry applications has recently increased considerably due to availability of commercial OSL dosimeters (nanoDots) for clinical use. The OSL dosimeter has a great potential to be used in clinical dosimetry because of its prevailing advantages in both handling and application. However, utilising nanoDot OSLDs for dose measurement in diagnostic radiology can only be guaranteed when the performance and characteristics of the dosimeters are apposite. In the present work, we examined the response of commercially available nanoDot OSLD (Al2O3:C) subjected to X-rays in general radiography. The nanoDots response with respect to reproducibility, dose linearity and signal depletion were analysed using microStar reader (Landauer, Inc., Glenwood, IL). Irradiations were performed free-in-air using 70, 80 and 120 kV tube voltages and tube currents ranging from 10 – 100 mAs. The results showed that the nanoDots exhibit good linearity and reproducibility when subjected to diagnostic X-rays, with coefficient of variations (CV) ranging between 2.3% to 3.5% representing a good reproducibility. The results also indicated average of 1% signal reduction per readout. Hence, the nanoDots showed a promising potential for dose measurement in general X-ray procedure." } ], - "citeable": true + "curated": false }, { - "refereed": true, "documents": [ { - "fulltext": true, + "fulltext": false, "description": "IOP", "url": "file:///code/tests/functional/iop/fixtures/test_iop.xml", - "source": "iop", + "source": "IOP", "key": "test_iop.xml", "hidden": true } ], - "curated": false, - "_collections": [ - "Literature" - ], - "dois": [ - { - "source": "iop", - "value": "10.1088/1361-6560/aa6be8" - } - ], "acquisition_source": { "source": "iop", "method": "hepcrawl", "submission_number": "None", - "datetime": "2018-01-12T13:06:39.192671" + "datetime": "2018-01-17T12:25:25.148679" }, - "titles": [ - { - "source": "iop", - "title": "Magnetic resonance imaging with hyperpolarized agents: methods and applications" - } - ], - "copyright": [ - { - "holder": "Institute of Physics" - } + "_collections": [ + "Literature" ], + "citeable": true, "authors": [ { - "affiliations": [ + "raw_affiliations": [ { - "value": "Department of Medical Physics, University of Wisconsin\u2013Madison, Madison, WI, United States of America." + "source": "IOP", + "value": "Department of Medical Physics, University of Wisconsin–Madison, Madison, WI, United States of America." } ], "full_name": "Adamson, Erin B." @@ -146,10 +121,24 @@ "full_name": "Fain, Sean B." } ], + "titles": [ + { + "source": "IOP", + "title": "Magnetic resonance imaging with hyperpolarized agents: methods and applications" + } + ], + "dois": [ + { + "source": "IOP", + "material": "publication", + "value": "10.1088/1361-6560/aa6be8" + } + ], "publication_info": [ { "page_end": "R123", "journal_title": "Phys. Med. Biol.", + "material": "publication", "journal_volume": "62", "year": 2017, "page_start": "R81", @@ -161,15 +150,10 @@ ], "abstracts": [ { - "source": "iop", - "value": "In the past decade, hyperpolarized (HP) contrast agents have been under active development for MRI applications to address the twin challenges of functional and quantitative imaging. Both HP helium ($^{3}$He) and xenon ($^{129}$Xe) gases have reached the stage where they are under study in clinical research. HP $^{129}$Xe, in particular, is poised for larger scale clinical research to investigate asthma, chronic obstructive pulmonary disease, and fibrotic lung diseases. With advances in polarizer technology and unique capabilities for imaging of $^{129}$Xe gas exchange into lung tissue and blood, HP $^{129}$Xe MRI is attracting new attention. In parallel, HP $^{13}$C and $^{15}$N MRI methods have steadily advanced in a wide range of pre-clinical research applications for imaging metabolism in various cancers and cardiac disease. The HP [1-$^{13}$C] pyruvate MRI technique, in particular, has undergone phase I trials in prostate cancer and is poised for investigational new drug trials at multiple institutions in cancer and cardiac applications. This review treats the methodology behind both HP gases and HP $^{13}$C and $^{15}$N liquid state agents. Gas and liquid phase HP agents share similar technologies for achieving non-equilibrium polarization outside the field of the MRI scanner, strategies for image data acquisition, and translational challenges in moving from pre-clinical to clinical research. To cover the wide array of methods and applications, this review is organized by numerical section into (1) a brief introduction, (2) the physical and biological properties of the most common polarized agents with a brief summary of applications and methods of polarization, (3) methods for image acquisition and reconstruction specific to improving data acquisition efficiency for HP MRI, (4) the main physical properties that enable unique measures of physiology or metabolic pathways, followed by a more detailed review of the literature describing the use of HP agents to study: (5) metabolic pathways in cancer and cardiac disease and (6) lung function in both pre-clinical and clinical research studies, concluding with (7) some future directions and challenges, and (8) an overall summary." - } - ], - "imprints": [ - { - "date": "2017" + "source": "IOP", + "value": "In the past decade, hyperpolarized (HP) contrast agents have been under active development for MRI applications to address the twin challenges of functional and quantitative imaging. Both HP helium (3He) and xenon (129Xe) gases have reached the stage where they are under study in clinical research. HP 129Xe, in particular, is poised for larger scale clinical research to investigate asthma, chronic obstructive pulmonary disease, and fibrotic lung diseases. With advances in polarizer technology and unique capabilities for imaging of 129Xe gas exchange into lung tissue and blood, HP 129Xe MRI is attracting new attention. In parallel, HP 13C and 15N MRI methods have steadily advanced in a wide range of pre-clinical research applications for imaging metabolism in various cancers and cardiac disease. The HP [1-13C] pyruvate MRI technique, in particular, has undergone phase I trials in prostate cancer and is poised for investigational new drug trials at multiple institutions in cancer and cardiac applications. This review treats the methodology behind both HP gases and HP 13C and 15N liquid state agents. Gas and liquid phase HP agents share similar technologies for achieving non-equilibrium polarization outside the field of the MRI scanner, strategies for image data acquisition, and translational challenges in moving from pre-clinical to clinical research. To cover the wide array of methods and applications, this review is organized by numerical section into (1) a brief introduction, (2) the physical and biological properties of the most common polarized agents with a brief summary of applications and methods of polarization, (3) methods for image acquisition and reconstruction specific to improving data acquisition efficiency for HP MRI, (4) the main physical properties that enable unique measures of physiology or metabolic pathways, followed by a more detailed review of the literature describing the use of HP agents to study: (5) metabolic pathways in cancer and cardiac disease and (6) lung function in both pre-clinical and clinical research studies, concluding with (7) some future directions and challenges, and (8) an overall summary." } ], - "citeable": true + "curated": false } ] diff --git a/tests/functional/iop/test_iop.py b/tests/functional/iop/test_iop.py index 08e3397d..d8d6a7a6 100644 --- a/tests/functional/iop/test_iop.py +++ b/tests/functional/iop/test_iop.py @@ -103,5 +103,6 @@ def test_iop(expected_results, config): expected_results, key=lambda x: x['titles'][0]['title'] ) - + import json + json.dump(gotten_results, open('x.json', 'w')) assert gotten_results == expected_results diff --git a/tests/unit/test_iop.py b/tests/unit/test_iop.py deleted file mode 100644 index 8ea7b166..00000000 --- a/tests/unit/test_iop.py +++ /dev/null @@ -1,231 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of hepcrawl. -# Copyright (C) 2016, 2017 CERN. -# -# hepcrawl is a free software; you can redistribute it and/or modify it -# under the terms of the Revised BSD License; see LICENSE file for -# more details. - -from __future__ import absolute_import, division, print_function, unicode_literals - -import os - -import six - -import pytest - -from hepcrawl.spiders import iop_spider - -from hepcrawl.testlib.fixtures import ( - fake_response_from_file, - fake_response_from_string, - get_node, - get_test_suite_path, -) - -TEST_PDF_DIR = get_test_suite_path( - 'responses', - 'iop', - 'pdf', -) - - -@pytest.fixture -def record(): - """Return results generator from the WSP spider.""" - spider = iop_spider.IOPSpider() - response = fake_response_from_file('iop/xml/test_standard.xml') - node = get_node(spider, "Article", response) - spider.pdf_files = TEST_PDF_DIR - - parsed_item = spider.parse_node(response, node) - assert parsed_item - assert parsed_item.record - - return parsed_item.record - - -def test_title(record): - """Test extracting title.""" - title = 'A Modified Lynch Syndrome Screening Algorithm in Colon Cancer: BRAF Immunohistochemistry Is Efficacious and Cost Beneficial.' - assert "title" in record - assert record["title"] == title - - -def test_date_published(record): - """Test extracting date_published.""" - assert "date_published" in record - assert record["date_published"] == '2015-03' - - -def test_page_nr(record): - """Test extracting page_nr""" - assert "journal_fpage" in record - assert "journal_lpage" in record - assert record["journal_fpage"] == '336' - assert record["journal_lpage"] == '343' - - -def test_free_keywords(record): - """Test extracting free_keywords""" - keywords = [u'BRAF', u'MLH1', - u'Immunohistochemistry', u'Cost-benefit analysis'] - - assert "free_keywords" in record - for keyword in record["free_keywords"]: - assert keyword["source"] == "author" - assert keyword["value"] in keywords - - -def test_dois(record): - """Test extracting dois.""" - assert record["dois"] - assert record["dois"][0]["value"] == '10.1309/AJCP4D7RXOBHLKGJ' - - -def test_collections(record): - """Test extracting collections.""" - collections = ['HEP', 'Citeable', 'Published'] - assert record["collections"] - for collection in record["collections"]: - assert collection["primary"] in collections - - -def test_publication_info(record): - """Test extracting dois.""" - journal_title = "Am J Clin Pathol" - journal_year = 2015 - journal_volume = "143" - journal_issue = "3" - journal_issn = "1943-7722" - assert "journal_title" in record - assert record["journal_title"] == journal_title - assert "journal_year" in record - assert record["journal_year"] == journal_year - assert "journal_volume" in record - assert record["journal_volume"] == journal_volume - assert "journal_issue" in record - assert record["journal_issue"] == journal_issue - assert "journal_issn" in record - assert record["journal_issn"][0] == journal_issn - - -def test_copyrights(record): - """Test extracting copyright.""" - copyright_holder = "American Society for Clinical Pathology" - copyright_statement = "Copyright\xa9 by the American Society for \n Clinical Pathology" - assert "copyright_holder" in record - assert record["copyright_holder"] == copyright_holder - assert "copyright_statement" in record - assert record["copyright_statement"] == copyright_statement - - -def test_files(record): - """Test files dictionary.""" - pdf_filename = "143_3_336.pdf" - - assert "documents" in record - assert record["documents"][1]["hidden"] - assert record["documents"][1]["fulltext"] - assert record["documents"][1]["url"] == os.path.join(TEST_PDF_DIR, pdf_filename) - assert record["documents"][1]["key"] == pdf_filename - - -@pytest.fixture -def erratum_open_access_record(): - """Return results generator from the WSP spider.""" - spider = iop_spider.IOPSpider() - body = """ - -
- - Institute of Physics - J. Phys.: Conf. Ser. - 143 - 3 - - 336 - Published Erratum -
-
- """ - response = fake_response_from_string(body) - node = get_node(spider, "Article", response) - spider.pdf_files = get_test_suite_path( - 'responses', - 'iop', - 'pdf', - ) - - parsed_item = spider.parse_node(response, node) - assert parsed_item - assert parsed_item.record - - return parsed_item.record - - -def test_files_erratum_open_access_record(erratum_open_access_record): - """Test files dict with open access journal with erratum article.""" - pdf_filename = "143_3_336.pdf" - assert "documents" in erratum_open_access_record - assert not erratum_open_access_record["documents"][1]["hidden"] - assert not erratum_open_access_record["documents"][1]["fulltext"] - assert erratum_open_access_record["documents"][1]["url"] == ( - os.path.join(TEST_PDF_DIR, pdf_filename) - ) - assert erratum_open_access_record["documents"][1]["key"] == pdf_filename - - -def test_not_published_record(): - """Not-published paper should result in nothing.""" - spider = iop_spider.IOPSpider() - body = """ - -
- - - 2015 - 03 - - -
-
- """ - response = fake_response_from_string(body) - node = get_node(spider, "Article", response) - spider.pdf_files = get_test_suite_path( - 'responses', - 'iop', - 'pdf', - ) - records = spider.parse_node(response, node) - assert records is None - - -@pytest.fixture -def tarfile(): - """Return path to test tar.gz file.""" - return get_test_suite_path( - 'responses', - 'iop', - 'packages', - 'test.tar.gz', - ) - - -def test_tarfile(tarfile, tmpdir): - """Test untarring a tar.gz package with a test PDF file.""" - spider = iop_spider.IOPSpider() - pdf_files = spider.untar_files(tarfile, six.text_type(tmpdir)) - assert len(pdf_files) == 1 - assert "test_143_3_336.pdf" in pdf_files[0] - - -def test_handle_package(tarfile): - """Test getting the target folder name for pdf files.""" - spider = iop_spider.IOPSpider() - tarfile = "file://" + tarfile - target_folder = spider.handle_package(tarfile) - - assert target_folder diff --git a/x.json b/x.json new file mode 100644 index 00000000..afa237b5 --- /dev/null +++ b/x.json @@ -0,0 +1 @@ +[{"documents": [{"fulltext": false, "description": "IOP", "url": "file:///code/tests/functional/iop/fixtures/test_iop.xml", "source": "IOP", "key": "test_iop.xml", "hidden": true}], "acquisition_source": {"source": "iop", "method": "hepcrawl", "submission_number": "5652c7f6190f11e79e8000224dabeaad", "datetime": "2017-04-03T10:26:40.365216"}, "_collections": ["Literature"], "dois": [{"source": "IOP", "material": "publication", "value": "10.1088/1361-6560/aa6be8"}], "titles": [{"source": "IOP", "title": "Magnetic resonance imaging with hyperpolarized agents: methods and applications"}], "authors": [{"raw_affiliations": [{"source": "IOP", "value": "Department of Medical Physics, University of Wisconsin\u2013Madison, Madison, WI, United States of America."}], "full_name": "Adamson, Erin B."}, {"full_name": "Ludwig, Kai D."}, {"full_name": "Mummy, David G."}, {"full_name": "Fain, Sean B."}], "publication_info": [{"page_end": "R123", "journal_title": "Phys. Med. Biol.", "material": "publication", "journal_volume": "62", "year": 2017, "page_start": "R81", "journal_issue": "13"}], "document_type": ["article"], "abstracts": [{"source": "IOP", "value": "In the past decade, hyperpolarized (HP) contrast agents have been under active development for MRI applications to address the twin challenges of functional and quantitative imaging. Both HP helium (3He) and xenon (129Xe) gases have reached the stage where they are under study in clinical research. HP 129Xe, in particular, is poised for larger scale clinical research to investigate asthma, chronic obstructive pulmonary disease, and fibrotic lung diseases. With advances in polarizer technology and unique capabilities for imaging of 129Xe gas exchange into lung tissue and blood, HP 129Xe MRI is attracting new attention. In parallel, HP 13C and 15N MRI methods have steadily advanced in a wide range of pre-clinical research applications for imaging metabolism in various cancers and cardiac disease. The HP [1-13C] pyruvate MRI technique, in particular, has undergone phase I trials in prostate cancer and is poised for investigational new drug trials at multiple institutions in cancer and cardiac applications. This review treats the methodology behind both HP gases and HP 13C and 15N liquid state agents. Gas and liquid phase HP agents share similar technologies for achieving non-equilibrium polarization outside the field of the MRI scanner, strategies for image data acquisition, and translational challenges in moving from pre-clinical to clinical research. To cover the wide array of methods and applications, this review is organized by numerical section into (1) a brief introduction, (2) the physical and biological properties of the most common polarized agents with a brief summary of applications and methods of polarization, (3) methods for image acquisition and reconstruction specific to improving data acquisition efficiency for HP MRI, (4) the main physical properties that enable unique measures of physiology or metabolic pathways, followed by a more detailed review of the literature describing the use of HP agents to study: (5) metabolic pathways in cancer and cardiac disease and (6) lung function in both pre-clinical and clinical research studies, concluding with (7) some future directions and challenges, and (8) an overall summary."}], "curated": false, "citeable": true}, {"documents": [{"fulltext": false, "description": "IOP", "url": "file:///code/tests/functional/iop/fixtures/test_iop.xml", "source": "IOP", "key": "test_iop.xml", "hidden": true}], "acquisition_source": {"source": "iop", "method": "hepcrawl", "submission_number": "5652c7f6190f11e79e8000224dabeaad", "datetime": "2017-04-03T10:26:40.365216"}, "_collections": ["Literature"], "dois": [{"source": "IOP", "material": "publication", "value": "10.1088/1742-6596/851/1/012001"}], "titles": [{"source": "IOP", "title": "Response of optically stimulated luminescence dosimeters subjected to X-rays in diagnostic energy range"}], "authors": [{"raw_affiliations": [{"source": "IOP", "value": "Department of Physics, Universiti Teknologi Malaysia, Johor, Bahru Johor, 81300, Malaysia. Centre for Energy Research and Training, Ahmadu Bello University, Zaria, Kaduna State, P.M.B. 1014, Nigeria."}], "full_name": "Musa, Y."}, {"full_name": "Hashim, S."}, {"full_name": "Karim, M. K A"}, {"full_name": "Bakar, K.A."}, {"full_name": "Ang, W.C."}, {"full_name": "Salehhon, N."}], "publication_info": [{"journal_title": "J. Phys.: Conf. Ser.", "material": "publication", "journal_volume": "851", "year": 2017, "page_start": "012001", "journal_issue": "1"}], "document_type": ["article"], "abstracts": [{"source": "IOP", "value": "The use of optically stimulated luminescence (OSL) for dosimetry applications has recently increased considerably due to availability of commercial OSL dosimeters (nanoDots) for clinical use. The OSL dosimeter has a great potential to be used in clinical dosimetry because of its prevailing advantages in both handling and application. However, utilising nanoDot OSLDs for dose measurement in diagnostic radiology can only be guaranteed when the performance and characteristics of the dosimeters are apposite. In the present work, we examined the response of commercially available nanoDot OSLD (Al2O3:C) subjected to X-rays in general radiography. The nanoDots response with respect to reproducibility, dose linearity and signal depletion were analysed using microStar reader (Landauer, Inc., Glenwood, IL). Irradiations were performed free-in-air using 70, 80 and 120 kV tube voltages and tube currents ranging from 10 \u2013 100 mAs. The results showed that the nanoDots exhibit good linearity and reproducibility when subjected to diagnostic X-rays, with coefficient of variations (CV) ranging between 2.3% to 3.5% representing a good reproducibility. The results also indicated average of 1% signal reduction per readout. Hence, the nanoDots showed a promising potential for dose measurement in general X-ray procedure."}], "curated": false, "citeable": true}] \ No newline at end of file