From e9f5b149c4b11bd57ce0706e4272f8d4a86542e3 Mon Sep 17 00:00:00 2001 From: ErnestaP Date: Wed, 31 Jul 2024 17:02:06 +0200 Subject: [PATCH] OA: green access parsing fix --- dags/open_access/parsers.py | 15 +++++---------- tests/open_access/test_parser.py | 25 +++---------------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/dags/open_access/parsers.py b/dags/open_access/parsers.py index 7230322..5b82694 100644 --- a/dags/open_access/parsers.py +++ b/dags/open_access/parsers.py @@ -89,12 +89,11 @@ def parse_subset_green_access(records): is_it_wanted_record_by_540_publication = ( not is_subset_540_publication_golden_access(datafields_540) ) + green_access_by_field = ( + is_it_wanted_record_by_856 or is_it_wanted_record_by_540_preprint + ) - if ( - is_it_wanted_record_by_856 - or is_it_wanted_record_by_540_preprint - or is_it_wanted_record_by_540_publication - ): + if green_access_by_field and is_it_wanted_record_by_540_publication: filtered_records.append(record) return filtered_records @@ -106,11 +105,7 @@ def parse_subset_golden_access(records): datafields_540 = record.findall("datafield/[@tag='540']") if datafields_540 is None: continue - is_it_wanted_record_by_540_publication = ( - is_subset_540_publication_golden_access(datafields_540) - ) - - if is_it_wanted_record_by_540_publication: + if is_subset_540_publication_golden_access(datafields_540): filtered_records.append(record) return filtered_records diff --git a/tests/open_access/test_parser.py b/tests/open_access/test_parser.py index f2dc7c7..bced88b 100644 --- a/tests/open_access/test_parser.py +++ b/tests/open_access/test_parser.py @@ -25,33 +25,13 @@ "2882298", ] -expected_green = [ - "2894668", - "2891489", - "2891488", - "2891487", - "2888511", - "2888151", - "2886038", - "2884472", - "2884471", - "2884470", - "2884469", - "2883672", - "2882429", - "2882335", - "2882328", - "2882327", - "2882324", - "2882322", - "2882311", - "2882298", -] +expected_green = ["2891489", "2891487", "2886038", "2884472", "2884469"] def test_get_golden_access_records_dois(shared_datadir): with open(shared_datadir / "search.xml") as file: records_ids = get_golden_access_records_ids(file.read()) + print() assert records_ids == expected_golden @@ -107,4 +87,5 @@ def test_parse_subset_540_publications(shared_datadir): def test_get_green_access_records_dois(shared_datadir): with open(shared_datadir / "search.xml") as file: records_ids = get_green_access_records_ids(file.read()) + print(records_ids) assert records_ids == expected_green