Skip to content

Commit

Permalink
OA: green access parsing fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ErnestaP committed Jul 31, 2024
1 parent 1ba37db commit e9f5b14
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 32 deletions.
15 changes: 5 additions & 10 deletions dags/open_access/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,11 @@ def parse_subset_green_access(records):
is_it_wanted_record_by_540_publication = (
not is_subset_540_publication_golden_access(datafields_540)
)
green_access_by_field = (
is_it_wanted_record_by_856 or is_it_wanted_record_by_540_preprint
)

if (
is_it_wanted_record_by_856
or is_it_wanted_record_by_540_preprint
or is_it_wanted_record_by_540_publication
):
if green_access_by_field and is_it_wanted_record_by_540_publication:
filtered_records.append(record)

return filtered_records
Expand All @@ -106,11 +105,7 @@ def parse_subset_golden_access(records):
datafields_540 = record.findall("datafield/[@tag='540']")
if datafields_540 is None:
continue
is_it_wanted_record_by_540_publication = (
is_subset_540_publication_golden_access(datafields_540)
)

if is_it_wanted_record_by_540_publication:
if is_subset_540_publication_golden_access(datafields_540):
filtered_records.append(record)
return filtered_records

Expand Down
25 changes: 3 additions & 22 deletions tests/open_access/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,13 @@
"2882298",
]

expected_green = [
"2894668",
"2891489",
"2891488",
"2891487",
"2888511",
"2888151",
"2886038",
"2884472",
"2884471",
"2884470",
"2884469",
"2883672",
"2882429",
"2882335",
"2882328",
"2882327",
"2882324",
"2882322",
"2882311",
"2882298",
]
expected_green = ["2891489", "2891487", "2886038", "2884472", "2884469"]


def test_get_golden_access_records_dois(shared_datadir):
    """Check the golden-access record ids extracted from ``search.xml``.

    Reads the ``search.xml`` fixture found under ``shared_datadir``, passes
    its text to ``get_golden_access_records_ids`` and expects the result to
    equal ``expected_golden``.
    """
    with open(shared_datadir / "search.xml") as file:
        records_ids = get_golden_access_records_ids(file.read())
    # No debug printing here: on failure pytest's assertion rewriting already
    # shows both sides of the comparison.
    assert records_ids == expected_golden


Expand Down Expand Up @@ -107,4 +87,5 @@ def test_parse_subset_540_publications(shared_datadir):
def test_get_green_access_records_dois(shared_datadir):
    """Check the green-access record ids extracted from ``search.xml``.

    Reads the ``search.xml`` fixture found under ``shared_datadir``, passes
    its text to ``get_green_access_records_ids`` and expects the result to
    equal ``expected_green``.
    """
    with open(shared_datadir / "search.xml") as file:
        records_ids = get_green_access_records_ids(file.read())
    # No debug printing here: on failure pytest's assertion rewriting already
    # shows the actual ids next to the expected ones.
    assert records_ids == expected_green

0 comments on commit e9f5b14

Please sign in to comment.