Skip to content

Commit

Permalink
Fix empty orgName in authors.xml
Browse files Browse the repository at this point in the history
  • Loading branch information
nooraangelva committed Jun 2, 2022
1 parent 0174570 commit 345e47f
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
2 changes: 1 addition & 1 deletion inspirehep/modules/workflows/tasks/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def extract_authors_from_xml(xml_content):
# Getting all the names for affiliated organizations using the organization ids from author
for affiliation in author.xpath("./authorAffiliations/authorAffiliation/@organizationid").getall():
orgName = content.xpath('//organizations/Organization[@id="{}"]/orgName[@source="spiresICN" or @source="INSPIRE" and text()!="" ]/text()'.format(affiliation)).get()
if not re.match(undefined_or_none_value_regex, orgName):
if orgName and not re.match(undefined_or_none_value_regex, orgName):
affiliations.append(orgName)

# Getting all the affiliations_identifiers for affiliated organizations using the organization ids from author
Expand Down
Binary file added tests/unit/workflows/fixtures/2205.14864.tar.gz
Binary file not shown.
48 changes: 48 additions & 0 deletions tests/unit/workflows/test_workflows_tasks_arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,3 +1128,51 @@ def test_arxiv_author_no_none_in_institution_affiliations():
validate(expected_authors, authors_subschema)

assert expected_authors[0] == obj.data['authors'][14]


def test_arxiv_author_no_organization_name():
    """Check the author list extracted from the 2205.14864 fixture.

    Runs ``arxiv_author_list`` against the ``2205.14864.tar.gz`` fixture and
    asserts that the author at index 29 consists of exactly an INSPIRE id and
    a full name, i.e. no other keys such as affiliations are present.

    NOTE(review): presumably this author's organization has an empty
    ``orgName`` in the fixture's authors.xml -- confirm against the fixture.
    """
    hep_schema = load_schema('hep')
    eprints_schema = hep_schema['properties']['arxiv_eprints']

    tarball_name = '2205.14864.tar.gz'
    tarball_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', tarball_name))

    eprint = {
        'categories': ['hep-ex'],
        'value': '2205.14864',
    }
    data = {
        '$schema': 'http://localhost:5000/hep.json',
        'arxiv_eprints': [eprint],
    }
    # Make sure the input record itself is schema-valid before running the task.
    validate(data['arxiv_eprints'], eprints_schema)

    # The task is handed the tarball through the workflow object's files.
    tarball_entry = AttrDict({'file': AttrDict({'uri': tarball_path})})
    attached_files = MockFiles({tarball_name: tarball_entry})

    obj = MockObj(data, {}, files=attached_files)
    eng = MockEng()

    arxiv_author_list(obj, eng)

    authors_schema = hep_schema['properties']['authors']
    expected_author = {
        'ids': [
            {'value': 'INSPIRE-00149777', 'schema': u'INSPIRE ID'},
        ],
        'full_name': u'Biermann, Peter',
    }
    validate([expected_author], authors_schema)

    # Exact dict equality: any extra key on the extracted author fails the test.
    assert expected_author == obj.data['authors'][29]

0 comments on commit 345e47f

Please sign in to comment.