Skip to content

Commit

Permalink
workflows: add ger curation step
Browse files Browse the repository at this point in the history
  • Loading branch information
PascalEgn committed Sep 13, 2024
1 parent 0206b12 commit 9867933
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 9 deletions.
1 change: 1 addition & 0 deletions inspirehep/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,7 @@
"HEP_add_user": "Literature submissions",
"HAL_curation": "HAL curation",
"UK_curation": "UK curation",
"GER_curation": "German curation",
"HEP_curation": "arXiv curation",
"HEP_curation_jlab": "arXiv curation",
"HEP_publishing": "Publisher curation",
Expand Down
15 changes: 15 additions & 0 deletions inspirehep/modules/workflows/tasks/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,21 @@ def check_if_france_in_raw_affiliations(obj, eng):
return True


def check_if_germany_in_fulltext(obj, eng):
    """Look for a mention of Germany in the fulltext of a workflow object.

    The text returned by ``get_fulltext(obj)`` is searched for the whole
    words "Germany" or "Deutschland", ignoring case.

    Args:
        obj: workflow object handed to ``get_fulltext``.
        eng: workflow engine; not used by this check.

    Returns:
        The regex match object for the first occurrence, or ``None`` when
        there is no fulltext or neither word appears in it.
    """
    document_text = get_fulltext(obj)
    if document_text:
        # \b on both sides keeps the match to whole words only.
        return re.search(
            r"\b(Germany|Deutschland)\b",
            document_text,
            re.UNICODE | re.IGNORECASE
        )
    return None


def check_if_germany_in_raw_affiliations(obj, eng):
    """Tell whether any author raw affiliation mentions Germany.

    Every value found under ``authors.raw_affiliations.value`` in
    ``obj.data`` is lower-cased and checked for the substrings "germany"
    or "deutschland".

    Args:
        obj: workflow object whose ``data`` holds the record.
        eng: workflow engine; not used by this check.

    Returns:
        ``True`` as soon as one affiliation matches; otherwise the function
        falls through and returns ``None``.
    """
    affiliations_per_author = get_value(obj.data, 'authors.raw_affiliations.value', [])
    for raw_affiliation in chain.from_iterable(affiliations_per_author):
        lowered = raw_affiliation.lower()
        if any(country in lowered for country in ("germany", "deutschland")):
            return True
    return None


def check_if_core_and_uk_in_fulltext(obj, eng):
fulltext = get_fulltext(obj)
if not fulltext or not is_core(obj, eng):
Expand Down
38 changes: 30 additions & 8 deletions inspirehep/modules/workflows/workflows/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
create_core_selection_wf,
check_if_france_in_fulltext,
check_if_france_in_raw_affiliations,
check_if_germany_in_fulltext,
check_if_germany_in_raw_affiliations,
link_institutions_with_affiliations,
check_if_core_and_uk_in_fulltext
)
Expand Down Expand Up @@ -271,6 +273,15 @@
ticket_id_key='curation_ticket_id',
),
),
IF(
check_if_germany_in_fulltext,
create_ticket(
template='literaturesuggest/tickets/curation_core.html',
queue='GER_curation',
context_factory=curation_ticket_context,
ticket_id_key='curation_ticket_id',
),
),
IF(
check_if_core_and_uk_in_fulltext,
create_ticket(
Expand All @@ -281,15 +292,26 @@
),
)
],
IF(
check_if_france_in_raw_affiliations,
create_ticket(
template='literaturesuggest/tickets/curation_core.html',
queue='HAL_curation',
context_factory=curation_ticket_context,
ticket_id_key='curation_ticket_id',
[
IF(
check_if_france_in_raw_affiliations,
create_ticket(
template='literaturesuggest/tickets/curation_core.html',
queue='HAL_curation',
context_factory=curation_ticket_context,
ticket_id_key='curation_ticket_id',
),
),
)
IF(
check_if_germany_in_raw_affiliations,
create_ticket(
template='literaturesuggest/tickets/curation_core.html',
queue='GER_curation',
context_factory=curation_ticket_context,
ticket_id_key='curation_ticket_id',
),
)
]
)
),
IF_NOT(
Expand Down
4 changes: 4 additions & 0 deletions tests/integration/workflows/test_article_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ def test_create_ticket_when_source_is_not_publishing(
"inspirehep.modules.workflows.tasks.actions.check_if_france_in_fulltext",
return_value=False,
)
@mock.patch(
"inspirehep.modules.workflows.tasks.actions.check_if_germany_in_fulltext",
return_value=False,
)
@mock.patch(
"inspirehep.modules.workflows.tasks.actions.check_if_core_and_uk_in_fulltext",
return_value=False,
Expand Down
91 changes: 90 additions & 1 deletion tests/unit/workflows/test_workflows_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@

from inspirehep.modules.workflows.tasks.actions import jlab_ticket_needed, load_from_source_data, \
extract_authors_from_pdf, is_suitable_for_pdf_authors_extraction, is_fermilab_report, add_collection, \
check_if_france_in_fulltext, check_if_france_in_raw_affiliations, check_if_core_and_uk_in_fulltext
check_if_france_in_fulltext, check_if_france_in_raw_affiliations, check_if_germany_in_fulltext, \
check_if_germany_in_raw_affiliations, check_if_core_and_uk_in_fulltext


def test_match_approval_gets_match_recid():
Expand Down Expand Up @@ -581,6 +582,94 @@ def test_check_if_france_in_fulltext_when_france_in_text_body(mocked_get_documen
assert france_in_fulltext


def test_check_if_germany_in_affiliations(app):
    """A raw affiliation ending in "Germany" makes the check truthy."""
    raw_affiliation = {
        "value": "Laboratoire de Physique des 2 Infinis Irene Joliot-Curie (IJCLab), CNRS, Université Paris-Saclay, Orsay, 91405, Germany"
    }

    workflow_obj = MagicMock()
    workflow_obj.data = {
        'authors': [
            {
                "full_name": "author 1",
                "raw_affiliations": [raw_affiliation],
            },
        ],
    }
    workflow_obj.extra_data = {}

    # The engine argument is not needed by the check, so None is passed.
    assert check_if_germany_in_raw_affiliations(workflow_obj, None)


def test_check_if_deutschland_in_affiliations(app):
    """A raw affiliation ending in "Deutschland" makes the check truthy."""
    raw_affiliation = {
        "value": "Laboratoire de Physique des 2 Infinis Irene Joliot-Curie (IJCLab), CNRS, Université Paris-Saclay, Orsay, 91405, Deutschland"
    }

    workflow_obj = MagicMock()
    workflow_obj.data = {
        'authors': [
            {
                "full_name": "author 1",
                "raw_affiliations": [raw_affiliation],
            },
        ],
    }
    workflow_obj.extra_data = {}

    # The engine argument is not needed by the check, so None is passed.
    assert check_if_germany_in_raw_affiliations(workflow_obj, None)


@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
def test_check_if_germany_in_fulltext_when_germany_in_text_body(mocked_get_document, app):
    """The check is truthy when the mocked GROBID reply contains "Germany".

    ``get_document_in_workflow`` is patched to yield a temporary file and the
    GROBID ``processFulltextDocument`` endpoint is mocked, so no real document
    or service is needed.
    """
    fake_grobid_response = "<country key=\"DE\">Germany</country>"
    obj = MagicMock()
    obj.data = {
        'core': False
    }
    obj.extra_data = {}
    eng = None
    # Point the app at the fake GROBID host registered below.
    new_config = {"GROBID_URL": "http://grobid_url.local"}

    with patch.dict(current_app.config, new_config):
        with requests_mock.Mocker() as requests_mocker:
            requests_mocker.register_uri(
                'POST', 'http://grobid_url.local/api/processFulltextDocument',
                text=fake_grobid_response,
                headers={'content-type': 'application/xml'},
                status_code=200,
            )
            with tempfile.NamedTemporaryFile() as tmp_file:
                # The patched context manager hands back the temp file path.
                mocked_get_document.return_value.__enter__.return_value = tmp_file.name
                germany_in_fulltext = check_if_germany_in_fulltext(obj, eng)

    assert germany_in_fulltext


@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
def test_check_if_germany_in_fulltext_when_deutschland_in_text_body(mocked_get_document, app):
    """The check is truthy when the mocked GROBID reply contains "Deutschland".

    ``get_document_in_workflow`` is patched to yield a temporary file and the
    GROBID ``processFulltextDocument`` endpoint is mocked, so no real document
    or service is needed.
    """
    fake_grobid_response = "<country key=\"DE\">Deutschland</country>"
    obj = MagicMock()
    obj.data = {
        'core': False
    }
    obj.extra_data = {}
    eng = None
    # Point the app at the fake GROBID host registered below.
    new_config = {"GROBID_URL": "http://grobid_url.local"}

    with patch.dict(current_app.config, new_config):
        with requests_mock.Mocker() as requests_mocker:
            requests_mocker.register_uri(
                'POST', 'http://grobid_url.local/api/processFulltextDocument',
                text=fake_grobid_response,
                headers={'content-type': 'application/xml'},
                status_code=200,
            )
            with tempfile.NamedTemporaryFile() as tmp_file:
                # The patched context manager hands back the temp file path.
                mocked_get_document.return_value.__enter__.return_value = tmp_file.name
                germany_in_fulltext = check_if_germany_in_fulltext(obj, eng)

    assert germany_in_fulltext


@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
def test_check_if_uk_in_fulltext_not_core(mocked_get_document, app):
fake_grobid_response = "<country key=\"UK\">England</country>"
Expand Down

0 comments on commit 9867933

Please sign in to comment.