diff --git a/inspirehep/config.py b/inspirehep/config.py index 693f337e52..23d9fb3f5a 100644 --- a/inspirehep/config.py +++ b/inspirehep/config.py @@ -1706,6 +1706,7 @@ QUEUE_TO_FUNCTIONAL_CATEGORY_MAPPING = { "HEP_add_user": "Literature submissions", "HAL_curation": "HAL curation", + "UK_curation": "UK curation", "HEP_curation": "arXiv curation", "HEP_curation_jlab": "arXiv curation", "HEP_publishing": "Publisher curation", diff --git a/inspirehep/modules/workflows/tasks/actions.py b/inspirehep/modules/workflows/tasks/actions.py index a929418d32..386558dead 100644 --- a/inspirehep/modules/workflows/tasks/actions.py +++ b/inspirehep/modules/workflows/tasks/actions.py @@ -1131,6 +1131,15 @@ def check_if_france_in_raw_affiliations(obj, eng): return True +def check_if_core_and_uk_in_fulltext(obj, eng): + fulltext = get_fulltext(obj) + if not fulltext or not is_core(obj, eng): + return + regex = re.compile( + r"\b(UK|United\s+Kingdom|England|Scotland|Northern\s+Ireland)\b", re.UNICODE | re.IGNORECASE) + return regex.search(fulltext) + + def load_record_from_hep(obj, wf): control_number = obj.data['control_number'] pid_type = get_pid_type_from_schema(obj.data['$schema']) diff --git a/inspirehep/modules/workflows/workflows/article.py b/inspirehep/modules/workflows/workflows/article.py index 4bea8a9eb3..30aeec146f 100644 --- a/inspirehep/modules/workflows/workflows/article.py +++ b/inspirehep/modules/workflows/workflows/article.py @@ -76,7 +76,8 @@ create_core_selection_wf, check_if_france_in_fulltext, check_if_france_in_raw_affiliations, - link_institutions_with_affiliations + link_institutions_with_affiliations, + check_if_core_and_uk_in_fulltext ) from inspirehep.modules.workflows.tasks.classifier import ( @@ -261,15 +262,26 @@ is_marked('is-update'), IF_ELSE( is_arxiv_paper, - IF( - check_if_france_in_fulltext, - create_ticket( - template='literaturesuggest/tickets/curation_core.html', - queue='HAL_curation', - context_factory=curation_ticket_context, - ticket_id_key='curation_ticket_id', + [ + IF( + check_if_france_in_fulltext, + create_ticket( + template='literaturesuggest/tickets/curation_core.html', + queue='HAL_curation', + context_factory=curation_ticket_context, + ticket_id_key='curation_ticket_id', + ), ), - ), + IF( + check_if_core_and_uk_in_fulltext, + create_ticket( + template='literaturesuggest/tickets/curation_core.html', + queue='UK_curation', + context_factory=curation_ticket_context, + ticket_id_key='curation_ticket_id', + ), + ) + ], IF( check_if_france_in_raw_affiliations, create_ticket( diff --git a/tests/integration/workflows/test_article_workflow.py b/tests/integration/workflows/test_article_workflow.py index 251bee90af..03bfa880bc 100644 --- a/tests/integration/workflows/test_article_workflow.py +++ b/tests/integration/workflows/test_article_workflow.py @@ -152,11 +152,16 @@ def test_create_ticket_when_source_is_not_publishing( "inspirehep.modules.workflows.tasks.actions.check_if_france_in_fulltext", return_value=False, ) +@mock.patch( + "inspirehep.modules.workflows.tasks.actions.check_if_core_and_uk_in_fulltext", + return_value=False, +) @mock.patch("inspirehep.modules.workflows.tasks.submission.send_robotupload") def test_set_fermilab_collection_from_report_number( mocked_api_request_magpie, mocked_api_request_classifier, mocked_robotupload, + mocked_check_if_core_and_uk_in_fulltext, mocked_external_services, workflow_app, ): diff --git a/tests/unit/workflows/test_workflows_actions.py b/tests/unit/workflows/test_workflows_actions.py index 4287b96656..c9a268b7b8 100644 --- a/tests/unit/workflows/test_workflows_actions.py +++ b/tests/unit/workflows/test_workflows_actions.py @@ -38,7 +38,7 @@ from inspirehep.modules.workflows.tasks.actions import jlab_ticket_needed, load_from_source_data, \ extract_authors_from_pdf, is_suitable_for_pdf_authors_extraction, is_fermilab_report, add_collection, \ - check_if_france_in_fulltext, check_if_france_in_raw_affiliations + check_if_france_in_fulltext, check_if_france_in_raw_affiliations, check_if_core_and_uk_in_fulltext def test_match_approval_gets_match_recid(): @@ -579,3 +579,81 @@ def test_check_if_france_in_fulltext_when_france_in_text_body(mocked_get_documen france_in_fulltext = check_if_france_in_fulltext(obj, eng) assert france_in_fulltext + + +@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow") +def test_check_if_uk_in_fulltext_not_core(mocked_get_document, app): + fake_grobid_response = "England" + obj = MagicMock() + obj.data = { + 'core': False + } + obj.extra_data = {} + eng = None + new_config = {"GROBID_URL": "http://grobid_url.local"} + with patch.dict(current_app.config, new_config): + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri( + 'POST', 'http://grobid_url.local/api/processFulltextDocument', + text=fake_grobid_response, + headers={'content-type': 'application/xml'}, + status_code=200, + ) + with tempfile.NamedTemporaryFile() as tmp_file: + mocked_get_document.return_value.__enter__.return_value = tmp_file.name + uk_in_fulltext_and_core = check_if_core_and_uk_in_fulltext( + obj, eng) + + assert not uk_in_fulltext_and_core + + +@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow") +def test_check_if_uk_in_fulltext_core(mocked_get_document, app): + fake_grobid_response = "England" + obj = MagicMock() + obj.data = { + 'core': True + } + obj.extra_data = {} + eng = None + new_config = {"GROBID_URL": "http://grobid_url.local"} + with patch.dict(current_app.config, new_config): + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri( + 'POST', 'http://grobid_url.local/api/processFulltextDocument', + text=fake_grobid_response, + headers={'content-type': 'application/xml'}, + status_code=200, + ) + with tempfile.NamedTemporaryFile() as tmp_file: + mocked_get_document.return_value.__enter__.return_value = tmp_file.name + uk_in_fulltext_and_core = check_if_core_and_uk_in_fulltext( + obj, eng) + + assert uk_in_fulltext_and_core + + +@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow") +def test_check_if_uk_in_fulltext_core_case_insensitive(mocked_get_document, app): + fake_grobid_response = "unitEd KiNgdOm" + obj = MagicMock() + obj.data = { + 'core': True + } + obj.extra_data = {} + eng = None + new_config = {"GROBID_URL": "http://grobid_url.local"} + with patch.dict(current_app.config, new_config): + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri( + 'POST', 'http://grobid_url.local/api/processFulltextDocument', + text=fake_grobid_response, + headers={'content-type': 'application/xml'}, + status_code=200, + ) + with tempfile.NamedTemporaryFile() as tmp_file: + mocked_get_document.return_value.__enter__.return_value = tmp_file.name + uk_in_fulltext_and_core = check_if_core_and_uk_in_fulltext( + obj, eng) + + assert uk_in_fulltext_and_core