diff --git a/inspirehep/modules/workflows/tasks/actions.py b/inspirehep/modules/workflows/tasks/actions.py index e0131578c1..cc238751d8 100644 --- a/inspirehep/modules/workflows/tasks/actions.py +++ b/inspirehep/modules/workflows/tasks/actions.py @@ -1037,7 +1037,7 @@ def create_core_selection_wf(obj, eng): record_control_number = obj.data.get('control_number') if not record_control_number: raise MissingRecordControlNumber - if is_core(obj, eng) or not _is_auto_approved(obj) or core_selection_wf_already_created(record_control_number): + if is_core(obj, eng) or not _is_auto_approved(obj) or core_selection_wf_already_created(record_control_number) or check_mark(obj, 'is-update'): LOGGER.info("No core selection needed for %s workflow with record %s", obj.id, record_control_number) return obj diff --git a/inspirehep/modules/workflows/tasks/matching.py b/inspirehep/modules/workflows/tasks/matching.py index afe4163c11..6d3c170272 100644 --- a/inspirehep/modules/workflows/tasks/matching.py +++ b/inspirehep/modules/workflows/tasks/matching.py @@ -227,14 +227,16 @@ def set_wf_not_completed_ids_to_wf(obj, skip_blocked=True, skip_halted=False): skip_halted: boolean, if True, then it skips HALTED workflows when looking for matched workflows """ + def _accept_only_article_wf(base_record, match_result): + return get_value(match_result, '_source._workflow.workflow_class') == "article" def _non_completed(base_record, match_result): - return get_value(match_result, - '_source._workflow.status') != 'COMPLETED' + return get_value(match_result, '_source._workflow.status') != 'COMPLETED' \ + and _accept_only_article_wf(base_record, match_result) def _not_completed_or_halted(base_record, match_result): return get_value(match_result, '_source._workflow.status') not in [ - 'COMPLETED', 'HALTED'] + 'COMPLETED', 'HALTED'] and _accept_only_article_wf(base_record, match_result) def is_workflow_blocked_by_another_workflow(workflow_id): workflow = workflow_object_class.get(workflow_id) diff --git a/tests/integration/workflows/fixtures/1802.08709.pdf b/tests/integration/workflows/fixtures/1802.08709.pdf new file mode 100644 index 0000000000..baccc059db Binary files /dev/null and b/tests/integration/workflows/fixtures/1802.08709.pdf differ diff --git a/tests/integration/workflows/fixtures/1802.08709.tar.gz b/tests/integration/workflows/fixtures/1802.08709.tar.gz new file mode 100644 index 0000000000..d11df69f7e Binary files /dev/null and b/tests/integration/workflows/fixtures/1802.08709.tar.gz differ diff --git a/tests/integration/workflows/test_article_workflow.py b/tests/integration/workflows/test_article_workflow.py index 26e122317a..ff03af0f4f 100644 --- a/tests/integration/workflows/test_article_workflow.py +++ b/tests/integration/workflows/test_article_workflow.py @@ -22,7 +22,11 @@ from __future__ import absolute_import, division, print_function +import os + import mock +import pkg_resources +import requests_mock from invenio_workflows import ( start, @@ -34,6 +38,7 @@ from inspirehep.modules.workflows.tasks.actions import mark +from inspirehep.modules.workflows.tasks.matching import set_wf_not_completed_ids_to_wf PUBLISHING_RECORD = { '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json', @@ -279,3 +284,61 @@ def test_keywords_are_stored_in_record_when_record_is_core(mocked_robotupload, m mark('core', True)(workflow, None) wf.continue_workflow() assert wf.data['keywords'] == expected_keywords + +@mock.patch("inspirehep.modules.workflows.tasks.beard.json_api_request", return_value={}) +@mock.patch("inspirehep.modules.workflows.tasks.magpie.json_api_request", return_value={}) +@mock.patch('inspirehep.modules.workflows.tasks.upload.store_record') +@mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234") +@mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload') +def test_run_next_wf_is_not_starting_core_selection_wfs(mocked_robotupload, mocked_create_ticket, mocked_store_record, mocked_magpie, mocked_beard, mocked_external_services, workflow_app): + record = { + '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json', + 'titles': [ + { + 'title': 'Title.' + }, + ], + "authors": [ + { + "full_name": "Some author", + } + ], + 'document_type': ['article'], + '_collections': ['Literature'], + 'arxiv_eprints': [{'value': "1802.08709.pdf"}, ], + 'control_number': 1234, + "acquisition_source": { + "datetime": "2021-06-11T06:59:01.928752", + "method": "hepcrawl", + "source": "arXiv", + }, + } + + workflow = build_workflow(record, extra_data={'delay': 10}) + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri("GET", 'http://export.arxiv.org/pdf/1802.08709.pdf', content=pkg_resources.resource_string( + __name__, os.path.join('fixtures', '1802.08709.pdf')),) + requests_mocker.register_uri("GET", "http://arxiv.org/pdf/1802.08709.pdf", text="") + requests_mocker.register_uri("GET", "http://export.arxiv.org/e-print/1802.08709.pdf", + content=pkg_resources.resource_string( + __name__, os.path.join('fixtures', '1802.08709.pdf')), ) + requests_mocker.register_uri("POST", "http://grobid_url.local/api/processHeaderDocument") + start("article", object_id=workflow.id) + + wf = workflow_object_class.get(workflow.id) + mark('auto-approved', True)(workflow, None) + wf.callback_pos = [34, 1, 13] + wf.continue_workflow() + workflow = build_workflow(record) + with requests_mock.Mocker() as requests_mocker: + requests_mocker.register_uri("GET", 'http://export.arxiv.org/pdf/1802.08709.pdf', + content=pkg_resources.resource_string( + __name__, os.path.join('fixtures', '1802.08709.pdf')), ) + requests_mocker.register_uri("GET", "http://arxiv.org/pdf/1802.08709.pdf", text="") + requests_mocker.register_uri("GET", "http://export.arxiv.org/e-print/1802.08709.pdf", + content=pkg_resources.resource_string( + __name__, os.path.join('fixtures', '1802.08709.pdf')), ) + requests_mocker.register_uri("POST", "http://grobid_url.local/api/processHeaderDocument") + start("article", object_id=workflow.id) + matched = set_wf_not_completed_ids_to_wf(workflow) + assert matched == [] diff --git a/tests/integration/workflows/test_arxiv_workflow.py b/tests/integration/workflows/test_arxiv_workflow.py index 08d9783cc3..1277d6b139 100644 --- a/tests/integration/workflows/test_arxiv_workflow.py +++ b/tests/integration/workflows/test_arxiv_workflow.py @@ -802,10 +802,37 @@ def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_appro assert obj2.status == ObjectStatus.COMPLETED -def test_match_wf_in_error_goes_in_error_state(workflow_app): +@mock.patch( + "inspirehep.modules.workflows.tasks.arxiv.download_file_to_workflow", + side_effect=fake_download_file, +) +@mock.patch( + "inspirehep.modules.workflows.tasks.actions.download_file_to_workflow", + side_effect=fake_download_file, +) +@mock.patch("inspirehep.modules.workflows.tasks.arxiv.is_pdf_link", return_value=True) +@mock.patch( + "inspirehep.modules.workflows.tasks.beard.json_api_request", + side_effect=fake_beard_api_request, +) +@mock.patch( + "inspirehep.modules.workflows.tasks.magpie.json_api_request", + side_effect=fake_magpie_api_request, +) +def test_match_wf_in_error_goes_in_error_state( + mocked_api_request_magpie, + mocked_api_request_beard, + mocked_is_pdf_link, + mocked_package_download, + mocked_arxiv_download, + workflow_app, + mocked_external_services +): record = generate_record() - obj = workflow_object_class.create(data=record, data_type="hep") + wf_id = build_workflow(record).id + start("article", object_id=wf_id) + obj = workflow_object_class.get(wf_id) obj.status = ObjectStatus.ERROR obj.save() current_search.flush_and_refresh("holdingpen-hep") @@ -815,10 +842,37 @@ def test_match_wf_in_error_goes_in_error_state(workflow_app): start("article", object_id=workflow_id) -def test_match_wf_in_error_goes_in_initial_state(workflow_app): +@mock.patch( + "inspirehep.modules.workflows.tasks.arxiv.download_file_to_workflow", + side_effect=fake_download_file, +) +@mock.patch( + "inspirehep.modules.workflows.tasks.actions.download_file_to_workflow", + side_effect=fake_download_file, +) +@mock.patch("inspirehep.modules.workflows.tasks.arxiv.is_pdf_link", return_value=True) +@mock.patch( + "inspirehep.modules.workflows.tasks.beard.json_api_request", + side_effect=fake_beard_api_request, +) +@mock.patch( + "inspirehep.modules.workflows.tasks.magpie.json_api_request", + side_effect=fake_magpie_api_request, +) +def test_match_wf_in_error_goes_in_initial_state( + mocked_api_request_magpie, + mocked_api_request_beard, + mocked_is_pdf_link, + mocked_package_download, + mocked_arxiv_download, + workflow_app, + mocked_external_services +): record = generate_record() - obj = workflow_object_class.create(data=record, data_type="hep") + wf_id = build_workflow(record).id + start("article", object_id=wf_id) + obj = workflow_object_class.get(wf_id) obj.status = ObjectStatus.INITIAL obj.save() current_search.flush_and_refresh("holdingpen-hep") diff --git a/tests/integration/workflows/test_workflow_core_selection.py b/tests/integration/workflows/test_workflow_core_selection.py index f125d9e14d..7857f63a8f 100644 --- a/tests/integration/workflows/test_workflow_core_selection.py +++ b/tests/integration/workflows/test_workflow_core_selection.py @@ -102,6 +102,68 @@ def test_core_selection_wf_starts_after_article_wf_when_no_core(mocked_api_reque assert mock.request_history[1].json() == expected_record_data +@mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload') +@mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234") +@mock.patch( + "inspirehep.modules.workflows.tasks.beard.json_api_request", + side_effect=fake_beard_api_request, +) +@mock.patch( + "inspirehep.modules.workflows.tasks.magpie.json_api_request", + side_effect=fake_magpie_api_request, +) +def test_core_selection_wf_is_not_created_when_wf_is_record_update(mocked_api_request_magpie, mocked_api_request_beard, mocked_rt, mocked_send_robotupload, workflow_app, mocked_external_services): + pid_value = 123456 + mocked_url = "{inspirehep_url}/{endpoint}/{control_number}".format( + inspirehep_url=current_app.config.get("INSPIREHEP_URL"), + endpoint='literature', + control_number=pid_value + ) + record = { + "_collections": [ + "Literature" + ], + "titles": [ + {"title": "A title"}, + ], + "document_type": [ + "report" + ], + "collaborations": [ + {"value": "SHIP"} + ], + "control_number": pid_value, + } + + workflow_object = workflow_object_class.create( + data=record, + id_user=None, + data_type='hep' + ) + workflow_object.extra_data['source_data'] = {"data": record, "extra_data": {"source_data": {"data": record}}} + workflow_object.save() + + with override_config(FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT=True): + with requests_mock.Mocker() as mock: + mock.register_uri('GET', mocked_url, json=load_json_record('hep_record_no_core.json')) + mock.register_uri('PUT', "http://web:8000/literature/{control_number}".format(control_number=pid_value), json={"metadata": {"control_number": pid_value}}) + + start("article", object_id=workflow_object.id) + + assert WorkflowObjectModel.query.filter(WorkflowObjectModel.workflow.has(name="core_selection")).count() == 0 + + workflow_object.callback_pos = [34, 1, 13] + # Run task for creating core_selection wf + workflow_object.extra_data['auto-approved'] = True + workflow_object.extra_data['is-update'] = True + workflow_object.save() + + workflow_object.continue_workflow('restart_task') + + assert WorkflowObjectModel.query.filter(WorkflowObjectModel.workflow.has(name="core_selection")).count() == 0 + assert workflow_object.status == ObjectStatus.COMPLETED + + @mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload') @mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234") @mock.patch(