diff --git a/.github/workflows/backend-tests-on-docker.yml b/.github/workflows/backend-tests-on-docker.yml
index 983d3ace36..1a737afc38 100644
--- a/.github/workflows/backend-tests-on-docker.yml
+++ b/.github/workflows/backend-tests-on-docker.yml
@@ -106,7 +106,8 @@ jobs:
           --ignore tests/integration/workflows/test_workflows_tasks_actions.py \
           --ignore tests/integration/workflows/test_workflows_cli.py \
           --ignore tests/integration/workflows/test_workflow_utils.py \
-          --ignore tests/integration/workflows/test_workflow_core_selection.py
+          --ignore tests/integration/workflows/test_workflow_core_selection.py \
+          --ignore tests/integration/workflows/test_matching.py
 
   Integration:
     runs-on: ubuntu-latest
diff --git a/inspirehep/modules/workflows/tasks/actions.py b/inspirehep/modules/workflows/tasks/actions.py
index e0131578c1..b251bd1f1e 100644
--- a/inspirehep/modules/workflows/tasks/actions.py
+++ b/inspirehep/modules/workflows/tasks/actions.py
@@ -946,8 +946,8 @@ def _match_lit_author_affiliation(raw_aff):
         "nested",
         path="authors",
         query=(
-            Q("match", authors__raw_affiliations__value=raw_aff)
-            & Q("exists", field="authors.affiliations.value")
+            Q("match", authors__raw_affiliations__value=raw_aff) &
+            Q("exists", field="authors.affiliations.value")
         ),
         inner_hits={},
     )
@@ -1037,7 +1037,7 @@ def create_core_selection_wf(obj, eng):
     record_control_number = obj.data.get('control_number')
     if not record_control_number:
         raise MissingRecordControlNumber
-    if is_core(obj, eng) or not _is_auto_approved(obj) or core_selection_wf_already_created(record_control_number):
+    if is_core(obj, eng) or not _is_auto_approved(obj) or core_selection_wf_already_created(record_control_number) or check_mark(obj, 'is-update'):
         LOGGER.info("No core selection needed for %s workflow with record %s", obj.id, record_control_number)
         return obj
diff --git a/inspirehep/modules/workflows/tasks/matching.py b/inspirehep/modules/workflows/tasks/matching.py
index afe4163c11..6d3c170272 100644
--- a/inspirehep/modules/workflows/tasks/matching.py
+++ b/inspirehep/modules/workflows/tasks/matching.py
@@ -227,14 +227,16 @@ def set_wf_not_completed_ids_to_wf(obj, skip_blocked=True, skip_halted=False):
         skip_halted: boolean, if True, then it skips HALTED workflows when
             looking for matched workflows
     """
+    def _accept_only_article_wf(base_record, match_result):
+        return get_value(match_result, '_source._workflow.workflow_class') == "article"
 
     def _non_completed(base_record, match_result):
-        return get_value(match_result,
-                         '_source._workflow.status') != 'COMPLETED'
+        return get_value(match_result, '_source._workflow.status') != 'COMPLETED' \
+            and _accept_only_article_wf(base_record, match_result)
 
     def _not_completed_or_halted(base_record, match_result):
         return get_value(match_result, '_source._workflow.status') not in [
-            'COMPLETED', 'HALTED']
+            'COMPLETED', 'HALTED'] and _accept_only_article_wf(base_record, match_result)
 
     def is_workflow_blocked_by_another_workflow(workflow_id):
         workflow = workflow_object_class.get(workflow_id)
diff --git a/tests/integration/workflows/fixtures/1802.08709.pdf b/tests/integration/workflows/fixtures/1802.08709.pdf
new file mode 100644
index 0000000000..baccc059db
Binary files /dev/null and b/tests/integration/workflows/fixtures/1802.08709.pdf differ
diff --git a/tests/integration/workflows/fixtures/1802.08709.tar.gz b/tests/integration/workflows/fixtures/1802.08709.tar.gz
new file mode 100644
index 0000000000..d11df69f7e
Binary files /dev/null and b/tests/integration/workflows/fixtures/1802.08709.tar.gz differ
diff --git a/tests/integration/workflows/helpers/workflow_utils.py b/tests/integration/workflows/helpers/workflow_utils.py
index d553aefdfb..aa6e862678 100644
--- a/tests/integration/workflows/helpers/workflow_utils.py
+++ b/tests/integration/workflows/helpers/workflow_utils.py
@@ -22,22 +22,31 @@
 from __future__ import absolute_import, division, print_function
 
+import uuid
 from copy import deepcopy
 
-from invenio_workflows import workflow_object_class
+from invenio_workflows import workflow_object_class, Workflow
 
 
-def build_workflow(workflow_data, data_type='hep', extra_data=None, **kwargs):
-    workflow_object = workflow_object_class.create(
-        data=workflow_data,
-        data_type=data_type,
-        extra_data={
+def build_workflow(workflow_data, data_type='hep', extra_data=None, status=None, **kwargs):
+    extra_data = extra_data or {}
+    if 'source_data' not in extra_data:
+        extra_data = {
             'source_data': {
                 'data': deepcopy(workflow_data),
-                'extra_data': extra_data or {},
+                'extra_data': extra_data,
             }
-        },
+        }
+    wf = Workflow(name='article', extra_data=extra_data, uuid=uuid.uuid4())
+    wf.save()
+    workflow_object = workflow_object_class.create(
+        data=workflow_data,
+        data_type=data_type,
+        extra_data=extra_data,
         **kwargs
     )
-    workflow_object.save()
+    if status:
+        workflow_object.status = status
+    workflow_object.save(id_workflow=wf.uuid)
+
     return workflow_object
diff --git a/tests/integration/workflows/test_article_workflow.py b/tests/integration/workflows/test_article_workflow.py
index 26e122317a..9320776df5 100644
--- a/tests/integration/workflows/test_article_workflow.py
+++ b/tests/integration/workflows/test_article_workflow.py
@@ -22,7 +22,11 @@
 
 from __future__ import absolute_import, division, print_function
 
+import os
+
 import mock
+import pkg_resources
+import requests_mock
 
 from invenio_workflows import (
     start,
@@ -34,6 +38,7 @@
 
 from inspirehep.modules.workflows.tasks.actions import mark
+from inspirehep.modules.workflows.tasks.matching import set_wf_not_completed_ids_to_wf
 
 PUBLISHING_RECORD = {
     '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json',
@@ -279,3 +284,69 @@ def test_keywords_are_stored_in_record_when_record_is_core(mocked_robotupload, m
     mark('core', True)(workflow, None)
     wf.continue_workflow()
     assert wf.data['keywords'] == expected_keywords
+
+
+@mock.patch("inspirehep.modules.workflows.tasks.beard.json_api_request", return_value={})
+@mock.patch("inspirehep.modules.workflows.tasks.magpie.json_api_request", return_value={})
+@mock.patch('inspirehep.modules.workflows.tasks.upload.store_record')
+@mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234")
+@mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload')
+def test_run_next_wf_is_not_starting_core_selection_wfs(mocked_robotupload, mocked_create_ticket, mocked_store_record, mocked_magpie, mocked_beard, mocked_external_services, workflow_app):
+    record = {
+        '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json',
+        'titles': [
+            {
+                'title': 'Title.'
+            },
+        ],
+        "authors": [
+            {
+                "full_name": "Some author",
+            }
+        ],
+        'document_type': ['article'],
+        '_collections': ['Literature'],
+        'arxiv_eprints': [{'value': "1802.08709.pdf"}, ],
+        'control_number': 1234,
+        "acquisition_source": {
+            "datetime": "2021-06-11T06:59:01.928752",
+            "method": "hepcrawl",
+            "source": "arXiv",
+        },
+    }
+
+    workflow = build_workflow(record, extra_data={'delay': 10})
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri(
+            "GET", 'http://export.arxiv.org/pdf/1802.08709.pdf',
+            content=pkg_resources.resource_string(
+                __name__, os.path.join('fixtures', '1802.08709.pdf')
+            )
+        )
+        requests_mocker.register_uri("GET", "http://arxiv.org/pdf/1802.08709.pdf", text="")
+        requests_mocker.register_uri(
+            "GET", "http://export.arxiv.org/e-print/1802.08709.pdf",
+            content=pkg_resources.resource_string(
+                __name__, os.path.join('fixtures', '1802.08709.pdf')
+            )
+        )
+        requests_mocker.register_uri("POST", "http://grobid_url.local/api/processHeaderDocument")
+        start("article", object_id=workflow.id)
+
+    wf = workflow_object_class.get(workflow.id)
+    mark('auto-approved', True)(workflow, None)
+    wf.callback_pos = [34, 1, 13]
+    wf.continue_workflow()
+    workflow = build_workflow(record)
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri("GET", 'http://export.arxiv.org/pdf/1802.08709.pdf',
+                                     content=pkg_resources.resource_string(
+                                         __name__, os.path.join('fixtures', '1802.08709.pdf')), )
+        requests_mocker.register_uri("GET", "http://arxiv.org/pdf/1802.08709.pdf", text="")
+        requests_mocker.register_uri("GET", "http://export.arxiv.org/e-print/1802.08709.pdf",
+                                     content=pkg_resources.resource_string(
+                                         __name__, os.path.join('fixtures', '1802.08709.pdf')), )
+        requests_mocker.register_uri("POST", "http://grobid_url.local/api/processHeaderDocument")
+        start("article", object_id=workflow.id)
+    matched = set_wf_not_completed_ids_to_wf(workflow)
+    assert matched == []
diff --git a/tests/integration/workflows/test_arxiv_workflow.py b/tests/integration/workflows/test_arxiv_workflow.py
index 08d9783cc3..13de227520 100644
--- a/tests/integration/workflows/test_arxiv_workflow.py
+++ b/tests/integration/workflows/test_arxiv_workflow.py
@@ -802,10 +802,13 @@ def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_appro
     assert obj2.status == ObjectStatus.COMPLETED
 
 
-def test_match_wf_in_error_goes_in_error_state(workflow_app):
+def test_match_wf_in_error_goes_in_error_state(
+    workflow_app,
+):
     record = generate_record()
 
-    obj = workflow_object_class.create(data=record, data_type="hep")
+    wf_id = build_workflow(record).id
+    obj = workflow_object_class.get(wf_id)
     obj.status = ObjectStatus.ERROR
     obj.save()
     current_search.flush_and_refresh("holdingpen-hep")
@@ -815,10 +818,13 @@
         start("article", object_id=workflow_id)
 
 
-def test_match_wf_in_error_goes_in_initial_state(workflow_app):
+def test_match_wf_in_error_goes_in_initial_state(
+    workflow_app,
+):
     record = generate_record()
 
-    obj = workflow_object_class.create(data=record, data_type="hep")
+    wf_id = build_workflow(record).id
+    obj = workflow_object_class.get(wf_id)
     obj.status = ObjectStatus.INITIAL
     obj.save()
     current_search.flush_and_refresh("holdingpen-hep")
diff --git a/tests/integration/tasks/test_matching.py b/tests/integration/workflows/test_matching.py
similarity index 85%
rename from tests/integration/tasks/test_matching.py
rename to tests/integration/workflows/test_matching.py
index 4b266b95b5..66bec53d85 100644
--- a/tests/integration/tasks/test_matching.py
+++ b/tests/integration/workflows/test_matching.py
@@ -31,7 +31,6 @@
     ObjectStatus,
     start,
     WorkflowEngine,
-    WorkflowObject,
     workflow_object_class,
 )
 
@@ -41,12 +40,13 @@
     match_previously_rejected_wf_in_holdingpen,
     stop_matched_holdingpen_wfs,
 )
+from workflow_utils import build_workflow
 
 
 @pytest.fixture
 def simple_record(app):
     yield {
-        'data': {
+        'workflow_data': {
             '$schema': 'http://localhost:5000/schemas/records/hep.json',
             '_collections': ['Literature'],
             'document_type': ['article'],
@@ -74,16 +74,13 @@
 
 
 def test_pending_holdingpen_matches_wf_if_not_completed(app, simple_record):
-    obj = workflow_object_class.create(
-        status=ObjectStatus.HALTED,
-        data_type='hep',
-        **simple_record
-    )
-    obj_id = obj.id
-    obj.save()
+
+    workflow = build_workflow(status=ObjectStatus.HALTED, **simple_record)
+    obj_id = workflow.id
     current_search.flush_and_refresh('holdingpen-hep')
 
-    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
+    obj2 = build_workflow(**simple_record)
+
     assert match_non_completed_wf_in_holdingpen(obj2, None)
     assert obj2.extra_data['holdingpen_matches'] == [obj_id]
@@ -97,17 +94,14 @@ def test_pending_holdingpen_matches_wf_if_not_completed(app, simple_record):
 
 
 def test_match_previously_rejected_wf_in_holdingpen(app, simple_record):
-    obj = workflow_object_class.create(
-        status=ObjectStatus.COMPLETED,
-        data_type='hep',
-        **simple_record
-    )
+    obj = build_workflow(status=ObjectStatus.COMPLETED, **simple_record)
     obj_id = obj.id
+    obj.extra_data['approved'] = False  # reject it
     obj.save()
 
     current_search.flush_and_refresh('holdingpen-hep')
 
-    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
+    obj2 = build_workflow(**simple_record)
 
     assert match_previously_rejected_wf_in_holdingpen(obj2, None)
     assert obj2.extra_data['previously_rejected_matches'] == [obj_id]
@@ -121,16 +115,15 @@
 
 
 def test_has_same_source(app, simple_record):
-    obj = workflow_object_class.create(
+    obj = build_workflow(
         status=ObjectStatus.HALTED,
         data_type='hep',
         **simple_record
     )
     obj_id = obj.id
-    obj.save()
 
     current_search.flush_and_refresh('holdingpen-hep')
-    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
+    obj2 = build_workflow(**simple_record)
 
     match_non_completed_wf_in_holdingpen(obj2, None)
     same_source_func = has_same_source('holdingpen_matches')
@@ -140,8 +133,8 @@
 
     # change source and match the wf in the holdingpen
     different_source_rec = deepcopy(simple_record)
-    different_source_rec['data']['acquisition_source'] = {'source': 'different'}
-    obj3 = WorkflowObject.create(data_type='hep', **different_source_rec)
+    different_source_rec['workflow_data']['acquisition_source'] = {'source': 'different'}
+    obj3 = build_workflow(**different_source_rec)
 
     assert match_non_completed_wf_in_holdingpen(obj3, None)
     assert not same_source_func(obj3, None)
@@ -151,7 +144,7 @@
 def test_stop_matched_holdingpen_wfs(app, simple_record):
     # need to run a wf in order to assign to it the wf definition and a uuid
     # for it
-    obj = workflow_object_class.create(
+    obj = build_workflow(
         data_type='hep',
         **simple_record
     )
@@ -163,7 +156,7 @@
     obj_id = obj.id
 
     current_search.flush_and_refresh('holdingpen-hep')
-    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
+    obj2 = build_workflow(**simple_record)
     obj2_id = obj2.id
 
     match_non_completed_wf_in_holdingpen(obj2, None)
diff --git a/tests/integration/workflows/test_workflow_core_selection.py b/tests/integration/workflows/test_workflow_core_selection.py
index f125d9e14d..7857f63a8f 100644
--- a/tests/integration/workflows/test_workflow_core_selection.py
+++ b/tests/integration/workflows/test_workflow_core_selection.py
@@ -102,6 +102,68 @@ def test_core_selection_wf_starts_after_article_wf_when_no_core(mocked_api_reque
     assert mock.request_history[1].json() == expected_record_data
 
 
+@mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload')
+@mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234")
+@mock.patch(
+    "inspirehep.modules.workflows.tasks.beard.json_api_request",
+    side_effect=fake_beard_api_request,
+)
+@mock.patch(
+    "inspirehep.modules.workflows.tasks.magpie.json_api_request",
+    side_effect=fake_magpie_api_request,
+)
+def test_core_selection_wf_is_not_created_when_wf_is_record_update(mocked_api_request_magpie, mocked_api_request_beard, mocked_rt, mocked_send_robotupload, workflow_app, mocked_external_services):
+    pid_value = 123456
+    mocked_url = "{inspirehep_url}/{endpoint}/{control_number}".format(
+        inspirehep_url=current_app.config.get("INSPIREHEP_URL"),
+        endpoint='literature',
+        control_number=pid_value
+    )
+    record = {
+        "_collections": [
+            "Literature"
+        ],
+        "titles": [
+            {"title": "A title"},
+        ],
+        "document_type": [
+            "report"
+        ],
+        "collaborations": [
+            {"value": "SHIP"}
+        ],
+        "control_number": pid_value,
+    }
+
+    workflow_object = workflow_object_class.create(
+        data=record,
+        id_user=None,
+        data_type='hep'
+    )
+    workflow_object.extra_data['source_data'] = {"data": record, "extra_data": {"source_data": {"data": record}}}
+    workflow_object.save()
+
+    with override_config(FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT=True):
+        with requests_mock.Mocker() as mock:
+            mock.register_uri('GET', mocked_url, json=load_json_record('hep_record_no_core.json'))
+            mock.register_uri('PUT', "http://web:8000/literature/{control_number}".format(control_number=pid_value), json={"metadata": {"control_number": pid_value}})
+
+            start("article", object_id=workflow_object.id)
+
+            assert WorkflowObjectModel.query.filter(WorkflowObjectModel.workflow.has(name="core_selection")).count() == 0
+
+            workflow_object.callback_pos = [34, 1, 13]
+            # Run task for creating core_selection wf
+            workflow_object.extra_data['auto-approved'] = True
+            workflow_object.extra_data['is-update'] = True
+            workflow_object.save()
+
+            workflow_object.continue_workflow('restart_task')
+
+            assert WorkflowObjectModel.query.filter(WorkflowObjectModel.workflow.has(name="core_selection")).count() == 0
+            assert workflow_object.status == ObjectStatus.COMPLETED
+
+
 @mock.patch('inspirehep.modules.workflows.tasks.submission.send_robotupload')
 @mock.patch('inspirehep.modules.workflows.tasks.submission.submit_rt_ticket', return_value="1234")
 @mock.patch(
diff --git a/tests/integration_async/test_workflows.py b/tests/integration_async/test_workflows.py
index f60ca1a81f..32a789449c 100644
--- a/tests/integration_async/test_workflows.py
+++ b/tests/integration_async/test_workflows.py
@@ -22,6 +22,8 @@
 
 from __future__ import absolute_import, division, print_function
 
+import uuid
+
 import time
 
 import pytest
@@ -31,21 +33,30 @@
 from invenio_db import db
 from invenio_search import current_search
 
-from invenio_workflows import ObjectStatus, start, workflow_object_class
+from invenio_workflows import ObjectStatus, start, workflow_object_class, Workflow
 
 
-def build_workflow(workflow_data, data_type='hep', **kwargs):
-    workflow_object = workflow_object_class.create(
-        data_type=data_type,
-        data=workflow_data,
-        extra_data={
+def build_workflow(workflow_data, data_type='hep', extra_data=None, status=None, **kwargs):
+    extra_data = extra_data or {}
+    if 'source_data' not in extra_data:
+        extra_data = {
             'source_data': {
                 'data': deepcopy(workflow_data),
-                'extra_data': {},
+                'extra_data': extra_data,
             }
-        },
+        }
+    wf = Workflow(name='article', extra_data=extra_data, uuid=uuid.uuid4())
+    wf.save()
+    workflow_object = workflow_object_class.create(
+        data=workflow_data,
+        data_type=data_type,
+        extra_data=extra_data,
         **kwargs
     )
+    if status:
+        workflow_object.status = status
+    workflow_object.save(id_workflow=wf.uuid)
+
     return workflow_object
@@ -59,7 +70,7 @@ def check_wf_state(workflow_id, desired_status, max_time=550): # Travis fails a
     """
     start = datetime.now()
     end = start + timedelta(seconds=max_time)
-    while (True):
+    while True:
         db.session.close()
         if workflow_object_class.get(workflow_id).status == desired_status:
             return
@@ -101,8 +112,7 @@ def test_wf_not_stops_when_blocking_another_one_after_restarted_on_running(
         'keywords': [{'value': 'none'}]
     }
 
-    workflow = build_workflow(record)
-    workflow.status = ObjectStatus.RUNNING
+    workflow = build_workflow(record, status=ObjectStatus.RUNNING)
     workflow.save()
     record['titles'][0]['source'] = 'something_else'
     workflow2 = build_workflow(record)