diff --git a/.fireq.json b/.fireq.json deleted file mode 100644 index 20c11c174..000000000 --- a/.fireq.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "superdesk_branch": "release/2.6" -} diff --git a/server/features/search_autocomplete.feature b/server/features/search_autocomplete.feature new file mode 100644 index 000000000..f6b3a3a14 --- /dev/null +++ b/server/features/search_autocomplete.feature @@ -0,0 +1,230 @@ +Feature: Planning autocomplete + Background: Setup config + Given config update + """ + { + "ARCHIVE_AUTOCOMPLETE": true, + "ARCHIVE_AUTOCOMPLETE_DAYS": 999 + } + """ + + @auth + Scenario: Get distinct planning sluglines + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", "value": "planning-en-test"}, + {"field": "slugline", "language": "de", "value": "planning-de-test"} + ] + }] + """ + # Suggests only the value in translations if populated + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-en-test", "count": 1}]} + """ + # Suggests base field if language translation not populated + When we get "/archive_autocomplete?field=slugline&language=fr" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-1", "count": 1}]} + """ + + @auth + Scenario: Get distinct coverage sluglines + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "coverages": [{ + "planning": {"language": "en", "slugline": "coverage-en-slugline"}, + "workflow_state": "draft", + 
"news_coverage_status": {"qcode": "ncostat:int"} + }, { + "planning": {"language": "fr", "slugline": "coverage-fr-slugline"}, + "workflow_state": "draft", + "news_coverage_status": {"qcode": "ncostat:int"} + }] + }] + """ + # Suggests both Planning and Coverage sluglines + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 2 items + """ + {"_items": [ + {"value": "planning-1", "count": 1}, + {"value": "coverage-en-slugline", "count": 1} + ]} + """ + + @auth + Scenario: Get distinct event sluglines + Given "events" + """ + [{ + "_id": "event1", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "event-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "event-en-test"}, + {"field": "slugline", "language": "de", "value": "event-de-test"} + ] + }] + """ + # Suggests only the value in translations if populated + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [{"value": "event-en-test", "count": 1}]} + """ + # Suggests base field if language translation not populated + When we get "/archive_autocomplete?field=slugline&language=fr" + Then we get list with 1 items + """ + {"_items": [{"value": "event-1", "count": 1}]} + """ + + @auth + Scenario: Can control what resources are used for suggestions + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", "value": "planning-en-test"}, + {"field": "slugline", "language": "de", "value": "planning-de-test"} + ] + }] + """ + Given "events" + """ + [{ + "_id": "event1", + 
"state": "scheduled", + "pubstatus": "usable", + "slugline": "event-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "event-en-test"}, + {"field": "slugline", "language": "de", "value": "event-de-test"} + ] + }] + """ + # Provides suggestions from all resources if argument not provided + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 2 items + """ + {"_items": [ + {"value": "planning-en-test", "count": 1}, + {"value": "event-en-test", "count": 1} + ]} + """ + # Doesn't provide planning suggestions if ``planning`` not in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive" + Then we get list with 0 items + # Provides planning suggestions if ``planning`` is in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,planning" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-en-test", "count": 1}]} + """ + # Doesn't provide event suggestions if ``events`` not in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive" + Then we get list with 0 items + # Provides event suggestions if ``events`` is in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,events" + Then we get list with 1 items + """ + {"_items": [{"value": "event-en-test", "count": 1}]} + """ + + @auth + Scenario: Counts suggestions from multiple resources + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "package-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", 
"value": "package-en-slugline"}, + {"field": "slugline", "language": "de", "value": "package-de-slugline"} + ], + "coverages": [{ + "planning": {"language": "en", "slugline": "package-en-slugline"}, + "workflow_state": "draft", + "news_coverage_status": {"qcode": "ncostat:int"} + }] + }] + """ + Given "events" + """ + [{ + "_id": "event1", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "package-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "package-en-slugline"}, + {"field": "slugline", "language": "de", "value": "package-de-slugline"} + ] + }] + """ + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [ + {"value": "package-en-slugline", "count": 3} + ]} + """ diff --git a/server/planning/__init__.py b/server/planning/__init__.py index 9ce3f618e..fc2fbd647 100644 --- a/server/planning/__init__.py +++ b/server/planning/__init__.py @@ -74,6 +74,7 @@ import planning.io # noqa from planning.planning_download import init_app as init_planning_download_app from planning.planning_locks import init_app as init_planning_locks_app +from planning.search.planning_autocomplete import init_app as init_planning_autocomplete_app __version__ = "2.7.0-dev" @@ -112,6 +113,7 @@ def init_app(app): init_validator_app(app) init_planning_download_app(app) init_planning_locks_app(app) + init_planning_autocomplete_app(app) superdesk.register_resource( "planning_article_export", diff --git a/server/planning/events/events_schema.py b/server/planning/events/events_schema.py index d7a6d2a78..6cf973102 100644 --- a/server/planning/events/events_schema.py +++ b/server/planning/events/events_schema.py @@ -324,7 +324,7 @@ "properties": { "field": not_analyzed, "language": not_analyzed, - "value": string_with_analyzer, + "value": 
# ---------------------------------------------------------------------------
# Patch residue that shares these source lines with the module below. It is
# kept verbatim (only re-broken into lines) so that replacing this span does
# not drop the tail of the events_schema.py hunk or the planning.py hunks.
#
#   metadata_schema["slugline"]["mapping"], }, }, },
#   diff --git a/server/planning/planning/planning.py b/server/planning/planning/planning.py
#   index 19a882240..74b66d532 100644
#   --- a/server/planning/planning/planning.py
#   +++ b/server/planning/planning/planning.py
#   @@ -1535,7 +1535,7 @@ def duplicate_xmp_file(self, coverage):
#    "properties": {
#    "field": not_analyzed,
#    "language": not_analyzed,
#   - "value": string_with_analyzer,
#   + "value": metadata_schema["slugline"]["mapping"],
#    },
#    },
#    },
#   @@ -1570,16 +1570,7 @@ def duplicate_xmp_file(self, coverage):
#    "planning": {
#    "type": "object",
#    "properties": {
#   - "slugline": {
#   - "type": "string",
#   - "fields": {
#   - "phrase": {
#   - "type": "string",
#   - "analyzer": "phrase_prefix_analyzer",
#   - "search_analyzer": "phrase_prefix_analyzer",
#   - }
#   - },
#   - },
#   + "slugline": metadata_schema["slugline"]["mapping"],
#    },
#    },
#    "assigned_to": assigned_to_schema["mapping"],
#   diff --git a/server/planning/search/planning_autocomplete.py b/server/planning/search/planning_autocomplete.py
#   new file mode 100644
#   index 000000000..a6a977f53
#   --- /dev/null
#   +++ b/server/planning/search/planning_autocomplete.py
#   @@ -0,0 +1,168 @@
# ---------------------------------------------------------------------------
"""Autocomplete suggestion providers for the ``planning`` and ``events`` resources.

Each provider runs an aggregation-only search and returns a mapping of
``suggested value -> number of occurrences``. A value is taken from:

* the translation of ``field`` in the requested language, when the item has one;
* the base ``field`` of the item, when it has no such translation;
* (planning only) the same field on coverages written in the requested language.

Only items that are usable, in a scheduled/postponed/rescheduled state and
created within the configured time window are considered.
"""

from typing import Any, Dict, List, Tuple
from datetime import timedelta

from flask import current_app as app

from superdesk.utc import utcnow
from apps.archive.autocomplete import (
    SETTING_LIMIT as AUTOCOMPLETE_LIMIT,
    SETTING_DAYS as AUTOCOMPLETE_DAYS,
    SETTING_HOURS as AUTOCOMPLETE_HOURS,
    register_autocomplete_suggestion_provider,
)

from planning.common import WORKFLOW_STATE, POST_STATE


# Key paths (outermost first) leading to every ``terms`` aggregation built by
# this module, listed in the order their buckets are merged into the result.
# The last path only exists in planning responses; it is simply absent for events.
_SUGGESTION_AGG_PATHS: Tuple[Tuple[str, ...], ...] = (
    ("base_field_filtered", "base_field"),
    ("translations", "languages_filtered", "field_languages"),
    ("coverages", "coverages_filtered", "coverage_suggestions"),
)


def get_planning_suggestions(field: str, language: str) -> Dict[str, int]:
    """Return suggestions for ``field`` collected from Planning items and their coverages.

    :param field: name of the field to suggest values for (e.g. ``slugline``)
    :param language: language the suggestions must be written in
    :return: mapping of suggested value to the number of times it was found
    """
    bool_query = _construct_bool_query(language)
    # An item also qualifies when only one of its coverages is in ``language``.
    # NOTE(review): such an item can still contribute its *base* field value through
    # the ``base_field_filtered`` aggregation even if the item itself is not in
    # ``language`` - confirm this is intended.
    bool_query["should"].append(
        {
            "nested": {
                "path": "coverages",
                "query": _coverage_language_filter(language),
            },
        }
    )

    # assumes the coverage slugline mapping exposes a ``keyword`` sub-field - TODO confirm
    coverage_field_mapping = {"slugline": "coverages.planning.slugline.keyword"}
    coverage_field = coverage_field_mapping.get(field) or f"coverages.planning.{field}"

    aggs_query = _construct_aggs_query(field, language)
    aggs_query["coverages"] = {
        "nested": {"path": "coverages"},
        "aggs": {
            "coverages_filtered": {
                "filter": _coverage_language_filter(language),
                "aggs": {"coverage_suggestions": agg_field_suggestion(coverage_field)},
            },
        },
    }

    return _fetch_suggestions("planning", {"query": {"bool": bool_query}, "aggs": aggs_query})


def get_event_suggestions(field: str, language: str) -> Dict[str, int]:
    """Return suggestions for ``field`` collected from Events.

    :param field: name of the field to suggest values for (e.g. ``slugline``)
    :param language: language the suggestions must be written in
    :return: mapping of suggested value to the number of times it was found
    """
    query = {
        "query": {"bool": _construct_bool_query(language)},
        "aggs": _construct_aggs_query(field, language),
    }
    return _fetch_suggestions("events", query)


def _fetch_suggestions(resource: str, query: Dict[str, Any]) -> Dict[str, int]:
    """Run ``query`` against ``resource`` and flatten its aggregations into suggestions."""
    # ``size: 0`` - presumably asks for aggregations only, without documents; verify
    # against the ``app.data.elastic.search`` implementation.
    res = app.data.elastic.search(query, resource, params={"size": 0})
    return _get_aggregation_values(res.hits["aggregations"])


def _get_buckets(aggregations: Any, path: Tuple[str, ...]) -> List[Dict[str, Any]]:
    """Return the ``buckets`` list found under ``path``, or ``[]`` if it is not there.

    A missing key and a ``None``/non-mapping node are treated the same way: that
    aggregation has nothing to contribute.
    """
    node = aggregations
    try:
        for key in path:
            node = node[key]
        buckets = node["buckets"]
    except (KeyError, TypeError):
        return []
    return buckets if isinstance(buckets, list) else []


def _get_aggregation_values(aggregations) -> Dict[str, int]:
    """Merge the buckets of every suggestion aggregation into one ``value -> count`` mapping.

    Counts of a value appearing in several aggregations (base field, translations,
    coverages) are summed. Aggregations that are absent from ``aggregations`` - or an
    ``aggregations`` value that is ``None`` - are skipped, as are individual buckets
    lacking ``key`` or ``doc_count``.

    :param aggregations: the ``aggregations`` part of a search response
    :return: mapping of suggested value to its total count
    """
    suggestions: Dict[str, int] = {}

    for path in _SUGGESTION_AGG_PATHS:
        for bucket in _get_buckets(aggregations, path):
            try:
                value, count = bucket["key"], bucket["doc_count"]
            except (KeyError, TypeError):
                # A malformed bucket must not discard the well-formed ones.
                continue
            suggestions[value] = suggestions.get(value, 0) + count

    return suggestions


def agg_field_suggestion(field: str) -> Dict[str, Any]:
    """Build a ``terms`` aggregation over ``field``.

    The number of buckets is capped by the ``AUTOCOMPLETE_LIMIT`` config entry and
    buckets are ordered by key, ascending.
    """
    return {
        "terms": {
            "field": field,
            "size": app.config[AUTOCOMPLETE_LIMIT],
            "order": {"_key": "asc"},
        },
    }


def _coverage_language_filter(language: str) -> Dict[str, Any]:
    """Return a ``bool`` clause matching coverages whose planning language is ``language``."""
    return {"bool": {"must": [{"term": {"coverages.planning.language": language}}]}}


def _translation_filter(field: str, language: str) -> List[Dict[str, Any]]:
    """Return the ``term`` clauses selecting translations of ``field`` in ``language``."""
    return [
        {"term": {"translations.field": field}},
        {"term": {"translations.language": language}},
    ]


def _construct_bool_query(language: str) -> Dict[str, Any]:
    """Build the ``bool`` query selecting the items suggestions may be taken from.

    An item must be usable, in one of the scheduled/postponed/rescheduled states and
    have a ``versioncreated`` inside the configured days/hours window; in addition its
    ``language`` or one of its ``languages`` must equal ``language``.

    A fresh dictionary is returned on every call, so callers may extend it in place.
    """
    window = timedelta(days=app.config[AUTOCOMPLETE_DAYS], hours=app.config[AUTOCOMPLETE_HOURS])
    # Drop microseconds so the query is identical for a whole second,
    # which gives Elasticsearch a chance to cache it.
    versioncreated_min = (utcnow() - window).replace(microsecond=0)

    return {
        "must": [
            {"term": {"pubstatus": POST_STATE.USABLE}},
            {"terms": {"state": [WORKFLOW_STATE.SCHEDULED, WORKFLOW_STATE.POSTPONED, WORKFLOW_STATE.RESCHEDULED]}},
            {"range": {"versioncreated": {"gte": versioncreated_min}}},
        ],
        "should": [
            {"term": {"language": language}},
            {"term": {"languages": language}},
        ],
        "minimum_should_match": 1,
    }


def _construct_aggs_query(field: str, language: str) -> Dict[str, Any]:
    """Build the aggregations shared by the planning and event providers.

    * ``base_field_filtered`` - values of the base field, restricted to items that
      have *no* translation of ``field`` in ``language``;
    * ``translations`` - translated values of ``field`` in ``language``.

    A fresh dictionary is returned on every call, so callers may add aggregations to it.
    """
    # assumes the slugline mapping exposes a ``keyword`` sub-field - TODO confirm
    field_mapping = {"slugline": "slugline.keyword"}
    base_field = field_mapping.get(field) or field

    return {
        "base_field_filtered": {
            "filter": {
                "bool": {
                    "must_not": [
                        {
                            "nested": {
                                "path": "translations",
                                "query": {"bool": {"must": _translation_filter(field, language)}},
                            },
                        },
                    ],
                },
            },
            "aggs": {"base_field": agg_field_suggestion(base_field)},
        },
        "translations": {
            "nested": {"path": "translations"},
            "aggs": {
                "languages_filtered": {
                    "filter": {"bool": {"must": _translation_filter(field, language)}},
                    "aggs": {"field_languages": agg_field_suggestion("translations.value.keyword")},
                },
            },
        },
    }


def init_app(_app):
    """Register the planning and events suggestion providers with the archive autocomplete."""
    register_autocomplete_suggestion_provider("planning", get_planning_suggestions)
    register_autocomplete_suggestion_provider("events", get_event_suggestions)