From 46d3f1b4315f4996d8172bd76650b03133504657 Mon Sep 17 00:00:00 2001 From: Mark Pittaway Date: Thu, 12 Oct 2023 15:50:37 +1100 Subject: [PATCH 1/5] [SDESK-7062] Add Event, Planning & Coverages to autocomplete suggestions --- server/planning/__init__.py | 2 + .../planning/search/planning_autocomplete.py | 178 ++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 server/planning/search/planning_autocomplete.py diff --git a/server/planning/__init__.py b/server/planning/__init__.py index 9ce3f618e..fc2fbd647 100644 --- a/server/planning/__init__.py +++ b/server/planning/__init__.py @@ -74,6 +74,7 @@ import planning.io # noqa from planning.planning_download import init_app as init_planning_download_app from planning.planning_locks import init_app as init_planning_locks_app +from planning.search.planning_autocomplete import init_app as init_planning_autocomplete_app __version__ = "2.7.0-dev" @@ -112,6 +113,7 @@ def init_app(app): init_validator_app(app) init_planning_download_app(app) init_planning_locks_app(app) + init_planning_autocomplete_app(app) superdesk.register_resource( "planning_article_export", diff --git a/server/planning/search/planning_autocomplete.py b/server/planning/search/planning_autocomplete.py new file mode 100644 index 000000000..d67708a65 --- /dev/null +++ b/server/planning/search/planning_autocomplete.py @@ -0,0 +1,178 @@ +from typing import Set, Dict, Any +from datetime import timedelta + +from flask import current_app as app + +from superdesk.utc import utcnow +from apps.archive.autocomplete import ( + SETTING_LIMIT as AUTOCOMPLETE_LIMIT, + SETTING_DAYS as AUTOCOMPLETE_DAYS, + SETTING_HOURS as AUTOCOMPLETE_HOURS, + register_autocomplete_suggestion_provider, +) + +from planning.common import WORKFLOW_STATE, POST_STATE + + +def get_planning_suggestions(field: str, language: str) -> Set[str]: + bool_query = _construct_bool_query(language) + bool_query["should"].append( + { + "nested": { + "path": "coverages", + "query": {"bool": {"must": [{"term": {"coverages.planning.language": language}}]}}, + }, + } + ) + + aggs_query = _construct_aggs_query(field, language) + coverage_field_mapping = {"slugline": "coverages.planning.slugline.keyword"} + coverage_field = coverage_field_mapping.get(field) or f"coverages.planning.{field}" + + aggs_query["coverages"] = { + "nested": {"path": "coverages"}, + "aggs": { + "coverages_filtered": { + "filter": {"bool": {"must": [{"term": {"coverages.planning.language": language}}]}}, + "aggs": {"coverage_suggestions": agg_field_suggestion(coverage_field)}, + }, + }, + } + + query = { + "query": {"bool": bool_query}, + "aggs": aggs_query, + } + + res = app.data.elastic.search(query, "planning", params={"size": 0}) + suggestions = _get_aggregation_values(res.hits["aggregations"]) + + return suggestions + + +def get_event_suggestions(field: str, language: str) -> Set[str]: + query = { + "query": {"bool": _construct_bool_query(language)}, + "aggs": _construct_aggs_query(field, language), + } + + res = app.data.elastic.search(query, "events", params={"size": 0}) + return _get_aggregation_values(res.hits["aggregations"]) + + +def _get_aggregation_values(aggregations) -> Set[str]: + suggestions = set() + + try: + base_suggestions = set( + [bucket["key"] for bucket in aggregations["base_field_filtered"]["base_field"]["buckets"]] + ) + suggestions = base_suggestions + except KeyError: + pass + + try: + translated_suggestions = set( + [ + bucket["key"] + for bucket in aggregations["translations"]["languages_filtered"]["field_languages"]["buckets"] + ] + ) + suggestions = suggestions.union(translated_suggestions) + except KeyError: + pass + + try: + coverage_suggestions = set( + [ + bucket["key"] + for bucket in aggregations["coverages"]["coverages_filtered"]["coverage_suggestions"]["buckets"] + ] + ) + suggestions = suggestions.union(coverage_suggestions) + except KeyError: + pass + + return suggestions + + +def agg_field_suggestion(field): + return { + "terms": { + "field": field, + "size": app.config[AUTOCOMPLETE_LIMIT], + "order": {"_key": "asc"}, + }, + } + + +def _construct_bool_query(language: str) -> Dict[str, Any]: + versioncreated_min = ( + utcnow() - timedelta(days=app.config[AUTOCOMPLETE_DAYS], hours=app.config[AUTOCOMPLETE_HOURS]) + ).replace( + microsecond=0 + ) # avoid different microsecond each time so elastic has 1s to cache + + return { + "must": [ + {"term": {"pubstatus": POST_STATE.USABLE}}, + {"terms": {"state": [WORKFLOW_STATE.SCHEDULED, WORKFLOW_STATE.POSTPONED, WORKFLOW_STATE.RESCHEDULED]}}, + {"range": {"versioncreated": {"gte": versioncreated_min}}}, + ], + "should": [ + {"term": {"language": language}}, + {"term": {"languages": language}}, + ], + "minimum_should_match": 1, + } + + +def _construct_aggs_query(field: str, language: str) -> Dict[str, Any]: + field_mapping = {"slugline": "slugline.keyword"} + base_field = field_mapping.get(field) or field + + return { + "base_field_filtered": { + "filter": { + "bool": { + "must_not": [ + { + "nested": { + "path": "translations", + "query": { + "bool": { + "must": [ + {"term": {"translations.field": field}}, + {"term": {"translations.language": language}}, + ], + }, + }, + }, + }, + ], + }, + }, + "aggs": {"base_field": agg_field_suggestion(base_field)}, + }, + "translations": { + "nested": {"path": "translations"}, + "aggs": { + "languages_filtered": { + "filter": { + "bool": { + "must": [ + {"term": {"translations.field": field}}, + {"term": {"translations.language": language}}, + ], + }, + }, + "aggs": {"field_languages": agg_field_suggestion("translations.value.keyword")}, + }, + }, + }, + } + + +def init_app(_app): + register_autocomplete_suggestion_provider("planning", get_planning_suggestions) + register_autocomplete_suggestion_provider("events", get_event_suggestions) From a2a1b218097e1694491163af51995f3e4f58f018 Mon Sep 17 00:00:00 2001 From: Mark Pittaway Date: Thu, 12 Oct 2023 15:50:57 +1100 Subject: [PATCH 2/5] fix slugline schemas --- server/planning/events/events_schema.py | 2 +- server/planning/planning/planning.py | 13 ++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/server/planning/events/events_schema.py b/server/planning/events/events_schema.py index d7a6d2a78..6cf973102 100644 --- a/server/planning/events/events_schema.py +++ b/server/planning/events/events_schema.py @@ -324,7 +324,7 @@ "properties": { "field": not_analyzed, "language": not_analyzed, - "value": string_with_analyzer, + "value": metadata_schema["slugline"]["mapping"], }, }, }, diff --git a/server/planning/planning/planning.py b/server/planning/planning/planning.py index 19a882240..74b66d532 100644 --- a/server/planning/planning/planning.py +++ b/server/planning/planning/planning.py @@ -1535,7 +1535,7 @@ def duplicate_xmp_file(self, coverage): "properties": { "field": not_analyzed, "language": not_analyzed, - "value": string_with_analyzer, + "value": metadata_schema["slugline"]["mapping"], }, }, }, @@ -1570,16 +1570,7 @@ def duplicate_xmp_file(self, coverage): "planning": { "type": "object", "properties": { - "slugline": { - "type": "string", - "fields": { - "phrase": { - "type": "string", - "analyzer": "phrase_prefix_analyzer", - "search_analyzer": "phrase_prefix_analyzer", - } - }, - }, + "slugline": metadata_schema["slugline"]["mapping"], }, }, "assigned_to": assigned_to_schema["mapping"], From 95b55fc21151d3602a0fd5f0ce8b32b25c833fb7 Mon Sep 17 00:00:00 2001 From: Mark Pittaway Date: Thu, 12 Oct 2023 15:51:04 +1100 Subject: [PATCH 3/5] Add behave tests --- server/features/search_autocomplete.feature | 178 ++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 server/features/search_autocomplete.feature diff --git a/server/features/search_autocomplete.feature b/server/features/search_autocomplete.feature new file mode 100644 index 000000000..9f0b2faa4 --- /dev/null +++ b/server/features/search_autocomplete.feature @@ -0,0 +1,178 @@ +Feature: Planning autocomplete + Background: Setup config + Given config update + """ + { + "ARCHIVE_AUTOCOMPLETE": true, + "ARCHIVE_AUTOCOMPLETE_DAYS": 999 + } + """ + + @auth + Scenario: Get distinct planning sluglines + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", "value": "planning-en-test"}, + {"field": "slugline", "language": "de", "value": "planning-de-test"} + ] + }] + """ + # Suggests only the value in translations if populated + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-en-test"}]} + """ + # Suggests base field if language translation not populated + When we get "/archive_autocomplete?field=slugline&language=fr" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-1"}]} + """ + + @auth + Scenario: Get distinct coverage sluglines + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "coverages": [{ + "planning": {"language": "en", "slugline": "coverage-en-slugline"}, + "workflow_state": "draft", + "news_coverage_status": {"qcode": "ncostat:int"} + }, { + "planning": {"language": "fr", "slugline": "coverage-fr-slugline"}, + "workflow_state": "draft", + "news_coverage_status": {"qcode": "ncostat:int"} + }] + }] + """ + # Suggests both Planning and Coverage sluglines + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 2 items + """ + {"_items": [ + {"value": "planning-1"}, + {"value": "coverage-en-slugline"} + ]} + """ + + @auth + Scenario: Get distinct event sluglines + Given "events" + """ + [{ + "_id": "event1", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "event-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "event-en-test"}, + {"field": "slugline", "language": "de", "value": "event-de-test"} + ] + }] + """ + # Suggests only the value in translations if populated + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [{"value": "event-en-test"}]} + """ + # Suggests base field if language translation not populated + When we get "/archive_autocomplete?field=slugline&language=fr" + Then we get list with 1 items + """ + {"_items": [{"value": "event-1"}]} + """ + + @auth @wip + Scenario: Can control what resources are used for suggestions + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "planning-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", "value": "planning-en-test"}, + {"field": "slugline", "language": "de", "value": "planning-de-test"} + ] + }] + """ + Given "events" + """ + [{ + "_id": "event1", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "event-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "event-en-test"}, + {"field": "slugline", "language": "de", "value": "event-de-test"} + ] + }] + """ + # Provides suggestions from all resources if argument not provided + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 2 items + """ + {"_items": [ + {"value": "planning-en-test"}, + {"value": "event-en-test"} + ]} + """ + # Doesn't provide planning suggestions if ``planning`` not in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive" + Then we get list with 0 items + # Provides planning suggestions if ``planning`` is in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,planning" + Then we get list with 1 items + """ + {"_items": [{"value": "planning-en-test"}]} + """ + # Doesn't provide event suggestions if ``events`` not in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive" + Then we get list with 0 items + # Provides event suggestions if ``events`` is in resources argument + When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,events" + Then we get list with 1 items + """ + {"_items": [{"value": "event-en-test"}]} + """ From e579351d03841a76f42cdb17b01cecdf6fc9bd7b Mon Sep 17 00:00:00 2001 From: Mark Pittaway Date: Fri, 13 Oct 2023 10:06:29 +1100 Subject: [PATCH 4/5] Add `count` to response --- server/features/search_autocomplete.feature | 74 ++++++++++++++++--- .../planning/search/planning_autocomplete.py | 42 ++++------- 2 files changed, 79 insertions(+), 37 deletions(-) diff --git a/server/features/search_autocomplete.feature b/server/features/search_autocomplete.feature index 9f0b2faa4..f6b3a3a14 100644 --- a/server/features/search_autocomplete.feature +++ b/server/features/search_autocomplete.feature @@ -32,13 +32,13 @@ Feature: Planning autocomplete When we get "/archive_autocomplete?field=slugline&language=en" Then we get list with 1 items """ - {"_items": [{"value": "planning-en-test"}]} + {"_items": [{"value": "planning-en-test", "count": 1}]} """ # Suggests base field if language translation not populated When we get "/archive_autocomplete?field=slugline&language=fr" Then we get list with 1 items """ - {"_items": [{"value": "planning-1"}]} + {"_items": [{"value": "planning-1", "count": 1}]} """ @auth @@ -71,8 +71,8 @@ Feature: Planning autocomplete Then we get list with 2 items """ {"_items": [ - {"value": "planning-1"}, - {"value": "coverage-en-slugline"} + {"value": "planning-1", "count": 1}, + {"value": "coverage-en-slugline", "count": 1} ]} """ @@ -101,16 +101,16 @@ Feature: Planning autocomplete When we get "/archive_autocomplete?field=slugline&language=en" Then we get list with 1 items """ - {"_items": [{"value": "event-en-test"}]} + {"_items": [{"value": "event-en-test", "count": 1}]} """ # Suggests base field if language translation not populated When we get "/archive_autocomplete?field=slugline&language=fr" Then we get list with 1 items """ - {"_items": [{"value": "event-1"}]} + {"_items": [{"value": "event-1", "count": 1}]} """ - @auth @wip + @auth Scenario: Can control what resources are used for suggestions Given "planning" """ @@ -154,8 +154,8 @@ Feature: Planning autocomplete Then we get list with 2 items """ {"_items": [ - {"value": "planning-en-test"}, - {"value": "event-en-test"} + {"value": "planning-en-test", "count": 1}, + {"value": "event-en-test", "count": 1} ]} """ # Doesn't provide planning suggestions if ``planning`` not in resources argument @@ -165,7 +165,7 @@ Feature: Planning autocomplete When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,planning" Then we get list with 1 items """ - {"_items": [{"value": "planning-en-test"}]} + {"_items": [{"value": "planning-en-test", "count": 1}]} """ # Doesn't provide event suggestions if ``events`` not in resources argument When we get "/archive_autocomplete?field=slugline&language=en&resources=archive" @@ -174,5 +174,57 @@ Feature: Planning autocomplete When we get "/archive_autocomplete?field=slugline&language=en&resources=archive,events" Then we get list with 1 items """ - {"_items": [{"value": "event-en-test"}]} + {"_items": [{"value": "event-en-test", "count": 1}]} + """ + + @auth + Scenario: Counts suggestions from multiple resources + Given "planning" + """ + [{ + "_id": "plan1", + "guid": "plan1", + "versioncreated": "#DATE#", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "package-1", + "language": "en", + "languages": ["en", "fr", "de"], + "planning_date": "2021-01-11T16:00:00.000Z", + "translations": [ + {"field": "slugline", "language": "en", "value": "package-en-slugline"}, + {"field": "slugline", "language": "de", "value": "package-de-slugline"} + ], + "coverages": [{ + "planning": {"language": "en", "slugline": "package-en-slugline"}, + "workflow_state": "draft", + "news_coverage_status": {"qcode": "ncostat:int"} + }] + }] + """ + Given "events" + """ + [{ + "_id": "event1", + "state": "scheduled", + "pubstatus": "usable", + "slugline": "package-1", + "language": "en", + "languages": ["en", "fr", "de"], + "dates": { + "start": "2025-01-03T00:00:00+0000", + "end": "2025-01-04T00:00:00+0000" + }, + "translations": [ + {"field": "slugline", "language": "en", "value": "package-en-slugline"}, + {"field": "slugline", "language": "de", "value": "package-de-slugline"} + ] + }] + """ + When we get "/archive_autocomplete?field=slugline&language=en" + Then we get list with 1 items + """ + {"_items": [ + {"value": "package-en-slugline", "count": 3} + ]} """ diff --git a/server/planning/search/planning_autocomplete.py b/server/planning/search/planning_autocomplete.py index d67708a65..a6a977f53 100644 --- a/server/planning/search/planning_autocomplete.py +++ b/server/planning/search/planning_autocomplete.py @@ -1,4 +1,4 @@ -from typing import Set, Dict, Any +from typing import Dict, Any from datetime import timedelta from flask import current_app as app @@ -14,7 +14,7 @@ from planning.common import WORKFLOW_STATE, POST_STATE -def get_planning_suggestions(field: str, language: str) -> Set[str]: +def get_planning_suggestions(field: str, language: str) -> Dict[str, int]: bool_query = _construct_bool_query(language) bool_query["should"].append( { @@ -45,12 +45,10 @@ def get_planning_suggestions(field: str, language: str) -> Set[str]: } res = app.data.elastic.search(query, "planning", params={"size": 0}) - suggestions = _get_aggregation_values(res.hits["aggregations"]) - - return suggestions + return _get_aggregation_values(res.hits["aggregations"]) -def get_event_suggestions(field: str, language: str) -> Set[str]: +def get_event_suggestions(field: str, language: str) -> Dict[str, int]: query = { "query": {"bool": _construct_bool_query(language)}, "aggs": _construct_aggs_query(field, language), @@ -60,36 +58,28 @@ def get_event_suggestions(field: str, language: str) -> Set[str]: return _get_aggregation_values(res.hits["aggregations"]) -def _get_aggregation_values(aggregations) -> Set[str]: - suggestions = set() +def _get_aggregation_values(aggregations) -> Dict[str, int]: + suggestions: Dict[str, int] = {} try: - base_suggestions = set( - [bucket["key"] for bucket in aggregations["base_field_filtered"]["base_field"]["buckets"]] - ) - suggestions = base_suggestions + suggestions = { + bucket["key"]: bucket["doc_count"] + for bucket in aggregations["base_field_filtered"]["base_field"]["buckets"] + } except KeyError: pass try: - translated_suggestions = set( - [ - bucket["key"] - for bucket in aggregations["translations"]["languages_filtered"]["field_languages"]["buckets"] - ] - ) - suggestions = suggestions.union(translated_suggestions) + for bucket in aggregations["translations"]["languages_filtered"]["field_languages"]["buckets"]: + suggestions.setdefault(bucket["key"], 0) + suggestions[bucket["key"]] += bucket["doc_count"] except KeyError: pass try: - coverage_suggestions = set( - [ - bucket["key"] - for bucket in aggregations["coverages"]["coverages_filtered"]["coverage_suggestions"]["buckets"] - ] - ) - suggestions = suggestions.union(coverage_suggestions) + for bucket in aggregations["coverages"]["coverages_filtered"]["coverage_suggestions"]["buckets"]: + suggestions.setdefault(bucket["key"], 0) + suggestions[bucket["key"]] += bucket["doc_count"] except KeyError: pass From a0a890747caba5626dc34cb158e37d70c464fb58 Mon Sep 17 00:00:00 2001 From: Mark Pittaway Date: Mon, 16 Oct 2023 15:22:26 +1100 Subject: [PATCH 5/5] Remove `.fireq` file --- .fireq.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .fireq.json diff --git a/.fireq.json b/.fireq.json deleted file mode 100644 index 20c11c174..000000000 --- a/.fireq.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "superdesk_branch": "release/2.6" -}