From b497f2a162398d26a89fcf9a80ec1c867aeff28e Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Thu, 9 Nov 2023 20:25:02 +0530 Subject: [PATCH 1/5] script to reindex elasticsearch indexes --- .../3575_make_notes_searchable/README.md | 7 ++ .../3575_make_notes_searchable/__init__.py | 0 .../3575_make_notes_searchable/migrate.json | 16 +++ portality/scripts/es_reindex.py | 107 ++++++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 portality/migrate/3575_make_notes_searchable/README.md create mode 100644 portality/migrate/3575_make_notes_searchable/__init__.py create mode 100644 portality/migrate/3575_make_notes_searchable/migrate.json create mode 100644 portality/scripts/es_reindex.py diff --git a/portality/migrate/3575_make_notes_searchable/README.md b/portality/migrate/3575_make_notes_searchable/README.md new file mode 100644 index 0000000000..37a81f071b --- /dev/null +++ b/portality/migrate/3575_make_notes_searchable/README.md @@ -0,0 +1,7 @@ +# 09 11 2023; Issue 3575 - Make notes searchable for admin + +## Execution + +Run the migration with + + python portality/scripts/es_reindex.py portality/migrate/3575_make_notes_searchable/migrate.json \ No newline at end of file diff --git a/portality/migrate/3575_make_notes_searchable/__init__.py b/portality/migrate/3575_make_notes_searchable/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/migrate/3575_make_notes_searchable/migrate.json b/portality/migrate/3575_make_notes_searchable/migrate.json new file mode 100644 index 0000000000..8ca5754bca --- /dev/null +++ b/portality/migrate/3575_make_notes_searchable/migrate.json @@ -0,0 +1,16 @@ +{ + "new_version": "-20231109", + "old_version": "", + "types": [ + { + "type" : "application", + "migrate": true, + "set_alias": false + }, + { + "type": "journal", + "migrate": true, + "set_alias": false + } + ] +} \ No newline at end of file diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py new file
mode 100644 index 0000000000..093ed70cef --- /dev/null +++ b/portality/scripts/es_reindex.py @@ -0,0 +1,107 @@ +""" +This script is useful to create new index with any new mapping changes if applicable and copy the content from old index to new index +""" + +import json +import time +from elasticsearch import helpers +from elasticsearch.exceptions import NotFoundError, RequestError, ConnectionError, AuthorizationException + +from portality.core import app, es_connection, put_mappings +from portality.lib import es_data_mapping + + +def do_import(config): + + # get the versions + version = config.get("new_version") + previous_version = config.get("old_version") + + # get the types we are going to work with + print("==Carrying out the following import==") + for s in config.get("types", []): + if s.get("migrate", False) is True: + print(s.get("type")) + + print("\n") + + text = input("Continue? [y/N] ") + if text.lower() != "y": + exit() + + # get all available mappings + mappings = es_data_mapping.get_mappings(app) + + # Iterate through the types then + # 1. create new index + # 2. re index with old index + # 3. 
set alias for new index + for s in config.get("types", []): + import_type = s["type"] + if import_type in mappings: + + # index names + default_index_name = app.config['ELASTIC_SEARCH_DB_PREFIX'] + import_type + new_index = default_index_name + version + old_index = default_index_name + previous_version + + if not es_connection.indices.exists(new_index): + try: + # create new index + r = es_connection.indices.create(index=new_index, body=mappings[import_type]) + print("Creating ES Type + Mapping in index {0} for {1}; status: {2}".format(new_index, import_type, r)) + + # reindex from the old index + print("Reindexing from {0} to {1}".format(old_index, new_index)) + retry_count = 0 + max_retries = 5 + success = False + while not success and retry_count < max_retries: + try: + result, errors = helpers.reindex(client=es_connection, source_index=old_index, + target_index=new_index) + if errors: + print(f"Some documents failed to reindex: {import_type}", errors) + else: + success = True + print(f"Reindex completed successfully: {import_type}", result) + # add alias + if s.get("set_alias", False): + es_connection.indices.put_alias(index=new_index, name=default_index_name) + print("alias set for {0} as {1}".format(new_index, default_index_name)) + else: + print("alias not set for {0}".format(new_index)) + except ConnectionError: + retry_count += 1 + print(f"Timeout occurred, retrying {retry_count}/{max_retries}") + time.sleep(10) # Wait for 10 seconds before retrying + + if not success: + print("Failed to complete the reindexing after several retries.") + + except ConnectionError as e: + print(f"Failed to connect to Elasticsearch server. {e.info}") + except NotFoundError as e: + print(f"The specified index or alias does not exist. {e.info}") + except RequestError as e: + print(f"Bad request: {e.info}") + except AuthorizationException as e: + print(f"You do not have permission to perform this operation. 
{e.info}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + else: + print("ES Type + Mapping already exists in index {0} for {1}".format(new_index, import_type)) + + +if __name__ == '__main__': + + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument("config", help="Config file path to migrate") + args = parser.parse_args() + + with open(args.config, "r", encoding="utf-8") as f: + config = json.loads(f.read()) + + do_import(config) \ No newline at end of file From e1e23875428a7f9d78531443a7919722588d7563 Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Mon, 13 Nov 2023 11:11:49 +0530 Subject: [PATCH 2/5] Added timeout to the connection --- portality/scripts/es_reindex.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py index 093ed70cef..70206b013e 100644 --- a/portality/scripts/es_reindex.py +++ b/portality/scripts/es_reindex.py @@ -4,14 +4,19 @@ import json import time +import elasticsearch from elasticsearch import helpers from elasticsearch.exceptions import NotFoundError, RequestError, ConnectionError, AuthorizationException -from portality.core import app, es_connection, put_mappings +from portality.core import app from portality.lib import es_data_mapping def do_import(config): + # create a connection with timeout + es_connection = elasticsearch.Elasticsearch(app.config['ELASTICSEARCH_HOSTS'], + verify_certs=app.config.get("ELASTIC_SEARCH_VERIFY_CERTS", True), + timeout=60*10) # get the versions version = config.get("new_version") From 0e6b25f694a00d193fcf4e49fd59b91b0166c6c4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Mon, 13 Nov 2023 11:38:24 +0530 Subject: [PATCH 3/5] Added more documentation --- portality/scripts/es_reindex.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py index 70206b013e..920ce5c938 100644 --- 
a/portality/scripts/es_reindex.py +++ b/portality/scripts/es_reindex.py @@ -1,5 +1,28 @@ +# ~~ESReindex:CLI~~ """ This script is useful to create new index with any new mapping changes if applicable and copy the content from old index to new index +run the script: +portality/scripts/es_reindex.py +ex: +portality/scripts/es_reindex.py -u /portality/migrate/3575_make_notes_searchable/migrate.json + +example json file: +{ + "new_version": "-20231109", #new index version + "old_version": "-20230901", #old index version + "types": [ + { + "type" : "application", #model type + "migrate": true, #if migration required true or false + "set_alias": false #set to true if alias has to be set with base name ex: doaj-application + }, + { + "type": "journal", + "migrate": true, + "set_alias": false + } + ] +} """ import json From 1b18457f78440b9069014fb9158325fd42af3f4b Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Tue, 14 Nov 2023 16:52:44 +0000 Subject: [PATCH 4/5] reapply correct edges version --- portality/static/vendor/edges | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 7350e42008..990f422016 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 7350e420087f89364e71b4431e3a3130d4d00c69 +Subproject commit 990f4220163a3e18880f0bdc3ad5c80d234d22dd From b26ebfda5ebe2b22072f9767e8282dc5e743e8c5 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Wed, 15 Nov 2023 08:22:26 +0000 Subject: [PATCH 5/5] cosmetic changes to migrate script --- portality/migrate/3575_make_notes_searchable/migrate.json | 2 +- portality/scripts/es_reindex.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/portality/migrate/3575_make_notes_searchable/migrate.json b/portality/migrate/3575_make_notes_searchable/migrate.json index 8ca5754bca..f129942092 100644 --- a/portality/migrate/3575_make_notes_searchable/migrate.json +++ 
b/portality/migrate/3575_make_notes_searchable/migrate.json @@ -1,5 +1,5 @@ { - "new_version": "-20231109", + "new_version": "-20231114", "old_version": "", "types": [ { diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py index 920ce5c938..4893b1afc4 100644 --- a/portality/scripts/es_reindex.py +++ b/portality/scripts/es_reindex.py @@ -4,7 +4,7 @@ run the script: portality/scripts/es_reindex.py ex: -portality/scripts/es_reindex.py -u /portality/migrate/3575_make_notes_searchable/migrate.json +DOAJENV=dev python portality/scripts/es_reindex.py portality/migrate/3575_make_notes_searchable/migrate.json example json file: { @@ -46,7 +46,7 @@ def do_import(config): previous_version = config.get("old_version") # get the types we are going to work with - print("==Carrying out the following import==") + print("\n==Reindexing the following types / indices==") for s in config.get("types", []): if s.get("migrate", False) is True: print(s.get("type")) @@ -63,7 +63,7 @@ def do_import(config): # Iterate through the types then # 1. create new index # 2. re index with old index - # 3. set alias for new index + # 3. set alias for new index (if requested) for s in config.get("types", []): import_type = s["type"] if import_type in mappings: @@ -130,6 +130,6 @@ def do_import(config): args = parser.parse_args() with open(args.config, "r", encoding="utf-8") as f: - config = json.loads(f.read()) + config = json.load(f) do_import(config) \ No newline at end of file