diff --git a/portality/migrate/3575_make_notes_searchable/README.md b/portality/migrate/3575_make_notes_searchable/README.md new file mode 100644 index 0000000000..37a81f071b --- /dev/null +++ b/portality/migrate/3575_make_notes_searchable/README.md @@ -0,0 +1,7 @@ +# 09 11 2023; Issue 3575 - Make notes searchable for admin + +## Execution + +Run the migration with + + python portality/scripts/es_reindex.py portality/migrate/3575_make_notes_searchable/migrate.json \ No newline at end of file diff --git a/portality/migrate/3575_make_notes_searchable/__init__.py b/portality/migrate/3575_make_notes_searchable/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/migrate/3575_make_notes_searchable/migrate.json b/portality/migrate/3575_make_notes_searchable/migrate.json new file mode 100644 index 0000000000..f129942092 --- /dev/null +++ b/portality/migrate/3575_make_notes_searchable/migrate.json @@ -0,0 +1,16 @@ +{ + "new_version": "-20231114", + "old_version": "", + "types": [ + { + "type" : "application", + "migrate": true, + "set_alias": false + }, + { + "type": "journal", + "migrate": true, + "set_alias": false + } + ] +} \ No newline at end of file diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py new file mode 100644 index 0000000000..4893b1afc4 --- /dev/null +++ b/portality/scripts/es_reindex.py @@ -0,0 +1,135 @@ +# ~~ESReindex:CLI~~ +""" +This script is useful to create new index with any new mapping changes if applicable and copy the content from old index to new index +run the script: +portality/scripts/es_reindex.py +ex: +DOAJENV=dev python portality/scripts/es_reindex.py portality/migrate/3575_make_notes_searchable/migrate.json + +example json file: +{ + "new_version": "-20231109", #new index version + "old_version": "-20230901", #old index version + "types": [ + { + "type" : "application", #model type + "migrate": true, #if migration required true or false + "set_alias": false #set to true if 
alias has to be set with base name ex: doaj-application
+        },
+        {
+            "type": "journal",
+            "migrate": true,
+            "set_alias": false
+        }
+    ]
+}
+"""
+
+import json
+import time
+import elasticsearch
+from elasticsearch import helpers
+from elasticsearch.exceptions import NotFoundError, RequestError, ConnectionError, AuthorizationException
+
+from portality.core import app
+from portality.lib import es_data_mapping
+
+
+def _reindex_with_retries(es_connection, old_index, new_index, import_type, max_retries=5, wait=10):
+    """Copy old_index into new_index; return True once an attempt succeeds.
+
+    Every failed attempt counts towards max_retries, so the loop terminates.
+    """
+    for attempt in range(1, max_retries + 1):
+        try:
+            result, errors = helpers.reindex(client=es_connection, source_index=old_index,
+                                             target_index=new_index)
+        except ConnectionError:
+            print(f"Timeout occurred, retrying {attempt}/{max_retries}")
+            time.sleep(wait)  # give the cluster time to recover before retrying
+            continue
+        if not errors:
+            print(f"Reindex completed successfully: {import_type}", result)
+            return True
+        print(f"Some documents failed to reindex: {import_type}", errors)
+    return False
+
+
+def do_import(config):
+    """Create the new versioned indices and copy the old documents into them.
+
+    :param config: dict with "new_version", "old_version" and a "types" list;
+        only entries with "migrate": true are processed, and "set_alias"
+        points the base alias at the new index after a successful reindex.
+    """
+    # generous 10 minute timeout so long-running reindex requests are not cut off
+    es_connection = elasticsearch.Elasticsearch(app.config['ELASTICSEARCH_HOSTS'],
+                                                verify_certs=app.config.get("ELASTIC_SEARCH_VERIFY_CERTS", True),
+                                                timeout=60*10)
+
+    version = config.get("new_version") or ""
+    previous_version = config.get("old_version") or ""
+
+    # honour the "migrate" flag: exactly the types listed below get processed
+    types = [s for s in config.get("types", []) if s.get("migrate", False) is True]
+
+    print("\n==Reindexing the following types / indices==")
+    for s in types:
+        print(s.get("type"))
+    print("\n")
+
+    text = input("Continue? [y/N] ")
+    if text.lower() != "y":
+        raise SystemExit
+
+    mappings = es_data_mapping.get_mappings(app)
+
+    for s in types:
+        import_type = s["type"]
+        if import_type not in mappings:
+            print("No mapping found for type {0}; skipping".format(import_type))
+            continue
+
+        default_index_name = app.config['ELASTIC_SEARCH_DB_PREFIX'] + import_type
+        new_index = default_index_name + version
+        old_index = default_index_name + previous_version
+
+        if es_connection.indices.exists(new_index):
+            print("ES Type + Mapping already exists in index {0} for {1}".format(new_index, import_type))
+            continue
+
+        try:
+            r = es_connection.indices.create(index=new_index, body=mappings[import_type])
+            print("Creating ES Type + Mapping in index {0} for {1}; status: {2}".format(new_index, import_type, r))
+            print("Reindexing from {0} to {1}".format(old_index, new_index))
+            if not _reindex_with_retries(es_connection, old_index, new_index, import_type):
+                print("Failed to complete the reindexing after several retries.")
+            elif s.get("set_alias", False):
+                es_connection.indices.put_alias(index=new_index, name=default_index_name)
+                print("alias set for {0} as {1}".format(new_index, default_index_name))
+            else:
+                print("alias not set for {0}".format(new_index))
+        except ConnectionError as e:
+            print(f"Failed to connect to Elasticsearch server. {e.info}")
+        except NotFoundError as e:
+            print(f"The specified index or alias does not exist. {e.info}")
+        except RequestError as e:
+            print(f"Bad request: {e.info}")
+        except AuthorizationException as e:
+            print(f"You do not have permission to perform this operation. {e.info}")
+        except Exception as e:
+            print(f"An unexpected error occurred: {e}")
+
+
+if __name__ == '__main__':
+
+    import argparse
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("config", help="Config file path to migrate")
+    args = parser.parse_args()
+
+    with open(args.config, "r", encoding="utf-8") as f:
+        config = json.load(f)
+
+    do_import(config)
\ No newline at end of file