From b28484e891c40ef27dbbd4b227c59b26c4a30b2d Mon Sep 17 00:00:00 2001 From: andrea rota Date: Fri, 6 Oct 2023 18:06:05 +0100 Subject: [PATCH 1/3] use deterministic ids to help make the process idempotent --- .../cost-surface/cost-surface-data-migration.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/data/scripts/cost-surface/cost-surface-data-migration.py b/data/scripts/cost-surface/cost-surface-data-migration.py index 0a049b5b15..99e276138c 100644 --- a/data/scripts/cost-surface/cost-surface-data-migration.py +++ b/data/scripts/cost-surface/cost-surface-data-migration.py @@ -40,13 +40,17 @@ scenario_id, scenario_name, project_id = scenario # Insert new row in cost_surfaces + # + # We use scenario_id as the id for the cost_surface so that we can make + # the process idempotent, at least in terms of creating cost surfaces + # for existing scenarios. insert_cost_surface_sql = ''' INSERT INTO cost_surfaces (id, name, min, max, is_default, project_id, is_migrated) - VALUES (gen_random_uuid(), %s, 0, 0, false, %s, true) + VALUES (%s, %s, 0, 0, false, %s, true) RETURNING id; ''' - cur_api_db.execute(insert_cost_surface_sql, (scenario_name, project_id)) + cur_api_db.execute(insert_cost_surface_sql, (scenario_id, scenario_name, project_id)) cost_surface_id = cur_api_db.fetchone()[0] # Retrieve the unique id for this new cost_surface # Update scenarios.cost_surface_id for this specific scenario @@ -66,12 +70,16 @@ project_id = project[0] # Insert new row in cost_surfaces with project-specific information + # + # We use project_id as the id for the cost_surface so that we can make + # the process idempotent, at least in terms of creating default cost + # surfaces for existing projects. 
insert_cost_surface_for_project_sql = ''' INSERT INTO cost_surfaces (id, project_id, min, max, is_default, name, is_migrated) - VALUES (gen_random_uuid(), %s, 1, 1, true, 'default', true); + VALUES (%s, %s, 1, 1, true, 'default', true); ''' - cur_api_db.execute(insert_cost_surface_for_project_sql, (project_id,)) + cur_api_db.execute(insert_cost_surface_for_project_sql, (project_id, project_id,)) print("Successfully migrated API model data from marxan-api for:") print(len(all_scenarios), "scenarios") From fa281d03ce9806d96cbf252a11feaa6cba8a3687 Mon Sep 17 00:00:00 2001 From: andrea rota Date: Fri, 6 Oct 2023 18:10:09 +0100 Subject: [PATCH 2/3] only log anomalous state leading to undefined min/max for an existing cost surface Avoid automatically deleting the scenario: we log the anomalous situation and arbitrarily set the cost surface's range to [0,0]. In practice, the scenario that uses such a cost surface may have contained invalid or incomplete data to start with, so setting an arbitrary range may not really matter here (as in, it should not make things worse).
--- .../cost-surface-data-migration.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/data/scripts/cost-surface/cost-surface-data-migration.py b/data/scripts/cost-surface/cost-surface-data-migration.py index 99e276138c..5e6922be86 100644 --- a/data/scripts/cost-surface/cost-surface-data-migration.py +++ b/data/scripts/cost-surface/cost-surface-data-migration.py @@ -116,18 +116,28 @@ min_max = cur_geo_db.fetchone() min_val, max_val = min_max if min_max else (None, None) - # If min and max are null, remove scenario and cost_surface from api_db if min_val is None or max_val is None: - cur_api_db.execute("DELETE FROM scenarios WHERE id = %s;", (scenario_id,)) - cur_api_db.execute("DELETE FROM cost_surfaces WHERE id = %s;", (cost_surface_id,)) orphan_scenario_ids.append(scenario_id) - else: - # Update min and max in the cost_surfaces table in the api_db - cur_api_db.execute(""" - UPDATE cost_surfaces - SET min = %s, max = %s - WHERE id = %s; - """, (min_val, max_val, cost_surface_id)) + print(f'''Could not find Min/Max for scenario {scenario_id}, cost surface {cost_surface_id}. + This may mean that no scenarios_pu_cost_data rows were created for this scenario. + This should never happen, so data for this scenario should be checked. + The scenario may be invalid, and it may need to be deleted. 
+ ''') + + # If min or max is null, default it to 0 instead of removing the scenario and cost_surface from api_db + if min_val is None: + min_val = 0 + print(f'Setting min to 0 for scenario {scenario_id}, cost surface {cost_surface_id}') + if max_val is None: + max_val = 0 + print(f'Setting max to 0 for scenario {scenario_id}, cost surface {cost_surface_id}') + + # Update min and max in the cost_surfaces table in the api_db + cur_api_db.execute(""" + UPDATE cost_surfaces + SET min = %s, max = %s + WHERE id = %s; + """, (min_val, max_val, cost_surface_id)) #Default Cost Surface for projects cur_api_db.execute("SELECT id FROM projects;") From 8111fd01f7a3f669800bf0dab82578bb5c43aeeb Mon Sep 17 00:00:00 2001 From: andrea rota Date: Fri, 6 Oct 2023 18:24:49 +0100 Subject: [PATCH 3/3] add requirements.txt for cost surfaces data migration script --- .../cost-surfaces-data-migration}/cost-surface-data-migration.py | 0 .../scripts/id26.2/cost-surfaces-data-migration/requirements.txt | 1 + 2 files changed, 1 insertion(+) rename data/scripts/{cost-surface => id26.2/cost-surfaces-data-migration}/cost-surface-data-migration.py (100%) create mode 100644 data/scripts/id26.2/cost-surfaces-data-migration/requirements.txt diff --git a/data/scripts/cost-surface/cost-surface-data-migration.py b/data/scripts/id26.2/cost-surfaces-data-migration/cost-surface-data-migration.py similarity index 100% rename from data/scripts/cost-surface/cost-surface-data-migration.py rename to data/scripts/id26.2/cost-surfaces-data-migration/cost-surface-data-migration.py diff --git a/data/scripts/id26.2/cost-surfaces-data-migration/requirements.txt b/data/scripts/id26.2/cost-surfaces-data-migration/requirements.txt new file mode 100644 index 0000000000..75241c52e5 --- /dev/null +++ b/data/scripts/id26.2/cost-surfaces-data-migration/requirements.txt @@ -0,0 +1 @@ +psycopg==3.1.12 \ No newline at end of file