From bb1202cfbe8e81124d86337c2c91055b885d8479 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 16 Oct 2023 12:03:52 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../migrations/0024_import_legacy_data.py | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/lnschema_core/migrations/0024_import_legacy_data.py b/lnschema_core/migrations/0024_import_legacy_data.py index d6c7ed3b..43783c59 100644 --- a/lnschema_core/migrations/0024_import_legacy_data.py +++ b/lnschema_core/migrations/0024_import_legacy_data.py @@ -9,20 +9,21 @@ import lnschema_core.models CORE_MODELS = { - "Dataset": False, - "File": False, + # sorted by topology of foreign keys + "User": False, "Transform": False, "Run": True, - "User": False, "Storage": False, + "Modality": False, "Feature": False, "FeatureSet": False, - "Modality": False, "ULabel": False, + "File": False, + "Dataset": False, } -def import_registry(registry, directory): +def import_registry(registry, directory, connection): import pandas as pd table_name = registry._meta.db_table @@ -30,7 +31,7 @@ def import_registry(registry, directory): old_foreign_key_columns = [column for column in df.columns if column.endswith("_old")] for column in old_foreign_key_columns: df.drop(column, axis=1, inplace=True) - df.to_sql(table_name, ln_setup.settings.instance.db, if_exists="append", index=False) + df.to_sql(table_name, connection, if_exists="append", index=False) def import_db(apps, schema_editor): @@ -47,13 +48,19 @@ def import_db(apps, schema_editor): " instance name, schema, db & storage settings; you can see them using: lamin info" ) raise SystemExit - for model_name in CORE_MODELS.keys(): - registry = getattr(lnschema_core.models, model_name) - import_registry(registry, directory) - many_to_many_names = [field.name for field in registry._meta.many_to_many] - for many_to_many_name in many_to_many_names: - link_orm = getattr(registry, many_to_many_name).through - import_registry(link_orm, directory) + from sqlalchemy import create_engine + + engine = create_engine(ln_setup.settings.instance.db, echo=False) + with engine.begin() as connection: + if ln_setup.settings.instance.dialect == "postgresql": + connection.execute("SET CONSTRAINTS ALL DEFERRED;") + for model_name in CORE_MODELS.keys(): + registry = getattr(lnschema_core.models, model_name) + import_registry(registry, directory, connection) + many_to_many_names = [field.name for field in registry._meta.many_to_many] + for many_to_many_name in many_to_many_names: + link_orm = getattr(registry, many_to_many_name).through + import_registry(link_orm, directory, connection) class Migration(migrations.Migration): @@ -61,7 +68,9 @@ class Migration(migrations.Migration): ("lnschema_core", "0001_initial_squashed_0023"), ] - operations = [migrations.RunPython(import_db, reverse_code=migrations.RunPython.noop)] + operations = [ + migrations.RunPython(import_db, reverse_code=migrations.RunPython.noop), + ] schemas = lamindb_setup.settings.instance.schema