Skip to content
This repository has been archived by the owner on Jan 2, 2025. It is now read-only.

Commit

Permalink
⚡️ Speed up migration
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Oct 16, 2023
1 parent 8ef5aaa commit bb1202c
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions lnschema_core/migrations/0024_import_legacy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,29 @@
import lnschema_core.models

# Registries to import, keyed by model name. The boolean's meaning is not
# visible in this excerpt (only "Run" sets it to True) -- TODO confirm against
# the full file.
#
# Dicts preserve insertion order and the import loop iterates
# ``CORE_MODELS.keys()``, so the order written here is the order in which
# tables are loaded. Each name must appear exactly once: a repeated key keeps
# its *first* position, which would silently undo the ordering below.
CORE_MODELS = {
    # sorted by topology of foreign keys
    "User": False,
    "Transform": False,
    "Run": True,
    "Storage": False,
    "Feature": False,
    "FeatureSet": False,
    "Modality": False,
    "ULabel": False,
    "File": False,
    "Dataset": False,
}


def import_registry(registry, directory, connection):
    """Append one registry's exported parquet table to the database.

    Args:
        registry: Model class exposing ``_meta.db_table``; that table name
            selects both the parquet file to read and the table to append to.
        directory: Folder containing ``<table_name>.parquet``. It must support
            the ``/`` operator (e.g. a ``pathlib.Path``).
        connection: Connection passed straight to ``DataFrame.to_sql``. This
            function neither commits nor closes it, so all tables written
            through the same connection share the caller's transaction.
    """
    import pandas as pd

    table_name = registry._meta.db_table
    df = pd.read_parquet(directory / f"{table_name}.parquet")
    # Columns suffixed "_old" are not loaded; judging by the name they hold
    # legacy foreign keys -- presumably absent from the target schema.
    old_foreign_key_columns = [column for column in df.columns if column.endswith("_old")]
    df = df.drop(columns=old_foreign_key_columns)
    # Write exactly once, through the caller's connection.
    df.to_sql(table_name, connection, if_exists="append", index=False)


def import_db(apps, schema_editor):
Expand All @@ -47,21 +48,29 @@ def import_db(apps, schema_editor):
" instance name, schema, db & storage settings; you can see them using: lamin info"
)
raise SystemExit
for model_name in CORE_MODELS.keys():
registry = getattr(lnschema_core.models, model_name)
import_registry(registry, directory)
many_to_many_names = [field.name for field in registry._meta.many_to_many]
for many_to_many_name in many_to_many_names:
link_orm = getattr(registry, many_to_many_name).through
import_registry(link_orm, directory)
from sqlalchemy import create_engine

engine = create_engine(ln_setup.settings.instance.db, echo=False)
with engine.begin() as connection:
if ln_setup.settings.instance.dialect == "postgresql":
connection.execute("SET CONSTRAINTS ALL DEFERRED;")
for model_name in CORE_MODELS.keys():
registry = getattr(lnschema_core.models, model_name)
import_registry(registry, directory, connection)
many_to_many_names = [field.name for field in registry._meta.many_to_many]
for many_to_many_name in many_to_many_names:
link_orm = getattr(registry, many_to_many_name).through
import_registry(link_orm, directory, connection)


class Migration(migrations.Migration):
    """Data migration that runs ``import_db`` on top of the squashed schema."""

    # Must run after the squashed initial migration of the same app.
    dependencies = [
        ("lnschema_core", "0001_initial_squashed_0023"),
    ]

    # Forward: run the import. Reverse: ``RunPython.noop``, i.e. unapplying
    # this migration executes no code.
    operations = [
        migrations.RunPython(import_db, reverse_code=migrations.RunPython.noop),
    ]


schemas = lamindb_setup.settings.instance.schema
Expand Down

0 comments on commit bb1202c

Please sign in to comment.