Skip to content

Commit

Permalink
Merge pull request #384 from MadcowD/wguss/alembic
Browse files Browse the repository at this point in the history
alembic automigrations
  • Loading branch information
MadcowD authored Nov 19, 2024
2 parents 22e7b88 + db4db06 commit cb42f09
Show file tree
Hide file tree
Showing 13 changed files with 920 additions and 301 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
dataset/*
*.egg
MANIFEST

Expand Down Expand Up @@ -312,3 +313,4 @@ static/
*.db-shm
*.db-wal
.aider*
examples/logdir
624 changes: 329 additions & 295 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ sqlmodel = { version = ">=0.0.21, <0.1.0", optional = true }
fastapi = { version = "^0.111.1", optional = true }
uvicorn = { version = "^0.30.3", optional = true }

alembic = { version = "^1.14.0", optional = true }
[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
sphinx = "<8.0.0"
Expand All @@ -57,9 +58,9 @@ sphinx-rtd-theme = "^2.0.0"
# causes poetry to mark it as optional = true (even if explicitly specified optional = false).
anthropic = ["anthropic"]
groq = ["groq"]
sqlite = [ 'sqlmodel' ]
postgres = ['sqlmodel', 'psycopg2']
studio = ['fastapi', 'uvicorn', 'sqlmodel']
sqlite = [ 'sqlmodel', 'alembic' ]
postgres = ['sqlmodel', 'psycopg2', 'alembic']
studio = ['fastapi', 'uvicorn', 'sqlmodel', 'alembic']
all = [
"anthropic",
"groq",
Expand All @@ -68,6 +69,7 @@ all = [
# allow running studio by default
'fastapi',
'uvicorn',
'alembic'
]

[build-system]
Expand Down
48 changes: 48 additions & 0 deletions src/ell/stores/migrations/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ell Database Migrations

This directory contains the database migration utilities for ell's SQL storage backend. The migration system uses Alembic to handle schema changes and version control for the database.

## Overview

The migration system handles:
- Initial schema creation
- Schema updates and changes
- Version tracking of database changes
- Automatic migration detection and application

## Key Components

- `versions/`: Contains individual migration scripts
- `env.py`: Alembic environment configuration
- `script.py.mako`: Template for generating new migrations
- `make.py`: Utility script for creating new migrations

## Usage

### Creating a New Migration
```bash
python -m ell.stores.migrations.make "your migration message"
```

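This will:
1. Create a temporary SQLite database
2. Upgrade that database to the latest existing migration
3. Detect schema changes between that database and the current SQLModel metadata
4. Generate a new migration file in the versions directory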

### Applying Migrations

Migrations are automatically handled by the `init_or_migrate_database()` function defined in `ell.stores.migrations` (used by the SQL store in `ell.stores.sql`). When initializing an ell store, it will:

1. Check for existing tables
2. Apply any pending migrations
3. Initialize new databases with the latest schema

## Migration Files

Each migration file contains:
- Unique revision ID
- Dependencies on other migrations
- `upgrade()` function for applying changes
- `downgrade()` function for reverting changes

For examples, see the existing migrations in the `versions/` directory.
80 changes: 80 additions & 0 deletions src/ell/stores/migrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

from alembic import command
from alembic.config import Config
from sqlalchemy import inspect, text
from pathlib import Path

from sqlmodel import Session, SQLModel, create_engine, select
import logging

logger = logging.getLogger(__name__)

def get_alembic_config(engine_url: str) -> Config:
    """Create the Alembic config for ell's migrations programmatically.

    No ``alembic.ini`` file is read; every option is set in code.

    Args:
        engine_url: Database URL, given either as a string or as a
            SQLAlchemy ``URL`` object (for example ``engine.url``).

    Returns:
        A ``Config`` whose ``script_location`` is the directory containing
        this module, whose ``sqlalchemy.url`` is ``engine_url``, whose
        ``version_table`` is ``ell_alembic_version`` and whose ``timezone``
        is ``UTC``.
    """
    # str() on a SQLAlchemy URL object replaces the password with "***",
    # which would hand Alembic unusable credentials. Render it explicitly
    # when the object supports it; plain strings pass through unchanged.
    render = getattr(engine_url, "render_as_string", None)
    if callable(render):
        url = render(hide_password=False)
    else:
        url = str(engine_url)

    alembic_cfg = Config()
    migrations_dir = Path(__file__).parent

    # Option values go through ConfigParser interpolation, so a literal "%"
    # (common in URL-encoded passwords) has to be doubled to survive.
    alembic_cfg.set_main_option(
        "script_location", str(migrations_dir).replace("%", "%%")
    )
    alembic_cfg.set_main_option("sqlalchemy.url", url.replace("%", "%%"))
    alembic_cfg.set_main_option("version_table", "ell_alembic_version")
    alembic_cfg.set_main_option("timezone", "UTC")

    return alembic_cfg

def init_or_migrate_database(engine) -> None:
    """Initialize or migrate a database to the current ell schema.

    Three cases are handled:

    1. ell tables exist but there is no Alembic version table: stamp the
       database with the initial migration, verify the stamp, then fall
       through to case 2.
    2. The Alembic version table exists: upgrade to head.
    3. Neither applies (new/empty database, or one without ell tables):
       create all tables from ``SQLModel.metadata`` and stamp with head.

    Args:
        engine: SQLAlchemy engine for the target database.

    Raises:
        RuntimeError: If, in case 1, the version table does not contain the
            initial revision after stamping.
        Exception: Any failure is logged and then re-raised unchanged.
    """
    # Revision id of the initial migration, used to stamp pre-Alembic databases.
    initial_revision = "4524fb60d23e"

    # Check database state
    our_tables_v1 = {'serializedlmp', 'invocation', 'invocationcontents',
                     'invocationtrace', 'serializedlmpuses'}
    existing_tables = set(inspect(engine).get_table_names())
    has_our_tables = bool(our_tables_v1 & existing_tables)  # Intersection
    has_alembic = 'ell_alembic_version' in existing_tables

    # str(engine.url) masks the password as "***"; Alembic opens its own
    # connection from this URL, so it must carry the real credentials.
    alembic_cfg = get_alembic_config(
        engine.url.render_as_string(hide_password=False)
    )
    try:
        if has_our_tables and not has_alembic:
            # Case 1: Existing database with our tables but no Alembic
            # This is likely a database from version <= 0.14
            logger.debug("Found existing tables but no Alembic - stamping with initial migration")

            command.stamp(alembic_cfg, initial_revision)
            # Verify table was created
            after_tables = set(inspect(engine).get_table_names())
            logger.debug(f"Tables after stamp: {after_tables}")

            # Check if version table has our stamp
            with engine.connect() as connection:
                version_result = connection.execute(
                    text("SELECT version_num FROM ell_alembic_version")
                ).first()
                if not version_result or version_result[0] != initial_revision:
                    raise RuntimeError("Failed to stamp database - version table empty or incorrect version")
                logger.debug(f"Successfully stamped database with version {version_result[0]}")

            # The database is now under Alembic control; continue with case 2.
            has_alembic = True

        if has_alembic:
            # Case 2: Database has Alembic - run any pending migrations
            logger.debug("Running any pending Alembic migrations")
            command.upgrade(alembic_cfg, "head")
        else:
            # Case 3: New database or database without our tables
            logger.debug("New database detected - creating schema and stamping with latest migration")
            # Create all tables according to current schema
            SQLModel.metadata.create_all(engine)
            # Stamp with latest migration
            command.stamp(alembic_cfg, "head")

    except Exception as e:
        logger.error(f"Failed to initialize/migrate database: {e}")
        raise
94 changes: 94 additions & 0 deletions src/ell/stores/migrations/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context
from sqlmodel import SQLModel


# Alembic's Config object for the current run; it exposes the options
# supplied through an .ini file or set programmatically by the caller.
config = context.config

# Configure Python logging from the .ini file, but only when one is in use.
_ini_path = config.config_file_name
if _ini_path is not None:
    fileConfig(_ini_path)


# MetaData that 'autogenerate' compares against the database.
# NOTE(review): presumably only tables whose model modules were imported
# before this point are registered on SQLModel.metadata - confirm that the
# ell models are loaded by the time this module runs.
target_metadata = SQLModel.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option only, with
    no Engine or connection created here. ``as_sql=True`` and
    ``literal_binds=True`` are set.
    """
    # XXX: This path is currently untested and unused.
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
        "compare_server_default": True,
        "render_as_batch": True,
        "as_sql": True,
        # Fall back to ell's version table name when the option is unset.
        "version_table": config.get_main_option(
            "version_table", "ell_alembic_version"
        ),
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the main
    config section, opens a connection and runs the migrations on it.
    """
    # XXX: The original comment marks this path as untested and unused.
    # NOTE(review): this is the branch taken whenever Alembic is not in
    # offline mode, so it is probably exercised by upgrade/revision - confirm.
    # NOTE(review): unlike the offline path, render_as_batch and the
    # compare_* options are not set here - confirm that is intended.
    engine_options = config.get_section(config.config_ini_section, {})
    connectable = engine_from_config(
        engine_options,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,  # no pooling
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            # Fall back to ell's version table name when the option is unset.
            version_table=config.get_main_option(
                "version_table", "ell_alembic_version"
            ),
        )

        with context.begin_transaction():
            context.run_migrations()


# Executed when this module is run: pick the runner matching the mode
# reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
32 changes: 32 additions & 0 deletions src/ell/stores/migrations/make.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import argparse
from sqlalchemy import create_engine
from ell.stores.migrations import get_alembic_config
from alembic import command

def main():
    """Command-line entry point that autogenerates a new migration.

    Takes one positional argument, the migration message. A throwaway
    SQLite database in a temporary directory is upgraded to ``head`` and
    then used as the baseline for ``alembic revision --autogenerate``.
    """
    import tempfile
    from pathlib import Path

    cli = argparse.ArgumentParser(description='Create a new database migration')
    cli.add_argument('message', help='Migration message/description')
    message = cli.parse_args().message

    # The scratch database only lives for the duration of this block.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_db = Path(scratch_dir) / "temp.db"
        scratch_engine = create_engine(f"sqlite:///{scratch_db}")
        cfg = get_alembic_config(str(scratch_engine.url))

        # Bring the scratch database up to the latest existing migration so
        # that autogenerate only picks up changes made since then.
        command.upgrade(cfg, "head")

        # Generate the new migration.
        command.revision(cfg, message=message, autogenerate=True)

    print(f"✨ Created new migration with message: {message}")


if __name__ == '__main__':
    main()
27 changes: 27 additions & 0 deletions src/ell/stores/migrations/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
${upgrades if upgrades else "pass"}


def downgrade() -> None:
${downgrades if downgrades else "pass"}
Loading

0 comments on commit cb42f09

Please sign in to comment.