feat: postgres => sqlite
fspoettel committed Feb 8, 2023
1 parent e995b1f · commit 18921d3
Showing 16 changed files with 148 additions and 159 deletions.
8 changes: 4 additions & 4 deletions .env.test
@@ -1,4 +1,4 @@
-DATABASE_URI="postgresql://postgres:postgres@localhost:5432/whisperbox_test"
-ENVIRONMENT="development"
-API_SECRET="foo"
-BROKER_URL="redis://localhost:6379/0"
+DATABASE_URI="sqlite:///memory"
+ENVIRONMENT="test"
+API_SECRET="test_secret"
+BROKER_URL="memory://"
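Note: the test environment now runs fully in-process, SQLite in place of a Postgres container and kombu's memory:// transport in place of Redis. One caveat worth flagging: SQLAlchemy spells a true in-memory database as sqlite:// or sqlite:///:memory:, while sqlite:///memory resolves to an on-disk file literally named "memory". A small illustrative check (not project code):

from sqlalchemy import create_engine

# The two spellings are not equivalent.
in_memory = create_engine("sqlite:///:memory:")  # transient database held in RAM
file_backed = create_engine("sqlite:///memory")  # a file called "memory" in the working directory

print(in_memory.url.database)    # ":memory:"
print(file_backed.url.database)  # "memory"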
22 changes: 11 additions & 11 deletions .github/workflows/ci.yml
@@ -16,14 +16,14 @@ jobs:
 - isort --check app
 - flake8 app
 - mypy app
-# test:
-#   runs-on: ubuntu-latest
-#   name: Test
-#   steps:
-#     - uses: actions/checkout@v3
-#     - uses: actions/setup-python@v4
-#       with:
-#         python-version: '3.11'
-#         cache: 'pip'
-#     - pip install -e .[test]
-#     - pytest
+test:
+runs-on: ubuntu-latest
+name: Test
+steps:
+- uses: actions/checkout@v3
+- uses: actions/setup-python@v4
+with:
+python-version: '3.11'
+cache: 'pip'
+- pip install -e .[test]
+- pytest
11 changes: 8 additions & 3 deletions Makefile
@@ -1,8 +1,9 @@
 clean:
-docker-compose -f docker/dev.docker-compose.yml down --volumes --remove-orphans
+docker-compose -f docker/dev/docker-compose.yml down --volumes --remove-orphans

 dev:
-docker-compose -f docker/dev.docker-compose.yml build --progress tty
-docker-compose -f docker/dev.docker-compose.yml up --remove-orphans
+docker-compose -f docker/dev/docker-compose.yml build --progress tty
+docker-compose -f docker/dev/docker-compose.yml up --remove-orphans

 fmt:
 black app
@@ -14,3 +15,7 @@ lint:

 test:
 pytest
+
+run:
+docker-compose -f docker/prod/docker-compose.yml build --progress tty
+docker-compose -f docker/prod/docker-compose.yml up --remove-orphans
(Alembic migration "add_job_tables"; file path not captured in this view)
@@ -1,16 +1,15 @@
"""add_job_tables
Revision ID: 426b6bdc3360
Revision ID: dc8582aea0bc
Revises:
Create Date: 2023-01-27 17:55:21.758828
Create Date: 2023-02-08 12:12:00.808816
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "426b6bdc3360"
revision = "dc8582aea0bc"
down_revision = None
branch_labels = None
depends_on = None
@@ -30,23 +29,28 @@ def upgrade() -> None:
 sa.Column("meta", sa.JSON(none_as_null=True), nullable=True),
 sa.Column(
 "type",
-sa.Enum("transcript", "translation", "language_detection", name="jobtype"),
+sa.Enum(
+"transcript",
+"translation",
+"language_detection",
+name="jobtype",
+),
 nullable=False,
 ),
 sa.Column(
 "created_at",
 sa.DateTime(),
-server_default=sa.text("now()"),
+server_default=sa.text("(CURRENT_TIMESTAMP)"),
 nullable=False,
 ),
 sa.Column("updated_at", sa.DateTime(), nullable=True),
-sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+sa.Column("id", sa.VARCHAR(length=36), nullable=False),
 sa.PrimaryKeyConstraint("id"),
 )
 op.create_index(op.f("ix_jobs_id"), "jobs", ["id"], unique=False)
 op.create_table(
 "artifacts",
-sa.Column("job_id", postgresql.UUID(as_uuid=True), nullable=False),
+sa.Column("job_id", sa.VARCHAR(length=36), nullable=False),
 sa.Column("data", sa.JSON(none_as_null=True), nullable=True),
 sa.Column(
 "type",
@@ -56,11 +60,11 @@ def upgrade() -> None:
 sa.Column(
 "created_at",
 sa.DateTime(),
-server_default=sa.text("now()"),
+server_default=sa.text("(CURRENT_TIMESTAMP)"),
 nullable=False,
 ),
 sa.Column("updated_at", sa.DateTime(), nullable=True),
-sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+sa.Column("id", sa.VARCHAR(length=36), nullable=False),
 sa.ForeignKeyConstraint(["job_id"], ["jobs.id"], ondelete="CASCADE"),
 sa.PrimaryKeyConstraint("id"),
 )
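Note: the substance of this migration change is that server defaults move from Postgres's now() to the standard CURRENT_TIMESTAMP expression, and the UUID columns become plain VARCHAR(36) strings, since SQLite has no native UUID type. A self-contained sketch (an illustrative table, not the full migration) showing that the portable variants create cleanly on SQLite:

import sqlalchemy as sa

metadata = sa.MetaData()

# CURRENT_TIMESTAMP and 36-character UUID strings work on both SQLite and
# Postgres, unlike now() and postgresql.UUID(as_uuid=True).
jobs = sa.Table(
    "jobs",
    metadata,
    sa.Column("id", sa.VARCHAR(length=36), primary_key=True),
    sa.Column(
        "created_at",
        sa.DateTime(),
        server_default=sa.text("(CURRENT_TIMESTAMP)"),
        nullable=False,
    ),
)

engine = sa.create_engine("sqlite://")
metadata.create_all(engine)  # emits CREATE TABLE ... DEFAULT (CURRENT_TIMESTAMP)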
2 changes: 1 addition & 1 deletion app/shared/db/base.py
@@ -5,7 +5,7 @@

 from app.shared.config import settings

-engine = create_engine(settings.DATABASE_URI)
+engine = create_engine(settings.DATABASE_URI, connect_args={"check_same_thread": False})

 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

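Note: check_same_thread=False is the usual adjustment for driving SQLite from a threaded FastAPI app, since requests (and the TestClient) may use the connection from a different thread than the one that created it. The flag is SQLite-specific, so if DATABASE_URI can still point at Postgres it would presumably have to be passed conditionally (an assumption; the surrounding config is not shown here). For a purely in-memory test database it is also common to pin a single shared connection with StaticPool, roughly:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool

# Sketch of a test-only engine: StaticPool keeps one shared connection so every
# session sees the same in-memory database instead of a fresh, empty one.
test_engine = create_engine(
    "sqlite://",
    connect_args={"check_same_thread": False},
    poolclass=StaticPool,
)
TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)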
8 changes: 5 additions & 3 deletions app/shared/db/models.py
@@ -1,7 +1,7 @@
 import uuid
 from typing import Optional

-from sqlalchemy import JSON, Column, DateTime, Enum, ForeignKey, String, func
+from sqlalchemy import JSON, VARCHAR, Column, DateTime, Enum, ForeignKey, String, func
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.ext.declarative import declarative_base, declared_attr
 from sqlalchemy.orm import Mapped, declarative_mixin # type: ignore
@@ -26,7 +26,7 @@ def updated_at(cls) -> Mapped[Optional[DateTime]]:
 @declared_attr
 def id(cls) -> Mapped[UUID]:
 return Column(
-UUID(as_uuid=True), primary_key=True, index=True, default=uuid.uuid4
+VARCHAR(36), primary_key=True, index=True, default=lambda: str(uuid.uuid4())
 )


@@ -44,7 +44,9 @@ class Artifact(Base, WithStandardFields):
 __tablename__ = "artifacts"

 job_id = Column(
-UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), nullable=False
+VARCHAR(36),
+ForeignKey("jobs.id", ondelete="CASCADE"),
+nullable=False,
 )

 data = Column(JSON(none_as_null=True))
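Note: with the primary key stored as VARCHAR(36), the client-side default has to produce a string, hence default=lambda: str(uuid.uuid4()) rather than default=uuid.uuid4. A minimal standalone sketch of the pattern (a simplified stand-in, not the project's actual mixin):

import uuid

from sqlalchemy import VARCHAR, Column, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Job(Base):
    __tablename__ = "jobs"
    # Stored as a 36-character string, generated on the Python side at flush time.
    id = Column(VARCHAR(36), primary_key=True, index=True, default=lambda: str(uuid.uuid4()))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    job = Job()
    session.add(job)
    session.commit()
    assert isinstance(job.id, str) and len(job.id) == 36

The Mapped[UUID] annotation and the postgresql UUID import are left untouched by the diff, so the type hints are now looser than what is actually stored; presumably a deliberate choice to keep the change small.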
3 changes: 1 addition & 2 deletions app/tests/test_api.py
@@ -7,11 +7,10 @@
 import app.shared.db.dtos as dtos
 import app.shared.db.models as models
 from app.shared.db.dtos import JobStatus, JobType
-from app.web.main import app
+from app.web.main import app, celery

 client = TestClient(app)

-
 @pytest.fixture(name="mock_job", scope="function", autouse=False)
 def mock_job(db_session: Session) -> models.Job:
 job = models.Job(
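Note: the test module now imports celery alongside app, presumably so that tasks can be executed eagerly against the memory:// broker instead of a real Redis-backed worker (an assumption; the fixture that would do this is not part of the diff). A typical sketch of such a fixture, with hypothetical naming:

import pytest

from app.web.main import celery  # import path taken from the test module above


@pytest.fixture(autouse=True)
def eager_celery():
    # Run tasks inline in the test process: no broker round-trip, and task errors propagate.
    celery.conf.task_always_eager = True
    celery.conf.task_eager_propagates = True
    yield
    celery.conf.task_always_eager = False
    celery.conf.task_eager_propagates = False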
21 changes: 13 additions & 8 deletions app/web/main.py
@@ -81,7 +81,7 @@ def get_transcripts(
 def get_transcript(
 id: UUID = Path(), session: Session = Depends(get_session)
 ) -> Optional[models.Job]:
-job = session.query(models.Job).filter(models.Job.id == id).one_or_none()
+job = session.query(models.Job).filter(models.Job.id == str(id)).one_or_none()
 if not job:
 raise HTTPException(status_code=404)
 return job
@@ -92,7 +92,7 @@ def get_artifacts_for_job(
 id: UUID = Path(), session: Session = Depends(get_session)
 ) -> List[models.Artifact]:
 artifacts = (
-session.query(models.Artifact).filter(models.Artifact.job_id == id)
+session.query(models.Artifact).filter(models.Artifact.job_id == str(id))
 ).all()

 if not len(artifacts):
@@ -105,14 +105,14 @@ def get_artifacts_for_job(
 def delete_transcript(
 id: UUID = Path(), session: Session = Depends(get_session)
 ) -> None:
-session.query(models.Job).filter(models.Job.id == id).delete()
+session.query(models.Job).filter(models.Job.id == str(id)).delete()
 return None


 app.include_router(api_router)

-# TODO:
-# we could use `acks_late` to handle this scenario within celery itself.
+
+# TODO: we could use `acks_late` to handle this scenario within celery itself.
 # the reason this does not work well in our case is that `visibility_timeout`
 # needs to be very high since whisper workers can be long running.
 # doing this application-side bears the risk of poison pilling the worker though,
@@ -123,10 +123,15 @@ def on_startup() -> None:

 jobs = (
 session.query(models.Job)
-.filter(or_(models.Job.status == dtos.JobStatus.processing, models.Job.status == dtos.JobStatus.create))
-.order_by(models.Job.created_at)
+.filter(
+or_(
+models.Job.status == dtos.JobStatus.processing,
+models.Job.status == dtos.JobStatus.create,
+)
+)
+.order_by(models.Job.created_at)
 ).all()

-logger.info(f"Re-queueing {len(jobs)} jobs.")
+logger.info(f"Requeueing {len(jobs)} jobs.")
 for job in jobs:
 queue_task(job)
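Note on the str(id) coercions above: FastAPI parses the path segment into a uuid.UUID, but the id columns are now plain VARCHAR(36) strings, and the stdlib sqlite3 driver has no default adapter for uuid.UUID parameters. A tiny driver-level illustration (standalone, not project code):

import sqlite3
import uuid

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (id VARCHAR(36) PRIMARY KEY)")

job_id = uuid.uuid4()
conn.execute("INSERT INTO jobs VALUES (?)", (str(job_id),))

# Binding the uuid.UUID object directly raises an "unsupported type" error,
# so the value is cast to str before being compared against the column.
row = conn.execute("SELECT id FROM jobs WHERE id = ?", (str(job_id),)).fetchone()
assert row == (str(job_id),)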
9 changes: 0 additions & 9 deletions app/web/start.sh

This file was deleted.

15 changes: 7 additions & 8 deletions app/worker/main.py
@@ -13,17 +13,16 @@
 celery = get_celery_binding()


-@celery.task(
-bind=True,
-soft_time_limit=2 * 60 * 60 # TODO: make configurable
-)
+@celery.task(bind=True, soft_time_limit=2 * 60 * 60) # TODO: make configurable
 def transcribe(self: Task, job_id: UUID) -> None:
 try:
 db: Session = SessionLocal()
 job = db.query(models.Job).filter(models.Job.id == job_id).one()

 if job.status == dtos.JobStatus.error or job.status == dtos.JobStatus.success:
-logger.warn("[{job.id}]: Received job that has already been processed, abort.")
+logger.warn(
+"[{job.id}]: Received job that has already been processed, abort."
+)
 return

 job.meta = {"task_id": self.request.id}
@@ -51,7 +50,7 @@ def transcribe(self: Task, job_id: UUID) -> None:
 result = strategy.detect_language()

 artifact = models.Artifact(
-job_id=job.id, data=result, type=dtos.ArtifactType.raw_transcript
+job_id=str(job.id), data=result, type=dtos.ArtifactType.raw_transcript
 )

 db.add(artifact)
@@ -64,9 +63,9 @@ def transcribe(self: Task, job_id: UUID) -> None:
 logger.info(f"[{job.id}]: set task to status success.")
 except Exception as e:
 if job and db:
-job.meta = { **job.meta, "error": str(e) }
+job.meta = {**job.meta.__dict__, "error": str(e)}
 job.status = dtos.JobStatus.error
 db.commit()
-raise(e)
+raise (e)
 finally:
 db.close()
91 changes: 0 additions & 91 deletions docker/dev.docker-compose.yml

This file was deleted.

(Diffs for the remaining changed files are not shown in this view.)
