Skip to content

Commit

Permalink
Update out of date dbt example (#815)
Browse files Browse the repository at this point in the history
  • Loading branch information
kramstrom authored Jul 24, 2024
1 parent 82a35b6 commit cde8d29
Showing 1 changed file with 34 additions and 11 deletions.
45 changes: 34 additions & 11 deletions 10_integrations/dbt/dbt_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@
dbt_image = (
modal.Image.debian_slim()
.pip_install(
"boto3",
"dbt-duckdb>=1.5.1",
"pandas",
"pyarrow",
"boto3~=1.34",
"dbt-duckdb~=1.8.1",
"pandas~=2.2.2",
"pyarrow~=16.1.0",
)
.env(
{
Expand Down Expand Up @@ -66,9 +66,7 @@
local_path=LOCAL_DBT_PROJECT / "profiles.yml",
remote_path=Path(PROFILES_PATH, "profiles.yml"),
)
dbt_target = modal.NetworkFileSystem.from_name(
"dbt-target", create_if_missing=True
)
dbt_target = modal.Volume.from_name("dbt-target-vol", create_if_missing=True)
# Create this secret using the "AWS" template at https://modal.com/secrets/create.
# Be sure that the AWS user you provide credentials for has permission to
# create S3 buckets and read/write data from them.
Expand Down Expand Up @@ -104,6 +102,8 @@
# we have this `create_source_data` function which creates an AWS S3 bucket and
# populates it with .parquet files based on CSV data in the seeds/ directory.
#
# `modal run dbt_duckdb.py::create_source_data`
#
# This is not the typical way that seeds/ data is used, but it is fine for this
# demonstration example. See https://docs.getdbt.com/docs/build/seeds for more info.

Expand Down Expand Up @@ -135,16 +135,18 @@ def create_source_data():
# up-to-date. Currently, the source data for this warehouse is static, so the updates
# don't really update anything, just re-build. But this example could be extended
# to have sources which continually provide new data across time.
# It will also generate the dbt docs daily to keep them fresh.


@app.function(
    secrets=[s3_secret],
    mounts=[dbt_project, dbt_profiles],
    volumes={TARGET_PATH: dbt_target},
    schedule=modal.Period(days=1),
)
def daily_build() -> None:
    """Rebuild the dbt project once a day and regenerate its docs.

    Delegates to the `run` Modal function for each dbt CLI command, so the
    build and the docs generation each execute in their own remote container.
    """
    for dbt_command in ("build", "docs generate"):
        run.remote(dbt_command)


# `modal run dbt_duckdb.py::run --command run`
Expand Down Expand Up @@ -179,12 +181,12 @@ def daily_build() -> None:
@app.function(
    secrets=[s3_secret],
    mounts=[dbt_project, dbt_profiles],
    volumes={TARGET_PATH: dbt_target},
)
def run(command: str) -> None:
    """Run an arbitrary dbt CLI command inside a Modal container.

    Args:
        command: The dbt command line (without the leading `dbt`),
            e.g. "build" or "docs generate".
    """
    import shlex

    from dbt.cli.main import dbtRunner

    # shlex.split handles quoted arguments (e.g. --select "my model") and
    # collapses repeated whitespace; command.split(" ") would emit empty
    # tokens for consecutive spaces and break dbt's argument parsing.
    res = dbtRunner().invoke(shlex.split(command))
    if res.exception:
        # Best-effort reporting: surface the failure in the container logs
        # without raising, matching the original behavior.
        print(res.exception)

Expand All @@ -195,3 +197,24 @@ def run(command: str) -> None:
# After running the 'run' command and seeing it succeed, check what's contained
# under the bucket's `out/` key prefix. You'll see that DBT has run the transformations
# defined in `sample_proj_duckdb_s3/models/` and produced output .parquet files.


# You can also serve the dbt docs generated by the daily build and access them through Modal.
# Just look for the web function's URL in your deployment output:
# Created web function serve_dbt_docs => <output-url>


@app.function(volumes={TARGET_PATH: dbt_target})
@modal.asgi_app()
def serve_dbt_docs():
    """Serve the generated dbt docs as a static site from the shared volume.

    Mounts the dbt target directory (populated by `daily_build` via
    `docs generate`) at the web app's root so the docs are browsable.
    """
    from fastapi import FastAPI
    from fastapi.staticfiles import StaticFiles

    docs_site = StaticFiles(directory=TARGET_PATH, html=True)

    asgi_app = FastAPI()
    asgi_app.mount("/", docs_site, name="static")
    return asgi_app

0 comments on commit cde8d29

Please sign in to comment.