diff --git a/.github/workflows/preview-link.yml b/.github/workflows/preview-link.yml new file mode 100644 index 00000000000..f128f44b8cd --- /dev/null +++ b/.github/workflows/preview-link.yml @@ -0,0 +1,169 @@ +name: Vercel deployment preview link generator + +on: + pull_request: + types: [opened, synchronize] + paths: + - 'website/docs/docs/**' + - 'website/docs/best-practices/**' + - 'website/docs/guides/**' + - 'website/docs/faqs/**' + - 'website/docs/reference/**' + +permissions: + contents: write + pull-requests: write + +jobs: + update-pr-description: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install necessary tools + run: | + sudo apt-get update + sudo apt-get install -y jq curl + + - name: Generate Vercel deployment URL + id: vercel_url + run: | + # Get the branch name + BRANCH_NAME="${{ github.head_ref }}" + + # Convert to lowercase + BRANCH_NAME_LOWER=$(echo "$BRANCH_NAME" | tr '[:upper:]' '[:lower:]') + + # Replace non-alphanumeric characters with hyphens + BRANCH_NAME_SANITIZED=$(echo "$BRANCH_NAME_LOWER" | sed 's/[^a-z0-9]/-/g') + + # Construct the deployment URL + DEPLOYMENT_URL="https://docs-getdbt-com-git-${BRANCH_NAME_SANITIZED}-dbt-labs.vercel.app" + + echo "deployment_url=$DEPLOYMENT_URL" >> $GITHUB_OUTPUT + + - name: Wait for deployment to be accessible + id: wait_for_deployment + run: | + DEPLOYMENT_URL="${{ steps.vercel_url.outputs.deployment_url }}" + echo "Waiting for deployment at $DEPLOYMENT_URL to become accessible..." + + MAX_ATTEMPTS=60 # Wait up to 10 minutes + SLEEP_TIME=10 # Check every 10 seconds + ATTEMPTS=0 + + while [ $ATTEMPTS -lt $MAX_ATTEMPTS ]; do + STATUS_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$DEPLOYMENT_URL") + if [ "$STATUS_CODE" -eq 200 ]; then + echo "Deployment is accessible." + break + else + echo "Deployment not yet accessible (status code: $STATUS_CODE). Waiting..." + sleep $SLEEP_TIME + ATTEMPTS=$((ATTEMPTS + 1)) + fi + done + + if [ $ATTEMPTS -eq $MAX_ATTEMPTS ]; then + echo "Deployment did not become accessible within the expected time." + exit 1 + fi + + - name: Get changed files + id: files + run: | + CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -E '^website/docs/(docs|best-practices|guides|faqs|reference)/.*\.md$' || true) + if [ -z "$CHANGED_FILES" ]; then + echo "No documentation files were changed." + echo "changed_files=" >> $GITHUB_OUTPUT + else + CHANGED_FILES=$(echo "$CHANGED_FILES" | tr '\n' ' ') + echo "changed_files=$CHANGED_FILES" >> $GITHUB_OUTPUT + fi + + - name: Generate file preview links + id: links + run: | + DEPLOYMENT_URL="${{ steps.vercel_url.outputs.deployment_url }}" + CHANGED_FILES="${{ steps.files.outputs.changed_files }}" + + if [ -z "$CHANGED_FILES" ]; then + echo "No changed files found in the specified directories." + LINKS="No documentation files were changed." 
+ else + LINKS="" + # Convert CHANGED_FILES back to newline-separated for processing + CHANGED_FILES=$(echo "$CHANGED_FILES" | tr ' ' '\n') + for FILE in $CHANGED_FILES; do + # Remove 'website/docs/' prefix + FILE_PATH="${FILE#website/docs/}" + # Remove the .md extension + FILE_PATH="${FILE_PATH%.md}" + + # Construct the full URL + FULL_URL="$DEPLOYMENT_URL/$FILE_PATH" + LINKS="$LINKS\n- $FULL_URL" + done + fi + + # Properly set the multi-line output + echo "links<> $GITHUB_OUTPUT + echo -e "$LINKS" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Update PR description with deployment links + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const prNumber = context.issue.number; + + // Fetch the current PR description + const { data: pullRequest } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + }); + + let body = pullRequest.body || ''; + + // Define the markers + const startMarker = ''; + const endMarker = ''; + + // Get the deployment URL and links from environment variables + const deploymentUrl = process.env.DEPLOYMENT_URL; + const links = process.env.LINKS; + + // Build the deployment content without leading whitespace + const deploymentContent = [ + `${startMarker}`, + '---', + '🚀 Deployment available! Here are the direct links to the updated files:', + '', + `${links}`, + '', + `${endMarker}` + ].join('\n'); + + // Remove existing deployment content between markers + const regex = new RegExp(`${startMarker}[\\s\\S]*?${endMarker}`, 'g'); + body = body.replace(regex, '').trim(); + + // Append the new deployment content + body = `${body}\n\n${deploymentContent}`; + + // Update the PR description + await github.rest.pulls.update({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + body: body, + }); + env: + DEPLOYMENT_URL: ${{ steps.vercel_url.outputs.deployment_url }} + LINKS: ${{ steps.links.outputs.links }} diff --git a/.github/workflows/vale.yml b/.github/workflows/vale.yml new file mode 100644 index 00000000000..5feaaa12a20 --- /dev/null +++ b/.github/workflows/vale.yml @@ -0,0 +1,80 @@ +name: Vale linting + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'website/docs/**/*' + - 'website/blog/**/*' + - 'website/**/*' + +jobs: + vale: + name: Vale linting + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: List repository contents + run: | + pwd + ls -R + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install Vale + run: pip install vale==2.27.0 # Install a stable version of Vale + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v34 + with: + files: | + website/**/*.md + separator: ' ' + + - name: Debugging - Print changed files + if: ${{ steps.changed-files.outputs.any_changed == 'true' }} + run: | + echo "Changed files:" + echo "${{ steps.changed-files.outputs.all_changed_and_modified_files }}" + + - name: Confirm files exist + if: ${{ steps.changed-files.outputs.any_changed == 'true' }} + run: | + echo "Checking if files exist..." 
+ for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do + if [ -f "$file" ]; then + echo "Found: $file" + else + echo "File not found: $file" + exit 1 + fi + done + + - name: Run vale + if: ${{ steps.changed-files.outputs.any_changed == 'true' }} + uses: errata-ai/vale-action@reviewdog + with: + token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-check + files: ${{ steps.changed-files.outputs.all_changed_and_modified_files }} + separator: ' ' + version: '2.27.0' + +# - name: Post summary comment +# if: ${{ steps.changed-files.outputs.any_changed == 'true' }} +# run: | +# COMMENT="❗️Oh no, some Vale linting found issues! Please check the **Files change** tab for detailed results and make the necessary updates." +# COMMENT+=$'\n' +# COMMENT+=$'\n\n' +# COMMENT+="➡️ Link to detailed report: [Files changed](${{ github.event.pull_request.html_url }}/files)" +# gh pr comment ${{ github.event.pull_request.number }} --body "$COMMENT" +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.hyperlint/config.yaml b/.hyperlint/config.yaml new file mode 100644 index 00000000000..03082114ae1 --- /dev/null +++ b/.hyperlint/config.yaml @@ -0,0 +1,10 @@ +content_dir: /docs +authorized_users: + - mirnawong1 + - matthewshaver + - nghi-ly + - runleonarun + - nataliefiann + +vale: + enabled: true diff --git a/.vale.ini b/.vale.ini new file mode 100644 index 00000000000..58aff923afe --- /dev/null +++ b/.vale.ini @@ -0,0 +1,7 @@ +StylesPath = styles +MinAlertLevel = warning + +Vocab = EN + +[*.md] +BasedOnStyles = custom diff --git a/contributing/adding-page-components.md b/contributing/adding-page-components.md index 68294e7d149..7a92d627995 100644 --- a/contributing/adding-page-components.md +++ b/contributing/adding-page-components.md @@ -4,7 +4,7 @@ You can use the following components to provide code snippets for each supported Identify code by labeling with the warehouse names: -```code +```sql
@@ -32,7 +32,7 @@ You can use the following components to provide code snippets in a tabbed view. Identify code and code files by labeling with the component they are describing: -```code +```sql ` tag. This allows you to share a link to a page with a pre-selected tab so that clicking on a tab creates a unique hyperlink for that tab. However, this feature doesn't provide an anchor link, which means the browser won't scroll to the tab. Additionally, you can define the search parameter name to use. If the tabs content is under a header, you can alternatively link to the header itself, instaed of the `queryString` prop. +You can use the [queryString](https://docusaurus.io/docs/next/markdown-features/tabs?current-os=ios#query-string) prop in the `` tag. This allows you to share a link to a page with a pre-selected tab so that clicking on a tab creates a unique hyperlink for that tab. However, this feature doesn't provide an anchor link, which means the browser won't scroll to the tab. Additionally, you can define the search parameter name to use. If the tabs content is under a header, you can alternatively link to the header itself, instead of the `queryString` prop. In the following example, clicking a tab adds a search parameter to the end of the URL: `?current-os=android or ?current-os=ios`. -``` +```sql Android @@ -105,3 +105,48 @@ In the following example, clicking a tab adds a search parameter to the end of t ``` + +## Markdown Links + +Refer to the Links section of the Content Style Guide to read about how you can use links in the dbt product documentation. + +## Collapsible header + + +
+

Shows and hides children elements

+
+
+ +```markdown + +
+

Shows and hides children elements

+
+
+
+``` + +## File component + +```yml + + +```yaml +password: hunter2 +``` + +``` + +## LoomVideo component + +
{``}
+ + + +## YoutubeVideo component + +
{``}
+ + + diff --git a/styles/Vocab/EN/accept.txt b/styles/Vocab/EN/accept.txt new file mode 100644 index 00000000000..e673e2ef83d --- /dev/null +++ b/styles/Vocab/EN/accept.txt @@ -0,0 +1,67 @@ +dbt Cloud +dbt Core +dbt Semantic Layer +dbt Explorer +dbt +dbt-tonic +dbtonic +IDE +CLI +Config +info +docs +yaml +YAML +SQL +bash +shell +MetricFlow +jinja +jinja2 +sqlmesh +Snowflake +Databricks +Fabric +Redshift +Azure +DevOps +Athena +Amazon +UI +CSV +S3 +SCD +repo +dbt_project.yml +boolean +defaultValue= +DWH +DWUs +shoutout +ADF +BQ +gcloud +MSFT +DDL +APIs +API +SSIS +PBI +PowerBI +datetime +PySpark +:::caution +:::note +:::info +:::tip +:::warning +\<[^>]+\> +\b[A-Z]{2,}(?:/[A-Z]{2,})?\b +\w+-\w+ +\w+/\w+ +n/a +N/A +\ diff --git a/styles/custom/LatinAbbreviations.yml b/styles/custom/LatinAbbreviations.yml new file mode 100644 index 00000000000..44a3c9d6e8c --- /dev/null +++ b/styles/custom/LatinAbbreviations.yml @@ -0,0 +1,15 @@ +# LatinAbbreviations.yml +extends: substitution +message: "Avoid Latin abbreviations: '%s'. Consider using '%s' instead." +level: warning + +swap: + 'e.g.': 'for example' + 'e.g': 'for example' + 'eg': 'for example' + 'i.e.': 'that is' + 'i.e': 'that is' + 'etc.': 'and so on' + 'etc': 'and so on' + 'N.B.': 'Note' + 'NB': 'Note' diff --git a/styles/custom/Repitition.yml b/styles/custom/Repitition.yml new file mode 100644 index 00000000000..4cd620146cf --- /dev/null +++ b/styles/custom/Repitition.yml @@ -0,0 +1,6 @@ +extends: repetition +message: "'%s' is repeated!" +level: warning +alpha: true +tokens: + - '[^\s]+' diff --git a/styles/custom/SentenceCaseHeaders.yml b/styles/custom/SentenceCaseHeaders.yml new file mode 100644 index 00000000000..d1d6cd97c67 --- /dev/null +++ b/styles/custom/SentenceCaseHeaders.yml @@ -0,0 +1,34 @@ +extends: capitalization +message: "'%s' should use sentence-style capitalization. Try '%s' instead." +level: warning +scope: heading +match: $sentence # Enforces sentence-style capitalization +indicators: + - ":" +exceptions: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - Snowflake + - Databricks + - Azure + - GCP + - AWS + - SQL + - CLI + - API + - YAML + - JSON + - HTML + - Redshift + - Google + - BigQuery + - SnowSQL + - Snowsight + - Snowpark + - Fabric + - Microsoft + - Postgres + - Explorer + - IDE diff --git a/styles/custom/Typos.yml b/styles/custom/Typos.yml new file mode 100644 index 00000000000..456517950a9 --- /dev/null +++ b/styles/custom/Typos.yml @@ -0,0 +1,39 @@ +extends: spelling + +message: "Oops there's a typo -- did you really mean '%s'? 
" +level: warning + +action: + name: suggest + params: + - spellings + +custom: true +filters: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - '\bdbt\s+.*?\b' + - '<[^>]+>' # Ignore all HTML-like components starting with < and ending with > + - '<[^>]+>.*<\/[^>]+>' + +--- + +extends: existence + +message: "Ignore specific patterns" +level: skip +tokens: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - '\bdbt\s+.*?\b' + - '<[^>]+>' # Ignore all HTML-like components starting with < and ending with > + - '<[^>]+>.*<\/[^>]+>' + - '\w+-\w+' + - '\w+/\w+' + - '\w+/\w+|\w+-\w+|n/a' + - 'n/a' + - 'N/A' diff --git a/styles/custom/UIElements.yml b/styles/custom/UIElements.yml new file mode 100644 index 00000000000..f78a15af4b4 --- /dev/null +++ b/styles/custom/UIElements.yml @@ -0,0 +1,17 @@ +# styles/custom/BoldUIElements.yml +extends: existence +message: "UI elements like '%s' should be bold." +level: warning +tokens: + # Match UI elements that are not bolded (i.e., not within **), but exclude those starting a sentence or following a list number + - '(? + ## What is Iceberg? To have this conversation, we need to start with the same foundational understanding of Iceberg. Apache Iceberg is a high-performance open table format developed for modern data lakes. It was designed for large-scale datasets, and within the project, there are many ways to interact with it. When people talk about Iceberg, it often means multiple components including but not limited to: diff --git a/website/blog/2024-10-05-snowflake-feature-store.md b/website/blog/2024-10-05-snowflake-feature-store.md index fb62955d4a4..cf5c55be1b5 100644 --- a/website/blog/2024-10-05-snowflake-feature-store.md +++ b/website/blog/2024-10-05-snowflake-feature-store.md @@ -13,6 +13,8 @@ Flying home into Detroit this past week working on this blog post on a plane and Think of the manufacturing materials needed as our data and the building of the bridge as the building of our ML models. There are thousands of engineers and construction workers taking materials from all over the world, pulling only the specific pieces needed for each part of the project. However, to make this project truly work at this scale, we need the warehousing and logistics to ensure that each load of concrete rebar and steel meets the standards for quality and safety needed and is available to the right people at the right time — as even a single fault can have catastrophic consequences or cause serious delays in project success. This warehouse and the associated logistics play the role of the feature store, ensuring that data is delivered consistently where and when it is needed to train and run ML models. + + ## What is a feature? A feature is a transformed or enriched data that serves as an input into a machine learning model to make predictions. In machine learning, a data scientist derives features from various data sources to build a model that makes predictions based on historical data. To capture the value from this model, the enterprise must operationalize the data pipeline, ensuring that the features being used in production at inference time match those being used in training and development. diff --git a/website/blog/ctas.yml b/website/blog/ctas.yml index ac56d4cc749..1f9b13afa7b 100644 --- a/website/blog/ctas.yml +++ b/website/blog/ctas.yml @@ -25,3 +25,8 @@ subheader: Coalesce is the premiere analytics engineering conference! 
Sign up now for innovation, collaboration, and inspiration. Don't miss out! button_text: Register now url: https://coalesce.getdbt.com/register +- name: coalesce_2024_catchup + header: Missed Coalesce 2024? + subheader: Catch up on Coalesce 2024 and register to access a select number of on-demand sessions. + button_text: Register and watch + url: https://coalesce.getdbt.com/register/online diff --git a/website/blog/metadata.yml b/website/blog/metadata.yml index d0009fd62c4..8b53a7a2a04 100644 --- a/website/blog/metadata.yml +++ b/website/blog/metadata.yml @@ -2,7 +2,7 @@ featured_image: "" # This CTA lives in right sidebar on blog index -featured_cta: "coalesce_2024_signup" +featured_cta: "coalesce_2024_catchup" # Show or hide hero title, description, cta from blog index show_title: true diff --git a/website/docs/docs/build/data-tests.md b/website/docs/docs/build/data-tests.md index ae3ac9225db..b4f25a3d111 100644 --- a/website/docs/docs/build/data-tests.md +++ b/website/docs/docs/build/data-tests.md @@ -66,9 +66,25 @@ having total_amount < 0 -The name of this test is the name of the file: `assert_total_payment_amount_is_positive`. Simple enough. +The name of this test is the name of the file: `assert_total_payment_amount_is_positive`. -Singular data tests are easy to write—so easy that you may find yourself writing the same basic structure over and over, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend... +To add a data test to your project, add a `.yml` file to your `tests` directory, for example, `tests/schema.yml` with the following content: + + + +```yaml +version: 2 +data_tests: + - name: assert_total_payment_amount_is_positive + description: > + Refunds have a negative amount, so the total amount should always be >= 0. + Therefore return records where total amount < 0 to make the test fail. + +``` + + + +Singular data tests are so easy that you may find yourself writing the same basic structure repeatedly, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend generic data tests. ## Generic data tests Certain data tests are generic: they can be reused over and over again. A generic data test is defined in a `test` block, which contains a parametrized query and accepts arguments. It might look like: diff --git a/website/docs/docs/build/environment-variables.md b/website/docs/docs/build/environment-variables.md index 955bb79ed22..c26425401a7 100644 --- a/website/docs/docs/build/environment-variables.md +++ b/website/docs/docs/build/environment-variables.md @@ -101,7 +101,7 @@ dbt Cloud has a number of pre-defined variables built in. Variables are set auto The following environment variable is set automatically for the dbt Cloud IDE: -- `DBT_CLOUD_GIT_BRANCH`: Provides the development Git branch name in the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud). +- `DBT_CLOUD_GIT_BRANCH` — Provides the development Git branch name in the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud). - Available in dbt v 1.6 and later. - The variable changes when the branch is changed. - Doesn't require restarting the IDE after a branch change. @@ -113,26 +113,26 @@ Use case — This is useful in cases where you want to dynamically use the G The following environment variables are set automatically: -- `DBT_ENV`: This key is reserved for the dbt Cloud application and will always resolve to 'prod'. For deployment runs only. 
-- `DBT_CLOUD_ENVIRONMENT_NAME`: The name of the dbt Cloud environment in which `dbt` is running. -- `DBT_CLOUD_ENVIRONMENT_TYPE`: The type of dbt Cloud environment in which `dbt` is running. The valid values are `development` or `deployment`. +- `DBT_ENV` — This key is reserved for the dbt Cloud application and will always resolve to 'prod'. For deployment runs only. +- `DBT_CLOUD_ENVIRONMENT_NAME` — The name of the dbt Cloud environment in which `dbt` is running. +- `DBT_CLOUD_ENVIRONMENT_TYPE` — The type of dbt Cloud environment in which `dbt` is running. The valid values are `development` or `deployment`. #### Run details -- `DBT_CLOUD_PROJECT_ID`: The ID of the dbt Cloud Project for this run -- `DBT_CLOUD_JOB_ID`: The ID of the dbt Cloud Job for this run -- `DBT_CLOUD_RUN_ID`: The ID of this particular run -- `DBT_CLOUD_RUN_REASON_CATEGORY`: The "category" of the trigger for this run (one of: `scheduled`, `github_pull_request`, `gitlab_merge_request`, `azure_pull_request`, `other`) -- `DBT_CLOUD_RUN_REASON`: The specific trigger for this run (eg. `Scheduled`, `Kicked off by `, or custom via `API`) -- `DBT_CLOUD_ENVIRONMENT_ID`: The ID of the environment for this run -- `DBT_CLOUD_ACCOUNT_ID`: The ID of the dbt Cloud account for this run +- `DBT_CLOUD_PROJECT_ID` — The ID of the dbt Cloud Project for this run +- `DBT_CLOUD_JOB_ID` — The ID of the dbt Cloud Job for this run +- `DBT_CLOUD_RUN_ID` — The ID of this particular run +- `DBT_CLOUD_RUN_REASON_CATEGORY` — The "category" of the trigger for this run (one of: `scheduled`, `github_pull_request`, `gitlab_merge_request`, `azure_pull_request`, `other`) +- `DBT_CLOUD_RUN_REASON` — The specific trigger for this run (eg. `Scheduled`, `Kicked off by `, or custom via `API`) +- `DBT_CLOUD_ENVIRONMENT_ID` — The ID of the environment for this run +- `DBT_CLOUD_ACCOUNT_ID` — The ID of the dbt Cloud account for this run #### Git details _The following variables are currently only available for GitHub, GitLab, and Azure DevOps PR builds triggered via a webhook_ -- `DBT_CLOUD_PR_ID`: The Pull Request ID in the connected version control system -- `DBT_CLOUD_GIT_SHA`: The git commit SHA which is being run for this Pull Request build +- `DBT_CLOUD_PR_ID` — The Pull Request ID in the connected version control system +- `DBT_CLOUD_GIT_SHA` — The git commit SHA which is being run for this Pull Request build ### Example usage diff --git a/website/docs/docs/build/incremental-microbatch.md b/website/docs/docs/build/incremental-microbatch.md index d200dd6e4b6..18122af4b7b 100644 --- a/website/docs/docs/build/incremental-microbatch.md +++ b/website/docs/docs/build/incremental-microbatch.md @@ -24,7 +24,7 @@ Each "batch" corresponds to a single bounded time period (by default, a single d ### Example -A `sessions` model is aggregating and enriching data that comes from two other models: +A `sessions` model aggregates and enriches data that comes from two other models. - `page_views` is a large, time-series table. It contains many rows, new records almost always arrive after existing ones, and existing records rarely update. - `customers` is a relatively small dimensional table. Customer attributes update often, and not in a time-based manner — that is, older customers are just as likely to change column values as newer customers. @@ -39,12 +39,15 @@ models: event_time: page_view_start ``` + We run the `sessions` model on October 1, 2024, and then again on October 2. 
It produces the following queries:
+
+The `event_time` for the `sessions` model is set to `session_start`, which marks the beginning of a user’s session on the website. This setting allows dbt to combine multiple page views (each tracked by their own `page_view_start` timestamps) into a single session. This way, `session_start` differentiates the timing of individual page views from the broader timeframe of the entire user session.
+

```sql

@@ -70,7 +73,13 @@ customers as (

),

-...
+select
+  page_views.id as session_id,
+  page_views.page_view_start as session_start,
+  customers.*
+  from page_views
+  left join customers
+  on page_views.customer_id = customers.id

```

@@ -141,7 +150,7 @@ customers as (

dbt will instruct the data platform to take the result of each batch query and insert, update, or replace the contents of the `analytics.sessions` table for the same day of data. To perform this operation, dbt will use the most efficient atomic mechanism for "full batch" replacement that is available on each data platform.

-It does not matter whether the table already contains data for that day, or not. Given the same input data, no matter how many times a batch is reprocessed, the resulting table is the same.
+It does not matter whether the table already contains data for that day. Given the same input data, the resulting table is the same no matter how many times a batch is reprocessed.

@@ -153,7 +162,7 @@ Several configurations are relevant to microbatch models, and some are required:
|----------|------|---------------|---------|
| `event_time` | Column (required) | The column indicating "at what time did the row occur." Required for your microbatch model and any direct parents that should be filtered. | N/A |
| `begin` | Date (required) | The "beginning of time" for the microbatch model. This is the starting point for any initial or full-refresh builds. For example, a daily-grain microbatch model run on `2024-10-01` with `begin = '2023-10-01` will process 366 batches (it's a leap year!) plus the batch for "today." | N/A |
-| `batch_size` | String (required) | The granularity of your batches. The default is `day` (and currently this is the only granularity supported). | `day` |
+| `batch_size` | String (required) | The granularity of your batches. Supported values are `hour`, `day`, `month`, and `year`. | N/A |
| `lookback` | Integer (optional) | Process X batches prior to the latest bookmark to capture late-arriving records. | `0` |

@@ -175,11 +184,11 @@ During standard incremental runs, dbt will process batches according to the curr

-**Note:** If there’s an upstream model that configures `event_time`, but you *don’t* want the reference to it to be filtered, you can specify `ref('upstream_model').render()` to opt-out of auto-filtering. This isn't generally recommended — most models which configure `event_time` are fairly large, and if the reference is not filtered, each batch will perform a full scan of this input table.
+**Note:** If there’s an upstream model that configures `event_time`, but you *don’t* want the reference to it to be filtered, you can specify `ref('upstream_model').render()` to opt-out of auto-filtering. This isn't generally recommended — most models that configure `event_time` are fairly large, and if the reference is not filtered, each batch will perform a full scan of this input table.

### Backfills

-Whether to fix erroneous source data, or retroactively apply a change in business logic, you may need to reprocess a large amount of historical data.
+Whether to fix erroneous source data or retroactively apply a change in business logic, you may need to reprocess a large amount of historical data. Backfilling a microbatch model is as simple as selecting it to run or build, and specifying a "start" and "end" for `event_time`. As always, dbt will process the batches between the start and end as independent queries. @@ -204,7 +213,7 @@ For now, dbt assumes that all values supplied are in UTC: - `--event-time-start` - `--event-time-end` -While we may consider adding support for custom timezones in the future, we also believe that defining these values in UTC makes everyone's lives easier. +While we may consider adding support for custom time zones in the future, we also believe that defining these values in UTC makes everyone's lives easier. ## How `microbatch` compares to other incremental strategies? @@ -261,7 +270,7 @@ select * from {{ ref('stg_events') }} -- this ref will be auto-filtered -Where you’ve also set an `event_time` for the model’s direct parents - in this case `stg_events`: +Where you’ve also set an `event_time` for the model’s direct parents - in this case, `stg_events`: diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md index c48030cc32d..2968496290a 100644 --- a/website/docs/docs/build/incremental-models.md +++ b/website/docs/docs/build/incremental-models.md @@ -94,7 +94,7 @@ Not specifying a `unique_key` will result in append-only behavior, which means d The optional `unique_key` parameter specifies a field (or combination of fields) that defines the grain of your model. That is, the field(s) identify a single unique row. You can define `unique_key` in a configuration block at the top of your model, and it can be a single column name or a list of column names. -The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …])`. Columns used in this way should not contain any nulls, or the incremental model run may fail. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`), or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)). +The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …])`. Columns used in this way should not contain any nulls, or the incremental model may fail to match rows and generate duplicate rows. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`) or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)). :::tip In cases where you need multiple columns in combination to uniquely identify each row, we recommend you pass these columns as a list (`unique_key = ['user_id', 'session_number']`), rather than a string expression (`unique_key = 'concat(user_id, session_number)'`). 
diff --git a/website/docs/docs/cloud/dbt-copilot.md b/website/docs/docs/cloud/dbt-copilot.md index 42a05dd91ba..403df86a089 100644 --- a/website/docs/docs/cloud/dbt-copilot.md +++ b/website/docs/docs/cloud/dbt-copilot.md @@ -13,7 +13,7 @@ dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integ :::tip Beta feature dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models in dbt Cloud. It's available in beta, in the dbt Cloud IDE only. -To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and agree to use dbt Labs' OpenAI key. [Register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) to join the private beta or reach out to your Account team to begin this process. +To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and either agree to use dbt Labs' OpenAI key or provide your own Open AI API key. [Register here](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to the Account Team if you're interested in joining the private beta. ::: diff --git a/website/docs/docs/cloud/enable-dbt-copilot.md b/website/docs/docs/cloud/enable-dbt-copilot.md index 23c253ecf7a..07a9f6294da 100644 --- a/website/docs/docs/cloud/enable-dbt-copilot.md +++ b/website/docs/docs/cloud/enable-dbt-copilot.md @@ -13,12 +13,12 @@ This page explains how to enable the dbt Copilot engine in dbt Cloud, leveraging - Available in the dbt Cloud IDE only. - Must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing). - Development environment has been upgraded to ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless). -- Current dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. In the future, you may provide your own key for Azure OpenAI or OpenAI. +- By default, dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. Alternatively, you can [provide your own OpenAI API key](#bringing-your-own-openai-api-key-byok). - Accept and sign legal agreements. Reach out to your Account team to begin this process. ## Enable dbt Copilot -dbt Copilot is only available at an account level after your organization has signed the legal requirements. It's disabled by default. A dbt Cloud admin(s) can enable it by following these steps: +dbt Copilot is only available to your account after your organization has signed the required legal documents. It's disabled by default. A dbt Cloud admin can enable it by following these steps: 1. Navigate to **Account settings** in the navigation menu. @@ -32,4 +32,20 @@ dbt Copilot is only available at an account level after your organization has si Note: To disable (only after enabled), repeat steps 1 to 3, toggle off in step 4, and repeat step 5. - \ No newline at end of file + + +### Bringing your own OpenAI API key (BYOK) + +Once AI features have been enabled, you can provide your organization's OpenAI API key. dbt Cloud will then leverage your OpenAI account and terms to power dbt CoPilot. This will incur billing charges to your organization from OpenAI for requests made by dbt CoPilot. + +Note that Azure OpenAI is not currently supported, but will be in the future. + +A dbt Cloud admin can provide their API key by following these steps: + +1. Navigate to **Account settings** in the side menu. + +2. 
Find the **Settings** section and click on **Integrations**. + +3. Scroll to **AI** and select the toggle for **OpenAI** + +4. Enter your API key and click **Save**. \ No newline at end of file diff --git a/website/docs/docs/collaborate/govern/model-contracts.md b/website/docs/docs/collaborate/govern/model-contracts.md index b07ce909480..d30024157c8 100644 --- a/website/docs/docs/collaborate/govern/model-contracts.md +++ b/website/docs/docs/collaborate/govern/model-contracts.md @@ -178,14 +178,14 @@ Currently, `not_null` and `check` constraints are enforced only after a model is ### Which models should have contracts? Any model meeting the criteria described above _can_ define a contract. We recommend defining contracts for ["public" models](model-access) that are being relied on downstream. -- Inside of dbt: Shared with other groups, other teams, and (in the future) other dbt projects. +- Inside of dbt: Shared with other groups, other teams, and [other dbt projects](/best-practices/how-we-mesh/mesh-1-intro). - Outside of dbt: Reports, dashboards, or other systems & processes that expect this model to have a predictable structure. You might reflect these downstream uses with [exposures](/docs/build/exposures). ### How are contracts different from tests? A model's contract defines the **shape** of the returned dataset. If the model's logic or input data doesn't conform to that shape, the model does not build. -[Data Tests](/docs/build/data-tests) are a more flexible mechanism for validating the content of your model _after_ it's built. So long as you can write the query, you can run the data test. Data tests are more configurable, such as with [custom severity thresholds](/reference/resource-configs/severity). They are easier to debug after finding failures, because you can query the already-built model, or [store the failing records in the data warehouse](/reference/resource-configs/store_failures). +[Data Tests](/docs/build/data-tests) are a more flexible mechanism for validating the content of your model _after_ it's built. So long as you can write the query, you can run the data test. Data tests are more configurable, such as with [custom severity thresholds](/reference/resource-configs/severity). They are easier to debug after finding failures because you can query the already-built model, or [store the failing records in the data warehouse](/reference/resource-configs/store_failures). In some cases, you can replace a data test with its equivalent constraint. This has the advantage of guaranteeing the validation at build time, and it probably requires less compute (cost) in your data platform. The prerequisites for replacing a data test with a constraint are: - Making sure that your data platform can support and enforce the constraint that you need. Most platforms only enforce `not_null`. 
diff --git a/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md b/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md index 29f3650e7a6..aaa85e4ecef 100644 --- a/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md +++ b/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md @@ -107,6 +107,6 @@ You can read more about each of these behavior changes in the following links: We also made some quality-of-life improvements in Core 1.9, enabling you to: - Maintain data quality now that dbt returns an an error (versioned models) or warning (unversioned models) when someone [removes a contracted model by deleting, renaming, or disabling](/docs/collaborate/govern/model-contracts#how-are-breaking-changes-handled) it. -- Document [singular data tests](/docs/build/data-tests#document-singular-tests). +- Document [singular data tests](/docs/build/data-tests#singular-data-tests). - Use `ref` and `source` in [foreign key constraints](/reference/resource-properties/constraints). - Use `dbt test` with the `--resource-type` / `--exclude-resource-type` flag, making it possible to include or exclude data tests (`test`) or unit tests (`unit_test`). diff --git a/website/docs/docs/dbt-versions/release-notes.md b/website/docs/docs/dbt-versions/release-notes.md index fc8d0265072..662fd0f381a 100644 --- a/website/docs/docs/dbt-versions/release-notes.md +++ b/website/docs/docs/dbt-versions/release-notes.md @@ -20,6 +20,32 @@ Release notes are grouped by month for both multi-tenant and virtual private clo ## October 2024 + + + Documentation for new features and functionality announced at Coalesce 2024: + + - Iceberg table support for [Snowflake](https://docs.getdbt.com/reference/resource-configs/snowflake-configs#iceberg-table-format) + - [Athena](https://docs.getdbt.com/reference/resource-configs/athena-configs) and [Teradata](https://docs.getdbt.com/reference/resource-configs/teradata-configs) adapter support in dbt Cloud + - dbt Cloud now hosted on [Azure](https://docs.getdbt.com/docs/cloud/about-cloud/access-regions-ip-addresses) + - Get comfortable with [Versionless dbt Cloud](https://docs.getdbt.com/docs/dbt-versions/versionless-cloud) + - Scalable [microbatch incremental models](https://docs.getdbt.com/docs/build/incremental-microbatch) + - Advanced CI [features](https://docs.getdbt.com/docs/deploy/advanced-ci) + - [Linting with CI jobs](https://docs.getdbt.com/docs/deploy/continuous-integration#sql-linting) + - dbt Assist is now [dbt Copilot](https://docs.getdbt.com/docs/cloud/dbt-copilot) + - Developer blog on [Snowflake Feature Store and dbt: A bridge between data pipelines and ML](https://docs.getdbt.com/blog/snowflake-feature-store) + - New [Quickstart for dbt Cloud CLI](https://docs.getdbt.com/guides/dbt-cloud-cli?step=1) + - [Auto-exposures with Tableau](https://docs.getdbt.com/docs/collaborate/auto-exposures) + - Semantic Layer integration with [Excel desktop and M365](https://docs.getdbt.com/docs/cloud-integrations/semantic-layer/excel) + - [Data health tiles](https://docs.getdbt.com/docs/collaborate/data-tile) + - [Semantic Layer and Cloud IDE integration](https://docs.getdbt.com/docs/build/metricflow-commands#metricflow-commands) + - Query history in [Explorer](https://docs.getdbt.com/docs/collaborate/model-query-history#view-query-history-in-explorer) + - Semantic Layer Metricflow improvements, including [improved granularity and custom calendar](https://docs.getdbt.com/docs/build/metricflow-time-spine#custom-calendar) + - [Python 
SDK](https://docs.getdbt.com/docs/dbt-cloud-apis/sl-python) is now generally available + + + + +- **New**: The [dbt Semantic Layer Python software development kit](/docs/dbt-cloud-apis/sl-python) is now [generally available](/docs/dbt-versions/product-lifecycles). It provides users with easy access to the dbt Semantic Layer with Python and enables developers to interact with the dbt Semantic Layer APIs to query metrics/dimensions in downstream tools. - **Enhancement**: You can now add a description to a singular data test in dbt Cloud Versionless. Use the [`description` property](/reference/resource-properties/description) to document [singular data tests](/docs/build/data-tests#singular-data-tests). You can also use [docs block](/docs/build/documentation#using-docs-blocks) to capture your test description. The enhancement will be included in upcoming dbt Core 1.9 release. - **New**: Introducing the [microbatch incremental model strategy](/docs/build/incremental-microbatch) (beta), available in dbt Cloud Versionless and will soon be supported in dbt Core 1.9. The microbatch strategy allows for efficient, batch-based processing of large time-series datasets for improved performance and resiliency, especially when you're working with data that changes over time (like new records being added daily). To enable this feature in dbt Cloud, set the `DBT_EXPERIMENTAL_MICROBATCH` environment variable to `true` in your project. - **New**: The dbt Semantic Layer supports custom calendar configurations in MetricFlow, available in [Preview](/docs/dbt-versions/product-lifecycles#dbt-cloud). Custom calendar configurations allow you to query data using non-standard time periods like `fiscal_year` or `retail_month`. Refer to [custom calendar](/docs/build/metricflow-time-spine#custom-calendar) to learn how to define these custom granularities in your MetricFlow timespine YAML configuration. diff --git a/website/docs/reference/commands/build.md b/website/docs/reference/commands/build.md index c7ac29862c2..9f8e83d2abd 100644 --- a/website/docs/reference/commands/build.md +++ b/website/docs/reference/commands/build.md @@ -31,32 +31,9 @@ In DAG order, for selected resources or an entire project. The `build` command supports the `--empty` flag for building schema-only dry runs. The `--empty` flag limits the refs and sources to zero rows. dbt will still execute the model SQL against the target data warehouse but will avoid expensive reads of input data. This validates dependencies and ensures your models will build properly. -#### SQL compilation error when running the `--empty` flag on a model - -If you encounter the error: `SQL compilation error: syntax error line 1 at position 21 unexpected '('.` when running a model with the `--empty` flag, explicitly call the `.render()` method on that relation. - - - - -```Jinja - --- models/staging/stg_sys__customers.sql -{{ config( - pre_hook = [ - "alter external table {{ source('sys', 'customers').render() }} refresh" - ] -) }} - -with cus as ( - select * from {{ source("sys", "customers") }} -- leave this as is! -) - -select * from cus - -``` - - +import SQLCompilationError from '/snippets/_render-method.md'; + ## Tests diff --git a/website/docs/reference/node-selection/defer.md b/website/docs/reference/node-selection/defer.md index 99dbea401b3..863494de12e 100644 --- a/website/docs/reference/node-selection/defer.md +++ b/website/docs/reference/node-selection/defer.md @@ -31,7 +31,7 @@ dbt test --models [...] 
--defer --state path/to/artifacts

When the `--defer` flag is provided, dbt will resolve `ref` calls differently depending on two criteria:

1. Is the referenced node included in the model selection criteria of the current run?
-2. Does the reference node exist as a database object in the current environment?
+2. Does the referenced node exist as a database object in the current environment?

If the answer to both is **no**—a node is not included _and_ it does not exist as a database object in the current environment—references to it will use the other namespace instead, provided by the state manifest.

@@ -71,8 +71,6 @@ group by 1

I want to test my changes. Nothing exists in my development schema, `dev_alice`.

-### test
-
+### test
+
I also have a `relationships` test that establishes referential integrity between `model_a` and `model_b`:

diff --git a/website/docs/reference/resource-configs/bigquery-configs.md b/website/docs/reference/resource-configs/bigquery-configs.md
index a6f3036ede8..b943f114861 100644
--- a/website/docs/reference/resource-configs/bigquery-configs.md
+++ b/website/docs/reference/resource-configs/bigquery-configs.md
@@ -21,7 +21,7 @@ This will allow you to read and write from multiple BigQuery projects. Same for

### Partition clause

-BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a table by a column or expression. This option can help decrease latency and cost when querying large tables. Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#pruning_limiting_partitions) when partitions are filtered using literal values (so selecting partitions using a subquery won't improve performance).
+BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a table by a column or expression. This option can help decrease latency and cost when querying large tables. Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#use_a_constant_filter_expression) when partitions are filtered using literal values (so selecting partitions using a subquery won't improve performance).

The `partition_by` config can be supplied as a dictionary with the following format:

@@ -265,7 +265,7 @@ If your model has `partition_by` configured, you may optionally specify two addi

-### Clustering Clause
+### Clustering clause

BigQuery tables can be [clustered](https://cloud.google.com/bigquery/docs/clustered-tables) to colocate related data.

@@ -286,7 +286,7 @@ select * from ...

-Clustering on a multiple columns:
+Clustering on multiple columns:

@@ -303,11 +303,11 @@ select * from ...

-## Managing KMS Encryption
+## Managing KMS encryption

[Customer managed encryption keys](https://cloud.google.com/bigquery/docs/customer-managed-encryption) can be configured for BigQuery tables using the `kms_key_name` model configuration.

-### Using KMS Encryption
+### Using KMS encryption

To specify the KMS key name for a model (or a group of models), use the `kms_key_name` model configuration. The following example sets the `kms_key_name` for all of the models in the `encrypted/` directory of your dbt project.
@@ -328,7 +328,7 @@ models: -## Labels and Tags +## Labels and tags ### Specifying labels @@ -373,8 +373,6 @@ models: - - ### Specifying tags @@ -434,7 +432,7 @@ The `incremental_strategy` config can be set to one of two values: ### Performance and cost The operations performed by dbt while building a BigQuery incremental model can -be made cheaper and faster by using [clustering keys](#clustering-keys) in your +be made cheaper and faster by using a [clustering clause](#clustering-clause) in your model configuration. See [this guide](https://discourse.getdbt.com/t/benchmarking-incremental-strategies-on-bigquery/981) for more information on performance tuning for BigQuery incremental models. **Note:** These performance and cost benefits are applicable to incremental models @@ -673,7 +671,7 @@ select ... -## Authorized Views +## Authorized views If the `grant_access_to` config is specified for a model materialized as a view, dbt will grant the view model access to select from the list of datasets diff --git a/website/docs/reference/resource-configs/firebolt-configs.md b/website/docs/reference/resource-configs/firebolt-configs.md index 394823e33de..0ab14354003 100644 --- a/website/docs/reference/resource-configs/firebolt-configs.md +++ b/website/docs/reference/resource-configs/firebolt-configs.md @@ -38,8 +38,8 @@ models: +table_type: fact +primary_index: [ , ... ] +indexes: - - type: aggregating - key_column: [ , ... ] + - index_type: aggregating + key_columns: [ , ... ] aggregation: [ , ... ] ... ``` @@ -58,8 +58,8 @@ models: table_type: fact primary_index: [ , ... ] indexes: - - type: aggregating - key_column: [ , ... ] + - index_type: aggregating + key_columns: [ , ... ] aggregation: [ , ... ] ... ``` @@ -77,9 +77,9 @@ models: primary_index = [ "", ... ], indexes = [ { - type = "aggregating" - key_column = [ "", ... ], - aggregation = [ "", ... ], + "index_type": "aggregating" + "key_columns": [ "", ... ], + "aggregation": [ "", ... ], }, ... ] @@ -99,8 +99,8 @@ models: | `table_type` | Whether the materialized table will be a [fact or dimension](https://docs.firebolt.io/godocs/Overview/working-with-tables/working-with-tables.html#fact-and-dimension-tables) table. | | `primary_index` | Sets the primary index for the fact table using the inputted list of column names from the model. Required for fact tables. | | `indexes` | A list of aggregating indexes to create on the fact table. | -| `type` | Specifies that the index is an [aggregating index](https://docs.firebolt.io/godocs/Guides/working-with-indexes/using-aggregating-indexes.html). Should be set to `aggregating`. | -| `key_column` | Sets the grouping of the aggregating index using the inputted list of column names from the model. | +| `index_type` | Specifies that the index is an [aggregating index](https://docs.firebolt.io/godocs/Guides/working-with-indexes/using-aggregating-indexes.html). Should be set to `aggregating`. | +| `key_columns` | Sets the grouping of the aggregating index using the inputted list of column names from the model. | | `aggregation` | Sets the aggregations on the aggregating index using the inputted list of SQL agg expressions. 
| @@ -113,9 +113,9 @@ models: primary_index = "id", indexes = [ { - type: "aggregating", - key_column: "order_id", - aggregation: ["COUNT(DISTINCT status)", "AVG(customer_id)"] + "index_type": "aggregating", + "key_columns": "order_id", + "aggregation": ["COUNT(DISTINCT status)", "AVG(customer_id)"] } ] ) }} diff --git a/website/docs/reference/resource-configs/pre-hook-post-hook.md b/website/docs/reference/resource-configs/pre-hook-post-hook.md index ce818768134..bd01a7be840 100644 --- a/website/docs/reference/resource-configs/pre-hook-post-hook.md +++ b/website/docs/reference/resource-configs/pre-hook-post-hook.md @@ -154,6 +154,10 @@ Pre- and post-hooks can also call macros that return SQL statements. If your mac dbt aims to provide all the boilerplate SQL you need (DDL, DML, and DCL) via out-of-the-box functionality, which you can configure quickly and concisely. In some cases, there may be SQL that you want or need to run, specific to functionality in your data platform, which dbt does not (yet) offer as a built-in feature. In those cases, you can write the exact SQL you need, using dbt's compilation context, and pass it into a `pre-` or `post-` hook to run before or after your model, seed, or snapshot. +import SQLCompilationError from '/snippets/_render-method.md'; + + + ## Examples diff --git a/website/docs/reference/resource-properties/deprecation_date.md b/website/docs/reference/resource-properties/deprecation_date.md index be76ccb07f6..70f150dc465 100644 --- a/website/docs/reference/resource-properties/deprecation_date.md +++ b/website/docs/reference/resource-properties/deprecation_date.md @@ -53,11 +53,11 @@ Additionally, [`WARN_ERROR_OPTIONS`](/reference/global-configs/warnings) gives a |--------------------------------|----------------------------------------------------|------------------------| | `DeprecatedModel` | Parsing a project that defines a deprecated model | Producer | | `DeprecatedReference` | Referencing a model with a past deprecation date | Producer and consumers | -| `UpcomingDeprecationReference` | Referencing a model with a future deprecation date | Producer and consumers | +| `UpcomingReferenceDeprecation` | Referencing a model with a future deprecation date | Producer and consumers | ** Example ** -Example output for an `UpcomingDeprecationReference` warning: +Example output for an `UpcomingReferenceDeprecation` warning: ``` $ dbt parse 15:48:14 Running with dbt=1.6.0 diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index d0e09b6c067..b68e2e8ec5c 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -72,15 +72,14 @@ var siteSettings = { }, announcementBar: { id: "biweekly-demos", - content: - "Register now for Coalesce 2024 ✨ The Analytics Engineering Conference!", - backgroundColor: "#7444FD", + content: "Join our biweekly demos and see dbt Cloud in action!", + backgroundColor: "#047377", textColor: "#fff", isCloseable: true, }, announcementBarActive: true, announcementBarLink: - "https://coalesce.getdbt.com/register/?utm_medium=internal&utm_source=docs&utm_campaign=q3-2025_coalesce-2024_aw&utm_content=coalesce____&utm_term=all_all__", + "https://www.getdbt.com/resources/webinars/dbt-cloud-demos-with-experts/?utm_medium=i[…]ly-demos_aw&utm_content=biweekly-demos____&utm_term=all_all__", // Set community spotlight member on homepage // This is the ID for a specific file under docs/community/spotlight communitySpotlightMember: "meagan-palmer", diff --git a/website/snippets/_render-method.md 
b/website/snippets/_render-method.md
new file mode 100644
index 00000000000..00407a20251
--- /dev/null
+++ b/website/snippets/_render-method.md
@@ -0,0 +1,17 @@
+#### The render method
+
+The `.render()` method is generally used to resolve or evaluate Jinja expressions (such as `{{ source(...) }}`) at runtime.
+
+When using the `--empty` flag, dbt may skip processing `ref()` or `source()` for optimization. To avoid compilation errors and to explicitly tell dbt to process a specific relation (`ref()` or `source()`), use the `.render()` method in your model file. For example:
+
+
+```Jinja
+{{ config(
+    pre_hook = [
+        "alter external table {{ source('sys', 'customers').render() }} refresh"
+    ]
+) }}
+```
+
+
diff --git a/website/snippets/_sl-partner-links.md b/website/snippets/_sl-partner-links.md
index 28e4dc24b39..aaefcc77747 100644
--- a/website/snippets/_sl-partner-links.md
+++ b/website/snippets/_sl-partner-links.md
@@ -54,9 +54,9 @@ The following tools integrate with the dbt Semantic Layer:
-
@@ -68,9 +68,9 @@ The following tools integrate with the dbt Semantic Layer:
-
@@ -82,9 +82,9 @@ The following tools integrate with the dbt Semantic Layer:
-
diff --git a/website/src/pages/styles.js b/website/src/pages/styles.js
deleted file mode 100644
index 23d13d10813..00000000000
--- a/website/src/pages/styles.js
+++ /dev/null
@@ -1,176 +0,0 @@
-
-import React from 'react';
-import Layout from '@theme/Layout';
-import CodeBlock from '@theme/CodeBlock';
-import Changelog from '@site/src/components/changelog';
-import CloudCore from '@site/src/components/cloudcore';
-import Collapsible from '@site/src/components/collapsible';
-import FAQ from '@site/src/components/faqs';
-import File from '@site/src/components/file';
-import Lightbox from '@site/src/components/lightbox';
-import LoomVideo from '@site/src/components/loom';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
-import YoutubeVideo from '@site/src/components/youtube';
-
-function Styles() {
-  return (
-
-    [Deleted styles reference page body: rendered examples and CodeBlock snippets for the Changelog, CloudCore, Collapsible, FAQList, FAQ, File, Lightbox, Markdown links, LoomVideo, Tabs, and YoutubeVideo components]
- - ); -} - -export default Styles; diff --git a/website/vercel.json b/website/vercel.json index 0674313f3f5..74f0eeff65b 100644 --- a/website/vercel.json +++ b/website/vercel.json @@ -2,6 +2,11 @@ "cleanUrls": true, "trailingSlash": false, "redirects": [ + { + "source": "/styles", + "destination": "https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/adding-page-components.md", + "permanent": true + }, { "source": "/docs/dbt-cloud-apis/sl-manifest", "destination": "/reference/artifacts/sl-manifest",