diff --git a/.github/workflows/preview-link.yml b/.github/workflows/preview-link.yml new file mode 100644 index 00000000000..f128f44b8cd --- /dev/null +++ b/.github/workflows/preview-link.yml @@ -0,0 +1,169 @@ +name: Vercel deployment preview link generator + +on: + pull_request: + types: [opened, synchronize] + paths: + - 'website/docs/docs/**' + - 'website/docs/best-practices/**' + - 'website/docs/guides/**' + - 'website/docs/faqs/**' + - 'website/docs/reference/**' + +permissions: + contents: write + pull-requests: write + +jobs: + update-pr-description: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install necessary tools + run: | + sudo apt-get update + sudo apt-get install -y jq curl + + - name: Generate Vercel deployment URL + id: vercel_url + run: | + # Get the branch name + BRANCH_NAME="${{ github.head_ref }}" + + # Convert to lowercase + BRANCH_NAME_LOWER=$(echo "$BRANCH_NAME" | tr '[:upper:]' '[:lower:]') + + # Replace non-alphanumeric characters with hyphens + BRANCH_NAME_SANITIZED=$(echo "$BRANCH_NAME_LOWER" | sed 's/[^a-z0-9]/-/g') + + # Construct the deployment URL + DEPLOYMENT_URL="https://docs-getdbt-com-git-${BRANCH_NAME_SANITIZED}-dbt-labs.vercel.app" + + echo "deployment_url=$DEPLOYMENT_URL" >> $GITHUB_OUTPUT + + - name: Wait for deployment to be accessible + id: wait_for_deployment + run: | + DEPLOYMENT_URL="${{ steps.vercel_url.outputs.deployment_url }}" + echo "Waiting for deployment at $DEPLOYMENT_URL to become accessible..." + + MAX_ATTEMPTS=60 # Wait up to 10 minutes + SLEEP_TIME=10 # Check every 10 seconds + ATTEMPTS=0 + + while [ $ATTEMPTS -lt $MAX_ATTEMPTS ]; do + STATUS_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$DEPLOYMENT_URL") + if [ "$STATUS_CODE" -eq 200 ]; then + echo "Deployment is accessible." + break + else + echo "Deployment not yet accessible (status code: $STATUS_CODE). Waiting..." + sleep $SLEEP_TIME + ATTEMPTS=$((ATTEMPTS + 1)) + fi + done + + if [ $ATTEMPTS -eq $MAX_ATTEMPTS ]; then + echo "Deployment did not become accessible within the expected time." + exit 1 + fi + + - name: Get changed files + id: files + run: | + CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -E '^website/docs/(docs|best-practices|guides|faqs|reference)/.*\.md$' || true) + if [ -z "$CHANGED_FILES" ]; then + echo "No documentation files were changed." + echo "changed_files=" >> $GITHUB_OUTPUT + else + CHANGED_FILES=$(echo "$CHANGED_FILES" | tr '\n' ' ') + echo "changed_files=$CHANGED_FILES" >> $GITHUB_OUTPUT + fi + + - name: Generate file preview links + id: links + run: | + DEPLOYMENT_URL="${{ steps.vercel_url.outputs.deployment_url }}" + CHANGED_FILES="${{ steps.files.outputs.changed_files }}" + + if [ -z "$CHANGED_FILES" ]; then + echo "No changed files found in the specified directories." + LINKS="No documentation files were changed." 
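+          # No links to build in this case; the PR description will show this message instead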
+        else
+          LINKS=""
+          # Convert CHANGED_FILES back to newline-separated for processing
+          CHANGED_FILES=$(echo "$CHANGED_FILES" | tr ' ' '\n')
+          for FILE in $CHANGED_FILES; do
+            # Remove 'website/docs/' prefix
+            FILE_PATH="${FILE#website/docs/}"
+            # Remove the .md extension
+            FILE_PATH="${FILE_PATH%.md}"
+
+            # Construct the full URL
+            FULL_URL="$DEPLOYMENT_URL/$FILE_PATH"
+            LINKS="$LINKS\n- $FULL_URL"
+          done
+        fi
+
+        # Properly set the multi-line output
+        echo "links<<EOF" >> $GITHUB_OUTPUT
+        echo -e "$LINKS" >> $GITHUB_OUTPUT
+        echo "EOF" >> $GITHUB_OUTPUT
+
+    - name: Update PR description with deployment links
+      uses: actions/github-script@v6
+      with:
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        script: |
+          const prNumber = context.issue.number;
+
+          // Fetch the current PR description
+          const { data: pullRequest } = await github.rest.pulls.get({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            pull_number: prNumber,
+          });
+
+          let body = pullRequest.body || '';
+
+          // Define the markers (HTML comments, so they stay invisible in the rendered description)
+          const startMarker = '<!-- vercel-deployment-start -->';
+          const endMarker = '<!-- vercel-deployment-end -->';
+
+          // Get the deployment URL and links from environment variables
+          const deploymentUrl = process.env.DEPLOYMENT_URL;
+          const links = process.env.LINKS;
+
+          // Build the deployment content without leading whitespace
+          const deploymentContent = [
+            `${startMarker}`,
+            '---',
+            '🚀 Deployment available! Here are the direct links to the updated files:',
+            '',
+            `${links}`,
+            '',
+            `${endMarker}`
+          ].join('\n');
+
+          // Remove existing deployment content between markers
+          const regex = new RegExp(`${startMarker}[\\s\\S]*?${endMarker}`, 'g');
+          body = body.replace(regex, '').trim();
+
+          // Append the new deployment content
+          body = `${body}\n\n${deploymentContent}`;
+
+          // Update the PR description
+          await github.rest.pulls.update({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            pull_number: prNumber,
+            body: body,
+          });
+      env:
+        DEPLOYMENT_URL: ${{ steps.vercel_url.outputs.deployment_url }}
+        LINKS: ${{ steps.links.outputs.links }}
diff --git a/.github/workflows/vale.yml b/.github/workflows/vale.yml
new file mode 100644
index 00000000000..5feaaa12a20
--- /dev/null
+++ b/.github/workflows/vale.yml
@@ -0,0 +1,80 @@
+name: Vale linting
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'website/docs/**/*'
+      - 'website/blog/**/*'
+      - 'website/**/*'
+
+jobs:
+  vale:
+    name: Vale linting
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: List repository contents
+        run: |
+          pwd
+          ls -R
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install Vale
+        run: pip install vale==2.27.0 # Install a stable version of Vale
+
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v34
+        with:
+          files: |
+            website/**/*.md
+          separator: ' '
+
+      - name: Debugging - Print changed files
+        if: ${{ steps.changed-files.outputs.any_changed == 'true' }}
+        run: |
+          echo "Changed files:"
+          echo "${{ steps.changed-files.outputs.all_changed_and_modified_files }}"
+
+      - name: Confirm files exist
+        if: ${{ steps.changed-files.outputs.any_changed == 'true' }}
+        run: |
+          echo "Checking if files exist..."
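+          # tj-actions/changed-files reports repo-root-relative paths; fail fast if any reported file is missing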
+ for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do + if [ -f "$file" ]; then + echo "Found: $file" + else + echo "File not found: $file" + exit 1 + fi + done + + - name: Run vale + if: ${{ steps.changed-files.outputs.any_changed == 'true' }} + uses: errata-ai/vale-action@reviewdog + with: + token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-check + files: ${{ steps.changed-files.outputs.all_changed_and_modified_files }} + separator: ' ' + version: '2.27.0' + +# - name: Post summary comment +# if: ${{ steps.changed-files.outputs.any_changed == 'true' }} +# run: | +# COMMENT="❗️Oh no, some Vale linting found issues! Please check the **Files change** tab for detailed results and make the necessary updates." +# COMMENT+=$'\n' +# COMMENT+=$'\n\n' +# COMMENT+="➡️ Link to detailed report: [Files changed](${{ github.event.pull_request.html_url }}/files)" +# gh pr comment ${{ github.event.pull_request.number }} --body "$COMMENT" +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.hyperlint/config.yaml b/.hyperlint/config.yaml new file mode 100644 index 00000000000..03082114ae1 --- /dev/null +++ b/.hyperlint/config.yaml @@ -0,0 +1,10 @@ +content_dir: /docs +authorized_users: + - mirnawong1 + - matthewshaver + - nghi-ly + - runleonarun + - nataliefiann + +vale: + enabled: true diff --git a/.vale.ini b/.vale.ini new file mode 100644 index 00000000000..58aff923afe --- /dev/null +++ b/.vale.ini @@ -0,0 +1,7 @@ +StylesPath = styles +MinAlertLevel = warning + +Vocab = EN + +[*.md] +BasedOnStyles = custom diff --git a/contributing/adding-page-components.md b/contributing/adding-page-components.md index 68294e7d149..7a92d627995 100644 --- a/contributing/adding-page-components.md +++ b/contributing/adding-page-components.md @@ -4,7 +4,7 @@ You can use the following components to provide code snippets for each supported Identify code by labeling with the warehouse names: -```code +```sql
@@ -32,7 +32,7 @@ You can use the following components to provide code snippets in a tabbed view.
 
 Identify code and code files by labeling with the component they are describing:
 
-```code
+```sql
@@ … @@
-You can use the [queryString](https://docusaurus.io/docs/next/markdown-features/tabs?current-os=ios#query-string) prop in the `<Tabs>` tag. This allows you to share a link to a page with a pre-selected tab so that clicking on a tab creates a unique hyperlink for that tab. However, this feature doesn't provide an anchor link, which means the browser won't scroll to the tab. Additionally, you can define the search parameter name to use. If the tabs content is under a header, you can alternatively link to the header itself, instaed of the `queryString` prop.
+You can use the [queryString](https://docusaurus.io/docs/next/markdown-features/tabs?current-os=ios#query-string) prop in the `<Tabs>` tag. This allows you to share a link to a page with a pre-selected tab so that clicking on a tab creates a unique hyperlink for that tab. However, this feature doesn't provide an anchor link, which means the browser won't scroll to the tab. Additionally, you can define the search parameter name to use. If the tabs content is under a header, you can alternatively link to the header itself, instead of the `queryString` prop.
 
 In the following example, clicking a tab adds a search parameter to the end of the URL: `?current-os=android or ?current-os=ios`.
 
-```
+```sql
 Android
@@ -105,3 +105,48 @@ In the following example, clicking a tab adds a search parameter to the end of t
 ```
+
+## Markdown links
+
+Refer to the Links section of the Content Style Guide to read about how you can use links in the dbt product documentation.
+
+## Collapsible header
+
+<Expandable alt_header="…">
+
+Shows and hides children elements
+
+</Expandable>
+
+```markdown
+<Expandable alt_header="…">
+
+Shows and hides children elements
+
+</Expandable>
+```
+
+## File component
+
+```yml
+<File name='~/.dbt/profiles.yml'>
+
+```yaml
+password: hunter2
+```
+
+</File>
+```
+
+## LoomVideo component
+
+<pre>{`<LoomVideo id="…" />`}</pre>
+
+<LoomVideo id="…" />
+
+## YoutubeVideo component
+
+<pre>{`<YoutubeVideo id="…" />`}</pre>
+ + + diff --git a/styles/Vocab/EN/accept.txt b/styles/Vocab/EN/accept.txt new file mode 100644 index 00000000000..e673e2ef83d --- /dev/null +++ b/styles/Vocab/EN/accept.txt @@ -0,0 +1,67 @@ +dbt Cloud +dbt Core +dbt Semantic Layer +dbt Explorer +dbt +dbt-tonic +dbtonic +IDE +CLI +Config +info +docs +yaml +YAML +SQL +bash +shell +MetricFlow +jinja +jinja2 +sqlmesh +Snowflake +Databricks +Fabric +Redshift +Azure +DevOps +Athena +Amazon +UI +CSV +S3 +SCD +repo +dbt_project.yml +boolean +defaultValue= +DWH +DWUs +shoutout +ADF +BQ +gcloud +MSFT +DDL +APIs +API +SSIS +PBI +PowerBI +datetime +PySpark +:::caution +:::note +:::info +:::tip +:::warning +\<[^>]+\> +\b[A-Z]{2,}(?:/[A-Z]{2,})?\b +\w+-\w+ +\w+/\w+ +n/a +N/A +\ diff --git a/styles/custom/LatinAbbreviations.yml b/styles/custom/LatinAbbreviations.yml new file mode 100644 index 00000000000..44a3c9d6e8c --- /dev/null +++ b/styles/custom/LatinAbbreviations.yml @@ -0,0 +1,15 @@ +# LatinAbbreviations.yml +extends: substitution +message: "Avoid Latin abbreviations: '%s'. Consider using '%s' instead." +level: warning + +swap: + 'e.g.': 'for example' + 'e.g': 'for example' + 'eg': 'for example' + 'i.e.': 'that is' + 'i.e': 'that is' + 'etc.': 'and so on' + 'etc': 'and so on' + 'N.B.': 'Note' + 'NB': 'Note' diff --git a/styles/custom/Repitition.yml b/styles/custom/Repitition.yml new file mode 100644 index 00000000000..4cd620146cf --- /dev/null +++ b/styles/custom/Repitition.yml @@ -0,0 +1,6 @@ +extends: repetition +message: "'%s' is repeated!" +level: warning +alpha: true +tokens: + - '[^\s]+' diff --git a/styles/custom/SentenceCaseHeaders.yml b/styles/custom/SentenceCaseHeaders.yml new file mode 100644 index 00000000000..d1d6cd97c67 --- /dev/null +++ b/styles/custom/SentenceCaseHeaders.yml @@ -0,0 +1,34 @@ +extends: capitalization +message: "'%s' should use sentence-style capitalization. Try '%s' instead." +level: warning +scope: heading +match: $sentence # Enforces sentence-style capitalization +indicators: + - ":" +exceptions: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - Snowflake + - Databricks + - Azure + - GCP + - AWS + - SQL + - CLI + - API + - YAML + - JSON + - HTML + - Redshift + - Google + - BigQuery + - SnowSQL + - Snowsight + - Snowpark + - Fabric + - Microsoft + - Postgres + - Explorer + - IDE diff --git a/styles/custom/Typos.yml b/styles/custom/Typos.yml new file mode 100644 index 00000000000..456517950a9 --- /dev/null +++ b/styles/custom/Typos.yml @@ -0,0 +1,39 @@ +extends: spelling + +message: "Oops there's a typo -- did you really mean '%s'? 
" +level: warning + +action: + name: suggest + params: + - spellings + +custom: true +filters: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - '\bdbt\s+.*?\b' + - '<[^>]+>' # Ignore all HTML-like components starting with < and ending with > + - '<[^>]+>.*<\/[^>]+>' + +--- + +extends: existence + +message: "Ignore specific patterns" +level: skip +tokens: + - '\bdbt\b' + - '\bdbt\s+Cloud\b' + - '\bdbt\s+Core\b' + - '\bdbt\s+Cloud\s+CLI\b' + - '\bdbt\s+.*?\b' + - '<[^>]+>' # Ignore all HTML-like components starting with < and ending with > + - '<[^>]+>.*<\/[^>]+>' + - '\w+-\w+' + - '\w+/\w+' + - '\w+/\w+|\w+-\w+|n/a' + - 'n/a' + - 'N/A' diff --git a/styles/custom/UIElements.yml b/styles/custom/UIElements.yml new file mode 100644 index 00000000000..f78a15af4b4 --- /dev/null +++ b/styles/custom/UIElements.yml @@ -0,0 +1,17 @@ +# styles/custom/BoldUIElements.yml +extends: existence +message: "UI elements like '%s' should be bold." +level: warning +tokens: + # Match UI elements that are not bolded (i.e., not within **), but exclude those starting a sentence or following a list number + - '(? + ## What is Iceberg? To have this conversation, we need to start with the same foundational understanding of Iceberg. Apache Iceberg is a high-performance open table format developed for modern data lakes. It was designed for large-scale datasets, and within the project, there are many ways to interact with it. When people talk about Iceberg, it often means multiple components including but not limited to: diff --git a/website/blog/2024-10-05-snowflake-feature-store.md b/website/blog/2024-10-05-snowflake-feature-store.md index fb62955d4a4..cf5c55be1b5 100644 --- a/website/blog/2024-10-05-snowflake-feature-store.md +++ b/website/blog/2024-10-05-snowflake-feature-store.md @@ -13,6 +13,8 @@ Flying home into Detroit this past week working on this blog post on a plane and Think of the manufacturing materials needed as our data and the building of the bridge as the building of our ML models. There are thousands of engineers and construction workers taking materials from all over the world, pulling only the specific pieces needed for each part of the project. However, to make this project truly work at this scale, we need the warehousing and logistics to ensure that each load of concrete rebar and steel meets the standards for quality and safety needed and is available to the right people at the right time — as even a single fault can have catastrophic consequences or cause serious delays in project success. This warehouse and the associated logistics play the role of the feature store, ensuring that data is delivered consistently where and when it is needed to train and run ML models. + + ## What is a feature? A feature is a transformed or enriched data that serves as an input into a machine learning model to make predictions. In machine learning, a data scientist derives features from various data sources to build a model that makes predictions based on historical data. To capture the value from this model, the enterprise must operationalize the data pipeline, ensuring that the features being used in production at inference time match those being used in training and development. diff --git a/website/blog/ctas.yml b/website/blog/ctas.yml index ac56d4cc749..1f9b13afa7b 100644 --- a/website/blog/ctas.yml +++ b/website/blog/ctas.yml @@ -25,3 +25,8 @@ subheader: Coalesce is the premiere analytics engineering conference! 
Sign up now for innovation, collaboration, and inspiration. Don't miss out!
   button_text: Register now
   url: https://coalesce.getdbt.com/register
+- name: coalesce_2024_catchup
+  header: Missed Coalesce 2024?
+  subheader: Catch up on Coalesce 2024 and register to access a select number of on-demand sessions.
+  button_text: Register and watch
+  url: https://coalesce.getdbt.com/register/online
diff --git a/website/blog/metadata.yml b/website/blog/metadata.yml
index d0009fd62c4..8b53a7a2a04 100644
--- a/website/blog/metadata.yml
+++ b/website/blog/metadata.yml
@@ -2,7 +2,7 @@ featured_image: ""
 
 # This CTA lives in right sidebar on blog index
-featured_cta: "coalesce_2024_signup"
+featured_cta: "coalesce_2024_catchup"
 
 # Show or hide hero title, description, cta from blog index
 show_title: true
diff --git a/website/docs/docs/build/data-tests.md b/website/docs/docs/build/data-tests.md
index ae3ac9225db..b4f25a3d111 100644
--- a/website/docs/docs/build/data-tests.md
+++ b/website/docs/docs/build/data-tests.md
@@ -66,9 +66,25 @@ having total_amount < 0
 
-The name of this test is the name of the file: `assert_total_payment_amount_is_positive`. Simple enough.
+The name of this test is the name of the file: `assert_total_payment_amount_is_positive`.
 
-Singular data tests are easy to write—so easy that you may find yourself writing the same basic structure over and over, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend...
+To add a description to a singular data test in your project, add a `.yml` file to your `tests` directory, for example, `tests/schema.yml` with the following content:
+
+<File name='tests/schema.yml'>
+
+```yaml
+version: 2
+data_tests:
+  - name: assert_total_payment_amount_is_positive
+    description: >
+      Refunds have a negative amount, so the total amount should always be >= 0.
+      Therefore return records where total amount < 0 to make the test fail.
+
+```
+
+</File>
+
+Singular data tests are so easy that you may find yourself writing the same basic structure repeatedly, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend generic data tests.
 
 ## Generic data tests
 Certain data tests are generic: they can be reused over and over again. A generic data test is defined in a `test` block, which contains a parametrized query and accepts arguments. It might look like:
diff --git a/website/docs/docs/build/incremental-microbatch.md b/website/docs/docs/build/incremental-microbatch.md
index 2cc39e9e3b9..38e0edddfd6 100644
--- a/website/docs/docs/build/incremental-microbatch.md
+++ b/website/docs/docs/build/incremental-microbatch.md
@@ -12,6 +12,8 @@ The `microbatch` strategy is available in beta for [dbt Cloud Versionless](/docs
 
 Read and participate in the discussion: [dbt-core#10672](https://github.com/dbt-labs/dbt-core/discussions/10672)
 
+Refer to [Supported incremental strategies by adapter](/docs/build/incremental-strategy#supported-incremental-strategies-by-adapter) for a list of supported adapters.
+
 :::
 
 ## What is "microbatch" in dbt?
 
@@ -162,7 +164,7 @@ Several configurations are relevant to microbatch models, and some are required:
 |----------|------|---------------|---------|
 | `event_time` | Column (required) | The column indicating "at what time did the row occur." Required for your microbatch model and any direct parents that should be filtered. | N/A |
 | `begin` | Date (required) | The "beginning of time" for the microbatch model. This is the starting point for any initial or full-refresh builds.
For example, a daily-grain microbatch model run on `2024-10-01` with `begin = '2023-10-01'` will process 366 batches (it's a leap year!) plus the batch for "today." | N/A |
-| `batch_size` | String (required) | The granularity of your batches. The default is `day` (and currently this is the only granularity supported). | `day` |
+| `batch_size` | String (required) | The granularity of your batches. Supported values are `hour`, `day`, `month`, and `year`. | N/A |
 | `lookback` | Integer (optional) | Process X batches prior to the latest bookmark to capture late-arriving records. | `0` |
diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md
index c48030cc32d..2968496290a 100644
--- a/website/docs/docs/build/incremental-models.md
+++ b/website/docs/docs/build/incremental-models.md
@@ -94,7 +94,7 @@ Not specifying a `unique_key` will result in append-only behavior, which means d
 
 The optional `unique_key` parameter specifies a field (or combination of fields) that defines the grain of your model. That is, the field(s) identify a single unique row. You can define `unique_key` in a configuration block at the top of your model, and it can be a single column name or a list of column names.
 
-The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …])`. Columns used in this way should not contain any nulls, or the incremental model run may fail. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`), or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)).
+The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …]`. Columns used in this way should not contain any nulls, or the incremental model may fail to match rows and generate duplicate rows. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`) or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)).
 
 :::tip
 In cases where you need multiple columns in combination to uniquely identify each row, we recommend you pass these columns as a list (`unique_key = ['user_id', 'session_number']`), rather than a string expression (`unique_key = 'concat(user_id, session_number)'`).
diff --git a/website/docs/docs/cloud/dbt-copilot.md b/website/docs/docs/cloud/dbt-copilot.md
index 42a05dd91ba..403df86a089 100644
--- a/website/docs/docs/cloud/dbt-copilot.md
+++ b/website/docs/docs/cloud/dbt-copilot.md
@@ -13,7 +13,7 @@ dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integ
 
 :::tip Beta feature
 dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models in dbt Cloud. It's available in beta, in the dbt Cloud IDE only.
 
-To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and agree to use dbt Labs' OpenAI key.
[Register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) to join the private beta or reach out to your Account team to begin this process.
+To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and either agree to use dbt Labs' OpenAI key or provide your own OpenAI API key. [Register here](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to the Account Team if you're interested in joining the private beta.
 
 :::
diff --git a/website/docs/docs/cloud/enable-dbt-copilot.md b/website/docs/docs/cloud/enable-dbt-copilot.md
index 23c253ecf7a..07a9f6294da 100644
--- a/website/docs/docs/cloud/enable-dbt-copilot.md
+++ b/website/docs/docs/cloud/enable-dbt-copilot.md
@@ -13,12 +13,12 @@ This page explains how to enable the dbt Copilot engine in dbt Cloud, leveraging
 
 - Available in the dbt Cloud IDE only.
 - Must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing).
 - Development environment has been upgraded to ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless).
-- Current dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. In the future, you may provide your own key for Azure OpenAI or OpenAI.
+- By default, dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. Alternatively, you can [provide your own OpenAI API key](#bringing-your-own-openai-api-key-byok).
 - Accept and sign legal agreements. Reach out to your Account team to begin this process.
 
 ## Enable dbt Copilot
 
-dbt Copilot is only available at an account level after your organization has signed the legal requirements. It's disabled by default. A dbt Cloud admin(s) can enable it by following these steps:
+dbt Copilot is only available to your account after your organization has signed the required legal documents. It's disabled by default. A dbt Cloud admin can enable it by following these steps:
 
 1. Navigate to **Account settings** in the navigation menu.
 
@@ -32,4 +32,20 @@ dbt Copilot is only available at an account level after your organization has si
 
 Note: To disable (only after enabled), repeat steps 1 to 3, toggle off in step 4, and repeat step 5.
 
-
\ No newline at end of file
+
+
+### Bringing your own OpenAI API key (BYOK)
+
+Once AI features have been enabled, you can provide your organization's OpenAI API key. dbt Cloud will then leverage your OpenAI account and terms to power dbt Copilot. This will incur billing charges to your organization from OpenAI for requests made by dbt Copilot.
+
+Note that Azure OpenAI is not currently supported, but will be in the future.
+
+A dbt Cloud admin can provide their API key by following these steps:
+
+1. Navigate to **Account settings** in the side menu.
+
+2. Find the **Settings** section and click on **Integrations**.
+
+3. Scroll to **AI** and select the toggle for **OpenAI**.
+
+4. Enter your API key and click **Save**.
\ No newline at end of file
diff --git a/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md b/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md
index 29f3650e7a6..aaa85e4ecef 100644
--- a/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md
+++ b/website/docs/docs/dbt-versions/core-upgrade/06-upgrading-to-v1.9.md
@@ -107,6 +107,6 @@ You can read more about each of these behavior changes in the following links:
 
 We also made some quality-of-life improvements in Core 1.9, enabling you to:
 
 - Maintain data quality now that dbt returns an error (versioned models) or warning (unversioned models) when someone [removes a contracted model by deleting, renaming, or disabling](/docs/collaborate/govern/model-contracts#how-are-breaking-changes-handled) it.
-- Document [singular data tests](/docs/build/data-tests#document-singular-tests).
+- Document [singular data tests](/docs/build/data-tests#singular-data-tests).
 - Use `ref` and `source` in [foreign key constraints](/reference/resource-properties/constraints).
 - Use `dbt test` with the `--resource-type` / `--exclude-resource-type` flag, making it possible to include or exclude data tests (`test`) or unit tests (`unit_test`).
diff --git a/website/docs/docs/deploy/ci-jobs.md b/website/docs/docs/deploy/ci-jobs.md
index cd04d1f4035..0bdf9e711f5 100644
--- a/website/docs/docs/deploy/ci-jobs.md
+++ b/website/docs/docs/deploy/ci-jobs.md
@@ -14,7 +14,7 @@ dbt Labs recommends that you create your CI job in a dedicated dbt Cloud [deploy
 
 - You have a dbt Cloud account.
 - CI features:
   - For both the [concurrent CI checks](/docs/deploy/continuous-integration#concurrent-ci-checks) and [smart cancellation of stale builds](/docs/deploy/continuous-integration#smart-cancellation) features, your dbt Cloud account must be on the [Team or Enterprise plan](https://www.getdbt.com/pricing/).
-  - The [SQL linting](/docs/deploy/continuous-integration#sql-linting) feature is currently available in beta to a limited group of users and is gradually being rolled out. If you're in the beta, the **Linting** option is available for use.
+  - The [SQL linting](/docs/deploy/continuous-integration#sql-linting) feature is currently available in [beta](/docs/dbt-versions/product-lifecycles#dbt-cloud) to a limited group of users and is gradually being rolled out. If you're in the beta, the **Linting** option is available for use.
 - [Advanced CI](/docs/deploy/advanced-ci) features:
   - For the [compare changes](/docs/deploy/advanced-ci#compare-changes) feature, your dbt Cloud account must be on the [Enterprise plan](https://www.getdbt.com/pricing/) and have enabled Advanced CI features. Please ask your [dbt Cloud administrator to enable](/docs/cloud/account-settings#account-access-to-advanced-ci-features) this feature for you. After enablement, the **Run compare changes** option becomes available in the CI job settings.
 - Set up a [connection with your Git provider](/docs/cloud/git/git-configuration-in-dbt-cloud). This integration lets dbt Cloud run jobs on your behalf for job triggering.
diff --git a/website/docs/docs/deploy/continuous-integration.md b/website/docs/docs/deploy/continuous-integration.md
index 2119724e609..c10cdfc9db1 100644
--- a/website/docs/docs/deploy/continuous-integration.md
+++ b/website/docs/docs/deploy/continuous-integration.md
@@ -58,8 +58,8 @@ CI runs don't consume run slots.
This guarantees a CI check will never block a production run.
 
 ### SQL linting
 
-When enabled for your CI job, dbt invokes [SQLFluff](https://sqlfluff.com/) which is a modular and configurable SQL linter that warns you of complex functions, syntax, formatting, and compilation errors. By default, it lints all the SQL files in your project.
+When enabled for your CI job, dbt invokes [SQLFluff](https://sqlfluff.com/), a modular and configurable SQL linter that warns you of complex functions, syntax, formatting, and compilation errors. By default, it lints all the changed SQL files in your project (compared to the last deferred production state).
 
 If the linter runs into errors, you can specify whether dbt should fail the job or continue running it. When failing jobs, it helps reduce compute costs by avoiding builds for pull requests that don't meet your SQL code quality CI check.
 
-To override the default linting behavior, create an `.sqlfluff` config file in your project and add your linting rules to it. dbt Cloud will use the rules defined in the config file when linting. For details about linting rules, refer to [Custom Usage](https://docs.sqlfluff.com/en/stable/gettingstarted.html#custom-usage) in the SQLFluff documentation.
+You can use [SQLFluff Configuration Files](https://docs.sqlfluff.com/en/stable/configuration/setting_configuration.html#configuration-files) to override the default linting behavior in dbt. Create an `.sqlfluff` configuration file in your project, add your linting rules to it, and dbt Cloud will use them when linting. For complete details, refer to [Custom Usage](https://docs.sqlfluff.com/en/stable/gettingstarted.html#custom-usage) in the SQLFluff documentation.
diff --git a/website/docs/reference/commands/build.md b/website/docs/reference/commands/build.md
index c7ac29862c2..9f8e83d2abd 100644
--- a/website/docs/reference/commands/build.md
+++ b/website/docs/reference/commands/build.md
@@ -31,32 +31,9 @@ In DAG order, for selected resources or an entire project.
 
 The `build` command supports the `--empty` flag for building schema-only dry runs. The `--empty` flag limits the refs and sources to zero rows. dbt will still execute the model SQL against the target data warehouse but will avoid expensive reads of input data. This validates dependencies and ensures your models will build properly.
 
-#### SQL compilation error when running the `--empty` flag on a model
-
-If you encounter the error: `SQL compilation error: syntax error line 1 at position 21 unexpected '('.` when running a model with the `--empty` flag, explicitly call the `.render()` method on that relation.
-
-
-
-```Jinja
-
--- models/staging/stg_sys__customers.sql
-{{ config(
-    pre_hook = [
-        "alter external table {{ source('sys', 'customers').render() }} refresh"
-    ]
-) }}
-
-with cus as (
-    select * from {{ source("sys", "customers") }} -- leave this as is!
-)
-
-select * from cus
-
-```
-
-
+import SQLCompilationError from '/snippets/_render-method.md';
+<SQLCompilationError />
 
 ## Tests
diff --git a/website/docs/reference/global-configs/behavior-changes.md b/website/docs/reference/global-configs/behavior-changes.md
index d35b83765e3..ae109b8f7c7 100644
--- a/website/docs/reference/global-configs/behavior-changes.md
+++ b/website/docs/reference/global-configs/behavior-changes.md
@@ -60,7 +60,7 @@ flags:
 
-When we use dbt Cloud in the following table, we're referring to accounts that have gone "[Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless)."
+When we use dbt Cloud in the following table, we're referring to accounts that have gone "[Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless)." This table outlines which version of dbt Core contains the behavior change or the date the behavior change was added to dbt Cloud.
 
 | Flag | dbt Cloud: Intro | dbt Cloud: Maturity | dbt Core: Intro | dbt Core: Maturity |
 |-----------------------------------------------------------------|------------------|---------------------|-----------------|--------------------|
diff --git a/website/docs/reference/resource-configs/bigquery-configs.md b/website/docs/reference/resource-configs/bigquery-configs.md
index a6f3036ede8..b943f114861 100644
--- a/website/docs/reference/resource-configs/bigquery-configs.md
+++ b/website/docs/reference/resource-configs/bigquery-configs.md
@@ -21,7 +21,7 @@ This will allow you to read and write from multiple BigQuery projects. Same for
 
 ### Partition clause
 
-BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a <Term id="table" /> by a column or expression. This option can help decrease latency and cost when querying large tables. Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#pruning_limiting_partitions) when partitions are filtered using literal values (so selecting partitions using a <Term id="subquery" /> won't improve performance).
+BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a <Term id="table" /> by a column or expression. This option can help decrease latency and cost when querying large tables. Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#use_a_constant_filter_expression) when partitions are filtered using literal values (so selecting partitions using a <Term id="subquery" /> won't improve performance).
 
 The `partition_by` config can be supplied as a dictionary with the following format:
 
@@ -265,7 +265,7 @@ If your model has `partition_by` configured, you may optionally specify two addi
 
-### Clustering Clause
+### Clustering clause
 
 BigQuery tables can be [clustered](https://cloud.google.com/bigquery/docs/clustered-tables) to colocate related data.
 
@@ -286,7 +286,7 @@ select * from ...
 
-Clustering on a multiple columns:
+Clustering on multiple columns:
 
@@ -303,11 +303,11 @@ select * from ...
 
-## Managing KMS Encryption
+## Managing KMS encryption
 
 [Customer managed encryption keys](https://cloud.google.com/bigquery/docs/customer-managed-encryption) can be configured for BigQuery tables using the `kms_key_name` model configuration.
 
-### Using KMS Encryption
+### Using KMS encryption
 
 To specify the KMS key name for a model (or a group of models), use the `kms_key_name` model configuration. The following example sets the `kms_key_name` for all of the models in the `encrypted/` directory of your dbt project.
@@ -328,7 +328,7 @@ models: -## Labels and Tags +## Labels and tags ### Specifying labels @@ -373,8 +373,6 @@ models: - - ### Specifying tags @@ -434,7 +432,7 @@ The `incremental_strategy` config can be set to one of two values: ### Performance and cost The operations performed by dbt while building a BigQuery incremental model can -be made cheaper and faster by using [clustering keys](#clustering-keys) in your +be made cheaper and faster by using a [clustering clause](#clustering-clause) in your model configuration. See [this guide](https://discourse.getdbt.com/t/benchmarking-incremental-strategies-on-bigquery/981) for more information on performance tuning for BigQuery incremental models. **Note:** These performance and cost benefits are applicable to incremental models @@ -673,7 +671,7 @@ select ... -## Authorized Views +## Authorized views If the `grant_access_to` config is specified for a model materialized as a view, dbt will grant the view model access to select from the list of datasets diff --git a/website/docs/reference/resource-configs/pre-hook-post-hook.md b/website/docs/reference/resource-configs/pre-hook-post-hook.md index ce818768134..bd01a7be840 100644 --- a/website/docs/reference/resource-configs/pre-hook-post-hook.md +++ b/website/docs/reference/resource-configs/pre-hook-post-hook.md @@ -154,6 +154,10 @@ Pre- and post-hooks can also call macros that return SQL statements. If your mac dbt aims to provide all the boilerplate SQL you need (DDL, DML, and DCL) via out-of-the-box functionality, which you can configure quickly and concisely. In some cases, there may be SQL that you want or need to run, specific to functionality in your data platform, which dbt does not (yet) offer as a built-in feature. In those cases, you can write the exact SQL you need, using dbt's compilation context, and pass it into a `pre-` or `post-` hook to run before or after your model, seed, or snapshot. 
+import SQLCompilationError from '/snippets/_render-method.md';
+
+<SQLCompilationError />
+
 
 ## Examples
diff --git a/website/docs/reference/resource-properties/deprecation_date.md b/website/docs/reference/resource-properties/deprecation_date.md
index be76ccb07f6..70f150dc465 100644
--- a/website/docs/reference/resource-properties/deprecation_date.md
+++ b/website/docs/reference/resource-properties/deprecation_date.md
@@ -53,11 +53,11 @@ Additionally, [`WARN_ERROR_OPTIONS`](/reference/global-configs/warnings) gives a
 
 |--------------------------------|----------------------------------------------------|------------------------|
 | `DeprecatedModel`              | Parsing a project that defines a deprecated model  | Producer               |
 | `DeprecatedReference`          | Referencing a model with a past deprecation date   | Producer and consumers |
-| `UpcomingDeprecationReference` | Referencing a model with a future deprecation date | Producer and consumers |
+| `UpcomingReferenceDeprecation` | Referencing a model with a future deprecation date | Producer and consumers |
 
 **Example**
 
-Example output for an `UpcomingDeprecationReference` warning:
+Example output for an `UpcomingReferenceDeprecation` warning:
 ```
 $ dbt parse
 15:48:14  Running with dbt=1.6.0
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
index dbd389a2299..b68e2e8ec5c 100644
--- a/website/docusaurus.config.js
+++ b/website/docusaurus.config.js
@@ -72,15 +72,14 @@ var siteSettings = {
   },
   announcementBar: {
     id: "biweekly-demos",
-    content:
-      "Register now for Coalesce 2024 ✨ The Analytics Engineering Conference!",
-    backgroundColor: "#7444FD",
+    content: "Join our biweekly demos and see dbt Cloud in action!",
+    backgroundColor: "#047377",
     textColor: "#fff",
     isCloseable: true,
   },
   announcementBarActive: true,
   announcementBarLink:
-    "https://coalesce.getdbt.com/register/?utm_medium=internal&utm_source=docs&utm_campaign=q3-2025_coalesce-2024_aw&utm_content=coalesce____&utm_term=all_all__",
+    "https://www.getdbt.com/resources/webinars/dbt-cloud-demos-with-experts/?utm_medium=i[…]ly-demos_aw&utm_content=biweekly-demos____&utm_term=all_all__",
   // Set community spotlight member on homepage
   // This is the ID for a specific file under docs/community/spotlight
   communitySpotlightMember: "meagan-palmer",
@@ -206,6 +205,7 @@ var siteSettings = {
             src="https://solve-widget.forethought.ai/embed.js"
             id="forethought-widget-embed-script"
             data-api-key="9d421bf3-96b8-403e-9900-6fb059132264"
             data-ft-workflow-tag="docs"
             config-ft-greeting-message="Welcome to dbt Product docs! Ask a question."
+            config-ft-widget-header-title="Ask a question"
           >
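A quick way for a reviewer to sanity-check the new Vale setup locally: the sketch below assumes the `.vale.ini` and `styles/` directory from this diff sit at the repository root, and the target path is just an example, as any changed Markdown file under `website/` works.

```bash
# Same pinned version the vale.yml workflow installs
pip install vale==2.27.0

# Vale discovers .vale.ini at the repo root automatically;
# point it at a changed file to exercise the custom styles
vale website/docs/docs/build/data-tests.md
```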