diff --git a/.github/ISSUE_TEMPLATE/improve-docs.yml b/.github/ISSUE_TEMPLATE/a-improve-docs.yml similarity index 98% rename from .github/ISSUE_TEMPLATE/improve-docs.yml rename to .github/ISSUE_TEMPLATE/a-improve-docs.yml index 57dc64cc312..70b173e49a4 100644 --- a/.github/ISSUE_TEMPLATE/improve-docs.yml +++ b/.github/ISSUE_TEMPLATE/a-improve-docs.yml @@ -39,4 +39,4 @@ body: label: Additional information description: Add any other context or screenshots about the feature request here. validations: - required: false \ No newline at end of file + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9349000f66b..f3a3521bdec 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,8 +1,5 @@ blank_issues_enabled: true contact_links: - - name: Want to see new content? Open a discussion! - url: https://github.com/dbt-labs/docs.getdbt.com/discussions/new - about: You can open a discussion to propose new content for the dbt product documentation. - name: Have questions about dbt? Join the Community! url: https://www.getdbt.com/community/join-the-community about: You can join the dbt Labs Community to ask and answer questions. diff --git a/.github/ISSUE_TEMPLATE/improve-the-site.yml b/.github/ISSUE_TEMPLATE/improve-the-site.yml index e0556d7374f..dd585324f89 100644 --- a/.github/ISSUE_TEMPLATE/improve-the-site.yml +++ b/.github/ISSUE_TEMPLATE/improve-the-site.yml @@ -1,6 +1,6 @@ -name: Improve the docs.getdbt.com site -description: Make a suggestion or report a problem about the technical implementation of docs.getdbt.com. -labels: ["engineering"] +name: Report a docs.getdbt.com site issue +description: Report a problem about the technical implementation of docs.getdbt.com. +labels: ["engineering","bug"] body: - type: markdown attributes: @@ -39,4 +39,4 @@ body: label: Additional information description: Any additional information, configuration, or data that might be necessary to reproduce the issue. validations: - required: false \ No newline at end of file + required: false diff --git a/.github/ISSUE_TEMPLATE/new-dbt-feature.yml b/.github/ISSUE_TEMPLATE/new-dbt-feature.yml deleted file mode 100644 index fa46a189fc4..00000000000 --- a/.github/ISSUE_TEMPLATE/new-dbt-feature.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Start docs project for a new feature -description: For dbt PMs to add docs for their new or updated dbt product features. -labels: ["content","upcoming release"] -body: - - type: markdown - attributes: - value: | - * Before you file an issue read the [Contributing guide](https://github.com/dbt-labs/docs.getdbt.com#contributing). - * Check to make sure someone hasn't already opened a similar [issue](https://github.com/dbt-labs/docs.getdbt.com/issues). - - - type: checkboxes - id: contributions - attributes: - label: Contributions - description: This applies to new, unreleased content. - options: - - label: I am a PM or subject matter expert at dbt who is responsible for this feature. - - - type: textarea - attributes: - label: Where does this content belong? - description: | - - Give as much detail as you can to help us understand where you expect the content to live. - validations: - required: true - - - type: textarea - attributes: - label: Link to source material - description: | - Use the [source material template](https://docs.google.com/document/d/1lLWGMXJFjkY4p7r8ZKhBX73dOLmIjgXZBYq39LqmAJs/edit) to provide source material for this feature. 
-    validations:
-      required: true
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/zzz_add-adapter-to-trusted-list.yml b/.github/ISSUE_TEMPLATE/zzz_add-adapter-to-trusted-list.yml
new file mode 100644
index 00000000000..e19accf6ebb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/zzz_add-adapter-to-trusted-list.yml
@@ -0,0 +1,62 @@
+name: Add adapter to Trusted list
+description: For adapter maintainers who wish to have theirs added to the list of Trusted adapters.
+title: "Trust dbt-myadapter"
+labels: ["adapter maintainers"]
+assignees:
+  - dataders
+body:
+  - type: markdown
+    attributes:
+      value: |
+        We're excited that you'd like to support your adapter formally as "Trusted"! This template will ensure that you are aware of the process and the guidelines, and that you can vouch that your adapter currently meets the standards of a Trusted adapter. For more information, see [Trusted adapters](https://docs.getdbt.com/docs/trusted-adapters).
+
+  - type: input
+    id: adapter-repo
+    attributes:
+      label: Link to adapter repo
+      description: Please link to the GitHub repo
+    validations:
+      required: true
+
+  - type: input
+    id: contact
+    attributes:
+      label: Contact Details
+      description: How can we get in touch with you?
+      placeholder: your preferred email and/or dbt Slack handle
+    validations:
+      required: true
+
+  - type: dropdown
+    id: author_type
+    attributes:
+      label: Which of these best describes you?
+      options:
+        - I am a dbt Community member
+        - I work for the vendor on top of which the dbt adapter functions
+    validations:
+      required: true
+
+  - type: checkboxes
+    id: read-program-guide
+    attributes:
+      label: Please agree to each of the following
+      options:
+        - label: I am a maintainer of the adapter being submitted for Trusted status
+          required: true
+        - label: I have read both the [Trusted adapters](https://docs.getdbt.com/docs/trusted-adapters) and [Building a Trusted Adapter](https://docs.getdbt.com/guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter) pages.
+          required: true
+        - label: I believe that the adapter currently meets the expectations given above
+          required: true
+        - label: I will ensure this adapter stays in compliance with the guidelines
+          required: true
+        - label: I understand that dbt Labs reserves the right to remove an adapter from the trusted adapter list at any time, should any of the below guidelines not be met
+          required: true
+
+  - type: textarea
+    id: icon
+    attributes:
+      label: What icon should be used?
+      description: |
+        Please share an svg image that you'd like to be displayed for your adapter. Normally, this is the logo for the data platform on top of which your adapter works. If there's a dark mode version, please also share that.
+        Pasting the image from your clipboard will upload the file to GitHub and create markdown formatting for it to be rendered inline.
diff --git a/.github/workflows/autogenerated_labeler.yml b/.github/workflows/autogenerated_labeler.yml
new file mode 100644
index 00000000000..e6aab0492b8
--- /dev/null
+++ b/.github/workflows/autogenerated_labeler.yml
@@ -0,0 +1,40 @@
+# **what?**
+# Labels issues autogenerated in dbt-core
+
+# **why?**
+# To organize autogenerated issues from dbt-core to make it easier to find and track them.
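For reviewers skimming this new workflow: the label-selection step further down is just a substring check on the issue title. Here is a minimal Python sketch of that logic (illustrative only — the workflow does the check in a bash step and applies the result with the `gh` CLI, and the sample titles below are hypothetical):

```python
# Minimal sketch of the workflow's label-selection logic (illustrative only).
# Assumption: FishtownBuildBot issue titles mention the source repo name, such as "dbt-core".
def labels_for_autogenerated_issue(title: str) -> str:
    if "dbt-core" in title:
        return "content,improvement,dbt Core"
    return "content,improvement,adapters"


print(labels_for_autogenerated_issue("Docs needed for dbt-core change"))      # content,improvement,dbt Core
print(labels_for_autogenerated_issue("Docs needed for dbt-redshift change"))  # content,improvement,adapters
```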
+ +# **when?** +# When an issue is opened by the FishtownBuildBot + +name: Add Labels to Autogenerated Issues + +on: + issues: + types: [opened] + +jobs: + add_customized_labels: + if: github.event.issue.user.login == 'FishtownBuildBot' + permissions: + issues: write + + runs-on: ubuntu-latest + steps: + - name: "Determine appropriate labels by repo in title" + id: repo + env: + ISSUE_TITLE: ${{ github.event.issue.title }} + run: | + if [[ "$ISSUE_TITLE" == *"dbt-core"* ]]; then + echo "labels='content,improvement,dbt Core'" >> $GITHUB_OUTPUT + else + echo "labels='content,improvement,adapters'" >> $GITHUB_OUTPUT + fi + + - name: "Add Labels to autogenerated Issues" + id: add-labels + run: | + gh issue edit ${{ github.event.issue.number }} --repo ${{ github.repository }} --add-label ${{ steps.repo.outputs.labels }} + env: + GH_TOKEN: ${{ secrets.DOCS_SECRET }} diff --git a/.gitignore b/.gitignore index b2746893814..1f24fd3f1fc 100755 --- a/.gitignore +++ b/.gitignore @@ -11,10 +11,11 @@ website/yarn.lock website/node_modules website/i18n/* -# Local vs code +# IDE configs .vscode +.idea + # Local Netlify folder .netlify -.vscode .eslintcache diff --git a/README.md b/README.md index 4dfd8a8be9e..da82ab45fd6 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ You can add code snippets and other content in a tabbed view. To learn more abou # Running the Docs site locally -You can click a link available in a netlify bot PR comment to see and review your changes rendered on a staging server. You are also able to see and review your proposed modifications locally on your computer. Our setup instructions use [homebrew](https://brew.sh/): +You can click a link available in a Vercel bot PR comment to see and review your changes rendered on a staging server. You are also able to see and review your proposed modifications locally on your computer. Our setup instructions use [homebrew](https://brew.sh/): ## Prerequisites diff --git a/contributing/single-sourcing-content.md b/contributing/single-sourcing-content.md index 5b87d494c94..7c345a6631a 100644 --- a/contributing/single-sourcing-content.md +++ b/contributing/single-sourcing-content.md @@ -90,7 +90,7 @@ This component can be added directly to a markdown file in a similar way as othe Both properties can be used together to set a range where the content should show. In the example below, this content will only show if the selected version is between **0.21** and **1.0**: ```markdown - + Versioned content here diff --git a/website/api/get-discourse-comments.js b/website/api/get-discourse-comments.js new file mode 100644 index 00000000000..5ac59cfe5f2 --- /dev/null +++ b/website/api/get-discourse-comments.js @@ -0,0 +1,169 @@ +const axios = require('axios') +require("dotenv").config(); + +const { DISCOURSE_DEVBLOG_API_KEY , DISCOURSE_USER_SYSTEM } = process.env +const DEVBLOG_PROD_URL = 'https://docs.getdbt.com/blog/' +const DEV_ENV = 'dev-' +const PREVIEW_ENV = 'deploy-preview-' + +// Set API endpoint and headers +let discourse_endpoint = `https://discourse.getdbt.com` +let headers = { + 'Accept': 'application/json', + 'Api-Key': DISCOURSE_DEVBLOG_API_KEY, + 'Api-Username': DISCOURSE_USER_SYSTEM, +} + +async function getDiscourseComments(request, response) { + let topicId, comments, DISCOURSE_TOPIC_ID; + + const blogUrl = await getBlogUrl(request) + + if (blogUrl === DEVBLOG_PROD_URL) { + DISCOURSE_TOPIC_ID = 21 + } else { + DISCOURSE_TOPIC_ID = 2 + } + + try { + const env = + blogUrl === DEVBLOG_PROD_URL + ? 
"" + : blogUrl.includes("localhost") + ? DEV_ENV + : PREVIEW_ENV; + const postTitle = `${env}${request.query.title}`; + const postSlug = request.query.slug; + const cleanSlug = cleanUrl(request.query.slug); + const externalId = truncateString(`${env}${cleanSlug}`); + + console.table({ + blogUrl, + postTitle, + postSlug, + cleanSlug, + externalId, + }); + + + if (!postSlug) throw new Error("Unable to query Discourse API. Error reading slug."); + + topicId = await searchDiscourseExternalId(externalId); + + // First check if the dev blog post exists in Discourse + // Get the comments if it does + if (typeof topicId === "number") { + comments = await getDiscourseTopicbyID(topicId); + } else { + // If the dev blog post does not exist in Discourse + // Create a new topic and get the comments + topicId = await createDiscourseTopic(postTitle, externalId, cleanSlug, blogUrl, DISCOURSE_TOPIC_ID); + if (typeof topicId === "number") { + comments = await getDiscourseTopicbyID(topicId); + comments.shift(); + comments = { topicId, comments }; + + return await response.status(200).json(comments); + } else { + console.log("Unable to create Discourse topic TopicID is not a number."); + return await response.status(500).json({ error: "Unable to create Discourse topic TopicID is not a number." }); + } + } + + comments.shift(); + comments = { topicId, comments }; + + return await response.status(200).json(comments); + } catch (err) { + console.log("err on getDiscourseComments", err); + return await response.status(500).json({ error: "Unable to get topics from Discourse." }); + } +} + +async function createDiscourseTopic(title, externalId, slug, blogUrl, DISCOURSE_TOPIC_ID) { + console.log(`Creating a new topic in Discourse - ${title}`) + try { + const response = await axios.post(`${discourse_endpoint}/posts`, { + title: title, + raw: `This is a companion discussion topic for the original entry at ${blogUrl}${slug}`, + category: DISCOURSE_TOPIC_ID, + embed_url: `${blogUrl}${slug}`, + external_id: externalId, + tags: ['devblog'], + visible: false + }, { headers }) + + let topicId = await response.data.topic_id + + console.log('Topic successfully created with topic_id', topicId) + + return topicId + + } catch(err) { + console.log('err on createDiscourseTopic', err) + return err + } +} + +async function getDiscourseTopicbyID(topicId) { + console.log(`Topic found setting topic id - ${topicId}`) + try { + let response = await axios.get(`${discourse_endpoint}/t/${topicId}.json`, { headers }) + let { data } = await response + let post_stream = data.post_stream + let post_count = data.posts_count + + // If there is more than one comment make the topic visibile in Discourse + if (post_count > 1 && data.visible === false) { + console.log(`Topic has more than one comment. Changing visibility to visible.`) + await axios.put(`${discourse_endpoint}/t/${topicId}`, { + visible: true + }, { headers }) + } + + // Filter only 'regular' posts in Discourse. (e.g. 
not moderator actions, small_actions, whispers) + post_stream.posts = post_stream.posts.filter(post => post.post_type === 1) + + return post_stream.posts + } catch(err) { + console.log('err on getDiscourseTopicbyID', err) + return err + } +} + +async function searchDiscourseExternalId(externalId) { + console.log(`Searching for external_id in Discourse - ${externalId}`); + try { + const data = await axios.get(`${discourse_endpoint}/t/external_id/${externalId}.json`, { headers }); + return data.data.id; + } catch (err) { + if (err.response.status === 404) { + console.log("No topics found in Discourse."); + return null; + } + console.log("Unable to search Discourse for external_id.", err); + return err; + } +} + + +// Truncate external_id to 50 characters per Discourse API requirements +function truncateString(str) { + if (str.length <= 50) { + return str + } + return str.slice(0, 50) +} + +// Remove query params and hash from URL to prevent duplicate topics +function cleanUrl(url) { + return url.split("?")[0].split("#")[0]; +} + +// Create a function to get the host name from the request and add /blog/ to the end +async function getBlogUrl(req) { + const host = req.headers.host + return `https://${host}/blog/` +} + +module.exports = getDiscourseComments; diff --git a/website/api/get-discourse-topics.js b/website/api/get-discourse-topics.js new file mode 100644 index 00000000000..90d6e5af80e --- /dev/null +++ b/website/api/get-discourse-topics.js @@ -0,0 +1,136 @@ +const axios = require('axios') + +async function getDiscourseTopics(request, response) { + const { DISCOURSE_API_KEY , DISCOURSE_USER } = process.env + + const body = request.body + + try { + // Set API endpoint and headers + let discourse_endpoint = `https://discourse.getdbt.com` + let headers = { + 'Accept': 'application/json', + 'Api-Key': DISCOURSE_API_KEY, + 'Api-Username': DISCOURSE_USER, + } + + const query = buildQueryString(body) + if(!query) throw new Error('Unable to build query string.') + + // Get topics from Discourse + let { data: { posts, topics } } = await axios.get(`${discourse_endpoint}/search?q=${query}`, { headers }) + + // Return empty array if no topics found for search query + // 200 status is used to prevent triggering Datadog alerts + if(!topics || topics?.length <= 0) { + // Log message with encoded query and end function + console.log('Unable to get results from api request.') + console.log(`Search query: ${query}`) + return await response.status(200).json([]) + } + + // Set author and like_count for topics if not querying by specific term + let allTopics = topics + if(!body?.term) { + allTopics = topics.reduce((topicsArr, topic) => { + // Get first post in topic + const firstTopicPost = posts?.find(post => + post?.post_number === 1 && + post?.topic_id === topic?.id + ) + // If post found + // Get username + if(firstTopicPost?.username) { + topic.author = firstTopicPost.username + } + // Get like count + if(firstTopicPost?.like_count) { + topic.like_count = firstTopicPost.like_count + } + + if(firstTopicPost?.blurb) { + topic.blurb = firstTopicPost.blurb + } + + // Push updated topic to array + topicsArr.push(topic) + + return topicsArr + }, []) + } + + // Return topics + //return await returnResponse(200, allTopics) + return await response.status(200).json(allTopics) + } catch(err) { + // Log and return the error + console.log('err', err) + return await response.status(500).json({ error: 'Unable to get topics from Discourse.'}) + } +} + +function buildQueryString(body) { + if(!body) return null + + // 
start with empty query string + let query = '' + + // check param and apply to query if set + for (const [key, value] of Object.entries(body)) { + // validate categories + // if valid, add to query string + if(validateItem({ key, value })) { + if(key === 'category') { + query += `#${value} ` + } else if(key === 'inString') { + query += `in:${value}` + } else if(key === 'status' && Array.isArray(value)) { + value?.map(item => { + query += `${key}:${item} ` + }) + } else { + query += `${key}:${value} ` + } + } + } + + if(query) { + const encodedQuery = encodeURIComponent(query) + return encodedQuery + } +} + +function validateItem({ key, value }) { + // predefined Discourse values + // https://docs.discourse.org/#tag/Search/operation/search + const inStringValues = ['title', 'first', 'pinned', 'wiki'] + const orderValues = ['latest', 'likes', 'views', 'latest_topic'] + const statusValues = ['open', 'closed', 'public', 'archived', 'noreplies', 'single_user', 'solved', 'unsolved'] + + // validate keys + if(key === 'inString') { + return inStringValues.includes(value) + ? true + : false + } else if(key === 'order') { + return orderValues.includes(value) + ? true + : false + } else if(key === 'status') { + if(Array.isArray(value)) { + let isValid = true + value?.map(item => { + if(!statusValues.includes(item)) isValid = false + }) + return isValid + } else { + return statusValues.includes(value) + ? true + : false + } + } else { + return true + } +} + +module.exports = getDiscourseTopics diff --git a/website/blog/2023-07-03-data-vault-2-0-with-dbt-cloud.md b/website/blog/2023-07-03-data-vault-2-0-with-dbt-cloud.md index a6f3682f9e9..2a4879ac98d 100644 --- a/website/blog/2023-07-03-data-vault-2-0-with-dbt-cloud.md +++ b/website/blog/2023-07-03-data-vault-2-0-with-dbt-cloud.md @@ -115,7 +115,9 @@ In terms of the implementation of the Data Vault itself, we recommend familiariz ### AutomateDV (formerly known as dbtvault) -AutomateDV is the most popular open source Data Vault package for dbt, with some users having over 5000 Data Vault components in their project. Here in Infinite Lambda, we’ve been using this package for quite some time now, even building on top of it (depending on the specifics of the project). This mature system provides a great way to start your Data Vault with dbt Cloud journey as the learning curve is quite manageable, it is well documented and even comes with tutorials and working examples built on top of Snowflake’s TPCH standard dataset. There is one limitation to using the package and that is _AutomateDV _expects your source data to contain only one delta load. In order to work around this issue, owners of the package came up with custom dbt materializations to help you with the initial load of your system, however, the performance of such load is in our experience not acceptable. +AutomateDV is the most popular open source Data Vault package for dbt, with some users having over 5000 Data Vault components in their project. Here in Infinite Lambda, we’ve been using this package for quite some time now, even building on top of it (depending on the specifics of the project). This mature system provides a great way to start your Data Vault with dbt Cloud journey as the learning curve is quite manageable, it is well documented and even comes with tutorials and working examples built on top of Snowflake’s TPCH standard dataset. There is one limitation to using the package and that is _AutomateDV_ expects your source data to contain only one delta load. 
In order to work around this issue, owners of the package came up with custom dbt materializations to help you with the initial load of your system, however, the performance of such load is in our experience not acceptable. + +_(Editor's note: As of AutomateDV v0.10.0, this performance issue has been resolved and users may use the standard incremental configuration.)_ ### datavault4dbt diff --git a/website/dbt-versions.js b/website/dbt-versions.js index a59822101e9..adee9230c7a 100644 --- a/website/dbt-versions.js +++ b/website/dbt-versions.js @@ -1,4 +1,9 @@ exports.versions = [ + { + version: "1.7", + EOLDate: "2024-07-31", + isPrerelease: "true" + }, { version: "1.6", EOLDate: "2024-07-31", @@ -23,10 +28,6 @@ exports.versions = [ version: "1.1", EOLDate: "2023-04-28", }, - { - version: "1.0", - EOLDate: "2022-12-03" - }, ] exports.versionedPages = [ diff --git a/website/docs/docs/build/cumulative-metrics.md b/website/docs/docs/build/cumulative-metrics.md index 5312d133d4d..3104fd7578a 100644 --- a/website/docs/docs/build/cumulative-metrics.md +++ b/website/docs/docs/build/cumulative-metrics.md @@ -18,8 +18,8 @@ This metric is common for calculating things like weekly active users, or month- | `label` | The value that will be displayed in downstream tools. | Required | | `type_params` | The type parameters of the metric. | Required | | `measure` | The measure you are referencing. | Required | -| `window` | The accumulation window, such as 1 month, 7 days, 1 year. This can't be used with `window`. | Optional | -| `grain_to_date` | Sets the accumulation grain, such as month will accumulate data for one month. Then restart at the beginning of the next. This can't be used with window. | Optional | +| `window` | The accumulation window, such as 1 month, 7 days, 1 year. This can't be used with `grain_to_date`. | Optional | +| `grain_to_date` | Sets the accumulation grain, such as month will accumulate data for one month. Then restart at the beginning of the next. This can't be used with `window`. | Optional | The following displays the complete specification for cumulative metrics, along with an example: diff --git a/website/docs/docs/build/derived-metrics.md b/website/docs/docs/build/derived-metrics.md index 375794cc5c8..2ad1c3e368c 100644 --- a/website/docs/docs/build/derived-metrics.md +++ b/website/docs/docs/build/derived-metrics.md @@ -6,7 +6,7 @@ sidebar_label: Derived tags: [Metrics, Semantic Layer] --- -In MetricFlow, derived metrics are metrics created by defining an expression using other metrics. They allow performing calculations on top of existing metrics. This proves useful for combining metrics and applying arithmetic functions to aggregated columns, such as, you can define a profit metric. +In MetricFlow, derived metrics are metrics created by defining an expression using other metrics. They enable you to perform calculations with existing metrics. This is helpful for combining metrics and doing math functions on aggregated columns, like creating a profit metric. The parameters, description, and type for derived metrics are: @@ -21,7 +21,7 @@ In MetricFlow, derived metrics are metrics created by defining an expression usi | `metrics` | The list of metrics used in the derived metrics. | Required | | `alias` | Optional alias for the metric that you can use in the expr. | Optional | | `filter` | Optional filter to apply to the metric. | Optional | -| `offset_window` | Set the period for the offset window, such as 1 month. 
This will return the value of the metric one month from the metric time. | Required | +| `offset_window` | Set the period for the offset window, such as 1 month. This will return the value of the metric one month from the metric time. | Required | The following displays the complete specification for derived metrics, along with an example. @@ -37,7 +37,7 @@ metrics: - name: the name of the metrics. must reference a metric you have already defined # Required alias: optional alias for the metric that you can use in the expr # Optional filter: optional filter to apply to the metric # Optional - offset_window: set the period for the offset window i.e 1 month. This will return the value of the metric one month from the metric time. # Required + offset_window: set the period for the offset window, such as 1 month. This will return the value of the metric one month from the metric time. # Required ``` ## Derived metrics example @@ -85,17 +85,75 @@ metrics: ## Derived metric offset -You may want to use an offset value of a metric in the definition of a derived metric. For example, you can model the retention rate by using a derived metric with an offset, which involves calculating (active customers at the end of the month/active customers at the beginning of the month). +To perform calculations using a metric's value from a previous time period, you can add an offset parameter to a derived metric. For example, if you want to calculate period-over-period growth or track user retention, you can use this metric offset. + +**Note:** You must include the [`metric_time` dimension](/docs/build/dimensions#time) when querying a derived metric with an offset window. + +The following example displays how you can calculate monthly revenue growth using a 1-month offset window: ```yaml -metrics: -- name: user_retention - type: derived +- name: customer_retention + description: Percentage of customers that are active now and those active 1 month ago + label: customer_retention type_params: - expr: active_customers/active_customers_t1m + expr: (active_customers/ active_customers_prev_month) metrics: - - name: active_customers # these are all metrics (can be a derived metric, meaning building a derived metric with derived metrics) + - name: active_customers + alias: current_active_customers - name: active_customers offset_window: 1 month - alias: active_customers_t1m + alias: active_customers_prev_month ``` + +### Offset windows and granularity + +You can query any granularity and offset window combination. The following example queries a metric with a 7-day offset and a monthly grain: + +```yaml +- name: d7_booking_change + description: Difference between bookings now and 7 days ago + type: derived + label: d7 Bookings Change + type_params: + expr: bookings - bookings_7_days_ago + metrics: + - name: bookings + alias: current_bookings + - name: bookings + offset_window: 7 days + alias: bookings_7_days_ago +``` + +When you run the query `mf query --metrics d7_booking_change --group-by metric_time__month` for the metric, here's how it's calculated: + +1. We retrieve the raw, unaggregated dataset with the specified measures and dimensions at the smallest level of detail, which is currently 'day'. +2. Then, we perform an offset join on the daily dataset, followed by performing a date trunc and aggregation to the requested granularity. + For example, to calculate `d7_booking_change` for July 2017: + - First, we sum up all the booking values for each day in July to calculate the bookings metric. 
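(An aside for readers following along: the two aggregations described in steps 1 and 2 can be sketched in a few lines of pandas — daily values rolled up to months, once as-is and once shifted by 7 days. The `bookings` values and column names below are made up for illustration, and MetricFlow generates SQL rather than pandas, so treat this purely as a sketch of the offset logic.)

```python
import pandas as pd

# Hypothetical daily bookings data; values and column names are illustrative only.
daily = pd.DataFrame({"metric_time": pd.date_range("2017-06-20", "2017-07-31", freq="D")})
daily["bookings"] = 100  # constant daily bookings keep the arithmetic easy to follow

# Roll the daily grain up to months, once as-is and once with a 7-day offset,
# mirroring the "current" and "7 days ago" inputs of the derived metric.
current = (
    daily.assign(month=daily["metric_time"].dt.to_period("M"))
    .groupby("month")["bookings"]
    .sum()
)
offset = (
    daily.assign(month=(daily["metric_time"] + pd.Timedelta(days=7)).dt.to_period("M"))
    .groupby("month")["bookings"]
    .sum()
)

# The derived metric is the difference of the two monthly series
# (zero for July here, because the sample bookings are constant).
d7_booking_change = current - offset
print(d7_booking_change)
```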
+ - The following table displays the range of days that make up this monthly aggregation. + +| | Orders | Metric_time | +| - | ---- | -------- | +| | 330 | 2017-07-31 | +| | 7030 | 2017-07-30 to 2017-07-02 | +| | 78 | 2017-07-01 | +| Total | 7438 | 2017-07-01 | + +3. Next, we calculate July's bookings with a 7-day offset. The following table displays the range of days that make up this monthly aggregation. Note that the month begins 7 days later (offset by 7 days) on 2017-07-24. + +| | Orders | Metric_time | +| - | ---- | -------- | +| | 329 | 2017-07-24 | +| | 6840 | 2017-07-23 to 2017-06-30 | +| | 83 | 2017-06-24 | +| Total | 7252 | 2017-07-01 | + +4. Lastly, we calculate the derived metric and return the final result set: + +```bash +bookings - bookings_7_days_ago would be compile as 7438 - 7252 = 186. +``` + +| d7_booking_change | metric_time__month | +| ----------------- | ------------------ | +| 186 | 2017-07-01 | diff --git a/website/docs/docs/build/groups.md b/website/docs/docs/build/groups.md index aa33db07ccc..7ac5337ba0d 100644 --- a/website/docs/docs/build/groups.md +++ b/website/docs/docs/build/groups.md @@ -1,6 +1,6 @@ --- title: "Add groups to your DAG" -sidebar_title: "Groups" +sidebar_label: "Groups" id: "groups" description: "When you define groups in dbt projects, you turn implicit relationships into an explicit grouping." keywords: diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md index 89115652a9c..d3c3f25890b 100644 --- a/website/docs/docs/build/incremental-models.md +++ b/website/docs/docs/build/incremental-models.md @@ -79,12 +79,6 @@ A `unique_key` enables updating existing rows instead of just appending new rows Not specifying a `unique_key` will result in append-only behavior, which means dbt inserts all rows returned by the model's SQL into the preexisting target table without regard for whether the rows represent duplicates. - - -The optional `unique_key` parameter specifies a field that can uniquely identify each row within your model. You can define `unique_key` in a configuration block at the top of your model. If your model doesn't contain a single field that is unique, but rather a combination of columns, we recommend that you create a single column that can serve as a unique identifier (by concatenating and hashing those columns), and pass it into your model's configuration. - - - The optional `unique_key` parameter specifies a field (or combination of fields) that define the grain of your model. That is, the field(s) identify a single unique row. You can define `unique_key` in a configuration block at the top of your model, and it can be a single column name or a list of column names. diff --git a/website/docs/docs/build/materializations.md b/website/docs/docs/build/materializations.md index 70c7878bd69..619880e5d1b 100644 --- a/website/docs/docs/build/materializations.md +++ b/website/docs/docs/build/materializations.md @@ -5,12 +5,13 @@ id: "materializations" --- ## Overview -Materializations are strategies for persisting dbt models in a warehouse. There are four types of materializations built into dbt. They are: +Materializations are strategies for persisting dbt models in a warehouse. There are five types of materializations built into dbt. 
They are: - - - incremental - ephemeral +- materialized view ## Configuring materializations @@ -103,6 +104,45 @@ When using the `table` materialization, your model is rebuilt as a ### Local packages -Packages that you have stored locally can be installed by specifying the path to the project, like so: +A "local" package is a dbt project accessible from your local file system. You can install it by specifying the project's path. It works best when you nest the project within a subdirectory relative to your current project's directory. ```yaml packages: - - local: /opt/dbt/redshift # use a local path + - local: relative/path/to/subdirectory ``` -Local packages should only be used for specific situations, for example, when testing local changes to a package. +Other patterns may work in some cases, but not always. For example, if you install this project as a package elsewhere, or try running it on a different system, the relative and absolute paths will yield the same results. + + + +```yaml +packages: + # not recommended - support for these patterns vary + - local: /../../redshift # relative path to a parent directory + - local: /opt/dbt/redshift # absolute path on the system +``` + + + +There are a few specific use cases where we recommend using a "local" package: +1. **Monorepo** — When you have multiple projects, each nested in a subdirectory, within a monorepo. "Local" packages allow you to combine projects for coordinated development and deployment. +2. **Testing changes** — To test changes in one project or package within the context of a downstream project or package that uses it. By temporarily switching the installation to a "local" package, you can make changes to the former and immediately test them in the latter for quicker iteration. This is similar to [editable installs](https://pip.pypa.io/en/stable/topics/local-project-installs/) in Python. +3. **Nested project** — When you have a nested project that defines fixtures and tests for a project of utility macros, like [the integration tests within the `dbt-utils` package](https://github.com/dbt-labs/dbt-utils/tree/main/integration_tests). + ## What packages are available? Check out [dbt Hub](https://hub.getdbt.com) to see the library of published dbt packages! diff --git a/website/docs/docs/build/projects.md b/website/docs/docs/build/projects.md index a7ca3638590..0d7dd889fa6 100644 --- a/website/docs/docs/build/projects.md +++ b/website/docs/docs/build/projects.md @@ -18,6 +18,7 @@ At a minimum, all a project needs is the `dbt_project.yml` project configuration | [sources](/docs/build/sources) | A way to name and describe the data loaded into your warehouse by your Extract and Load tools. | | [exposures](/docs/build/exposures) | A way to define and describe a downstream use of your project. | | [metrics](/docs/build/metrics) | A way for you to define metrics for your project. | +| [groups](/docs/build/groups) | Groups enable collaborative node organization in restricted collections. | | [analysis](/docs/build/analyses) | A way to organize analytical SQL queries in your project such as the general ledger from your QuickBooks. 
| When building out the structure of your project, you should consider these impacts on your organization's workflow: diff --git a/website/docs/docs/build/python-models.md b/website/docs/docs/build/python-models.md index 12825648501..bff65362d06 100644 --- a/website/docs/docs/build/python-models.md +++ b/website/docs/docs/build/python-models.md @@ -16,11 +16,15 @@ We encourage you to: dbt Python (`dbt-py`) models can help you solve use cases that can't be solved with SQL. You can perform analyses using tools available in the open-source Python ecosystem, including state-of-the-art packages for data science and statistics. Before, you would have needed separate infrastructure and orchestration to run Python transformations in production. Python transformations defined in dbt are models in your project with all the same capabilities around testing, documentation, and lineage. + Python models are supported in dbt Core 1.3 and higher. Learn more about [upgrading your version in dbt Cloud](https://docs.getdbt.com/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-upgrading-dbt-versions) and [upgrading dbt Core versions](https://docs.getdbt.com/docs/core-versions#upgrading-to-new-patch-versions). To read more about Python models, change the [docs version to 1.3](/docs/build/python-models?version=1.3) (or higher) in the menu bar. + + + @@ -711,3 +715,5 @@ You can also install packages at cluster creation time by [defining cluster prop + + \ No newline at end of file diff --git a/website/docs/docs/build/tests.md b/website/docs/docs/build/tests.md index 1a40dd42b53..c107dacf7b2 100644 --- a/website/docs/docs/build/tests.md +++ b/website/docs/docs/build/tests.md @@ -1,10 +1,12 @@ --- title: "Add tests to your DAG" -sidebar_title: "Tests" +sidebar_label: "Tests" description: "Read this tutorial to learn how to use tests when building in dbt." +search_weight: "heavy" id: "tests" +keywords: + - test, tests, testing, dag --- - ## Related reference docs * [Test command](/reference/commands/test) * [Test properties](/reference/resource-properties/tests) diff --git a/website/docs/docs/building-a-dbt-project/building-models/python-models.md b/website/docs/docs/building-a-dbt-project/building-models/python-models.md deleted file mode 100644 index 1aab8ac7a92..00000000000 --- a/website/docs/docs/building-a-dbt-project/building-models/python-models.md +++ /dev/null @@ -1,719 +0,0 @@ ---- -title: "Python models" ---- - -:::info Brand new! - -dbt Core v1.3 included first-ever support for Python models. Note that only [specific data platforms](#specific-data-platforms) support dbt-py models. - -We encourage you to: -- Read [the original discussion](https://github.com/dbt-labs/dbt-core/discussions/5261) that proposed this feature. -- Contribute to [best practices for developing Python models in dbt](https://discourse.getdbt.com/t/dbt-python-model-dbt-py-best-practices/5204 ). -- Weigh in on [next steps for Python models, beyond v1.3](https://github.com/dbt-labs/dbt-core/discussions/5742). -- Join the **#dbt-core-python-models** channel in the [dbt Community Slack](https://www.getdbt.com/community/join-the-community/). - -Below, you'll see sections entitled "❓ **Our questions**." We are excited to have released a first narrow set of functionality in v1.3, which will solve real use cases. We also know this is a first step into a much wider field of possibility. We don't pretend to have all the answers. 
We're excited to keep developing our opinionated recommendations and next steps for product development—and we want your help. Comment in the GitHub discussions; leave thoughts in Slack; bring up dbt + Python in casual conversation with colleagues and friends. -::: - -## About Python models in dbt - -dbt Python ("dbt-py") models will help you solve use cases that can't be solved with SQL. You can perform analyses using tools available in the open source Python ecosystem, including state-of-the-art packages for data science and statistics. Before, you would have needed separate infrastructure and orchestration to run Python transformations in production. By defining your Python transformations in dbt, they're just models in your project, with all the same capabilities around testing, documentation, and lineage. - - - -Python models are supported in dbt Core 1.3 and above. Learn more about [upgrading your version in dbt Cloud](https://docs.getdbt.com/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-upgrading-dbt-versions) and [upgrading dbt Core versions](https://docs.getdbt.com/docs/core-versions#upgrading-to-new-patch-versions). - -To read more about Python models, change the docs version to 1.3 or higher in the menu above. - - - - - - - - -```python -import ... - -def model(dbt, session): - - my_sql_model_df = dbt.ref("my_sql_model") - - final_df = ... # stuff you can't write in SQL! - - return final_df -``` - - - - - -```yml -version: 2 - -models: - - name: my_python_model - - # Document within the same codebase - description: My transformation written in Python - - # Configure in ways that feel intuitive and familiar - config: - materialized: table - tags: ['python'] - - # Test the results of my Python transformation - columns: - - name: id - # Standard validation for 'grain' of Python results - tests: - - unique - - not_null - tests: - # Write your own validation logic (in SQL) for Python results - - [custom_generic_test](writing-custom-generic-tests) -``` - - - - - - -The prerequisites for dbt Python models include using an adapter for a data platform that supports a fully featured Python runtime. In a dbt Python model, all Python code is executed remotely on the platform. None of it is run by dbt locally. We believe in clearly separating _model definition_ from _model execution_. In this and many other ways, you'll find that dbt's approach to Python models mirrors its longstanding approach to modeling data in SQL. - -We've written this guide assuming that you have some familiarity with dbt. If you've never before written a dbt model, we encourage you to start by first reading [dbt Models](/docs/build/models). Throughout, we'll be drawing connections between Python models and SQL models, as well as making clear their differences. - -### What is a Python model? - -A dbt Python model is a function that reads in dbt sources or other models, applies a series of transformations, and returns a transformed dataset. DataFrame operations define the starting points, the end state, and each step along the way. - -This is similar to the role of CTEs in dbt SQL models. We use CTEs to pull in upstream datasets, define (and name) a series of meaningful transformations, and end with a final `select` statement. You can run the compiled version of a dbt SQL model to see the data included in the resulting view or table. When you `dbt run`, dbt wraps that query in `create view`, `create table`, or more complex DDL to save its results in the database. 
- -Instead of a final `select` statement, each Python model returns a final DataFrame. Each DataFrame operation is "lazily evaluated." In development, you can preview its data, using methods like `.show()` or `.head()`. When you run a Python model, the full result of the final DataFrame will be saved as a table in your data warehouse. - -dbt Python models have access to almost all of the same configuration options as SQL models. You can test them, document them, add `tags` and `meta` properties to them, grant access to their results to other users, and so on. You can select them by their name, their file path, their configurations, whether they are upstream or downstream of another model, or whether they have been modified compared to a previous project state. - -### Defining a Python model - -Each Python model lives in a `.py` file in your `models/` folder. It defines a function named **`model()`**, which takes two parameters: -- **`dbt`**: A class compiled by dbt Core, unique to each model, enables you to run your Python code in the context of your dbt project and DAG. -- **`session`**: A class representing your data platform’s connection to the Python backend. The session is needed to read in tables as DataFrames, and to write DataFrames back to tables. In PySpark, by convention, the `SparkSession` is named `spark`, and available globally. For consistency across platforms, we always pass it into the `model` function as an explicit argument called `session`. - -The `model()` function must return a single DataFrame. On Snowpark (Snowflake), this can be a Snowpark or pandas DataFrame. Via PySpark (Databricks + BigQuery), this can be a Spark, pandas, or pandas-on-Spark DataFrame. For more about choosing between pandas and native DataFrames, see [DataFrame API + syntax](#dataframe-api--syntax). - -When you `dbt run --select python_model`, dbt will prepare and pass in both arguments (`dbt` and `session`). All you have to do is define the function. This is how every single Python model should look: - - - -```python -def model(dbt, session): - - ... - - return final_df -``` - - - - -### Referencing other models - -Python models participate fully in dbt's directed acyclic graph (DAG) of transformations. Use the `dbt.ref()` method within a Python model to read in data from other models (SQL or Python). If you want to read directly from a raw source table, use `dbt.source()`. These methods return DataFrames pointing to the upstream source, model, seed, or snapshot. - - - -```python -def model(dbt, session): - - # DataFrame representing an upstream model - upstream_model = dbt.ref("upstream_model_name") - - # DataFrame representing an upstream source - upstream_source = dbt.source("upstream_source_name", "table_name") - - ... -``` - - - -Of course, you can `ref()` your Python model in downstream SQL models, too: - - - -```sql -with upstream_python_model as ( - - select * from {{ ref('my_python_model') }} - -), - -... -``` - - - -### Configuring Python models - -Just like SQL models, there are three ways to configure Python models: -1. In `dbt_project.yml`, where you can configure many models at once -2. In a dedicated `.yml` file, within the `models/` directory -3. 
Within the model's `.py` file, using the `dbt.config()` method - -Calling the `dbt.config()` method will set configurations for your model right within your `.py` file, similar to the `{{ config() }}` macro in `.sql` model files: - - - -```python -def model(dbt, session): - - # setting configuration - dbt.config(materialized="table") -``` - - - -There's a limit to how fancy you can get with the `dbt.config()` method. It accepts _only_ literal values (strings, booleans, and numeric types). Passing another function or a more complex data structure is not possible. The reason is that dbt statically analyzes the arguments to `config()` while parsing your model without executing your Python code. If you need to set a more complex configuration, we recommend you define it using the [`config` property](resource-properties/config) in a YAML file. - -#### Accessing project context - -dbt Python models don't use Jinja to render compiled code. Python models have limited access to global project contexts compared to SQL models. That context is made available from the `dbt` class, passed in as an argument to the `model()` function. - -Out of the box, the `dbt` class supports: -- Returning DataFrames referencing the locations of other resources: `dbt.ref()` + `dbt.source()` -- Accessing the database location of the current model: `dbt.this()` (also: `dbt.this.database`, `.schema`, `.identifier`) -- Determining if the current model's run is incremental: `dbt.is_incremental` - -It is possible to extend this context by "getting" them via `dbt.config.get()` after they are configured in the [model's config](/reference/model-configs). This includes inputs such as `var`, `env_var`, and `target`. If you want to use those values to power conditional logic in your model, we require setting them through a dedicated `.yml` file config: - - - -```yml -version: 2 - -models: - - name: my_python_model - config: - materialized: table - target_name: "{{ target.name }}" - specific_var: "{{ var('SPECIFIC_VAR') }}" - specific_env_var: "{{ env_var('SPECIFIC_ENV_VAR') }}" -``` - - - -Then, within the model's Python code, use the `dbt.config.get()` function to _access_ values of configurations that have been set: - - - -```python -def model(dbt, session): - target_name = dbt.config.get("target_name") - specific_var = dbt.config.get("specific_var") - specific_env_var = dbt.config.get("specific_env_var") - - orders_df = dbt.ref("fct_orders") - - # limit data in dev - if target_name == "dev": - orders_df = orders_df.limit(500) -``` - - - -### Materializations - -Python models support two materializations: -- `table` -- `incremental` - -Incremental Python models support all the same [incremental strategies](/docs/build/incremental-models#about-incremental_strategy) as their SQL counterparts. The specific strategies supported depend on your adapter. - -Python models can't be materialized as `view` or `ephemeral`. Python isn't supported for non-model resource types (like tests and snapshots). - -For incremental models, like SQL models, you will need to filter incoming tables to only new rows of data: - - - -
- - - -```python -import snowflake.snowpark.functions as F - -def model(dbt, session): - dbt.config( - materialized = "incremental", - unique_key = "id", - ) - df = dbt.ref("upstream_table") - - if dbt.is_incremental: - - # only new rows compared to max in current table - max_from_this = f"select max(updated_at) from {dbt.this}" - df = df.filter(df.updated_at > session.sql(max_from_this).collect()[0][0]) - - # or only rows from the past 3 days - df = df.filter(df.updated_at >= F.dateadd("day", F.lit(-3), F.current_timestamp())) - - ... - - return df -``` - - - -
- -
- - - -```python -import pyspark.sql.functions as F - -def model(dbt, session): - dbt.config( - materialized = "incremental", - unique_key = "id", - ) - df = dbt.ref("upstream_table") - - if dbt.is_incremental: - - # only new rows compared to max in current table - max_from_this = f"select max(updated_at) from {dbt.this}" - df = df.filter(df.updated_at > session.sql(max_from_this).collect()[0][0]) - - # or only rows from the past 3 days - df = df.filter(df.updated_at >= F.date_add(F.current_timestamp(), F.lit(-3))) - - ... - - return df -``` - - - -
- -
- -**Note:** Incremental models are supported on BigQuery/Dataproc for the `merge` incremental strategy. The `insert_overwrite` strategy is not yet supported. - -## Python-specific functionality - -### Defining functions - -In addition to defining a `model` function, the Python model can import other functions or define its own. Here's an example, on Snowpark, defining a custom `add_one` function: - - - -```python -def add_one(x): - return x + 1 - -def model(dbt, session): - dbt.config(materialized="table") - temps_df = dbt.ref("temperatures") - - # warm things up just a little - df = temps_df.withColumn("degree_plus_one", add_one(temps_df["degree"])) - return df -``` - - - -At present, Python functions defined in one dbt model can't be imported and reused in other models. See the ["Code reuse"](#code-reuse) section for the potential patterns we're considering. - -### Using PyPI packages - -You can also define functions that depend on third-party packages, so long as those packages are installed and available to the Python runtime on your data platform. See notes on "Installing Packages" for [specific data warehouses](#specific-data-warehouses). - -In this example, we use the `holidays` package to determine if a given date is a holiday in France. For simplicity and consistency across platforms, the code below uses the pandas API. The exact syntax, and the need to refactor for multi-node processing, still varies. - - - -
- - - -```python -import holidays - -def is_holiday(date_col): - # Chez Jaffle - french_holidays = holidays.France() - is_holiday = (date_col in french_holidays) - return is_holiday - -def model(dbt, session): - dbt.config( - materialized = "table", - packages = ["holidays"] - ) - - orders_df = dbt.ref("stg_orders") - - df = orders_df.to_pandas() - - # apply our function - # (columns need to be in uppercase on Snowpark) - df["IS_HOLIDAY"] = df["ORDER_DATE"].apply(is_holiday) - - # return final dataset (Pandas DataFrame) - return df -``` - - - -
- -
- - - -```python -import holidays - -def is_holiday(date_col): - # Chez Jaffle - french_holidays = holidays.France() - is_holiday = (date_col in french_holidays) - return is_holiday - -def model(dbt, session): - dbt.config( - materialized = "table", - packages = ["holidays"] - ) - - orders_df = dbt.ref("stg_orders") - - df = orders_df.to_pandas_on_spark() # Spark 3.2+ - # df = orders_df.toPandas() in earlier versions - - # apply our function - df["is_holiday"] = df["order_date"].apply(is_holiday) - - # convert back to PySpark - df = df.to_spark() # Spark 3.2+ - # df = session.createDataFrame(df) in earlier versions - - # return final dataset (PySpark DataFrame) - return df -``` - - - -
- -
- -#### Configuring packages - -We encourage you to explicitly configure required packages and versions so dbt can track them in project metadata. This configuration is required for the implementation on some platforms. If you need specific versions of packages, specify them. - - - -```python -def model(dbt, session): - dbt.config( - packages = ["numpy==1.23.1", "scikit-learn"] - ) -``` - - - - - -```yml -version: 2 - -models: - - name: my_python_model - config: - packages: - - "numpy==1.23.1" - - scikit-learn -``` - - - -#### UDFs - -You can use the `@udf` decorator or `udf` function to define an "anonymous" function and call it within your `model` function's DataFrame transformation. This is a typical pattern for applying more complex functions as DataFrame operations, especially if those functions require inputs from third-party packages. -- [Snowpark Python: Creating UDFs](https://docs.snowflake.com/en/developer-guide/snowpark/python/creating-udfs.html) -- [PySpark functions: udf](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.udf.html) - - - -
- - - -```python -import snowflake.snowpark.types as T -import snowflake.snowpark.functions as F -import numpy - -def register_udf_add_random(): - add_random = F.udf( - # use 'lambda' syntax, for simple functional behavior - lambda x: x + numpy.random.normal(), - return_type=T.FloatType(), - input_types=[T.FloatType()] - ) - return add_random - -def model(dbt, session): - - dbt.config( - materialized = "table", - packages = ["numpy"] - ) - - temps_df = dbt.ref("temperatures") - - add_random = register_udf_add_random() - - # warm things up, who knows by how much - df = temps_df.withColumn("degree_plus_random", add_random("degree")) - return df -``` - - - -**Note:** Due to a Snowpark limitation, it is not currently possible to register complex named UDFs within stored procedures, and therefore dbt Python models. We are looking to add native support for Python UDFs as a project/DAG resource type in a future release. For the time being, if you want to create a "vectorized" Python UDF via the Batch API, we recommend either: -- Writing [`create function`](https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html) inside a SQL macro, to run as a hook or run-operation -- [Registering from a staged file](https://docs.snowflake.com/ko/developer-guide/snowpark/reference/python/_autosummary/snowflake.snowpark.udf.html#snowflake.snowpark.udf.UDFRegistration.register_from_file) within your Python model code - -
- -
- - - -```python -from pyspark.sql.types as T -import pyspark.sql.functions as F -import numpy - -# use a 'decorator' for more readable code -@F.udf(returnType=T.DoubleType()) -def add_random(x): - random_number = numpy.random.normal() - return x + random_number - -def model(dbt, session): - dbt.config( - materialized = "table", - packages = ["numpy"] - ) - - temps_df = dbt.ref("temperatures") - - # warm things up, who knows by how much - df = temps_df.withColumn("degree_plus_random", add_random("degree")) - return df -``` - - - -
- -
- -#### Code reuse - -Currently, you cannot import or reuse Python functions defined in one dbt model, in other models. This is something we'd like dbt to support. There are two patterns we're considering: -1. Creating and registering **"named" UDFs**. This process is different across data platforms and has some performance limitations. (Snowpark does support ["vectorized" UDFs](https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html): pandas-like functions that you can execute in parallel.) -2. Using **private Python packages**. In addition to importing reusable functions from public PyPI packages, many data platforms support uploading custom Python assets and registering them as packages. The upload process looks different across platforms, but your code’s actual `import` looks the same. - -:::note ❓ Our questions - -- Should dbt have a role in abstracting over UDFs? Should dbt support a new type of DAG node, `function`? Would the primary use case be code reuse across Python models or defining Python-language functions that can be called from SQL models? -- How can dbt help users when uploading or initializing private Python assets? Is this a new form of `dbt deps`? -- How can dbt support users who want to test custom functions? If defined as UDFs: "unit testing" in the database? If "pure" functions in packages: encourage adoption of `pytest`? - -💬 Discussion: ["Python models: package, artifact/object storage, and UDF management in dbt"](https://github.com/dbt-labs/dbt-core/discussions/5741) -::: - -### DataFrame API and syntax - -Over the past decade, most people writing data transformations in Python have adopted DataFrame as their common abstraction. dbt follows this convention by returning `ref()` and `source()` as DataFrames, and it expects all Python models to return a DataFrame. - -A DataFrame is a two-dimensional data structure (rows and columns). It supports convenient methods for transforming that data, creating new columns from calculations performed on existing columns. It also offers convenient ways for previewing data while developing locally or in a notebook. - -That's about where the agreement ends. There are numerous frameworks with their own syntaxes and APIs for DataFrames. The [pandas](https://pandas.pydata.org/docs/) library offered one of the original DataFrame APIs, and its syntax is the most common to learn for new data professionals. Most newer DataFrame APIs are compatible with pandas-style syntax, though few can offer perfect interoperability. This is true for Snowpark and PySpark, which have their own DataFrame APIs. - -When developing a Python model, you will find yourself asking these questions: - -**Why pandas?** It's the most common API for DataFrames. It makes it easy to explore sampled data and develop transformations locally. You can “promote” your code as-is into dbt models and run it in production for small datasets. - -**Why _not_ pandas?** Performance. pandas runs "single-node" transformations, which cannot benefit from the parallelism and distributed computing offered by modern data warehouses. This quickly becomes a problem as you operate on larger datasets. Some data platforms support optimizations for code written using pandas' DataFrame API, preventing the need for major refactors. For example, ["pandas on PySpark"](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart_ps.html) offers support for 95% of pandas functionality, using the same API while still leveraging parallel processing. 
- -:::note ❓ Our questions -- When developing a new dbt Python model, should we recommend pandas-style syntax for rapid iteration and then refactor? -- Which open source libraries provide compelling abstractions across different data engines and vendor-specific APIs? -- Should dbt attempt to play a longer-term role in standardizing across them? - -💬 Discussion: ["Python models: the pandas problem (and a possible solution)"](https://github.com/dbt-labs/dbt-core/discussions/5738) -::: - -### Limitations - -Python models have capabilities that SQL models do not. They also have some drawbacks compared to SQL models: - -- **Time and cost.** Python models are slower to run than SQL models, and the cloud resources that run them can be more expensive. Running Python requires more general-purpose compute. That compute might sometimes live on a separate service or architecture from your SQL models. **However:** We believe that deploying Python models via dbt—with unified lineage, testing, and documentation—is, from a human standpoint, **dramatically** faster and cheaper. By comparison, spinning up separate infrastructure to orchestrate Python transformations in production and different tooling to integrate with dbt is much more time-consuming and expensive. -- **Syntax differences** are even more pronounced. Over the years, dbt has done a lot, via dispatch patterns and packages such as `dbt_utils`, to abstract over differences in SQL dialects across popular data warehouses. Python offers a **much** wider field of play. If there are five ways to do something in SQL, there are 500 ways to write it in Python, all with varying performance and adherence to standards. Those options can be overwhelming. As the maintainers of dbt, we will be learning from state-of-the-art projects tackling this problem and sharing guidance as we develop it. -- **These capabilities are very new.** As data warehouses develop new features, we expect them to offer cheaper, faster, and more intuitive mechanisms for deploying Python transformations. **We reserve the right to change the underlying implementation for executing Python models in future releases.** Our commitment to you is around the code in your model `.py` files, following the documented capabilities and guidance we're providing here. - -As a general rule, if there's a transformation you could write equally well in SQL or Python, we believe that well-written SQL is preferable: it's more accessible to a greater number of colleagues, and it's easier to write code that's performant at scale. If there's a transformation you _can't_ write in SQL, or where ten lines of elegant and well-annotated Python could save you 1000 lines of hard-to-read Jinja-SQL, Python is the way to go. - -## Specific data platforms - -In their initial launch, Python models are supported on three of the most popular data platforms: Snowflake, Databricks, and BigQuery/GCP (via Dataproc). Both Databricks and GCP's Dataproc use PySpark as the processing framework. Snowflake uses its own framework, Snowpark, which has many similarities to PySpark. - - - -
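To make that similarity concrete, here is a hedged sketch of a trivial model whose DataFrame logic is spelled identically in PySpark and Snowpark; only the `col` import differs. The `temperatures` model and `degree` column reuse the placeholder names from the example above.

```python
# PySpark (Databricks, Dataproc):
from pyspark.sql.functions import col
# Snowpark (Snowflake) has an equivalent import:
# from snowflake.snowpark.functions import col

def model(dbt, session):
    dbt.config(materialized="table")

    # "temperatures" and "degree" are placeholder names
    temps_df = dbt.ref("temperatures")

    # filter() and select() are spelled the same way in both DataFrame APIs
    return temps_df.filter(col("degree") > 20).select("date", "degree")
```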
- -**Additional setup:** You will need to [acknowledge and accept Snowflake Third Party Terms](https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-packages.html#getting-started) to use Anaconda packages. - -**Installing packages:** Snowpark supports several popular packages via Anaconda. The complete list is at https://repo.anaconda.com/pkgs/snowflake/. Packages are installed at the time your model is being run. Different models can have different package dependencies. If you are using third-party packages, Snowflake recommends using a dedicated virtual warehouse for best performance rather than one with many concurrent users. - -**About "sprocs":** dbt submits Python models to run as "stored procedures," which some people call "sprocs" for short. By default, dbt will create a named sproc containing your model's compiled Python code, and then "call" it to execute. Snowpark has a Private Preview feature for "temporary" or "anonymous" stored procedures ([docs](https://docs.snowflake.com/en/LIMITEDACCESS/call-with.html)), which are faster and leave a cleaner query history. If this feature is enabled for your account, you can switch it on for your models by configuring `use_anonymous_sproc: True`. We plan to switch this on for all dbt + Snowpark Python models in a future release. - - - -```yml -# I asked Snowflake Support to enable this Private Preview feature, -# and now my dbt-py models run even faster! -models: - use_anonymous_sproc: True -``` - - - -**Docs:** ["Developer Guide: Snowpark Python"](https://docs.snowflake.com/en/developer-guide/snowpark/python/index.html) - -
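As a rough sketch (not Snowflake's official example), a model can declare its Anaconda packages inline and mix Snowpark and pandas operations. The `customers` model and its column names below are placeholders:

```python
def model(dbt, session):
    # Per-model package config; these are resolved from the Snowflake
    # Anaconda channel when the model runs
    dbt.config(
        materialized="table",
        packages=["pandas"]
    )

    customers_df = dbt.ref("customers")  # placeholder upstream model

    # Small result sets can drop into pandas; larger data should stay in
    # Snowpark DataFrames so the work runs inside the warehouse
    pandas_df = customers_df.to_pandas()
    pandas_df["FULL_NAME"] = pandas_df["FIRST_NAME"] + " " + pandas_df["LAST_NAME"]

    return session.create_dataframe(pandas_df)
```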
- -
- -**Submission methods:** Databricks supports a few different mechanisms to submit PySpark code, each with relative advantages. Some are better for supporting iterative development, while others are better for supporting lower-cost production deployments. The options are: -- `all_purpose_cluster` (default): dbt will run your Python model using the cluster ID configured as `cluster` in your connection profile or for this specific model. These clusters are more expensive but also much more responsive. We recommend using an interactive all-purpose cluster for quicker iteration in development. - - `create_notebook: True`: dbt will upload your model's compiled PySpark code to a notebook in the namespace `/Shared/dbt_python_model/{schema}`, where `{schema}` is the configured schema for the model, and execute that notebook to run using the all-purpose cluster. The appeal of this approach is that you can easily open the notebook in the Databricks UI for debugging or fine-tuning right after running your model. Remember to copy any changes into your dbt `.py` model code before re-running. - - `create_notebook: False` (default): dbt will use the [Command API](https://docs.databricks.com/dev-tools/api/1.2/index.html#run-a-command), which is slightly faster. -- `job_cluster`: dbt will upload your model's compiled PySpark code to a notebook in the namespace `/Shared/dbt_python_model/{schema}`, where `{schema}` is the configured schema for the model, and execute that notebook to run using a short-lived jobs cluster. For each Python model, Databricks will need to spin up the cluster, execute the model's PySpark transformation, and then spin down the cluster. As such, job clusters take longer before and after model execution, but they're also less expensive, so we recommend these for longer-running Python models in production. To use the `job_cluster` submission method, your model must be configured with `job_cluster_config`, which defines key-value properties for `new_cluster`, as defined in the [JobRunsSubmit API](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit). - -You can configure each model's `submission_method` in all the standard ways you supply configuration: - -```python -def model(dbt, session): - dbt.config( - submission_method="all_purpose_cluster", - create_notebook=True, - cluster_id="abcd-1234-wxyz" - ) - ... -``` -```yml -version: 2 -models: - - name: my_python_model - config: - submission_method: job_cluster - job_cluster_config: - spark_version: ... - node_type_id: ... -``` -```yml -# dbt_project.yml -models: - project_name: - subfolder: - # set defaults for all .py models defined in this subfolder - +submission_method: all_purpose_cluster - +create_notebook: False - +cluster_id: abcd-1234-wxyz -``` - -If not configured, `dbt-spark` will use the built-in defaults: the all-purpose cluster (based on `cluster` in your connection profile) without creating a notebook. The `dbt-databricks` adapter will default to the cluster configured in `http_path`. We encourage explicitly configuring the clusters for Python models in Databricks projects. - -**Installing packages:** When using all-purpose clusters, we recommend installing packages which you will be using to run your Python models. 
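For example, if a PyPI library such as `holidays` has already been installed on your all-purpose cluster, a model might use it like the following sketch (the cluster ID, model name, and column names are placeholders):

```python
import holidays  # assumes this package is already installed on the cluster
from pyspark.sql.functions import col, udf
from pyspark.sql.types import BooleanType

us_holidays = holidays.US()  # illustrative; any cluster-installed library works

@udf(returnType=BooleanType())
def is_us_holiday(order_date):
    # order_date arrives as a Python date; guard against nulls
    return order_date in us_holidays if order_date is not None else None

def model(dbt, session):
    dbt.config(
        materialized="table",
        submission_method="all_purpose_cluster",
        cluster_id="abcd-1234-wxyz",  # placeholder, as in the example above
    )

    orders_df = dbt.ref("orders")  # placeholder upstream model
    return orders_df.withColumn("is_holiday", is_us_holiday(col("order_date")))
```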
- -**Docs:** -- [PySpark DataFrame syntax](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.html) -- [Databricks: Introduction to DataFrames - Python](https://docs.databricks.com/spark/latest/dataframes-datasets/introduction-to-dataframes-python.html) - -
- -
- -The `dbt-bigquery` adapter uses a service called Dataproc to submit your Python models as PySpark jobs. That Python/PySpark code will read from your tables and views in BigQuery, perform all computation in Dataproc, and write the final result back to BigQuery. - -**Submission methods.** Dataproc supports two submission methods: `serverless` and `cluster`. Dataproc Serverless does not require a ready cluster, which saves on hassle and cost—but it is slower to start up, and much more limited in terms of available configuration. For example, Dataproc Serverless supports only a small set of Python packages, though it does include `pandas`, `numpy`, and `scikit-learn`. (See the full list [here](https://cloud.google.com/dataproc-serverless/docs/guides/custom-containers#example_custom_container_image_build), under "The following packages are installed in the default image"). Whereas, by creating a Dataproc Cluster in advance, you can fine-tune the cluster's configuration, install any PyPI packages you want, and benefit from faster, more responsive runtimes. - -Use the `cluster` submission method with dedicated Dataproc clusters you or your organization manage. Use the `serverless` submission method to avoid managing a Spark cluster. The latter may be quicker for getting started, but both are valid for production. - -**Additional setup:** -- Create or use an existing [Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) -- Enable Dataproc APIs for your project + region -- If using the `cluster` submission method: Create or use an existing [Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) with the [Spark BigQuery connector initialization action](https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/connectors#bigquery-connectors). (Google recommends copying the action into your own Cloud Storage bucket, rather than using the example version shown in the screenshot below.) - - - -The following configurations are needed to run Python models on Dataproc. You can add these to your [BigQuery profile](/reference/warehouse-setups/bigquery-setup#running-python-models-on-dataproc), or configure them on specific Python models: -- `gcs_bucket`: Storage bucket to which dbt will upload your model's compiled PySpark code. -- `dataproc_region`: GCP region in which you have enabled Dataproc (for example `us-central1`) -- `dataproc_cluster_name`: Name of Dataproc cluster to use for running Python model (executing PySpark job). Only required if `submission_method: cluster`. - -```python -def model(dbt, session): - dbt.config( - submission_method="cluster", - dataproc_cluster_name="my-favorite-cluster" - ) - ... -``` -```yml -version: 2 -models: - - name: my_python_model - config: - submission_method: serverless -``` - -Any user or service account that runs dbt Python models will need the following permissions, in addition to permissions needed for BigQuery ([docs](https://cloud.google.com/dataproc/docs/concepts/iam/iam)): -``` -dataproc.clusters.use -dataproc.jobs.create -dataproc.jobs.get -dataproc.operations.get -storage.buckets.get -storage.objects.create -storage.objects.delete -``` - -**Installing packages:** If you are using a Dataproc Cluster (as opposed to Dataproc Serverless), you can add third-party packages while creating the cluster. 
- -Google recommends installing Python packages on Dataproc clusters via initialization actions: -- [How initialization actions are used](https://github.com/GoogleCloudDataproc/initialization-actions/blob/master/README.md#how-initialization-actions-are-used) -- [Actions for installing via `pip` or `conda`](https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/python) - -You can also install packages at cluster creation time by [defining cluster properties](https://cloud.google.com/dataproc/docs/tutorials/python-configuration#image_version_20): `dataproc:pip.packages` or `dataproc:conda.packages`. - - - -**Docs:** -- [Dataproc overview](https://cloud.google.com/dataproc/docs/concepts/overview) -- [PySpark DataFrame syntax](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.html) - -
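As a hedged sketch, a BigQuery Python model that opts into Dataproc Serverless might look like the following, relying only on PySpark itself so it stays within the default serverless image (the `payments` model and its columns are placeholders):

```python
from pyspark.sql import functions as F

def model(dbt, session):
    # Dataproc Serverless needs no cluster to manage, but only packages in the
    # default serverless image (pandas, numpy, scikit-learn, ...) are available
    dbt.config(
        materialized="table",
        submission_method="serverless"
    )

    payments_df = dbt.ref("payments")  # placeholder upstream model

    # Aggregate in Spark on Dataproc; dbt writes the result back to BigQuery
    return (
        payments_df
        .groupBy("order_id")
        .agg(F.sum("amount").alias("total_amount"))
    )
```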
- -
- -
diff --git a/website/docs/docs/cloud/about-cloud/dbt-cloud-features.md b/website/docs/docs/cloud/about-cloud/dbt-cloud-features.md index a7555adb6a8..f1d8b32cdb1 100644 --- a/website/docs/docs/cloud/about-cloud/dbt-cloud-features.md +++ b/website/docs/docs/cloud/about-cloud/dbt-cloud-features.md @@ -78,6 +78,12 @@ link="/docs/cloud/dbt-cloud-ide/develop-in-the-cloud" body="View the history of your runs and the model timing dashboard to help identify where improvements can be made to the scheduled jobs." link="/docs/deploy/run-visibility" icon="pencil-paper"/> + +
*These features are available on [selected plans](https://www.getdbt.com/pricing/). diff --git a/website/docs/docs/cloud/about-cloud/regions-ip-addresses.md b/website/docs/docs/cloud/about-cloud/regions-ip-addresses.md index bc8c180f2fd..caeb0203a5e 100644 --- a/website/docs/docs/cloud/about-cloud/regions-ip-addresses.md +++ b/website/docs/docs/cloud/about-cloud/regions-ip-addresses.md @@ -11,10 +11,17 @@ dbt Cloud is [hosted](/docs/cloud/about-cloud/architecture) in multiple regions | Region | Location | Access URL | IP addresses | Developer plan | Team plan | Enterprise plan | |--------|----------|------------|--------------|----------------|-----------|-----------------| -| North America [^1] | AWS us-east-1 (N. Virginia) | cloud.getdbt.com | 52.45.144.63
54.81.134.249
52.22.161.231 | ✅ | ✅ | ✅ | +| North America multi-tenant [^1] | AWS us-east-1 (N. Virginia) | cloud.getdbt.com | 52.45.144.63
54.81.134.249
52.22.161.231 | ✅ | ✅ | ✅ | +| North America Cell 1 [^1] | AWS us-east-1 (N. Virginia) | {account prefix}.us1.dbt.com | [Located in Account Settings](#locating-your-dbt-cloud-ip-addresses) | ❌ | ❌ | ❌ | | EMEA [^1] | AWS eu-central-1 (Frankfurt) | emea.dbt.com | 3.123.45.39
3.126.140.248
3.72.153.148 | ❌ | ❌ | ✅ | | APAC [^1] | AWS ap-southeast-2 (Sydney) | au.dbt.com | 52.65.89.235
3.106.40.33
13.239.155.206
| ❌ | ❌ | ✅ | | Virtual Private dbt or Single tenant | Customized | Customized | Ask [Support](/community/resources/getting-help#dbt-cloud-support) for your IPs | ❌ | ❌ | ✅ | [^1]: These regions support [multi-tenant](/docs/cloud/about-cloud/tenancy) deployment environments hosted by dbt Labs. + +### Locating your dbt Cloud IP addresses + +There are two ways to view your dbt Cloud IP addresses: +- If no projects exist in the account, create a new project, and the IP addresses will be displayed during the **Configure your environment** steps. +- If you have an existing project, navigate to **Account Settings** and ensure you are in the **Projects** pane. Click on a project name, and the **Project Settings** window will open. Locate the **Connection** field and click on the name. Scroll down to the **Settings**, and the first text block lists your IP addresses. diff --git a/website/docs/docs/cloud/billing.md b/website/docs/docs/cloud/billing.md index 9f1b53fa62d..fcdbaf28bc2 100644 --- a/website/docs/docs/cloud/billing.md +++ b/website/docs/docs/cloud/billing.md @@ -15,7 +15,7 @@ As a customer, you pay for the number of seats you have and the amount of usage dbt Cloud considers a Successful Model Built as any model that is successfully built via a run through dbt Cloud’s orchestration functionality in a dbt Cloud deployment environment. Models are counted when built and run. This includes any jobs run via dbt Cloud's scheduler, CI builds (jobs triggered by pull requests), runs kicked off via the dbt Cloud API, and any successor dbt Cloud tools with similar functionality. This also includes models that are successfully built even when a run may fail to complete. For example, you may have a job that contains 100 models and on one of its runs, 51 models are successfully built and then the job fails. In this situation, only 51 models would be counted. -Any models built in a dbt Cloud development environment (for example, via the IDE) do not count towards your usage. Tests, seeds, and snapshots also do not count. +Any models built in a dbt Cloud development environment (for example, via the IDE) do not count towards your usage. Tests, seeds, ephemeral models, and snapshots also do not count. ### What counts as a seat license? diff --git a/website/docs/docs/cloud/cloud-cli-installation.md b/website/docs/docs/cloud/cloud-cli-installation.md index 5f03c9fca92..68a8ef365d6 100644 --- a/website/docs/docs/cloud/cloud-cli-installation.md +++ b/website/docs/docs/cloud/cloud-cli-installation.md @@ -6,7 +6,9 @@ description: "Instructions for installing and configuring dbt Cloud CLI" :::warning Alpha functionality -The following installation instructions are for the dbt Cloud CLI, currently in alpha. These instructions are not intended for general audiences at this time. +The following installation instructions are for the dbt Cloud CLI, currently in Alpha (actively in development and being tested). + +These instructions are not intended for general audiences at this time. ::: @@ -14,7 +16,7 @@ The following installation instructions are for the dbt Cloud CLI, currently in ### Install and update with Brew on MacOS (recommended) -1. Install the CLI: +1. Install the dbt Cloud CLI: ```bash brew tap dbt-labs/dbt-cli @@ -30,7 +32,7 @@ dbt --help ### Manually install (Windows and Linux) -1. Download the latest release for your platform from [Github](https://github.com/dbt-labs/dbt-cli/releases). +1. Download the latest release for your platform from [GitHub](https://github.com/dbt-labs/dbt-cli/releases). 2. 
Add the `dbt` executable to your path. 3. Move to a directory with a dbt project, and create a `dbt_cloud.yml` file containing your `project-id` from dbt Cloud. 4. Invoke `dbt --help` from your terminal to see a list of supported commands. @@ -41,50 +43,54 @@ Follow the same process in [Installing dbt Cloud CLI](#manually-install-windows- ## Setting up the CLI +The following instructions are for setting up the dbt Cloud CLI. + 1. Ensure that you have created a project in [dbt Cloud](https://cloud.getdbt.com/). 2. Ensure that your personal [development credentials](https://cloud.getdbt.com/settings/profile/credentials) are set on the project. -3. Navigate to [your profile](https://cloud.getdbt.com/settings/profile) and enable the "beta features" flag under "Experimental Features." +3. Navigate to [your profile](https://cloud.getdbt.com/settings/profile) and enable the **Beta** flag under **Experimental Features.** -4. Create an environment variable with your [dbt cloud API key](https://cloud.getdbt.com/settings/profile#api-access): +4. Create an environment variable with your [dbt Cloud API key](https://cloud.getdbt.com/settings/profile#api-access): ```bash +vi ~/.zshrc - > $ vi ~/.zshrc - - ... - - # dbt Cloud CLI - export DBT_CLOUD_API_KEY="1234" - +# dbt Cloud CLI +export DBT_CLOUD_API_KEY="1234" # Replace "1234" with your API key ``` -5. Load the new environment variable. Note: you may need to reactivate your python virtual environment after sourcing your shell's dot file. Alternatively, restart your shell instead of sourcing the shell's dot file +5. Load the new environment variable. Note: You may need to reactivate your Python virtual environment after sourcing your shell's dot file. Alternatively, restart your shell instead of sourcing the shell's dot file ```bash - > $ source ~/.zshrc +source ~/.zshrc ``` 6. Navigate to a dbt project ```bash - > $ cd ~/dbt-projects/jaffle_shop +cd ~/dbt-projects/jaffle_shop ``` -7. Create a dbt_cloud.yml in the root project directory. The file is required to have a `project-id` field with a valid [project ID](#glossary): +7. Create a `dbt_cloud.yml` in the root project directory. The file is required to have a `project-id` field with a valid [project ID](#glossary). Enter the following commands: ```bash -> $ pwd -/Users/user/dbt-projects/jaffle_shop +pwd # Input +/Users/user/dbt-projects/jaffle_shop # Output +``` -> $ cat dbt_cloud.yml -project-id: '123456' +```bash +echo "project-id: ''" > dbt_cloud.yml # Input +``` + +```bash +cat dbt_cloud.yml # Input +project-id: '123456' # Output ``` -You can find your project ID by selecting your project and clicking on **Develop** in the navigation bar. Your project ID is the number in the URL: https://cloud.getdbt.com/develop/26228/projects/`PROJECT_ID`. +You can find your project ID by selecting your project and clicking on **Develop** in the navigation bar. Your project ID is the number in the URL: https://cloud.getdbt.com/develop/26228/projects/PROJECT_ID. -If dbt_cloud.yml already exists, edit the file and verify the project ID field uses a valid project ID. +If `dbt_cloud.yml` already exists, edit the file, and verify the project ID field uses a valid project ID. #### Upgrade the CLI with Brew @@ -97,8 +103,8 @@ brew upgrade dbt-cloud-cli **Coming soon** -### Glossary +## Glossary -- **dbt cloud API key:** your API key found by navigating to the **gear icon**, clicking **Profile Settings**, and scrolling down to **API**. -- **Project ID:** the ID of the dbt project you're working with. 
Can be retrieved from the dbt cloud URL after a project has been selected, for example, `https://cloud.getdbt.com/deploy/{accountID}/projects/{projectID}` -- **Development credentials:** your personal warehouse credentials for the project you’re working with. They can be set by selecting the project and entering them in dbt Cloud. Navigate to the **gear icon**, click **Profile Settings**, and click **Credentials** from the left-side menu. +- **dbt cloud API key:** Your API key found by navigating to the **gear icon**, clicking **Profile Settings**, and scrolling down to **API**. +- **Project ID:** The ID of the dbt project you're working with. Can be retrieved from the dbt Cloud URL after a project has been selected, for example, `https://cloud.getdbt.com/deploy/{accountID}/projects/{projectID}` +- **Development credentials:** Your personal warehouse credentials for the project you’re working with. They can be set by selecting the project and entering them in dbt Cloud. Navigate to the **gear icon**, click **Profile Settings**, and click **Credentials** from the left-side menu. diff --git a/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md b/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md index 72fe9e0449c..dae0ee1d178 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md @@ -47,20 +47,16 @@ To configure the SSH tunnel in dbt Cloud, you'll need to provide the hostname/IP - Verify the bastion server has its network security rules set up to accept connections from the [dbt Cloud IP addresses](/docs/cloud/about-cloud/regions-ip-addresses) on whatever port you configured. - Set up the user account by using the bastion servers instance's CLI, The following example uses the username `dbtcloud:` - `sudo groupadd dbtcloud`
- - `sudo useradd -m -g dbtcloud dbtcloud`
- - `sudo su - dbtcloud`
- - `mkdir ~/.ssh`
- - `chmod 700 ~/.ssh`
- - `touch ~/.ssh/authorized_keys`
- - `chmod 600 ~/.ssh/authorized_keys`
- +```shell +sudo groupadd dbtcloud +sudo useradd -m -g dbtcloud dbtcloud +sudo su - dbtcloud +mkdir ~/.ssh +chmod 700 ~/.ssh +touch ~/.ssh/authorized_keys +chmod 600 ~/.ssh/authorized_keys +``` + - Copy and paste the dbt Cloud generated public key, into the authorized_keys file. The Bastion server should now be ready for dbt Cloud to use as a tunnel into the Redshift environment. diff --git a/website/docs/docs/cloud/git/connect-azure-devops.md b/website/docs/docs/cloud/git/connect-azure-devops.md index a84e593a1e2..bc5bb81dd24 100644 --- a/website/docs/docs/cloud/git/connect-azure-devops.md +++ b/website/docs/docs/cloud/git/connect-azure-devops.md @@ -23,3 +23,4 @@ To connect Azure DevOps in dbt Cloud: 2. dbt Cloud developers need to [personally authenticate with Azure DevOps](/docs/cloud/git/authenticate-azure) from dbt Cloud. +If you're a Business Critical customer using [IP restrictions](/docs/cloud/secure/ip-restrictions), ensure you've added the appropriate Azure DevOps CIDRs to your IP restriction rules, or else the Azure DevOps connection will fail. diff --git a/website/docs/docs/cloud/git/connect-gitlab.md b/website/docs/docs/cloud/git/connect-gitlab.md index 1ec8fb08817..9bf0d3971e1 100644 --- a/website/docs/docs/cloud/git/connect-gitlab.md +++ b/website/docs/docs/cloud/git/connect-gitlab.md @@ -71,6 +71,8 @@ The application form in GitLab should look as follows when completed: Click **Save application** in GitLab, and GitLab will then generate an **Application ID** and **Secret**. These values will be available even if you close the app screen, so this is not the only chance you have to save them. +If you're a Business Critical customer using [IP restrictions](/docs/cloud/secure/ip-restrictions), ensure you've added the appropriate Gitlab CIDRs to your IP restriction rules, or else the Gitlab connection will fail. + ### Adding the GitLab OAuth application to dbt Cloud After you've created your GitLab application, you need to provide dbt Cloud information about the app. In dbt Cloud, account admins should navigate to **Account Settings**, click on the **Integrations** tab, and expand the GitLab section. diff --git a/website/docs/docs/cloud/manage-access/audit-log.md b/website/docs/docs/cloud/manage-access/audit-log.md index 818ec553e7b..98bf660b259 100644 --- a/website/docs/docs/cloud/manage-access/audit-log.md +++ b/website/docs/docs/cloud/manage-access/audit-log.md @@ -16,13 +16,9 @@ The dbt Cloud audit log stores all the events that occurred in your organization ## Accessing the audit log -To access audit log, click the gear icon in the top right, then click **Audit Log**. +To access the audit log, click the gear icon in the top right, then click **Audit Log**. -
- - - -
+ ## Understanding the audit log @@ -161,19 +157,17 @@ The audit log supports various events for different objects in dbt Cloud. You wi You can search the audit log to find a specific event or actor, which is limited to the ones listed in [Events in audit log](#events-in-audit-log). The audit log successfully lists historical events spanning the last 90 days. You can search for an actor or event using the search bar, and then narrow your results using the time window. -
- + -
## Exporting logs You can use the audit log to export all historical audit results for security, compliance, and analysis purposes: -- For events within 90 days — dbt Cloud will automatically display the 90 days selectable date range. Select **Export Selection** to download a CSV file of all the events that occurred in your organization within 90 days. +- For events within 90 days — dbt Cloud will automatically display the 90-day selectable date range. Select **Export Selection** to download a CSV file of all the events that occurred in your organization within 90 days. - For events beyond 90 days — Select **Export All**. The Account Admin will receive an email link to download a CSV file of all the events that occurred in your organization. - + diff --git a/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md b/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md index 6b68d440ba3..04dfbe093c3 100644 --- a/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md +++ b/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md @@ -8,8 +8,8 @@ sidebar: "Users and licenses" In dbt Cloud, _licenses_ are used to allocate users to your account. There are three different types of licenses in dbt Cloud: - **Developer** — Granted access to the Deployment and [Development](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud) functionality in dbt Cloud. -- **Read-Only** — Intended to view the [artifacts](/docs/deploy/artifacts) created in a dbt Cloud account. -- **IT** — Can manage users, groups, and licenses, among other permissions. Available on Enterprise and Team plans only. +- **Read-Only** — Intended to view the [artifacts](/docs/deploy/artifacts) created in a dbt Cloud account. Read-Only users can receive job notifications but not configure them. +- **IT** — Can manage users, groups, and licenses, among other permissions. IT users can receive job notifications but not configure them. Available on Enterprise and Team plans only. The user's assigned license determines the specific capabilities they can access in dbt Cloud. @@ -21,8 +21,8 @@ The user's assigned license determines the specific capabilities they can access | API Access | ✅ | ❌ | ❌ | | Use [Source Freshness](/docs/deploy/source-freshness) | ✅ | ✅ | ❌ | | Use [Docs](/docs/collaborate/build-and-view-your-docs) | ✅ | ✅ | ❌ | -| Receive [Job notifications](/docs/deploy/job-notifications) | ✅ | ✅ | ✅ | -*Available on Enterprise and Team plans only and doesn't count toward seat usage. Please note, IT seats are limited to 1 seat per Team or Enterprise account. +| Receive [Job notifications](/docs/deploy/job-notifications) | ✅ | ✅ | ✅ | +*Available on Enterprise and Team plans only and doesn't count toward seat usage. Please note, that IT seats are limited to 1 seat per Team or Enterprise account. ## Licenses diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-azure-active-directory.md b/website/docs/docs/cloud/manage-access/set-up-sso-azure-active-directory.md index f58bceff816..349c3d8ecd7 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-azure-active-directory.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-azure-active-directory.md @@ -146,7 +146,7 @@ To complete setup, follow the steps below in the dbt Cloud application. 
| **Client ID** | Paste the **Application (client) ID** recorded in the steps above | | **Client Secret** | Paste the **Client Secret** (remember to use the Secret Value instead of the Secret ID) recorded in the steps above | | **Tenant ID** | Paste the **Directory (tenant ID)** recorded in the steps above | -| **Domain** | Enter the domain name for your Azure directory (eg. `fishtownanalytics.com`). Only users with accounts in this directory with this primary domain will be able to log into the dbt Cloud application. Optionally, you may specify a CSV of domains which are _all_ authorized to access your dbt Cloud account (eg. `fishtownanalytics.com, fishtowndata.com`) Ensure that the domain(s) match the values configured on user accounts in Azure | +| **Domain** | Enter the domain name for your Azure directory (such as `fishtownanalytics.com`). Only use the primary domain; this won't block access for other domains. | | **Slug** | Enter your desired login slug. Users will be able to log into dbt Cloud by navigating to `https://YOUR_ACCESS_URL/enterprise-login/LOGIN-SLUG`, replacing `YOUR_ACCESS_URL` with the [appropriate Access URL](/docs/cloud/manage-access/sso-overview#auth0-multi-tenant-uris) for your region and plan. Login slugs must be unique across all dbt Cloud accounts, so pick a slug that uniquely identifies your company. | diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md index a206d359270..19779baf615 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md @@ -49,7 +49,7 @@ Client Secret for use in dbt Cloud. | **Application type** | internal | required | | **Application name** | dbt Cloud | required | | **Application logo** | Download the logo here | optional | -| **Authorized domains** | `getdbt.com` (US) `dbt.com` (EMEA or AU) | If deploying into a VPC, use the domain for your deployment | +| **Authorized domains** | `getdbt.com` (US multi-tenant) `getdbt.com` and `dbt.com`(US Cell 1) `dbt.com` (EMEA or AU) | If deploying into a VPC, use the domain for your deployment | | **Scopes** | `email, profile, openid` | The default scopes are sufficient | diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-okta.md b/website/docs/docs/cloud/manage-access/set-up-sso-okta.md index 0d493bcf29f..41381b57eca 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-okta.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-okta.md @@ -95,9 +95,9 @@ Expected **User Attribute Statements**: | Name | Name format | Value | Description | | -------------- | ----------- | -------------------- | -------------------------- | -| `email` | Unspecified | `${user.email}` | _The user's email address_ | -| `first_name` | Unspecified | `${user.firstName}` | _The user's first name_ | -| `last_name` | Unspecified | `${user.lastName}` | _The user's last name_ | +| `email` | Unspecified | `user.email` | _The user's email address_ | +| `first_name` | Unspecified | `user.firstName` | _The user's first name_ | +| `last_name` | Unspecified | `user.lastName` | _The user's last name_ | Expected **Group Attribute Statements**: diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md b/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md index 22a8094553d..2a23d686032 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md +++ 
b/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md @@ -56,9 +56,10 @@ Additionally, you may configure the IdP attributes passed from your identity pro | name | name format | value | description | | ---- | ----------- | ----- | ----------- | -| email | Unspecified | ${user.email} | The user's email address | -| first_name | Unspecified | ${user.first_name} | The user's first name | -| last_name | Unspecified | ${user.last_name} | The user's last name | +| email | Unspecified | user.email | The user's email address | +| first_name | Unspecified | user.first_name | The user's first name | +| last_name | Unspecified | user.last_name | The user's last name | +| NameID (if applicable) | Unspecified | user.email | The user's email address | dbt Cloud's [role-based access control](/docs/cloud/manage-access/about-user-access#role-based-access-control) relies on group mappings from the IdP to assign dbt Cloud users to dbt Cloud groups. To @@ -154,9 +155,9 @@ dbt Cloud expects by using the Attribute Statements and Group Attribute Statemen | Name | Name format | Value | Description | | -------------- | ----------- | -------------------- | -------------------------- | - | `email` | Unspecified | `${user.email}` | _The user's email address_ | - | `first_name` | Unspecified | `${user.firstName}` | _The user's first name_ | - | `last_name` | Unspecified | `${user.lastName}` | _The user's last name_ | + | `email` | Unspecified | `user.email` | _The user's email address_ | + | `first_name` | Unspecified | `user.firstName` | _The user's first name_ | + | `last_name` | Unspecified | `user.lastName` | _The user's last name_ | 4. The following table illustrates expected **Group Attribute Statements**: @@ -380,6 +381,7 @@ We recommend using the following values: | name | name format | value | | ---- | ----------- | ----- | +| NameID | Unspecified | Email | | email | Unspecified | Email | | first_name | Unspecified | First Name | | last_name | Unspecified | Last Name | diff --git a/website/docs/docs/cloud/secure/ip-restrictions.md b/website/docs/docs/cloud/secure/ip-restrictions.md index dacd0c885c4..49a7fe4f267 100644 --- a/website/docs/docs/cloud/secure/ip-restrictions.md +++ b/website/docs/docs/cloud/secure/ip-restrictions.md @@ -19,7 +19,9 @@ To configure IP restrictions, go to **Account Settings** → **IP Restrictions** - Deny IPs flagged by the Security team - Allow only VPN traffic but make an exception for contractors’ IP addresses -IP restrictions will block all user requests done via the API (via personal user token) and the UI. Service tokens are exempt from IP restrictions and can still make requests to dbt Cloud API. +IP restrictions will block all service tokens, user requests done via the API (via personal user token), and the UI if they come from blocked IP addresses. + +For any version control system integrations (Github, Gitlab, ADO, etc.) inbound into dbt Cloud, ensure their IP addresses are added to the allowed list. ### Allowing IPs diff --git a/website/docs/docs/collaborate/explore-projects.md b/website/docs/docs/collaborate/explore-projects.md new file mode 100644 index 00000000000..a4c914259ef --- /dev/null +++ b/website/docs/docs/collaborate/explore-projects.md @@ -0,0 +1,142 @@ +--- +title: "Explore your dbt projects (beta)" +sidebar_label: "Explore dbt projects (beta)" +description: "Learn about dbt Explorer and how to interact with it to understand, improve, and leverage your data pipelines." 
+--- + +With dbt Explorer, you can view your project's [resources](/docs/build/projects) (such as models, tests, and metrics) and their lineage to gain a better understanding of its latest production state. Navigate and manage your projects within dbt Cloud to help your data consumers discover and leverage your dbt resources. + +To display the details about your [project state](/docs/dbt-cloud-apis/project-state), dbt Explorer utilizes the metadata provided through the [Discovery API](/docs/dbt-cloud-apis/discovery-api). The metadata that's available on your project depends on the [deployment environment](/docs/deploy/deploy-environments) you've designated as _production_ in your dbt Cloud project. dbt Explorer automatically retrieves the metadata updates after each job run in the production deployment environment so it will always have the latest state on your project. The metadata it displays depends on the [commands executed by the jobs](/docs/deploy/job-commands). For instance: + +- To update model details or results, you must run `dbt run` or `dbt build` on a given model within a job in the environment. +- To view catalog statistics and columns, you must run `dbt docs generate` within a job in the environment. +- To view test results, you must run `dbt test` or `dbt build` within a job in the environment. +- To view source freshness check results, you must run `dbt source freshness` within a job in the environment. + +The need to run these commands will diminish, and richer, more timely metadata will become available as the Discovery API and its underlying platform evolve. + +:::tip Join the beta + +dbt Explorer is a [beta feature](/docs/dbt-versions/product-lifecycles#dbt-cloud) and subject to change without notification. More updates to this feature coming soon. + +If you’re interested in joining the beta, please contact your account team. + +::: + +## Prerequisites + +- You have a [multi-tenant](/docs/cloud/about-cloud/tenancy#multi-tenant) or AWS single-tenant dbt Cloud account on the [Team or Enterprise plan](https://www.getdbt.com/pricing/). +- You have set up a [production deployment environment](/docs/deploy/deploy-environments#set-as-production-environment-beta) for each project you want to explore. + - There has been at least one successful job run in the production deployment environment. +- You are on the dbt Explorer page. This requires the feature to be enabled for your account. + - To go to the page, select **Explore (Beta)** from the top navigation bar in dbt Cloud. + +## Explore the project’s lineage + +dbt Explorer provides a visualization of your project’s DAG that you can interact with. To start, select **Overview** in the left sidebar and click the **Explore Lineage** button on the main (center) section of the page. + +If you don't see the lineage graph immediately, click **Render Lineage**. It can take some time for the graph to render depending on the size of your project and your computer’s available memory. The graph of very large projects might not render so, instead, you can select a subset of nodes by using selectors. + +The nodes in the lineage graph represent the project’s resources and the edges represent the relationships between the nodes. Resources like tests and macros display in the lineage within their [resource details pages](#view-resource-details) but not within the overall project lineage graph. Nodes are color-coded and include iconography according to their resource type. 
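Because dbt Explorer is powered by the same metadata, you can also query it programmatically. The following is a rough, unofficial sketch using Python and `requests`; the endpoint, authentication header, GraphQL fields, and environment ID shown are assumptions to adapt from the Discovery API docs for your own account, region, and plan.

```python
import os
import requests

# Assumptions: multi-tenant North America endpoint and a service token stored in
# an environment variable. Single-tenant accounts and other regions use different hosts.
DISCOVERY_API_URL = "https://metadata.cloud.getdbt.com/graphql"

# Illustrative query shape only -- confirm the exact schema, argument types,
# and available fields in the Discovery API documentation for your account.
QUERY = """
query Models($environmentId: BigInt!, $first: Int!) {
  environment(id: $environmentId) {
    applied {
      models(first: $first) {
        edges { node { name uniqueId } }
      }
    }
  }
}
"""

response = requests.post(
    DISCOVERY_API_URL,
    headers={"Authorization": f"Bearer {os.environ['DBT_CLOUD_SERVICE_TOKEN']}"},
    # 123456 is a placeholder; use your production environment's ID
    json={"query": QUERY, "variables": {"environmentId": 123456, "first": 10}},
    timeout=30,
)
response.raise_for_status()
print(response.json())
```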
+ +To interact with the lineage graph, you can: + +- Hover over any item in the graph to display the resource’s name and type. +- Zoom in and out on the graph by mouse-scrolling. +- Grab and move the graph. +- Click on a resource to highlight its relationship with other resources in your project. +- [Search and select specific resources](#search-resources) or a subset of the DAG using selectors and lineage (for example, `+[YOUR_RESOURCE_NAME]` displays all nodes upstream of a particular resource). +- [View resource details](#view-resource-details) by selecting a node in the graph (double-clicking). + + + + + +## Search for resources {#search-resources} +With the search bar (on the upper left of the page or in a lineage graph), you can search using keywords or selectors (also known as *selector methods*). The resources that match your search criteria will display as a table in the main section of the page. When you select a resource in the table, its [resource details page](#view-resource-details) will display. + +When using keyword search, dbt Explorer will search through your resources using metadata such as resource type, resource name, column name, source name, tags, schema, database, version, alias/identifier, and package name. + +When using selector search, you can utilize the dbt node selection syntax including set and graph operators (like `+`). To learn more about selectors, refer to [Syntax overview](/reference/node-selection/syntax), [Graph operators](/reference/node-selection/graph-operators), and [Set operators](/reference/node-selection/set-operators). + +Below are the selection methods currently available in dbt Explorer. For more information about each of them, refer to [Methods](/reference/node-selection/methods). + +- **fqn:** — Find resources by [file or fully qualified name](/reference/node-selection/methods#the-file-or-fqn-method). +- **source:** — Find resources by a specified [source](/reference/node-selection/methods#the-source-method). +- **resource_type:** — Find resources by their [type](/reference/node-selection/methods#the-resource_type-method). +- **package:** — Find resources by the [dbt package](/reference/node-selection/methods#the-package-method) that defines them. +- **tag:** — Find resources by a specified [tag](/reference/node-selection/methods#the-tag-method). + + + +- **group:** — Find models defined within a specified [group](/reference/node-selection/methods#the-group-method). +- **access:** — Find models based on their [access](/reference/node-selection/methods#the-access-method) property. + + + + + +## Use the catalog sidebar + +By default, the catalog sidebar lists all your project’s resources. Select any resource type in the list and all those resources in the project will display as a table in the main section of the page. For a description on the different resource types (like models, metrics, and so on), refer to [About dbt projects](https://docs.getdbt.com/docs/build/projects). + +To browse using a different view, you can choose one of these options from the **View by** dropdown: + +- **Resources** (default) — All resources in the project organized by type. +- **Packages** — All resources in the project organized by the project in which they are defined. +- **File Tree** — All resources in the project organized by the file in which they are defined. This mirrors the file tree in your dbt project repository. +- **Database** — All resources in the project organized by the database and schema in which they are built. 
This mirrors your data platform structure. + + + +## View resource details {#view-resource-details} +You can view the definition and latest run results of any resource in your project. To find a resource and view its details, you can interact with the lineage graph, use search, or browse the catalog. The details (metadata) available to you depends on the resource’s type, its definition, and the [commands](/docs/deploy/job-commands) run within jobs in the production environment. + + + + + +### Example of model details + +An example of the details you might get for a model: + +- **General** — The model’s lineage graph that you can interact with. +- **Code** — The source code and compiled code for the model. +- **Columns** — The available columns in the model. +- **Description** — A [description of the model](/docs/collaborate/documentation#adding-descriptions-to-your-project). +- **Recent** — Information on the last time the model ran, how long it ran for, whether the run was successful, the job ID, and the run ID. +- **Tests** — [Tests](/docs/build/tests) for the model. +- **Details** — Key properties like the model’s relation name (for example, how it’s represented and how you can query it in the data platform: `database.schema.identifier`); model governance attributes like access, group, and if contracted; and more. +- **Relationships** — The nodes the model **Depends On** and is **Referenced by.** + +### Example of exposure details + +An example of the details you might get for an exposure: + +- **Status** — The status on data freshness and data quality. +- **Lineage** — The exposure’s lineage graph. +- **Description** — A description of the exposure. +- **Details** — Details like exposure type, maturity, owner information, and more. +- **Relationships** — The nodes the exposure **Depends On**. + +### Example of test details + +An example of the details you might get for a test: + +- **General** — The test’s lineage graph that you can interact with. +- **Code** — The source code and compiled code for the test. +- **Description** — A description of the test. +- **Recent** — Information on the last time the test ran, how long it ran for, whether the test passed, the job ID, and the run ID. +- **Details** — Details like schema, severity, package, and more. +- **Relationships** — The nodes the test **Depends On**. + +### Example of source details + +An example of the details you might get for each source table within a source collection: + +- **General** — The source’s lineage graph that you can interact with. +- **Columns** — The available columns in the source. +- **Description** — A description of the source. +- **Source freshness** — Information on whether refreshing the data was successful, the last time the source was loaded, the timestamp of when a run generated data, and the run ID. +- **Details** — Details like database, schema, and more. +- **Relationships** — A table that lists all the sources used with their freshness status, the timestamp of when freshness was last checked, and the timestamp of when the source was last loaded. \ No newline at end of file diff --git a/website/docs/docs/collaborate/govern/model-contracts.md b/website/docs/docs/collaborate/govern/model-contracts.md index 380da3c1b72..442a20df1b6 100644 --- a/website/docs/docs/collaborate/govern/model-contracts.md +++ b/website/docs/docs/collaborate/govern/model-contracts.md @@ -192,8 +192,21 @@ In some cases, you can replace a test with its equivalent constraint. 
This has t **Why aren't tests part of the contract?** In a parallel for software APIs, the structure of the API response is the contract. Quality and reliability ("uptime") are also very important attributes of an API's quality, but they are not part of the contract per se. When the contract changes in a backwards-incompatible way, it is a breaking change that requires a bump in major version. -### Can I define a "partial" contract? +### Do I need to define every column for a contract? Currently, dbt contracts apply to **all** columns defined in a model, and they require declaring explicit expectations about **all** of those columns. The explicit declaration of a contract is not an accident—it's very much the intent of this feature. -We are investigating the feasibility of supporting "inferred" or "partial" contracts in the future. This would enable you to define constraints and strict data typing for a subset of columns, while still detecting breaking changes on other columns by comparing against the same model in production. If you're interested, please upvote or comment on [dbt-core#7432](https://github.com/dbt-labs/dbt-core/issues/7432). +At the same time, for models with many columns, we understand that this can mean a _lot_ of yaml. We are investigating the feasibility of supporting "inferred" contracts. This would enable you to define constraints and strict data typing for a subset of columns, while still detecting breaking changes on other columns by comparing against the same model in production. This isn't the same as a "partial" contract, because all columns in the model are still checked at runtime, and matched up with what's defined _explicitly_ in your yaml contract or _implicitly_ with the comparison state. If you're interested in "inferred" contract, please upvote or comment on [dbt-core#7432](https://github.com/dbt-labs/dbt-core/issues/7432). + + +### How are breaking changes handled? + +When comparing to a previous project state, dbt will look for breaking changes that could impact downstream consumers. If breaking changes are detected, dbt will present a contract error. + +Breaking changes include: +- Removing an existing column +- Changing the `data_type` of an existing column +- Removing or modifying one of the `constraints` on an existing column (dbt v1.6 or higher) + +More details are available in the [contract reference](/reference/resource-configs/contract#detecting-breaking-changes). + diff --git a/website/docs/docs/connect-adapters.md b/website/docs/docs/connect-adapters.md index 5632fb3793e..f45da732abb 100644 --- a/website/docs/docs/connect-adapters.md +++ b/website/docs/docs/connect-adapters.md @@ -5,32 +5,18 @@ id: "connect-adapters" Adapters are an essential component of dbt. At their most basic level, they are how dbt connects with the various supported data platforms. At a higher-level, adapters strive to give analytics engineers more transferrable skills as well as standardize how analytics projects are structured. Gone are the days where you have to learn a new language or flavor of SQL when you move to a new job that has a different data platform. That is the power of adapters in dbt — for more detail, read the [What are adapters](/guides/dbt-ecosystem/adapter-development/1-what-are-adapters) guide. -This section provides more details on different ways you can connect dbt to an adapter, and explains what a maintainer is. +This section provides more details on different ways you can connect dbt to an adapter, and explains what a maintainer is. 
### Set up in dbt Cloud -Explore the fastest and most reliable way to deploy dbt using dbt Cloud, a hosted architecture that runs dbt Core across your organization. dbt Cloud lets you seamlessly [connect](/docs/cloud/about-cloud-setup) with a variety of [verified](/docs/supported-data-platforms) data platform providers directly in the dbt Cloud UI. - -dbt Cloud supports data platforms that are verified and [maintained](#maintainers) by dbt Labs or partners. This level of support ensures that users can trust certain adapters for use in production. +Explore the fastest and most reliable way to deploy dbt using dbt Cloud, a hosted architecture that runs dbt Core across your organization. dbt Cloud lets you seamlessly [connect](/docs/cloud/about-cloud-setup) with a variety of [verified](/docs/supported-data-platforms) data platform providers directly in the dbt Cloud UI. ### Install using the CLI -Install dbt Core, which is an open-source tool, locally using the CLI. dbt communicates with a number of different data platforms by using a dedicated adapter plugin for each. When you install dbt Core, you'll also need to install the specific adapter for your database, [connect to dbt Core](/docs/core/about-core-setup), and set up a `profiles.yml` file. - -Data platforms supported in dbt Core may be verified or unverified, and are [maintained](#maintainers) by dbt Labs, partners, or community members. +Install dbt Core, which is an open-source tool, locally using the CLI. dbt communicates with a number of different data platforms by using a dedicated adapter plugin for each. When you install dbt Core, you'll also need to install the specific adapter for your database, [connect to dbt Core](/docs/core/about-core-setup), and set up a `profiles.yml` file. With a few exceptions [^1], you can install all [Verified adapters](/docs/supported-data-platforms) from PyPI using `pip install adapter-name`. For example to install Snowflake, use the command `pip install dbt-snowflake`. The installation will include `dbt-core` and any other required dependencies, which may include both other dependencies and even other adapter plugins. Read more about [installing dbt](/docs/core/installation). - -## Maintainers - -Who made and maintains an adapter is certainly relevant, but we recommend using an adapter's verification status to determine the quality and health of an adapter. So far there are three categories of maintainers: - -| Supported by | Maintained By | -| ------------ | ---------------- | -| dbt Labs | dbt Labs maintains a set of adapter plugins for some of the most common databases, warehouses, and platforms. As for why particular data platforms were chosen, see ["Why Verify an Adapter"](/guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter#why-verify-an-adapter) | -| Partner | These adapter plugins are built and maintained by the same people who build and maintain the complementary data technology. | -| Community | These adapter plugins are contributed and maintained by members of the community. 🌱 | [^1]: Here are the two different adapters. 
Use the PyPI package name when installing with `pip` | Adapter repo name | PyPI package name | diff --git a/website/docs/docs/core/connect-data-platform/bigquery-setup.md b/website/docs/docs/core/connect-data-platform/bigquery-setup.md index b0fc9fa7cf0..6b5bac53600 100644 --- a/website/docs/docs/core/connect-data-platform/bigquery-setup.md +++ b/website/docs/docs/core/connect-data-platform/bigquery-setup.md @@ -11,7 +11,7 @@ meta: min_supported_version: 'n/a' slack_channel_name: '#db-bigquery' slack_channel_link: 'https://getdbt.slack.com/archives/C99SNSRTK' - platform_name: 'Big Query' + platform_name: 'BigQuery' config_page: '/reference/resource-configs/bigquery-configs' --- @@ -251,11 +251,17 @@ In older versions of `dbt-bigquery`, this same config was called `timeout_second ::: -No timeout is set by default. (For historical reasons, some query types use a default of 300 seconds when the `job_execution_timeout_seconds` configuration is not set.) When `job_execution_timeout_seconds` is set, if any dbt query, including a model's SQL transformation, takes longer than 300 seconds to complete, BigQuery might cancel the query and issue the following error: +No timeout is set by default. (For historical reasons, some query types use a default of 300 seconds when the `job_execution_timeout_seconds` configuration is not set). When you do set the `job_execution_timeout_seconds`, if any dbt query takes more than 300 seconds to finish, the dbt-bigquery adapter will run into an exception: ``` Operation did not complete within the designated timeout. ``` + +:::caution Note + +The `job_execution_timeout_seconds` represents the number of seconds to wait for the [underlying HTTP transport](https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result). It _doesn't_ represent the maximum allowable time for a BigQuery job itself. So, if dbt-bigquery ran into an exception at 300 seconds, the actual BigQuery job could still be running for the time set in BigQuery's own timeout settings. + +::: You can change the timeout seconds for the job execution step by configuring `job_execution_timeout_seconds` in the BigQuery profile: @@ -317,56 +323,6 @@ my-profile:
- - -BigQuery supports query timeouts. By default, the timeout is set to 300 seconds. If a dbt model takes longer than this timeout to complete, then BigQuery may cancel the query and issue the following error: - -``` - Operation did not complete within the designated timeout. -``` - -To change this timeout, use the `timeout_seconds` configuration: - - - -```yaml -my-profile: - target: dev - outputs: - dev: - type: bigquery - method: oauth - project: abc-123 - dataset: my_dataset - timeout_seconds: 600 # 10 minutes -``` - - - -The `retries` profile configuration designates the number of times dbt should retry queries that result in unhandled server errors. This configuration is only specified for BigQuery targets. Example: - - - -```yaml -# This example target will retry BigQuery queries 5 -# times with a delay. If the query does not succeed -# after the fifth attempt, then dbt will raise an error - -my-profile: - target: dev - outputs: - dev: - type: bigquery - method: oauth - project: abc-123 - dataset: my_dataset - retries: 5 -``` - - - - - ### Dataset locations The location of BigQuery datasets can be configured using the `location` configuration in a BigQuery profile. diff --git a/website/docs/docs/core/connect-data-platform/duckdb-setup.md b/website/docs/docs/core/connect-data-platform/duckdb-setup.md index 7896e4abeae..a3fee5a5164 100644 --- a/website/docs/docs/core/connect-data-platform/duckdb-setup.md +++ b/website/docs/docs/core/connect-data-platform/duckdb-setup.md @@ -4,7 +4,7 @@ description: "Read this guide to learn about the DuckDB warehouse setup in dbt." meta: maintained_by: Community authors: 'Josh Wills (https://github.com/jwills)' - github_repo: 'jwills/dbt-duckdb' + github_repo: 'duckdb/dbt-duckdb' pypi_package: 'dbt-duckdb' min_core_version: 'v1.0.1' cloud_support: Not Supported diff --git a/website/docs/docs/core/connect-data-platform/glue-setup.md b/website/docs/docs/core/connect-data-platform/glue-setup.md index e0fb9556853..e56e5bcd902 100644 --- a/website/docs/docs/core/connect-data-platform/glue-setup.md +++ b/website/docs/docs/core/connect-data-platform/glue-setup.md @@ -58,15 +58,14 @@ For further (and more likely up-to-date) info, see the [README](https://github.c ### Configuring your AWS profile for Glue Interactive Session There are two IAM principals used with interactive sessions. -- Client principal: The princpal (either user or role) calling the AWS APIs (Glue, Lake Formation, Interactive Sessions) -from the local client. This is the principal configured in the AWS CLI and likely the same. +- Client principal: The principal (either user or role) calling the AWS APIs (Glue, Lake Formation, Interactive Sessions) +from the local client. This is the principal configured in the AWS CLI and is likely the same. - Service role: The IAM role that AWS Glue uses to execute your session. This is the same as AWS Glue ETL. Read [this documentation](https://docs.aws.amazon.com/glue/latest/dg/glue-is-security.html) to configure these principals. - -You will find bellow a least privileged policy to enjoy all features of **`dbt-glue`** adapter. +You will find below a least privileged policy to enjoy all features of **`dbt-glue`** adapter. 
Please to update variables between **`<>`**, here are explanations of these arguments: @@ -74,7 +73,7 @@ Please to update variables between **`<>`**, here are explanations of these argu |---|---| |region|The region where your Glue database is stored | |AWS Account|The AWS account where you run your pipeline| -|dbt output database|The database updated by dbt (this is the database configured in the profile.yml of your dbt environment)| +|dbt output database|The database updated by dbt (this is the schema configured in the profile.yml of your dbt environment)| |dbt source database|All databases used as source| |dbt output bucket|The bucket name where the data will be generated by dbt (the location configured in the profile.yml of your dbt environment)| |dbt source bucket|The bucket name of source databases (if they are not managed by Lake Formation)| @@ -113,9 +112,19 @@ Please to update variables between **`<>`**, here are explanations of these argu "glue:BatchDeleteTableVersion", "glue:BatchDeleteTable", "glue:DeletePartition", + "glue:GetUserDefinedFunctions", "lakeformation:ListResources", "lakeformation:BatchGrantPermissions", - "lakeformation:ListPermissions" + "lakeformation:ListPermissions", + "lakeformation:GetDataAccess", + "lakeformation:GrantPermissions", + "lakeformation:RevokePermissions", + "lakeformation:BatchRevokePermissions", + "lakeformation:AddLFTagsToResource", + "lakeformation:RemoveLFTagsFromResource", + "lakeformation:GetResourceLFTags", + "lakeformation:ListLFTags", + "lakeformation:GetLFTag", ], "Resource": [ "arn:aws:glue:::catalog", @@ -189,7 +198,7 @@ Please to update variables between **`<>`**, here are explanations of these argu ### Configuration of the local environment -Because **`dbt`** and **`dbt-glue`** adapter are compatible with Python versions 3.8, and 3.9, check the version of Python: +Because **`dbt`** and **`dbt-glue`** adapters are compatible with Python versions 3.7, 3.8, and 3.9, check the version of Python: ```bash $ python3 --version @@ -212,12 +221,17 @@ $ unzip awscliv2.zip $ sudo ./aws/install ``` -Configure the aws-glue-session package +Install boto3 package ```bash $ sudo yum install gcc krb5-devel.x86_64 python3-devel.x86_64 -y $ pip3 install —upgrade boto3 -$ pip3 install —upgrade aws-glue-sessions +``` + +Install the package: + +```bash +$ pip3 install dbt-glue ``` ### Example config @@ -232,7 +246,6 @@ workers: 2 worker_type: G.1X idle_timeout: 10 schema: "dbt_demo" -database: "dbt_demo" session_provisioning_timeout_in_seconds: 120 location: "s3://dbt_demo_bucket/dbt_demo_data" ``` @@ -241,24 +254,788 @@ location: "s3://dbt_demo_bucket/dbt_demo_data" The table below describes all the options. -|Option |Description | Mandatory | -|---|---|---| -|project_name |The dbt project name. This must be the same as the one configured in the dbt project. |yes| -|type |The driver to use. |yes| -|query-comment |A string to inject as a comment in each query that dbt runs. |no| -|role_arn |The ARN of the interactive session role created as part of the CloudFormation template. |yes| -|region |The AWS Region where you run the data pipeline. |yes| -|workers |The number of workers of a defined workerType that are allocated when a job runs. |yes| -|worker_type |The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X. |yes| -|schema |The schema used to organize data stored in Amazon S3. |yes| -|database |The database in Lake Formation. The database stores metadata tables in the Data Catalog. 
|yes| -|session_provisioning_timeout_in_seconds |The timeout in seconds for AWS Glue interactive session provisioning. |yes| -|location |The Amazon S3 location of your target data. |yes| -|idle_timeout |The AWS Glue session idle timeout in minutes. (The session stops after being idle for the specified amount of time.) |no| -|glue_version |The version of AWS Glue for this session to use. Currently, the only valid options are 2.0 and 3.0. The default value is 2.0. |no| -|security_configuration |The security configuration to use with this session. |no| -|connections |A comma-separated list of connections to use in the session. |no| +| Option | Description | Mandatory | +|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------| +| project_name | The dbt project name. This must be the same as the one configured in the dbt project. | yes | +| type | The driver to use. | yes | +| query-comment | A string to inject as a comment in each query that dbt runs. | no | +| role_arn | The ARN of the glue interactive session IAM role. | yes | +| region | The AWS Region where you run the data pipeline. | yes | +| workers | The number of workers of a defined workerType that are allocated when a job runs. | yes | +| worker_type | The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X. | yes | +| schema | The schema used to organize data stored in Amazon S3.Additionally, is the database in AWS Lake Formation that stores metadata tables in the Data Catalog. | yes | +| session_provisioning_timeout_in_seconds | The timeout in seconds for AWS Glue interactive session provisioning. | yes | +| location | The Amazon S3 location of your target data. | yes | +| query_timeout_in_minutes | The timeout in minutes for a single query. Default is 300 | no | +| idle_timeout | The AWS Glue session idle timeout in minutes. (The session stops after being idle for the specified amount of time) | no | +| glue_version | The version of AWS Glue for this session to use. Currently, the only valid options are 2.0 and 3.0. The default value is 3.0. | no | +| security_configuration | The security configuration to use with this session. | no | +| connections | A comma-separated list of connections to use in the session. | no | +| conf | Specific configuration used at the startup of the Glue Interactive Session (arg --conf) | no | +| extra_py_files | Extra python Libs that can be used by the interactive session. | no | +| delta_athena_prefix | A prefix used to create Athena-compatible tables for Delta tables (if not specified, then no Athena-compatible table will be created) | no | +| tags | The map of key-value pairs (tags) belonging to the session. Ex: `KeyName1=Value1,KeyName2=Value2` | no | +| seed_format | By default `parquet`, can be Spark format compatible like `csv` or `json` | no | +| seed_mode | By default `overwrite`, the seed data will be overwritten, you can set it to `append` if you just want to add new data in your dataset | no | +| default_arguments | The map of key-value pairs parameters belonging to the session. More information on [Job parameters used by AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html). 
Ex: `--enable-continuous-cloudwatch-log=true,--enable-continuous-log-filter=true` | no | +| glue_session_id | re-use the glue-session to run multiple dbt run commands: set a glue session id you need to use | no | +| glue_session_reuse | Reuse the glue-session to run multiple dbt run commands: If set to true, the glue session will not be closed for re-use. If set to false, the session will be closed | no | +| datalake_formats | The ACID data lake format that you want to use if you are doing merge, can be `hudi`, `ìceberg` or `delta` |no| + +## Configs + +### Configuring tables + +When materializing a model as `table`, you may include several optional configs that are specific to the dbt-spark plugin, in addition to the standard [model configs](/reference/model-configs). + +| Option | Description | Required? | Example | +|---------|----------------------------------------------------|-------------------------|--------------------------| +| file_format | The file format to use when creating tables (`parquet`, `csv`, `json`, `text`, `jdbc` or `orc`). | Optional | `parquet`| +| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `date_day` | +| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `country_code` | +| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | +| custom_location | By default, the adapter will store your data in the following path: `location path`/`schema`/`table`. If you don't want to follow that default behaviour, you can use this parameter to set your own custom location on S3 | No | `s3://mycustombucket/mycustompath` | +| hudi_options | When using file_format `hudi`, gives the ability to overwrite any of the default configuration options. | Optional | `{'hoodie.schema.on.read.enable': 'true'}` | +## Incremental models + +dbt seeks to offer useful and intuitive modeling abstractions by means of its built-in configurations and materializations. + +For that reason, the dbt-glue plugin leans heavily on the [`incremental_strategy` config](/docs/build/incremental-models). This config tells the incremental materialization how to build models in runs beyond their first. It can be set to one of three values: + - **`append`** (default): Insert new records without updating or overwriting any existing data. + - **`insert_overwrite`**: If `partition_by` is specified, overwrite partitions in the table with new data. If no `partition_by` is specified, overwrite the entire table with new data. + - **`merge`** (Apache Hudi and Apache Iceberg only): Match records based on a `unique_key`; update old records, and insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.) + +Each of these strategies has its pros and cons, which we'll discuss below. As with any model config, `incremental_strategy` may be specified in `dbt_project.yml` or within a model file's `config()` block. + +**Notes:** +The default strategy is **`insert_overwrite`** + +### The `append` strategy + +Following the `append` strategy, dbt will perform an `insert into` statement with all new data. The appeal of this strategy is that it is straightforward and functional across all platforms, file types, connection methods, and Apache Spark versions. 
However, this strategy _cannot_ update, overwrite, or delete existing data, so it is likely to insert duplicate records for many data sources. + +#### Source code +```sql +{{ config( + materialized='incremental', + incremental_strategy='append', +) }} + +-- All rows returned by this query will be appended to the existing table + +select * from {{ ref('events') }} +{% if is_incremental() %} + where event_ts > (select max(event_ts) from {{ this }}) +{% endif %} +``` +#### Run Code +```sql +create temporary view spark_incremental__dbt_tmp as + + select * from analytics.events + + where event_ts >= (select max(event_ts) from {{ this }}) + +; + +insert into table analytics.spark_incremental + select `date_day`, `users` from spark_incremental__dbt_tmp +``` + +### The `insert_overwrite` strategy + +This strategy is most effective when specified alongside a `partition_by` clause in your model config. dbt will run an [atomic `insert overwrite` statement](https://spark.apache.org/docs/latest/sql-ref-syntax-dml-insert-overwrite-table.html) that dynamically replaces all partitions included in your query. Be sure to re-select _all_ of the relevant data for a partition when using this incremental strategy. + +If no `partition_by` is specified, then the `insert_overwrite` strategy will atomically replace all contents of the table, overriding all existing data with only the new records. The column schema of the table remains the same, however. This can be desirable in some limited circumstances since it minimizes downtime while the table contents are overwritten. The operation is comparable to running `truncate` + `insert` on other databases. For atomic replacement of Delta-formatted tables, use the `table` materialization (which runs `create or replace`) instead. + +#### Source Code +```sql +{{ config( + materialized='incremental', + partition_by=['date_day'], + file_format='parquet' +) }} + +/* + Every partition returned by this query will be overwritten + when this model runs +*/ + +with new_events as ( + + select * from {{ ref('events') }} + + {% if is_incremental() %} + where date_day >= date_add(current_date, -1) + {% endif %} + +) + +select + date_day, + count(*) as users + +from events +group by 1 +``` + +#### Run Code + +```sql +create temporary view spark_incremental__dbt_tmp as + + with new_events as ( + + select * from analytics.events + + + where date_day >= date_add(current_date, -1) + + + ) + + select + date_day, + count(*) as users + + from events + group by 1 + +; + +insert overwrite table analytics.spark_incremental + partition (date_day) + select `date_day`, `users` from spark_incremental__dbt_tmp +``` + +Specifying `insert_overwrite` as the incremental strategy is optional since it's the default strategy used when none is specified. + +### The `merge` strategy + +**Compatibility:** +- Hudi : OK +- Delta Lake : OK +- Iceberg : OK +- Lake Formation Governed Tables : On going + +NB: + +- For Glue 3: you have to set up a [Glue connectors](https://docs.aws.amazon.com/glue/latest/ug/connectors-chapter.html). + +- For Glue 4: use the `datalake_formats` option in your profile.yml + +When using a connector be sure that your IAM role has these policies: +``` +{ + "Sid": "access_to_connections", + "Action": [ + "glue:GetConnection", + "glue:GetConnections" + ], + "Resource": [ + "arn:aws:glue:::catalog", + "arn:aws:glue:::connection/*" + ], + "Effect": "Allow" +} +``` +and that the managed policy `AmazonEC2ContainerRegistryReadOnly` is attached. 
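Before the connector setup links that follow, here is a minimal sketch of the Glue 4 route mentioned above, where the data lake format is declared directly in `profiles.yml` rather than through a Marketplace connector. The profile name and values are placeholders; complete, working profiles are shown in the Hudi, Delta, and Iceberg sections below:

```yaml
my_glue_profile:              # placeholder profile name
  target: dev
  outputs:
    dev:
      type: glue
      glue_version: "4.0"
      datalake_formats: hudi  # or delta / iceberg, as described below
      # plus the other required options (role_arn, region, workers, worker_type,
      # schema, session_provisioning_timeout_in_seconds, location) and any
      # format-specific `conf` shown in the sections below
```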
+Be sure that you follow the getting started instructions [here](https://docs.aws.amazon.com/glue/latest/ug/setting-up.html#getting-started-min-privs-connectors). + + +This [blog post](https://aws.amazon.com/blogs/big-data/part-1-integrate-apache-hudi-delta-lake-apache-iceberg-datasets-at-scale-aws-glue-studio-notebook/) also explains how to set up and works with Glue Connectors + +#### Hudi + +**Usage notes:** The `merge` with Hudi incremental strategy requires: +- To add `file_format: hudi` in your table configuration +- To add a datalake_formats in your profile : `datalake_formats: hudi` + - Alternatively, to add a connection in your profile: `connections: name_of_your_hudi_connector` +- To add Kryo serializer in your Interactive Session Config (in your profile): `conf: spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.hive.convertMetastoreParquet=false` + +dbt will run an [atomic `merge` statement](https://hudi.apache.org/docs/writing_data#spark-datasource-writer) which looks nearly identical to the default merge behavior on Snowflake and BigQuery. If a `unique_key` is specified (recommended), dbt will update old records with values from new records that match the key column. If a `unique_key` is not specified, dbt will forgo match criteria and simply insert all new records (similar to `append` strategy). + +#### Profile config example +```yaml +test_project: + target: dev + outputs: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "4.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + conf: spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.hive.convertMetastoreParquet=false + datalake_formats: hudi +``` + +#### Source Code example +```sql +{{ config( + materialized='incremental', + incremental_strategy='merge', + unique_key='user_id', + file_format='hudi', + hudi_options={ + 'hoodie.datasource.write.precombine.field': 'eventtime', + } +) }} + +with new_events as ( + + select * from {{ ref('events') }} + + {% if is_incremental() %} + where date_day >= date_add(current_date, -1) + {% endif %} + +) + +select + user_id, + max(date_day) as last_seen + +from events +group by 1 +``` + +#### Delta + +You can also use Delta Lake to be able to use merge feature on tables. + +**Usage notes:** The `merge` with Delta incremental strategy requires: +- To add `file_format: delta` in your table configuration +- To add a datalake_formats in your profile : `datalake_formats: delta` + - Alternatively, to add a connection in your profile: `connections: name_of_your_delta_connector` +- To add the following config in your Interactive Session Config (in your profile): `conf: "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog` + +**Athena:** Athena is not compatible by default with delta tables, but you can configure the adapter to create Athena tables on top of your delta table. 
To do so, you need to configure the two following options in your profile: +- For Delta Lake 2.1.0 supported natively in Glue 4.0: `extra_py_files: "/opt/aws_glue_connectors/selected/datalake/delta-core_2.12-2.1.0.jar"` +- For Delta Lake 1.0.0 supported natively in Glue 3.0: `extra_py_files: "/opt/aws_glue_connectors/selected/datalake/delta-core_2.12-1.0.0.jar"` +- `delta_athena_prefix: "the_prefix_of_your_choice"` +- If your table is partitioned, then the addition of new partition is not automatic, you need to perform an `MSCK REPAIR TABLE your_delta_table` after each new partition adding + +#### Profile config example +```yaml +test_project: + target: dev + outputs: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "4.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + datalake_formats: delta + conf: "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" + extra_py_files: "/opt/aws_glue_connectors/selected/datalake/delta-core_2.12-2.1.0.jar" + delta_athena_prefix: "delta" +``` + +#### Source Code example +```sql +{{ config( + materialized='incremental', + incremental_strategy='merge', + unique_key='user_id', + partition_by=['dt'], + file_format='delta' +) }} + +with new_events as ( + + select * from {{ ref('events') }} + + {% if is_incremental() %} + where date_day >= date_add(current_date, -1) + {% endif %} + +) + +select + user_id, + max(date_day) as last_seen, + current_date() as dt + +from events +group by 1 +``` + +#### Iceberg + +**Usage notes:** The `merge` with Iceberg incremental strategy requires: +- To attach the AmazonEC2ContainerRegistryReadOnly Manged policy to your execution role : +- To add the following policy to your execution role to enable commit locking in a dynamodb table (more info [here](https://iceberg.apache.org/docs/latest/aws/#dynamodb-lock-manager)). Note that the DynamoDB table specified in the resource field of this policy should be the one that is mentioned in your dbt profiles (`--conf spark.sql.catalog.glue_catalog.lock.table=myGlueLockTable`). By default, this table is named `myGlueLockTable` and is created automatically (with On-Demand Pricing) when running a dbt-glue model with Incremental Materialization and Iceberg file format. If you want to name the table differently or to create your own table without letting Glue do it on your behalf, please provide the `iceberg_glue_commit_lock_table` parameter with your table name (eg. `MyDynamoDbTable`) in your dbt profile. +```yaml +iceberg_glue_commit_lock_table: "MyDynamoDbTable" +``` +- the latest connector for iceberg in AWS marketplace uses Ver 0.14.0 for Glue 3.0, and Ver 1.2.1 for Glue 4.0 where Kryo serialization fails when writing iceberg, use "org.apache.spark.serializer.JavaSerializer" for spark.serializer instead, more info [here](https://github.com/apache/iceberg/pull/546) + +Make sure you update your conf with `--conf spark.sql.catalog.glue_catalog.lock.table=` and, you change the below iam permission with your correct table name. 
+``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "CommitLockTable", + "Effect": "Allow", + "Action": [ + "dynamodb:CreateTable", + "dynamodb:BatchGetItem", + "dynamodb:BatchWriteItem", + "dynamodb:ConditionCheckItem", + "dynamodb:PutItem", + "dynamodb:DescribeTable", + "dynamodb:DeleteItem", + "dynamodb:GetItem", + "dynamodb:Scan", + "dynamodb:Query", + "dynamodb:UpdateItem" + ], + "Resource": "arn:aws:dynamodb:::table/myGlueLockTable" + } + ] +} +``` +- To add `file_format: Iceberg` in your table configuration +- To add `datalake_formats` in your profile: `datalake_formats: iceberg` + - Alternatively, to add connections in your profile: `connections: name_of_your_iceberg_connector` + - For Athena version 3: + - The adapter is compatible with the Iceberg Connector from AWS Marketplace with Glue 3.0 as Fulfillment option and 0.14.0 (Oct 11, 2022) as Software version + - The latest connector for Iceberg in AWS Marketplace uses version 0.14.0 for Glue 3.0 and version 1.2.1 for Glue 4.0, where Kryo serialization fails when writing Iceberg; use "org.apache.spark.serializer.JavaSerializer" for spark.serializer instead, more info [here](https://github.com/apache/iceberg/pull/546) + - For Athena version 2: The adapter is compatible with the Iceberg Connector from AWS Marketplace with Glue 3.0 as Fulfillment option and 0.12.0-2 (Feb 14, 2022) as Software version +- To add the following config in your Interactive Session Config (in your profile): +```--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer + --conf spark.sql.warehouse=s3:// + --conf spark.sql.catalog.glue_catalog=org.apache.iceberg.spark.SparkCatalog + --conf spark.sql.catalog.glue_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog + --conf spark.sql.catalog.glue_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO + --conf spark.sql.catalog.glue_catalog.lock-impl=org.apache.iceberg.aws.dynamodb.DynamoDbLockManager + --conf spark.sql.catalog.glue_catalog.lock.table=myGlueLockTable + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +``` + - For Glue 3.0, set `spark.sql.catalog.glue_catalog.lock-impl` to `org.apache.iceberg.aws.glue.DynamoLockManager` instead + +dbt will run an [atomic `merge` statement](https://iceberg.apache.org/docs/latest/spark-writes/) which looks nearly identical to the default merge behavior on Snowflake and BigQuery. You must provide a `unique_key` to perform the merge operation, otherwise it will fail. The key must be provided as a Python list and can contain multiple column names to create a composite `unique_key`. + +##### Notes +- When using a `custom_location` in Iceberg, avoid a final trailing slash. A trailing slash leads to improper handling of the location and to issues when reading the data from query engines like Trino. The issue should be fixed for Iceberg versions > 0.13. The related GitHub issue can be found [here](https://github.com/apache/iceberg/issues/4582). +- Iceberg also supports `insert_overwrite` and `append` strategies. +- The `warehouse` conf must be provided, but it's overwritten by the adapter `location` in your profile or `custom_location` in model configuration. +- By default, this materialization has `iceberg_expire_snapshots` set to 'True'; if you need to have historical auditable changes, set `iceberg_expire_snapshots='False'`.
+- Currently, due to dbt internals, the Iceberg catalog used when running Glue interactive sessions with dbt-glue has the hardcoded name `glue_catalog`. This name is an alias pointing to the AWS Glue Catalog but is specific to each session. If you want to interact with your data in another session without using dbt-glue (from a Glue Studio notebook, for example), you can configure another alias (that is, another name for the Iceberg catalog). To illustrate this concept, you can set the following in your configuration file: +``` +--conf spark.sql.catalog.RandomCatalogName=org.apache.iceberg.spark.SparkCatalog +``` +Then run a session in an AWS Glue Studio notebook with the following config: +``` +--conf spark.sql.catalog.AnotherRandomCatalogName=org.apache.iceberg.spark.SparkCatalog +``` +In both cases, the underlying catalog would be the AWS Glue Catalog, unique in your AWS account and Region, and you would be able to work with the exact same data. Also make sure that if you change the name of the Glue catalog alias, you change it in every other `--conf` where it's used: +``` + --conf spark.sql.catalog.RandomCatalogName=org.apache.iceberg.spark.SparkCatalog + --conf spark.sql.catalog.RandomCatalogName.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog + ... + --conf spark.sql.catalog.RandomCatalogName.lock-impl=org.apache.iceberg.aws.glue.DynamoLockManager +``` +- A full reference to `table_properties` can be found [here](https://iceberg.apache.org/docs/latest/configuration/). +- Iceberg tables are natively supported by Athena, so you can query tables created and managed with the dbt-glue adapter from Athena. +- Incremental materialization with the Iceberg file format supports dbt snapshots. You can run a `dbt snapshot` command that queries an Iceberg table and creates a dbt-style snapshot of it.
+ +#### Profile config example +```yaml +test_project: + target: dev + outputs: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "4.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + datalake_formats: iceberg + conf: --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.warehouse=s3://aws-dbt-glue-datalake-1234567890-eu-west-1/dbt_test_project --conf spark.sql.catalog.glue_catalog=org.apache.iceberg.spark.SparkCatalog --conf spark.sql.catalog.glue_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog --conf spark.sql.catalog.glue_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO --conf spark.sql.catalog.glue_catalog.lock-impl=org.apache.iceberg.aws.dynamodb.DynamoDbLockManager --conf spark.sql.catalog.glue_catalog.lock.table=myGlueLockTable --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +``` + +#### Source Code example +```sql +{{ config( + materialized='incremental', + incremental_strategy='merge', + unique_key=['user_id'], + file_format='iceberg', + iceberg_expire_snapshots='False', + partition_by=['status'] + table_properties={'write.target-file-size-bytes': '268435456'} +) }} + +with new_events as ( + + select * from {{ ref('events') }} + + {% if is_incremental() %} + where date_day >= date_add(current_date, -1) + {% endif %} + +) + +select + user_id, + max(date_day) as last_seen + +from events +group by 1 +``` +#### Iceberg Snapshot source code example +```sql + +{% snapshot demosnapshot %} + +{{ + config( + strategy='timestamp', + target_schema='jaffle_db', + updated_at='dt', + file_format='iceberg' +) }} + +select * from {{ ref('customers') }} + +{% endsnapshot %} + +``` + +## Monitoring your Glue Interactive Session + +Monitoring is an important part of maintaining the reliability, availability, +and performance of AWS Glue and your other AWS solutions. AWS provides monitoring +tools that you can use to watch AWS Glue, identify the required number of workers +required for your Glue Interactive Session, report when something is wrong and +take action automatically when appropriate. AWS Glue provides Spark UI, +and CloudWatch logs and metrics for monitoring your AWS Glue jobs. +More information on: [Monitoring AWS Glue Spark jobs](https://docs.aws.amazon.com/glue/latest/dg/monitor-spark.html) + +**Usage notes:** Monitoring requires: +- To add the following IAM policy to your IAM role: +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "CloudwatchMetrics", + "Effect": "Allow", + "Action": "cloudwatch:PutMetricData", + "Resource": "*", + "Condition": { + "StringEquals": { + "cloudwatch:namespace": "Glue" + } + } + }, + { + "Sid": "CloudwatchLogs", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "logs:CreateLogStream", + "logs:CreateLogGroup", + "logs:PutLogEvents" + ], + "Resource": [ + "arn:aws:logs:*:*:/aws-glue/*", + "arn:aws:s3:::bucket-to-write-sparkui-logs/*" + ] + } + ] +} +``` + +- To add monitoring parameters in your Interactive Session Config (in your profile). 
+More information on [Job parameters used by AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) + +#### Profile config example +```yaml +test_project: + target: dev + outputs: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "4.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + default_arguments: "--enable-metrics=true, --enable-continuous-cloudwatch-log=true, --enable-continuous-log-filter=true, --enable-spark-ui=true, --spark-event-logs-path=s3://bucket-to-write-sparkui-logs/dbt/" +``` + +If you want to use the Spark UI, you can launch the Spark history server using a +AWS CloudFormation template that hosts the server on an EC2 instance, +or launch locally using Docker. More information on [Launching the Spark history server](https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-history.html#monitor-spark-ui-history-local) + +## Enabling AWS Glue Auto Scaling +Auto Scaling is available since AWS Glue version 3.0 or later. More information +on the following AWS blog post: ["Introducing AWS Glue Auto Scaling: Automatically resize serverless computing resources for lower cost with optimized Apache Spark"](https://aws.amazon.com/blogs/big-data/introducing-aws-glue-auto-scaling-automatically-resize-serverless-computing-resources-for-lower-cost-with-optimized-apache-spark/) + +With Auto Scaling enabled, you will get the following benefits: + +* AWS Glue automatically adds and removes workers from the cluster depending on the parallelism at each stage or microbatch of the job run. + +* It removes the need for you to experiment and decide on the number of workers to assign for your AWS Glue Interactive sessions. + +* Once you choose the maximum number of workers, AWS Glue will choose the right size resources for the workload. +* You can see how the size of the cluster changes during the Glue Interactive sessions run by looking at CloudWatch metrics. +More information on [Monitoring your Glue Interactive Session](#Monitoring-your-Glue-Interactive-Session). + +**Usage notes:** AWS Glue Auto Scaling requires: +- To set your AWS Glue version 3.0 or later. +- To set the maximum number of workers (if Auto Scaling is enabled, the `workers` +parameter sets the maximum number of workers) +- To set the `--enable-auto-scaling=true` parameter on your Glue Interactive Session Config (in your profile). +More information on [Job parameters used by AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) + +#### Profile config example +```yaml +test_project: + target: dev + outputs: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "3.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + default_arguments: "--enable-auto-scaling=true" +``` + +## Access Glue catalog in another AWS account +In many cases, you may need to run you dbt jobs to read from another AWS account. 
+ +Review the following link https://repost.aws/knowledge-center/glue-tables-cross-accounts to set up access policies in source and target accounts + +Add the following `"spark.hadoop.hive.metastore.glue.catalogid="` to your conf in the DBT profile, as such, you can have multiple outputs for each of the accounts that you have access to. + +Note: The access cross-accounts need to be within the same AWS Region +#### Profile config example +```yaml +test_project: + target: dev + outputsAccountB: + dev: + type: glue + query-comment: my comment + role_arn: arn:aws:iam::1234567890:role/GlueInteractiveSessionRole + region: eu-west-1 + glue_version: "3.0" + workers: 2 + worker_type: G.1X + schema: "dbt_test_project" + session_provisioning_timeout_in_seconds: 120 + location: "s3://aws-dbt-glue-datalake-1234567890-eu-west-1/" + conf: "--conf hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory + --conf spark.hadoop.hive.metastore.glue.catalogid=" +``` + +## Persisting model descriptions + +Relation-level docs persistence is supported since dbt v0.17.0. For more +information on configuring docs persistence, see [the docs](/reference/resource-configs/persist_docs). + +When the `persist_docs` option is configured appropriately, you'll be able to +see model descriptions in the `Comment` field of `describe [table] extended` +or `show table extended in [database] like '*'`. + +## Always `schema`, never `database` + +Apache Spark uses the terms "schema" and "database" interchangeably. dbt understands +`database` to exist at a higher level than `schema`. As such, you should _never_ +use or set `database` as a node config or in the target profile when running dbt-glue. + +If you want to control the schema/database in which dbt will materialize models, +use the `schema` config and `generate_schema_name` macro _only_. +For more information, check the dbt documentation about [custom schemas](https://docs.getdbt.com/docs/build/custom-schemas). + +## AWS Lakeformation integration +The adapter supports AWS Lake Formation tags management enabling you to associate existing tags defined out of dbt-glue to database objects built by dbt-glue (database, table, view, snapshot, incremental models, seeds). + +- You can enable or disable lf-tags management via config, at model and dbt-project level (disabled by default) +- If enabled, lf-tags will be updated on every dbt run. There are table level lf-tags configs and column-level lf-tags configs. +- You can specify that you want to drop existing database, table column Lake Formation tags by setting the drop_existing config field to True (False by default, meaning existing tags are kept) +- Please note that if the tag you want to associate with the table does not exist, the dbt-glue execution will throw an error + +The adapter also supports AWS Lakeformation data cell filtering. +- You can enable or disable data-cell filtering via config, at model and dbt-project level (disabled by default) +- If enabled, data_cell_filters will be updated on every dbt run. +- You can specify that you want to drop existing table data-cell filters by setting the drop_existing config field to True (False by default, meaning existing filters are kept) +- You can leverage excluded_columns_names **OR** columns config fields to perform Column level security as well. **Please note that you can use one or the other but not both**. 
+- By default, if you don't specify any column or excluded_columns, dbt-glue does not perform Column level filtering and let the principal access all the columns. + +The below configuration let the specified principal (lf-data-scientist IAM user) access rows that have a customer_lifetime_value > 15 and all the columns specified ('customer_id', 'first_order', 'most_recent_order', 'number_of_orders') + +```sql +lf_grants={ + 'data_cell_filters': { + 'enabled': True, + 'drop_existing' : True, + 'filters': { + 'the_name_of_my_filter': { + 'row_filter': 'customer_lifetime_value>15', + 'principals': ['arn:aws:iam::123456789:user/lf-data-scientist'], + 'column_names': ['customer_id', 'first_order', 'most_recent_order', 'number_of_orders'] + } + }, + } + } +``` +The below configuration let the specified principal (lf-data-scientist IAM user) access rows that have a customer_lifetime_value > 15 and all the columns *except* the one specified ('first_name') + +```sql +lf_grants={ + 'data_cell_filters': { + 'enabled': True, + 'drop_existing' : True, + 'filters': { + 'the_name_of_my_filter': { + 'row_filter': 'customer_lifetime_value>15', + 'principals': ['arn:aws:iam::123456789:user/lf-data-scientist'], + 'excluded_column_names': ['first_name'] + } + }, + } + } +``` + +See below some examples of how you can integrate LF Tags management and data cell filtering to your configurations : + +#### At model level +This way of defining your Lakeformation rules is appropriate if you want to handle the tagging and filtering policy at object level. Remember that it overrides any configuration defined at dbt-project level. + +```sql +{{ config( + materialized='incremental', + unique_key="customer_id", + incremental_strategy='append', + lf_tags_config={ + 'enabled': true, + 'drop_existing' : False, + 'tags_database': + { + 'name_of_my_db_tag': 'value_of_my_db_tag' + }, + 'tags_table': + { + 'name_of_my_table_tag': 'value_of_my_table_tag' + }, + 'tags_columns': { + 'name_of_my_lf_tag': { + 'value_of_my_tag': ['customer_id', 'customer_lifetime_value', 'dt'] + }}}, + lf_grants={ + 'data_cell_filters': { + 'enabled': True, + 'drop_existing' : True, + 'filters': { + 'the_name_of_my_filter': { + 'row_filter': 'customer_lifetime_value>15', + 'principals': ['arn:aws:iam::123456789:user/lf-data-scientist'], + 'excluded_column_names': ['first_name'] + } + }, + } + } +) }} + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount as customer_lifetime_value, + current_date() as dt + + from customers + + left join customer_orders using (customer_id) + + left join customer_payments using (customer_id) + +``` + +#### At dbt-project level +This way you can specify tags and data filtering policy for a particular path in your dbt project (eg. models, seeds, models/model_group1, etc.) +This is especially useful for seeds, for which you can't define configuration in the file directly. + +```yml +seeds: + +lf_tags_config: + enabled: true + tags_table: + name_of_my_table_tag: 'value_of_my_table_tag' + tags_database: + name_of_my_database_tag: 'value_of_my_database_tag' +models: + +lf_tags_config: + enabled: true + drop_existing: True + tags_database: + name_of_my_database_tag: 'value_of_my_database_tag' + tags_table: + name_of_my_table_tag: 'value_of_my_table_tag' +``` + +## Tests + +To perform a functional test: +1. 
Install dev requirements: +```bash +$ pip3 install -r dev-requirements.txt +``` + +2. Install dev locally +```bash +$ python3 setup.py build && python3 setup.py install_lib +``` + +3. Export variables +```bash +$ export DBT_S3_LOCATION=s3://mybucket/myprefix +$ export DBT_ROLE_ARN=arn:aws:iam::1234567890:role/GlueInteractiveSessionRole +``` + +4. Run the test +```bash +$ python3 -m pytest tests/functional +``` + +For more information, check the dbt documentation about [testing a new adapter](https://docs.getdbt.com/docs/contributing/testing-a-new-adapter). ## Caveats @@ -269,6 +1046,7 @@ Most dbt Core functionality is supported, but some features are only available w Apache Hudi-only features: 1. Incremental model updates by `unique_key` instead of `partition_by` (see [`merge` strategy](/reference/resource-configs/glue-configs#the-merge-strategy)) + Some dbt features, available on the core adapters, are not yet supported on Glue: 1. [Persisting](/reference/resource-configs/persist_docs) column-level descriptions as database comments 2. [Snapshots](/docs/build/snapshots) diff --git a/website/docs/docs/core/connect-data-platform/snowflake-setup.md b/website/docs/docs/core/connect-data-platform/snowflake-setup.md index 6bc9c980922..22254c30ee0 100644 --- a/website/docs/docs/core/connect-data-platform/snowflake-setup.md +++ b/website/docs/docs/core/connect-data-platform/snowflake-setup.md @@ -163,9 +163,13 @@ my-snowflake-db: ### SSO Authentication -To use SSO authentication for Snowflake, omit a `password` and instead supply an `authenticator` config to your target. `authenticator` can be one of 'externalbrowser' or a valid Okta URL. +To use SSO authentication for Snowflake, omit a `password` and instead supply an `authenticator` config to your target. +`authenticator` can be one of 'externalbrowser' or a valid Okta URL. -**Note**: By default, every connection that dbt opens will require you to re-authenticate in a browser. The Snowflake connector package supports caching your session token, but it [currently only supports Windows and Mac OS](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#optional-using-connection-caching-to-minimize-the-number-of-prompts-for-authentication). See [the Snowflake docs](https://docs.snowflake.com/en/sql-reference/parameters.html#label-allow-id-token) for how to enable this feature in your account. 
+Refer to the following tabs for more info and examples: + + + @@ -175,15 +179,15 @@ my-snowflake-db: outputs: dev: type: snowflake - account: [account id] - user: [username] - role: [user role] + account: [account id] # Snowflake + user: [username] # Snowflake username + role: [user role] # Snowflake user role # SSO config authenticator: externalbrowser - database: [database name] - warehouse: [warehouse name] + database: [database name] # Snowflake database name + warehouse: [warehouse name] # Snowflake warehouse name schema: [dbt schema] threads: [between 1 and 8] client_session_keep_alive: False @@ -199,6 +203,50 @@ my-snowflake-db: + + + + + + +```yaml +my-snowflake-db: + target: dev + outputs: + dev: + type: snowflake + account: [account id] # Snowflake + user: [username] # Snowflake username + role: [user role] # Snowflake user role + + # SSO config -- The three following fields are REQUIRED + authenticator: [Okta account URL] + username: [Okta username] + password: [Okta password] + + database: [database name] # Snowflake database name + warehouse: [warehouse name] # Snowflake warehouse name + schema: [dbt schema] + threads: [between 1 and 8] + client_session_keep_alive: False + query_tag: [anything] + + # optional + connect_retries: 0 # default 0 + connect_timeout: 10 # default: 10 + retry_on_database_errors: False # default: false + retry_all: False # default: false + reuse_connections: False # default: false +``` + + + + + + +**Note**: By default, every connection that dbt opens will require you to re-authenticate in a browser. The Snowflake connector package supports caching your session token, but it [currently only supports Windows and Mac OS](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#optional-using-connection-caching-to-minimize-the-number-of-prompts-for-authentication). + +Refer to the [Snowflake docs](https://docs.snowflake.com/en/sql-reference/parameters.html#label-allow-id-token) for info on how to enable this feature in your account. ## Configurations @@ -224,7 +272,7 @@ The "base" configs for Snowflake targets are shown below. Note that you should a | reuse_connections | No | A boolean flag indicating whether to reuse idle connections to help reduce total connections opened. Default is `False`. | ### account -For AWS accounts in the US West default region, you can use `abc123` (without any other segments). For some AWS accounts you will have to append the region and/or cloud platform. For example, `abc123.eu-west-1` or `abc123.eu-west-2.aws`. For GCP and Azure-based accounts, you have to append the region and cloud platform, such as `gcp` or `azure`, respectively. For example, `abc123.us-central1.gcp`. For details, see Snowflake's documentation: "[Specifying Region Information in Your Account Hostname](https://docs.snowflake.com/en/user-guide/intro-regions.html#specifying-region-information-in-your-account-hostname)" and "[Account Identifier Formats by Cloud Platform and Region](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#account-identifier-formats-by-cloud-platform-and-region)". +For AWS accounts in the US West default region, you can use `abc123` (without any other segments). For some AWS accounts you will have to append the region and/or cloud platform. For example, `abc123.eu-west-1` or `abc123.eu-west-2.aws`. For GCP and Azure-based accounts, you have to append the region and cloud platform, such as `gcp` or `azure`, respectively. For example, `abc123.us-central1.gcp`. 
For details, see Snowflake's documentation: "[Specifying Region Information in Your Account Hostname](https://docs.snowflake.com/en/user-guide/intro-regions.html#specifying-region-information-in-your-account-hostname)". Please also note that the Snowflake account name should be only the account identifier itself, without any prefix. Relevant documentation: "[Account Identifier Formats by Cloud Platform and Region](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#account-identifier-formats-by-cloud-platform-and-region)". ### client_session_keep_alive diff --git a/website/docs/docs/core/connect-data-platform/spark-setup.md b/website/docs/docs/core/connect-data-platform/spark-setup.md index 7c7ac15204b..b22416fd3a5 100644 --- a/website/docs/docs/core/connect-data-platform/spark-setup.md +++ b/website/docs/docs/core/connect-data-platform/spark-setup.md @@ -211,8 +211,6 @@ your_profile_name:
- - ## Optional configurations ### Retries @@ -231,13 +229,14 @@ connect_retries: 3 - + ### Server side configuration Spark can be customized using [Application Properties](https://spark.apache.org/docs/latest/configuration.html). Using these properties the execution can be customized, for example, to allocate more memory to the driver process. Also, the Spark SQL runtime can be set through these properties. For example, this allows the user to [set a Spark catalogs](https://spark.apache.org/docs/latest/configuration.html#spark-sql). + ## Caveats ### Usage with EMR diff --git a/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/ci-updates-phase2-rn.md b/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/ci-updates-phase2-rn.md new file mode 100644 index 00000000000..fefa07e6d6c --- /dev/null +++ b/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/ci-updates-phase2-rn.md @@ -0,0 +1,32 @@ +--- +title: "Update: Improvements to dbt Cloud continuous integration" +description: "September 2023: dbt Cloud now has two types of jobs — deploy jobs and CI jobs — with streamlined setup and improved efficiency. " +sidebar_label: "Update: Improvements to dbt jobs" +tags: [Sept-2023, CI] +date: 2023-09-11 +sidebar_position: 10 +--- + +dbt Cloud now has two distinct job types: [deploy jobs](/docs/deploy/deploy-jobs) for building production data assets, and [CI jobs](/docs/deploy/ci-jobs) for checking code changes. These jobs perform fundamentally different tasks so dbt Labs improved the setup experience with better defaults for each. + +With two types of jobs, instead of one generic type, we can better guide you through the setup flow. Best practices are built into the default settings so you can go from curious to being set up in seconds. + + + +And, we now have more efficient state comparisons on CI checks: never waste a build or test on code that hasn’t been changed. We now diff between the Git PR code and what’s running in production more efficiently with the introduction of deferral to an environment versus a job. To learn more, refer to [Continuous integration in dbt Cloud](/docs/deploy/continuous-integration) and [Get started with continuous integration tests](/guides/orchestration/set-up-ci/overview). + +Below is a comparison table that describes how deploy jobs and CI jobs behave differently: + +| | Deploy Jobs | CI Jobs | +| --- | --- | --- | +| Purpose | Builds production data assets. | Builds and tests new code before merging changes into production. | +| Trigger types | Triggered by a schedule or by API. | Triggered by a commit to a PR or by API. | +| Destination | Builds into a production database and schema. | Builds into a staging database and ephemeral schema, lived for the lifetime of the PR. | +| Execution mode | Runs execute sequentially, so as to not have collisions on the underlying DAG. | Runs execute in parallel to promote team velocity. | +| Efficiency run savings | Detects over-scheduled jobs and cancels unnecessary runs to avoid queue clog. | Cancels existing runs when a newer commit is pushed to avoid redundant work. | +| State comparison | Only sometimes needs to detect state. | Almost always needs to compare state against the production environment to build on modified code and its dependents. 
| + + +## What you need to update + +If you previously set up a job using the [Create Job](/dbt-cloud/api-v2#/operations/Create%20Job) API endpoint before September 11, 2023, you must re-create the job as described in [Trigger a CI job with the API](/docs/deploy/ci-jobs#trigger-a-ci-job-with-the-api). This is because you must set the `job_type` to be `ci`. \ No newline at end of file diff --git a/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/product-docs-summer-rn.md b/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/product-docs-summer-rn.md new file mode 100644 index 00000000000..a647bb5f585 --- /dev/null +++ b/website/docs/docs/dbt-versions/release-notes/04-Sept-2023/product-docs-summer-rn.md @@ -0,0 +1,43 @@ +--- +title: "Summer 2023 product docs updates" +id: "product-docs-summer" +description: "Summer 2023: The Product docs team merged 256 PRs, made various updates to dbt Cloud and Core, such as adding What's New, writing Semantic Layer beta docs, releasing dbt 1.6 docs, and more!" +sidebar_label: "Update: Product docs changes" +tags: [July-2023, Aug-2023, product-docs] +date: 2023-09-13 +sidebar_position: 09 +--- + +Hello from dbt's Product Documentation team (the stewards of the docs.getdbt.com site): @mirnawong1, @matthewshaver, @nghi-ly, and @runleonarun. What a busy summer! We merged 256 PRs between July 1st and August 31. + +We'd like to recognize all of the docs and support from our partner team, Developer Experience: @jasnonaz @gwenwindflower @dbeatty10 @dataders @joellabes @Jstein77 @dave-connors-3! + +We'd also like to give a special thanks to the 22 community members who contributed to the [dbt Product docs](https://docs.getdbt.com) for the first time. :pray: Based on feedback from the dbt community, we made these changes: + +- Added a [permissions table](/docs/cloud/manage-access/enterprise-permissions) for Enterprise accounts +- Added a [browser session page](/docs/cloud/about-cloud/browsers#browser-sessions) that clarifies dbt Cloud’s browser session time and when it logs users off. + +You can provide feedback by opening a pull request or issue in [our repo](https://github.com/dbt-labs/docs.getdbt.com) or reaching out in the dbt community Slack channel [#dbt-product-docs](https://getdbt.slack.com/archives/C0441GSRU04)). + +## :zap: General docs projects + +* Added the ability to collapse sections you’re not currently looking at. There were quite a few people who wanted this, and it bugged us too, so we were happy to get this shipped! +* Introduced the idea of [“Trusted” adapters](/docs/supported-data-platforms#types-of-adapters). + +## ☁ Cloud projects + +* The **What’s new?** product update widget is back in the dbt Cloud UI! The Docs team will begin updating the content to keep you informed about new features. +* Launched the re-released [Semantic Layer beta docs](/docs/use-dbt-semantic-layer/dbt-sl), which introduces users to the new API, new guide to set up MetricFlow and the new Semantic Layer, as well as revamp the ‘Use the dbt Semantic Layer’ section for users. +* Updated [Admin API v2 and v3](/docs/dbt-cloud-apis/admin-cloud-api) to help you understand the differences between them and which version includes the endpoints you use. +* To improve discoverability, the docs team made changes to the [deploy dbt sidebar](/docs/deploy/deployments). We added cards and aligned better with the dbt Cloud UI and the way it’s used. +* Deprecated legacy job schemas in the [Discovery API](/docs/dbt-cloud-apis/discovery-api). 
+* Added a page to describe [experimental and beta features](/docs/dbt-versions/experimental-features) in dbt Cloud and what you need to know about them. +* Added a section to introduce a new beta feature [**Extended Attributes**](/docs/dbt-cloud-environments#extended-attributes-beta), which allows users to set a flexible `profiles.yml` snippet in their dbt Cloud Environment settings. +## 🎯 Core projects + +* We released [dbt 1.6](/guides/migration/versions/upgrading-to-v1.6)! We added docs for the new commands `dbt retry` and `dbt clone` + +## New 📚 Guides, ✏️ blog posts, and FAQs +* Check out how these community members use the dbt community in the [Community spotlight](/community/spotlight). +* Blog posts published this summer include [Optimizing Materialized Views with dbt](/blog/announcing-materialized-views), [Data Vault 2.0 with dbt Cloud](/blog/data-vault-with-dbt-cloud), and [Create dbt Documentation and Tests 10x faster with ChatGPT](/blog/create-dbt-documentation-10x-faster-with-chatgpt) +* We now have two new best practice guides: [How we build our metrics](/guides/best-practices/how-we-build-our-metrics/semantic-layer-1-intro) and [Set up Continuous Integration](/guides/orchestration/set-up-ci/overview). diff --git a/website/docs/docs/dbt-versions/release-notes/05-Aug-2023/deprecation-endpoints-discovery.md b/website/docs/docs/dbt-versions/release-notes/05-Aug-2023/deprecation-endpoints-discovery.md new file mode 100644 index 00000000000..cd088b92fab --- /dev/null +++ b/website/docs/docs/dbt-versions/release-notes/05-Aug-2023/deprecation-endpoints-discovery.md @@ -0,0 +1,126 @@ +--- +title: "Deprecation: Query patterns and endpoints in the dbt Cloud Discovery API" +description: "August 2023: Learn about the upcoming deprecation of certain endpoints and query patterns in the Discovery API." +sidebar_position: 6 +sidebar_label: "Deprecation: Certain Discovery API endpoints and query patterns" +tags: [Aug-2023, API] +date: 2023-08-31 +--- + +dbt Labs has deprecated and will be deprecating certain query patterns and replacing them with new conventions to enhance the performance of the dbt Cloud [Discovery API](/docs/dbt-cloud-apis/discovery-api). + +All these changes will be in effect on _September 7, 2023_. + +We understand that these changes might require adjustments to your existing integration with the Discovery API. Please [contact us](mailto:support@getdbt.com) with any questions. We're here to help you during this transition period. + +## Job-based queries + +Job-based queries that use the data type `Int` for IDs will be deprecated. They will be marked as deprecated in the [GraphQL explorer](https://metadata.cloud.getdbt.com/graphql). The new convention will be for you to use the data type `BigInt` instead. + +This change will be in effect starting September 7, 2023. + + +Example of query before deprecation: + +```graphql +query ($jobId: Int!) { + models(jobId: $jobId){ + uniqueId + } +} +``` + +Example of query after deprecation: + +```graphql +query ($jobId: BigInt!) { + job(id: $jobId) { + models { + uniqueId + } + } +} +``` + +## modelByEnvironment queries + +The `modelByEnvironment` object has been renamed and moved into the `environment` object. This change is in effect and has been since August 15, 2023. 
+ +Example of query before deprecation: + +```graphql +query ($environmentId: Int!, $uniqueId: String) { + modelByEnvironment(environmentId: $environmentId, uniqueId: $uniqueId) { + uniqueId + executionTime + executeCompletedAt + } +} +``` + +Example of query after deprecation: + +```graphql +query ($environmentId: BigInt!, $uniqueId: String) { + environment(id: $environmentId) { + applied { + modelHistoricalRuns(uniqueId: $uniqueId) { + uniqueId + executionTime + executeCompletedAt + } + } + } +} +``` + + +## Environment and account queries + +Environment and account queries that use `Int` as a data type for ID have been deprecated. IDs must now be in `BigInt`. This change is in effect and has been since August 15, 2023. + + +Example of query before deprecation: + +```graphql +query ($environmentId: Int!, $first: Int!) { + environment(id: $environmentId) { + applied { + models(first: $first) { + edges { + node { + uniqueId + executionInfo { + lastRunId + } + } + } + } + } + } +} +``` + + +Example of query after deprecation: + +```graphql +query ($environmentId: BigInt!, $first: Int!) { + environment(id: $environmentId) { + applied { + models(first: $first) { + edges { + node { + uniqueId + executionInfo { + lastRunId + } + } + } + } + } + } +} +``` + + diff --git a/website/docs/docs/dbt-versions/release-notes/08-May-2023/may-ide-updates.md b/website/docs/docs/dbt-versions/release-notes/08-May-2023/may-ide-updates.md index 5503b40576d..d85ffa154dd 100644 --- a/website/docs/docs/dbt-versions/release-notes/08-May-2023/may-ide-updates.md +++ b/website/docs/docs/dbt-versions/release-notes/08-May-2023/may-ide-updates.md @@ -1,46 +1,46 @@ ---- -title: "May IDE updates and fixes" -id: "may-ide-updates" -description: "May 2023 release note: We've launched SQLFluff in beta, released an IDE UI page, significantly improved IDE performance, improved error messages, fixed bugs, and more." -sidebar_label: "Update and fixes: IDE" -sidebar_position: 2 -tags: [May-2023, IDE] ---- - -To continue improving your [Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud) development experience, the dbt Labs team continues to work on adding new features, fixing bugs, and increasing reliability ✨. - -Stay up-to-date with [IDE-related changes](/tags/ide). - -## New features -- Lint via SQL Fluff is now available in beta (GA over the next 2-3 weeks) -- Format markdown files with prettier -- Leverage developer experience shortcuts, including ``Ctrl + ` `` (toggle history drawer), `CMD + Option + /` (toggle block comment), `CMD + Shift + P` (open command palette), `Option + W` (close editor tab) -- Display parent folder name for files with same name in Changes section -- Navigate the new IDE features quickly using [the IDE User Interface](/docs/cloud/dbt-cloud-ide/ide-user-interface) help page -- Use `top X` in SQL when previewing in the IDE -- Opt into the new IDE backend layer over the past month (still with dbt-rpc). Ready for beta later in June! - - -## Product refinements - -- Performance-related upgrades: - - Reduced cold start time by 60+% - - Improved render time of modals in the IDE by 98% - - Improved IDE performance with dbt Core v1.5+ (faster and snappier – highly encourage you to [upgrade your dbt version](/docs/dbt-versions/upgrade-core-in-cloud)!) -- Upgraded sqlfmt (which powers the Format button) to 0.18.0 -- Updated Build button to change menu options based on file/model type (snapshot, macro, etc.) 
-- Display message to disable adblocker for file contents error -- Moved Format button to console bar -- Made many security enhancements in the IDE -## Bug fixes - -- File icon sizes no longer get wonky in small screen -- Toast notifications no longer take over command bar menu -- Hover info inside the text editor no longer gets cut off -- Transition between a file and a recently modified scratchpad no longer triggers a console error -- dbt v1.5+ now can access the IDE -- Confirm button on the Unsaved Changes modal now closes after clicking it -- Long node names no longer overflow in the parsed logs section in history drawer -- Status pill in history drawer no longer scales with longer command -- Tooltip for tab name with a long file name is no longer cut off -- Lint button should no longer available in main branch +--- +title: "May IDE updates and fixes" +id: "may-ide-updates" +description: "May 2023 release note: We've launched SQLFluff in beta, released an IDE UI page, significantly improved IDE performance, improved error messages, fixed bugs, and more." +sidebar_label: "Update and fixes: IDE" +sidebar_position: 2 +tags: [May-2023, IDE] +--- + +To continue improving your [Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud) development experience, the dbt Labs team continues to work on adding new features, fixing bugs, and increasing reliability ✨. + +Stay up-to-date with [IDE-related changes](/tags/ide). + +## New features +- Lint via SQL Fluff is now available in beta (GA over the next 2-3 weeks) +- Format markdown files with prettier +- Leverage developer experience shortcuts, including ``Ctrl + ` `` (toggle history drawer), `CMD + Option + /` (toggle block comment), `CMD + Shift + P` (open command palette), `Option + W` (close editor tab) +- Display parent folder name for files with same name in Changes section +- Navigate the new IDE features quickly using [the IDE User Interface](/docs/cloud/dbt-cloud-ide/ide-user-interface) help page +- Use `top X` in SQL when previewing in the IDE +- Opt into the new IDE backend layer over the past month (still with dbt-rpc). Ready for beta later in June! + + +## Product refinements + +- Performance-related upgrades: + - Reduced cold start time by 60+% + - Improved render time of modals in the IDE by 98% + - Improved IDE performance with dbt Core v1.5+ (faster and snappier – highly encourage you to [upgrade your dbt version](/docs/dbt-versions/upgrade-core-in-cloud)!) +- Upgraded sqlfmt (which powers the Format button) to 0.18.0 +- Updated Build button to change menu options based on file/model type (snapshot, macro, etc.) 
+- Display message to disable adblocker for file contents error +- Moved Format button to console bar +- Made many security enhancements in the IDE +## Bug fixes + +- File icon sizes no longer get wonky in small screen +- Toast notifications no longer take over command bar menu +- Hover info inside the text editor no longer gets cut off +- Transition between a file and a recently modified scratchpad no longer triggers a console error +- dbt v1.5+ now can access the IDE +- Confirm button on the Unsaved Changes modal now closes after clicking it +- Long node names no longer overflow in the parsed logs section in history drawer +- Status pill in history drawer no longer scales with longer command +- Tooltip for tab name with a long file name is no longer cut off +- Lint button should no longer available in main branch diff --git a/website/docs/docs/deploy/ci-jobs.md b/website/docs/docs/deploy/ci-jobs.md index 74a39dd7828..27229671cc4 100644 --- a/website/docs/docs/deploy/ci-jobs.md +++ b/website/docs/docs/deploy/ci-jobs.md @@ -4,45 +4,20 @@ sidebar_label: "CI jobs" description: "Learn how to create and set up CI checks to test code changes before deploying to production." --- -You can set up [continuous integration](/docs/deploy/continuous-integration) (CI) jobs to run when someone opens a new pull request in your dbt repository. By running and testing only _modified_ models, dbt Cloud ensures these jobs are as efficient and resource conscientious as possible on your data platform. +You can set up [continuous integration](/docs/deploy/continuous-integration) (CI) jobs to run when someone opens a new pull request (PR) in your dbt Git repository. By running and testing only _modified_ models, dbt Cloud ensures these jobs are as efficient and resource conscientious as possible on your data platform. -:::tip Join our beta -dbt Labs is currently running a beta that provides improved UI updates for setting up CI jobs. For docs, refer to [Set up CI jobs (Beta version)](/docs/deploy/ci-jobs?version=beta#set-up-ci-jobs) on this page. - -If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). - -::: +## Set up CI jobs {#set-up-ci-jobs} -## Prerequisites +dbt Labs recommends that you create your CI job in a dedicated dbt Cloud [deployment environment](/docs/deploy/deploy-environments#create-a-deployment-environment) that's connected to a staging database. Having a separate environment dedicated for CI will provide better isolation between your temporary CI schema builds and your production data builds. Additionally, sometimes teams need their CI jobs to be triggered when a PR is made to a branch other than main. If your team maintains a staging branch as part of your release process, having a separate environment will allow you to set a [custom branch](/faqs/environments/custom-branch-settings) and, accordingly, the CI job in that dedicated environment will be triggered only when PRs are made to the specified custom branch. To learn more, refer to [Get started with CI tests](/guides/orchestration/set-up-ci/overview). +### Prerequisites - You have a dbt Cloud account. - - For the [Concurrent CI checks](/docs/deploy/continuous-integration#concurrent-ci-checks) and [Smart cancellation of stale builds](/docs/deploy/continuous-integration#smart-cancellation) features, your account must be on the [Team or Enterprise plan](https://www.getdbt.com/pricing/). 
-- You must be connected using dbt Cloud’s native integration with [GitHub account](/docs/cloud/git/connect-github), [GitLab account](/docs/cloud/git/connect-gitlab), or [Azure DevOps account](/docs/cloud/git/connect-azure-devops). +- For the [Concurrent CI checks](/docs/deploy/continuous-integration#concurrent-ci-checks) and [Smart cancellation of stale builds](/docs/deploy/continuous-integration#smart-cancellation) features, your dbt Cloud account must be on the [Team or Enterprise plan](https://www.getdbt.com/pricing/). +- You must be connected using dbt Cloud’s native Git integration with [GitHub](/docs/cloud/git/connect-github), [GitLab](/docs/cloud/git/connect-gitlab), or [Azure DevOps](/docs/cloud/git/connect-azure-devops). - If you’re using GitLab, you must use a paid or self-hosted account which includes support for GitLab webhooks. - If you previously configured your dbt project by providing a generic git URL that clones using SSH, you must reconfigure the project to connect through dbt Cloud's native integration. -## Set up CI jobs {#set-up-ci-jobs} - -dbt Labs recommends that you create your CI job in a dedicated dbt Cloud [deployment environment](/docs/deploy/deploy-environments#create-a-deployment-environment) that's connected to a staging database. Having a separate environment dedicated for CI will provide better isolation between your temporary CI schema builds and your production data builds. Additionally, sometimes teams need their CI jobs to be triggered when a PR is made to a branch other than main. If your team maintains a staging branch as part of your release process, having a separate environment will allow you to set a [custom branch](/faqs/environments/custom-branch-settings) and, accordingly, the CI job in that dedicated environment will be triggered only when PRs are made to the specified custom branch. To learn more, refer to [Get started with CI tests](/guides/orchestration/set-up-ci/overview). - - - - -1. On your deployment environment page, click **Create One** to create a new CI job. -2. In the **Execution Settings** section: - - For the option **Defer to a previous run state**, choose whichever production job that's set to run often. If you don't see any jobs to select from the dropdown, you first need to run a production job successfully. Deferral tells dbt Cloud to compare the manifest of the current CI job against the project representation that was materialized the last time the deferred job was run successfully. By setting this option, dbt Cloud only checks the modified code and compares the changes against what’s running in production, instead of building the full table or the entire DAG. - - - - - For the option **Commands**, enter `dbt build --select state:modified+` in the field. This informs dbt Cloud to build only new or changed models and their downstream dependents. Importantly, state comparison can only happen when there is a deferred job selected to compare state to. - - -3. In the **Triggers** section, choose the **Continuous Integration** (CI) tab. Then, enable the **Run on Pull Requests** option. This configures pull requests and new commits to be a trigger for the CI job. - - - - To make CI job creation easier, many options on the **CI job** page are set to default values that dbt Labs recommends that you use. If you don't want to use the defaults, you can change them. 
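If it's helpful, here's what that default command looks like in practice. The second command is only a rough dbt Core equivalent for local experimentation; the `./prod-artifacts` path is a hypothetical location for previously downloaded production artifacts and isn't something dbt Cloud asks for.

```shell
# dbt Cloud CI job command — build only new or changed models and their downstream dependents
dbt build --select state:modified+

# Rough dbt Core equivalent, assuming production artifacts were downloaded to ./prod-artifacts
dbt build --select state:modified+ --defer --state ./prod-artifacts
```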
@@ -75,9 +50,25 @@ To make CI job creation easier, many options on the **CI job** page are set to d - - +## Trigger a CI job with the API + +If you're not using dbt Cloud’s native Git integration with [GitHub](/docs/cloud/git/connect-github), [GitLab](/docs/cloud/git/connect-gitlab), or [Azure DevOps](/docs/cloud/git/connect-azure-devops), you can use the [Administrative API](/docs/dbt-cloud-apis/admin-cloud-api) to trigger a CI job to run. However, dbt Cloud will not automatically delete the temporary schema for you. This is because automatic deletion relies on incoming webhooks from Git providers, which is only available through the native integrations. + +### Prerequisites + +- You have a dbt Cloud account. +- For the [Concurrent CI checks](/docs/deploy/continuous-integration#concurrent-ci-checks) and [Smart cancellation of stale builds](/docs/deploy/continuous-integration#smart-cancellation) features, your dbt Cloud account must be on the [Team or Enterprise plan](https://www.getdbt.com/pricing/). + + +1. Set up a CI job with the [Create Job](/dbt-cloud/api-v2#/operations/Create%20Job) API endpoint using `"job_type": ci` or from the [dbt Cloud UI](#set-up-ci-jobs). +1. Call the [Trigger Job Run](/dbt-cloud/api-v2#/operations/Trigger%20Job%20Run) API endpoint to trigger the CI job. Provide the pull request (PR) ID to the payload using one of these fields, even if you're using a different Git provider (like Bitbucket): + + - `github_pull_request_id` + - `gitlab_merge_request_id` + - `azure_devops_pull_request_id`  + + This can make your code less human-readable but it will _not_ affect dbt functionality. ## Example pull requests @@ -112,6 +103,7 @@ If you're experiencing any issues, review some of the common questions and answe Confirm that you'd like to disconnect your repository. You should then see a new Configure a repository link in your old repository's place. Click through to the configuration page:



+ Select the GitHub, GitLab, or AzureDevOps tab and reselect your repository. That should complete the setup of the project and enable you to set up a dbt Cloud CI job. diff --git a/website/docs/docs/deploy/deploy-jobs.md b/website/docs/docs/deploy/deploy-jobs.md index 3d754beb609..e43020bf66e 100644 --- a/website/docs/docs/deploy/deploy-jobs.md +++ b/website/docs/docs/deploy/deploy-jobs.md @@ -15,13 +15,6 @@ You can use deploy jobs to build production data assets. Deploy jobs make it eas You can create a deploy job and configure it to run on [scheduled days and times](#schedule-days) or enter a [custom cron schedule](#custom-cron-schedules). -:::tip Join our beta - -dbt Labs is currently running a beta that provides improved UI updates for setting up deploy jobs. For docs, refer to [Create and schedule jobs (Beta version)](/docs/deploy/deploy-jobs?version=beta#create-and-schedule-jobs) on this page. - -If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). - -::: ## Prerequisites @@ -32,45 +25,6 @@ If you're interested in joining our beta, please fill out our Google Form to [si ## Create and schedule jobs {#create-and-schedule-jobs} - - - -1. Create a new deploy job by clicking **Deploy** in the header, click **Jobs**, and then **Create job**. -1. Provide a job name, for example "Hourly Customer Job". -1. Under **Environment**, add the following: - * **Environment** — Link to an existing deployment environment. - * **dbt Version** — Select the dbt [version](/docs/dbt-versions/core). dbt Labs recommends inheriting the version from the environment settings. - * **Target Name** — Define the [target name](/docs/build/custom-target-names) for any dbt cloud job to correspond to settings in your project. - * **Threads** — The default value is 4 [threads](/docs/core/connect-data-platform/connection-profiles#understanding-threads). Increase the thread count to increase model execution concurrency. - -1. Define [environment variables](/docs/build/environment-variables) if you want to customize the behavior of your project. - - - -5. Under **Execution Settings**, you can configure the fields needed to execute your job: - - * **Run Timeout** — Configure the number of seconds a run will execute before dbt Cloud cancels it. Setting this to 0 means it'll never time out runs for that job. - * **Defer to a previous run state** — Select a production job you want to defer to. This enables dbt Cloud to examine the artifacts from the most recent, successful run of that deferred job, enabling state comparison and rewiring of upstream dependencies to any model that doesn’t exist in the current run's schema.  - * **Generate docs on run** checkbox — Configure the job to automatically [generate project docs](/docs/collaborate/build-and-view-your-docs) each time this job runs. - * **Run on source freshness** checkbox — Configure [dbt source freshness](/docs/deploy/source-freshness) as the first step of the job without breaking subsequent steps. - * **Commands** — Add or remove [job commands](/docs/deploy/job-commands), which are specific tasks you set in your dbt Cloud jobs. - - - -6. Under the **Triggers** section, you can configure when and how dbt will trigger the deploy job. - - * **Schedule** tab — Enable the **Run on schedule** option. Use either the [scheduled days](#schedule-days) or the [custom cron schedule](#custom-cron-schedule) method to configure your desired days, times, and intervals for running your deploy job. 
- * **Continuous Integration** tab — Configure [continuous integration (CI)](/docs/deploy/continuous-integration) to run when someone opens a new pull request in your dbt repository. - * **API** tab — Use the [dbt API](/docs/dbt-cloud-apis/overview) to trigger a job. - - - -7. Select **Save**, then click **Run Now** to run your deploy job. Click the run and watch its progress under **Run history**. - - - - - 1. On your deployment environment page, click **Create Job** > **Deploy Job** to create a new deploy job. 2. Options in the **Job Description** section: - **Job Name** — Specify the name for the deploy job. For example, `Daily build`. @@ -90,7 +44,7 @@ If you're interested in joining our beta, please fill out our Google Form to [si - **Environment Variables** — Define [environment variables](/docs/build/environment-variables) to customize the behavior of your project when the deploy job runs. - **Target Name** — Define the [target name](/docs/build/custom-target-names) to customize the behavior of your project when the deploy job runs. Environment variables and target names are often used interchangeably. - **Run Timeout** — Cancel the deploy job if the run time exceeds the timeout value. - - **Compare changes against an environment (Deferral)** option — By default, it’s set to **No deferral**. + - **Compare changes against** — By default, it’s set to **No deferral**. Select either **Environment** or **This Job** to let dbt Cloud know what it should compare the changes against. :::info Older versions of dbt Cloud only allow you to defer to a specific job instead of an environment. Deferral to a job compares state against the project code that was run in the deferred job's last successful run. While deferral to an environment is more efficient as dbt Cloud will compare against the project representation (which is stored in the `manifest.json`) of the last successful deploy job run that executed in the deferred environment. By considering _all_ deploy jobs that run in the deferred environment, dbt Cloud will get a more accurate, latest project representation state. @@ -101,10 +55,6 @@ If you're interested in joining our beta, please fill out our Google Form to [si - - - - ### Schedule days To set your job's schedule, use the **Schedule Days** option to choose specific days of the week, and select customized hours or intervals. @@ -148,4 +98,4 @@ Refer to the following example snippets: - [Artifacts](/docs/deploy/artifacts) - [Continuous integration (CI) jobs](/docs/deploy/ci-jobs) -- [Webhooks](/docs/deploy/webhooks) \ No newline at end of file +- [Webhooks](/docs/deploy/webhooks) diff --git a/website/docs/docs/deploy/deployment-tools.md b/website/docs/docs/deploy/deployment-tools.md index b9ab14e1c0c..80622880c2c 100644 --- a/website/docs/docs/deploy/deployment-tools.md +++ b/website/docs/docs/deploy/deployment-tools.md @@ -109,6 +109,10 @@ If your organization is using [Prefect](https://www.prefect.io/), the way you wi If your organization is using [Dagster](https://dagster.io/), you can use the [dagster_dbt](https://docs.dagster.io/_apidocs/libraries/dagster-dbt) library to integrate dbt commands into your pipelines. This library supports the execution of dbt through dbt Cloud, dbt CLI and the dbt RPC server. Running dbt from Dagster automatically aggregates metadata about your dbt runs. Refer to the [example pipeline](https://dagster.io/blog/dagster-dbt) for details. 
+## Kestra + +If your organization uses [Kestra](http://kestra.io/), you can leverage the [dbt plugin](https://kestra.io/plugins/plugin-dbt) to orchestrate dbt Cloud and dbt Core jobs. Kestra's user interface (UI) has built-in [Blueprints](https://kestra.io/docs/user-interface-guide/blueprints), providing ready-to-use workflows. Navigate to the Blueprints page in the left navigation menu and [select the dbt tag](https://demo.kestra.io/ui/blueprints/community?selectedTag=36) to find several examples of scheduling dbt CLI commands and dbt Cloud jobs as part of your data pipelines. After each scheduled or ad-hoc workflow execution, the Outputs tab in the Kestra UI allows you to download and preview all dbt build artifacts. The Gantt and Topology view additionally render the metadata to visualize dependencies and runtimes of your dbt models and tests. The dbt Cloud task provides convenient links to easily navigate between Kestra and dbt Cloud UI. + ## Automation servers Automation servers, like CodeDeploy, GitLab CI/CD ([video](https://youtu.be/-XBIIY2pFpc?t=1301)), Bamboo and Jenkins, can be used to schedule bash commands for dbt. They also provide a UI to view logging to the command line, and integrate with your git repository. diff --git a/website/docs/docs/deploy/job-notifications.md b/website/docs/docs/deploy/job-notifications.md index 72725a1e460..8d242abac78 100644 --- a/website/docs/docs/deploy/job-notifications.md +++ b/website/docs/docs/deploy/job-notifications.md @@ -9,10 +9,10 @@ Setting up notifications in dbt Cloud will allow you to receive alerts via Email ### Email -These are the following options for setting up email notifications: +These are the following options for setting up email notifications. Refer to [Users and licenses](/docs/cloud/manage-access/seats-and-users) for info on license types eligible for email notifications. -- As a **user** — You can set up email notifications for yourself under your Profile. -- As an **admin** — You can set up notifications on behalf of your team members. Refer to [Users and licenses](/docs/cloud/manage-access/seats-and-users) for info on license types eligible for email notifications. +- As a **user** — You can set up email notifications for yourself under your Profile. +- As an **admin** — You can set up notifications on behalf of your team members. To set up job notifications, follow these steps: diff --git a/website/docs/docs/deploy/jobs.md b/website/docs/docs/deploy/jobs.md index d66cdd87619..e8ca864d65f 100644 --- a/website/docs/docs/deploy/jobs.md +++ b/website/docs/docs/deploy/jobs.md @@ -5,25 +5,17 @@ description: "Learn about deploy jobs and continuous integration (CI) jobs in db tags: [scheduler] --- -In dbt Cloud, you can create and set up triggers for these jobs: -- [Deploy jobs](/docs/deploy/deploy-jobs) -- [Continuous integration (CI) jobs](/docs/deploy/continuous-integration) - -:::tip Join our beta - -dbt Labs is currently running a beta that provides improved UI updates for setting up deploy jobs and CI jobs. For docs on deploy jobs, refer to [Create and schedule jobs (Beta version)](/docs/deploy/deploy-jobs?version=beta#create-and-schedule-jobs). For docs on CI jobs, refer to [Set up CI jobs (Beta version)](/docs/deploy/ci-jobs?version=beta#set-up-ci-jobs). - -If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). 
- -::: +In dbt Cloud, there are two types of jobs: +- [Deploy jobs](/docs/deploy/deploy-jobs) — To create and set up triggers for building production data assets +- [Continuous integration (CI) jobs](/docs/deploy/continuous-integration) — To create and set up triggers for checking code changes Below is a comparison table that describes how deploy jobs and CI jobs behave differently: | | Deploy Jobs | CI Jobs | | --- | --- | --- | -| Purpose | Builds production data assets | Builds and tests new code before merging changes into production | -| Trigger types | Triggered by a schedule or by API | Triggered by a webhook from a commit to a PR or by API | -| Destination | Builds into a production database and schema | Builds into a staging database and ephemeral schema, lived for the lifetime of the PR | -| Execution Mode | Runs execute sequentially, so as to not have collisions on the underlying DAG. | Runs execute in parallel to promote team velocity. | -| Efficiency run savings | Detects over scheduled jobs and cancels unnecessary runs to avoid queue clog. | Cancels runs when an in-flight run becomes stale when a new commit is pushed to the pull request. | -| State comparison | Only sometimes needs to detect state | Almost always needs to compare state against the production environment to build on modified code and its dependents. | +| Purpose | Builds production data assets. | Builds and tests new code before merging changes into production. | +| Trigger types | Triggered by a schedule or by API. | Triggered by a commit to a PR or by API. | +| Destination | Builds into a production database and schema. | Builds into a staging database and ephemeral schema, lived for the lifetime of the PR. | +| Execution mode | Runs execute sequentially, so as to not have collisions on the underlying DAG. | Runs execute in parallel to promote team velocity. | +| Efficiency run savings | Detects over-scheduled jobs and cancels unnecessary runs to avoid queue clog. | Cancels existing runs when a newer commit is pushed to avoid redundant work. | +| State comparison | Only sometimes needs to detect state. | Almost always needs to compare state against the production environment to build on modified code and its dependents. | \ No newline at end of file diff --git a/website/docs/docs/deploy/webhooks.md b/website/docs/docs/deploy/webhooks.md index b4ce7195363..069e7a3e283 100644 --- a/website/docs/docs/deploy/webhooks.md +++ b/website/docs/docs/deploy/webhooks.md @@ -167,7 +167,7 @@ An example of a webhook payload for an errored run: You can use the dbt Cloud API to create new webhooks that you want to subscribe to, get detailed information about your webhooks, and to manage the webhooks that are associated with your account. The following sections describe the API endpoints you can use for this. :::info Access URLs -dbt Cloud is hosted in multiple regions in the world and each region has a different access URL. People on Enterprise plans can choose to have their account hosted in any one of these regions. This section uses `cloud.getdbt.com` (which is for North America) as part of the endpoint but your access URL might be different. For a complete list of available dbt Cloud access URLs, refer to [Regions & IP addresses](/docs/cloud/about-cloud/regions-ip-addresses). +dbt Cloud is hosted in multiple regions in the world and each region has a different access URL. People on Enterprise plans can choose to have their account hosted in any one of these regions. 
For a complete list of available dbt Cloud access URLs, refer to [Regions & IP addresses](/docs/cloud/about-cloud/regions-ip-addresses). ::: ### List all webhook subscriptions @@ -175,12 +175,13 @@ List all webhooks that are available from a specific dbt Cloud account. #### Request ```shell -GET https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscriptions +GET https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscriptions ``` #### Path parameters | Name | Description | |------------|--------------------------------------| +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhooks are associated with. | #### Response sample @@ -265,11 +266,12 @@ Get detailed information about a specific webhook. #### Request ```shell -GET https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} +GET https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} ``` #### Path parameters | Name | Description | |------------|--------------------------------------| +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhook is associated with. | | `webhook_id` | The webhook you want detailed information on. | @@ -322,7 +324,7 @@ Create a new outbound webhook and specify the endpoint URL that will be subscrib #### Request sample ```shell -POST https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscriptions +POST https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscriptions ``` ```json @@ -344,6 +346,7 @@ POST https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription #### Path parameters | Name | Description | | --- | --- | +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhook is associated with. | #### Request parameters @@ -407,7 +410,7 @@ Update the configuration details for a specific webhook. #### Request sample ```shell -PUT https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} +PUT https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} ``` ```json @@ -429,6 +432,7 @@ PUT https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription/ #### Path parameters | Name | Description | |------------|--------------------------------------| +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhook is associated with. | | `webhook_id` | The webhook you want to update. | @@ -491,12 +495,13 @@ Test a specific webhook. #### Request ```shell -GET https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id}/test +GET https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id}/test ``` #### Path parameters | Name | Description | |------------|--------------------------------------| +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhook is associated with. | | `webhook_id` | The webhook you want to test. | @@ -518,12 +523,13 @@ Delete a specific webhook. 
#### Request ```shell -DELETE https://cloud.getdbt.com/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} +DELETE https://{your access URL}/api/v3/accounts/{account_id}/webhooks/subscription/{webhook_id} ``` #### Path parameters | Name | Description | |------------|--------------------------------------| +| `your access URL` | The login URL for your dbt Cloud account. | | `account_id` | The dbt Cloud account the webhook is associated with. | | `webhook_id` | The webhook you want to delete. | diff --git a/website/docs/docs/supported-data-platforms.md b/website/docs/docs/supported-data-platforms.md index d1f88536693..8ac782991c8 100644 --- a/website/docs/docs/supported-data-platforms.md +++ b/website/docs/docs/supported-data-platforms.md @@ -6,78 +6,35 @@ description: "Connect dbt to any data platform in dbt Cloud or dbt Core, using a hide_table_of_contents: true --- -dbt connects to and runs SQL against your database, warehouse, lake, or query engine. These SQL-speaking platforms are collectively referred to as _data platforms_. dbt connects with data platforms by using a dedicated adapter plugin for each. Plugins are built as Python modules that dbt Core discovers if they are installed on your system. Read [What are Adapters](/guides/dbt-ecosystem/adapter-development/1-what-are-adapters) for more info. +dbt connects to and runs SQL against your database, warehouse, lake, or query engine. These SQL-speaking platforms are collectively referred to as _data platforms_. dbt connects with data platforms by using a dedicated adapter plugin for each. Plugins are built as Python modules that dbt Core discovers if they are installed on your system. Read [What are Adapters](/guides/dbt-ecosystem/adapter-development/1-what-are-adapters) for more info. -You can [connect](/docs/connect-adapters) to adapters and data platforms either directly in the dbt Cloud user interface (UI) or install them manually using the command line (CLI). There are two types of adapters available and to evaluate quality and maintenance, we recommend you consider their verification status. You can also [further configure](/reference/resource-configs/postgres-configs) your specific data platform to optimize performance. +You can [connect](/docs/connect-adapters) to adapters and data platforms either directly in the dbt Cloud user interface (UI) or install them manually using the command line (CLI). -- **Verified** — dbt Labs' strict [adapter program](/guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter) assures users of trustworthy, tested, and regularly updated adapters for production use. Verified adapters earn a "Verified" status, providing users with trust and confidence. -- **Community** — [Community adapters](/docs/community-adapters) are open-source and maintained by community members. +You can also further customize how dbt works with your specific data platform via configuration: see [Configuring Postgres](/reference/resource-configs/postgres-configs) for an example. + +## Types of Adapters + +There are three types of adapters available today: + +- **Verified** — [Verified adapters](verified-adapters) are those that have completed a rigorous verification process in collaboration with dbt Labs. +- **Trusted** — [Trusted adapters](trusted-adapters) are those where the adapter maintainers have agreed to meet a higher standard of quality. +- **Community** — [Community adapters](community-adapters) are open-source and maintained by community members. 
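As mentioned above, you can also install adapters manually from the command line. For example, a minimal sketch (using `dbt-postgres` purely as an illustrative choice):

```shell
# Install the Postgres adapter into the active Python environment;
# dbt Core is pulled in as a dependency of the adapter package
python -m pip install dbt-postgres
```

The same pattern applies to other adapters; swap in the package name for your data platform.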
### Verified adapters The following are **Verified adapters** ✓ you can connect to either in dbt Cloud or dbt Core: -
-* Install these adapters using the CLI as they're not currently supported in dbt Cloud.
+import AdaptersVerified from '/snippets/_adapters-verified.md'; + + + +### Trusted adapters + +The following are **Trusted adapters** ✓ you can connect to in dbt Core: + +import AdaptersTrusted from '/snippets/_adapters-trusted.md'; + + + +
* Install these adapters using the CLI as they're not currently supported in dbt Cloud.
+
 
diff --git a/website/docs/docs/trusted-adapters.md b/website/docs/docs/trusted-adapters.md
new file mode 100644
index 00000000000..e19bb40785f
--- /dev/null
+++ b/website/docs/docs/trusted-adapters.md
@@ -0,0 +1,41 @@
+---
+title: "Trusted adapters"
+id: "trusted-adapters"
+hide_table_of_contents: true
+---
+
+Trusted adapters are adapters that are not maintained by dbt Labs but that we feel comfortable recommending to users for production use.
+
+Free and open-source tools for the data professional are increasingly abundant. This is, by and large, a *good thing*; however, it requires due diligence that wasn't necessary in a paid-license, closed-source software world. As a user, there are important questions to answer before taking a dependency on an open-source project. The trusted adapter designation is meant to streamline this process for end users.
+
+<details><summary>Considerations for depending on an open-source project</summary>
+
+1. Does it work?
+2. Does anyone "own" the code, or is anyone liable for ensuring it works?
+3. Do bugs get fixed quickly?
+4. Does it stay up-to-date with new Core features?
+5. Is the usage substantial enough to self-sustain?
+6. Are there any other known risks to taking a dependency on this library?
+
+ +### Trusted adapter specifications + +See [Building a Trusted Adapter](/guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter) for more information, particularly if you are an adapter maintainer considering having your adapter be added to the trusted list. + +### Trusted vs Verified + +The Verification program exists to highlight adapters that meets both of the following criteria: + +- the guidelines given in the Trusted program, +- formal agreements required for integration with dbt Cloud + +For more information on the Verified Adapter program, reach out the [dbt Labs partnerships team](mailto:partnerships@dbtlabs.com) + +### Trusted adapters + +The following are **Trusted adapters** ✓ you can connect to in dbt Core: + +import AdaptersTrusted from '/snippets/_adapters-trusted.md'; + + diff --git a/website/docs/docs/verified-adapters.md b/website/docs/docs/verified-adapters.md index 9604d05391c..8ec0c700ea4 100644 --- a/website/docs/docs/verified-adapters.md +++ b/website/docs/docs/verified-adapters.md @@ -4,27 +4,16 @@ id: "verified-adapters" --- -The dbt Labs has a rigorous verified adapter program which provides reassurance to users about which adapters can be trusted to use in production, has been tested, and is actively maintained and updated. The process covers aspects of development, documentation, user experience, and maintenance. +The dbt Labs has a rigorous verified adapter program which provides reassurance to users about which adapters can be trusted to use in production, has been tested, and is actively maintained and updated. The process covers aspects of development, documentation, user experience, and maintenance. These adapters then earn a "Verified" status so that users can have a certain level of trust and expectation when they use them. The adapters also have maintainers and we recommend using the adapter's verification status to determine its quality and health. -Here's the list of the verified data platforms that can connect to dbt and its latest version. 
- -| dbt Cloud setup | CLI installation | latest verified version | -| ---------------- | ----------------------------------------- | ------------------------ | -| [Setup AlloyDB](/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb) | [Install AlloyDB](/docs/core/connect-data-platform/alloydb-setup) | (same as `dbt-postgres`) | -| Not supported | [Install Azure Synapse](/docs/core/connect-data-platform/azuresynapse-setup) | 1.3 :construction: | -| [Set up BigQuery](/docs/cloud/connect-data-platform/connect-bigquery) | [Install BigQuery](/docs/core/connect-data-platform/bigquery-setup) | 1.4 | -| [Set up Databricks ](/docs/cloud/connect-data-platform/connect-databricks)| [ Install Databricks](/docs/core/connect-data-platform/databricks-setup) | 1.4 | -| Not supported | [Install Dremio](/docs/core/connect-data-platform/dremio-setup) | 1.4 :construction: | -| [Set up Postgres](/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb) | [Install Postgres](/docs/core/connect-data-platform/postgres-setup) | 1.4 | -| [Set up Redshift](/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb) | [Install Redshift](/docs/core/connect-data-platform/redshift-setup) | 1.4 | -| [Set up Snowflake](/docs/cloud/connect-data-platform/connect-snowflake) | [ Install Snowflake](/docs/core/connect-data-platform/snowflake-setup) | 1.4 | -| [Set up Spark](/docs/cloud/connect-data-platform/connect-apache-spark) | [Install Spark](/docs/core/connect-data-platform/spark-setup) | 1.4 | -| [Set up Starburst & Trino](/docs/cloud/connect-data-platform/connect-starburst-trino)| [Installl Starburst & Trino](/docs/core/connect-data-platform/trino-setup) | 1.4 | - -:construction:: Verification in progress +The verification process serves as the on-ramp to integration with dbt Cloud. As such, we restrict applicants to data platform vendors with whom we are already engaged. To learn more, see [Verifying a new adapter](/guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter). +Here's the list of the verified data platforms that can connect to dbt and its latest version. + +import AdaptersVerified from '/snippets/_adapters-verified.md'; + diff --git a/website/docs/faqs/Accounts/cloud-upgrade-instructions.md b/website/docs/faqs/Accounts/cloud-upgrade-instructions.md index 76d03870478..f8daf393f9b 100644 --- a/website/docs/faqs/Accounts/cloud-upgrade-instructions.md +++ b/website/docs/faqs/Accounts/cloud-upgrade-instructions.md @@ -38,7 +38,7 @@ To unlock your account and select a plan, review the following guidance per plan 2. To unlock your account and continue using the Team plan, you need to enter your payment details. 3. Go to **Payment Information** and click **Edit** on the right. 4. Enter your payment details and click **Save**. -5. This automatically unlocks your dbt Cloud account, and you can now enjoy the benefits of the Team plan. 🎉 +5. This automatically unlocks your dbt Cloud account, and you can now enjoy the benefits of the Team plan. 🎉 @@ -59,7 +59,7 @@ For commonly asked billings questions, refer to the dbt Cloud [pricing page](htt
How does billing work?
-
Team plans are billed monthly on the credit card used to sign up, based on developer seat count. You’ll also be sent a monthly receipt to the billing email of your choice. You can change any billing information in your Account Settings -> Billing page.



+
Team plans are billed monthly on the credit card used to sign up, based on [developer seat count and usage](/docs/cloud/billing). You’ll also be sent a monthly receipt to the billing email of your choice. You can change any billing information in your Account Settings > Billing page.



Enterprise plan customers are billed annually based on the number of developer seats, as well as any additional services + features in your chosen plan.
@@ -75,7 +75,7 @@ For commonly asked billings questions, refer to the dbt Cloud [pricing page](htt
Can I pay by invoice?
-
At present, dbt Cloud Team plan payments must be made via credit card, and by default they will be billed monthly based on the number of developer seats.



+
Currently, dbt Cloud Team plan payments must be made with a credit card, and by default they will be billed monthly based on the number of [developer seats and usage](/docs/cloud/billing).



We don’t have any plans to do invoicing for Team plan accounts in the near future, but we do currently support invoices for companies on the dbt Cloud Enterprise plan. Feel free to contact us to build your Enterprise pricing plan.
diff --git a/website/docs/faqs/Accounts/payment-accepted.md b/website/docs/faqs/Accounts/payment-accepted.md index 2e26063c684..c0e949833a2 100644 --- a/website/docs/faqs/Accounts/payment-accepted.md +++ b/website/docs/faqs/Accounts/payment-accepted.md @@ -5,6 +5,6 @@ sidebar_label: 'Can I pay invoice' id: payment-accepted --- -Presently for Team plans, self-service dbt Cloud payments must be made via credit card and by default, they will be billed monthly based on the number of active developer seats. +Currently for Team plans, self-service dbt Cloud payments must be made with a credit card and by default, they will be billed monthly based on the number of [active developer seats and usage](/docs/cloud/billing). We don't have any plans to do invoicing for self-service teams in the near future, but we *do* currently support invoices for companies on the **dbt Cloud Enterprise plan.** Feel free to [contact us](https://www.getdbt.com/contact) to build your Enterprise pricing. diff --git a/website/docs/faqs/Core/install-python-compatibility.md b/website/docs/faqs/Core/install-python-compatibility.md index d24466f4990..4d6066d931b 100644 --- a/website/docs/faqs/Core/install-python-compatibility.md +++ b/website/docs/faqs/Core/install-python-compatibility.md @@ -23,12 +23,6 @@ The latest version of `dbt-core` is compatible with Python versions 3.7, 3.8, 3. - - -As of v1.0, `dbt-core` is compatible with Python versions 3.7, 3.8, and 3.9. - - - Adapter plugins and their dependencies are not always compatible with the latest version of Python. For example, dbt-snowflake v0.19 is not compatible with Python 3.9, but dbt-snowflake versions 0.20+ are. New dbt minor versions will add support for new Python3 minor versions as soon as all dependencies can support it. In turn, dbt minor versions will drop support for old Python3 minor versions right before they reach [end of life](https://endoflife.date/python). diff --git a/website/docs/faqs/Models/available-materializations.md b/website/docs/faqs/Models/available-materializations.md index 25ba745a2b2..011d3ba3fb0 100644 --- a/website/docs/faqs/Models/available-materializations.md +++ b/website/docs/faqs/Models/available-materializations.md @@ -5,6 +5,7 @@ sidebar_label: 'Materializations available' id: available-materializations --- -dbt ships with four materializations: `view`, `table`, `incremental` and `ephemeral`. Check out the documentation on [materializations](/docs/build/materializations) for more information on each of these options. +dbt ships with five materializations: `view`, `table`, `incremental`, `ephemeral` and `materialized_view`. +Check out the documentation on [materializations](/docs/build/materializations) for more information on each of these options. You can also create your own [custom materializations](/guides/advanced/creating-new-materializations), if required however this is an advanced feature of dbt. 
diff --git a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-2-setup.md b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-2-setup.md index 7861767e25d..34c0e813725 100644 --- a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-2-setup.md +++ b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-2-setup.md @@ -23,8 +23,8 @@ We'll use pip to install MetricFlow and our dbt adapter: python -m venv [virtual environment name] source [virtual environment name]/bin/activate # install dbt and MetricFlow -pip install dbt-metricflow[adapter name] -# e.g. dbt-metricflow[snowflake] +pip install "dbt-metricflow[adapter name]" +# e.g. pip install "dbt-metricflow[snowflake]" ``` Lastly, to get to the pre-Semantic Layer starting state, checkout the `start-here` branch. diff --git a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md index 2c2122572b8..a2dc55e37ae 100644 --- a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md +++ b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md @@ -148,7 +148,9 @@ from source ```YAML dimensions: - - name: date_trunc('day', ordered_at) + - name: ordered_at + expr: date_trunc('day', ordered_at) + # use date_trunc(ordered_at, DAY) if using [BigQuery](/docs/build/dimensions#time) type: time type_params: time_granularity: day @@ -166,7 +168,9 @@ We'll discuss an alternate situation, dimensional tables that have static numeri ```YAML ... dimensions: - - name: date_trunc('day', ordered_at) + - name: ordered_at + expr: date_trunc('day', ordered_at) + # use date_trunc(ordered_at, DAY) if using BigQuery type: time type_params: time_granularity: day @@ -254,6 +258,8 @@ semantic_models: dimensions: - name: ordered_at + expr: date_trunc('day', ordered_at) + # use date_trunc(ordered_at, DAY) if using BigQuery type: time type_params: time_granularity: day diff --git a/website/docs/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter.md b/website/docs/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter.md index f8335dfcbc4..80b994aefb0 100644 --- a/website/docs/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter.md +++ b/website/docs/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter.md @@ -8,6 +8,7 @@ If you've already [built](3-building-a-new-adapter), and [tested](4-testing-a-ne ## Making your adapter available Many community members maintain their adapter plugins under open source licenses. If you're interested in doing this, we recommend: + - Hosting on a public git provider (for example, GitHub or Gitlab) - Publishing to [PyPI](https://pypi.org/) - Adding to the list of ["Supported Data Platforms"](/docs/supported-data-platforms#community-supported) (more info below) @@ -35,17 +36,12 @@ We ask our adapter maintainers to use the [docs.getdbt.com repo](https://github. To simplify things, assume the reader of this documentation already knows how both dbt and your data platform works. There's already great material for how to learn dbt and the data platform out there. The documentation we're asking you to add should be what a user who is already profiecient in both dbt and your data platform would need to know in order to use both. 
Effectively that boils down to two things: how to connect, and how to configure. - ## Topics and Pages to Cover - The following subjects need to be addressed across three pages of this docs site to have your data platform be listed on our documentation. After the corresponding pull request is merged, we ask that you link to these pages from your adapter repo's `REAMDE` as well as from your product documentation. To contribute, all you will have to do make the changes listed in the table below. - - - | How To... | File to change within `/website/docs/` | Action | Info to Include | |----------------------|--------------------------------------------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Connect | `/docs/core/connect-data-platform/{MY-DATA-PLATFORM}-setup.md` | Create | Give all information needed to define a target in `~/.dbt/profiles.yml` and get `dbt debug` to connect to the database successfully. All possible configurations should be mentioned. | @@ -55,7 +51,6 @@ The following subjects need to be addressed across three pages of this docs site For example say I want to document my new adapter: `dbt-ders`. For the "Connect" page, I will make a new Markdown file, `ders-setup.md` and add it to the `/website/docs/core/connect-data-platform/` directory. - ## Example PRs to add new adapter documentation Below are some recent pull requests made by partners to document their data platform's adapter: diff --git a/website/docs/guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter.md b/website/docs/guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter.md new file mode 100644 index 00000000000..9783ec66460 --- /dev/null +++ b/website/docs/guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter.md @@ -0,0 +1,79 @@ +--- +title: "Building a Trusted Adapter" +id: "8-building-a-trusted-adapter" +--- + +The Trusted adapter program exists to allow adapter maintainers to demonstrate to the dbt community that your adapter is trusted to be used in production. + +## What does it mean to be trusted + +By opting into the below, you agree to this, and we take you at your word. dbt Labs reserves the right to remove an adapter from the trusted adapter list at any time, should any of the below guidelines not be met. + +### Feature Completeness + +To be considered for the Trusted Adapter program, the adapter must cover the essential functionality of dbt Core given below, with best effort given to support the entire feature set. + +Essential functionality includes (but is not limited to the following features): + +- table, view, and seed materializations +- dbt tests + +The adapter should have the required documentation for connecting and configuring the adapter. The dbt docs site should be the single source of truth for this information. These docs should be kept up-to-date. + +See [Documenting a new adapter](/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter) for more information. + +### Release Cadence + +Keeping an adapter up-to-date with dbt Core is an integral part of being a trusted adapter. Therefore, we ask that adapter maintainers: + +- Release of new minor versions of the adapter with all tests passing within four weeks of dbt Core's release cut. 
+- Release of new major versions of the adapter with all tests passing within eight weeks of dbt Core's release cut.
+
+### Community Responsiveness
+
+On a best-effort basis, we ask for active participation and engagement with the dbt Community across the following forums:
+
+- Being responsive to feedback and supporting user enablement in dbt Community’s Slack workspace
+- Responding with comments to issues raised in the public dbt adapter code repository
+- Merging in code contributions from community members as deemed appropriate
+
+### Security Practices
+
+Trusted adapters will not do any of the following:
+
+- Output access credentials for, or data from, the underlying data platform to logs or files
+- Make API calls other than those expressly required for using dbt features (adapters may not add additional logging)
+- Obfuscate code and/or functionality so as to avoid detection
+
+Additionally, to avoid supply-chain attacks:
+
+- Use an automated service to keep Python dependencies up-to-date (such as Dependabot or similar)
+- Publish directly to PyPI from the dbt adapter code repository by using a trusted CI/CD process (such as GitHub Actions)
+- Restrict admin access to both the respective code (GitHub) and package (PyPI) repositories
+- Identify and mitigate security vulnerabilities by using a static code analysis tool (such as Snyk) as part of a CI/CD process
+
+### Other considerations
+
+The adapter repository is:
+
+- open-source licensed,
+- published to PyPI, and
+- automatically tested against dbt Labs' provided adapter test suite
+
+## How to get an adapter on the Trusted list?
+
+Open an issue on the [docs.getdbt.com GitHub repository](https://github.com/dbt-labs/docs.getdbt.com) using the "Add adapter to Trusted list" template. In addition to contact information, it will ask you to confirm that you agree to the following:
+
+1. My adapter meets the guidelines given above
+2. I will make a reasonable best effort to ensure that this continues to be the case
+3. I acknowledge (via checkbox) that dbt Labs reserves the right to remove an adapter from the trusted adapter list at any time, should any of the above guidelines not be met.
+
+The approval workflow is as follows:
+
+1. Create and populate the template-created issue
+2. dbt Labs will respond as quickly as possible (within four weeks at most, though likely faster)
+3. If approved, dbt Labs will create and merge a pull request to formally add the adapter to the list.
+
+## How to get help with my trusted adapter?
+
+Ask your question in the #adapter-ecosystem channel of the dbt Community Slack.
diff --git a/website/docs/guides/dbt-ecosystem/sl-partner-integration-guide.md b/website/docs/guides/dbt-ecosystem/sl-partner-integration-guide.md
index decad95a516..20e2d8fce71 100644
--- a/website/docs/guides/dbt-ecosystem/sl-partner-integration-guide.md
+++ b/website/docs/guides/dbt-ecosystem/sl-partner-integration-guide.md
@@ -4,7 +4,6 @@ id: "sl-partner-integration-guide"
 description: Learn about partner integration guidelines, roadmap, and connectivity.
 ---
-
 import NewChanges from '/snippets/_new-sl-changes.md';
 
@@ -21,7 +20,7 @@ This is an evolving guide that is meant to provide recommendations based on our
 To build a dbt Semantic Layer integration:
 
-- Initially, we recommend building an integration with the [JDBC](/docs/dbt-cloud-apis/sl-jdbc) followed by enhancements of additional features. Refer to the dedicated [dbt Semantic Layer API](/docs/dbt-cloud-apis/sl-api-overview) for more technical integration details.
+- We offer a [JDBC](/docs/dbt-cloud-apis/sl-jdbc) API (and will soon offer a GraphQL API). Refer to the dedicated [dbt Semantic Layer API](/docs/dbt-cloud-apis/sl-api-overview) for more technical integration details. - Familiarize yourself with the [dbt Semantic Layer](/docs/use-dbt-semantic-layer/dbt-sl) and [MetricFlow](/docs/build/about-metricflow)'s key concepts. There are two main objects: @@ -30,16 +29,14 @@ To build a dbt Semantic Layer integration: ### Connection parameters -The dbt Semantic Layer authenticates with `environmentId`, `SERVICE_TOKEN`, and `host`. +The dbt Semantic Layer APIs authenticate with `environmentId`, `SERVICE_TOKEN`, and `host`. -This applies to the dbt Semantic Layer APIs, which all currently use different host names. We recommend you provide users with separate input fields with these components (which dbt Cloud provides). +We recommend you provide users with separate input fields with these components for authentication (dbt Cloud will surface these parameters for the user). -For [JDBC](/docs/dbt-cloud-apis/sl-jdbc), you can construct the JDBC URL from these inputs. Or, you could request the full URL string. +## Best practices on exposing metrics -## Best practices on exposing metrics: - -Best practices for exposing metrics is summarized into five themes: +Best practices for exposing metrics are summarized into five themes: - [Governance](#governance-and-traceability) — Recommendations on how to establish guardrails for governed data work. - [Discoverability](#discoverability) — Recommendations on how to make user-friendly data interactions. @@ -51,9 +48,9 @@ Best practices for exposing metrics is summarized into five themes: When working with more governed data, it's essential to establish clear guardrails. Here are some recommendations: -- **Aggregations control** — Users shouldn't generally be allowed to modify aggregations unless they are performing post-processing calculations on data from the Semantic Layer (such as year over year analysis). +- **Aggregations control** — Users shouldn't generally be allowed to modify aggregations unless they perform post-processing calculations on Semantic Layer data (such as year-over-year analysis). -- **Time series alignment and using metric_time** — Make sure users view metrics across the correct time series. When displaying metric graphs, using a non-default time aggregation dimension might lead to misleading interpretations. While users can still group by other time dimensions, they should be careful not to create trend lines with incorrect time axes.

When looking at one or multiple metrics, users should use `metric_time` as the main time dimension to guarantee they are looking at the right time series for the metric(s).

As such, when building an application, we recommend exposing `metric_time` as a separate, "special" time dimension on its own. This dimension is always going to align with all metrics and be common across them. Other time dimensions can still be looked at and grouped by, but having a clear delineation between the `metric_time` dimension and the other time dimensions is clarifying so that people do not confuse how metrics should be plotted.

Also, when a user requests a time granularity change for the main time series, the query that your application runs should use `metric_time` as this will always give you the correct slice. Note that when looking at a single metric, the primary time dimension and `metric_time` are equivalent. +- **Time series alignment and using metric_time** — Make sure users view metrics across the correct time series. When displaying metric graphs, using a non-default time aggregation dimension might lead to misleading interpretations. While users can still group by other time dimensions, they should be careful not to create trend lines with incorrect time axes.

When looking at one or multiple metrics, users should use `metric_time` as the main time dimension to guarantee they are looking at the right time series for the metric(s).

As such, when building an application, we recommend exposing `metric_time` as a separate, "special" time dimension on its own. This dimension will always align with all metrics and be common across them. Users can still view and group by other time dimensions, but a clear delineation between the `metric_time` dimension and the other time dimensions helps people understand how metrics should be plotted.

Also, when a user requests a time granularity change for the main time series, the query that your application runs should use `metric_time`, as this will always give you the correct slice. Related to this, we also strongly recommend that you have a way to expose what dimension `metric_time` actually maps to for users who may not be familiar with it. Our APIs allow you to fetch the actual underlying time dimensions that make up `metric_time` (such as `transaction_date`) so you can expose them to your users. - **Units consistency** — If units are supported, it's vital to avoid plotting data incorrectly with different units. Ensuring consistency in unit representation will prevent confusion and misinterpretation of the data. @@ -78,25 +75,25 @@ By implementing these recommendations, the data interaction process becomes more We recommend organizing metrics and dimensions in ways that a non-technical user can understand the data model, without needing much context: -- **Organizing Dimensions** — To help non-technical users understand the data model better, we recommend organizing dimensions based on the entity they originated from. For example, consider dimensions like `user__country` and `product__category`.

You can create groups by extracting `user` and `product` and then nest the respective dimensions under each group. This way, dimensions align with the entity or semantic model they belong to and makes them them more user-friendly and accessible. +- **Organizing Dimensions** — To help non-technical users understand the data model better, we recommend organizing dimensions based on the entity they originated from. For example, consider dimensions like `user__country` and `product__category`.

You can create groups by extracting `user` and `product` and then nesting the respective dimensions under each group. This way, dimensions align with the entity or semantic model they belong to, which makes them more user-friendly and accessible. -- **Organizing Metrics** — The goal is to organize metrics into a hierarchy in our configurations, instead of presenting them in a long list.

This hierarchy helps you organize metrics based on a specific criteria, such as business unit or team. By providing this structured organization, users can find and navigate metrics more efficiently, enhancing their overall data analysis experience. +- **Organizing Metrics** — The goal is to organize metrics into a hierarchy in our configurations, instead of presenting them in a long list.

This hierarchy helps you organize metrics based on specific criteria, such as business unit or team. By providing this structured organization, users can find and navigate metrics more efficiently, enhancing their overall data analysis experience. ### Query flexibility Allow users to query either one metric alone without dimensions or multiple metrics with dimensions. -- Allow toggling between metrics / dimensions seamlessly. +- Allow toggling between metrics/dimensions seamlessly. - Be clear on exposing what dimensions are queryable with what metrics and hide things that don’t apply, and vice versa. - Only expose time granularities (monthly, daily, yearly) that match the available metrics. * For example, if a dbt model and its resulting semantic model have a monthly granularity, make sure querying data with a 'daily' granularity isn't available to the user. Our APIs have functionality that will help you surface the correct granularities -- We recommend that time granularity is treated as a general time dimension-specific concept and that it can be applied to more than just the primary aggregation (or `metric_time`). Consider a situation where a user wants to look at `sales` over time by `customer signup month`; in this situation, having the ability able to apply granularities to both time dimensions is crucial. Note: initially, as a starting point, it makes sense to only support `metric_time` or the primary time dimension, but we recommend expanding that as your solution evolves. +- We recommend that time granularity is treated as a general time dimension-specific concept and that it can be applied to more than just the primary aggregation (or `metric_time`). Consider a situation where a user wants to look at `sales` over time by `customer signup month`; in this situation, having the ability to apply granularities to both time dimensions is crucial. Our APIs include information to fetch the granularities for the primary (metric_time) dimensions, as well as all time dimensions. You can treat each time dimension and granularity selection independently in your application. Note: Initially, as a starting point, it makes sense to only support `metric_time` or the primary time dimension, but we recommend expanding that as your solution evolves. - You should allow users to filter on date ranges and expose a calendar and nice presets for filtering these. - * For example: last 30 days, last week etc. + * For example, last 30 days, last week, and so on. ### Context and interpretation @@ -112,12 +109,7 @@ For better analysis, it's best to have the context of the metrics close to where - Allow for creating other metadata that’s useful for the metric. We can provide some of this information in our configuration (Display name, Default Granularity for View, Default Time range), but there may be other metadata that your tool wants to provide to make the metric richer. -### A note on transparency and using explain - -For transparency and additional context, we recommend you have an easy way for the user to obtain the SQL that MetricFlow generates. You can do this by appending `explain=True` to any query. This is incredibly powerful because we want to be very transparent to the user about what we're doing and do not want to be a black box. This would be mostly a power user / technical user functionality. - - -### Example stages of an integration +## Example stages of an integration These are recommendations on how to evolve a Semantic Layer integration and not a strict runbook. 
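As a concrete illustration of the kind of request a Stage 1–2 integration might send, here is a minimal sketch using the [JDBC API](/docs/dbt-cloud-apis/sl-jdbc). The metric and dimension names (`food_order_amount`, `customer__customer_type`) are placeholders rather than part of any real project; refer to the JDBC API docs for the full query syntax.

```sql
-- Hedged sketch: metric and dimension names are illustrative only.
select *
from {{
    semantic_layer.query(
        metrics=['food_order_amount'],
        group_by=[
            Dimension('metric_time').grain('month'),
            Dimension('customer__customer_type')
        ],
        limit=100
    )
}}
```

Grouping by `metric_time` with an explicit grain, rather than by a raw date column, is what keeps the time series aligned with the metric definitions described earlier.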
@@ -125,7 +117,7 @@ These are recommendations on how to evolve a Semantic Layer integration and not * Supporting and using the new [JDBC](/docs/dbt-cloud-apis/sl-jdbc) is the first step. Refer to the [dbt Semantic Layer API](/docs/dbt-cloud-apis/sl-api-overview) for more technical details. **Stage 2 - More discoverability and basic querying** -* Support listing metrics defined in project +* Support listing metrics defined in the project * Listing available dimensions based on one or many metrics * Querying defined metric values on their own or grouping by available dimensions * Display metadata from [Discovery API](/docs/dbt-cloud-apis/discovery-api) and other context @@ -133,7 +125,7 @@ These are recommendations on how to evolve a Semantic Layer integration and not **Stage 3 - More querying flexibility and better user experience (UX)** * More advanced filtering * Time filters with good presets/calendar UX - * Filtering metrics on pre-populated set of dimensions values + * Filtering metrics on a pre-populated set of dimension values * Make dimension values more user-friendly by organizing them effectively * Intelligent filtering of metrics based on available dimensions and vice versa @@ -144,7 +136,14 @@ These are recommendations on how to evolve a Semantic Layer integration and not * Querying dimensions without metrics and other more advanced querying functionality * Suggest metrics to users based on teams/identity, and so on. -
+### A note on transparency and using explain + +For transparency and additional context, we recommend you have an easy way for the user to obtain the SQL that MetricFlow generates. Depending on what API you are using, you can do this by using our explain parameter (JDBC) or compileSQL mutation (GraphQL). This is incredibly powerful because we want to be very transparent to the user about what we're doing and do not want to be a black box. This would be mostly beneficial to a technical user. + + +### A note on where filters + +In the cases where our APIs support either a string or a filter list for the `where` clause, we always recommend that your application utilizes the filter list in order to gain maximum pushdown benefits. The `where` string may be more intuitive for users writing queries during testing, but it will not have the performance benefits of the filter list in a production environment. ## Related docs diff --git a/website/docs/guides/legacy/best-practices.md b/website/docs/guides/legacy/best-practices.md index 018d48ba181..10e02271518 100644 --- a/website/docs/guides/legacy/best-practices.md +++ b/website/docs/guides/legacy/best-practices.md @@ -159,12 +159,6 @@ dbt test --select result:fail --exclude --defer --state path/to/p > Note: If you're using the `--state target/` flag, `result:error` and `result:fail` flags can only be selected concurrently(in the same command) if using the `dbt build` command. `dbt test` will overwrite the `run_results.json` from `dbt run` in a previous command invocation. - - -Only supported by v1.1 or newer. - - - Only supported by v1.1 or newer. diff --git a/website/docs/guides/migration/versions/00-upgrading-to-v1.7.md b/website/docs/guides/migration/versions/00-upgrading-to-v1.7.md new file mode 100644 index 00000000000..036c734dfb1 --- /dev/null +++ b/website/docs/guides/migration/versions/00-upgrading-to-v1.7.md @@ -0,0 +1,24 @@ +--- +title: "Upgrading to v1.7 (beta)" +description: New features and changes in dbt Core v1.7 +--- + +## Resources + +- [Changelog](https://github.com/dbt-labs/dbt-core/blob/8aaed0e29f9560bc53d9d3e88325a9597318e375/CHANGELOG.md) +- [CLI Installation guide](/docs/core/installation) +- [Cloud upgrade guide](/docs/dbt-versions/upgrade-core-in-cloud) +- [Release schedule](https://github.com/dbt-labs/dbt-core/issues/7481) + +## What to know before upgrading + +dbt Labs is committed to providing backward compatibility for all versions 1.x, with the exception of any changes explicitly mentioned below. If you encounter an error upon upgrading, please let us know by [opening an issue](https://github.com/dbt-labs/dbt-core/issues/new). + +### Behavior changes + +**COMING SOON** + +### Quick hits + +**COMING SOON** + diff --git a/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md b/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md index 89e04daa8df..9b6d46fe2b2 100644 --- a/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md +++ b/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md @@ -4,13 +4,10 @@ slug: in-15-minutes description: Find issues before they are deployed to production with dbt Cloud's Slim CI. --- -:::tip Join the beta - -dbt Labs is currently running a beta that provides improved UI updates for setting up CI jobs. For docs, refer to [Set up CI jobs (Beta version)](/docs/deploy/ci-jobs?version=beta#set-up-ci-jobs). This guide assumes you are using the improvements available in the beta. 
- -If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). +In this guide, we're going to add a **CI environment**, where proposed changes can be validated in the context of the entire project without impacting production systems. We will use a single set of deployment credentials (like the Prod environment), but models are built in a separate location to avoid impacting others (like the Dev environment). -::: +Your git flow will look like this: + ## Prerequisites @@ -19,11 +16,6 @@ As part of your initial dbt Cloud setup, you should already have Development and - Your **Development environment** powers the IDE. Each user has individual credentials, and builds into an individual dev schema. Nothing you do here impacts any of your colleagues. - Your **Production environment** brings the canonical version of your project to life for downstream consumers. There is a single set of deployment credentials, and everything is built into your production schema(s). -In this guide, we're going to add a **CI environment**, where proposed changes can be validated in the context of the entire project without impacting production systems. We will use a single set of deployment credentials (like the Prod environment), but models are built in a separate location to avoid impacting others (like the Dev environment). - -Your git flow will look like this: - - ## Step 1: Create a new CI environment See [Create a new environment](/docs/dbt-cloud-environments#create-a-deployment-environment). The environment should be called **CI**. Just like your existing Production environment, it will be a Deployment-type environment. diff --git a/website/docs/guides/orchestration/set-up-ci/5-multiple-checks.md b/website/docs/guides/orchestration/set-up-ci/5-multiple-checks.md index 0b4173cffee..4bfe2d936d4 100644 --- a/website/docs/guides/orchestration/set-up-ci/5-multiple-checks.md +++ b/website/docs/guides/orchestration/set-up-ci/5-multiple-checks.md @@ -12,22 +12,15 @@ As such, it may slow down the time it takes to get new features into production. The team at Sunrun maintained a SOX-compliant deployment in dbt while reducing the number of environments. Check out [their Coalesce presentation](https://www.youtube.com/watch?v=vmBAO2XN-fM) to learn more. ::: -:::tip Join the beta - -dbt Labs is currently running a beta that provides improved UI updates for setting up CI jobs. For docs, refer to [Set up CI jobs (Beta version)](/docs/deploy/ci-jobs?version=beta#set-up-ci-jobs). This guide assumes you are using the improvements available in the beta. - -If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). +In this section, we will add a new **QA** environment. New features will branch off from and be merged back into the associated `qa` branch, and a member of your team (the "Release Manager") will create a PR against `main` to be validated in the CI environment before going live. -::: +The git flow will look like this: + ## Prerequisites -This section assumes you already have the **Development**, **CI** and **Production** environments described in [the Baseline setup](/guides/orchestration/set-up-ci/in-15-minutes). - -In this section, we will add a new **QA** environment. 
New features will branch off from and be merged back into the associated `qa` branch, and a member of your team (the "Release Manager") will create a PR against `main` to be validated in the CI environment before going live. +- You have the **Development**, **CI**, and **Production** environments, as described in [the Baseline setup](/guides/orchestration/set-up-ci/in-15-minutes). -The git flow will look like this: - ## Step 1: Create a `release` branch in your git repo diff --git a/website/docs/quickstarts/manual-install-qs.md b/website/docs/quickstarts/manual-install-qs.md index ea3c6c7ec84..05336178ff6 100644 --- a/website/docs/quickstarts/manual-install-qs.md +++ b/website/docs/quickstarts/manual-install-qs.md @@ -18,11 +18,11 @@ When you use dbt Core to work with dbt, you will be editing files locally using * Complete [Setting up (in BigQuery)](/quickstarts/bigquery?step=2) and [Loading data (BigQuery)](/quickstarts/bigquery?step=3). * [Create a GitHub account](https://github.com/join) if you don't already have one. -## Create a starter project +### Create a starter project After setting up BigQuery to work with dbt, you are ready to create a starter project with example models, before building your own models. -### Create a repository +## Create a repository The following steps use [GitHub](https://github.com/) as the Git provider for this guide, but you can use any Git provider. You should have already [created a GitHub account](https://github.com/join). @@ -32,7 +32,7 @@ The following steps use [GitHub](https://github.com/) as the Git provider for th 4. Click **Create repository**. 5. Save the commands from "…or create a new repository on the command line" to use later in [Commit your changes](#commit-your-changes). -### Create a project +## Create a project Learn how to use a series of commands using the command line of the Terminal to create your project. dbt Core includes an `init` command that helps scaffold a dbt project. @@ -40,56 +40,56 @@ To create your dbt project: 1. Make sure you have dbt Core installed and check the version using the `dbt --version` command: - ```terminal - dbt --version - ``` +```shell +dbt --version +``` 2. Initiate the `jaffle_shop` project using the `init` command: - ```terminal - dbt init jaffle_shop - ``` +```shell +dbt init jaffle_shop +``` 3. Navigate into your project's directory: - ```terminal - cd jaffle_shop - ``` +```shell +cd jaffle_shop +``` 4. Use `pwd` to confirm that you are in the right spot: - ```terminal - $ pwd - > Users/BBaggins/dbt-tutorial/jaffle_shop - ``` +```shell +$ pwd +> Users/BBaggins/dbt-tutorial/jaffle_shop +``` 5. Use a code editor like Atom or VSCode to open the project directory you created in the previous steps, which we named jaffle_shop. The content includes folders and `.sql` and `.yml` files generated by the `init` command. -
- -
+
+ +
6. Update the following values in the `dbt_project.yml` file: - + - ```yaml - name: jaffle_shop # Change from the default, `my_new_project` +```yaml +name: jaffle_shop # Change from the default, `my_new_project` - ... +... - profile: jaffle_shop # Change from the default profile name, `default` +profile: jaffle_shop # Change from the default profile name, `default` - ... +... - models: - jaffle_shop: # Change from `my_new_project` to match the previous value for `name:` - ... - ``` +models: + jaffle_shop: # Change from `my_new_project` to match the previous value for `name:` + ... +``` - + -### Connect to BigQuery +## Connect to BigQuery When developing locally, dbt connects to your using a [profile](/docs/core/connect-data-platform/connection-profiles), which is a YAML file with all the connection details to your warehouse. @@ -97,38 +97,38 @@ When developing locally, dbt connects to your using 2. Move your BigQuery keyfile into this directory. 3. Copy the following and paste into the new profiles.yml file. Make sure you update the values where noted. - - - ```yaml - jaffle_shop: # this needs to match the profile in your dbt_project.yml file - target: dev - outputs: - dev: - type: bigquery - method: service-account - keyfile: /Users/BBaggins/.dbt/dbt-tutorial-project-331118.json # replace this with the full path to your keyfile - project: grand-highway-265418 # Replace this with your project id - dataset: dbt_bbagins # Replace this with dbt_your_name, e.g. dbt_bilbo - threads: 1 - timeout_seconds: 300 - location: US - priority: interactive - ``` - - + + +```yaml +jaffle_shop: # this needs to match the profile in your dbt_project.yml file + target: dev + outputs: + dev: + type: bigquery + method: service-account + keyfile: /Users/BBaggins/.dbt/dbt-tutorial-project-331118.json # replace this with the full path to your keyfile + project: grand-highway-265418 # Replace this with your project id + dataset: dbt_bbagins # Replace this with dbt_your_name, e.g. dbt_bilbo + threads: 1 + timeout_seconds: 300 + location: US + priority: interactive +``` + + 4. Run the `debug` command from your project to confirm that you can successfully connect: - ```terminal - $ dbt debug - > Connection test: OK connection ok - ``` +```shell +$ dbt debug +> Connection test: OK connection ok +``` -
- -
+
+ +
-#### FAQs +### FAQs @@ -136,69 +136,72 @@ When developing locally, dbt connects to your using -### Perform your first dbt run +## Perform your first dbt run Our sample project has some example models in it. We're going to check that we can run them to confirm everything is in order. 1. Enter the `run` command to build example models: - ```terminal - dbt run - ``` +```shell +dbt run +``` You should have an output that looks like this: +
-### Commit your changes +## Commit your changes Commit your changes so that the repository contains the latest code. 1. Link the GitHub repository you created to your dbt project by running the following commands in Terminal. Make sure you use the correct git URL for your repository, which you should have saved from step 5 in [Create a repository](#create-a-repository). - ```terminal - git init - git branch -M main - git add . - git commit -m "Create a dbt project" - git remote add origin https://github.com/USERNAME/dbt-tutorial.git - git push -u origin main - ``` +```shell +git init +git branch -M main +git add . +git commit -m "Create a dbt project" +git remote add origin https://github.com/USERNAME/dbt-tutorial.git +git push -u origin main +``` 2. Return to your GitHub repository to verify your new files have been added. -## Build your first models +### Build your first models -Now that you set up your sample project, you can get to the fun part — [building models](/docs/build/sql-models)! You will take a sample query and turn it into a model in your dbt project. +Now that you set up your sample project, you can get to the fun part — [building models](/docs/build/sql-models)! +In the next steps, you will take a sample query and turn it into a model in your dbt project. -### Checkout a new git branch +## Checkout a new git branch Check out a new git branch to work on new code: 1. Create a new branch by using the `checkout` command and passing the `-b` flag: - ```terminal - $ git checkout -b add-customers-model - > Switched to a new branch `add-customer-model` - ``` +```shell +$ git checkout -b add-customers-model +> Switched to a new branch `add-customer-model` +``` + +## Build your first model -### Build your first model 1. Open your project in your favorite code editor. 2. Create a new SQL file in the `models` directory, named `models/customers.sql`. 3. Paste the following query into the `models/customers.sql` file. - + 4. From the command line, enter `dbt run`. -
- -
+
+ +
When you return to the BigQuery console, you can `select` from this model. -#### FAQs +### FAQs @@ -206,210 +209,210 @@ When you return to the BigQuery console, you can `select` from this model. -### Change the way your model is materialized +## Change the way your model is materialized -### Delete the example models +## Delete the example models -### Build models on top of other models +## Build models on top of other models 1. Create a new SQL file, `models/stg_customers.sql`, with the SQL from the `customers` CTE in our original query. 2. Create a second new SQL file, `models/stg_orders.sql`, with the SQL from the `orders` CTE in our original query. - + -
+
- + - ```sql - select - id as customer_id, - first_name, - last_name +```sql +select + id as customer_id, + first_name, + last_name - from `dbt-tutorial`.jaffle_shop.customers - ``` +from `dbt-tutorial`.jaffle_shop.customers +``` - + - + - ```sql - select - id as order_id, - user_id as customer_id, - order_date, - status +```sql +select + id as order_id, + user_id as customer_id, + order_date, + status - from `dbt-tutorial`.jaffle_shop.orders - ``` +from `dbt-tutorial`.jaffle_shop.orders +``` - + -
+
-
+
- + - ```sql - select - id as customer_id, - first_name, - last_name +```sql +select + id as customer_id, + first_name, + last_name - from jaffle_shop_customers - ``` +from jaffle_shop_customers +``` - + - + - ```sql - select - id as order_id, - user_id as customer_id, - order_date, - status +```sql +select + id as order_id, + user_id as customer_id, + order_date, + status - from jaffle_shop_orders - ``` +from jaffle_shop_orders +``` - + -
+
-
+
- + - ```sql - select - id as customer_id, - first_name, - last_name +```sql +select + id as customer_id, + first_name, + last_name - from jaffle_shop.customers - ``` +from jaffle_shop.customers +``` - + - + - ```sql - select - id as order_id, - user_id as customer_id, - order_date, - status +```sql +select + id as order_id, + user_id as customer_id, + order_date, + status - from jaffle_shop.orders - ``` +from jaffle_shop.orders +``` - + -
+
-
+
- + - ```sql - select - id as customer_id, - first_name, - last_name +```sql +select + id as customer_id, + first_name, + last_name - from raw.jaffle_shop.customers - ``` +from raw.jaffle_shop.customers +``` - + - + - ```sql - select - id as order_id, - user_id as customer_id, - order_date, - status +```sql +select + id as order_id, + user_id as customer_id, + order_date, + status - from raw.jaffle_shop.orders - ``` +from raw.jaffle_shop.orders +``` - + -
+
-
+
3. Edit the SQL in your `models/customers.sql` file as follows: - + + +```sql +with customers as ( - ```sql - with customers as ( + select * from {{ ref('stg_customers') }} - select * from {{ ref('stg_customers') }} +), - ), +orders as ( - orders as ( + select * from {{ ref('stg_orders') }} - select * from {{ ref('stg_orders') }} +), - ), +customer_orders as ( - customer_orders as ( + select + customer_id, - select - customer_id, + min(order_date) as first_order_date, + max(order_date) as most_recent_order_date, + count(order_id) as number_of_orders - min(order_date) as first_order_date, - max(order_date) as most_recent_order_date, - count(order_id) as number_of_orders + from orders - from orders + group by 1 - group by 1 +), - ), +final as ( - final as ( + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order_date, + customer_orders.most_recent_order_date, + coalesce(customer_orders.number_of_orders, 0) as number_of_orders - select - customers.customer_id, - customers.first_name, - customers.last_name, - customer_orders.first_order_date, - customer_orders.most_recent_order_date, - coalesce(customer_orders.number_of_orders, 0) as number_of_orders + from customers - from customers + left join customer_orders using (customer_id) - left join customer_orders using (customer_id) +) - ) +select * from final - select * from final - - ``` +``` - + 4. Execute `dbt run`. - This time, when you performed a `dbt run`, separate views/tables were created for `stg_customers`, `stg_orders` and `customers`. dbt inferred the order to run these models. Because `customers` depends on `stg_customers` and `stg_orders`, dbt builds `customers` last. You do not need to explicitly define these dependencies. +This time, when you performed a `dbt run`, separate views/tables were created for `stg_customers`, `stg_orders` and `customers`. dbt inferred the order to run these models. Because `customers` depends on `stg_customers` and `stg_orders`, dbt builds `customers` last. You do not need to explicitly define these dependencies. -#### FAQs {#faq-2} +### FAQs {#faq-2} @@ -424,13 +427,11 @@ You can also explore: * The `target` directory to see all of the compiled SQL. The `run` directory shows the create or replace table statements that are running, which are the select statements wrapped in the correct DDL. * The `logs` file to see how dbt Core logs all of the action happening within your project. It shows the select statements that are running and the python logging happening when dbt runs. -## Test and document your project - -### Add tests to your models +## Add tests to your models -### Document your models +## Document your models @@ -446,7 +447,7 @@ You can also explore: -### Commit updated changes +## Commit updated changes You need to commit the changes you made to the project so that the repository has your latest code. @@ -457,4 +458,10 @@ You need to commit the changes you made to the project so that the repository ha ## Schedule a job -We recommend using dbt Cloud to schedule a job. For more information about using dbt Core to schedule a job, see [dbt airflow](/blog/dbt-airflow-spiritual-alignment) blog post or [deployments](/docs/deploy/deployments). +We recommend using dbt Cloud as the easiest and most reliable way to [deploy jobs](/docs/deploy/deployments) and automate your dbt project in production. + +For more info on how to get started, refer to [create and schedule jobs](/docs/deploy/deploy-jobs#create-and-schedule-jobs). 
+ + + +For more information about using dbt Core to schedule a job, refer [dbt airflow](/blog/dbt-airflow-spiritual-alignment) blog post. diff --git a/website/docs/reference/commands/retry.md b/website/docs/reference/commands/retry.md index 8f5c617ef80..d494a46cf1f 100644 --- a/website/docs/reference/commands/retry.md +++ b/website/docs/reference/commands/retry.md @@ -28,3 +28,80 @@ Retry works with the following commands: `dbt retry` reuses the [selectors](/reference/node-selection/yaml-selectors) from the previously executed command. + +Example results of executing `dbt retry` after a successful `dbt run`: + +```shell +Running with dbt=1.6.1 +Registered adapter: duckdb=1.6.0 +Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 348 macros, 0 groups, 0 semantic models + +Nothing to do. Try checking your model configs and model specification args +``` + +Example of when `dbt run` encounters a syntax error in a model: + +```shell +Running with dbt=1.6.1 +Registered adapter: duckdb=1.6.0 +Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 348 macros, 0 groups, 0 semantic models + +Concurrency: 24 threads (target='dev') + +1 of 5 START sql view model main.stg_customers ................................. [RUN] +2 of 5 START sql view model main.stg_orders .................................... [RUN] +3 of 5 START sql view model main.stg_payments .................................. [RUN] +1 of 5 OK created sql view model main.stg_customers ............................ [OK in 0.06s] +2 of 5 OK created sql view model main.stg_orders ............................... [OK in 0.06s] +3 of 5 OK created sql view model main.stg_payments ............................. [OK in 0.07s] +4 of 5 START sql table model main.customers .................................... [RUN] +5 of 5 START sql table model main.orders ....................................... [RUN] +4 of 5 ERROR creating sql table model main.customers ........................... [ERROR in 0.03s] +5 of 5 OK created sql table model main.orders .................................. [OK in 0.04s] + +Finished running 3 view models, 2 table models in 0 hours 0 minutes and 0.15 seconds (0.15s). + +Completed with 1 error and 0 warnings: + +Runtime Error in model customers (models/customers.sql) + Parser Error: syntax error at or near "selct" + +Done. PASS=4 WARN=0 ERROR=1 SKIP=0 TOTAL=5 +``` + + +Example of a subsequent failed `dbt retry` run without fixing the error(s): + +```shell +Running with dbt=1.6.1 +Registered adapter: duckdb=1.6.0 +Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 348 macros, 0 groups, 0 semantic models + +Concurrency: 24 threads (target='dev') + +1 of 1 START sql table model main.customers .................................... [RUN] +1 of 1 ERROR creating sql table model main.customers ........................... [ERROR in 0.03s] + +Done. PASS=4 WARN=0 ERROR=1 SKIP=0 TOTAL=5 +``` + +Example of a successful `dbt retry` run after fixing error(s): + +```shell +Running with dbt=1.6.1 +Registered adapter: duckdb=1.6.0 +Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 348 macros, 0 groups, 0 semantic models + +Concurrency: 24 threads (target='dev') + +1 of 1 START sql table model main.customers .................................... [RUN] +1 of 1 OK created sql table model main.customers ............................... [OK in 0.05s] + +Finished running 1 table model in 0 hours 0 minutes and 0.09 seconds (0.09s). + +Completed successfully + +Done. 
PASS=1 WARN=0 ERROR=0 SKIP=0 TOTAL=1 +``` + +In each scenario `dbt retry` picks up from the error rather than running all of the upstream dependencies again. diff --git a/website/docs/reference/commands/rpc.md b/website/docs/reference/commands/rpc.md index a98799356ee..2b9a96688de 100644 --- a/website/docs/reference/commands/rpc.md +++ b/website/docs/reference/commands/rpc.md @@ -12,16 +12,19 @@ description: "Remote Procedure Call (rpc) dbt server compiles and runs queries, -### Overview +:::caution The dbt-rpc plugin is deprecated -You can use the `dbt-rpc` plugin to run a Remote Procedure Call (rpc) dbt server. This server compiles and runs queries in the context of a dbt project. Additionally, the RPC server provides methods that enable you to list and terminate running processes. We recommend running an rpc server from a directory containing a dbt project. The server will compile the project into memory, then accept requests to operate against that project's dbt context. -:::caution Deprecation -**The dbt-rpc plugin will be fully deprecated by the second half of 2023.** +dbt Labs actively maintained `dbt-rpc` for compatibility with dbt-core versions up to v1.5. Starting with dbt-core v1.6 (released in July 2023), `dbt-rpc` is no longer supported for ongoing compatibility. + +In the meantime, dbt Labs will be performing critical maintenance only for `dbt-rpc`, until the last compatible version of dbt-core has reached the [end of official support](/docs/dbt-versions/core#latest-releases). At that point, dbt Labs will archive this repository to be read-only. -dbt Labs is actively maintaining `dbt-rpc` up to dbt v1.4. Starting in v1.5, we intend to break `dbt-rpc` compatibility in favor of [the new dbt Server](https://github.com/dbt-labs/dbt-server). dbt Labs will perform critical maintenance only on `dbt-rpc`, until the last compatible version of dbt has reached the end of official support (thus 12 months after release of v1.4; [see Core version policies](/docs/dbt-versions/core)). ::: +### Overview + +You can use the `dbt-rpc` plugin to run a Remote Procedure Call (rpc) dbt server. This server compiles and runs queries in the context of a dbt project. Additionally, the RPC server provides methods that enable you to list and terminate running processes. We recommend running an rpc server from a directory containing a dbt project. The server will compile the project into memory, then accept requests to operate against that project's dbt context. + :::caution Running on Windows We do not recommend running the rpc server on Windows because of reliability issues. A Docker container may provide a useful workaround, if required. ::: diff --git a/website/docs/reference/commands/run.md b/website/docs/reference/commands/run.md index fbc1a513cb1..f22cea71522 100644 --- a/website/docs/reference/commands/run.md +++ b/website/docs/reference/commands/run.md @@ -77,7 +77,7 @@ For more information on running parents or children of specific models, see the -See [global configs](/reference/global-configs/failing-fast) +See [global configs](/reference/global-configs/warnings) ## Failing fast diff --git a/website/docs/reference/dbt-jinja-functions/builtins.md b/website/docs/reference/dbt-jinja-functions/builtins.md index 40848705dc4..a7e96640351 100644 --- a/website/docs/reference/dbt-jinja-functions/builtins.md +++ b/website/docs/reference/dbt-jinja-functions/builtins.md @@ -5,6 +5,7 @@ id: "builtins" description: "Read this guide to understand the builtins Jinja function in dbt." 
--- + The `builtins` variable exists to provide references to builtin dbt context methods. This allows macros to be created with names that _mask_ dbt builtin context methods, while still making those methods accessible in the dbt compilation context. The `builtins` variable is a dictionary containing the following keys: @@ -15,9 +16,51 @@ The `builtins` variable is a dictionary containing the following keys: ## Usage -The following macro overrides the `ref` method available in the model compilation context to return a [Relation](/reference/dbt-classes#relation) with the database name overriden to `dev`. +:::important + +Using the `builtins` variable in this way is an advanced development workflow. Users should be ready to maintain and update these overrides when upgrading in the future. +::: + + + +From dbt v1.5 and higher, use the following macro to extract user-provided arguments, including version, and call the builtins.ref() function with either a single modelname argument or both packagename and modelname arguments, based on the number of positional arguments in varargs: + +

+ ``` +{% macro ref() %} +-- extract user-provided positional and keyword arguments + {% set version = kwargs.get('version') %} + {% set packagename = none %} + {%- if (varargs | length) == 1 -%} + {% set modelname = varargs[0] %} +{%- else -%} + {% set packagename = varargs[0] %} + {% set modelname = varargs[1] %} +{% endif %} +-- call builtins.ref based on provided positional arguments +{% set rel = None %} +{% if packagename is not none %} + {% set rel = return(builtins.ref(packagename, modelname, version=version)) %} +{% else %} + {% set rel = return(builtins.ref(modelname, version=version)) %} +{% endif %} + +-- finally, override the database name with "dev" +{% set newrel = rel.replace_path(database="dev") %} +{% do return(newrel) %} + +{% endmacro %} +``` +
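For context, once an override like the one above lives in your project, model code keeps calling `ref()` exactly as before; the macro is picked up transparently at compile time. A minimal sketch, assuming a hypothetical `stg_orders` model exists:

```sql
-- models/orders_summary.sql (hypothetical model name)
-- ref() resolves through the project-level override above, so the
-- compiled relation for stg_orders points at the "dev" database.
select
    order_id,
    customer_id
from {{ ref('stg_orders') }}
```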
+ + + +From dbt v1.4 and lower, use the following macro to override the `ref` method available in the model compilation context to return a [Relation](/reference/dbt-classes#relation) with the database name overriden to `dev`: + +``` + {% macro ref(model_name) %} {% set rel = builtins.ref(model_name) %} @@ -26,6 +69,7 @@ The following macro overrides the `ref` method available in the model compilatio {% endmacro %} ``` + The ref macro can also be used to control which elements of the model path are rendered when run, for example the following macro overrides the `ref` method to render only the schema and object identifier, but not the database reference i.e. `my_schema.my_model` rather than `my_database.my_schema.my_model`. This is especially useful when using snowflake as a warehouse, if you intend to change the name of the database post-build and wish the references to remain accurate. diff --git a/website/docs/reference/dbt-jinja-functions/ref.md b/website/docs/reference/dbt-jinja-functions/ref.md index b9b14bed42a..6df06a2f415 100644 --- a/website/docs/reference/dbt-jinja-functions/ref.md +++ b/website/docs/reference/dbt-jinja-functions/ref.md @@ -69,7 +69,7 @@ select * from {{ ref('model_name') }} ### Two-argument variant -There is also a two-argument variant of the `ref` function. With this variant, you can pass both a namespace (project or package) and model name to `ref` to avoid ambiguity. +You can also use a two-argument variant of the `ref` function. With this variant, you can pass both a namespace (project or package) and model name to `ref` to avoid ambiguity. When using two arguments with projects (not packages), you also need to set [cross project dependencies](/docs/collaborate/govern/project-dependencies). ```sql select * from {{ ref('project_or_package', 'model_name') }} diff --git a/website/docs/reference/global-configs/cache.md b/website/docs/reference/global-configs/cache.md index db4eabd14b7..6157e1a3bfb 100644 --- a/website/docs/reference/global-configs/cache.md +++ b/website/docs/reference/global-configs/cache.md @@ -17,7 +17,7 @@ There are two ways to optionally modify this behavior: For example, to quickly compile a model that requires no database metadata or introspective queries: ```text -dbt --skip-populate-cache compile --select my_model_name +dbt --no-populate-cache compile --select my_model_name ``` @@ -63,4 +63,4 @@ config: -
\ No newline at end of file + diff --git a/website/docs/reference/node-selection/graph-operators.md b/website/docs/reference/node-selection/graph-operators.md index 1e7c88fadfc..4fdc2f10628 100644 --- a/website/docs/reference/node-selection/graph-operators.md +++ b/website/docs/reference/node-selection/graph-operators.md @@ -34,12 +34,3 @@ The `@` operator is similar to `+`, but will also include _the parents of the ch ```bash $ dbt run --models @my_model # select my_model, its children, and the parents of its children ``` - -### The "star" operator -The `*` operator matches all models within a package or directory. - - - ```bash - $ dbt run --select snowplow.* # run all of the models in the snowplow package - $ dbt run --select finance.base.* # run all of the models in models/finance/base - ``` diff --git a/website/docs/reference/node-selection/methods.md b/website/docs/reference/node-selection/methods.md index a2c33cc4ff6..3ffed493c23 100644 --- a/website/docs/reference/node-selection/methods.md +++ b/website/docs/reference/node-selection/methods.md @@ -261,11 +261,6 @@ $ dbt seed --select result:error --state path/to/artifacts # run all seeds that ``` ### The "source_status" method - - -Supported in v1.1 or newer. - - diff --git a/website/docs/reference/node-selection/syntax.md b/website/docs/reference/node-selection/syntax.md index 1a43a32e2bc..a60d23cd16f 100644 --- a/website/docs/reference/node-selection/syntax.md +++ b/website/docs/reference/node-selection/syntax.md @@ -174,12 +174,6 @@ $ dbt run --select result:+ state:modified+ --defer --state ./ - -Only supported by v1.1 or newer. - - - Only supported by v1.1 or newer. @@ -199,11 +193,6 @@ dbt build --select source_status:fresher+ For more example commands, refer to [Pro-tips for workflows](/guides/legacy/best-practices.md#pro-tips-for-workflows). ### The "source_status" status - - -Only supported by v1.1 or newer. - - diff --git a/website/docs/reference/programmatic-invocations.md b/website/docs/reference/programmatic-invocations.md index 8bd9bf84047..6afcd65c1bc 100644 --- a/website/docs/reference/programmatic-invocations.md +++ b/website/docs/reference/programmatic-invocations.md @@ -30,7 +30,7 @@ Each command returns a `dbtRunnerResult` object, which has three attributes: - `result`: If the command completed (successfully or with handled errors), its result(s). Return type varies by command. - `exception`: If the dbt invocation encountered an unhandled error and did not complete, the exception it encountered. -There is a 1:1 correspondence between [CLI exit codes](reference/exit-codes) and the `dbtRunnerResult` returned by a programmatic invocation: +There is a 1:1 correspondence between [CLI exit codes](/reference/exit-codes) and the `dbtRunnerResult` returned by a programmatic invocation: | Scenario | CLI Exit Code | `success` | `result` | `exception` | |---------------------------------------------------------------------------------------------|--------------:|-----------|-------------------|-------------| diff --git a/website/docs/reference/resource-configs/contract.md b/website/docs/reference/resource-configs/contract.md index 66072fc8b89..e8ea6d82287 100644 --- a/website/docs/reference/resource-configs/contract.md +++ b/website/docs/reference/resource-configs/contract.md @@ -95,32 +95,3 @@ Imagine: - The result is a delta between the yaml-defined contract, and the actual table in the database - which means the contract is now incorrect! Why `append_new_columns`, rather than `sync_all_columns`? 
Because removing existing columns is a breaking change for contracted models! - -### Detecting breaking changes - -When you use the `state:modified` selection method in Slim CI, dbt will detect changes to model contracts, and raise an error if any of those changes could be breaking for downstream consumers. - -Breaking changes include: -- Removing an existing column -- Changing the `data_type` of an existing column -- Removing or modifying one of the `constraints` on an existing column (dbt v1.6 or higher) - -``` -Breaking Change to Contract Error in model sometable (models/sometable.sql) - While comparing to previous project state, dbt detected a breaking change to an enforced contract. - - The contract's enforcement has been disabled. - - Columns were removed: - - order_name - - Columns with data_type changes: - - order_id (number -> int) - - Consider making an additive (non-breaking) change instead, if possible. - Otherwise, create a new model version: https://docs.getdbt.com/docs/collaborate/govern/model-versions -``` - -Additive changes are **not** considered breaking: -- Adding a new column to a contracted model -- Adding new `constraints` to an existing column in a contracted model diff --git a/website/docs/reference/resource-configs/databricks-configs.md b/website/docs/reference/resource-configs/databricks-configs.md index 41b0bfcc5ea..31dcc827741 100644 --- a/website/docs/reference/resource-configs/databricks-configs.md +++ b/website/docs/reference/resource-configs/databricks-configs.md @@ -12,6 +12,7 @@ When materializing a model as `table`, you may include several optional configs | file_format | The file format to use when creating tables (`parquet`, `delta`, `hudi`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `delta`| | location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | | partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `date_day` | +| liquid_clustered_by | Cluster the created table by the specified columns. Clustering method is based on [Delta's Liquid Clustering feature](https://docs.databricks.com/en/delta/clustering.html). Available since dbt-databricks 1.6.2. | Optional | `date_day` | | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `country_code` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | @@ -120,7 +121,7 @@ select date_day, count(*) as users -from events +from new_events group by 1 ``` diff --git a/website/docs/reference/resource-configs/grants.md b/website/docs/reference/resource-configs/grants.md index 68d1e6eb14e..3a65672fa5e 100644 --- a/website/docs/reference/resource-configs/grants.md +++ b/website/docs/reference/resource-configs/grants.md @@ -243,6 +243,7 @@ models: - Databricks automatically enables `grants` on SQL endpoints. 
For interactive clusters, admins should enable grant functionality using these two setup steps in the Databricks documentation: - [Enable table access control for your workspace](https://docs.databricks.com/administration-guide/access-control/table-acl.html) - [Enable table access control for a cluster](https://docs.databricks.com/security/access-control/table-acls/table-acl.html) +- To grant `READ_METADATA` or `USAGE`, use [post-hooks](https://docs.getdbt.com/reference/resource-configs/pre-hook-post-hook), as sketched below.
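A hedged sketch of that post-hook workaround follows; the group name (`analysts`) and the exact `GRANT` statement your workspace accepts are assumptions to verify against the Databricks documentation, not dbt-maintained syntax.

```sql
-- models/my_model.sql (illustrative)
-- The post-hook runs after the model builds; dbt renders {{ this }}
-- to the model's relation at execution time.
{{ config(
    materialized='table',
    post_hook="grant READ_METADATA on table {{ this }} to `analysts`"
) }}

select 1 as id
```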
diff --git a/website/docs/reference/resource-configs/persist_docs.md b/website/docs/reference/resource-configs/persist_docs.md index 6facf3945cb..7134972d2ca 100644 --- a/website/docs/reference/resource-configs/persist_docs.md +++ b/website/docs/reference/resource-configs/persist_docs.md @@ -151,7 +151,7 @@ Some known issues and limitations: - + - Column names that must be quoted, such as column names containing special characters, will cause runtime errors if column-level `persist_docs` is enabled. This is fixed in v1.2. diff --git a/website/docs/reference/resource-configs/postgres-configs.md b/website/docs/reference/resource-configs/postgres-configs.md index b2d1f644e90..79a1a07ad8c 100644 --- a/website/docs/reference/resource-configs/postgres-configs.md +++ b/website/docs/reference/resource-configs/postgres-configs.md @@ -19,7 +19,7 @@ In dbt-postgres, the following incremental materialization strategies are suppor -## Performance Optimizations +## Performance optimizations ### Unlogged @@ -60,7 +60,7 @@ While Postgres works reasonably well for datasets smaller than about 10m rows, d -Table models, incremental models, seeds, and snapshots may have a list of `indexes` defined. Each Postgres index can have three components: +Table models, incremental models, seeds, snapshots, and materialized views may have a list of `indexes` defined. Each Postgres index can have three components: - `columns` (list, required): one or more columns on which the index is defined - `unique` (boolean, optional): whether the index should be [declared unique](https://www.postgresql.org/docs/9.4/indexes-unique.html) - `type` (string, optional): a supported [index type](https://www.postgresql.org/docs/current/indexes-types.html) (B-tree, Hash, GIN, etc) @@ -113,66 +113,35 @@ models: -## Materialized view +## Materialized views -The Postgres adapter supports [materialized views](https://www.postgresql.org/docs/current/rules-materializedviews.html) and refreshes them for every subsequent `dbt run` you execute. For more information, see [Refresh Materialized Views](https://www.postgresql.org/docs/15/sql-refreshmaterializedview.html) in the Postgres docs. +The Postgres adapter supports [materialized views](https://www.postgresql.org/docs/current/rules-materializedviews.html). +Indexes are the only configuration that is specific to `dbt-postgres`. +The remaining configuration follows the general [materialized view](/docs/build/materializations#materialized-view) configuration. +There are also some limitations that we hope to address in the next version. -Materialized views support the optional configuration `on_configuration_change` with the following values: -- `apply` (default) — attempts to update the existing database object if possible, avoiding a complete rebuild. The following index action can be applied without the need to rebuild the materialized view: - - Added - - Dropped - - Updated -- `skip` — allows runs to continue while also providing a warning that the model was skipped -- `fail` — forces runs to fail if a change is detected in a materialized view +### Monitored configuration changes -You can create a materialized view by editing _one_ of these files: -- the SQL file for your model -- the `dbt_project.yml` configuration file +The settings below are monitored for changes applicable to `on_configuration_change`. 
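Before getting into what is monitored, here is a minimal sketch of a materialized view model that sets the dbt-postgres-specific `indexes` config described above; the model name, column, and upstream `ref` are placeholders.

```sql
-- models/recent_orders_mv.sql (illustrative)
{{ config(
    materialized='materialized_view',
    on_configuration_change='apply',
    indexes=[
        {'columns': ['order_date'], 'type': 'btree'}
    ]
) }}

select
    order_id,
    order_date
from {{ ref('stg_orders') }}
```

If the `indexes` list changes later, the behavior described below applies the change in place rather than rebuilding the materialized view.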
-The following examples create a materialized view: +#### Indexes - - -```sql -{{ - config( - materialized = 'materialized_view', - on_configuration_change = 'apply', - ) -}} -``` - - - - - - -```yaml -models: - path: - materialized: materialized_view -``` - +Index changes (`CREATE`, `DROP`) can be applied without the need to rebuild the materialized view. +This differs from a table model, where the table needs to be dropped and re-created to update the indexes. +If the `indexes` portion of the `config` block is updated, the changes will be detected and applied +directly to the materialized view in place. ### Limitations -Below are current limitations that we hope to address in a future release. #### Changing materialization to and from "materialized_view" -Swapping an already materialized model to a materialized view and vice versa. The workaround is manually dropping the existing materialization in the data warehouse before calling `dbt run` again. Normally, re-running with the `--full-refresh` flag would resolve this, but not in this case. - -For example, assume the model below, `my_model`, has already been materialized to the underlying data platform via `dbt run`. If a user changes the model's config to `materialized="materialized_view"`, they will get an error. The solution is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. - - - -```yaml - -{{ config( - materialized="table" # or any model type eg view, incremental -) }} +Swapping an already materialized model to a materialized view, and vice versa, is not supported. +The workaround is to manually drop the existing materialization in the data warehouse prior to calling `dbt run`. +Running with `--full-refresh` flag will not work to drop the existing table or view and create the materialized view (and vice versa). +This would only need to be done once as the existing object would then be a materialized view. -``` - - +For example,`my_model`, has already been materialized as a table in the underlying data platform via `dbt run`. +If the user changes the model's config to `materialized="materialized_view"`, they will get an error. +The solution is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. diff --git a/website/docs/reference/resource-configs/redshift-configs.md b/website/docs/reference/resource-configs/redshift-configs.md index 1cf4f193cd2..9bd127a1e1a 100644 --- a/website/docs/reference/resource-configs/redshift-configs.md +++ b/website/docs/reference/resource-configs/redshift-configs.md @@ -15,22 +15,27 @@ To-do: In dbt-redshift, the following incremental materialization strategies are supported: + - `append` (default) - `delete+insert` + + + - `append` (default) - `merge` - `delete+insert` + -All of these strategies are inheirited via from dbt-postgres. +All of these strategies are inherited from dbt-postgres. ## Performance optimizations ### Using sortkey and distkey -Tables in Amazon Redshift have two powerful optimizations to improve query performance: distkeys and sortkeys. Supplying these values as model-level configurations apply the corresponding settings in the generated `CREATE TABLE` . Note that these settings will have no effect for models set to `view` or `ephemeral` models. +Tables in Amazon Redshift have two powerful optimizations to improve query performance: distkeys and sortkeys. Supplying these values as model-level configurations apply the corresponding settings in the generated `CREATE TABLE` . 
Note that these settings will have no effect on models set to `view` or `ephemeral` models. - `dist` can have a setting of `all`, `even`, `auto`, or the name of a key. - `sort` accepts a list of sort keys, for example: `['timestamp', 'userid']`. dbt will build the sort key in the same order the fields are supplied. @@ -70,7 +75,7 @@ For more information on distkeys and sortkeys, view Amazon's docs: - [AWS Documentation » Amazon Redshift » Database Developer Guide » Designing Tables » Choosing a Data Distribution Style](https://docs.aws.amazon.com/redshift/latest/dg/t_Distributing_data.html) - [AWS Documentation » Amazon Redshift » Database Developer Guide » Designing Tables » Choosing Sort Keys](https://docs.aws.amazon.com/redshift/latest/dg/t_Sorting_data.html) -## Late Binding Views +## Late binding views Redshift supports views unbound from their dependencies, or [late binding views](https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_VIEW.html#late-binding-views). This DDL option "unbinds" a view from the data it selects from. In practice, this means that if upstream views or tables are dropped with a cascade qualifier, the late-binding view does not get dropped as well. @@ -104,54 +109,51 @@ models: -## Materialized view +## Materialized views -The Redshift adapter supports [materialized views](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-overview.html) and refreshes them for every subsequent `dbt run` that you execute. For more information, see [Refresh Materialized Views](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-refresh.html) in the Redshift docs. +The Redshift adapter supports [materialized views](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-overview.html). +Redshift-specific configuration includes the typical `dist`, `sort_type`, `sort`, and `backup`. +For materialized views, there is also the `auto_refresh` setting, which allows Redshift to [automatically refresh](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-refresh.html) the materialized view for you. +The remaining configuration follows the general [materialized view](/docs/build/materializations#Materialized-View) configuration. +There are also some limitations that we hope to address in the next version. -Materialized views support the optional configuration `on_configuration_change` with the following values: -- `apply` (default) — attempts to update the existing database object if possible, avoiding a complete rebuild. -- `continue` — allows runs to continue while also providing a warning that the model was not executed -- `fail` — forces runs to fail if a change is detected in a materialized view +### Monitored configuration changes -Additionally, you can apply the `auto_refresh` configuration to have Redshift [automatically refresh](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-refresh.html) the materialized view for you. This action can applied without the need to rebuild the materialized view. +The settings below are monitored for changes applicable to `on_configuration_change`. -You can create a materialized view by editing _one_ of these files: -- the SQL file for your model -- the `dbt_project.yml` configuration file +#### Dist -The following examples create a materialized view: +Changes to `dist` will result in a full refresh of the existing materialized view (applied at the time of the next `dbt run` of the model). 
Redshift requires a materialized view to be +dropped and recreated to apply a change to the `distkey` or `diststyle`. - +#### Sort type, sort -```sql -{{ - config( - materialized = 'materialized_view', - auto_refresh = False, - on_configuration_change = 'apply', - ) -}} -``` +Changes to `sort_type` or `sort` will result in a full refresh. Redshift requires a materialized +view to be dropped and recreated to apply a change to the `sortkey` or `sortstyle`. - +#### Backup +Changes to `backup` will result in a full refresh. Redshift requires a materialized +view to be dropped and recreated to apply a change to the `backup` setting. - +#### Auto refresh -```yaml -models: - path: - materialized: materialized_view -``` - +The `auto_refresh` setting can be updated via an `ALTER` statement. This setting effectively toggles +automatic refreshes on or off. The default setting for this config is off (`False`). If this +is the only configuration change for the materialized view, dbt will choose to apply +an `ALTER` statement instead of issuing a full refresh, ### Limitations -We hope to address the following limitations in a future release. -#### Changing materialization from "materialized_view" to table or view +#### Changing materialization from "materialized_view" to "table" or "view" -Swapping a materialized view to a table or view is not supported. You must manually drop the existing materialized view in the data warehouse before calling `dbt run` again. +Swapping a materialized view to a table or view is not supported. +You must manually drop the existing materialized view in the data warehouse prior to calling `dbt run`. +Normally, re-running with the `--full-refresh` flag would resolve this, but not in this case. +This would only need to be done once as the existing object would then be a materialized view. -For example, assume that a view, `my_mv.sql`, has already been materialized to the underlying data platform via `dbt run`. If a user then changes the model's config to be `materialized="table"`, they will get an error. The workaround is to execute `DROP MATERIALIZE VIEW my_mv CASCADE` on the data warehouse before trying the model again. +For example, assume that a materialized view, `my_mv.sql`, has already been materialized to the underlying data platform via `dbt run`. +If the user changes the model's config to `materialized="table"`, they will get an error. +The workaround is to execute `DROP MATERIALIZED VIEW my_mv CASCADE` on the data warehouse before trying the model again. diff --git a/website/docs/reference/resource-configs/snowflake-configs.md b/website/docs/reference/resource-configs/snowflake-configs.md index f822773ee41..30c7966ec68 100644 --- a/website/docs/reference/resource-configs/snowflake-configs.md +++ b/website/docs/reference/resource-configs/snowflake-configs.md @@ -301,7 +301,7 @@ models: -## Temporary Tables +## Temporary tables Beginning in dbt version 1.3, incremental table merges for Snowflake prefer to utilize a `view` rather than a `temporary table`. The reasoning was to avoid the database write step that a temporary table would initiate and save compile time. @@ -344,18 +344,26 @@ In the configuration format for the model SQL file: -## Dynamic Tables +## Dynamic tables -### Parameters +The Snowflake adapter supports [dynamic tables](https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table). +This materialization is specific to Snowflake, which means that any model configuration that +would normally come along for the ride from `dbt-core` (e.g. 
as with a `view`) may not be available +for dynamic tables. This gap will decrease in future patches and versions. +While this materialization is specific to Snowflake, it very much follows the implementation +of [materialized views](/docs/build/materializations#Materialized-View). +In particular, dynamic tables have access to the `on_configuration_change` setting. +There are also some limitations that we hope to address in the next version. -dbt-snowflake requires the following parameters: +### Parameters +Dynamic tables in `dbt-snowflake` require the following parameters: - `target_lag` - `snowflake_warehouse` - `on_configuration_change` To learn more about each parameter and what values it can take, see -the Snowflake docs page: [`CREATE DYNAMIC TABLE: Parameters`](https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table). Note that `downstream` for `target_lag` is not supported. +the Snowflake docs page: [`CREATE DYNAMIC TABLE: Parameters`](https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table). ### Usage @@ -369,13 +377,11 @@ The following examples create a dynamic table: ```sql -{{ - config( +{{ config( materialized = 'dynamic_table', snowflake_warehouse = 'snowflake_warehouse', target_lag = '10 minutes', - ) -}} +) }} ``` @@ -392,13 +398,31 @@ models: +### Monitored configuration changes + +The settings below are monitored for changes applicable to `on_configuration_change`. + +#### Target lag + +Changes to `target_lag` can be applied by running an `ALTER` statement. Refreshing is essentially +always on for dynamic tables; this setting changes how frequently the dynamic table is updated. + +#### Warehouse + +Changes to `snowflake_warehouse` can be applied via an `ALTER` statement. + ### Limitations #### Changing materialization to and from "dynamic_table" -Swapping an already materialized model to be a dynamic table and vice versa. The workaround is manually dropping the existing materialization in the data warehouse before calling `dbt run` again. +Swapping an already materialized model to be a dynamic table, and vice versa, is not supported. +The workaround is to manually drop the existing materialization in the data warehouse prior to calling `dbt run`. +Normally, re-running with the `--full-refresh` flag would resolve this, but not in this case. +This would only need to be done once as the existing object would then be a dynamic table. -For example, assume for the example model below, `my_model`, has already been materialized to the underlying data platform via `dbt run`. If a user then changes the model's config to be `materialized="dynamic_table"`, they will get an error. The workaround is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. +For example, assume the model below, `my_model`, has already been materialized to the underlying data platform via `dbt run`. +If the user changes the model's config to `materialized="dynamic_table"`, they will get an error. +The workaround is to execute `DROP TABLE my_model` on the data warehouse before trying the model again.
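To make this workaround concrete, here is a minimal sketch (an illustration added for clarity, not part of the changed files). The qualified name `analytics.dbt_prod.my_model` and the upstream model `stg_orders` are placeholder assumptions; substitute the database, schema, and refs your project actually builds.

```sql
-- Step 1 (run manually in Snowflake): drop the table that dbt previously built,
-- qualified with the database and schema where it lives.
drop table if exists analytics.dbt_prod.my_model;
```

```sql
-- Step 2: models/my_model.sql -- with the old table gone, the next `dbt run`
-- can create the model as a dynamic table.
{{ config(
    materialized = 'dynamic_table',
    snowflake_warehouse = 'snowflake_warehouse',
    target_lag = '10 minutes'
) }}

select * from {{ ref('stg_orders') }}
```

Going the other direction (from a dynamic table back to a table or view) follows the same pattern, except the manual statement would be `DROP DYNAMIC TABLE analytics.dbt_prod.my_model;`.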
@@ -412,6 +436,4 @@ For example, assume for the example model below, `my_model`, has already been ma - - diff --git a/website/docs/reference/resource-configs/upsolver-configs.md b/website/docs/reference/resource-configs/upsolver-configs.md index c50e49e877f..b917ee2cc58 100644 --- a/website/docs/reference/resource-configs/upsolver-configs.md +++ b/website/docs/reference/resource-configs/upsolver-configs.md @@ -4,9 +4,9 @@ id: "upsolver-configs" description: "Upsolver Configurations - Read this in-depth guide to learn about configurations in dbt." --- -## Supported Upsolver SQLake functionality: +## Supported Upsolver SQLake functionality -| Command | State | Materialized | +| COMMAND | STATE | MATERIALIZED | | ------ | ------ | ------ | | SQL compute cluster| not supported | - | | SQL connections| supported | connection | @@ -14,7 +14,7 @@ description: "Upsolver Configurations - Read this in-depth guide to learn about | SQL merge job | supported | incremental | | SQL insert job | supported | incremental | | SQL materialized views | supported | materializedview | - +| Expectations | supported | incremental | ## Configs materialization @@ -24,10 +24,12 @@ description: "Upsolver Configurations - Read this in-depth guide to learn about | connection_options | Yes | connection | Dictionary of options supported by selected connection | connection_options={ 'aws_role': 'aws_role', 'external_id': 'SAMPLES', 'read_only': True } | | incremental_strategy | No | incremental | Define one of incremental strategies: merge/copy/insert. Default: copy | incremental_strategy='merge' | | source | No | incremental | Define source to copy from: S3/KAFKA/KINESIS | source = 'S3' | -| target_type | No | incremental | Define supported target to copy into. Default: copy into a table created in a metastore connection | target_type='Snowflake' | -| target_schema | Yes/No | incremental | Define target schema. Required if target_type not table created in a metastore connection | target_schema = 'your_schema' | -| target_connection | Yes/No | incremental | Define target connection. Required if target_type not table created in a metastore connection | target_connection = 'your_snowflake_connection' | -| target_table_alias | Yes/No | incremental | Define target table. Required if target_type not table created in a metastore connection | target_table_alias = 'target_table' | +| target_type | No | incremental | Define target type REDSHIFT/ELASTICSEARCH/S3/SNOWFLAKE/POSTGRES. Default None for Data lake | target_type='Snowflake' | +| target_prefix | False | incremental | Define PREFIX for ELASTICSEARCH target type | target_prefix = 'orders' | +| target_location | False | incremental | Define LOCATION for S3 target type | target_location = 's3://your-bucket-name/path/to/folder/' | +| schema | Yes/No | incremental | Define target schema. Required if target_type, no table created in a metastore connection | schema = 'target_schema' | +| database | Yes/No | incremental | Define target connection. Required if target_type, no table created in a metastore connection | database = 'target_connection' | +| alias | Yes/No | incremental | Define target table. 
Required if target_type, no table created in a metastore connection | alias = 'target_table' | | delete_condition | No | incremental | Records that match the ON condition and a delete condition can be deleted | delete_condition='nettotal > 1000' | | partition_by | No | incremental | List of dictionaries to define partition_by for target metastore table | partition_by=[{'field':'$field_name'}] | | primary_key | No | incremental | List of dictionaries to define partition_by for target metastore table | primary_key=[{'field':'customer_email', 'type':'string'}] | @@ -35,8 +37,7 @@ description: "Upsolver Configurations - Read this in-depth guide to learn about | sync | No | incremental/materializedview | Boolean option to define if job is synchronized or non-msynchronized. Default: False | sync=True | | options | No | incremental/materializedview | Dictionary of job options | options={ 'START_FROM': 'BEGINNING', 'ADD_MISSING_COLUMNS': True } | - -## SQL connection options +## SQL connection Connections are used to provide Upsolver with the proper credentials to bring your data into SQLake as well as to write out your transformed data to various services. More details on ["Upsolver SQL connections"](https://docs.upsolver.com/sqlake/sql-command-reference/sql-connections) As a dbt model connection is a model with materialized='connection' @@ -52,26 +53,26 @@ As a dbt model connection is a model with materialized='connection' Running this model will compile CREATE CONNECTION(or ALTER CONNECTION if exists) SQL and send it to Upsolver engine. Name of the connection will be name of the model. - ## SQL copy job A COPY FROM job allows you to copy your data from a given source into a table created in a metastore connection. This table then serves as your staging table and can be used with SQLake transformation jobs to write to various target locations. More details on ["Upsolver SQL copy-from"](https://docs.upsolver.com/sqlake/sql-command-reference/sql-jobs/create-job/copy-from) As a dbt model copy job is model with materialized='incremental' + ```sql {{ config( materialized='incremental', sync=True|False, source = 'S3'| 'KAFKA' | ... , - options={ - 'option_name': 'option_value' + options={ + 'option_name': 'option_value' }, - partition_by=[{}] - ) + partition_by=[{}] + ) }} SELECT * FROM {{ ref() }} ``` -Running this model will compile CREATE TABLE SQL(or ALTER TABLE if exists) and CREATE COPY JOB(or ALTER COPY JOB if exists) SQL and send it to Upsolver engine. Name of the table will be name of the model. Name of the job will be the name of the model plus '_job' +Running this model will compile CREATE TABLE SQL for target type Data lake (or ALTER TABLE if exists) and CREATE COPY JOB(or ALTER COPY JOB if exists) SQL and send it to Upsolver engine. Name of the table will be name of the model. Name of the job will be name of the model plus '_job' ## SQL insert job @@ -85,7 +86,7 @@ As a dbt model insert job is model with materialized='incremental' and increment map_columns_by_name=True|False, incremental_strategy='insert', options={ - 'option_name': 'option_value' + 'option_name': 'option_value' }, primary_key=[{}] ) @@ -97,8 +98,7 @@ GROUP BY ... HAVING COUNT(DISTINCT orderid::string) ... ``` -Running this model will compile CREATE TABLE SQL(or ALTER TABLE if exists) and CREATE INSERT JOB(or ALTER INSERT JOB if exists) SQL and send it to Upsolver engine. Name of the table will be name of the model. 
Name of the job will be the name of the model plus '_job' - +Running this model will compile CREATE TABLE SQL for target type Data lake (or ALTER TABLE if exists) and CREATE INSERT JOB (or ALTER INSERT JOB if exists) SQL and send it to the Upsolver engine. The name of the table will be the name of the model. The name of the job will be the name of the model plus '_job'. ## SQL merge job @@ -112,7 +112,7 @@ As a dbt model merge job is model with materialized='incremental' and incrementa map_columns_by_name=True|False, incremental_strategy='merge', options={ - 'option_name': 'option_value' + 'option_name': 'option_value' }, primary_key=[{}] ) @@ -124,14 +124,14 @@ GROUP BY ... HAVING COUNT ... ``` -Running this model will compile CREATE TABLE SQL(or ALTER TABLE if exists) and CREATE MERGE JOB(or ALTER MERGE JOB if exists) SQL and send it to Upsolver engine. Name of the table will be name of the model. Name of the job will be the name of the model plus '_job' +Running this model will compile CREATE TABLE SQL for target type Data lake (or ALTER TABLE if exists) and CREATE MERGE JOB (or ALTER MERGE JOB if exists) SQL and send it to the Upsolver engine. The name of the table will be the name of the model. The name of the job will be the name of the model plus '_job'. ## SQL materialized views When transforming your data, you may find that you need data from multiple source tables in order to achieve your desired result. In such a case, you can create a materialized view from one SQLake table in order to join it with your other table (which in this case is considered the main table). More details on ["Upsolver SQL materialized views"](https://docs.upsolver.com/sqlake/sql-command-reference/sql-jobs/create-job/sql-transformation-jobs/sql-materialized-views). -As a dbt model materialized views are models with materialized='materializedview'. +As a dbt model, a materialized view is a model with materialized='materializedview'. ```sql {{ config( materialized='materializedview', @@ -145,9 +145,9 @@ WHERE ... GROUP BY ... ``` -Running this model will compile CREATE MATERIALIZED VIEW SQL(or ALTER MATERIALIZED VIEW if exists) and send it to Upsolver engine. Name of the materializedview will be the name of the model. +Running this model will compile CREATE MATERIALIZED VIEW SQL (or ALTER MATERIALIZED VIEW if exists) and send it to the Upsolver engine. The name of the materialized view will be the name of the model. -## Expectations and constraints +## Expectations/constraints Data quality conditions can be added to your job to drop a row or trigger a warning when a column violates a predefined condition.
@@ -169,7 +169,7 @@ models: # model-level constraints constraints: - type: check - columns: [`''`, `''`] + columns: ['', ''] expression: "column1 <= column2" name: - type: not_null @@ -190,7 +190,7 @@ models: ## Projects examples -> Refer to the projects examples link: [github.com/dbt-upsolver/examples/](https://github.com/Upsolver/dbt-upsolver/tree/main/examples) +> projects examples link: [github.com/dbt-upsolver/examples/](https://github.com/Upsolver/dbt-upsolver/tree/main/examples) ## Connection options @@ -199,12 +199,12 @@ models: | aws_role | s3 | True | True | 'aws_role': `''` | | external_id | s3 | True | True | 'external_id': `''` | | aws_access_key_id | s3 | True | True | 'aws_access_key_id': `''` | -| aws_secret_access_key_id | s3 | True | True | 'aws_secret_access_key_id': `''` | +| aws_secret_access_key | s3 | True | True | 'aws_secret_access_key_id': `''` | | path_display_filter | s3 | True | True | 'path_display_filter': `''` | | path_display_filters | s3 | True | True | 'path_display_filters': (`''`, ...) | | read_only | s3 | True | True | 'read_only': True/False | | encryption_kms_key | s3 | True | True | 'encryption_kms_key': `''` | -| encryption_customer_kms_key | s3 | True | True | 'encryption_customer_kms_key': `''` | +| encryption_customer_managed_key | s3 | True | True | 'encryption_customer_kms_key': `''` | | comment | s3 | True | True | 'comment': `''` | | host | kafka | False | False | 'host': `''` | | hosts | kafka | False | False | 'hosts': (`''`, ...) | @@ -231,19 +231,19 @@ models: | aws_secret_access_key | kinesis | True | True | 'aws_secret_access_key': `''` | | region | kinesis | False | False | 'region': `''` | | read_only | kinesis | False | True | 'read_only': True/False | -| max_writers | kinesis | True | True | 'max_writers': `''` | +| max_writers | kinesis | True | True | 'max_writers': `` | | stream_display_filter | kinesis | True | True | 'stream_display_filter': `''` | | stream_display_filters | kinesis | True | True | 'stream_display_filters': (`''`, ...) 
| | comment | kinesis | True | True | 'comment': `''` | | connection_string | snowflake | True | False | 'connection_string': `''` | | user_name | snowflake | True | False | 'user_name': `''` | | password | snowflake | True | False | 'password': `''` | -| max_concurrent_connections | snowflake | True | True | 'max_concurrent_connections': `''` | +| max_concurrent_connections | snowflake | True | True | 'max_concurrent_connections': `` | | comment | snowflake | True | True | 'comment': `''` | | connection_string | redshift | True | False | 'connection_string': `''` | | user_name | redshift | True | False | 'user_name': `''` | | password | redshift | True | False | 'password': `''` | -| max_concurrent_connections | redshift | True | True | 'max_concurrent_connections': `''` | +| max_concurrent_connections | redshift | True | True | 'max_concurrent_connections': `` | | comment | redshift | True | True | 'comment': `''` | | connection_string | mysql | True | False | 'connection_string': `''` | | user_name | mysql | True | False | 'user_name': `''` | @@ -257,7 +257,15 @@ models: | user_name | elasticsearch | True | False | 'user_name': `''` | | password | elasticsearch | True | False | 'password': `''` | | comment | elasticsearch | True | True | 'comment': `''` | - +| connection_string | mongodb | True | False | 'connection_string': `''` | +| user_name | mongodb | True | False | 'user_name': `''` | +| password | mongodb | True | False | 'password': `''` | +| timeout | mongodb | True | True | 'timeout': "INTERVAL 'N' SECONDS" | +| comment | mongodb | True | True | 'comment': `''` | +| connection_string | mssql | True | False | 'connection_string': `''` | +| user_name | mssql | True | False | 'user_name': `''` | +| password | mssql | True | False | 'password': `''` | +| comment | mssql | True | True | 'comment': `''` | ## Target options @@ -268,7 +276,7 @@ models: | storage_location | datalake | False | True | 'storage_location': `''` | | compute_cluster | datalake | True | True | 'compute_cluster': `''` | | compression | datalake | True | True | 'compression': 'SNAPPY/GZIP' | -| compaction_processes | datalake | True | True | 'compaction_processes': `''` | +| compaction_processes | datalake | True | True | 'compaction_processes': `` | | disable_compaction | datalake | True | True | 'disable_compaction': True/False | | retention_date_partition | datalake | False | True | 'retention_date_partition': `''` | | table_data_retention | datalake | True | True | 'table_data_retention': `''` | @@ -284,32 +292,33 @@ models: | create_table_if_missing | snowflake | False | True | 'create_table_if_missing': True/False} | | run_interval | snowflake | False | True | 'run_interval': `''` | - ## Transformation options | Option | Storage | Editable | Optional | Config Syntax | | -------| --------- | -------- | -------- | ------------- | | run_interval | s3 | False | True | 'run_interval': `''` | -| start_from | s3 | False | True | 'start_from': `''` | -| end_at | s3 | True | True | 'end_at': `''` | +| start_from | s3 | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | s3 | True | True | 'end_at': `'/NOW'` | | compute_cluster | s3 | True | True | 'compute_cluster': `''` | | comment | s3 | True | True | 'comment': `''` | -| allow_cartesian_products | s3 | False | True | 'allow_cartesian_products': True/False | -| aggregation_parallelism | s3 | True | True | 'aggregation_parallelism': `''` | -| run_parallelism | s3 | True | True | 'run_parallelism': `''` | -| file_format | s3 | False | False | 
'file_format': 'CSV/TSV ...' | +| skip_validations | s3 | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) | +| skip_all_validations | s3 | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | s3 | True | True | 'aggregation_parallelism': `` | +| run_parallelism | s3 | True | True | 'run_parallelism': `` | +| file_format | s3 | False | False | 'file_format': '(type = ``)' | | compression | s3 | False | True | 'compression': 'SNAPPY/GZIP ...' | | date_pattern | s3 | False | True | 'date_pattern': `''` | | output_offset | s3 | False | True | 'output_offset': `''` | -| location | s3 | False | False | 'location': `''` | | run_interval | elasticsearch | False | True | 'run_interval': `''` | -| start_from | elasticsearch | False | True | 'start_from': `''` | -| end_at | elasticsearch | True | True | 'end_at': `''` | +| routing_field_name | elasticsearch | True | True | 'routing_field_name': `''` | +| start_from | elasticsearch | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | elasticsearch | True | True | 'end_at': `'/NOW'` | | compute_cluster | elasticsearch | True | True | 'compute_cluster': `''` | -| allow_cartesian_products | elasticsearch | False | True | 'allow_cartesian_products': True/False | -| aggregation_parallelism | elasticsearch | True | True | 'aggregation_parallelism': `''` | -| run_parallelism | elasticsearch | True | True | 'run_parallelism': `''` | -| bulk_max_size_bytes | elasticsearch | True | True | 'bulk_max_size_bytes': `''` | +| skip_validations | elasticsearch | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) | +| skip_all_validations | elasticsearch | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | elasticsearch | True | True | 'aggregation_parallelism': `` | +| run_parallelism | elasticsearch | True | True | 'run_parallelism': `` | +| bulk_max_size_bytes | elasticsearch | True | True | 'bulk_max_size_bytes': `` | | index_partition_size | elasticsearch | True | True | 'index_partition_size': 'HOURLY/DAILY ...' | | comment | elasticsearch | True | True | 'comment': `''` | | custom_insert_expressions | snowflake | True | True | 'custom_insert_expressions': {'INSERT_TIME' : 'CURRENT_TIMESTAMP()','MY_VALUE': `''`} | @@ -317,70 +326,88 @@ models: | keep_existing_values_when_null | snowflake | True | True | 'keep_existing_values_when_null': True/False | | add_missing_columns | snowflake | False | True | 'add_missing_columns': True/False | | run_interval | snowflake | False | True | 'run_interval': `''` | -| start_from | snowflake | False | True | 'start_from': `''` | -| end_at | snowflake | True | True | 'end_at': `''` | +| commit_interval | snowflake | True | True | 'commit_interval': `''` | +| start_from | snowflake | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | snowflake | True | True | 'end_at': `'/NOW'` | | compute_cluster | snowflake | True | True | 'compute_cluster': `''` | -| allow_cartesian_products | snowflake | False | True | 'allow_cartesian_products': True/False | -| aggregation_parallelism | snowflake | True | True | 'aggregation_parallelism': `''` | -| run_parallelism | snowflake | True | True | 'run_parallelism': `''` | +| skip_validations | snowflake | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) 
| +| skip_all_validations | snowflake | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | snowflake | True | True | 'aggregation_parallelism': `` | +| run_parallelism | snowflake | True | True | 'run_parallelism': `` | | comment | snowflake | True | True | 'comment': `''` | | add_missing_columns | datalake | False | True | 'add_missing_columns': True/False | | run_interval | datalake | False | True | 'run_interval': `''` | -| start_from | datalake | False | True | 'start_from': `''` | -| end_at | datalake | True | True | 'end_at': `'' | +| start_from | datalake | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | datalake | True | True | 'end_at': `'/NOW'` | | compute_cluster | datalake | True | True | 'compute_cluster': `''` | -| allow_cartesian_products | datalake | False | True | 'allow_cartesian_products': True/False | -| aggregation_parallelism | datalake | True | True | 'aggregation_parallelism': `''` | -| run_parallelism | datalake | True | True | 'run_parallelism': `''` | +| skip_validations | datalake | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) | +| skip_all_validations | datalake | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | datalake | True | True | 'aggregation_parallelism': `` | +| run_parallelism | datalake | True | True | 'run_parallelism': `` | | comment | datalake | True | True | 'comment': `''` | | run_interval | redshift | False | True | 'run_interval': `''` | -| start_from | redshift | False | True | 'start_from': `''` | -| end_at | redshift | True | True | 'end_at': `'` | +| start_from | redshift | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | redshift | True | True | 'end_at': `'/NOW'` | | compute_cluster | redshift | True | True | 'compute_cluster': `''` | -| allow_cartesian_products | redshift | False | True | 'allow_cartesian_products': True/False | -| aggregation_parallelism | redshift | True | True | 'aggregation_parallelism': `''` | -| run_parallelism | redshift | True | True | 'run_parallelism': `''` | +| skip_validations | redshift | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) | +| skip_all_validations | redshift | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | redshift | True | True | 'aggregation_parallelism': `` | +| run_parallelism | redshift | True | True | 'run_parallelism': `` | | skip_failed_files | redshift | False | True | 'skip_failed_files': True/False | | fail_on_write_error | redshift | False | True | 'fail_on_write_error': True/False | | comment | redshift | True | True | 'comment': `''` | - +| run_interval | postgres | False | True | 'run_interval': `''` | +| start_from | postgres | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | postgres | True | True | 'end_at': `'/NOW'` | +| compute_cluster | postgres | True | True | 'compute_cluster': `''` | +| skip_validations | postgres | False | True | 'skip_validations': ('ALLOW_CARTESIAN_PRODUCT', ...) 
| +| skip_all_validations | postgres | False | True | 'skip_all_validations': True/False | +| aggregation_parallelism | postgres | True | True | 'aggregation_parallelism': `` | +| run_parallelism | postgres | True | True | 'run_parallelism': `` | +| comment | postgres | True | True | 'comment': `''` | ## Copy options | Option | Storage | Category | Editable | Optional | Config Syntax | | -------| ---------- | -------- | -------- | -------- | ------------- | -| topic | kafka | source_options | False | False | 'comment': `''` | +| topic | kafka | source_options | False | False | 'topic': `''` | | exclude_columns | kafka | job_options | False | True | 'exclude_columns': (`''`, ...) | | deduplicate_with | kafka | job_options | False | True | 'deduplicate_with': {'COLUMNS' : ['col1', 'col2'],'WINDOW': 'N HOURS'} | -| consumer_properties | kafka | job_options | True | True | 'comment': `''` | -| reader_shards | kafka | job_options | True | True | 'reader_shards': `''` | +| consumer_properties | kafka | job_options | True | True | 'consumer_properties': `''` | +| reader_shards | kafka | job_options | True | True | 'reader_shards': `` | | store_raw_data | kafka | job_options | False | True | 'store_raw_data': True/False | | start_from | kafka | job_options | False | True | 'start_from': 'BEGINNING/NOW' | -| end_at | kafka | job_options | True | True | 'end_at': `''` | +| end_at | kafka | job_options | True | True | 'end_at': `'/NOW'` | | compute_cluster | kafka | job_options | True | True | 'compute_cluster': `''` | -| run_parallelism | kafka | job_options | True | True | 'run_parallelism': `''` | +| run_parallelism | kafka | job_options | True | True | 'run_parallelism': `` | | content_type | kafka | job_options | True | True | 'content_type': 'AUTO/CSV/...' | | compression | kafka | job_options | False | True | 'compression': 'AUTO/GZIP/...' | +| column_transformations | kafka | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | +| commit_interval | kafka | job_options | True | True | 'commit_interval': `''` | +| skip_validations | kafka | job_options | False | True | 'skip_validations': ('MISSING_TOPIC') | +| skip_all_validations | kafka | job_options | False | True | 'skip_all_validations': True/False | | comment | kafka | job_options | True | True | 'comment': `''` | | table_include_list | mysql | source_options | True | True | 'table_include_list': (`''`, ...) | | column_exclude_list | mysql | source_options | True | True | 'column_exclude_list': (`''`, ...) | | exclude_columns | mysql | job_options | False | True | 'exclude_columns': (`''`, ...) | | column_transformations | mysql | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | | skip_snapshots | mysql | job_options | True | True | 'skip_snapshots': True/False | -| end_at | mysql | job_options | True | True | 'end_at': `''` | +| end_at | mysql | job_options | True | True | 'end_at': `'/NOW'` | | compute_cluster | mysql | job_options | True | True | 'compute_cluster': `''` | +| snapshot_parallelism | mysql | job_options | True | True | 'snapshot_parallelism': `` | +| ddl_filters | mysql | job_options | False | True | 'ddl_filters': (`''`, ...) | | comment | mysql | job_options | True | True | 'comment': `''` | | table_include_list | postgres | source_options | False | False | 'table_include_list': (`''`, ...) | | column_exclude_list | postgres | source_options | False | True | 'column_exclude_list': (`''`, ...) 
| | heartbeat_table | postgres | job_options | False | True | 'heartbeat_table': `''` | | skip_snapshots | postgres | job_options | False | True | 'skip_snapshots': True/False | | publication_name | postgres | job_options | False | False | 'publication_name': `''` | -| end_at | postgres | job_options | True | True | 'end_at': `''` | -| start_from | postgres | job_options | False | True | 'start_from': `''` | +| end_at | postgres | job_options | True | True | 'end_at': `'/NOW'` | | compute_cluster | postgres | job_options | True | True | 'compute_cluster': `''` | | comment | postgres | job_options | True | True | 'comment': `''` | | parse_json_columns | postgres | job_options | False | False | 'parse_json_columns': True/False | | column_transformations | postgres | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | +| snapshot_parallelism | postgres | job_options | True | True | 'snapshot_parallelism': `` | | exclude_columns | postgres | job_options | False | True | 'exclude_columns': (`''`, ...) | | location | s3 | source_options | False | False | 'location': `''` | | date_pattern | s3 | job_options | False | True | 'date_pattern': `''` | @@ -389,25 +416,49 @@ models: | initial_load_prefix | s3 | job_options | False | True | 'initial_load_prefix': `''` | | delete_files_after_load | s3 | job_options | False | True | 'delete_files_after_load': True/False | | deduplicate_with | s3 | job_options | False | True | 'deduplicate_with': {'COLUMNS' : ['col1', 'col2'],'WINDOW': 'N HOURS'} | -| end_at | s3 | job_options | True | True | 'end_at': `''` | -| start_from | s3 | job_options | False | True | 'start_from': `''` | +| end_at | s3 | job_options | True | True | 'end_at': `'/NOW'` | +| start_from | s3 | job_options | False | True | 'start_from': `'/NOW/BEGINNING'` | | compute_cluster | s3 | job_options | True | True | 'compute_cluster': `''` | -| run_parallelism | s3 | job_options | True | True | 'run_parallelism': `''` | +| run_parallelism | s3 | job_options | True | True | 'run_parallelism': `` | | content_type | s3 | job_options | True | True | 'content_type': 'AUTO/CSV...' | | compression | s3 | job_options | False | True | 'compression': 'AUTO/GZIP...' | | comment | s3 | job_options | True | True | 'comment': `''` | | column_transformations | s3 | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | +| commit_interval | s3 | job_options | True | True | 'commit_interval': `''` | +| skip_validations | s3 | job_options | False | True | 'skip_validations': ('EMPTY_PATH') | +| skip_all_validations | s3 | job_options | False | True | 'skip_all_validations': True/False | | exclude_columns | s3 | job_options | False | True | 'exclude_columns': (`''`, ...) 
| | stream | kinesis | source_options | False | False | 'stream': `''` | -| reader_shards | kinesis | job_options | True | True | 'reader_shards': `''` | +| reader_shards | kinesis | job_options | True | True | 'reader_shards': `` | | store_raw_data | kinesis | job_options | False | True | 'store_raw_data': True/False | -| start_from | kinesis | job_options | False | True | 'start_from': `''` | -| end_at | kinesis | job_options | False | True | 'end_at': `''` | +| start_from | kinesis | job_options | False | True | 'start_from': `'/NOW/BEGINNING'` | +| end_at | kinesis | job_options | False | True | 'end_at': `'/NOW'` | | compute_cluster | kinesis | job_options | True | True | 'compute_cluster': `''` | -| run_parallelism | kinesis | job_options | False | True | 'run_parallelism': `''` | +| run_parallelism | kinesis | job_options | False | True | 'run_parallelism': `` | | content_type | kinesis | job_options | True | True | 'content_type': 'AUTO/CSV...' | | compression | kinesis | job_options | False | True | 'compression': 'AUTO/GZIP...' | | comment | kinesis | job_options | True | True | 'comment': `''` | | column_transformations | kinesis | job_options | True | True | 'column_transformations': {`''` : `''` , ...} | | deduplicate_with | kinesis | job_options | False | True | 'deduplicate_with': {'COLUMNS' : ['col1', 'col2'],'WINDOW': 'N HOURS'} | +| commit_interval | kinesis | job_options | True | True | 'commit_interval': `''` | +| skip_validations | kinesis | job_options | False | True | 'skip_validations': ('MISSING_STREAM') | +| skip_all_validations | kinesis | job_options | False | True | 'skip_all_validations': True/False | | exclude_columns | kinesis | job_options | False | True | 'exclude_columns': (`''`, ...) | +| table_include_list | mssql | source_options | True | True | 'table_include_list': (`''`, ...) | +| column_exclude_list | mssql | source_options | True | True | 'column_exclude_list': (`''`, ...) | +| exclude_columns | mssql | job_options | False | True | 'exclude_columns': (`''`, ...) | +| column_transformations | mssql | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | +| skip_snapshots | mssql | job_options | True | True | 'skip_snapshots': True/False | +| end_at | mssql | job_options | True | True | 'end_at': `'/NOW'` | +| compute_cluster | mssql | job_options | True | True | 'compute_cluster': `''` | +| snapshot_parallelism | mssql | job_options | True | True | 'snapshot_parallelism': `` | +| parse_json_columns | mssql | job_options | False | False | 'parse_json_columns': True/False | +| comment | mssql | job_options | True | True | 'comment': `''` | +| collection_include_list | mongodb | source_options | True | True | 'collection_include_list': (`''`, ...) | +| exclude_columns | mongodb | job_options | False | True | 'exclude_columns': (`''`, ...) 
| +| column_transformations | mongodb | job_options | False | True | 'column_transformations': {`''` : `''` , ...} | +| skip_snapshots | mongodb | job_options | True | True | 'skip_snapshots': True/False | +| end_at | mongodb | job_options | True | True | 'end_at': `'/NOW'` | +| compute_cluster | mongodb | job_options | True | True | 'compute_cluster': `''` | +| snapshot_parallelism | mongodb | job_options | True | True | 'snapshot_parallelism': `` | +| comment | mongodb | job_options | True | True | 'comment': `''` | diff --git a/website/docs/reference/resource-properties/config.md b/website/docs/reference/resource-properties/config.md index 32143c1da07..1d3a2de6592 100644 --- a/website/docs/reference/resource-properties/config.md +++ b/website/docs/reference/resource-properties/config.md @@ -108,12 +108,6 @@ version: 2 - - -We have added support for the `config` property on sources in dbt Core v1.1 - - - diff --git a/website/docs/reference/resource-properties/versions.md b/website/docs/reference/resource-properties/versions.md index 3b8aa7dd1b3..86e9abf34a8 100644 --- a/website/docs/reference/resource-properties/versions.md +++ b/website/docs/reference/resource-properties/versions.md @@ -65,3 +65,62 @@ Note that the value of `defined_in` and the `alias` configuration of a model are - Follow a consistent naming convention for model versions and aliases. - Use `defined_in` and `alias` only if you have good reason. - Create a view that always points to the latest version of your model. You can automate this for all versioned models in your project with an `on-run-end` hook. For more details, read the full docs on ["Model versions"](/docs/collaborate/govern/model-versions#configuring-database-location-with-alias) + +### Detecting breaking changes + +When you use the `state:modified` selection method in Slim CI, dbt will detect changes to versioned model contracts, and raise an error if any of those changes could be breaking for downstream consumers. + +Breaking changes include: +- Removing an existing column +- Changing the `data_type` of an existing column +- Removing or modifying one of the `constraints` on an existing column (dbt v1.6 or higher) +- Changing unversioned, contracted models. + - dbt also warns if a model has or had a contract but isn't versioned + + + + + +``` + Breaking Change to Unversioned Contract for contracted_model (models/contracted_models/contracted_model.sql) + While comparing to previous project state, dbt detected a breaking change to an unversioned model. + - Contract enforcement was removed: Previously, this model's configuration included contract: {enforced: true}. It is no longer configured to enforce its contract, and this is a breaking change. + - Columns were removed: + - color + - date_day + - Enforced column level constraints were removed: + - id (ConstraintType.not_null) + - id (ConstraintType.primary_key) + - Enforced model level constraints were removed: + - ConstraintType.check -> ['id'] + - Materialization changed with enforced constraints: + - table -> view +``` + + + + +``` +Breaking Change to Contract Error in model sometable (models/sometable.sql) + While comparing to previous project state, dbt detected a breaking change to an enforced contract. + + The contract's enforcement has been disabled. + + Columns were removed: + - order_name + + Columns with data_type changes: + - order_id (number -> int) + + Consider making an additive (non-breaking) change instead, if possible. 
+ Otherwise, create a new model version: https://docs.getdbt.com/docs/collaborate/govern/model-versions +``` + + + + + + +Additive changes are **not** considered breaking: +- Adding a new column to a contracted model +- Adding new `constraints` to an existing column in a contracted model diff --git a/website/docs/reference/source-configs.md b/website/docs/reference/source-configs.md index 1ee2fe1daba..43b9bfbff6b 100644 --- a/website/docs/reference/source-configs.md +++ b/website/docs/reference/source-configs.md @@ -71,14 +71,6 @@ Sources can be configured via a `config:` block within their `.yml` definitions, - - -Sources can be configured from the `dbt_project.yml` file under the `sources:` key. This configuration is most useful for configuring sources imported from [a package](package-management). You can disable sources imported from a package to prevent them from rendering in the documentation, or to prevent [source freshness checks](/docs/build/sources#snapshotting-source-data-freshness) from running on source tables imported from packages. - -Unlike other resource types, sources do not yet support a `config` property. It is not possible to (re)define source configs hierarchically across multiple YAML files. - - - ### Examples #### Disable all sources imported from a package To apply a configuration to all sources included from a [package](/docs/build/packages), diff --git a/website/docs/terms/data-lineage.md b/website/docs/terms/data-lineage.md index bb3751df3da..a03687eaba3 100644 --- a/website/docs/terms/data-lineage.md +++ b/website/docs/terms/data-lineage.md @@ -63,7 +63,7 @@ In the greater data world, you may often hear of data lineage systems based on t If you use a transformation tool such as dbt that automatically infers relationships between data sources and models, a DAG automatically populates to show you the lineage that exists for your [data transformations](https://www.getdbt.com/analytics-engineering/transformation/). - + Your is used to visually show upstream dependencies, the nodes that must come before a current model, and downstream relationships, the work that is impacted by the current model. DAGs are also directional—they show a defined flow of movement and form non-cyclical loops. diff --git a/website/docs/terms/monotonically-increasing.md b/website/docs/terms/monotonically-increasing.md index 397e333942a..b4e3987995d 100644 --- a/website/docs/terms/monotonically-increasing.md +++ b/website/docs/terms/monotonically-increasing.md @@ -1,11 +1,11 @@ --- id: monotonically-increasing title: Monotonically increasing -description: Monotonicity means unchanging (think monotone). A monotonically-increasing value is a value which increases at a constant rate, for example the values 1, 2, 3, 4. +description: A monotonically increasing sequence is a sequence whose values are sorted in ascending order and do not decrease. For example, the sequences 1, 6, 7, 11, 131 or 2, 5, 5, 5, 6, 10. displayText: monotonically increasing -hoverSnippet: Monotonicity means unchanging (think monotone). A monotonically-increasing value is a value which increases at a constant rate, for example the values 1, 2, 3, 4. +hoverSnippet: A monotonically-increasing sequence is a sequence whose values are sorted in ascending order and do not decrease. For example, the sequences 1, 6, 7, 11, 131 or 2, 5, 5, 5, 6, 10. --- -Monotonicity means unchanging (think monotone). A monotonically-increasing value is a value which increases at a constant rate, for example the values `[1, 2, 3, 4]`. 
+Monotonicity means unchanging (think monotone); a monotonic sequence is a sequence where the order of the value of the elements does not change. In other words, a monotonically-increasing sequence is a sequence whose values are sorted in ascending order and do not decrease. For example the sequences `[1, 6, 7, 11, 131]` or `[2, 5, 5, 5, 6, 10]`.. -Monotonically-increasing values often appear in primary keys generated by production systems. In an analytics engineering context, you should avoid generating such values or assuming their existence in your models, because they make it more difficult to create an data model. Instead you should create a which is derived from the unique component(s) of a row. \ No newline at end of file +Monotonically-increasing values often appear in primary keys generated by production systems. In an analytics engineering context, you should avoid generating such values or assuming their existence in your models, because they make it more difficult to create an data model. Instead you should create a which is derived from the unique component(s) of a row. diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 24030624290..d4115a700cc 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -47,10 +47,14 @@ var siteSettings = { onBrokenLinks: "throw", onBrokenMarkdownLinks: "throw", trailingSlash: false, + customFields: { + isVercel: process.env.REACT_APP_VERCEL + }, themeConfig: { docs:{ sidebar: { hideable: true, + autoCollapseCategories: true, }, }, image: "/img/avatar.png", diff --git a/website/functions/image-cache-wrapper.js b/website/functions/image-cache-wrapper.js new file mode 100644 index 00000000000..84f85c12a17 --- /dev/null +++ b/website/functions/image-cache-wrapper.js @@ -0,0 +1,18 @@ +// This function is used to break the cache on images +// preventing stale or broken images from being served + +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; + +const CACHE_VERSION = '2' + +export default function imageCacheWrapper(src) { + const { siteConfig: {customFields} } = useDocusaurusContext(); + + const cacheParam = customFields?.isVercel === '1' + ? 
`?v=${CACHE_VERSION}` + : `` + + return ( + src + cacheParam + ) +} diff --git a/website/sidebars.js b/website/sidebars.js index d1a6c4664e7..be4e20e75e1 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -8,6 +8,8 @@ const sidebarSettings = { link: { type: "doc", id: "docs/supported-data-platforms" }, items: [ "docs/connect-adapters", + "docs/verified-adapters", + "docs/trusted-adapters", "docs/community-adapters", "docs/contribute-core-adapters", ], @@ -228,7 +230,6 @@ const sidebarSettings = { label: "Build your DAG", collapsed: true, items: [ - "docs/build/sources", { type: "category", label: "Models", @@ -238,11 +239,15 @@ const sidebarSettings = { "docs/build/python-models", ], }, - "docs/build/seeds", "docs/build/snapshots", + "docs/build/seeds", + "docs/build/tests", + "docs/build/jinja-macros", + "docs/build/sources", "docs/build/exposures", "docs/build/metrics", "docs/build/groups", + "docs/build/analyses", ], }, { @@ -291,7 +296,6 @@ const sidebarSettings = { label: "Enhance your models", collapsed: true, items: [ - "docs/build/tests", "docs/build/materializations", "docs/build/incremental-models", ], @@ -301,11 +305,9 @@ const sidebarSettings = { label: "Enhance your code", collapsed: true, items: [ - "docs/build/jinja-macros", "docs/build/project-variables", "docs/build/environment-variables", "docs/build/packages", - "docs/build/analyses", "docs/build/hooks-operations", ], }, @@ -363,6 +365,7 @@ const sidebarSettings = { type: "category", label: "Collaborate with others", items: [ + "docs/collaborate/explore-projects", { type: "category", label: "Git version control", @@ -1090,6 +1093,7 @@ const sidebarSettings = { "guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter", "guides/dbt-ecosystem/adapter-development/6-promoting-a-new-adapter", "guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter", + "guides/dbt-ecosystem/adapter-development/8-building-a-trusted-adapter", ], }, { diff --git a/website/snippets/_adapters-trusted.md b/website/snippets/_adapters-trusted.md new file mode 100644 index 00000000000..10af0218e22 --- /dev/null +++ b/website/snippets/_adapters-trusted.md @@ -0,0 +1,8 @@ +
+ + + +
diff --git a/website/snippets/_adapters-verified.md b/website/snippets/_adapters-verified.md new file mode 100644 index 00000000000..7caf099b7d1 --- /dev/null +++ b/website/snippets/_adapters-verified.md @@ -0,0 +1,62 @@ +
+ + + + + + + + + + + + + + + + + + + + + + +
+ +* Install these adapters using the CLI as they're not currently supported in dbt Cloud.
diff --git a/website/snippets/_cloud-environments-info.md b/website/snippets/_cloud-environments-info.md index 9311dc25139..5388379dc34 100644 --- a/website/snippets/_cloud-environments-info.md +++ b/website/snippets/_cloud-environments-info.md @@ -65,6 +65,8 @@ If you're developing in the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in - If the attribute doesn't exist, it will add the attribute or value pair to the profile. +Only the **top-level keys** are accepted in extended attributes. This means that if you want to change a specific sub-key value, you must provide the entire top-level key as a JSON block in your resulting YAML. For example, if you want to customize a particular field within a [service account JSON](/docs/core/connect-data-platform/bigquery-setup#service-account-json) for your BigQuery connection (like 'project_id' or 'client_email'), you need to provide an override for the entire top-level `keyfile_json` main key/attribute using extended attributes. Include the sub-fields as a nested JSON block. + The following code is an example of the types of attributes you can add in the **Extended Attributes** text box: ```yaml diff --git a/website/snippets/auth0-uri.md b/website/snippets/auth0-uri.md index 829aa310ba9..1187902f2e4 100644 --- a/website/snippets/auth0-uri.md +++ b/website/snippets/auth0-uri.md @@ -3,7 +3,8 @@ The URI used for SSO connections on multi-tenant dbt Cloud instances will vary b | Region | dbt Cloud Access URL | Auth0 SSO URI | Auth0 Entity ID * | |--------|-----------------------|-------------------------------|----------------------------------------| -| US | cloud.getdbt.com | auth.cloud.getdbt.com | us-production-mt | +| US multi-tenant | cloud.getdbt.com | auth.cloud.getdbt.com | us-production-mt | +| US cell 1 | {account prefix}.us1.dbt.com | auth.cloud.getdbt.com | us-production-mt | | EMEA | emea.dbt.com | auth.emea.dbt.com | emea-production-mt | | APAC | au.dbt.com | auth.au.dbt.com | au-production-mt | diff --git a/website/snippets/cloud-feature-parity.md b/website/snippets/cloud-feature-parity.md index 31644343908..7bc6c91e9ba 100644 --- a/website/snippets/cloud-feature-parity.md +++ b/website/snippets/cloud-feature-parity.md @@ -5,10 +5,10 @@ The following table outlines which dbt Cloud features are supported on the diffe | Scheduler | ✅ | ✅ | ✅ | | Cloud IDE | ✅ | ✅ | ✅ | | Audit logs | ✅ | ✅ | ✅ | -| Discovery API | ✅ | ✅ (select customers) | ❌ | -| Webhooks (Outbound) | ✅ | ❌ | ❌ | -| Continuous Integration, including CI jobs | ✅ | ✅ | ✅ | +| Discovery API | ✅ | ✅ | ❌ | +| Webhooks (Outbound) | ✅ | ✅ | ❌ | +| Continuous Integration, including CI jobs | ✅ | ✅ | ✅ | | Semantic Layer | ✅ (North America Only) | ❌ | ❌ | | IP Restrictions | ✅ | ✅ | ✅ | -| PrivateLink egress | ✅ | ✅ | ✅ | +| PrivateLink egress | ✅ (AWS only)| ✅ | ✅ | | PrivateLink ingress | ❌ | ✅ | ✅ | diff --git a/website/snippets/core-versions-table.md b/website/snippets/core-versions-table.md index fb2e2a5d60e..7860ac90cb3 100644 --- a/website/snippets/core-versions-table.md +++ b/website/snippets/core-versions-table.md @@ -2,13 +2,14 @@ | dbt Core | Initial Release | Support Level | Critical Support Until | |------------------------------------------------------------|-----------------|----------------|-------------------------| +| [**v1.7**](/guides/migration/versions/upgrading-to-v1.7) (beta)| Oct 26, 2023 | - | - | | [**v1.6**](/guides/migration/versions/upgrading-to-v1.6) | Jul 31, 2023 | Active | Jul 30, 2024 | | 
[**v1.5**](/guides/migration/versions/upgrading-to-v1.5) | Apr 27, 2023 | Critical | Apr 27, 2024 | | [**v1.4**](/guides/migration/versions/upgrading-to-v1.4) | Jan 25, 2023 | Critical | Jan 25, 2024 | | [**v1.3**](/guides/migration/versions/upgrading-to-v1.3) | Oct 12, 2022 | Critical | Oct 12, 2023 | | [**v1.2**](/guides/migration/versions/upgrading-to-v1.2) | Jul 26, 2022 | End of Life* ⚠️ | Jul 26, 2023 | | [**v1.1**](/guides/migration/versions/upgrading-to-v1.1) ⚠️ | Apr 28, 2022 | End of Life* ⚠️ | Apr 28, 2023 | -| [**v1.0**](/guides/migration/versions/upgrading-to-v1.0) ⚠️ | Dec 3, 2021 | End of Life* ⚠️ | Dec 3, 2022 ⚠️ | +| [**v1.0**](/guides/migration/versions/upgrading-to-v1.0) ⚠️ | Dec 3, 2021 | Deprecated ⛔️ | Deprecated ⛔️ | | **v0.X** ⛔️ | (Various dates) | Deprecated ⛔️ | Deprecated ⛔️ | _*All versions of dbt Core since v1.0 are available in dbt Cloud until further notice. Versions that are EOL do not receive any fixes. For the best support, we recommend upgrading to a version released within the past 12 months._ ### Planned future releases @@ -17,7 +18,6 @@ _Future release dates are tentative and subject to change._ | dbt Core | Planned Release | Critical & dbt Cloud Support Until | |----------|-----------------|-------------------------------------| -| **v1.6** | _July 2023_ | _July 2024_ | | **v1.7** | _Oct 2023_ | _Oct 2024_ | | **v1.8** | _Jan 2024_ | _Jan 2025_ | | **v1.9** | _Apr 2024_ | _Apr 2025_ | diff --git a/website/src/components/author/index.js b/website/src/components/author/index.js index a8b7ad7c0ef..6b49295936d 100644 --- a/website/src/components/author/index.js +++ b/website/src/components/author/index.js @@ -4,6 +4,7 @@ import Link from '@docusaurus/Link'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import BlogLayout from '@theme/BlogLayout'; import getAllPosts from '../../utils/get-all-posts'; +import imageCacheWrapper from '../../../functions/image-cache-wrapper'; function Author(props) { const { authorData } = props @@ -38,7 +39,7 @@ function Author(props) { itemType="http://schema.org/Person">
- {name} + {name}

{name}

diff --git a/website/src/components/communitySpotlightCard/index.js b/website/src/components/communitySpotlightCard/index.js index 06b77aa2be2..08707a93dd4 100644 --- a/website/src/components/communitySpotlightCard/index.js +++ b/website/src/components/communitySpotlightCard/index.js @@ -1,6 +1,7 @@ import React from 'react' import Link from '@docusaurus/Link'; import styles from './styles.module.css'; +import imageCacheWrapper from '../../../functions/image-cache-wrapper'; const SpotlightWrapper = ({ isSpotlightMember, frontMatter, children }) => { return isSpotlightMember ? ( @@ -55,13 +56,13 @@ function CommunitySpotlightCard({ frontMatter, isSpotlightMember = false }) {
{id && isSpotlightMember ? ( {title} ) : ( {title} diff --git a/website/src/components/discourse/index.js b/website/src/components/discourse/index.js index 18e4d3e7254..759903a175f 100644 --- a/website/src/components/discourse/index.js +++ b/website/src/components/discourse/index.js @@ -1,6 +1,7 @@ import React, { useState, useEffect } from 'react' import axios from 'axios' import feedStyles from './styles.module.css'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; // Bare component with no default props set export const DiscourseFeed = ({ @@ -24,6 +25,8 @@ export const DiscourseFeed = ({ styles = {} }) => { + const { siteConfig: {customFields} } = useDocusaurusContext(); + const [topics, setTopics] = useState([]) const [loading, setLoading] = useState(true) const [isError, setIsError] = useState(false) @@ -38,10 +41,10 @@ export const DiscourseFeed = ({ setLoading(true) setIsError(false) - // Build Netlify Function endpoint - const endpoint = window?.location?.hostname?.includes('localhost') - ? 'http://localhost:8888/.netlify/functions/get-discourse-topics' - : '/.netlify/functions/get-discourse-topics' + // Build function endpoint + const endpoint = customFields?.isVercel === '1' + ? `/api/get-discourse-topics` + : `/.netlify/functions/get-discourse-topics` // If 'after' prop not passed in, set relative after date let afterDate = after diff --git a/website/src/components/discourseBlogComments/index.js b/website/src/components/discourseBlogComments/index.js index 63279285f2a..091f1047cb7 100644 --- a/website/src/components/discourseBlogComments/index.js +++ b/website/src/components/discourseBlogComments/index.js @@ -2,9 +2,12 @@ import React, { useState, useEffect } from 'react' import styles from './styles.module.css' import axios from 'axios' import sanitizeHtml from 'sanitize-html'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; export const DiscourseBlogComments = ({title,slug}) => { + const { siteConfig: {customFields} } = useDocusaurusContext(); + const DISCOURSE_TOPIC_ENDPOINT = `https://discourse.getdbt.com/t/` const commentsToLoad = 6 @@ -28,10 +31,10 @@ export const DiscourseBlogComments = ({title,slug}) => { const fetchData = async () => { try { - const endpoint = window?.location?.hostname?.includes('localhost') - ? `http://localhost:8888/.netlify/functions/get-discourse-comments?title=${title}&slug=${slug}` - : `/.netlify/functions/get-discourse-comments?title=${title}&slug=${slug}` - + const endpoint = customFields?.isVercel === '1' + ? `/api/get-discourse-comments?title=${title}&slug=${slug}` + : `/.netlify/functions/get-discourse-comments?title=${title}&slug=${slug}` + const { data } = await axios.get(endpoint) // Set error state if data not available diff --git a/website/src/components/lightbox/index.js b/website/src/components/lightbox/index.js index b4c2da3c905..1c748bbb04f 100644 --- a/website/src/components/lightbox/index.js +++ b/website/src/components/lightbox/index.js @@ -1,5 +1,6 @@ import React from 'react'; import styles from './styles.module.css'; +import imageCacheWrapper from '../../../functions/image-cache-wrapper'; function Lightbox({ src, @@ -35,7 +36,7 @@ function Lightbox({ data-toggle="lightbox" alt={alt ? alt : title ? title : ''} title={title ? 
title : ''} - src={src} + src={imageCacheWrapper(src)} /> diff --git a/website/src/components/quickstartTOC/index.js b/website/src/components/quickstartTOC/index.js index 49209273964..8c9b8fba910 100644 --- a/website/src/components/quickstartTOC/index.js +++ b/website/src/components/quickstartTOC/index.js @@ -26,16 +26,6 @@ function QuickstartTOC() { const steps = quickstartContainer.querySelectorAll("h2"); const snippetContainer = document.querySelectorAll(".snippet"); - // Add snippet container to its parent step - snippetContainer.forEach((snippet) => { - const parent = snippet?.parentNode; - while (snippet?.firstChild && parent.className) { - if (parent) { - parent.insertBefore(snippet.firstChild, snippet); - } - } - }); - // Create an array of objects with the id and title of each step const data = Array.from(steps).map((step, index) => ({ id: step.id, @@ -49,6 +39,16 @@ function QuickstartTOC() { // Wrap all h2 (steps), along with all of their direct siblings, in a div until the next h2 if (mounted) { + // Add snippet container to its parent step + snippetContainer.forEach((snippet) => { + const parent = snippet?.parentNode; + while (snippet?.firstChild && parent.className) { + if (parent) { + parent.insertBefore(snippet.firstChild, snippet); + } + } + }); + steps.forEach((step, index) => { const wrapper = document.createElement("div"); wrapper.classList.add(style.stepWrapper); diff --git a/website/src/theme/BlogPostItem/Header/Author/index.js b/website/src/theme/BlogPostItem/Header/Author/index.js index a37d9e9985a..f82428df789 100644 --- a/website/src/theme/BlogPostItem/Header/Author/index.js +++ b/website/src/theme/BlogPostItem/Header/Author/index.js @@ -1,6 +1,7 @@ import React from 'react'; import clsx from 'clsx'; import Link from '@docusaurus/Link'; +import imageCacheWrapper from '../../../../../functions/image-cache-wrapper'; function MaybeLink(props) { if (props.href || props.slug) { return ; @@ -21,7 +22,7 @@ export default function BlogPostItemHeaderAuthor({author, className}) {
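// [Editor's note — illustrative sketch only, not part of the patch above.]
// The discourse and discourseBlogComments hunks repeat the same branch for picking the
// serverless endpoint: Vercel's /api/<name> route vs. Netlify's /.netlify/functions/<name>,
// keyed off the customFields.isVercel flag read via useDocusaurusContext (the diff compares
// it to '1', so it is presumably set to '1' on Vercel builds). A minimal sketch of that
// selection factored into a shared helper — the name getFunctionEndpoint and its existence
// are assumptions for illustration, not something this diff adds:

export function getFunctionEndpoint(customFields, functionName, queryString = '') {
  // Vercel serves serverless functions from /api/<name>; Netlify serves them
  // from /.netlify/functions/<name>.
  const base = customFields?.isVercel === '1'
    ? `/api/${functionName}`
    : `/.netlify/functions/${functionName}`;
  return queryString ? `${base}?${queryString}` : base;
}

// Example call mirroring the discourseBlogComments usage above:
// const endpoint = getFunctionEndpoint(customFields, 'get-discourse-comments', `title=${title}&slug=${slug}`);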
{imageURL && ( - {name} + {name} )} diff --git a/website/static/img/docs/collaborate/dbt-explorer/catalog-sidebar-v1.gif b/website/static/img/docs/collaborate/dbt-explorer/catalog-sidebar-v1.gif new file mode 100644 index 00000000000..458aa8e874d Binary files /dev/null and b/website/static/img/docs/collaborate/dbt-explorer/catalog-sidebar-v1.gif differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/lineage-v1.gif b/website/static/img/docs/collaborate/dbt-explorer/lineage-v1.gif new file mode 100644 index 00000000000..2772eaa9619 Binary files /dev/null and b/website/static/img/docs/collaborate/dbt-explorer/lineage-v1.gif differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/model-resource-details-v1.gif b/website/static/img/docs/collaborate/dbt-explorer/model-resource-details-v1.gif new file mode 100644 index 00000000000..24c8312af11 Binary files /dev/null and b/website/static/img/docs/collaborate/dbt-explorer/model-resource-details-v1.gif differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/search-v1.gif b/website/static/img/docs/collaborate/dbt-explorer/search-v1.gif new file mode 100644 index 00000000000..1343f58171d Binary files /dev/null and b/website/static/img/docs/collaborate/dbt-explorer/search-v1.gif differ diff --git a/website/static/img/docs/dbt-cloud/deployment/run-overview.jpg b/website/static/img/docs/dbt-cloud/deployment/run-overview.jpg new file mode 100644 index 00000000000..8ab14b8ce2b Binary files /dev/null and b/website/static/img/docs/dbt-cloud/deployment/run-overview.jpg differ diff --git a/website/static/img/docs/release-notes/ci-job-setup.gif b/website/static/img/docs/release-notes/ci-job-setup.gif new file mode 100644 index 00000000000..56beea4ab88 Binary files /dev/null and b/website/static/img/docs/release-notes/ci-job-setup.gif differ diff --git a/website/static/img/docs/terms/data-lineage/dag_example.jpg b/website/static/img/docs/terms/data-lineage/dag_example.jpg new file mode 100644 index 00000000000..3d1e4153590 Binary files /dev/null and b/website/static/img/docs/terms/data-lineage/dag_example.jpg differ diff --git a/website/static/img/icons/athena.svg b/website/static/img/icons/athena.svg new file mode 100644 index 00000000000..c2c6a81dd64 --- /dev/null +++ b/website/static/img/icons/athena.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/website/static/img/icons/white/athena.svg b/website/static/img/icons/white/athena.svg new file mode 100644 index 00000000000..c2c6a81dd64 --- /dev/null +++ b/website/static/img/icons/white/athena.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/website/vercel.json b/website/vercel.json new file mode 100644 index 00000000000..c5fb0638fba --- /dev/null +++ b/website/vercel.json @@ -0,0 +1,3996 @@ +{ + "cleanUrls": true, + "trailingSlash": false, + "redirects": [ + { + "source": "/docs/deploy/job-triggers", + "destination": "/docs/deploy/deploy-jobs", + "permanent": true + }, + { + "source": "/docs/deploy/job-settings", + "destination": "/docs/deploy/deploy-jobs", + "permanent": true + }, + { + "source": "/docs/deploy/dbt-cloud-job", + "destination": "/docs/deploy/deploy-jobs", + "permanent": true + }, + { + "source": "/faqs/environments/beta-release", + "destination": "/docs/dbt-versions/product-lifecycles", + "permanent": true + }, + { + "source": "/docs/deploy/slim-ci-jobs", + "destination": "/docs/deploy/ci-jobs", + "permanent": true + }, + { + "source": "/guides/dbt-ecosystem/sl-partner-integration-guide", + "destination": 
"/docs/use-dbt-semantic-layer/avail-sl-integrations", + "permanent": true + }, + { + "source": "/docs/use-dbt-semantic-layer/dbt-semantic-layer", + "destination": "/docs/use-dbt-semantic-layer/dbt-sl", + "permanent": true + }, + { + "source": "/docs/use-dbt-semantic-layer/set-up-semantic-layer", + "destination": "/docs/use-dbt-semantic-layer/setup-sl", + "permanent": true + }, + { + "source": "/docs/use-dbt-semantic-layer/setup-dbt-semantic-layer", + "destination": "/docs/use-dbt-semantic-layer/setup-sl", + "permanent": true + }, + { + "source": "/docs/use-dbt-semantic-layer/quickstart-semantic-layer", + "destination": "/docs/use-dbt-semantic-layer/quickstart-sl", + "permanent": true + }, + { + "source": "/docs/collaborate/environments/environments-in-dbt", + "destination": "/docs/environments-in-dbt", + "permanent": true + }, + { + "source": "/docs/collaborate/environments/dbt-cloud-environments", + "destination": "/docs/deploy/dbt-cloud-environments", + "permanent": true + }, + { + "source": "/docs/collaborate/environments/dbt-core-environments", + "destination": "/docs/core/dbt-core-environments", + "permanent": true + }, + { + "source": "/docs/cloud/manage-access/licenses-and-groups", + "destination": "/docs/cloud/manage-access/about-user-access", + "permanent": true + }, + { + "source": "/docs/deploy/cloud-ci-job", + "destination": "/docs/deploy/continuous-integration", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/bigquery", + "destination": "/quickstarts/bigquery", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/databricks", + "destination": "/quickstarts/databricks", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/redshift", + "destination": "/quickstarts/redshift", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/snowflake", + "destination": "/quickstarts/snowflake", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/starburst-galaxy", + "destination": "/quickstarts/starburst-galaxy", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-core/codespace", + "destination": "/quickstarts/codespace", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-core/manual-install", + "destination": "/quickstarts/manual-install", + "permanent": true + }, + { + "source": "/docs/deploy/project-state", + "destination": "/reference/node-selection/syntax", + "permanent": true + }, + { + "source": "/reference/global-configs", + "destination": "/reference/global-configs/about-global-configs", + "permanent": true + }, + { + "source": "/docs/quickstarts/overview", + "destination": "/quickstarts", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#verified-adapters", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#community-adapters", + "destination": "/docs/community-adapters", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#adapter-installation", + "destination": "/docs/connect-adapters", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#adapter-taxonomy", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#verified-by-dbt-labs", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#maintainers", + "destination": "/docs/connect-adapters#maintainers", + "permanent": true + }, + { + "source": 
"/docs/supported-data-platforms#contributing-to-dbt-core-adapters", + "destination": "/docs/contribute-core-adapters", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#contributing-to-a-pre-existing-adapter", + "destination": "/docs/contribute-core-adapters#contribute-to-a-pre-existing-adapter", + "permanent": true + }, + { + "source": "/docs/supported-data-platforms#creating-a-new-adapter", + "destination": "/docs/contribute-core-adapters#create-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/core/connection-profiles", + "destination": "/docs/core/connect-data-platform/connection-profiles", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/bigquery-setup", + "destination": "/docs/core/connect-data-platform/bigquery-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/postgres-setup", + "destination": "/docs/core/connect-data-platform/postgres-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/redshift-setup", + "destination": "/docs/core/connect-data-platform/redshift-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/snowflake-setup", + "destination": "/docs/core/connect-data-platform/snowflake-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/mssql-setup", + "destination": "/docs/core/connect-data-platform/mssql-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/trino-setup", + "destination": "/docs/core/connect-data-platform/trino-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/singlestore-setup", + "destination": "/docs/core/connect-data-platform/singlestore-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/spark-setup", + "destination": "/docs/core/connect-data-platform/spark-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/databricks-setup", + "destination": "/docs/core/connect-data-platform/databricks-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/hive-setup", + "destination": "/docs/core/connect-data-platform/hive-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/exasol-setup", + "destination": "/docs/core/connect-data-platform/exasol-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/oracle-setup", + "destination": "/docs/core/connect-data-platform/oracle-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/azuresynapse-setup", + "destination": "/docs/core/connect-data-platform/azuresynapse-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/dremio-setup", + "destination": "/docs/core/connect-data-platform/dremio-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/clickhouse-setup", + "destination": "/docs/core/connect-data-platform/clickhouse-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/materialize-setup", + "destination": "/docs/core/connect-data-platform/materialize-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/rockset-setup", + "destination": "/docs/core/connect-data-platform/rockset-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/firebolt-setup", + "destination": "/docs/core/connect-data-platform/firebolt-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/teradata-setup", + "destination": 
"/docs/core/connect-data-platform/teradata-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/athena-setup", + "destination": "/docs/core/connect-data-platform/athena-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/vertica-setup", + "destination": "/docs/core/connect-data-platform/vertica-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/tidb-setup", + "destination": "/docs/core/connect-data-platform/tidb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/glue-setup", + "destination": "/docs/core/connect-data-platform/glue-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/mindsdb-setup", + "destination": "/docs/core/connect-data-platform/mindsdb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/greenplum-setup", + "destination": "/docs/core/connect-data-platform/greenplum-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/impala-setup", + "destination": "/docs/core/connect-data-platform/impala-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/layer-setup", + "destination": "/docs/core/connect-data-platform/layer-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/iomete-setup", + "destination": "/docs/core/connect-data-platform/iomete-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/duckdb-setup", + "destination": "/docs/core/connect-data-platform/duckdb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/sqlite-setup", + "destination": "/docs/core/connect-data-platform/sqlite-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/mysql-setup", + "destination": "/docs/core/connect-data-platform/mysql-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/ibmdb2-setup", + "destination": "/docs/core/connect-data-platform/ibmdb2-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/alloydb-setup", + "destination": "/docs/core/connect-data-platform/alloydb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/doris-setup", + "destination": "/docs/core/connect-data-platform/doris-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/infer-setup", + "destination": "/docs/core/connect-data-platform/infer-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/databend-setup", + "destination": "/docs/core/connect-data-platform/databend-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/fal-setup", + "destination": "/docs/core/connect-data-platform/fal-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/decodable-setup", + "destination": "/docs/core/connect-data-platform/decodable-setup", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-source", + "destination": "/docs/dbt-cloud-apis/discovery-schema-source", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-sources", + "destination": "/docs/dbt-cloud-apis/discovery-schema-sources", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-test", + "destination": "/docs/dbt-cloud-apis/discovery-schema-test", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-tests", + "destination": "/docs/dbt-cloud-apis/discovery-schema-tests", + "permanent": true + }, + 
{ + "source": "/docs/dbt-cloud-apis/metadata-schema-seed", + "destination": "/docs/dbt-cloud-apis/discovery-schema-seed", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-seeds", + "destination": "/docs/dbt-cloud-apis/discovery-schema-seeds", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-snapshots", + "destination": "/docs/dbt-cloud-apis/discovery-schema-snapshots", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-model", + "destination": "/docs/dbt-cloud-apis/discovery-schema-model", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-models", + "destination": "/docs/dbt-cloud-apis/discovery-schema-models", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-modelByEnv", + "destination": "/docs/dbt-cloud-apis/discovery-schema-modelByEnv", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-metrics", + "destination": "/docs/dbt-cloud-apis/discovery-schema-metrics", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-metric", + "destination": "/docs/dbt-cloud-apis/discovery-schema-metric", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-exposures", + "destination": "/docs/dbt-cloud-apis/discovery-schema-exposures", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-schema-exposure", + "destination": "/docs/dbt-cloud-apis/discovery-schema-exposure", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-use-case-guides", + "destination": "/docs/dbt-cloud-apis/discovery-use-cases-and-examples", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-api", + "destination": "/docs/dbt-cloud-apis/discovery-api", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-apis/metadata-querying", + "destination": "/docs/dbt-cloud-apis/discovery-querying", + "permanent": true + }, + { + "source": "/docs/core/connection-profiles#understanding-threads", + "destination": "/docs/running-a-dbt-project/using-threads", + "permanent": true + }, + { + "source": "/docs/cloud/privatelink/about-privatelink", + "destination": "/docs/cloud/secure/about-privatelink", + "permanent": true + }, + { + "source": "/docs/cloud/privatelink/snowflake-privatelink", + "destination": "/docs/cloud/secure/about-privatelink", + "permanent": true + }, + { + "source": "/docs/cloud/privatelink/redshift-privatelink", + "destination": "/docs/cloud/secure/about-privatelink", + "permanent": true + }, + { + "source": "/docs/cloud/privatelink/databricks-privatelink", + "destination": "/docs/cloud/secure/about-privatelink", + "permanent": true + }, + { + "source": "/docs/cloud/privatelink/ip-restrictions", + "destination": "/docs/cloud/secure/about-privatelink", + "permanent": true + }, + { + "source": "/docs/deploy/dbt-cloud-job#create-and-schedule-jobs", + "destination": "/docs/deploy/dbt-cloud-job#create-and-schedule-jobs", + "permanent": true + }, + { + "source": "/docs/cloud/dbt-cloud-tips", + "destination": "/docs/cloud/dbt-cloud-ide/dbt-cloud-tips", + "permanent": true + }, + { + "source": "/docs/cloud/develop-in-the-cloud", + "destination": "/docs/cloud/dbt-cloud-ide/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-model-timing-tab", + "destination": "/docs/deploy/dbt-cloud-job#model-timing", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-core/quickstart", + "destination": 
"/quickstarts/manual-install", + "permanent": true + }, + { + "source": "/docs/dbt-versions/release-notes/January-2022/model-timing-more", + "destination": "/docs/deploy/dbt-cloud-job#model-timing", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#dbt-cloud", + "destination": "/docs/deploy/dbt-cloud-job", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#airflow", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#prefect", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#run-dbt-in-production", + "destination": "/docs/deploy/deployments", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#on-prefect-2", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#on-prefect-1", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#dagster", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#automation-servers", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/deploy/deployments#cron", + "destination": "/docs/deploy/deployment-tools", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/enterprise-permissions#permission-sets", + "destination": "/docs/cloud/manage-access/enterprise-permissions#permission-sets", + "permanent": true + }, + { + "source": "/docs/get-started/privatelink/about-privatelink", + "destination": "/docs/cloud/privatelink/about-privatelink", + "permanent": true + }, + { + "source": "/docs/get-started/privatelink/snowflake-privatelink", + "destination": "/docs/cloud/privatelink/snowflake-privatelink", + "permanent": true + }, + { + "source": "/docs/get-started/privatelink/redshift-privatelink", + "destination": "/docs/cloud/privatelink/redshift-privatelink", + "permanent": true + }, + { + "source": "/docs/get-started/privatelink/databricks-privatelink", + "destination": "/docs/cloud/privatelink/databricks-privatelink", + "permanent": true + }, + { + "source": "/docs/get-started/dbt-cloud-features", + "destination": "/docs/cloud/about-cloud/dbt-cloud-features", + "permanent": true + }, + { + "source": "/docs/deploy/regions-ip-addresses", + "destination": "/docs/cloud/about-cloud/regions-ip-addresses", + "permanent": true + }, + { + "source": "/docs/deploy/architecture", + "destination": "/docs/cloud/about-cloud/architecture", + "permanent": true + }, + { + "source": "/docs/deploy/single-tenant", + "destination": "/docs/cloud/about-cloud/tenancy", + "permanent": true + }, + { + "source": "/docs/deploy/multi-tenant", + "destination": "/docs/cloud/about-cloud/tenancy", + "permanent": true + }, + { + "source": "/docs/cloud/manage-access/about-access", + "destination": "/docs/cloud/manage-access/about-user-access", + "permanent": true + }, + { + "source": "/docs/collaborate/git/connect-github", + "destination": "/docs/cloud/git/connect-github", + "permanent": true + }, + { + "source": "/docs/collaborate/git/connect-gitlab", + "destination": "/docs/cloud/git/connect-gitlab", + "permanent": true + }, + { + "source": "/docs/collaborate/git/connect-azure-devops", + "destination": "/docs/cloud/git/connect-azure-devops", + "permanent": true + }, + { + "source": "/docs/collaborate/git/setup-azure", + "destination": "/docs/cloud/git/setup-azure", + 
"permanent": true + }, + { + "source": "/docs/collaborate/git/authenticate-azure", + "destination": "/docs/cloud/git/authenticate-azure", + "permanent": true + }, + { + "source": "/docs/collaborate/git/import-a-project-by-git-url", + "destination": "/docs/cloud/git/import-a-project-by-git-url", + "permanent": true + }, + { + "source": "/docs/collaborate/publish/about-publishing-models", + "destination": "/docs/collaborate/govern/about-model-governance", + "permanent": true + }, + { + "source": "/docs/collaborate/publish/model-contracts", + "destination": "/docs/collaborate/govern/model-contracts", + "permanent": true + }, + { + "source": "/docs/collaborate/publish/model-access", + "destination": "/docs/collaborate/govern/model-access", + "permanent": true + }, + { + "source": "/docs/collaborate/publish/model-versions", + "destination": "/docs/collaborate/govern/model-versions", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/about-access", + "destination": "/docs/cloud/manage-access/about-user-access", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/seats-and-users", + "destination": "/docs/cloud/manage-access/seats-and-users", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/self-service-permissions", + "destination": "/docs/cloud/manage-access/self-service-permissions", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/enterprise-permissions", + "destination": "/docs/cloud/manage-access/enterprise-permissions", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/sso-overview", + "destination": "/docs/cloud/manage-access/sso-overview", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-sso-saml-2.0", + "destination": "/docs/cloud/manage-access/set-up-sso-saml-2.0", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-sso-okta", + "destination": "/docs/cloud/manage-access/set-up-sso-okta", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-sso-google-workspace", + "destination": "/docs/cloud/manage-access/set-up-sso-google-workspace", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-sso-azure-active-directory", + "destination": "/docs/cloud/manage-access/set-up-sso-azure-active-directory", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-snowflake-oauth", + "destination": "/docs/cloud/manage-access/set-up-snowflake-oauth", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/set-up-bigquery-oauth", + "destination": "/docs/cloud/manage-access/set-up-bigquery-oauth", + "permanent": true + }, + { + "source": "/docs/collaborate/manage-access/audit-log", + "destination": "/docs/cloud/manage-access/audit-log", + "permanent": true + }, + { + "source": "/docs/get-started/develop-in-the-cloud", + "destination": "/docs/cloud/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/get-started/dbt-cloud-tips", + "destination": "/docs/cloud/dbt-cloud-tips", + "permanent": true + }, + { + "source": "/docs/get-started/installation", + "destination": "/docs/core/installation", + "permanent": true + }, + { + "source": "/docs/get-started/about-the-cli", + "destination": "/docs/core/about-the-cli", + "permanent": true + }, + { + "source": "/docs/get-started/homebrew-install", + "destination": "/docs/core/homebrew-install", + "permanent": true + }, + { + "source": "/docs/get-started/pip-install", + 
"destination": "/docs/core/pip-install", + "permanent": true + }, + { + "source": "/docs/get-started/docker-install", + "destination": "/docs/core/docker-install", + "permanent": true + }, + { + "source": "/docs/get-started/source-install", + "destination": "/docs/core/source-install", + "permanent": true + }, + { + "source": "/docs/get-started/connection-profiles", + "destination": "/docs/core/connection-profiles", + "permanent": true + }, + { + "source": "/docs/get-started/run-your-dbt-projects", + "destination": "/docs/running-a-dbt-project/run-your-dbt-projects", + "permanent": true + }, + { + "source": "/docs/get-started/learning-more/refactoring-legacy-sql", + "destination": "/guides/migration/tools/refactoring-legacy-sql", + "permanent": true + }, + { + "source": "/docs/get-started/learning-more/using-jinja", + "destination": "/guides/advanced/using-jinja", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/set-up-dbt-cloud", + "destination": "/quickstarts", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-snowflake", + "destination": "/docs/quickstarts/dbt-cloud/snowflake", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-redshift", + "destination": "/docs/quickstarts/dbt-cloud/redshift", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-databricks", + "destination": "/quickstarts/databricks", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-bigquery", + "destination": "/docs/quickstarts/dbt-cloud/bigquery", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-databricks", + "destination": "/quickstarts/databricks", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-redshift", + "destination": "/docs/quickstarts/dbt-cloud/redshift", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/getting-set-up/setting-up-snowflake", + "destination": "/docs/quickstarts/dbt-cloud/snowflake", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/building-your-first-project/schedule-a-job", + "destination": "/quickstarts/bigquery", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/building-your-first-project/test-and-document-your-project", + "destination": "/docs/quickstarts/dbt-cloud/bigquery#add-tests-to-your-models", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/building-your-first-project/build-your-first-models", + "destination": "/quickstarts/bigquery?step=8", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/overview", + "destination": "/quickstarts", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started-dbt-core", + "destination": "/docs/quickstarts/dbt-core/quickstart", + "permanent": true + }, + { + "source": "/docs/get-started/develop-in-the-cloud#set-up-environments", + "destination": "/docs/get-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/get-started/develop-in-the-cloud#developer-credentials", + "destination": "/docs/get-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/getting-started/develop-in-the-cloud#setting-up-developer-credentials", + "destination": "/docs/get-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": 
"/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-your-database#connecting-to-redshift-and-postgres", + "destination": "/docs/get-started/connect-your-database#connecting-to-postgres-redshift-and-alloydb", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-your-database#connecting-to-snowflake", + "destination": "/docs/get-started/connect-your-database#connecting-to-snowflake", + "permanent": true + }, + { + "source": "/docs/get-started/connect-your-database#connecting-to-snowflake", + "destination": "/docs/cloud/connect-data-platform/connect-snowflake", + "permanent": true + }, + { + "source": "/docs/get-started/connect-your-database#connecting-to-postgres-redshift-and-alloydb", + "destination": "/cloud/connect-data-platform/connect-redshift-postgresql-alloydb", + "permanent": true + }, + { + "source": "/docs/cloud/connect-data-platform/connect-your-database", + "destination": "/docs/cloud/connect-data-platform/about-connections", + "permanent": true + }, + { + "source": "/faqs/connecting-to-two-dbs-not-allowed", + "destination": "/faqs/warehouse/connecting-to-two-dbs-not-allowed", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/ide-beta", + "destination": "/docs/get-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/bigquery", + "destination": "/quickstarts/bigquery", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/redshift", + "destination": "/quickstarts/redshift", + "permanent": true + }, + { + "source": "/docs/quickstarts/dbt-cloud/snowflake", + "destination": "/quickstarts/snowflake", + "permanent": true + }, + { + "source": "/quickstarts/starburst-galaxy", + "destination": "/quickstarts/starburst-galaxy", + "permanent": true + }, + { + "source": "/quickstarts/codespace", + "destination": "/quickstarts/codespace", + "permanent": true + }, + { + "source": "/quickstarts/manual-install", + "destination": "/quickstarts/manual-install", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-model-timing-tab", + "destination": "/docs/get-started/dbt-cloud-features#model-timing-dashboard", + "permanent": true + }, + { + "source": "/docs/dbt-cloud", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-choosing-a-dbt-version", + "destination": "/docs/dbt-versions/upgrade-core-in-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/viewing-docs-in-the-ide", + "destination": "/docs/get-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-overview", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/artifacts", + "destination": "/docs/deploy/artifacts", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/python-models", + "destination": "/docs/build/python-models", + "permanent": true + }, + { + "source": "/docs/deploy/regions", + "destination": "/docs/deploy/regions-ip-addresses", + "permanent": true + }, + { + "source": "/advanced/adapter-development/1-what-are-adapters", + "destination": "/guides/dbt-ecosystem/adapter-development/1-what-are-adapters", + "permanent": true + }, + { + "source": "/advanced/adapter-development/2-prerequisites-for-a-new-adapter", + "destination": 
"/guides/dbt-ecosystem/adapter-development/2-prerequisites-for-a-new-adapter", + "permanent": true + }, + { + "source": "/advanced/adapter-development/3-building-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/advanced/adapter-development/4-testing-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/4-testing-a-new-adapter", + "permanent": true + }, + { + "source": "/advanced/adapter-development/5-documenting-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/advanced/adapter-development/6-promoting-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/6-promoting-a-new-adapter", + "permanent": true + }, + { + "source": "/advanced/adapter-development/7-verifying-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/1-what-are-adapters", + "destination": "/guides/dbt-ecosystem/adapter-development/1-what-are-adapters", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/2-prerequisites-for-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/2-prerequisites-for-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/3-building-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/4-testing-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/4-testing-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/5-documenting-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/6-promoting-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/6-promoting-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/advanced/adapter-development/7-verifying-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/7-verifying-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/legacy/debugging-errors", + "destination": "/guides/best-practices/debugging-errors", + "permanent": true + }, + { + "source": "/guides/legacy/writing-custom-generic-tests", + "destination": "/guides/best-practices/writing-custom-generic-tests", + "permanent": true + }, + { + "source": "/guides/legacy/creating-new-materializations", + "destination": "/guides/advanced/creating-new-materializations", + "permanent": true + }, + { + "source": "/guides/getting-started", + "destination": "/docs/get-started/getting-started/overview", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/building-your-first-project", + "destination": "/docs/get-started/getting-started/building-your-first-project/build-your-first-models", + "permanent": true + }, + { + "source": "/docs/get-started/getting-started/create-a-project", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/guides/getting-started/building-your-first-project", + "destination": "/docs/get-started/getting-started/building-your-first-project/build-your-first-models", + 
"permanent": true + }, + { + "source": "/guides/getting-started/building-your-first-project/build-your-first-models", + "destination": "/docs/get-started/getting-started/building-your-first-project/build-your-first-models", + "permanent": true + }, + { + "source": "/guides/getting-started/building-your-first-project/schedule-a-job", + "destination": "/docs/get-started/getting-started/building-your-first-project/schedule-a-job", + "permanent": true + }, + { + "source": "/guides/getting-started/building-your-first-project/test-and-document-your-project", + "destination": "/docs/get-started/getting-started/building-your-first-project/test-and-document-your-project", + "permanent": true + }, + { + "source": "/guides/getting-started/create-a-project", + "destination": "/docs/get-started/getting-started/building-your-first-project/build-your-first-models301", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-set-up", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-set-up/setting-up-bigquery", + "destination": "/docs/get-started/getting-started/getting-set-up/setting-up-bigquery", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-set-up/setting-up-databricks", + "destination": "/docs/get-started/getting-started/getting-set-up/setting-up-databricks", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-set-up/setting-up-redshift", + "destination": "/docs/get-started/getting-started/getting-set-up/setting-up-redshift", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-set-up/setting-up-snowflake", + "destination": "/docs/get-started/getting-started/getting-set-up/setting-up-snowflake", + "permanent": true + }, + { + "source": "/guides/getting-started/getting-started", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/guides/getting-started/learning-more", + "destination": "/docs/get-started/getting-started-dbt-core", + "permanent": true + }, + { + "source": "/guides/getting-started/learning-more/getting-started-dbt-core", + "destination": "/docs/get-started/getting-started-dbt-core", + "permanent": true + }, + { + "source": "/guides/getting-started/learning-more/refactoring-legacy-sql", + "destination": "/docs/get-started/learning-more/refactoring-legacy-sql", + "permanent": true + }, + { + "source": "/guides/getting-started/learning-more/using-jinja", + "destination": "/docs/get-started/learning-more/using-jinja", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-quickstart", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/cloud-quickstart", + "destination": "/docs/dbt-cloud/cloud-quickstart", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud", + "destination": "/docs/get-started/getting-started/set-up-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-your-database", + "destination": "/docs/cloud/connect-data-platform/about-connections", + "permanent": true + }, + { + "source": "/docs/get-started/connect-your-database", + "destination": "/docs/cloud/connect-data-platform/about-connections", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/profile", + "destination": "/docs/get-started/connection-profiles", + "permanent": true + }, + { + "source": 
"/guides/best-practices/materializations/guides/best-practices/materializations/1-overview", + "destination": "/guides/best-practices/materializations/1-guide-overview", + "permanent": true + }, + { + "source": "/docs/deploy/understanding-state", + "destination": "/docs/deploy/about-state", + "permanent": true + }, + { + "source": "/guides/legacy/understanding-state", + "destination": "/docs/deploy/about-state", + "permanent": true + }, + { + "source": "/guides/migration/versions/Older%20versions/understanding-state", + "destination": "/docs/deploy/about-state", + "permanent": true + }, + { + "source": "/docs/collaborate/git/resolve-merge-conflicts", + "destination": "/docs/collaborate/git/merge-conflicts", + "permanent": true + }, + { + "source": "/docs/collaborate/environments", + "destination": "/docs/collaborate/environments/environments-in-dbt", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/running-dbt-in-production", + "destination": "/docs/deploy/deployments", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-slack-notifications", + "destination": "/docs/deploy/job-notifications", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud", + "destination": "/docs/develop/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/january-2020-pricing-updates", + "destination": "https://www.getdbt.com/pricing/", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise", + "destination": "https://www.getdbt.com/pricing/", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/archival", + "destination": "/docs/build/snapshots", + "permanent": true + }, + { + "source": "/docs/about/license", + "destination": "/community/resources/contributor-license-agreements", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-using-a-managed-repository", + "destination": "/docs/collaborate/git/managed-repository", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/release-notes", + "destination": "/docs/dbt-versions/dbt-cloud-release-notes", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/audit-log", + "destination": "/docs/collaborate/manage-access/audit-log", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-setting-up-bigquery-oauth", + "destination": "/docs/collaborate/manage-access/set-up-bigquery-oauth", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-enterprise-snowflake-oauth", + "destination": "/docs/collaborate/manage-access/set-up-snowflake-oauth", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-sso-with-okta", + "destination": "/docs/collaborate/manage-access/set-up-sso-okta", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-enterprise-sso-with-azure-active-directory", + "destination": "/docs/collaborate/manage-access/set-up-sso-azure-active-directory", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-sso-with-google-gsuite", + "destination": "/docs/collaborate/manage-access/set-up-sso-google-workspace", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-sso-with-saml-2.0", + "destination": "/docs/collaborate/manage-access/set-up-sso-saml-2.0", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/sso-overview", + 
"destination": "/docs/collaborate/manage-access/sso-overview", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/access-control/enterprise-permissions", + "destination": "/docs/collaborate/manage-access/enterprise-permissions", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/access-control/self-service-permissions", + "destination": "/docs/collaborate/manage-access/self-service-permissions", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/access-control/cloud-seats-and-users", + "destination": "/docs/collaborate/manage-access/seats-and-users", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/access-control/access-control-overview", + "destination": "/docs/collaborate/manage-access/about-access", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-generating-documentation", + "destination": "/docs/collaborate/build-and-view-your-docs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/documentation", + "destination": "/docs/collaborate/documentation", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/managing-environments", + "destination": "/docs/collaborate/environments/environments-in-dbt", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-import-a-project-by-git-url", + "destination": "/docs/collaborate/git/import-a-project-by-git-url", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/authenticate-azure", + "destination": "/docs/collaborate/git/authenticate-azure", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/setup-azure", + "destination": "/docs/collaborate/git/setup-azure", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-azure-devops", + "destination": "/docs/collaborate/git/connect-azure-devops", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-gitlab", + "destination": "/docs/collaborate/git/connect-gitlab", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-installing-the-github-application", + "destination": "/docs/collaborate/git/connect-github", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/setting-up", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/handling-merge-conflicts", + "destination": "/docs/collaborate/git/resolve-merge-conflicts", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/viewing-docs-in-the-ide", + "destination": "/docs/collaborate/cloud-build-and-view-your-docs", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-configuring-repositories", + "destination": "/docs/collaborate/git/pr-template", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration", + "destination": "/docs/deploy/cloud-ci-job", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-dashboard-status-tiles", + "destination": "/docs/deploy/dashboard-status-tiles", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-snapshotting-source-freshness", + "destination": "/docs/deploy/source-freshness", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-notifications", + "destination": "/docs/deploy/job-notifications", + "permanent": true + }, + { + "source": 
"/docs/dbt-cloud/using-dbt-cloud/cloud-using-a-custom-cron-schedule", + "destination": "/docs/deploy/job-triggers", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/deployments/airgapped-deployment", + "destination": "/docs/deploy/airgapped-deployment", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/deployments/single-tenant-deployment", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/deployments/multi-tenant-deployment", + "destination": "/docs/deploy/multi-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/deployments/deployment-architecture", + "destination": "/docs/deploy/architecture", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/deployments/deployment-overview", + "destination": "/docs/deploy/deployments", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-setting-a-custom-target-name", + "destination": "/docs/build/custom-target-names", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-custom-aliases", + "destination": "/docs/build/custom-aliases", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-custom-databases", + "destination": "/docs/build/custom-databases", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-custom-schemas", + "destination": "/docs/build/custom-schemas", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-exposures", + "destination": "/docs/dbt-cloud-apis/metadata-schema-exposures", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-exposure", + "destination": "/docs/dbt-cloud-apis/metadata-schema-exposure", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-tests", + "destination": "/docs/dbt-cloud-apis/metadata-schema-tests", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-test", + "destination": "/docs/dbt-cloud-apis/metadata-schema-test", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-snapshots", + "destination": "/docs/dbt-cloud-apis/metadata-schema-snapshots", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-seeds", + "destination": "/docs/dbt-cloud-apis/metadata-schema-seeds", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-seed", + "destination": "/docs/dbt-cloud-apis/metadata-schema-seed", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-sources", + "destination": "/docs/dbt-cloud-apis/metadata-schema-sources", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-source", + "destination": "/docs/dbt-cloud-apis/metadata-schema-source", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-metrics", + "destination": "/docs/dbt-cloud-apis/metadata-schema-metrics", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-metric", + "destination": "/docs/dbt-cloud-apis/metadata-schema-metric", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-modelByEnv", + "destination": 
"/docs/dbt-cloud-apis/metadata-schema-modelByEnv", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-models", + "destination": "/docs/dbt-cloud-apis/metadata-schema-models", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/schema/metadata-schema-model", + "destination": "/docs/dbt-cloud-apis/metadata-schema-model", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/metadata-querying", + "destination": "/docs/dbt-cloud-apis/metadata-querying", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/metadata/metadata-overview", + "destination": "/docs/dbt-cloud-apis/metadata-api", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/admin-cloud-api", + "destination": "/docs/dbt-cloud-apis/admin-cloud-api", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/service-tokens", + "destination": "/docs/dbt-cloud-apis/service-tokens", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/user-tokens", + "destination": "/docs/dbt-cloud-apis/user-tokens", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-api/cloud-apis", + "destination": "/docs/dbt-cloud-apis/overview", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/hooks-operations", + "destination": "/docs/build/hooks-operations", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/analyses", + "destination": "/docs/build/analyses", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/package-management", + "destination": "/docs/build/packages", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-environment-variables", + "destination": "/docs/build/environment-variables", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-variables", + "destination": "/docs/build/project-variables", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/jinja-macros", + "destination": "/docs/build/jinja-macros", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/configuring-incremental-models", + "destination": "/docs/build/incremental-models", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/materializations", + "destination": "/docs/build/materializations", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/tests", + "destination": "/docs/build/tests", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/metrics", + "destination": "/docs/build/metrics", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/exposures", + "destination": "/docs/build/exposures", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/snapshots", + "destination": "/docs/build/snapshots", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/seeds", + "destination": "/docs/build/seeds", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models", + "destination": "/docs/build/sql-models", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/using-sources", + "destination": "/docs/build/sources", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/projects", + "destination": "/docs/build/projects", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/python-models", + 
"destination": "/docs/build/python-models", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/macros", + "destination": "/docs/guides/building-packages", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/setting-up", + "destination": "/docs/guides/building-packages", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-jinja-functions", + "destination": "/docs/guides/building-packages", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-upgrading-dbt-versions", + "destination": "/docs/dbt-versions/upgrade-core-in-cloud", + "permanent": true + }, + { + "source": "/docs/core-versions", + "destination": "/docs/dbt-versions/core", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-dbt-cloud-support", + "destination": "/docs/dbt-support", + "permanent": true + }, + { + "source": "/docs/about/viewpoint", + "destination": "/community/resources/viewpoint", + "permanent": true + }, + { + "source": "/docs/viewpoint", + "destination": "/community/resources/viewpoint", + "permanent": true + }, + { + "source": "/dbt-cli/configure-your-profile", + "destination": "/docs/get-started/connection-profiles", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-cli", + "destination": "/docs/get-started/about-the-cli", + "permanent": true + }, + { + "source": "/dbt-cli/install/from-source", + "destination": "/docs/get-started/source-install", + "permanent": true + }, + { + "source": "/dbt-cli/install/docker", + "destination": "/docs/get-started/docker-install", + "permanent": true + }, + { + "source": "/dbt-cli/install/pip", + "destination": "/docs/get-started/pip-install", + "permanent": true + }, + { + "source": "/dbt-cli/install/homebrew", + "destination": "/docs/get-started/homebrew-install", + "permanent": true + }, + { + "source": "/dbt-cli/install/overview", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/the-dbt-ide", + "destination": "/docs/get-started/dbt-cloud-features", + "permanent": true + }, + { + "source": "/((?!useful).*components)", + "destination": "https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/adding-page-components.md", + "permanent": true + }, + { + "source": "/guides/legacy/managing-environments", + "destination": "/docs/building-a-dbt-project/managing-environments", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/dbt-api", + "destination": "/docs/introduction", + "permanent": true + }, + { + "source": "/img/docs/dbt-cloud/dbt-cloud-enterprise/icon.png", + "destination": "https://www.getdbt.com/ui/img/dbt-icon.png", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/centos", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/centos", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/install-from-source", + "destination": "/dbt-cli/install/from-source", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/macos", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/ubuntu-debian", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/dbt-cli/installation-guides/windows", + "destination": "/docs/get-started/installation", + "permanent": true + }, + 
{ + "source": "/dbt-cli/installation", + "destination": "/docs/get-started/installation", + "permanent": true + }, + { + "source": "/dbt-jinja-functions", + "destination": "/reference/dbt-jinja-functions", + "permanent": true + }, + { + "source": "/docs", + "destination": "/docs/introduction", + "permanent": true + }, + { + "source": "/docs/adapter", + "destination": "/docs/writing-code-in-dbt/jinja-context/adapter", + "permanent": true + }, + { + "source": "/docs/analyses", + "destination": "/docs/building-a-dbt-project/analyses", + "permanent": true + }, + { + "source": "/docs/api-variable", + "destination": "/docs/writing-code-in-dbt/api-variable", + "permanent": true + }, + { + "source": "/docs/archival", + "destination": "/docs/building-a-dbt-project/archival", + "permanent": true + }, + { + "source": "/docs/artifacts", + "destination": "/docs/dbt-cloud/using-dbt-cloud/artifacts", + "permanent": true + }, + { + "source": "/docs/bigquery-configs", + "destination": "/reference/resource-configs/bigquery-configs", + "permanent": true + }, + { + "source": "/reference/resource-properties/docs", + "destination": "/reference/resource-configs/docs", + "permanent": true + }, + { + "source": "/reference/resource-properties/latest-version", + "destination": "/reference/resource-properties/latest_version", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/bigquery-configs", + "destination": "/reference/resource-configs/bigquery-configs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/configuring-models", + "destination": "/reference/model-configs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/enable-and-disable-models", + "destination": "/reference/resource-configs/enabled", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/redshift-configs", + "destination": "/reference/resource-configs/redshift-configs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/snowflake-configs", + "destination": "/reference/resource-configs/snowflake-configs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/spark-configs", + "destination": "/reference/resource-configs/spark-configs", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/tags", + "destination": "/reference/resource-configs/tags", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-sql-headers", + "destination": "/reference/resource-configs/sql_header", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-projects", + "destination": "/docs/building-a-dbt-project/projects", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-projects/configuring-query-comments", + "destination": "/reference/project-configs/query-comment", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-projects/configuring-quoting", + "destination": "/reference/project-configs/quoting", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-projects/creating-a-project", + "destination": "/docs/building-a-dbt-project/projects#creating-a-dbt-project", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-projects/requiring-specific-dbt-versions", + "destination": "/reference/project-configs/require-dbt-version", + "permanent": true + }, + { + "source": 
"/docs/building-a-dbt-project/dbt-projects/use-an-existing-project", + "destination": "/docs/building-a-dbt-project/projects#using-an-existing-project", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/hooks", + "destination": "/docs/building-a-dbt-project/hooks-operations", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/testing-and-documentation", + "destination": "/docs/building-a-dbt-project/tests", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/testing-and-documentation/documentation", + "destination": "/docs/building-a-dbt-project/testing-and-documentation/documentation", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/testing-and-documentation/documentation-website", + "destination": "/docs/building-a-dbt-project/testing-and-documentation/documentation", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/testing-and-documentation/schemayml-files", + "destination": "/reference/declaring-properties", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/testing-and-documentation/testing", + "destination": "/docs/building-a-dbt-project/tests", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/using-operations", + "destination": "/docs/building-a-dbt-project/hooks-operations", + "permanent": true + }, + { + "source": "/docs/building-models", + "destination": "/docs/building-a-dbt-project/building-models", + "permanent": true + }, + { + "source": "/docs/building-packages", + "destination": "/guides/legacy/building-packages", + "permanent": true + }, + { + "source": "/docs/centos", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/clean", + "destination": "/reference/commands/clean", + "permanent": true + }, + { + "source": "/docs/cloud-choosing-a-dbt-version", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-choosing-a-dbt-version", + "permanent": true + }, + { + "source": "/docs/cloud-configuring-dbt-cloud", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/cloud-enabling-continuous-integration-with-github", + "destination": "/docs/deploy/cloud-ci-job", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration-with-github", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration-with-github", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration", + "permanent": true + }, + { + "source": "/docs/cloud-generating-documentation", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-generating-documentation", + "permanent": true + }, + { + "source": "/docs/cloud-import-a-project-by-git-url", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-import-a-project-by-git-url", + "permanent": true + }, + { + "source": "/docs/cloud-installing-the-github-application", + "destination": "/docs/cloud/git/connect-github", + "permanent": true + }, + { + "source": "/docs/cloud-managing-permissions", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-managing-permissions", + "permanent": true + }, + { + "source": "/docs/cloud-overview", + "destination": "/docs/dbt-cloud/cloud-overview", + "permanent": true + }, + { + "source": "/docs/cloud-seats-and-users", + "destination": 
"/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-seats-and-users", + "permanent": true + }, + { + "source": "/docs/cloud-setting-a-custom-target-name", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-setting-a-custom-target-name", + "permanent": true + }, + { + "source": "/docs/cloud-snapshotting-source-freshness", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-snapshotting-source-freshness", + "permanent": true + }, + { + "source": "/docs/cloud-supported-dbt-versions", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-choosing-a-dbt-version", + "permanent": true + }, + { + "source": "/docs/cloud-using-a-custom-cron-schedule", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-using-a-custom-cron-schedule", + "permanent": true + }, + { + "source": "/docs/cloud-using-a-managed-repository", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-using-a-managed-repository", + "permanent": true + }, + { + "source": "/docs/cmd-docs", + "destination": "/reference/commands/cmd-docs", + "permanent": true + }, + { + "source": "/docs/command-line-interface", + "destination": "/reference/dbt-commands", + "permanent": true + }, + { + "source": "/docs/compile", + "destination": "/reference/commands/compile", + "permanent": true + }, + { + "source": "/docs/config", + "destination": "/docs/writing-code-in-dbt/jinja-context/config", + "permanent": true + }, + { + "source": "/docs/configure-your-profile", + "destination": "/dbt-cli/configure-your-profile", + "permanent": true + }, + { + "source": "/docs/configuring-incremental-models", + "destination": "/docs/building-a-dbt-project/building-models/configuring-incremental-models", + "permanent": true + }, + { + "source": "/docs/configuring-models", + "destination": "/reference/model-configs", + "permanent": true + }, + { + "source": "/docs/configuring-query-comments", + "destination": "/docs/building-a-dbt-project/dbt-projects/configuring-query-comments", + "permanent": true + }, + { + "source": "/docs/configuring-quoting", + "destination": "/docs/building-a-dbt-project/dbt-projects/configuring-quoting", + "permanent": true + }, + { + "source": "/docs/configuring-resources-from-the-project-file", + "destination": "/docs/building-a-dbt-project/dbt-projects/configuring-resources-from-the-project-file", + "permanent": true + }, + { + "source": "/docs/connecting-your-database", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-your-database", + "permanent": true + }, + { + "source": "/docs/contributor-license-agreements", + "destination": "/docs/contributing/contributor-license-agreements", + "permanent": true + }, + { + "source": "/docs/creating-a-project", + "destination": "/docs/building-a-dbt-project/dbt-projects/creating-a-project", + "permanent": true + }, + { + "source": "/docs/creating-new-materializations", + "destination": "/guides/legacy/creating-new-materializations", + "permanent": true + }, + { + "source": "/docs/creating-date-partitioned-tables", + "destination": "/docs/guides/database-specific-guides/creating-date-partitioned-tables", + "permanent": true + }, + { + "source": "/docs/custom-schema-tests", + "destination": "/guides/legacy/writing-custom-generic-tests", + "permanent": true + }, + { + "source": "/docs/database-specific-guides", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/dbt-api", + "destination": "/docs/running-a-dbt-project/dbt-api", + "permanent": true + }, + { + "source": "/docs/dbt-cloud-enterprise", + "destination": 
"/docs/dbt-cloud/dbt-cloud-enterprise", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-repositories", + "destination": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-configuring-repositories", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-configuring-dbt-cloud/cloud-choosing-a-dbt-version", + "destination": "/docs/dbt-versions/upgrade-core-in-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/dbt-cloud-enterprise/enterprise-permissions", + "destination": "/docs/dbt-cloud/access-control/enterprise-permissions", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/architecture", + "destination": "/dbt-cloud/on-premises/dependencies", + "permanent": true + }, + { + "source": "/docs/dbt-projects", + "destination": "/docs/building-a-dbt-project/dbt-projects", + "permanent": true + }, + { + "source": "/docs/dbt_projectyml-file", + "destination": "/docs/building-a-dbt-project/dbt-projects/dbt_projectyml-file", + "permanent": true + }, + { + "source": "/docs/debug", + "destination": "/reference/commands/debug", + "permanent": true + }, + { + "source": "/docs/debug-method", + "destination": "/docs/writing-code-in-dbt/jinja-context/debug-method", + "permanent": true + }, + { + "source": "/docs/deps", + "destination": "/reference/commands/deps", + "permanent": true + }, + { + "source": "/docs/doc", + "destination": "/docs/writing-code-in-dbt/jinja-context/doc", + "permanent": true + }, + { + "source": "/docs/documentation", + "destination": "/docs/building-a-dbt-project/documentation", + "permanent": true + }, + { + "source": "/docs/documentation-website", + "destination": "/docs/building-a-dbt-project/documentation", + "permanent": true + }, + { + "source": "/docs/dont-nest-your-curlies", + "destination": "/docs/building-a-dbt-project/dont-nest-your-curlies", + "permanent": true + }, + { + "source": "/docs/enable-and-disable-models", + "destination": "/reference/resource-configs/enabled", + "permanent": true + }, + { + "source": "/docs/enterprise-permissions", + "destination": "/docs/dbt-cloud/dbt-cloud-enterprise/enterprise-permissions", + "permanent": true + }, + { + "source": "/docs/env_var", + "destination": "/docs/writing-code-in-dbt/jinja-context/env_var", + "permanent": true + }, + { + "source": "/docs/exceptions", + "destination": "/docs/writing-code-in-dbt/jinja-context/exceptions", + "permanent": true + }, + { + "source": "/docs/execute", + "destination": "/docs/writing-code-in-dbt/jinja-context/execute", + "permanent": true + }, + { + "source": "/docs/exit-codes", + "destination": "/reference/exit-codes", + "permanent": true + }, + { + "source": "/docs/flags", + "destination": "/docs/writing-code-in-dbt/jinja-context/flags", + "permanent": true + }, + { + "source": "/docs/fromjson", + "destination": "/docs/writing-code-in-dbt/jinja-context/fromjson", + "permanent": true + }, + { + "source": "/docs/getting-started-with-jinja", + "destination": "/docs/building-a-dbt-project/jinja-macros", + "permanent": true + }, + { + "source": "/docs/global-cli-flags", + "destination": "/reference/global-cli-flags", + "permanent": true + }, + { + "source": "/docs/graph", + "destination": "/docs/writing-code-in-dbt/jinja-context/graph", + "permanent": true + }, + { + "source": "/docs/guides/building-packages", + "destination": "/guides/legacy/building-packages", + "permanent": true + }, + { + "source": "/docs/guides/creating-new-materializations", + "destination": "/guides/legacy/creating-new-materializations", + 
"permanent": true + }, + { + "source": "/docs/guides/debugging-errors", + "destination": "/guides/legacy/debugging-errors", + "permanent": true + }, + { + "source": "/docs/guides/debugging-schema-names", + "destination": "/guides/legacy/debugging-schema-names", + "permanent": true + }, + { + "source": "/docs/guides/getting-help", + "destination": "/guides/legacy/getting-help", + "permanent": true + }, + { + "source": "/docs/guides/managing-environments", + "destination": "/guides/legacy/managing-environments", + "permanent": true + }, + { + "source": "/docs/guides/navigating-the-docs", + "destination": "/guides/legacy/navigating-the-docs", + "permanent": true + }, + { + "source": "/docs/guides/understanding-state", + "destination": "/guides/legacy/understanding-state", + "permanent": true + }, + { + "source": "/docs/guides/videos", + "destination": "/guides/legacy/videos", + "permanent": true + }, + { + "source": "/docs/guides/writing-custom-generic-tests", + "destination": "/guides/legacy/writing-custom-generic-tests", + "permanent": true + }, + { + "source": "/docs/guides/writing-custom-schema-tests", + "destination": "/guides/legacy/writing-custom-generic-tests", + "permanent": true + }, + { + "source": "/docs/guides/best-practices#choose-your-materializations-wisely", + "destination": "/guides/legacy/best-practices#choose-your-materializations-wisely", + "permanent": true + }, + { + "source": "/docs/guides/best-practices#version-control-your-dbt-project", + "destination": "/guides/legacy/best-practices#version-control-your-dbt-project", + "permanent": true + }, + { + "source": "/docs/best-practices", + "destination": "/guides/legacy/best-practices", + "permanent": true + }, + { + "source": "/docs/guides/best-practices", + "destination": "/guides/best-practices", + "permanent": true + }, + { + "source": "/docs/hooks", + "destination": "/docs/building-a-dbt-project/hooks-operations", + "permanent": true + }, + { + "source": "/docs/init", + "destination": "/reference/commands/init", + "permanent": true + }, + { + "source": "/docs/install-from-source", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/installation", + "destination": "/docs/core/installation", + "permanent": true + }, + { + "source": "/docs/invocation_id", + "destination": "/docs/writing-code-in-dbt/jinja-context/invocation_id", + "permanent": true + }, + { + "source": "/docs/jinja-context", + "destination": "/docs/writing-code-in-dbt/jinja-context", + "permanent": true + }, + { + "source": "/docs/license", + "destination": "/docs/about/license", + "permanent": true + }, + { + "source": "/docs/list", + "destination": "/reference/commands/list", + "permanent": true + }, + { + "source": "/docs/log", + "destination": "/docs/writing-code-in-dbt/jinja-context/log", + "permanent": true + }, + { + "source": "/docs/macos", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/macros", + "destination": "/guides/legacy/building-packages", + "permanent": true + }, + { + "source": "/docs/maintaining-multiple-environments-with-dbt", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/managing-environments", + "destination": "/guides/legacy/managing-environments", + "permanent": true + }, + { + "source": "/docs/materializations", + "destination": "/docs/building-a-dbt-project/building-models/materializations", + "permanent": true + }, + { + "source": "/docs/model-selection-syntax", + "destination": "/reference/node-selection/syntax", + 
"permanent": true + }, + { + "source": "/docs/modules", + "destination": "/docs/writing-code-in-dbt/jinja-context/modules", + "permanent": true + }, + { + "source": "/docs/on-run-end-context", + "destination": "/docs/writing-code-in-dbt/jinja-context/on-run-end-context", + "permanent": true + }, + { + "source": "/docs/overview", + "destination": "/docs/introduction", + "permanent": true + }, + { + "source": "/docs/performance-optimization", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/package-management", + "destination": "/docs/building-a-dbt-project/package-management", + "permanent": true + }, + { + "source": "/docs/profile-bigquery", + "destination": "/reference/warehouse-profiles/bigquery-profile", + "permanent": true + }, + { + "source": "/docs/profile-mssql", + "destination": "/reference/warehouse-profiles/mssql-profile", + "permanent": true + }, + { + "source": "/docs/profile-postgres", + "destination": "/reference/warehouse-profiles/postgres-profile", + "permanent": true + }, + { + "source": "/docs/profile-presto", + "destination": "/reference/warehouse-profiles/presto-profile", + "permanent": true + }, + { + "source": "/docs/profile-redshift", + "destination": "/reference/warehouse-profiles/redshift-profile", + "permanent": true + }, + { + "source": "/docs/profile-snowflake", + "destination": "/reference/warehouse-profiles/snowflake-profile", + "permanent": true + }, + { + "source": "/docs/profile-spark", + "destination": "/reference/warehouse-profiles/spark-profile", + "permanent": true + }, + { + "source": "/docs/redshift-configs", + "destination": "/reference/resource-configs/redshift-configs", + "permanent": true + }, + { + "source": "/docs/spark-configs", + "destination": "/reference/resource-configs/spark-configs", + "permanent": true + }, + { + "source": "/docs/redshift-v2", + "destination": "/reference/warehouse-profiles/redshift-profile", + "permanent": true + }, + { + "source": "/docs/ref", + "destination": "/docs/writing-code-in-dbt/jinja-context/ref", + "permanent": true + }, + { + "source": "/docs/requiring-specific-dbt-versions", + "destination": "/docs/building-a-dbt-project/dbt-projects/requiring-specific-dbt-versions", + "permanent": true + }, + { + "source": "/docs/requiring-dbt-versions", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/return", + "destination": "/docs/writing-code-in-dbt/jinja-context/return", + "permanent": true + }, + { + "source": "/docs/rpc", + "destination": "/reference/commands/rpc", + "permanent": true + }, + { + "source": "/docs/run", + "destination": "/reference/commands/run", + "permanent": true + }, + { + "source": "/docs/run-operation", + "destination": "/reference/commands/run-operation", + "permanent": true + }, + { + "source": "/docs/run_query", + "destination": "/docs/writing-code-in-dbt/jinja-context/run_query", + "permanent": true + }, + { + "source": "/docs/run_started_at", + "destination": "/docs/writing-code-in-dbt/jinja-context/run_started_at", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface", + "destination": "/reference/dbt-commands", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/clean", + "destination": "/reference/commands/clean", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/cmd-docs", + "destination": "/reference/commands/cmd-docs", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/compile", 
+ "destination": "/reference/commands/compile", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/debug", + "destination": "/reference/commands/debug", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/deps", + "destination": "/reference/commands/deps", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/exit-codes", + "destination": "/reference/exit-codes", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/global-cli-flags", + "destination": "/reference/global-cli-flags", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/init", + "destination": "/reference/commands/init", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/list", + "destination": "/reference/commands/list", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/model-selection-syntax", + "destination": "/reference/model-selection-syntax", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/rpc", + "destination": "/reference/commands/rpc", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/run", + "destination": "/reference/commands/run", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/run-operation", + "destination": "/reference/commands/run-operation", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/seed", + "destination": "/reference/commands/seed", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/snapshot", + "destination": "/reference/commands/snapshot", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/source", + "destination": "/reference/commands/source", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/test", + "destination": "/reference/commands/test", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/command-line-interface/version", + "destination": "/reference/global-cli-flags#version", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface", + "destination": "/docs/running-a-dbt-project/using-the-cli", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/centos", + "destination": "/dbt-cli/installation-guides/centos", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/configure-your-profile", + "destination": "/dbt-cli/configure-your-profile", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/install-from-source", + "destination": "/dbt-cli/installation-guides/install-from-source", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/installation", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/macos", + "destination": "/dbt-cli/installation-guides/macos", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/ubuntu-debian", + "destination": "/dbt-cli/installation-guides/ubuntu-debian", + "permanent": true + }, + 
{ + "source": "/docs/running-a-dbt-project/using-the-command-line-interface/windows", + "destination": "/dbt-cli/installation-guides/windows", + "permanent": true + }, + { + "source": "/docs/running-dbt-in-production", + "destination": "/docs/running-a-dbt-project/running-dbt-in-production", + "permanent": true + }, + { + "source": "/docs/schema", + "destination": "/docs/writing-code-in-dbt/jinja-context/schema", + "permanent": true + }, + { + "source": "/docs/schemas", + "destination": "/docs/writing-code-in-dbt/jinja-context/schemas", + "permanent": true + }, + { + "source": "/docs/schemayml-files", + "destination": "/reference/declaring-properties", + "permanent": true + }, + { + "source": "/docs/seed", + "destination": "/reference/commands/seed", + "permanent": true + }, + { + "source": "/docs/seeds", + "destination": "/docs/building-a-dbt-project/seeds", + "permanent": true + }, + { + "source": "/docs/setting-up-enterprise-sso-with-azure-active-directory", + "destination": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-enterprise-sso-with-azure-active-directory", + "permanent": true + }, + { + "source": "/docs/setting-up-snowflake-sso", + "destination": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-enterprise-snowflake-oauth", + "permanent": true + }, + { + "source": "/docs/setting-up-sso-with-google-gsuite", + "destination": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-sso-with-google-gsuite", + "permanent": true + }, + { + "source": "/docs/setting-up-sso-with-okta", + "destination": "/docs/dbt-cloud/dbt-cloud-enterprise/setting-up-sso-with-okta", + "permanent": true + }, + { + "source": "/docs/snapshot", + "destination": "/reference/commands/snapshot", + "permanent": true + }, + { + "source": "/docs/snapshots", + "destination": "/docs/building-a-dbt-project/snapshots", + "permanent": true + }, + { + "source": "/docs/snowflake-configs", + "destination": "/reference/resource-configs/snowflake-configs", + "permanent": true + }, + { + "source": "/docs/source", + "destination": "/reference/commands/source", + "permanent": true + }, + { + "source": "/docs/statement-blocks", + "destination": "/docs/writing-code-in-dbt/jinja-context/statement-blocks", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-bigquery", + "destination": "/reference/bigquery-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-mssql", + "destination": "/reference/mssql-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-postgres", + "destination": "/reference/postgres-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-presto", + "destination": "/reference/presto-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-redshift", + "destination": "/reference/redshift-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-snowflake", + "destination": "/reference/snowflake-profile", + "permanent": true + }, + { + "source": "/docs/supported-databases/profile-spark", + "destination": "/reference/spark-profile", + "permanent": true + }, + { + "source": "/docs/tags", + "destination": "/reference/resource-configs/tags", + "permanent": true + }, + { + "source": "/docs/target", + "destination": "/docs/writing-code-in-dbt/jinja-context/target", + "permanent": true + }, + { + "source": "/docs/test", + "destination": "/reference/commands/test", + "permanent": true + }, + { + "source": "/docs/testing", + "destination": 
"/docs/building-a-dbt-project/tests", + "permanent": true + }, + { + "source": "/docs/testing-and-documentation", + "destination": "/docs/building-a-dbt-project/tests", + "permanent": true + }, + { + "source": "/docs/the-dbt-ide", + "destination": "/docs/cloud/about-cloud/dbt-cloud-features", + "permanent": true + }, + { + "source": "/docs/this", + "destination": "/docs/writing-code-in-dbt/jinja-context/this", + "permanent": true + }, + { + "source": "/docs/tojson", + "destination": "/docs/writing-code-in-dbt/jinja-context/tojson", + "permanent": true + }, + { + "source": "/docs/ubuntu-debian", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/use-an-existing-project", + "destination": "/docs/building-a-dbt-project/dbt-projects/use-an-existing-project", + "permanent": true + }, + { + "source": "/docs/using-custom-aliases", + "destination": "/docs/building-a-dbt-project/building-models/using-custom-aliases", + "permanent": true + }, + { + "source": "/docs/using-custom-database", + "destination": "/docs/building-a-dbt-project/building-models/using-custom-databases", + "permanent": true + }, + { + "source": "/docs/using-custom-schemas", + "destination": "/docs/building-a-dbt-project/building-models/using-custom-schemas", + "permanent": true + }, + { + "source": "/docs/using-dbt-cloud", + "destination": "/docs/dbt-cloud/using-dbt-cloud", + "permanent": true + }, + { + "source": "/docs/using-jinja", + "destination": "/guides/getting-started/learning-more/using-jinja", + "permanent": true + }, + { + "source": "/docs/using-operations", + "destination": "/docs/building-a-dbt-project/hooks-operations", + "permanent": true + }, + { + "source": "/docs/using-sources", + "destination": "/docs/building-a-dbt-project/using-sources", + "permanent": true + }, + { + "source": "/docs/using-sql-headers", + "destination": "/reference/resource-configs/sql_header", + "permanent": true + }, + { + "source": "/docs/using-the-command-line-interface", + "destination": "/docs/running-a-dbt-project/using-the-cli", + "permanent": true + }, + { + "source": "/docs/using-the-dbt-ide", + "destination": "/docs/running-a-dbt-project/using-the-dbt-ide", + "permanent": true + }, + { + "source": "/docs/using-variables", + "destination": "/docs/building-a-dbt-project/building-models/using-variables", + "permanent": true + }, + { + "source": "/docs/var", + "destination": "/docs/writing-code-in-dbt/jinja-context/var", + "permanent": true + }, + { + "source": "/docs/version", + "destination": "/reference/global-cli-flags#version", + "permanent": true + }, + { + "source": "/docs/videos", + "destination": "/guides/legacy/videos", + "permanent": true + }, + { + "source": "/docs/warehouse-specific-configurations", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/windows", + "destination": "/dbt-cli/installation", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/api-variable", + "destination": "/", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/class-reference", + "destination": "/reference/dbt-classes", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/extending-dbts-programming-environment/creating-new-materializations", + "destination": "/guides/legacy/creating-new-materializations", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/extending-dbts-programming-environment/custom-schema-tests", + "destination": "/guides/legacy/writing-custom-schema-tests", + "permanent": true + }, + { + "source": 
"/docs/writing-code-in-dbt/getting-started-with-jinja", + "destination": "/docs/building-a-dbt-project/jinja-macros", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/adapter", + "destination": "/reference/dbt-jinja-functions/adapter", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/as_text", + "destination": "/reference/dbt-jinja-functions/as_text", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/builtins", + "destination": "/reference/dbt-jinja-functions/builtins", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/config", + "destination": "/reference/dbt-jinja-functions/config", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/dbt-project-yml-context", + "destination": "/reference/dbt-jinja-functions/dbt-project-yml-context", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/dbt_version", + "destination": "/reference/dbt-jinja-functions/dbt_version", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/debug-method", + "destination": "/reference/dbt-jinja-functions/debug-method", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/doc", + "destination": "/reference/dbt-jinja-functions/doc", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/env_var", + "destination": "/reference/dbt-jinja-functions/env_var", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/exceptions", + "destination": "/reference/dbt-jinja-functions/exceptions", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/execute", + "destination": "/reference/dbt-jinja-functions/execute", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/flags", + "destination": "/reference/dbt-jinja-functions/flags", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/fromjson", + "destination": "/reference/dbt-jinja-functions/fromjson", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/fromyaml", + "destination": "/reference/dbt-jinja-functions/fromyaml", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/graph", + "destination": "/reference/dbt-jinja-functions/graph", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/invocation_id", + "destination": "/reference/dbt-jinja-functions/invocation_id", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/log", + "destination": "/reference/dbt-jinja-functions/log", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/modules", + "destination": "/reference/dbt-jinja-functions/modules", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/on-run-end-context", + "destination": "/reference/dbt-jinja-functions/on-run-end-context", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/profiles-yml-context", + "destination": "/reference/dbt-jinja-functions/profiles-yml-context", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/project_name", + "destination": "/reference/dbt-jinja-functions/project_name", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/ref", + "destination": "/reference/dbt-jinja-functions/ref", + "permanent": 
true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/return", + "destination": "/reference/dbt-jinja-functions/return", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/run_query", + "destination": "/reference/dbt-jinja-functions/run_query", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/run_started_at", + "destination": "/reference/dbt-jinja-functions/run_started_at", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/schema", + "destination": "/reference/dbt-jinja-functions/schema", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/schemas", + "destination": "/reference/dbt-jinja-functions/schemas", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/source", + "destination": "/reference/dbt-jinja-functions/source", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/statement-blocks", + "destination": "/reference/dbt-jinja-functions/statement-blocks", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/target", + "destination": "/reference/dbt-jinja-functions/target", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/this", + "destination": "/reference/dbt-jinja-functions/this", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/tojson", + "destination": "/reference/dbt-jinja-functions/tojson", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/toyaml", + "destination": "/reference/dbt-jinja-functions/toyaml", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/jinja-context/var", + "destination": "/reference/dbt-jinja-functions/var", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/macros", + "destination": "/docs/building-a-dbt-project/jinja-macros", + "permanent": true + }, + { + "source": "/docs/writing-code-in-dbt/using-jinja", + "destination": "/guides/getting-started/learning-more/using-jinja", + "permanent": true + }, + { + "source": "/faqs/getting-help", + "destination": "/guides/legacy/getting-help", + "permanent": true + }, + { + "source": "/migration-guide/upgrading-to-0-17-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/migration-guide/upgrading-to-0-18-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/reference", + "destination": "/", + "permanent": true + }, + { + "source": "/reference/accounts", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/api", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/bigquery-profile", + "destination": "/reference/warehouse-profile/bigquery-profile", + "permanent": true + }, + { + "source": "/reference/connections", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/data-test-configs", + "destination": "/reference/test-configs", + "permanent": true + }, + { + "source": "/reference/declaring-properties", + "destination": "/reference/configs-and-properties", + "permanent": true + }, + { + "source": "/reference/dbt-artifacts", + "destination": "/reference/artifacts/dbt-artifacts", + "permanent": true + }, + { + "source": "/reference/environments", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/events", + "destination": 
"/reference/events-logging", + "permanent": true + }, + { + "source": "/reference/jobs", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/model-selection-syntax", + "destination": "/reference/node-selection/syntax", + "permanent": true + }, + { + "source": "/reference/project-configs/on-run-end", + "destination": "/reference/project-configs/on-run-start-on-run-end", + "permanent": true + }, + { + "source": "/reference/project-configs/on-run-start", + "destination": "/reference/project-configs/on-run-start-on-run-end", + "permanent": true + }, + { + "source": "/reference/repositories", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/resource-configs/post-hook", + "destination": "/reference/resource-configs/pre-hook-post-hook", + "permanent": true + }, + { + "source": "/reference/resource-configs/pre-hook", + "destination": "/reference/resource-configs/pre-hook-post-hook", + "permanent": true + }, + { + "source": "/reference/resource-properties/tags", + "destination": "/reference/resource-configs/tags", + "permanent": true + }, + { + "source": "/reference/resource-properties/meta", + "destination": "/reference/resource-configs/meta", + "permanent": true + }, + { + "source": "/reference/runs", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/using-the-dbt-cloud-api", + "destination": "/dbt-cloud/api", + "permanent": true + }, + { + "source": "/reference/model-selection-syntax/#test-selection-examples", + "destination": "/reference/node-selection/test-selection-examples", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/building-models/using-custom-database", + "destination": "/docs/building-a-dbt-project/building-models/using-custom-databases", + "permanent": true + }, + { + "source": "/dbt-cloud/api", + "destination": "/dbt-cloud/api-v2", + "permanent": true + }, + { + "source": "/dbt-cloud/api-v2-old", + "destination": "/dbt-cloud/api-v2-legacy", + "permanent": true + }, + { + "source": "/dbt-cloud/api-v4", + "destination": "/docs/dbt-cloud-apis/admin-cloud-api", + "permanent": true + }, + { + "source": "/reference/project-configs/source-paths", + "destination": "/reference/project-configs/model-paths", + "permanent": true + }, + { + "source": "/reference/project-configs/data-paths", + "destination": "/reference/project-configs/seed-paths", + "permanent": true + }, + { + "source": "/reference/project-configs/modules-paths", + "destination": "/reference/project-configs/packages-install-path", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-slack-notifications", + "destination": "/docs/dbt-cloud/using-dbt-cloud/cloud-notifications", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/presto-profile", + "destination": "/reference/profiles.yml", + "permanent": true + }, + { + "source": "/setting-up", + "destination": "/guides/getting-started/getting-set-up/setting-up-bigquery", + "permanent": true + }, + { + "source": "/tutorial/setting-up", + "destination": "/quickstarts", + "permanent": true + }, + { + "source": "/tutorial/test-and-document-your-project", + "destination": "/guides/getting-started/building-your-first-project/test-and-document-your-project", + "permanent": true + }, + { + "source": "/tutorial/build-your-first-models", + "destination": "/guides/getting-started/building-your-first-project/build-your-first-models", + "permanent": true + }, + { + "source": "/tutorial/deploy-your-project", + 
"destination": "/guides/getting-started/building-your-first-project/schedule-a-job", + "permanent": true + }, + { + "source": "/tutorial/using-jinja", + "destination": "/guides/getting-started/learning-more/using-jinja", + "permanent": true + }, + { + "source": "/tutorial/2b-create-a-project-dbt-cli", + "destination": "/guides/getting-started/learning-more/getting-started-dbt-core", + "permanent": true + }, + { + "source": "/tutorial/create-a-project-dbt-cli", + "destination": "/guides/getting-started/learning-more/getting-started-dbt-core", + "permanent": true + }, + { + "source": "/tutorial/2a-create-a-project-dbt-cloud", + "destination": "/guides/getting-started", + "permanent": true + }, + { + "source": "/tutorial/create-a-project-dbt-cloud", + "destination": "/guides/getting-started", + "permanent": true + }, + { + "source": "/tutorial/getting-started", + "destination": "/guides/getting-started", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-changelog", + "destination": "/docs/dbt-cloud/release-notes", + "permanent": true + }, + { + "source": "/faqs/all", + "destination": "/docs/faqs", + "permanent": true + }, + { + "source": "/faqs/:slug", + "destination": "/docs/faqs/:slug*", + "permanent": true + }, + { + "source": "/faqs/dbt-jinja-functions", + "destination": "/reference/dbt-jinja-functions", + "permanent": true + }, + { + "source": "/tutorial/learning-more/:slug", + "destination": "/guides/getting-started/learning-more/:slug*", + "permanent": true + }, + { + "source": "/tutorial/getting-set-up/:slug", + "destination": "/guides/getting-started/getting-set-up/:slug*", + "permanent": true + }, + { + "source": "/tutorial/building-your-first-project/:slug", + "destination": "/guides/getting-started/building-your-first-project/:slug*", + "permanent": true + }, + { + "source": "/tutorial/refactoring-legacy-sql", + "destination": "/guides/migration/tools/refactoring-legacy-sql", + "permanent": true + }, + { + "source": "/blog/change-data-capture-metrics", + "destination": "/blog/change-data-capture", + "permanent": true + }, + { + "source": "/blog/intelligent-slim-ci", + "destination": "/docs/deploy/continuous-integration", + "permanent": true + }, + { + "source": "/blog/model-timing-tab", + "destination": "/blog/how-we-shaved-90-minutes-off-model", + "permanent": true + }, + { + "source": "/reference/warehouse-setups/resource-configs/materialize-configs/indexes", + "destination": "/reference/resource-configs/materialize-configs#indexes", + "permanent": true + }, + { + "source": "/docs/build/building-models", + "destination": "/docs/build/models", + "permanent": true + }, + { + "source": "/docs/build/bigquery-profile", + "destination": "/reference/resource-configs/bigquery-configs", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/bigquery-setup", + "destination": "/reference/warehouse-setups/bigquery-setup", + "permanent": true + }, + { + "source": "/date-trunc-sql", + "destination": "/blog/date-trunc-sql", + "permanent": true + }, + { + "source": "/docs/using-hooks", + "destination": "/", + "permanent": true + }, + { + "source": "/blog/how-we-structure-our-dbt-projects", + "destination": "/guides/best-practices/how-we-structure/1-guide-overview", + "permanent": true + }, + { + "source": "/data-testing-why-you-need-it-and-how-to-get-started", + "destination": "https://www.getdbt.com/blog/data-quality-testing/", + "permanent": true + }, + { + "source": "/docs/profile", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { 
+ "source": "/docs/available-adapters", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { + "source": "/docs/supported-databases", + "destination": "/docs/supported-data-platforms", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-14-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-15-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-16-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-17-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-18-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-19-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-from-0-10-to-0-11", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-014", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/upgrading-to-014", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/upgrading-to-0-14-1", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/upgrading-to-0-16-0", + "destination": "/guides/migration/versions", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-20-0", + "destination": "/guides/migration/versions/upgrading-to-v0.20", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-0-21-0", + "destination": "/guides/migration/versions/upgrading-to-v0.21", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-1-0-0", + "destination": "/guides/migration/versions/upgrading-to-v1.0", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/upgrading-to-v1.0", + "destination": "/guides/migration/versions/upgrading-to-v1.0", + "permanent": true + }, + { + "source": "/docs/guides/getting-help", + "destination": "/guides/legacy/getting-help", + "permanent": true + }, + { + "source": "/docs/guides/migration-guide/:slug", + "destination": "/guides/migration/versions/:slug*", + "permanent": true + }, + { + "source": "/docs/guides/:slug", + "destination": "/guides/legacy/:slug*", + "permanent": true + }, + { + "source": "/guides/best-practices/environment-setup/1-env-guide-overview", + "destination": "/guides/orchestration/set-up-ci/overview", + "permanent": true + }, + { + "source": "/guides/best-practices/environment-setup/2-one-deployment-environment", + "destination": "/guides/orchestration/set-up-ci/in-15-minutes", + "permanent": true + }, + { + "source": "/guides/best-practices/environment-setup/3-many-deployment-environments", + "destination": "/guides/orchestration/set-up-ci/multiple-environments", + "permanent": true + }, + { + "source": "/docs/contributing/what-are-adapters", + "destination": "/guides/advanced/adapter-development/1-what-are-adapters", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/1-what-are-adapters", + "destination": "/guides/advanced/adapter-development/1-what-are-adapters", + 
"permanent": true + }, + { + "source": "/docs/contributing/prerequisites-for-a-new-adapter", + "destination": "/guides/advanced/adapter-development/2-prerequisites-for-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/2-prerequisites-for-a-new-adapter", + "destination": "/guides/advanced/adapter-development/2-prerequisites-for-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/building-a-new-adapter", + "destination": "/guides/advanced/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/3-building-a-new-adapter", + "destination": "/guides/advanced/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/v0.13/docs/building-a-new-adapter", + "destination": "/guides/dbt-ecosystem/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/building-a-new-adapter", + "destination": "/guides/advanced/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/testing-a-new-adapter", + "destination": "/guides/advanced/adapter-development/4-testing-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/4-testing-a-new-adapter", + "destination": "/guides/advanced/adapter-development/4-testing-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/documenting-a-new-adapter", + "destination": "/guides/advanced/adapter-development/5-documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/5-documenting-a-new-adapter", + "destination": "/guides/advanced/adapter-development/5-documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/promoting-a-new-adapter", + "destination": "/guides/advanced/adapter-development/6-promoting-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/6-promoting-a-new-adapter", + "destination": "/guides/advanced/adapter-development/6-promoting-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/verifying-a-new-adapter", + "destination": "/guides/advanced/adapter-development/7-verifying-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/contributing/adapter-development/7-verifying-a-new-adapter", + "destination": "/guides/advanced/adapter-development/7-verifying-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/using-dbt-cloud/cloud-metrics-layer", + "destination": "/docs/use-dbt-semantic-layer/dbt-semantic-layer", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/impala-profile", + "destination": "/reference/warehouse-setups/impala-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/exasol-profile", + "destination": "/reference/warehouse-setups/exasol-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/layer-profile", + "destination": "/reference/warehouse-setups/layer-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/postgres-profile", + "destination": "/reference/warehouse-setups/postgres-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/greenplum-profile", + "destination": "/reference/warehouse-setups/greenplum-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/alloydb-profile", + "destination": 
"/reference/warehouse-setups/alloydb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/azuresynapse-profile", + "destination": "/reference/warehouse-setups/azuresynapse-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/snowflake-profile", + "destination": "/reference/warehouse-setups/snowflake-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/rockset-profile", + "destination": "/reference/warehouse-setups/rockset-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/trino-profile", + "destination": "/reference/warehouse-setups/trino-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/glue-profile", + "destination": "/reference/warehouse-setups/glue-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/duckdb-profile", + "destination": "/reference/warehouse-setups/duckdb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/vertica-profile", + "destination": "/reference/warehouse-setups/vertica-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/clickhouse-profile", + "destination": "/reference/warehouse-setups/clickhouse-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/athena-profile", + "destination": "/reference/warehouse-setups/athena-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/iomete-profile", + "destination": "/reference/warehouse-setups/iomete-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/mssql-profile", + "destination": "/reference/warehouse-setups/mssql-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/tidb-profile", + "destination": "/reference/warehouse-setups/tidb-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/materialize-profile", + "destination": "/reference/warehouse-setups/materialize-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/redshift-profile", + "destination": "/reference/warehouse-setups/redshift-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/databricks-profile", + "destination": "/reference/warehouse-setups/databricks-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/bigquery-profile", + "destination": "/reference/warehouse-setups/bigquery-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/dremio-profile", + "destination": "/reference/warehouse-setups/dremio-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/oracle-profile", + "destination": "/reference/warehouse-setups/oracle-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/teradata-profile", + "destination": "/reference/warehouse-setups/teradata-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/singlestore-profile", + "destination": "/reference/warehouse-setups/singlestore-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/sqlite-profile", + "destination": "/reference/warehouse-setups/sqlite-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/spark-profile", + "destination": "/reference/warehouse-setups/spark-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/mindsdb-profile", + "destination": "/reference/warehouse-setups/mindsdb-setup", + 
"permanent": true + }, + { + "source": "/reference/warehouse-profiles/ibmdb2-profile", + "destination": "/reference/warehouse-setups/ibmdb2-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/firebolt-profile", + "destination": "/reference/warehouse-setups/firebolt-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/mysql-profile", + "destination": "/reference/warehouse-setups/mysql-setup", + "permanent": true + }, + { + "source": "/reference/warehouse-profiles/hive-profile", + "destination": "/reference/warehouse-setups/hive-setup", + "permanent": true + }, + { + "source": "/reference/using-sources", + "destination": "/docs/build/sources", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/the-dbt-ide", + "destination": "/docs/getting-started/dbt-cloud-features", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/handling-merge-conflicts", + "destination": "/docs/collaborate/git/resolve-merge-conflicts", + "permanent": true + }, + { + "source": "/dbt-cloud/cloud-ide/viewing-docs-in-the-ide", + "destination": "/docs/getting-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/cloud-ide/ide-beta", + "destination": "/docs/getting-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/docs/running-a-dbt-project/using-the-dbt-ide", + "destination": "/docs/getting-started/develop-in-the-cloud", + "permanent": true + }, + { + "source": "/dbt-cloud/cloud-ide/the-ide-git-button", + "destination": "/docs/collaborate/git/version-control-basics", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/setting-up", + "destination": "/guides/legacy/building-packages", + "permanent": true + }, + { + "source": "/docs/building-a-dbt-project/dbt-jinja-functions", + "destination": "/reference/dbt-jinja-functions", + "permanent": true + }, + { + "source": "/docs/contributing/long-lived-discussions-guidelines", + "destination": "/community/resources/forum-guidelines", + "permanent": true + }, + { + "source": "/docs/guides/legacy/navigating-the-docs.md", + "destination": "/community/contribute", + "permanent": true + }, + { + "source": "/community/writing-on-discourse", + "destination": "/community/contributing/contributing-online-community", + "permanent": true + }, + { + "source": "/community/contributing", + "destination": "/community/contribute", + "permanent": true + }, + { + "source": "/docs/contributing/contributor-license-agreements", + "destination": "/community/resources/contributor-license-agreements", + "permanent": true + }, + { + "source": "/community/maintaining-a-channel", + "destination": "/community/resources/maintaining-a-channel", + "permanent": true + }, + { + "source": "/docs/contributing/oss-expectations", + "destination": "/community/resources/oss-expectations", + "permanent": true + }, + { + "source": "/docs/slack-rules-of-the-road", + "destination": "/community/resources/community-rules-of-the-road", + "permanent": true + }, + { + "source": "/docs/contributing/slack-rules-of-the-road", + "destination": "/community/resources/community-rules-of-the-road", + "permanent": true + }, + { + "source": "/community/resources/slack-rules-of-the-road", + "destination": "/community/resources/community-rules-of-the-road", + "permanent": true + }, + { + "source": "/blog/getting-started-with-the-dbt-semantic-layer", + "destination": "/blog/understanding-the-components-of-the-dbt-semantic-layer", + "permanent": true + }, + { + "source": 
"/docs/getting-started/develop-in-the-cloud#creating-a-development-environment", + "destination": "/docs/get-started/develop-in-the-cloud#set-up-and-access-the-cloud-ide", + "permanent": true + }, + { + "source": "/docs/cloud-developer-ide", + "destination": "/docs/build/custom-target-names#dbt-cloud-ide", + "permanent": true + }, + { + "source": "/website/docs/docs/contributing/building-a-new-adapter.md", + "destination": "/guides/dbt-ecosystem/adapter-development/3-building-a-new-adapter", + "permanent": true + }, + { + "source": "/guides/legacy/getting-help", + "destination": "/community/resources/getting-help", + "permanent": true + }, + { + "source": "/blog/tags/release-notes", + "destination": "/docs/dbt-versions/dbt-cloud-release-notes", + "permanent": true + }, + { + "source": "/faqs/dbt-jinja-functions", + "destination": "/reference/dbt-jinja-functions", + "permanent": true + }, + { + "source": "/website/docs/docs/contributing/documenting-a-new-adapter.md", + "destination": "/guides/dbt-ecosystem/adapter-development/5-documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/docs/docs/contributing/documenting-a-new-adapter", + "destination": "/docs/contributing/documenting-a-new-adapter", + "permanent": true + }, + { + "source": "/v0.8/reference", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.10/reference", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.12/reference", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.13/reference", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.13/docs/requiring-dbt-versions", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.14/docs/cloud-developer-ide", + "destination": "/", + "permanent": true + }, + { + "source": "/v0.15/docs/cloud-import-a-project-by-git-url", + "destination": "/docs/cloud/git/import-a-project-by-git-url", + "permanent": true + }, + { + "source": "/v0.15/docs/configure-your-profile", + "destination": "/docs/core/connection-profiles", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/dependencies", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/faqs", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/index", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/installation", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/prerequisites", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/setup", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/system-requirements", + "destination": "/docs/deploy/single-tenant", + "permanent": true + }, + { + "source": "/docs/dbt-cloud/on-premises/upgrading-kots", + "destination": "/docs/deploy/single-tenant", + "permanent": true + } + ] +}