Commit
Merge branch 'current' into amychen1776-patch-8
mirnawong1 authored Feb 15, 2024
2 parents 70e85e1 + 0913057 commit 4d13b67
Showing 76 changed files with 1,084 additions and 110 deletions.
49 changes: 49 additions & 0 deletions .github/ISSUE_TEMPLATE/internal-orch-team.yml
@@ -0,0 +1,49 @@
```yml
name: Orchestration team - Request changes to docs
description: File a docs update request that is not already tracked in the Orch team's Release Plans (Notion database).
labels: ["content", "internal-orch-team"]
body:
  - type: markdown
    attributes:
      value: |
        * You can ask questions or submit ideas for the dbt docs in [Issues](https://github.com/dbt-labs/docs-internal/issues/new/choose)
        * Before you file an issue, read the [Contributing guide](https://github.com/dbt-labs/docs-internal#contributing).
        * Check to make sure someone hasn't already opened a similar [issue](https://github.com/dbt-labs/docs-internal/issues).

  - type: checkboxes
    id: contributions
    attributes:
      label: Contributions
      description: Please read the contribution docs before opening an issue or pull request.
      options:
        - label: I have read the contribution docs, and understand what's expected of me.

  - type: textarea
    attributes:
      label: Link to the page on docs.getdbt.com requiring updates
      description: Please link to the page or pages you'd like to see improved.
    validations:
      required: true

  - type: textarea
    attributes:
      label: What part(s) of the page would you like to see updated?
      description: |
        - Give as much detail as you can to help us understand the change you want to see.
        - Why should the docs be changed? What use cases does it support?
        - What is the expected outcome?
    validations:
      required: true

  - type: textarea
    attributes:
      label: Reviewers/Stakeholders/SMEs
      description: List the reviewers, stakeholders, and subject matter experts (SMEs) to collaborate with for the docs update.
    validations:
      required: true

  - type: textarea
    attributes:
      label: Related Jira tickets
      description: Add any other context or screenshots about the docs update request here.
    validations:
      required: false
```
111 changes: 111 additions & 0 deletions .github/workflows/repo-sync.yml
@@ -0,0 +1,111 @@
```yml
name: Repo Sync

# **What it does**: Syncs the docs.getdbt.com public repo into the docs private repo.
# This GitHub Actions workflow keeps the `current` branch of those two repos in sync.
# **Why we have it**: To keep the open-source repository up-to-date
# while still having an internal repository for sensitive work.
# For more details, see https://github.com/repo-sync/repo-sync#how-it-works

on:
  schedule:
    - cron: '0 6,12,18 * * *' # Runs at 06:00, 12:00, and 18:00 UTC

jobs:
  repo-sync:
    permissions:
      contents: write
      pull-requests: write
    name: Repo Sync
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Use the INTERMEDIATE_BRANCH as the checkout reference
          ref: ${{ secrets.INTERMEDIATE_BRANCH }}
          token: ${{ secrets.GITHUB_TOKEN }}
          # Fetch all history for all branches and tags
          fetch-depth: 0

      # Sync the source repo to the destination branch using repo-sync/github-sync
      - uses: repo-sync/github-sync@v2
        name: Sync repo to branch
        with:
          # Source repository to sync from
          source_repo: ${{ secrets.SOURCE_REPO }}
          # Source branch to sync from
          source_branch: current
          # Destination branch to sync to
          destination_branch: ${{ secrets.INTERMEDIATE_BRANCH }}
          github_token: ${{ secrets.WORKFLOW_TOKEN }}

      - name: Ship pull request
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.WORKFLOW_TOKEN }}
          result-encoding: string
          script: |
            const {owner, repo} = context.repo;
            const head = '${{ secrets.INTERMEDIATE_BRANCH }}';
            const base = 'current';
            async function closePullRequest(prNumber) {
              console.log('closing PR', prNumber);
              await github.rest.pulls.update({
                owner,
                repo,
                pull_number: prNumber,
                state: 'closed'
              });
              console.log('closed PR', prNumber);
            }
            console.log('Creating new PR');
            let pull, pull_number;
            try {
              const response = await github.rest.pulls.create({
                owner,
                repo,
                head,
                base,
                title: 'REPO SYNC - Public to Private',
                body: 'This is an automated pull request to sync changes between the public and private repos.',
              });
              pull = response.data;
              pull_number = pull.number;
              console.log('Created pull request successfully', pull.html_url);
            } catch (err) {
              // Don't error/alert if there are no commits to sync
              if (err.message?.includes('No commits')) {
                console.log(err.message);
                return;
              }
              throw err;
            }
            const { data: prFiles } = await github.rest.pulls.listFiles({ owner, repo, pull_number });
            if (prFiles.length) {
              console.log(prFiles.length, 'files have changed');
            } else {
              console.log('No files changed, closing');
              await closePullRequest(pull_number);
              return;
            }
            console.log('Checking for merge conflicts');
            if (pull.mergeable_state === 'dirty') {
              console.log('Pull request has a conflict', pull.html_url);
              await closePullRequest(pull_number);
              throw new Error('PR has a conflict, please resolve manually');
            }
            console.log('No detected merge conflicts');

            console.log('Merging the PR');
            await github.rest.pulls.merge({
              owner,
              repo,
              pull_number,
              merge_method: 'merge',
            });
            console.log('Merged the PR successfully');
```
12 changes: 12 additions & 0 deletions website/blog/2021-11-23-how-to-upgrade-dbt-versions.md
@@ -12,6 +12,18 @@ date: 2021-11-29
is_featured: true
---

:::tip February 2024 Update

It's been a few years since dbt-core turned 1.0! Since then, we've committed to releasing zero breaking changes whenever possible, and it's become much easier to upgrade dbt Core versions.

In 2024, we're taking this promise further by:
- Stabilizing interfaces for everyone — adapter maintainers, metadata consumers, and (of course) people writing dbt code everywhere — as discussed in [our November 2023 roadmap update](https://github.com/dbt-labs/dbt-core/blob/main/docs/roadmap/2023-11-dbt-tng.md).
- Introducing **Keep on latest version** in dbt Cloud. No more manual upgrades and no more need for _a second sandbox project_ just to try out new features in development. For more details, refer to [Upgrade Core version in Cloud](/docs/dbt-versions/upgrade-dbt-version-in-cloud).

We're leaving the rest of this post as is, so we can all remember how it used to be. Enjoy a stroll down memory lane.

:::

As we get closer to dbt v1.0 shipping in December, it's a perfect time to get your installation up to scratch. dbt 1.0 represents the culmination of over five years of development and refinement to the analytics engineering experience - smoothing off sharp edges, speeding up workflows and enabling whole new classes of work.

Even with all the new shinies on offer, upgrading can be daunting – you rely on dbt to power your analytics workflow and can’t afford to change things just to discover that your daily run doesn’t work anymore. I’ve been there. This is the checklist I wish I had when I owned my last company’s dbt project.
@@ -23,6 +23,7 @@ Unfortunately, there are significantly fewer properties than ads - it seems many

**The tools:** I want to be able to run my project on [Google Cloud Functions](https://cloud.google.com/functions) due to the generous free tier. [dlt](https://dlthub.com/) is a new Python library for declarative data ingestion, which I have wanted to test for some time. Finally, I will use dbt Core for transformation.

<!-- truncate -->
## The starting point

If I want to have reliable information on the state of the market, I will need to:
86 changes: 86 additions & 0 deletions website/blog/2024-02-13-dbt-explorer.md
@@ -0,0 +1,86 @@
---
title: "Column-Level Lineage, Model Performance, and Recommendations: ship trusted data products with dbt Explorer"
description: "Learn about how to get the most out of the new features in dbt Explorer"
slug: dbt-explorer

authors: [dave_connors]

tags: [analytics craft]
hide_table_of_contents: false

date: 2024-02-13
is_featured: true
---

## What’s in a data platform?

[Raising a dbt project](https://docs.getdbt.com/blog/how-to-build-a-mature-dbt-project-from-scratch) is hard work. We, as data professionals, have poured ourselves into raising happy, healthy data products, and we should be proud of the insights they’ve driven. It certainly wasn’t without its challenges though — we remember the terrible twos, where we worked hard just to get the platform to walk straight. We remember the angsty teenage years, where tests kept failing, seemingly just to spite us. A lot of blood, sweat, and tears are shed in the service of clean data!

Once the project could dress and feed itself, we also worked hard to get buy-in from our colleagues who put their trust in our little project. Without deep trust and understanding of what we built, the colleagues who depend on our data (or even those involved in developing it with us — it takes a village after all!) are more likely to be in our DMs with questions than in their BI tools, generating insights.

When our teammates ask about where the data in their reports comes from, how fresh it is, or about the right calculation for a metric, what a joy! This means they want to put what we’ve built to good use — the challenge is that, historically, *it hasn’t been all that easy to answer these questions well.* That has often meant a manual, painstaking process of cross-checking run logs and your dbt documentation site to get the stakeholder the information they need.

Enter [dbt Explorer](https://www.getdbt.com/product/dbt-explorer)! dbt Explorer centralizes documentation, lineage, and execution metadata to reduce the work required to ship trusted data products faster.

<!-- truncate -->
## dbt Explorer: an upgrade to data discovery

In the days of yore, answering a question about your data platform may have required a bit of cryptography: sifting through possibly-up-to-date documentation in your internal wiki, combing run logs to figure out when your models were executed, and Slacking the data team member with the most tenure. In the past several years, dbt Docs helped centralize the documentation workflow and dramatically improved the process. While useful, dbt Docs only ever provides a single point-in-time snapshot, and lacks any sense of your platform’s deployment and execution information. dbt Explorer supercharges the docs experience by providing stateful awareness of your data platform, making support and triage easier than ever — it even proactively lets you know what to focus on to build even higher-quality data products!

### Where’s this data coming from?

Your stakeholders and fellow developers both need a way to orient themselves within your dbt project, and a way to know the full provenance of the number staring at them in their spreadsheet. *Where did this info come from? Does it include XYZ data source, or just ABC?*

It’s the classic stakeholder question for a reason! Knowing data lineage inherently increases your level of trust in the reporting you use to make the right decisions. The dbt DAG has long served as the map of your data flows, tracing the flow from raw data to ready-to-query data mart.


<Lightbox src="/img/blog/2024-02-13-dbt-explorer/full-lineage.png" width="85%" title="Look at that lineage!" />


dbt Explorer builds on this experience in three key ways:

- **Lineage 🤝 Docs** - dbt Explorer’s lineage is embedded into the documentation page for each resource, meaning there’s no need to toggle between your DAG and your docs and lose valuable context. Similarly, when you’re navigating the DAG in full screen mode, clicking on a resource in your project loads a summary panel of the most critical info about the resource you’re interested in (including execution status, data contract info, you name it). Understanding the lineage via the DAG and the context from your written documentation is one workflow in Explorer, not two.
- **Cross-project lineage -** if you’re using the new [dbt Mesh](https://www.getdbt.com/product/dbt-mesh) architecture, you may trace your data back to the upstream end of the DAG and find its source is not raw data, but in fact the output of another team’s dbt project! Luckily, dbt Explorer provides first-class support for visualizing and understanding cross-project lineage when using the dbt Mesh:
  - **Account View + Project DAG:** dbt Explorer provides a higher-level view of the relationships between all the projects in your dbt Cloud account — you can trace lineage across projects and easily drill down into each one. When you click a project in this view, the side panel lists all the public models available for use. Double-clicking opens the lineage for that specific project, making it easy to traverse your organization’s knowledge graph!
  - **Cross-project icons:** when you’re in a project’s lineage, dbt Explorer marks cross-project relationships to make it clear when there are dependencies that span multiple projects. Stakeholders can quickly understand which project owners they may need to contact if they need more information about a dataset.
- **Column-level lineage -** long-time listeners of the pod know that column-level lineage is a frequently requested feature within dbt. It’s one thing to know how data flows between models, but column-level relationships help you understand *precisely* how data is used in models — this makes debugging data issues a lot simpler! We’re stoked to announce that dbt Explorer offers this feature embedded alongside your model lineage as well.

<Lightbox src="/img/blog/2024-02-13-dbt-explorer/column-level-lineage.png" width="85%" title="You can trace the data in a column from the source to the end of your DAG!" />

With dbt Explorer, you can answer any question about your data’s lineage at any grain, whether it’s project to project, model to model, or column to column.

### Ok but is it fresh? Is it *right*?

Once the data’s journey to your BI tool is clear, there’s a natural second question one would ask before using it — is it, uh, *good data?* Just knowing where it came from is not enough to build trust in the data product — you need to know if it’s timely and accurate.

dbt Explorer marries the execution metadata to the documentation experience — it reflects the latest state of your project across all your job runs in your [production environment](https://docs.getdbt.com/docs/deploy/deploy-environments#set-as-production-environment), and embeds the execution information throughout the product. For each model, seed, or snapshot, Explorer displays its latest execution status, as well as statuses for any tests run against those resources. Sources show the latest source freshness info, and exposures embed the aggregate test and freshness info right into the details page! No more leaving the docs site to check the most recent logs to see what’s fresh and what’s not — Explorer centralizes everything so you don’t have to!

<Lightbox src="/img/blog/2024-02-13-dbt-explorer/embedded-metadata-model.png" width="85%" title="passing model! passing tests!" />

<Lightbox src="/img/blog/2024-02-13-dbt-explorer/embedded-metadata-source.png" width="85%" title="have you ever seen a fresher source?" />


### Is the project healthy? Are we managing it properly?

Beyond building solid data products and making sure they are trusted and used, developers need to know how they may improve their projects’ quality, or what areas may need some focus for refactoring and optimization in the next quarter. There’s always a balance between maintaining a data platform and adding new features to it. Historically, it’s been hard to know exactly where to invest time and effort to improve the health of your project — dbt Explorer provides two features that shine a light on possible areas for improvement within your project.

#### Recommendations

One of dbt’s more popular open source packages is [dbt_project_evaluator](https://github.com/dbt-labs/dbt-project-evaluator), which tests your project against a set of well-established dbt best practices. dbt Explorer now surfaces many of the same recommendations directly within the Explorer UI using metadata from the Discovery API, without any need to download and run the package!
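
If you'd rather run the checks yourself, the package route still works. Here is a minimal sketch of pulling it into a project via `packages.yml`; the version range is illustrative, so check the package repo for the current release:

```yml
# Illustrative packages.yml entry; pin the release range your dbt version supports
packages:
  - package: dbt-labs/dbt_project_evaluator
    version: [">=0.8.0", "<1.0.0"]
```

After `dbt deps`, running `dbt build --select package:dbt_project_evaluator` executes the package's checks against your project.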

Each model and source has a `Recommendations` tab on its resource details page, with specific recommendations on how to improve the quality of that resource. Explorer also offers a global view, showing *all* the recommendations across the project, and includes some top-level metrics measuring the test and documentation coverage of the models in your project. These recommendations provide insight into how you can build a more well-documented, well-tested, and well-built project, leading to less confusion and more trust.


<Lightbox src="/img/blog/2024-02-13-dbt-explorer/recommendations.png" width="85%" title="The recommendations summary — I’ve got some work to do!" />

#### Model Performance Trends

A huge pain point for analytics engineers is trying to understand if their [dbt models are taking longer or are running less efficiently over time](https://docs.getdbt.com/blog/how-we-shaved-90-minutes-off-model). A model that worked great when your data was small may not work so great when your platform matures! Unless things start to actively break, it can be hard to know where to focus your refactoring work.

dbt Explorer now surfaces model execution metadata to take the guesswork out of fine-tuning your dbt runs. There’s a new high-level overview page highlighting the models that take the longest to run, error the most, and have the highest rate of test failures. Each model details page also has a new `Performance` tab, which shows that particular model’s execution history for up to three months of job runs. Spotting an ominous, slow increase in runtimes may indicate it’s time for some refactoring — no need to comb through countless `run_results.json` files yourself! dbt Explorer gets you the data you need where you need it.
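
For a sense of what that manual combing looked like, here is a minimal Node.js sketch (in the spirit of the workflow script earlier on this page) that aggregates model timings from archived `run_results.json` files. The `artifacts/<run-id>/` layout and chronologically sorting directory names are assumptions; the `unique_id` and `execution_time` fields follow dbt's run results artifact schema.

```js
// Aggregate per-model timings across archived dbt run results.
// Assumes artifacts/<run-id>/run_results.json, with run-ids sorting chronologically.
const fs = require('fs');
const path = require('path');

const timings = {};
for (const runDir of fs.readdirSync('artifacts').sort()) {
  const file = path.join('artifacts', runDir, 'run_results.json');
  if (!fs.existsSync(file)) continue;
  const { results } = JSON.parse(fs.readFileSync(file, 'utf8'));
  for (const r of results) {
    // Skipped results have no timing; keep only numeric execution times
    if (typeof r.execution_time === 'number') {
      (timings[r.unique_id] ??= []).push(r.execution_time);
    }
  }
}

// Flag models whose latest run is much slower than their historical average
for (const [model, times] of Object.entries(timings)) {
  const avg = times.reduce((a, b) => a + b, 0) / times.length;
  const latest = times[times.length - 1];
  if (latest > 2 * avg) {
    console.log(`${model}: latest ${latest.toFixed(1)}s vs avg ${avg.toFixed(1)}s`);
  }
}
```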

<Lightbox src="/img/blog/2024-02-13-dbt-explorer/model-execution.png" width="85%" title="maybe I should check on that one long run!" />

## Bon voyage!

They say the best time to ~~invest~~ ~~plant a tree~~ document your dbt project is yesterday, and the second best time is today. With all the bells and whistles that supercharge your documentation experience in dbt Explorer, there’s no time like the present! Leaning into your documentation and taking advantage of your metadata in dbt Explorer will lead to better data products shipped faster — get out there and explore!
2 changes: 1 addition & 1 deletion website/docs/best-practices/clone-incremental-models.md
@@ -7,7 +7,7 @@ hoverSnippet: Learn how to clone incremental models for CI jobs.
---

Before you begin, you must be aware of a few conditions:
- - `dbt clone` is only available with dbt version 1.6 and newer. Refer to our [upgrade guide](/docs/dbt-versions/upgrade-core-in-cloud) for help enabling newer versions in dbt Cloud
+ - `dbt clone` is only available with dbt version 1.6 and newer. Refer to our [upgrade guide](/docs/dbt-versions/upgrade-dbt-version-in-cloud) for help enabling newer versions in dbt Cloud
- This strategy only works for warehouses that support zero-copy cloning (otherwise `dbt clone` will just create pointer views).
- Some teams may want to test that their incremental models run in both incremental mode and full-refresh mode.

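As a hedged illustration of the pattern this page describes (not an excerpt from the guide itself), a CI job step might clone production incremental models before a state-based build. The `prod-artifacts` path and the GitHub Actions step format are assumptions about your CI setup:

```yml
# Hypothetical CI step; assumes production run artifacts were downloaded to prod-artifacts/
- name: Clone prod incremental models, then build modified models
  run: |
    dbt clone --select state:modified+,config.materialized:incremental --state prod-artifacts
    dbt build --select state:modified+ --state prod-artifacts
```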