-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into snapshot_column_names
- Loading branch information
Showing
29 changed files
with
1,431 additions
and
395 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Features | ||
body: Enable `--resource-type` and `--exclude-resource-type` CLI flags and environment variables for `dbt test` | ||
time: 2024-09-03T13:24:28.592837+01:00 | ||
custom: | ||
Author: TowardOliver dbeatty10 | ||
Issue: "10656" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Features | ||
body: Execute microbatch models in batches | ||
time: 2024-09-13T23:21:11.935434-04:00 | ||
custom: | ||
Author: michelleark | ||
Issue: "10700" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Fixes | ||
body: Fix `--resource-type test` for `dbt list` and `dbt build` | ||
time: 2024-09-17T17:44:46.121032-06:00 | ||
custom: | ||
Author: dbeatty10 | ||
Issue: "10730" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Under the Hood | ||
body: Add Snowplow tracking for behavior flag deprecations | ||
time: 2024-09-11T16:27:30.293832-04:00 | ||
custom: | ||
Author: mikealfare | ||
Issue: "10552" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Under the Hood | ||
body: Replace `TestSelector` with `ResourceTypeSelector` | ||
time: 2024-09-16T10:22:01.339462-06:00 | ||
custom: | ||
Author: dbeatty10 | ||
Issue: "10718" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
kind: Under the Hood | ||
body: Standardize returning `ResourceTypeSelector` instances in `dbt list` and `dbt | ||
build` | ||
time: 2024-09-18T17:03:25.639516-06:00 | ||
custom: | ||
Author: dbeatty10 | ||
Issue: "10739" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
from datetime import datetime, timedelta | ||
from typing import List, Optional, Tuple | ||
|
||
import pytz | ||
|
||
from dbt.artifacts.resources.types import BatchSize | ||
from dbt.contracts.graph.nodes import ModelNode, NodeConfig | ||
from dbt.exceptions import DbtInternalError, DbtRuntimeError | ||
|
||
|
||
class MicrobatchBuilder: | ||
"""A utility class for building microbatch definitions associated with a specific model""" | ||
|
||
def __init__( | ||
self, | ||
model: ModelNode, | ||
is_incremental: bool, | ||
event_time_start: Optional[datetime], | ||
event_time_end: Optional[datetime], | ||
): | ||
if model.config.incremental_strategy != "microbatch": | ||
raise DbtInternalError( | ||
f"Model '{model.name}' does not use 'microbatch' incremental_strategy." | ||
) | ||
self.model = model | ||
|
||
if self.model.config.batch_size is None: | ||
raise DbtRuntimeError( | ||
f"Microbatch model '{self.model.name}' does not have a 'batch_size' config (one of {[batch_size.value for batch_size in BatchSize]}) specificed." | ||
) | ||
|
||
self.is_incremental = is_incremental | ||
self.event_time_start = ( | ||
event_time_start.replace(tzinfo=pytz.UTC) if event_time_start else None | ||
) | ||
self.event_time_end = event_time_end.replace(tzinfo=pytz.UTC) if event_time_end else None | ||
|
||
def build_end_time(self): | ||
"""Defaults the end_time to the current time in UTC unless a non `None` event_time_end was provided""" | ||
return self.event_time_end or datetime.now(tz=pytz.utc) | ||
|
||
def build_start_time(self, checkpoint: Optional[datetime]): | ||
"""Create a start time based off the passed in checkpoint. | ||
If the checkpoint is `None`, then `None` will be returned as a checkpoint is necessary | ||
to build a start time. This is because we build the start time relative to the checkpoint | ||
via the batchsize and offset, and we cannot offset a checkpoint if there is no checkpoint. | ||
""" | ||
|
||
if self.event_time_start: | ||
return MicrobatchBuilder.truncate_timestamp( | ||
self.event_time_start, self.model.config.batch_size | ||
) | ||
|
||
if not self.is_incremental or checkpoint is None: | ||
# TODO: return new model-level configuration or raise error | ||
return None | ||
|
||
assert isinstance(self.model.config, NodeConfig) | ||
batch_size = self.model.config.batch_size | ||
|
||
lookback = self.model.config.lookback | ||
start = MicrobatchBuilder.offset_timestamp(checkpoint, batch_size, -1 * lookback) | ||
|
||
return start | ||
|
||
def build_batches( | ||
self, start: Optional[datetime], end: datetime | ||
) -> List[Tuple[Optional[datetime], datetime]]: | ||
""" | ||
Given a start and end datetime, builds a list of batches where each batch is | ||
the size of the model's batch_size. | ||
""" | ||
if start is None: | ||
return [(start, end)] | ||
|
||
batch_size = self.model.config.batch_size | ||
curr_batch_start: datetime = start | ||
curr_batch_end: datetime = MicrobatchBuilder.offset_timestamp( | ||
curr_batch_start, batch_size, 1 | ||
) | ||
|
||
batches: List[Tuple[Optional[datetime], datetime]] = [(curr_batch_start, curr_batch_end)] | ||
while curr_batch_end <= end: | ||
curr_batch_start = curr_batch_end | ||
curr_batch_end = MicrobatchBuilder.offset_timestamp(curr_batch_start, batch_size, 1) | ||
batches.append((curr_batch_start, curr_batch_end)) | ||
|
||
# use exact end value as stop | ||
batches[-1] = (batches[-1][0], end) | ||
|
||
return batches | ||
|
||
@staticmethod | ||
def offset_timestamp(timestamp: datetime, batch_size: BatchSize, offset: int) -> datetime: | ||
"""Truncates the passed in timestamp based on the batch_size and then applies the offset by the batch_size. | ||
Note: It's important to understand that the offset applies to the truncated timestamp, not | ||
the origin timestamp. Thus being offset by a day isn't relative to the any given hour that day, | ||
but relative to the start of the day. So if the timestamp is the very end of a day, 2024-09-17 23:59:59, | ||
you have a batch size of a day, and an offset of +1, then the returned value ends up being only one | ||
second later, 2024-09-18 00:00:00. | ||
2024-09-17 16:06:00 + Batchsize.hour -1 -> 2024-09-17 15:00:00 | ||
2024-09-17 16:06:00 + Batchsize.hour +1 -> 2024-09-17 17:00:00 | ||
2024-09-17 16:06:00 + Batchsize.day -1 -> 2024-09-16 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.day +1 -> 2024-09-18 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.month -1 -> 2024-08-01 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.month +1 -> 2024-10-01 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.year -1 -> 2023-01-01 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.year +1 -> 2025-01-01 00:00:00 | ||
""" | ||
truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size) | ||
|
||
offset_timestamp: datetime | ||
if batch_size == BatchSize.hour: | ||
offset_timestamp = truncated + timedelta(hours=offset) | ||
elif batch_size == BatchSize.day: | ||
offset_timestamp = truncated + timedelta(days=offset) | ||
elif batch_size == BatchSize.month: | ||
offset_timestamp = truncated | ||
for _ in range(abs(offset)): | ||
if offset < 0: | ||
offset_timestamp = offset_timestamp - timedelta(days=1) | ||
else: | ||
offset_timestamp = offset_timestamp + timedelta(days=31) | ||
offset_timestamp = MicrobatchBuilder.truncate_timestamp( | ||
offset_timestamp, batch_size | ||
) | ||
elif batch_size == BatchSize.year: | ||
offset_timestamp = truncated.replace(year=truncated.year + offset) | ||
|
||
return offset_timestamp | ||
|
||
@staticmethod | ||
def truncate_timestamp(timestamp: datetime, batch_size: BatchSize): | ||
"""Truncates the passed in timestamp based on the batch_size. | ||
2024-09-17 16:06:00 + Batchsize.hour -> 2024-09-17 16:00:00 | ||
2024-09-17 16:06:00 + Batchsize.day -> 2024-09-17 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.month -> 2024-09-01 00:00:00 | ||
2024-09-17 16:06:00 + Batchsize.year -> 2024-01-01 00:00:00 | ||
""" | ||
if batch_size == BatchSize.hour: | ||
truncated = datetime( | ||
timestamp.year, | ||
timestamp.month, | ||
timestamp.day, | ||
timestamp.hour, | ||
0, | ||
0, | ||
0, | ||
pytz.utc, | ||
) | ||
elif batch_size == BatchSize.day: | ||
truncated = datetime( | ||
timestamp.year, timestamp.month, timestamp.day, 0, 0, 0, 0, pytz.utc | ||
) | ||
elif batch_size == BatchSize.month: | ||
truncated = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0, 0, pytz.utc) | ||
elif batch_size == BatchSize.year: | ||
truncated = datetime(timestamp.year, 1, 1, 0, 0, 0, 0, pytz.utc) | ||
|
||
return truncated |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.