forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(models): Introducing Dataset Partitions Aspect (datahub-project#10997)
Co-authored-by: John Joyce <[email protected]> Co-authored-by: John Joyce <[email protected]>
- Loading branch information
1 parent
9321e94
commit 6f09b96
Showing
5 changed files
with
60 additions
and
10 deletions.
There are no files selected for viewing
24 changes: 24 additions & 0 deletions
24
metadata-models/src/main/pegasus/com/linkedin/dataset/PartitionSummary.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
namespace com.linkedin.dataset | ||
|
||
import com.linkedin.common.AuditStamp | ||
|
||
/** | ||
* Summary information about a single partition of a dataset: | ||
* its identifier plus optional created / last-modified audit stamps. | ||
*/ | ||
record PartitionSummary { | ||
/** | ||
* A unique id / value identifying the partition, | ||
* generated by applying the key definition to a given row. | ||
*/ | ||
partition: string | ||
|
||
/** | ||
* Audit stamp for when the partition was created. Optional. | ||
*/ | ||
created: optional AuditStamp | ||
|
||
/** | ||
* Audit stamp for when the partition was last modified / touched. Optional. | ||
*/ | ||
lastModified: optional AuditStamp | ||
} |
19 changes: 19 additions & 0 deletions
19
metadata-models/src/main/pegasus/com/linkedin/dataset/PartitionsSummary.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace com.linkedin.dataset | ||
|
||
/** | ||
* Aspect summarizing the partitions of a dataset, intended for Data Lake tables | ||
* (e.g. Hive, S3, Iceberg, Delta, Hudi, etc.). | ||
* Holds the minimum and maximum partition, each of which is optional. | ||
*/ | ||
@Aspect = { | ||
"name": "partitionsSummary" | ||
} | ||
record PartitionsSummary { | ||
/** | ||
* The minimum partition as ordered. | ||
* NOTE(review): the ordering criterion is not defined in this model - confirm with the producer. | ||
*/ | ||
minPartition: optional PartitionSummary | ||
|
||
/** | ||
* The maximum partition as ordered. | ||
* NOTE(review): the ordering criterion is not defined in this model - confirm with the producer. | ||
*/ | ||
maxPartition: optional PartitionSummary | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 14 additions & 10 deletions
24
metadata-models/src/main/pegasus/com/linkedin/timeseries/PartitionSpec.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,28 @@ | ||
namespace com.linkedin.timeseries | ||
|
||
/** | ||
* Defines how the data is partitioned | ||
* A reference to a specific partition in a dataset. | ||
*/ | ||
record PartitionSpec { | ||
|
||
type: enum PartitionType { | ||
FULL_TABLE, | ||
QUERY, | ||
PARTITION | ||
} = "PARTITION" | ||
|
||
/** | ||
* String representation of the partition | ||
* A unique id / value for the partition for which statistics were collected, | ||
* generated by applying the key definition to a given row. | ||
*/ | ||
@TimeseriesField = {} | ||
partition: string | ||
|
||
/** | ||
* Time window of the partition if applicable | ||
* Time window of the partition, if we are able to extract it from the partition key. | ||
*/ | ||
timePartition: optional TimeWindow | ||
|
||
/** | ||
* Deprecated: this field is unused. Defaults to "PARTITION". | ||
*/ | ||
@deprecated | ||
type: enum PartitionType { | ||
FULL_TABLE, | ||
QUERY, | ||
PARTITION | ||
} = "PARTITION" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters