diff --git a/.changes/unreleased/Features-20230606-165351.yaml b/.changes/unreleased/Features-20230606-165351.yaml new file mode 100644 index 00000000000..a650612840f --- /dev/null +++ b/.changes/unreleased/Features-20230606-165351.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Added support for parsing and serializing semantic models +time: 2023-06-06T16:53:51.117429-04:00 +custom: + Author: peterallenwebb + Issue: 7499 7503 diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index ec98ec6b2b7..f4f23617133 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -228,6 +228,7 @@ class SchemaSourceFile(BaseSourceFile): groups: List[str] = field(default_factory=list) # node patches contain models, seeds, snapshots, analyses ndp: List[str] = field(default_factory=list) + semantic_nodes: List[str] = field(default_factory=list) # any macro patches in this file by macro unique_id. mcp: Dict[str, str] = field(default_factory=dict) # any source patches in this file. 
The entries are package, name pairs diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 48132b0fbcb..a3be197513c 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -25,21 +25,22 @@ from dbt.contracts.publication import ProjectDependencies, PublicationConfig, PublicModel from dbt.contracts.graph.nodes import ( - Macro, + BaseNode, Documentation, - SourceDefinition, - GenericTestNode, Exposure, - Metric, + GenericTestNode, + GraphMemberNode, Group, - UnpatchedSourceDefinition, + Macro, ManifestNode, - GraphMemberNode, - ResultNode, - BaseNode, ManifestOrPublicNode, + Metric, ModelNode, RelationalNode, + ResultNode, + SemanticModel, + SourceDefinition, + UnpatchedSourceDefinition, ) from dbt.contracts.graph.unparsed import SourcePatch, NodeVersion, UnparsedVersion from dbt.contracts.graph.manifest_upgrade import upgrade_manifest_json @@ -706,6 +707,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin): public_nodes: MutableMapping[str, PublicModel] = field(default_factory=dict) project_dependencies: Optional[ProjectDependencies] = None publications: MutableMapping[str, PublicationConfig] = field(default_factory=dict) + semantic_nodes: MutableMapping[str, SemanticModel] = field(default_factory=dict) _doc_lookup: Optional[DocLookup] = field( default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None} @@ -894,7 +896,7 @@ def build_group_map(self): group_map[node.group].append(node.unique_id) self.group_map = group_map - def writable_manifest(self): + def writable_manifest(self) -> "WritableManifest": self.build_parent_and_child_maps() self.build_group_map() return WritableManifest( @@ -912,6 +914,7 @@ def writable_manifest(self): child_map=self.child_map, parent_map=self.parent_map, group_map=self.group_map, + semantic_nodes=self.semantic_nodes, ) def write(self, path): @@ -1246,6 +1249,11 @@ def add_doc(self, source_file: SourceFile, doc: 
Documentation): self.docs[doc.unique_id] = doc source_file.docs.append(doc.unique_id) + def add_semantic_model(self, source_file: SchemaSourceFile, semantic_model: SemanticModel): + _check_duplicates(semantic_model, self.semantic_nodes) + self.semantic_nodes[semantic_model.unique_id] = semantic_model + source_file.semantic_nodes.append(semantic_model.unique_id) + # end of methods formerly in ParseResult # Provide support for copy.deepcopy() - we just need to avoid the lock! @@ -1345,6 +1353,9 @@ class WritableManifest(ArtifactMixin): public_nodes: Mapping[UniqueID, PublicModel] = field( metadata=dict(description=("The public models used in the dbt project")) ) + semantic_nodes: Mapping[UniqueID, SemanticModel] = field( + metadata=dict(description=("The semantic models defined in the dbt project")) + ) metadata: ManifestMetadata = field( metadata=dict( description="Metadata about the manifest", diff --git a/core/dbt/contracts/graph/manifest_upgrade.py b/core/dbt/contracts/graph/manifest_upgrade.py index a3c299e8a25..7db051f7e4c 100644 --- a/core/dbt/contracts/graph/manifest_upgrade.py +++ b/core/dbt/contracts/graph/manifest_upgrade.py @@ -127,4 +127,6 @@ def upgrade_manifest_json(manifest: dict) -> dict: if "root_path" in doc_content: del doc_content["root_path"] doc_content["resource_type"] = "doc" + if "semantic_nodes" not in manifest: + manifest["semantic_nodes"] = {} return manifest diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index a3b988f35ff..f34c12359e3 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -6,29 +6,23 @@ import hashlib from mashumaro.types import SerializableType -from typing import ( - Optional, - Union, - List, - Dict, - Any, - Sequence, - Tuple, - Iterator, -) +from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Protocol from dbt.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin from dbt.clients.system import write_file from 
dbt.contracts.files import FileHash from dbt.contracts.graph.unparsed import ( + Dimension, Docs, + Entity, ExposureType, ExternalTable, FreshnessThreshold, HasYamlMetadata, MacroArgument, MaturityType, + Measure, MetricFilter, MetricTime, Owner, @@ -62,12 +56,6 @@ EmptySnapshotConfig, SnapshotConfig, ) -import sys - -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol # ===================================================================== @@ -564,6 +552,30 @@ def depends_on_macros(self): return self.depends_on.macros +@dataclass +class FileSlice(dbtClassMixin, Replaceable): + """Provides file slice level context about what something was created from. + + Implementation of the dbt-semantic-interfaces `FileSlice` protocol + """ + + filename: str + content: str + start_line_number: int + end_line_number: int + + +@dataclass +class SourceFileMetadata(dbtClassMixin, Replaceable): + """Provides file context about what something was created from. + + Implementation of the dbt-semantic-interfaces `Metadata` protocol + """ + + repo_file_path: str + file_slice: FileSlice + + # ==================================== # CompiledNode subclasses # ==================================== @@ -1411,6 +1423,28 @@ class Group(BaseNode): resource_type: NodeType = field(metadata={"restrict": [NodeType.Group]}) +# ==================================== +# SemanticModel and related classes +# ==================================== + + +@dataclass +class NodeRelation(dbtClassMixin): + alias: str + schema_name: str # TODO: Could this be called simply "schema" so we could reuse StateRelation? 
+ database: Optional[str] = None + + +@dataclass +class SemanticModel(GraphNode): + description: Optional[str] + model: str + node_relation: Optional[NodeRelation] + entities: Sequence[Entity] + measures: Sequence[Measure] + dimensions: Sequence[Dimension] + + # ==================================== # Patches # ==================================== diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 4730b094dfa..14a0585a3bd 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -661,6 +661,60 @@ def validate(cls, data): raise ValidationError("Group owner must have at least one of 'name' or 'email'.") +# +# semantic interfaces unparsed objects +# + + +@dataclass +class Entity(dbtClassMixin): + name: str + type: str # actually an enum + description: Optional[str] = None + role: Optional[str] = None + expr: Optional[str] = None + + +@dataclass +class MeasureAggregationParameters(dbtClassMixin): + percentile: Optional[float] = None + use_discrete_percentile: bool = False + use_approximate_percentile: bool = False + + +@dataclass +class Measure(dbtClassMixin): + name: str + agg: str # actually an enum + description: Optional[str] = None + create_metric: Optional[bool] = None + expr: Optional[str] = None + agg_params: Optional[MeasureAggregationParameters] = None + non_additive_dimension: Optional[Dict[str, Any]] = None + agg_time_dimension: Optional[str] = None + + +@dataclass +class Dimension(dbtClassMixin): + name: str + type: str # actually an enum + description: Optional[str] = None + is_partition: Optional[bool] = False + type_params: Optional[Dict[str, Any]] = None + expr: Optional[str] = None + # TODO metadata: Optional[Metadata] (this would actually be the YML for the dimension) + + +@dataclass +class UnparsedSemanticModel(dbtClassMixin): + name: str + description: Optional[str] + model: str # looks like "ref(...)" + entities: List[Entity] = field(default_factory=list) + measures: 
List[Measure] = field(default_factory=list) + dimensions: List[Dimension] = field(default_factory=list) + + def normalize_date(d: Optional[datetime.date]) -> Optional[datetime.datetime]: """Convert date to datetime (at midnight), and add local time zone if naive""" if d is None: diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py index 1312d7ace3e..5a98cb95be1 100644 --- a/core/dbt/node_types.py +++ b/core/dbt/node_types.py @@ -33,6 +33,7 @@ class NodeType(StrEnum): Exposure = "exposure" Metric = "metric" Group = "group" + SemanticModel = "semantic model" @classmethod def executable(cls) -> List["NodeType"]: diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index ada54f575d8..fafeeaab94a 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -57,6 +57,7 @@ DeprecatedReference, UpcomingReferenceDeprecation, ) +from dbt_extractor import py_extract_from_source # type: ignore from dbt.logger import DbtProcessState from dbt.node_types import NodeType, AccessType from dbt.clients.jinja import get_rendered, MacroStack @@ -99,6 +100,7 @@ ManifestNode, ResultNode, ModelNode, + NodeRelation, ) from dbt.contracts.graph.unparsed import NodeVersion from dbt.contracts.util import Writable @@ -528,6 +530,7 @@ def load(self): self.process_refs(self.root_project.project_name) self.process_docs(self.root_project) self.process_metrics(self.root_project) + self.process_semantic_models() self.check_valid_group_config() # update tracking data @@ -1176,6 +1179,28 @@ def process_metrics(self, config: RuntimeConfig): continue _process_metrics_for_node(self.manifest, current_project, exposure) + def process_semantic_models(self) -> None: + for semantic_model in self.manifest.semantic_nodes.values(): + if semantic_model.model: + statically_parsed = py_extract_from_source(f"{{{{ {semantic_model.model} }}}}") + if statically_parsed["refs"]: + + ref = statically_parsed["refs"][0] + if len(ref) == 2: + input_package_name, input_model_name = 
ref + else: + input_package_name, input_model_name = None, ref[0] + + refd_node = self.manifest.ref_lookup.find( + input_model_name, input_package_name, None, self.manifest + ) + if isinstance(refd_node, ModelNode): + semantic_model.node_relation = NodeRelation( + alias=refd_node.alias, + schema_name=refd_node.schema, + database=refd_node.database, + ) + # nodes: node and column descriptions # sources: source and table descriptions, column descriptions # macros: macro argument descriptions diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 4a5863102c1..4ab0545799d 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -1,8 +1,13 @@ from dbt.parser.schemas import YamlReader, SchemaParser from dbt.parser.common import YamlBlock from dbt.node_types import NodeType -from dbt.contracts.graph.unparsed import UnparsedExposure, UnparsedMetric, UnparsedGroup -from dbt.contracts.graph.nodes import Exposure, Metric, Group +from dbt.contracts.graph.unparsed import ( + UnparsedExposure, + UnparsedGroup, + UnparsedMetric, + UnparsedSemanticModel, +) +from dbt.contracts.graph.nodes import Exposure, Group, Metric, SemanticModel from dbt.exceptions import DbtInternalError, YamlParseDictError, JSONValidationError from dbt.context.providers import generate_parse_exposure, generate_parse_metrics from dbt.contracts.graph.model_config import MetricConfig, ExposureConfig @@ -269,3 +274,46 @@ def parse(self): raise YamlParseDictError(self.yaml.path, self.key, data, exc) self.parse_group(unparsed) + + +class SemanticModelParser(YamlReader): + def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock): + super().__init__(schema_parser, yaml, "semantic_models") + self.schema_parser = schema_parser + self.yaml = yaml + + def parse_semantic_model(self, unparsed: UnparsedSemanticModel): + package_name = self.project.project_name + unique_id = 
f"{NodeType.SemanticModel}.{package_name}.{unparsed.name}" + path = self.yaml.path.relative_path + + fqn = self.schema_parser.get_fqn_prefix(path) + fqn.append(unparsed.name) + + parsed = SemanticModel( + description=unparsed.description, + fqn=fqn, + model=unparsed.model, + name=unparsed.name, + node_relation=None, # Resolved from the value of "model" after parsing + original_file_path=self.yaml.path.original_file_path, + package_name=package_name, + path=path, + resource_type=NodeType.SemanticModel, + unique_id=unique_id, + entities=unparsed.entities, + measures=unparsed.measures, + dimensions=unparsed.dimensions, + ) + + self.manifest.add_semantic_model(self.yaml.file, parsed) + + def parse(self): + for data in self.get_key_dicts(): + try: + UnparsedSemanticModel.validate(data) + unparsed = UnparsedSemanticModel.from_dict(data) + except (ValidationError, JSONValidationError) as exc: + raise YamlParseDictError(self.yaml.path, self.key, data, exc) + + self.parse_semantic_model(unparsed) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index da2cf22b4f8..adf29b2a091 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -75,6 +75,7 @@ "analyses", "exposures", "metrics", + "semantic_models", ) @@ -218,6 +219,12 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None: group_parser = GroupParser(self, yaml_block) group_parser.parse() + if "semantic_models" in dct: + from dbt.parser.schema_yaml_readers import SemanticModelParser + + semantic_model_parser = SemanticModelParser(self, yaml_block) + semantic_model_parser.parse() + Parsed = TypeVar("Parsed", UnpatchedSourceDefinition, ParsedNodePatch, ParsedMacroPatch) NodeTarget = TypeVar("NodeTarget", UnparsedNodeUpdate, UnparsedAnalysisUpdate, UnparsedModelUpdate) diff --git a/core/setup.py b/core/setup.py index 0c4167998de..2eeb0d1389b 100644 --- a/core/setup.py +++ b/core/setup.py @@ -70,6 +70,7 @@ "cffi>=1.9,<2.0.0", "pyyaml>=5.3", "urllib3~=1.0", + 
"dbt-semantic-interfaces==0.1.0.dev3", ], zip_safe=False, classifiers=[ diff --git a/schemas/dbt/manifest/v10.json b/schemas/dbt/manifest/v10.json index 3b6773d66a6..c7e3863f3c4 100644 --- a/schemas/dbt/manifest/v10.json +++ b/schemas/dbt/manifest/v10.json @@ -10,7 +10,8 @@ "metrics", "groups", "selectors", - "public_nodes" + "public_nodes", + "semantic_nodes" ], "properties": { "metadata": { @@ -209,10 +210,17 @@ "$ref": "#/definitions/PublicModel" }, "description": "The public models used in the dbt project" + }, + "semantic_nodes": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/SemanticModel" + }, + "description": "The semantic models defined in the dbt project" } }, "additionalProperties": false, - "description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], groups: Mapping[str, dbt.contracts.graph.nodes.Group], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, 
dbt.contracts.graph.nodes.SourceDefinition, dbt.contracts.graph.nodes.Exposure, dbt.contracts.graph.nodes.Metric]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]], group_map: Optional[Dict[str, List[str]]], public_nodes: Mapping[str, dbt.contracts.publication.PublicModel])", + "description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], groups: Mapping[str, dbt.contracts.graph.nodes.Group], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, dbt.contracts.graph.nodes.SourceDefinition, dbt.contracts.graph.nodes.Exposure, dbt.contracts.graph.nodes.Metric]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]], group_map: Optional[Dict[str, List[str]]], public_nodes: Mapping[str, dbt.contracts.publication.PublicModel], semantic_nodes: Mapping[str, dbt.contracts.graph.nodes.SemanticModel])", "definitions": { "ManifestMetadata": { "type": 
"object", @@ -229,7 +237,7 @@ "generated_at": { "type": "string", "format": "date-time", - "default": "2023-06-01T17:10:44.803525Z" + "default": "2023-06-07T22:58:05.223879Z" }, "invocation_id": { "oneOf": [ @@ -240,7 +248,7 @@ "type": "null" } ], - "default": "2cd4fe5a-501b-422f-a628-80b34230967b" + "default": "468b0d14-2c98-40b2-ae17-fc3ab0257c34" }, "env": { "type": "object", @@ -470,7 +478,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.805417 + "default": 1686178685.225652 }, "config_call_dict": { "type": "object", @@ -1182,7 +1190,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.807345 + "default": 1686178685.226929 }, "config_call_dict": { "type": "object", @@ -1570,7 +1578,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.807956 + "default": 1686178685.2275221 }, "config_call_dict": { "type": "object", @@ -1846,7 +1854,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.8085592 + "default": 1686178685.228151 }, "config_call_dict": { "type": "object", @@ -2258,7 +2266,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.809459 + "default": 1686178685.229036 }, "config_call_dict": { "type": "object", @@ -2524,7 +2532,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.810038 + "default": 1686178685.2295911 }, "config_call_dict": { "type": "object", @@ -2783,7 +2791,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.810735 + "default": 1686178685.2302458 }, "config_call_dict": { "type": "object", @@ -3079,7 +3087,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.8118432 + "default": 1686178685.231343 }, "config_call_dict": { "type": "object", @@ -3577,7 +3585,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.812869 + "default": 1686178685.232364 }, "config_call_dict": { "type": "object", @@ -3989,7 +3997,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.814112 + "default": 1686178685.2336159 } }, 
"additionalProperties": false, @@ -4301,7 +4309,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.814394 + "default": 1686178685.2339032 }, "supported_languages": { "oneOf": [ @@ -4542,7 +4550,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.815081 + "default": 1686178685.234565 } }, "additionalProperties": false, @@ -4763,7 +4771,7 @@ }, "created_at": { "type": "number", - "default": 1685639444.8157508 + "default": 1686178685.23521 }, "group": { "oneOf": [ @@ -4983,7 +4991,7 @@ "generated_at": { "type": "string", "format": "date-time", - "default": "2023-06-01T17:10:44.816336Z" + "default": "2023-06-07T22:58:05.235799Z" }, "deprecation_date": { "oneOf": [ @@ -4999,6 +5007,343 @@ }, "additionalProperties": false, "description": "Used to represent cross-project models" + }, + "SemanticModel": { + "type": "object", + "required": [ + "name", + "resource_type", + "package_name", + "path", + "original_file_path", + "unique_id", + "fqn", + "model", + "entities", + "measures", + "dimensions" + ], + "properties": { + "name": { + "type": "string" + }, + "resource_type": { + "type": "string", + "enum": [ + "model", + "analysis", + "test", + "snapshot", + "operation", + "seed", + "rpc", + "sql operation", + "doc", + "source", + "macro", + "exposure", + "metric", + "group", + "semantic model" + ] + }, + "package_name": { + "type": "string" + }, + "path": { + "type": "string" + }, + "original_file_path": { + "type": "string" + }, + "unique_id": { + "type": "string" + }, + "fqn": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "model": { + "type": "string" + }, + "node_relation": { + "oneOf": [ + { + "$ref": "#/definitions/NodeRelation" + }, + { + "type": "null" + } + ] + }, + "entities": { + "type": "array", + "items": { + "$ref": "#/definitions/Entity" + } + }, + "measures": { + "type": "array", + "items": { + "$ref": 
"#/definitions/Measure" + } + }, + "dimensions": { + "type": "array", + "items": { + "$ref": "#/definitions/Dimension" + } + } + }, + "additionalProperties": false, + "description": "SemanticModel(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], description: Optional[str], model: str, node_relation: Optional[dbt.contracts.graph.nodes.NodeRelation], entities: Sequence[dbt.contracts.graph.unparsed.Entity], measures: Sequence[dbt.contracts.graph.unparsed.Measure], dimensions: Sequence[dbt.contracts.graph.unparsed.Dimension])" + }, + "NodeRelation": { + "type": "object", + "required": [ + "alias", + "schema_name" + ], + "properties": { + "alias": { + "type": "string" + }, + "schema_name": { + "type": "string" + }, + "database": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "description": "NodeRelation(alias: str, schema_name: str, database: Optional[str] = None)" + }, + "Entity": { + "type": "object", + "required": [ + "name", + "type" + ], + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "description": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "role": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "expr": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "description": "Entity(name: str, type: str, description: Optional[str] = None, role: Optional[str] = None, expr: Optional[str] = None)" + }, + "Measure": { + "type": "object", + "required": [ + "name", + "agg" + ], + "properties": { + "name": { + "type": "string" + }, + "agg": { + "type": "string" + }, + "description": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "create_metric": { + "oneOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + 
}, + "expr": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "agg_params": { + "oneOf": [ + { + "$ref": "#/definitions/MeasureAggregationParameters" + }, + { + "type": "null" + } + ] + }, + "non_additive_dimension": { + "oneOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] + }, + "agg_time_dimension": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "description": "Measure(name: str, agg: str, description: Optional[str] = None, create_metric: Optional[bool] = None, expr: Optional[str] = None, agg_params: Optional[dbt.contracts.graph.unparsed.MeasureAggregationParameters] = None, non_additive_dimension: Optional[Dict[str, Any]] = None, agg_time_dimension: Optional[str] = None)" + }, + "MeasureAggregationParameters": { + "type": "object", + "required": [], + "properties": { + "percentile": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "use_discrete_percentile": { + "type": "boolean", + "default": false + }, + "use_approximate_percentile": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "description": "MeasureAggregationParameters(percentile: Optional[float] = None, use_discrete_percentile: bool = False, use_approximate_percentile: bool = False)" + }, + "Dimension": { + "type": "object", + "required": [ + "name", + "type" + ], + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "description": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "is_partition": { + "oneOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false + }, + "type_params": { + "oneOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ] + }, + "expr": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "description": "Dimension(name: str, type: str, 
description: Optional[str] = None, is_partition: Optional[bool] = False, type_params: Optional[Dict[str, Any]] = None, expr: Optional[str] = None)" } }, "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py index 781d0a21a70..e7de7ba248a 100644 --- a/test/unit/test_manifest.py +++ b/test/unit/test_manifest.py @@ -370,6 +370,7 @@ def test_no_nodes(self): metrics={}, selectors={}, metadata=ManifestMetadata(generated_at=datetime.utcnow()), + semantic_nodes={}, ) invocation_id = dbt.events.functions.EVENT_MANAGER.invocation_id @@ -396,6 +397,7 @@ def test_no_nodes(self): "docs": {}, "disabled": {}, "public_nodes": {}, + "semantic_nodes": {}, }, ) @@ -535,6 +537,7 @@ def test_no_nodes_with_metadata(self, mock_user): metadata=metadata, files={}, exposures={}, + semantic_nodes={}, ) self.assertEqual( @@ -564,6 +567,7 @@ def test_no_nodes_with_metadata(self, mock_user): }, "disabled": {}, "public_nodes": {}, + "semantic_nodes": {}, }, ) @@ -911,6 +915,7 @@ def test_no_nodes(self): metadata=metadata, files={}, exposures={}, + semantic_nodes={}, ) self.assertEqual( manifest.writable_manifest().to_dict(omit_none=True), @@ -935,6 +940,7 @@ def test_no_nodes(self): "docs": {}, "disabled": {}, "public_nodes": {}, + "semantic_nodes": {}, }, ) diff --git a/test/unit/test_node_types.py b/test/unit/test_node_types.py index 5ea4a810833..f56e57b27da 100644 --- a/test/unit/test_node_types.py +++ b/test/unit/test_node_types.py @@ -16,6 +16,7 @@ NodeType.Exposure: "exposures", NodeType.Metric: "metrics", NodeType.Group: "groups", + NodeType.SemanticModel: "semantic models", } diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index 7805294a65f..c506070699b 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -890,6 +890,7 @@ def expected_seeded_manifest(project, model_database=None, 
quote_model=False): }, "disabled": {}, "public_nodes": {}, + "semantic_nodes": {}, } @@ -1453,6 +1454,7 @@ def expected_references_manifest(project): ], } }, + "semantic_nodes": {}, } @@ -1937,4 +1939,5 @@ def expected_versions_manifest(project): "disabled": {}, "macros": {}, "public_nodes": {}, + "semantic_nodes": {}, } diff --git a/tests/functional/artifacts/test_artifacts.py b/tests/functional/artifacts/test_artifacts.py index df272f3a2f7..bd8c9742420 100644 --- a/tests/functional/artifacts/test_artifacts.py +++ b/tests/functional/artifacts/test_artifacts.py @@ -469,6 +469,7 @@ def verify_manifest(project, expected_manifest, start_time, manifest_schema_path "exposures", "selectors", "public_nodes", + "semantic_nodes", } assert set(manifest.keys()) == manifest_keys diff --git a/tests/functional/artifacts/test_previous_version_state.py b/tests/functional/artifacts/test_previous_version_state.py index 3e428a0a491..f4cc773e233 100644 --- a/tests/functional/artifacts/test_previous_version_state.py +++ b/tests/functional/artifacts/test_previous_version_state.py @@ -283,7 +283,7 @@ def generate_latest_manifest( # The actual test method. Run `dbt list --select state:modified --state ...` # once for each past manifest version. They all have the same content, but different # schema/structure, only some of which are forward-compatible with the - # current WriteableManifest class. + # current WritableManifest class. def compare_previous_state( self, project, diff --git a/tests/functional/semantic_models/test_semantic_model_parsing.py b/tests/functional/semantic_models/test_semantic_model_parsing.py new file mode 100644 index 00000000000..344e58c0f61 --- /dev/null +++ b/tests/functional/semantic_models/test_semantic_model_parsing.py @@ -0,0 +1,57 @@ +import pytest + +from dbt.cli.main import dbtRunner +from dbt.contracts.graph.manifest import Manifest + +schema_yml = """models: + - name: fct_revenue + description: This is the model fct_revenue. 
It should be able to use doc blocks + +semantic_models: + - name: revenue + description: This is the revenue semantic model. It should be able to use doc blocks + model: ref('fct_revenue') + + measures: + - name: txn_revenue + expr: revenue + agg: sum + + dimensions: + - name: ds + type: time + expr: created_at + type_params: + is_primary: True + time_granularity: day + + entities: + - name: user + type: foreign + expr: user_id +""" + +fct_revenue_sql = """select + 1 as id, + 10 as user_id, + 1000 as revenue, + current_timestamp as created_at""" + + +class TestSemanticModelParsing: + @pytest.fixture(scope="class") + def models(self): + return { + "schema.yml": schema_yml, + "fct_revenue.sql": fct_revenue_sql, + } + + def test_semantic_model_parsing(self, project): + runner = dbtRunner() + result = runner.invoke(["parse"]) + assert result.success + assert isinstance(result.result, Manifest) + manifest = result.result + assert len(manifest.semantic_nodes) == 1 + semantic_model = manifest.semantic_nodes["semantic model.test.revenue"] + assert semantic_model.node_relation.alias == "fct_revenue"