forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'datahub-project:master' into master
- Loading branch information
Showing
16 changed files
with
836 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
122 changes: 122 additions & 0 deletions
122
metadata-integration/java/datahub-client/scripts/container_key_guid_generator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import click | ||
from typing import Dict, Any | ||
import json | ||
from dataclasses import dataclass | ||
from abc import ABC, abstractmethod | ||
from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey | ||
|
||
|
||
class URNGenerator(ABC):
    """Interface for objects that build a container URN from key/value parameters.

    Subclasses list the parameter names they accept in ``required_fields`` and
    ``optional_fields`` and call ``_validate_fields`` before building their key,
    so every generator rejects bad input with the same messages.
    """

    # Parameter names that must be present in ``args``.
    required_fields = ()
    # Parameter names that may be present in addition to the required ones.
    optional_fields = ()

    def _validate_fields(self, args: Dict[str, Any]) -> None:
        """Check ``args`` against the declared field names.

        Missing required fields are reported before unknown fields, each in
        iteration order.

        Raises:
            ValueError: if a required field is absent or an unknown field is given.
        """
        for field in self.required_fields:
            if field not in args:
                raise ValueError(f"Missing required field: {field}")

        allowed = set(self.required_fields) | set(self.optional_fields)
        for arg in args:
            if arg not in allowed:
                raise ValueError(f"Invalid field: {arg}")

    @abstractmethod
    def generate(self, args: Dict[str, Any]) -> str:
        """Return the URN string described by ``args``."""


class DatabaseURNGenerator(URNGenerator):
    """Builds a URN from a ``DatabaseKey``."""

    required_fields = ("platform", "database")
    optional_fields = ("instance",)

    def generate(self, args: Dict[str, Any]) -> str:
        """Validate ``args`` and return ``DatabaseKey(...).as_urn()``.

        Raises:
            ValueError: if ``platform`` or ``database`` is missing, or a field
                other than ``platform``, ``database``, ``instance`` is given.
        """
        self._validate_fields(args)

        database_key = DatabaseKey(
            platform=args["platform"],
            instance=args.get("instance"),
            database=args["database"],
        )
        return database_key.as_urn()


class SchemaURNGenerator(URNGenerator):
    """Builds a URN from a ``SchemaKey``."""

    required_fields = ("platform", "database", "schema")
    optional_fields = ("instance", "env")

    def generate(self, args: Dict[str, Any]) -> str:
        """Validate ``args`` and return ``SchemaKey(...).as_urn()``.

        Raises:
            ValueError: if ``platform``, ``database`` or ``schema`` is missing,
                or a field outside those plus ``instance``/``env`` is given.
        """
        self._validate_fields(args)

        schema_key = SchemaKey(
            platform=args["platform"],
            instance=args.get("instance"),
            env=args.get("env"),
            database=args["database"],
            schema=args["schema"],
        )
        return schema_key.as_urn()
|
||
|
||
# Registry mapping each --container-type choice to the generator that handles it.
URN_GENERATORS: Dict[str, URNGenerator] = {
    "database": DatabaseURNGenerator(),
    "schema": SchemaURNGenerator(),
}
|
||
|
||
def validate_key_value(ctx, param, value):
    """Click callback that turns repeated ``key=value`` options into a dict.

    Args:
        ctx: Click context (unused).
        param: Click parameter being validated (unused).
        value: Iterable of raw ``key=value`` strings; may be empty or None.

    Returns:
        Dict mapping each stripped key to its stripped value, or ``{}`` when
        no values were supplied. Only the first ``=`` splits an item, so
        values may themselves contain ``=``. A repeated key keeps the last
        value given.

    Raises:
        click.BadParameter: if an item has no ``=`` or its key is empty.
    """
    if not value:
        return {}

    result = {}
    for item in value:
        key, sep, val = item.partition("=")
        key = key.strip()
        # Reject both "no separator" and "=value": an empty key can never
        # name a valid field, so fail here with the format hint.
        if not sep or not key:
            raise click.BadParameter(
                f"Invalid key-value pair: {item}. Format should be key=value"
            )
        result[key] = val.strip()
    return result
|
||
|
||
@click.command()
@click.option(
    "--container-type",
    type=click.Choice(["database", "schema"]),
    required=True,
    help="The type of container to generate a URN for",
)
@click.option(
    "--param",
    "-p",
    multiple=True,
    callback=validate_key_value,
    help="Parameters in key=value format. Can be used multiple times.",
)
@click.option(
    "--output-format",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format for the URN",
)
def generate_urn(container_type: str, param: Dict[str, str], output_format: str) -> None:
    """Generate URNs for different types of containers.

    Example usage:
    ./container_key_guid_generator.py --container-type database -p platform=test-platform -p instance=DEV -p database=test-database
    """
    # Look the generator up on its own so that a KeyError raised while
    # building the URN is not misreported as an unknown container type.
    try:
        generator = URN_GENERATORS[container_type]
    except KeyError as e:
        raise click.UsageError(f"Unknown container type: {container_type}") from e

    # Generators signal missing/unknown parameters with ValueError; surface
    # those as CLI usage errors.
    try:
        urn = generator.generate(param)
    except ValueError as e:
        raise click.UsageError(str(e)) from e

    if output_format == "json":
        result = {"urn": urn, "container_type": container_type, "parameters": param}
        click.echo(json.dumps(result, indent=2))
    else:
        click.echo(urn)
|
||
|
||
# Script entry point: hand control to the click command when run directly.
if __name__ == "__main__":
    generate_urn()
42 changes: 42 additions & 0 deletions
42
...gration/java/datahub-client/src/main/java/io/datahubproject/models/util/ContainerKey.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package io.datahubproject.models.util; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
import com.linkedin.common.urn.Urn; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import lombok.Data; | ||
import lombok.EqualsAndHashCode; | ||
import lombok.experimental.SuperBuilder; | ||
|
||
@Data | ||
@SuperBuilder | ||
@EqualsAndHashCode(callSuper = true) | ||
@JsonInclude(JsonInclude.Include.NON_NULL) | ||
public abstract class ContainerKey extends DataHubKey { | ||
private String platform; | ||
private String instance; | ||
|
||
private static final String URN_PREFIX = "urn:li:container:"; | ||
private static final String URN_ENTITY = "container"; | ||
private static final String PLATFORM_MAP_FIELD = "platform"; | ||
private static final String INSTANCE_MAP_FIELD = "instance"; | ||
|
||
@Override | ||
public Map<String, String> guidDict() { | ||
|
||
Map<String, String> bag = new HashMap<>(); | ||
if (platform != null) bag.put(PLATFORM_MAP_FIELD, platform); | ||
if (instance != null) bag.put(INSTANCE_MAP_FIELD, instance); | ||
|
||
return bag; | ||
} | ||
|
||
public String asUrnString() { | ||
String guid = guid(); | ||
return URN_PREFIX + guid; | ||
} | ||
|
||
public Urn asUrn() { | ||
return Urn.createFromTuple(URN_ENTITY, guid()); | ||
} | ||
} |
41 changes: 41 additions & 0 deletions
41
...java/datahub-client/src/main/java/io/datahubproject/models/util/DataHubGuidGenerator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package io.datahubproject.models.util; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import java.security.MessageDigest; | ||
import java.util.Map; | ||
import lombok.SneakyThrows; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
@Slf4j | ||
public class DataHubGuidGenerator { | ||
private static final ObjectMapper objectMapper = new ObjectMapper(); | ||
|
||
@SneakyThrows | ||
public static String dataHubGuid(Map<String, String> obj) { | ||
// Configure ObjectMapper for consistent serialization | ||
objectMapper.configure( | ||
com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true); | ||
|
||
// Convert map to JSON string with sorted keys | ||
String jsonKey = objectMapper.writeValueAsString(obj); | ||
|
||
// Generate MD5 hash | ||
MessageDigest md = MessageDigest.getInstance("MD5"); | ||
byte[] hashBytes = md.digest(jsonKey.getBytes()); | ||
|
||
// Convert byte array to hexadecimal string | ||
StringBuilder hexString = new StringBuilder(); | ||
for (byte hashByte : hashBytes) { | ||
String hex = Integer.toHexString(0xff & hashByte); | ||
if (hex.length() == 1) { | ||
hexString.append('0'); | ||
} | ||
hexString.append(hex); | ||
} | ||
|
||
if (log.isDebugEnabled()) { | ||
log.debug("DataHub Guid for {} is : {}", jsonKey, hexString); | ||
} | ||
return hexString.toString(); | ||
} | ||
} |
Oops, something went wrong.