Skip to content

Commit

Permalink
feat(models): add support for generic platform resources (datahub-pro…
Browse files Browse the repository at this point in the history
  • Loading branch information
shirshanka authored Oct 5, 2024
1 parent 5c2c555 commit fff67b9
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
namespace com.linkedin.common

/**
 * A serialized blob plus the metadata needed to interpret it: the content
 * type of the bytes and, optionally, the schema the value conforms to.
 */
record SerializedValue {

  /**
   * Raw bytes of the serialized value.
   */
  blob: bytes

  /**
   * Content type of the bytes held in `blob`. Defaults to JSON.
   */
  contentType: enum SerializedValueContentType {
    JSON,
    BINARY
  } = "JSON"

  /**
   * Type of the schema that models the object serialized into `blob`.
   * Leave this field unset when the schema is not known.
   * When the schema is known, set the matching schema type; use NONE if
   * none of the existing schema categories apply.
   */
  schemaType: optional enum SerializedValueSchemaType {
    AVRO,
    PROTOBUF,
    PEGASUS,
    THRIFT,
    JSON,
    NONE
  }

  /**
   * Optional reference to the schema that models the object,
   * e.g. 'com.linkedin.platformresource.slack.SlackConversation'.
   */
  schemaRef: optional string
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
namespace com.linkedin.platformresource

import com.linkedin.common.SerializedValue

/**
 * Info aspect for a platform resource.
 * Platform resources hold miscellaneous data used by non-core parts of the
 * system, for instance data persisted for and retrieved by auxiliary
 * integrations such as Slack or Microsoft Teams.
 */
@Aspect = {
  "name": "platformResourceInfo"
}
record PlatformResourceInfo {

  /**
   * Loose, generic type of the resource, e.g. conversation, user, grant.
   * Producers are not forced to use a fixed set of symbols; the
   * PlatformResourceType enumeration offers commonly agreed-upon terms but
   * following it is not required.
   * The value is indexed as a keyword so resources can be found by type,
   * e.g. all platform resources of type user for the platform looker.
   */
  @Searchable = {
    "fieldType": "KEYWORD"
  }
  resourceType: string

  /**
   * Primary key of this platform resource,
   * e.g. the memberID for a Slack member.
   * The key does not need to include any additional specificity for the
   * dataPlatform; PlatformResourceKey is meant to represent that.
   */
  @Searchable = {
    "fieldType": "KEYWORD"
  }
  primaryKey: string

  /**
   * Secondary keys by which this platform resource can also be located,
   * e.g. email or phone for a Slack member.
   */
  @Searchable = {
    "/*": {
      "fieldType": "KEYWORD"
    }
  }
  secondaryKeys: optional array[string]

  /**
   * Serialized value of this platform resource item.
   */
  value: optional SerializedValue
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace com.linkedin.platformresource

/**
 * Key aspect for a Platform Resource.
 * Platform Resources are assets that sit outside the core data model. They
 * are stored in DataHub mainly to support application-specific use-cases
 * that are not general enough to move into the core data model, for
 * instance persisting and retrieving additional user profile data from
 * auxiliary integrations such as Slack or Microsoft Teams so that details
 * can be resolved later.
 */
@Aspect = {
  "name": "platformResourceKey"
}
record PlatformResourceKey {

  /**
   * Unique id for this entity.
   * The format is unconstrained, though most implementations will choose
   * a UUID.
   * The id should be globally unique across the entire DataHub instance and
   * uniquely identify the stored resource, so most implementations will
   * combine logical attributes such as platform name, platform instance,
   * platform-specific-id and resource type to build it,
   * e.g. slack:slack-instance:slack-user-id:user-info
   * or guid(slack, slack-instance, slack-user-id, user-info).
   */
  id: string
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
namespace com.linkedin.platformresource

/**
 * Symbols that producers and consumers of platform resources can loosely
 * agree on.
 * These values can be used to populate PlatformResourceInfo.resourceType.
 */
enum PlatformResourceType {

  /**
   * A user-level resource, e.g. a Slack member or a Looker user.
   */
  USER_INFO,

  /**
   * A conversation resource, e.g. a Slack channel.
   */
  CONVERSATION
}
11 changes: 11 additions & 0 deletions metadata-models/src/main/resources/entity-registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,17 @@ entities:
aspects:
- dataHubConnectionDetails
- dataPlatformInstance
# Registry entry for the generic platformResource entity: keyed by the
# platformResourceKey aspect and carrying the three aspects listed below.
- name: platformResource
  doc: >-
    Platform Resources are assets that are unmodeled and stored outside of
    the core data model. They are stored in DataHub primarily to help with
    application-specific use-cases that are not sufficiently generalized to move into the core data model.
  category: core
  keyAspect: platformResourceKey
  aspects:
    - dataPlatformInstance
    - platformResourceInfo
    - status
events:
plugins:
aspectPayloadValidators:
Expand Down

0 comments on commit fff67b9

Please sign in to comment.