Commit

Merge branch 'datahub-project:master' into master
anshbansal authored Nov 13, 2024
2 parents e225a52 + 470c858 commit 05e3ed4
Showing 8 changed files with 199 additions and 4 deletions.
@@ -17,6 +17,7 @@ import {
getStructuredReport,
RUNNING,
SUCCESS,
SUCCEEDED_WITH_WARNINGS,
} from '../utils';
import { ExecutionRequestResult } from '../../../../types.generated';
import { StructuredReport } from './reporting/StructuredReport';
@@ -190,7 +191,7 @@ export const ExecutionDetailsModal = ({ urn, open, onClose }: Props) => {
<SubHeaderParagraph>{resultSummaryText}</SubHeaderParagraph>
{structuredReport ? <StructuredReport report={structuredReport} /> : null}
</StatusSection>
{status === SUCCESS && (
{(status === SUCCESS || status === SUCCEEDED_WITH_WARNINGS) && (
<IngestedAssetsSection>
{data?.executionRequest?.id && <IngestedAssets id={data?.executionRequest?.id} />}
</IngestedAssetsSection>
1 change: 1 addition & 0 deletions docs-website/sidebars.js
@@ -331,6 +331,7 @@ module.exports = {
},
{
"DataHub Cloud Release History": [
"docs/managed-datahub/release-notes/v_0_3_7",
"docs/managed-datahub/release-notes/v_0_3_6",
"docs/managed-datahub/release-notes/v_0_3_5",
"docs/managed-datahub/release-notes/v_0_3_4",
117 changes: 117 additions & 0 deletions docs/managed-datahub/release-notes/v_0_3_7.md
@@ -0,0 +1,117 @@
# v0.3.7
---

Release Availability Date
---
13-Nov-2024

Recommended CLI/SDK
---
- `v0.14.1.7` with release notes at https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7

If you are using an older CLI/SDK version, please upgrade. This applies to all CLI/SDK usage, whether through your terminal, GitHub Actions, Airflow, the Python SDK, or the Java SDK. We strongly recommend upgrading, as we continually ship fixes in the CLI, and staying current helps us support you better.

## Release Changelog
---

- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.1
- Note Breaking Changes: https://datahubproject.io/docs/how/updating-datahub/#0141

- Breaking Changes
- Authentication & RestAPI Authorization enabled by default (since v0.3.6)
- Helm Chart Requirement: 1.4.132+
- Recommend setting timezone for `datahub-gc` and `datahub-usage-reporting`
- ```yaml
  acryl-datahub:
    global:
      datahub:
        timezone: 'America/Los_Angeles'
  ```
- #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to represent multiple values with a comma-delimited string in `value`.
- #10472 - `SANDBOX` added as a FabricType. Once metadata with this fabric type has been added, rollback requires manual cleanup in the databases.
- #11619 - schema field/column paths can no longer be empty strings
- #11619 - schema field/column paths can no longer be duplicated within the schema
- #11570 - The `DatahubClientConfig`'s server field no longer defaults to `http://localhost:8080`. Be sure to explicitly set this (see the sketch after this list).
- #11570 - If a `datahub_api` is explicitly passed to a stateful ingestion config provider, it will be used. We previously ignored it if the pipeline context also had a graph object.
- #11518 - DataHub Garbage Collection: Various entities that are soft-deleted (after 10d) or are timeseries *entities* (dataprocess, execution requests) will be removed automatically using logic in the `datahub-gc` ingestion source.
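
For #11570, here is a minimal sketch of setting the server explicitly when constructing a graph client with the Python SDK's `DataHubGraph`; the URL and token below are placeholders, not values from this release.

```python
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

# The server field no longer defaults to http://localhost:8080, so it must be set explicitly.
# Both the server URL and the token below are placeholder values.
graph = DataHubGraph(
    DatahubClientConfig(
        server="https://<your-instance>.acryl.io/gms",
        token="<personal-access-token>",
    )
)
```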

- Bug Fixes
- [UI] Fix a bug in displaying the filter value counts when selecting filters on the primary search experience
- [UI] Fix unnecessary horizontal scrolling for wide markdown documentation.
- [UI] Fix bug in sibling external URLs; the dbt and Snowflake URLs are now shown as separate, correct URLs.
- [UI] Fix bug on listing data product assets with View applied
- [UI] Fix siblings bug in Schema Field queries tab
- [UI] Handle edge cycles in the lineage graph correctly
- [UI] Hide incorrect "Lineage" sidebar section on sibling pages (incorrect merge)
- [UI] Miscellaneous fixes to **Automations** forms UI - creating and editing automations.
- [UI] Fix scrolling to the end of the list of tabs on Asset Profiles
- [UI] Fix Compact View preview on Hover Card (looked squished!)
- [UI] Update asset counts on Domain profile pages immediately after adding or removing assets
- [UI] Improve support for Compliance Forms and Structured Properties on sibling asset profile pages
- [Automations] **Column Description Propagation**: Fix Column Description Propagation issue where column description would not propagate if self-lineage was stored in graph index
- [Automations] **Snowflake Tag Sync**: Fix bug in Snowflake Tag Sync that failed to sync to columns with special characters

- Product
- [BETA] Introducing the **BigQuery Metadata Sync Automation** to sync tags, glossary terms, and descriptions from DataHub to BigQuery. Check out the [feature guide](https://datahubproject.io/docs/automations/bigquery-metadata-sync/) for more information. To enable this BETA feature, reach out to your Acryl representative.
- [BETA] Introducing the **AI Classification Automation** to automatically classify your tables & columns using your organization's custom glossary terms. Check out the [feature guide](https://datahubproject.io/docs/automations/ai-term-suggestion) for more information. To enable this BETA feature, reach out to your Acryl representative.
- [BETA] A new way to visualize Column-Level Lineage, focused on a single column. Accessible by clicking on a column name in the column details sidebar or by clicking on the "Explore complete column lineage" button on a column in the regular lineage visualization. This will allow you to view only the upstreams and downstreams of the specific column being viewed. Please reach out to your Acryl representative to enable this feature.
- [BETA] Support running Automations via a Remote Executor using an Executor ID. This is currently in Beta, please reach out to your Acryl representative for more information.
- [BETA] Support plugging in custom Mixpanel or Google Analytics Measurement ID (GA4) to DataHub. Reach out to your Acryl representative for more information.
- Introducing **Structured Properties** UI. Create and manage custom properties for all asset types via the DataHub UI under **Govern** > **Structured Properties**. Feature guide will be coming in v0.3.8 - reach out to your Acryl representative for more information. Requires the `Manage Structured Properties` privilege to edit, `View Structured Properties` privilege to view.
- Introducing **Compliance Forms** UI. Create and manage compliance forms to run large-scale metadata collection initiatives inside your organization. Supported for all asset types via the DataHub UI under **Govern** > **Compliance Forms**. Feature guide will be coming in v0.3.8 - reach out to your Acryl representative for more information. Requires the `Manage Compliance Forms` privilege to edit, `View Compliance Forms` privilege to view. Compliance Forms also support analytics, which are updated once per day by default.
- Support adding and removing structured properties from Table & Column Properties Tab
- Support filtering by Structured Properties in the search UI (main search only, not on lists yet)
- Acryl 2.0 is enabled by default for all users who have not explicitly set their display preference via **Settings** > **Appearance**.
- Support searching the visible lineage graph by asset name
- Support showing 'all' assets in a downstream or upstream lineage level in one click
- Support searching the assets hidden behind a collapsed "show more" node
- On lineage graph, draw an arrow from a column to the "show more" node if that column has lineage to a hidden node
- On lineage graph, add control to show lineage edges to entities that are deleted / do not exist
- Support deleting Data Product from the Data Product page
- Support viewing & editing documentation in full-screen mode
- Support copying queries for View Definitions (sidebar + tabs)
- Support V2 UI with Chrome Extension, fix miscellaneous bugs related to documentations, glossary, and lineage interactions.
- Minor UX improvements (alignment, etc.) to the Quality and Assertions tabs.
- Reorder the asset sidebar sections to prioritize documentation & lineage, the most used features; the status and share-related sections were moved down.
- Add "Total Views" and "Recent Views" statistics to Dashboard & Chart asset sidebar header.
- Ingestion UI: Always display the number of assets ingested on "Failure" & "Succeeded With Warnings"
- Permissions: Hide **Settings** > **Access Tokens** page if user doesn't have the `Generate Access Tokens` privilege.
- Add a Properties tab to Asset sidebar
- Hide the 'notes' icon from the Columns table on Dataset Profiles, only show in the Column sidebar
- Add Properties Count, Column Count, Incident Count to Asset Profile tab names
- Allow resizing of the browse sidebar
- Display custom Assertion Error messages via the UI
- Add sorting to Columns table
- Add description to "hover preview" of assets
- Rename 'Inbox' navigation item to 'Tasks' to align with rebranding as 'Task Center'
- Support viewing correctly merged schema change history for sibling pages
- Minor UX improvements on lineage graph
- Minor UX improvements on Glossary, Search Cards, Home page, Subscriptions tab, and more.
- Improved usage-based search ranking. Please reach out with any questions or concerns
- Improved UX for setting up and managing SSO

- Ingestion changes
- In addition to the improvements listed here: https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7
- PowerBI: Support for PowerBI Apps and cross-workspace lineage
- Fivetran: Major improvements to configurability and improved reliability with large Fivetran setups
- Snowflake & BigQuery: Improved handling of temporary tables and swap statements when generating lineage
- [Beta] Preset integration

- Platform changes
- Added datahub-usage-reporting job to calculate usage metrics for search ranking
- Metadata Test performance improvements: async ingestion & tag patch support
- Authentication & RestAPI Authorization enabled by default
- Added datahub-gc and datahub-usage-reporting SYSTEM ingestion sources
- Added sweeper to executor to cancel duplicate and stale ingestion jobs
- Added soft delete status to edges in graph store
- Added service-side options to support newer clients with an older service
- ALTERNATE_MCP_VALIDATION=true
- MCP_VALIDATION_IGNORE_UNKNOWN=true
- OpenAPIv3
- Added generic entities scroll endpoint
- Added `async` and `createIfNotExists` on aspect endpoints
- System Operations privilege extended to all system operations
- [BETA] Introduce Entity Change Events Poll API behind permission "Get Platform Events". This enables programmatic access to entity change events in DataHub. Reach out to your Acryl representative for more information.
- (system / internal) Exclude form-prompt tests in live Metadata Tests evaluation
- (system / internal) Exclude form-prompt tests in stored Metadata Test results
- Elasticsearch reindex time limit of 8h removed
@@ -266,6 +266,7 @@ private static List<ChangeEvent> computeDiffs(
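// Pass the not-yet-processed base fields so that target fields which still exist in the base schema are not misreported as renames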
SchemaField renamedField =
findRenamedField(
curBaseField,
new HashSet<>(baseFields.subList(baseFieldIdx, baseFields.size())),
targetFields.subList(targetFieldIdx, targetFields.size()),
renamedFields);
if (renamedField == null) {
@@ -289,7 +290,10 @@ private static List<ChangeEvent> computeDiffs(
// minor version bump for both.
SchemaField renamedField =
findRenamedField(
curTargetField, baseFields.subList(baseFieldIdx, baseFields.size()), renamedFields);
curTargetField,
new HashSet<>(targetFields.subList(targetFieldIdx, targetFields.size())),
baseFields.subList(baseFieldIdx, baseFields.size()),
renamedFields);
if (renamedField == null) {
processAdd(changeCategories, changeEvents, datasetUrn, curTargetField, auditStamp);
++targetFieldIdx;
@@ -348,10 +352,14 @@ private static void sortFieldsByPath(SchemaMetadata schemaMetadata) {
}

private static SchemaField findRenamedField(
SchemaField curField, List<SchemaField> targetFields, Set<SchemaField> renamedFields) {
SchemaField curField,
Set<SchemaField> baseFields,
List<SchemaField> targetFields,
Set<SchemaField> renamedFields) {
return targetFields.stream()
.filter(schemaField -> isRenamed(curField, schemaField))
.filter(field -> !renamedFields.contains(field))
.filter(field -> !baseFields.contains(field)) // Filter out fields that will match later
.findFirst()
.orElse(null);
}
@@ -153,6 +153,37 @@ public void testSchemaFieldRename() throws Exception {
Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual);
}

@Test
public void testSchemaFieldRename2() throws Exception {
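// Case-only rename ('LastName' -> 'lastName') should be reported as a single RENAME change event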
SchemaMetadataChangeEventGenerator test = new SchemaMetadataChangeEventGenerator();

Urn urn = getTestUrn();
String entity = "dataset";
String aspect = "schemaMetadata";
AuditStamp auditStamp = getTestAuditStamp();

Aspect<SchemaMetadata> from =
getSchemaMetadata(
List.of(
new SchemaField().setFieldPath("id").setNativeDataType("VARCHAR"),
new SchemaField().setFieldPath("fullname").setNativeDataType("VARCHAR"),
new SchemaField().setFieldPath("LastName").setNativeDataType("VARCHAR")));
Aspect<SchemaMetadata> to =
getSchemaMetadata(
List.of(
new SchemaField().setFieldPath("id").setNativeDataType("VARCHAR"),
new SchemaField().setFieldPath("fullname").setNativeDataType("VARCHAR"),
new SchemaField().setFieldPath("lastName").setNativeDataType("VARCHAR")));
List<ChangeEvent> actual = test.getChangeEvents(urn, entity, aspect, from, to, auditStamp);
compareDescriptions(
Set.of(
"A forwards & backwards compatible change due to renaming of the field 'LastName to lastName'."),
actual);
assertEquals(1, actual.size());
compareModificationCategories(
Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual);
}

@Test
public void testSchemaFieldDropAdd() throws Exception {
// When a rename cannot be detected, treated as drop -> add
4 changes: 4 additions & 0 deletions metadata-service/war/build.gradle
@@ -43,6 +43,10 @@ dependencies {
implementation externalDependency.awsMskIamAuth
testRuntimeOnly externalDependency.logbackClassic
implementation externalDependency.charle

testImplementation externalDependency.testng
testImplementation externalDependency.springBootTest
testRuntimeOnly externalDependency.logbackClassic
}
configurations.all{
exclude group: "com.charleskorn.kaml", module:"kaml"
@@ -40,7 +40,8 @@
"com.linkedin.gms.factory.plugins",
"com.linkedin.gms.factory.change",
"com.datahub.event.hook",
"com.linkedin.gms.factory.notifications"
"com.linkedin.gms.factory.notifications",
"com.linkedin.gms.factory.telemetry"
})
@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class)
@Configuration
@@ -0,0 +1,32 @@
package com.linkedin.gms;

import static org.testng.AssertJUnit.assertNotNull;

import com.linkedin.gms.factory.telemetry.DailyReport;
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
import com.linkedin.metadata.models.registry.EntityRegistry;
import io.ebean.Database;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;

@SpringBootTest(
webEnvironment = SpringBootTest.WebEnvironment.MOCK,
properties = {"telemetry.enabledServer=true"})
@ContextConfiguration(classes = CommonApplicationConfig.class)
public class SpringTest extends AbstractTestNGSpringContextTests {

// Mock Beans take precedence, we add these to avoid needing to configure data sources etc. while
// still testing prod config
@MockBean private Database database;
@MockBean private ConfigEntityRegistry configEntityRegistry;
@MockBean private EntityRegistry entityRegistry;

@Test
public void testTelemetry() {
DailyReport dailyReport = this.applicationContext.getBean(DailyReport.class);
assertNotNull(dailyReport);
}
}
