Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Aug 20, 2024
2 parents eb12f0d + a9ef48e commit 627f354
Show file tree
Hide file tree
Showing 123 changed files with 3,202 additions and 1,263 deletions.
4 changes: 0 additions & 4 deletions .github/actions/ci-optimization/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,17 @@ runs:
frontend:
- "datahub-frontend/**"
- "datahub-web-react/**"
- "smoke-test/tests/cypress/**"
- "docker/datahub-frontend/**"
ingestion:
- "metadata-ingestion-modules/**"
- "metadata-ingestion/**"
- "metadata-models/**"
- "smoke-test/**"
- "docker/datahub-ingestion**"
ingestion-base:
- "docker/datahub-ingestion-base/**"
docker:
- "docker/**"
backend:
- ".github/**"
- "metadata-models/**"
- "datahub-upgrade/**"
- "entity-registry/**"
Expand All @@ -79,7 +76,6 @@ runs:
- "metadata-utils/**"
- "metadata-operation-context/**"
- "datahub-graphql-core/**"
- "smoke-test/**"
- "docker/**"
kafka-setup:
- "docker/kafka-setup/**"
Expand Down
6 changes: 3 additions & 3 deletions .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ runs:
suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
tags: |
type=raw,value=${{ inputs.image_tag }}
type=raw,value=head,enable=${{ github.ref == format('refs/heads/{0}', 'acryl-main') }}
type=ref,event=pr,prefix=pr
type=raw,value=head,enable={{is_default_branch}}
type=sha,prefix=,format=short
# Code for testing the build when not pushing to Docker Hub.
Expand Down Expand Up @@ -87,7 +86,8 @@ runs:
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' ' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> $GITHUB_OUTPUT
id: single_tag
- name: Upload image locally for testing (if not publishing)
uses: ishworkh/docker-image-artifact-upload@v1
if: ${{ inputs.publish != 'true' }}
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/docker_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ function get_tag {
}

function get_tag_slim {
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
}

function get_tag_full {
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
}

function get_python_docker_release_v {
Expand Down
8 changes: 8 additions & 0 deletions .github/scripts/docker_logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
TARGET_DIR="${TARGET_DIR:=docker_logs}"
TEST_STRATEGY="${TEST_STRATEGY:=}"

mkdir -p "$TARGET_DIR"
for name in `docker ps -a --format '{{.Names}}'`;
do
docker logs "$name" >& "${TARGET_DIR}/${name}${TEST_STRATEGY}.log" || true
done
32 changes: 15 additions & 17 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ jobs:
mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
Expand Down Expand Up @@ -500,7 +501,7 @@ jobs:
name: Build and Push DataHub Elasticsearch Setup Docker Image
runs-on: ubuntu-latest
needs: setup
if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }}
if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' ) }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
Expand Down Expand Up @@ -812,7 +813,7 @@ jobs:
echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then
elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
else
echo 'matrix=[]' >> $GITHUB_OUTPUT
Expand Down Expand Up @@ -860,11 +861,6 @@ jobs:
with:
python-version: "3.10"
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build datahub cli
run: |
./gradlew :metadata-ingestion:install
- name: Login to DockerHub
uses: docker/login-action@v3
if: ${{ needs.setup.outputs.docker-login == 'true' }}
Expand Down Expand Up @@ -992,6 +988,15 @@ jobs:
}
}
}'
- name: Disk Check
run: df -h . && docker images
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build datahub cli
run: |
./gradlew :metadata-ingestion:install
- name: Disk Check
run: df -h . && docker images
- name: Remove Source Code
run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete
- name: Disk Check
Expand All @@ -1012,21 +1017,14 @@ jobs:
if: failure()
run: |
docker ps -a
docker logs datahub-datahub-gms-1 >& gms-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-actions-1 >& actions-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-mae-consumer-1 >& mae-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-mce-consumer-1 >& mce-${{ matrix.test_strategy }}.log || true
docker logs datahub-broker-1 >& broker-${{ matrix.test_strategy }}.log || true
docker logs datahub-mysql-1 >& mysql-${{ matrix.test_strategy }}.log || true
docker logs datahub-elasticsearch-1 >& elasticsearch-${{ matrix.test_strategy }}.log || true
docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true
docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true
TEST_STRATEGY="-${{ matrix.test_strategy }}"
source .github/scripts/docker_logs.sh
- name: Upload logs
uses: actions/upload-artifact@v3
if: failure()
with:
name: docker logs
path: "*.log"
path: "docker_logs/*.log"
- name: Upload screenshots
uses: actions/upload-artifact@v3
if: failure()
Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Peloton](https://www.onepeloton.com)
- [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/)
- [Razer](https://www.razer.com)
- [Rippling](https://www.rippling.com/)
- [Showroomprive](https://www.showroomprive.com/)
- [SpotHero](https://spothero.com)
- [Stash](https://www.stash.com)
Expand All @@ -153,6 +154,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Zynga](https://www.zynga.com)



## Select Articles & Talks

- [DataHub Blog](https://blog.datahubproject.io/)
Expand All @@ -173,6 +175,23 @@ Here are the companies that have officially adopted DataHub. Please feel free to

See the full list [here](docs/links.md).

## Security Notes

### Multi-Component

The DataHub project uses a wide range of code which is responsible for build automation, documentation generation, and
include both service (i.e. GMS) and client (i.e. ingestion) components. When evaluating security vulnerabilities in
upstream dependencies, it is important to consider which component and how it is used in the project. For example, an
upstream javascript library may include a Denial of Service (DoS) vulnerability however when used for generating
documentation it does not affect the running of DataHub itself and cannot be used to impact DataHub's service. Similarly,
python dependencies for ingestion are part of the DataHub client and are not exposed as a service.

### Known False Positives

DataHub's ingestion client does not include credentials in the code repository, python package, or Docker images.
Upstream python dependencies may include files that look like credentials and are often misinterpreted as credentials
by automated scanners.

## License

[Apache License 2.0](./LICENSE).
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public static boolean isOwnerEqual(
if (!owner.getOwner().equals(ownerUrn)) {
return false;
}
if (owner.getTypeUrn() != null) {
if (owner.getTypeUrn() != null && ownershipTypeUrn != null) {
return owner.getTypeUrn().equals(ownershipTypeUrn);
}
if (ownershipTypeUrn == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public CompletableFuture<DocPropagationSettings> get(final DataFetchingEnvironme
final GlobalSettingsInfo globalSettings =
_settingsService.getGlobalSettings(context.getOperationContext());
final DocPropagationSettings defaultSettings = new DocPropagationSettings();
defaultSettings.setDocColumnPropagation(true);
// TODO: Enable by default. Currently the automation trusts the settings aspect, which
// does not have this.
defaultSettings.setDocColumnPropagation(false);
return globalSettings != null && globalSettings.hasDocPropagation()
? mapDocPropagationSettings(globalSettings.getDocPropagation())
: defaultSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN);
Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN);
Urn ownerUrn1 = new Urn("urn:li:corpuser:foo");
Urn ownerUrn2 = new Urn("urn:li:corpuser:bar");

Owner ownerWithTechnicalOwnership = new Owner();
ownerWithTechnicalOwnership.setOwner(ownerUrn1);
Expand All @@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
ownerWithoutOwnershipType.setOwner(ownerUrn1);
ownerWithoutOwnershipType.setType(OwnershipType.NONE);

Owner owner2WithoutOwnershipType = new Owner();
owner2WithoutOwnershipType.setOwner(ownerUrn2);
owner2WithoutOwnershipType.setType(OwnershipType.NONE);

assertTrue(
OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(
OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null));
assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null));
}

public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,23 @@
import io.ebean.Database;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.DependsOn;

@Slf4j
@Configuration
public class RestoreIndicesConfig {
@Autowired ApplicationContext applicationContext;

@Bean(name = "restoreIndices")
@DependsOn({
"ebeanServer",
"entityService",
"systemMetadataService",
"searchService",
"graphService"
})
@ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true)
@Nonnull
public RestoreIndices createInstance() {
final Database ebeanServer = applicationContext.getBean(Database.class);
final EntityService<?> entityService = applicationContext.getBean(EntityService.class);
final SystemMetadataService systemMetadataService =
applicationContext.getBean(SystemMetadataService.class);
final EntitySearchService entitySearchService =
applicationContext.getBean(EntitySearchService.class);
final GraphService graphService = applicationContext.getBean(GraphService.class);

public RestoreIndices createInstance(
final Database ebeanServer,
final EntityService<?> entityService,
final EntitySearchService entitySearchService,
final GraphService graphService,
final SystemMetadataService systemMetadataService) {
return new RestoreIndices(
ebeanServer, entityService, systemMetadataService, entitySearchService, graphService);
}
Expand Down
2 changes: 1 addition & 1 deletion datahub-web-react/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ module.exports = {
],
'vitest/prefer-to-be': 'off',
'@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }],
'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }],
'react-refresh/only-export-components': ['warn', { allowConstantExport: true }],
},
settings: {
react: {
Expand Down
Loading

0 comments on commit 627f354

Please sign in to comment.