diff --git a/.github/labeler.yml b/.github/labeler.yml
deleted file mode 100644
index b2712fa177..0000000000
--- a/.github/labeler.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-# Note that any updates to this files will not be applied in CI
-# until this file is merged into main. This is due to oddities of the labeller Github Action.
-'doc-update-needed':
- - dozer-types/src/models/*
- - dozer-cli/src/cli/types.rs
- - dozer-types/protos/*
- - dozer-api/src/generator/protoc/generator/template/proto.tmpl
diff --git a/.github/workflows/general.yaml b/.github/workflows/general.yaml
deleted file mode 100644
index aaa1365106..0000000000
--- a/.github/workflows/general.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Dozer General
-
-on:
- issues:
- types: [opened, edited, milestoned]
- issue_comment:
- types: [created, deleted, edited]
- discussion:
- types: [created, edited]
- discussion_comment:
- types: [created, deleted, edited]
-concurrency:
- group: general
-
-jobs:
- notify:
- name: Discord General
- runs-on: ubuntu-latest
- steps:
- - name: Discord notification
- env:
- DISCORD_WEBHOOK: ${{ secrets.DISCORD_DISCUSSIONS_WEBHOOK }}
- uses: Ilshidur/action-discord@master
diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml
deleted file mode 100644
index ac38c4949a..0000000000
--- a/.github/workflows/integration.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-name: Dozer Integration Test
-
-on:
- workflow_dispatch:
- inputs:
- dozer-version:
- description: Expected Dozer version number. Leave blank to skip verifying the version.
-
-env:
- CARGO_TERM_COLOR: always
- DOZER_VERSION: ${{ github.event.inputs.dozer-version }}
-
-concurrency:
- group: integration/${{ github.head_ref }}
- cancel-in-progress: true
-
-jobs:
- integration-linux:
- timeout-minutes: 60
- strategy:
- matrix:
- labels: [ubuntu-latest, ubuntu-20.04]
- fail-fast: false
- runs-on:
- labels: ${{ matrix.labels }}
- steps:
- - uses: actions/checkout@v3
-
- - name: Install Dozer
- run: sudo sh .github/workflows/integration/dockerfiles/install-dozer-ubuntu-amd64.sh
-
- - name: Install Protoc Ubuntu 22.04
- if: matrix.labels == 'ubuntu-latest'
- run: sudo sh .github/workflows/integration/dockerfiles/install-protoc-ubuntu-22.sh
-
- - name: Install Protoc Ubuntu 20.04
- if: matrix.labels == 'ubuntu-20.04'
- run: sudo sh .github/workflows/integration/dockerfiles/install-protoc-ubuntu-20-amd64.sh
-
- - name: Run test
- run: sudo sh .github/workflows/integration/test-dozer-ubuntu.sh
-
- integration-macos:
- timeout-minutes: 60
- runs-on:
- labels: macos-12
- steps:
- - uses: actions/checkout@v3
-
- - name: Install Dozer
- run: brew tap getdozer/dozer && brew install dozer
-
- - name: Run test
- run: sh .github/workflows/integration/test-dozer.sh
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
deleted file mode 100644
index d0755a7167..0000000000
--- a/.github/workflows/labeler.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: 'Pull Request Labeler'
-on:
- - pull_request_target
-
-jobs:
- triage:
- permissions:
- contents: read
- pull-requests: write
- runs-on: ubuntu-latest
- steps:
- - name: Check out code
- uses: actions/checkout@v3
- - name: Run labeler
- uses: actions/labeler@v4
- with:
- repo-token: '${{ secrets.GITHUB_TOKEN }}'
diff --git a/.github/workflows/pulls.yaml b/.github/workflows/pulls.yaml
deleted file mode 100644
index b241d8d7a8..0000000000
--- a/.github/workflows/pulls.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Dozer Pulls
-on:
- pull_request_target:
- branches: [main, pull-yaml-dev]
- types: [opened]
- pull_request_review:
- types: [submitted]
- pull_request_review_comment:
- types: [created, deleted]
-
-concurrency:
- group: pull
-
-jobs:
- notify:
- name: Discord Pull
- runs-on: ubuntu-latest
- steps:
- - name: Pull Request
- if: ${{ github.event_name == 'pull_request_target' || github.event_name == 'pull_request' }}
- env:
- DISCORD_WEBHOOK: ${{ secrets.DISCORD_GITHUB_WEBOOK }}
- DISCORD_EMBEDS: '[ {
- "title": " Pull request #${{ github.event.pull_request.number }} opened by ${{ github.actor }}",
- "author": { "icon_url": "https://avatars.githubusercontent.com/${{ github.actor }}", "name": "${{ github.actor }}", "url": "https://github.com/${{ github.actor }}" },
- "fields": [
- { "name": "Pull Request", "value": "[${{ github.event.pull_request.title }}](${{ github.event.pull_request.html_url }})" },
- { "name": "Repository", "value": "[getdozer/dozer](https://github.com/getdozer/dozer)" },
- { "name": "Message", "value": ${{ toJSON(github.event.pull_request.body || github.event.pull_request.title) }}}
- ],
- "color": 990099
- }]'
- uses: Ilshidur/action-discord@master
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
deleted file mode 100644
index 53390587fe..0000000000
--- a/.github/workflows/release.yaml
+++ /dev/null
@@ -1,413 +0,0 @@
-name: Release
-on:
- workflow_dispatch:
- push:
- branches: [release, release-dev, release-test, main]
- tags:
- - "v*.*.*"
-env:
- CARGO_TERM_COLOR: always
- BUCKET_NAME: "dozer-releases"
- ECR_REGISTRY: public.ecr.aws/k7k6x1d4
- ECR_REPOSITORY: dozer
- DOCKERHUB_REGISTRY: getdozer
- DOCKERHUB_REPOSITORY: dozer
-
-permissions:
- id-token: write # This is required for requesting the JWT
- contents: write # This is required for actions/checkout
-
-jobs:
- # https://github.com/orhun/git-cliff/blob/main/.github/workflows/cd.yml
- prepare:
- name: Prepare
- runs-on: ubuntu-20.04
- timeout-minutes: 60
- outputs:
- release_body: ${{ steps.release.outputs.release_body }}
- version: ${{ steps.version.outputs.version }}
- prerelease: ${{ steps.version.outputs.prerelease }}
- steps:
- - name: Checkout
- uses: actions/checkout@v3
- with:
- fetch-depth: 0
- submodules: 'recursive'
- - name: Generate a changelog
- uses: orhun/git-cliff-action@v1
- id: git-cliff
- with:
- config: .github/config/cliff.toml
- args: -vv --latest --strip header
- env:
- OUTPUT: CHANGES.md
-
- - name: Set the release body
- id: release
- shell: bash
- run: |
- r=$(cat ${{ steps.git-cliff.outputs.changelog }})
- r="$(printf "$r" | tail -n +3)"
- r="${r//'%'/'%25'}"
- r="${r//$'\n'/'%0A'}"
- r="${r//$'\r'/'%0D'}"
- echo "::set-output name=release_body::$r"
-
- - name: Set release version
- id: version
- run: |
- tag=$(printf "%q" ${{ github.ref_name }})
-
- if [[ $tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
- echo "::set-output name=version::$tag"
- echo "::set-output name=prerelease::false"
- else
- echo "::set-output name=version::dev"
- echo "::set-output name=prerelease::true"
- fi
-
- release-linux-aarch64:
- name: Release Linux binary for aarch64
- runs-on: ubuntu-20.04
- needs: prepare
- env:
- CARGO_TARGET: aarch64-unknown-linux-gnu
- DEB_NAME: dozer-linux-aarch64
- steps:
- - name: Checkout repository
- uses: actions/checkout@v3
- with:
- submodules: 'recursive'
-
- - name: Rust cache
- uses: swatinem/rust-cache@v2
-
- - name: Install toolchain
- uses: dtolnay/rust-toolchain@master
- with:
- toolchain: stable
- target: ${{ env.CARGO_TARGET }}
-
- - name: Install cross
- uses: baptiste0928/cargo-install@v1
- with:
- crate: cross
- cache-key: '${{ env.CARGO_TARGET }}'
-
- - name: Build dozer
- run: cross build --package=dozer-cli --profile=release --target ${{ env.CARGO_TARGET }} --bin dozer
-
- - name: Install cargo-deb
- uses: baptiste0928/cargo-install@v1
- with:
- crate: cargo-deb
- cache-key: '${{ env.CARGO_TARGET }}'
-
- - name: Compile deb file
- run: cargo deb -p dozer-cli --target ${{ env.CARGO_TARGET }} --no-build --no-strip --output ./deb/${{ env.DEB_NAME }}.deb
-
- - name: Prepare release assets
- shell: bash
- run: |
- mkdir -p release
- cp {LICENSE,README.md,CHANGELOG.md} release/ 2> /dev/null || echo "Copy Failed...Ignoring.."
- cp target/${{ env.CARGO_TARGET }}/release/dozer release/
-
- mv release/ ${{ env.DEB_NAME }}/
-
- tar -czvf ${{ env.DEB_NAME }}.tar.gz ${{ env.DEB_NAME }}/
-
- cp deb/${{ env.DEB_NAME }}.deb ./
-
- ls -l ${{ env.DEB_NAME }}*
-
- - name: Upload the release
- uses: svenstaro/upload-release-action@v2
- with:
- repo_token: ${{ secrets.GITHUB_TOKEN }}
- file: ${{ env.DEB_NAME }}*
- file_glob: true
- overwrite: true
- tag: ${{ needs.prepare.outputs.version }}
- release_name: "Development Release - ${{ needs.prepare.outputs.version }}"
- prerelease: ${{ needs.prepare.outputs.prerelease }}
- body: "${{ needs.prepare.outputs.release_body }}"
-
- - name: Set env variables
- env:
- VERSION: ${{ needs.prepare.outputs.version }}
- RELEASE_NAME: ${{ env.DEB_NAME }}.tar.gz
- run: |
- echo "RELEASE_NAME=${{env.RELEASE_NAME}}" >> $GITHUB_ENV
- echo "DEB_NAME=${{ env.DEB_NAME }}.deb" >> $GITHUB_ENV
- echo "VERSION=${{env.VERSION}}" >> $GITHUB_ENV
- echo "ARTIFACT_URL=https://${{ env.BUCKET_NAME }}.s3.ap-southeast-1.amazonaws.com/${{ env.VERSION }}/${{ env.RELEASE_NAME }}" >> $GITHUB_ENV
-
- - name: List deb output files
- run: ls -lR ./deb
-
- - name: configure aws credentials
- uses: aws-actions/configure-aws-credentials@v1
- with:
- role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
- role-session-name: deployer
- aws-region: ap-southeast-1
-
- - name: Upload release to S3
- id: upload_s3
- run: |
- aws s3 cp $RELEASE_NAME s3://${{ env.BUCKET_NAME }}/$VERSION/$RELEASE_NAME
-
- - name: Upload release deb to S3
- id: upload_s3_deb
- run: |
- aws s3 cp deb/$DEB_NAME s3://${{ env.BUCKET_NAME }}/$VERSION/$DEB_NAME
-
- release-macos-apple-silicon:
- name: Release binary for macOS silicon
- runs-on: ${{ matrix.os }}
- needs: prepare
- strategy:
- fail-fast: false
- matrix:
- include:
- - os: macos-12
- target: aarch64-apple-darwin
- file_name: dozer
- asset_name: dozer-macos-aarch64
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v3
- with:
- submodules: 'recursive'
- - name: Installing Rust toolchain
- uses: actions-rs/toolchain@v1
- with:
- toolchain: stable
- profile: minimal
- target: ${{ matrix.target }}
- override: true
- - name: Install Protoc
- uses: arduino/setup-protoc@v1
- with:
- repo-token: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Rust cache
- uses: swatinem/rust-cache@v2
-
- - name: Cargo build
- uses: actions-rs/cargo@v1
- with:
- command: build
- args: --release --target ${{ matrix.target }} --bin ${{ matrix.file_name }}
-
- - name: List target output files
- run: ls -lR ./target
-
- - name: Prepare release assets
- shell: bash
- run: |
- mkdir -p release
- cp {LICENSE,README.md,CHANGELOG.md} release/ 2> /dev/null || echo "Copy Failed...Ignoring.."
- cp target/${{ matrix.target }}/release/${{matrix.file_name}} release/
-
- mv release/ ${{matrix.asset_name}}/
-
- tar -czvf ${{matrix.asset_name}}.tar.gz ${{matrix.asset_name}}/
-
- - name: Upload the release
- uses: svenstaro/upload-release-action@v2
- with:
- repo_token: ${{ secrets.GITHUB_TOKEN }}
- file: ${{matrix.asset_name}}*
- file_glob: true
- overwrite: true
- tag: ${{ needs.prepare.outputs.version }}
- release_name: "Development Release - ${{ needs.prepare.outputs.version }}"
- prerelease: ${{ needs.prepare.outputs.prerelease }}
- body: "${{ needs.prepare.outputs.release_body }}"
-
- - name: Set env variables
- env:
- VERSION: ${{ needs.prepare.outputs.version }}
- RELEASE_NAME: ${{matrix.asset_name}}.tar.gz
- run: |
- echo "RELEASE_NAME=${{env.RELEASE_NAME}}" >> $GITHUB_ENV
- echo "VERSION=${{env.VERSION}}" >> $GITHUB_ENV
- echo "ARTIFACT_URL=https://${{ env.BUCKET_NAME }}.s3.ap-southeast-1.amazonaws.com/${{ env.VERSION }}/${{ env.RELEASE_NAME }}" >> $GITHUB_ENV
-
- - name: configure aws credentials
- uses: aws-actions/configure-aws-credentials@v1
- with:
- role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
- role-session-name: deployer
- aws-region: ap-southeast-1
-
- - name: Upload release to S3
- id: upload_s3
- run: |
- aws s3 cp $RELEASE_NAME s3://${{ env.BUCKET_NAME }}/$VERSION/$RELEASE_NAME
-
- release:
- name: Release
- runs-on:
- labels: ${{ matrix.os }}
- needs: prepare
- strategy:
- matrix:
- os: [ubuntu-20.04]
- include:
- - os: ubuntu-20.04
- file_name: dozer
- target: x86_64-unknown-linux-gnu
- asset_name: dozer-linux-amd64
- - os: macos-12
- file_name: dozer
- target: x86_64-apple-darwin
- asset_name: dozer-macos-amd64
- steps:
- - uses: actions/checkout@v3
- with:
- submodules: 'recursive'
- - name: Install minimal stable with clippy and rustfmt
- uses: actions-rs/toolchain@v1
- with:
- profile: minimal
- toolchain: stable
- target: ${{ matrix.target }}
- components: rustfmt, clippy
- - name: Install Protoc
- uses: arduino/setup-protoc@v1
- with:
- repo-token: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Rust cache
- uses: swatinem/rust-cache@v2
-
- - name: Install cargo-deb
- if: matrix.os == 'ubuntu-20.04'
- run: cargo install cargo-deb
-
- - name: Compile deb file
- if: matrix.os == 'ubuntu-20.04'
- run: cargo-deb -p dozer-cli --output ./deb/${{matrix.asset_name}}.deb
-
- - name: Build package
- if: matrix.os != 'ubuntu-20.04'
- run: cargo build --release --bin ${{ matrix.file_name }}
-
- - name: Build package for ubuntu (with kafka & snowflake)
- if: matrix.os == 'ubuntu-20.04'
- run: cargo build --release --bin ${{ matrix.file_name }} --features "kafka snowflake"
-
- - name: Prepare release assets
- shell: bash
- run: |
- mkdir -p release
- cp {LICENSE,README.md,CHANGELOG.md} release/ 2> /dev/null || echo "Copy Failed...Ignoring.."
- cp target/release/${{matrix.file_name}} release/
-
- mv release/ ${{matrix.asset_name}}/
-
- tar -czvf ${{matrix.asset_name}}.tar.gz \
- ${{matrix.asset_name}}/
-
- cp deb/${{matrix.asset_name}}.deb ./ 2>/dev/null || :
-
- ls -l ${{matrix.asset_name}}*
-
- - name: Upload the release
- uses: svenstaro/upload-release-action@v2
- with:
- repo_token: ${{ secrets.GITHUB_TOKEN }}
- file: ${{matrix.asset_name}}*
- file_glob: true
- overwrite: true
- tag: ${{ needs.prepare.outputs.version }}
- release_name: "Development Release - ${{ needs.prepare.outputs.version }}"
- prerelease: ${{ needs.prepare.outputs.prerelease }}
- body: "${{ needs.prepare.outputs.release_body }}"
-
- - name: Set env variables
- env:
- VERSION: ${{ needs.prepare.outputs.version }}
- RELEASE_NAME: ${{matrix.asset_name}}.tar.gz
- run: |
- echo "RELEASE_NAME=${{env.RELEASE_NAME}}" >> $GITHUB_ENV
- echo "DEB_NAME=${{matrix.asset_name}}.deb" >> $GITHUB_ENV
- echo "VERSION=${{env.VERSION}}" >> $GITHUB_ENV
- echo "ARTIFACT_URL=https://${{ env.BUCKET_NAME }}.s3.ap-southeast-1.amazonaws.com/${{ env.VERSION }}/${{ env.RELEASE_NAME }}" >> $GITHUB_ENV
-
- - name: configure aws credentials
- if: matrix.os == 'ubuntu-20.04'
- uses: aws-actions/configure-aws-credentials@v1
- with:
- role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
- role-session-name: deployer
- aws-region: ap-southeast-1
-
- - name: Upload release to S3
- id: upload_s3
- if: matrix.os == 'ubuntu-20.04'
- run: |
- aws s3 cp $RELEASE_NAME s3://${{ env.BUCKET_NAME }}/$VERSION/$RELEASE_NAME
-
- - name: Upload release deb to S3
- id: upload_s3_deb
- if: matrix.os == 'ubuntu-20.04'
- run: |
- aws s3 cp deb/$DEB_NAME s3://${{ env.BUCKET_NAME }}/$VERSION/$DEB_NAME
-
- - name: Build, tag, and push image to Amazon ECR
- id: build_push_ecr
- if: matrix.os == 'ubuntu-20.04'
- env:
- IMAGE_TAG: ${{ needs.prepare.outputs.version }}
- run: |
- aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $ECR_REGISTRY
- docker build -f ci/Dockerfile -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG -t $ECR_REGISTRY/$ECR_REPOSITORY:$GITHUB_SHA .
- docker push $ECR_REGISTRY/$ECR_REPOSITORY --all-tags
-
- - name: Update latest image if releasing
- if: (needs.prepare.outputs.prerelease == 'false') && (matrix.os == 'ubuntu-20.04')
- env:
- IMAGE_TAG: ${{ needs.prepare.outputs.version }}
- run: |
- docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:latest
- docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest
-
- - name: Log in to Docker Hub
- if: (github.event_name == 'release') && (needs.prepare.outputs.prerelease == 'false') && (matrix.os == 'ubuntu-20.04')
- uses: docker/login-action@v1
- with:
- username: ${{ secrets.DOCKERHUB_USERNAME }}
- password: ${{ secrets.DOCKERHUB_TOKEN }}
-
- - name: Build and push Docker image to Docker Hub
- if: (github.event_name == 'release') && (needs.prepare.outputs.prerelease == 'false') && (matrix.os == 'ubuntu-20.04')
- env:
- IMAGE_TAG: ${{ needs.prepare.outputs.version }}
- uses: docker/build-push-action@v2
- with:
- context: .
- file: ./ci/Dockerfile
- push: true
- tags: ${{ env.DOCKERHUB_REGISTRY }}/${{ env.DOCKERHUB_REPOSITORY }}:latest,${{ env.DOCKERHUB_REGISTRY }}/${{ env.DOCKERHUB_REPOSITORY }}:${{ env.IMAGE_TAG }}
-
- - name: Release notification
- if: ${{ env.VERSION != 'dev' && matrix.os == 'ubuntu-20.04'}}
- env:
- DISCORD_WEBHOOK: ${{ secrets.DISCORD_RELEASE_HOOK }}
- DISCORD_EMBEDS: '[ {
- "title": "New version `${{env.VERSION}}` released",
- "author": { "icon_url": "https://avatars.githubusercontent.com/${{ github.actor }}", "name": "${{ github.actor }}", "url": "https://github.com/${{github.actor}}" },
- "fields": [
- { "name": "Repository", "value": "[getdozer/dozer](https://github.com/getdozer/dozer)", "inline": true },
- { "name": "Binary", "value": "[${{ env.RELEASE_NAME }}](${{ env.ARTIFACT_URL }})", "inline": true },
- { "name": "Using Binary", "value": "`dozer -h`"},
- { "name": "Release Notes", "value": "Release notes can be found [here](https://github.com/getdozer/dozer/releases/tag/${{env.VERSION}})"}
- ],
- "color": 990099
- }]'
- uses: Ilshidur/action-discord@master
diff --git a/.github/workflows/unit.yaml b/.github/workflows/unit.yaml
deleted file mode 100644
index 9737477e96..0000000000
--- a/.github/workflows/unit.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-name: Unit Tests
-
-on:
- workflow_dispatch:
- pull_request_target:
- branches: [main]
- merge_group:
-
-env:
- CARGO_TERM_COLOR: always
-
-concurrency:
- group: unit/${{ github.head_ref || github.run_id }}
- cancel-in-progress: true
-
-permissions:
- id-token: write # This is required for requesting the JWT
- contents: write # This is required for actions/checkout
-
-jobs:
- # Run unit tests
- unit:
- timeout-minutes: 60
- runs-on: ubuntu-latest
- services:
- postgres:
- image: debezium/postgres:13
- ports:
- - 5434:5432
- env:
- POSTGRES_DB: dozer_test
- POSTGRES_USER: postgres
- POSTGRES_PASSWORD: postgres
- ALLOW_IP_RANGE: 0.0.0.0/0
- # command: postgres -c hba_file=/var/lib/stock-sample/pg_hba.conf
- options: >-
- --health-cmd pg_isready
- --health-interval 10s
- --health-timeout 5s
- --health-retries 5
-
- steps:
- - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v2
- with:
- role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
- role-session-name: dozer-coverage
- aws-region: us-east-2
-
- - if: github.event_name == 'pull_request_target'
- uses: actions/checkout@v3
- with:
- ref: ${{ github.event.pull_request.head.sha }}
- submodules: 'recursive'
-
- - if: github.event_name != 'pull_request_target'
- uses: actions/checkout@v3
- with:
- submodules: 'recursive'
-
- - name: Install Protoc
- uses: arduino/setup-protoc@v1
- with:
- repo-token: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Rust cache
- uses: swatinem/rust-cache@v2
-
- - uses: ./.github/workflows/setup-snowflake-and-kafka
-
- - uses: ./.github/workflows/setup-mysql-and-mariadb
-
- - name: Run connectors tests
- env:
- SN_SERVER: ${{ secrets.SN_SERVER }}
- SN_USER: ${{ secrets.SN_USER }}
- SN_PASSWORD: ${{ secrets.SN_PASSWORD }}
- SN_DATABASE: ${{ secrets.SN_DATABASE }}
- SN_WAREHOUSE: ${{ secrets.SN_WAREHOUSE }}
- SN_DRIVER: ${{ secrets.SN_DRIVER }}
- shell: bash
- run: |
- cargo test \
- -p dozer-ingestion-postgres \
- -p dozer-ingestion-kafka \
- -p dozer-ingestion-mysql \
- --lib --no-fail-fast -- --ignored
- - name: Run tests
- shell: bash
- run: |
- source ./dozer-tests/python_udf/virtualenv.sh
- cargo test --features snowflake,ethereum,kafka,python,mongodb --no-fail-fast
-
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index a179f0e620..0000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "dozer-sink-aerospike/aerospike-client-sys/aerospike-client-c"]
- path = dozer-sink-aerospike/aerospike-client-sys/aerospike-client-c
- url = https://github.com/aerospike/aerospike-client-c
diff --git a/Cargo.lock b/Cargo.lock
index 90ea1fa764..55a96a2c33 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -245,13 +245,6 @@ dependencies = [
"generic-array",
]
-[[package]]
-name = "aerospike-client-sys"
-version = "0.1.0"
-dependencies = [
- "bindgen",
-]
-
[[package]]
name = "aes"
version = "0.8.3"
@@ -976,15 +969,12 @@ dependencies = [
"itertools 0.12.1",
"lazy_static",
"lazycell",
- "log",
- "prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.53",
- "which 4.4.2",
]
[[package]]
@@ -2789,9 +2779,7 @@ dependencies = [
"clap",
"dozer-core",
"dozer-ingestion",
- "dozer-sink-aerospike",
"dozer-sink-clickhouse",
- "dozer-sink-oracle",
"dozer-sql",
"dozer-tracing",
"dozer-types",
@@ -2872,7 +2860,6 @@ dependencies = [
"bytes",
"chrono",
"criterion",
- "dozer-ingestion-aerospike",
"dozer-ingestion-connector",
"dozer-ingestion-deltalake",
"dozer-ingestion-ethereum",
@@ -2882,7 +2869,6 @@ dependencies = [
"dozer-ingestion-mongodb",
"dozer-ingestion-mysql",
"dozer-ingestion-object-store",
- "dozer-ingestion-oracle",
"dozer-ingestion-postgres",
"dozer-ingestion-snowflake",
"dozer-ingestion-webhook",
@@ -2900,16 +2886,6 @@ dependencies = [
"url",
]
-[[package]]
-name = "dozer-ingestion-aerospike"
-version = "0.4.0"
-dependencies = [
- "actix-web",
- "base64 0.21.7",
- "dozer-ingestion-connector",
- "dozer-sink-aerospike",
-]
-
[[package]]
name = "dozer-ingestion-connector"
version = "0.4.0"
@@ -3001,16 +2977,6 @@ dependencies = [
"url",
]
-[[package]]
-name = "dozer-ingestion-oracle"
-version = "0.1.0"
-dependencies = [
- "dozer-ingestion-connector",
- "env_logger 0.11.2",
- "oracle",
- "regex",
-]
-
[[package]]
name = "dozer-ingestion-postgres"
version = "0.4.0"
@@ -3053,17 +3019,6 @@ dependencies = [
"tokio",
]
-[[package]]
-name = "dozer-sink-aerospike"
-version = "0.1.0"
-dependencies = [
- "aerospike-client-sys",
- "dozer-core",
- "dozer-types",
- "itertools 0.12.1",
- "smallvec",
-]
-
[[package]]
name = "dozer-sink-clickhouse"
version = "0.1.0"
@@ -3076,15 +3031,6 @@ dependencies = [
"serde",
]
-[[package]]
-name = "dozer-sink-oracle"
-version = "0.1.0"
-dependencies = [
- "dozer-core",
- "dozer-types",
- "oracle",
-]
-
[[package]]
name = "dozer-sql"
version = "0.4.0"
@@ -5948,31 +5894,6 @@ dependencies = [
"tokio-stream",
]
-[[package]]
-name = "oracle"
-version = "0.5.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfe80334af1fbaea016fbef0af77f5fa32452362e29a039389b8c93737585003"
-dependencies = [
- "cc",
- "chrono",
- "lazy_static",
- "oracle_procmacro",
- "paste",
-]
-
-[[package]]
-name = "oracle_procmacro"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad247f3421d57de56a0d0408d3249d4b1048a522be2013656d92f022c3d8af27"
-dependencies = [
- "darling 0.13.4",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
[[package]]
name = "ordered-float"
version = "2.10.1"
diff --git a/Cargo.toml b/Cargo.toml
index eadc28e819..b47925d2f1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,9 +8,7 @@ members = [
"dozer-tracing",
"dozer-tests",
"dozer-utils",
- "dozer-sink-aerospike",
"dozer-sink-clickhouse",
- "dozer-sink-oracle",
]
resolver = "2"
diff --git a/README.md b/README.md
index ff37b8fdcf..c971899975 100644
--- a/README.md
+++ b/README.md
@@ -1,88 +1,53 @@
-
-
-
-
-
-
-
-
-
-
## Overview
-Dozer is a **data platform for building, deploying and maintaining real-time data products.**
-
-It is ideal for companies with multiple databases, data warehouses and data lakes that are in need of combining, aggregating and transforming data in real time, and create customer facing or internal data applications.
-
-*Put it simply, Dozer empowers a single developer go from data sources to ready-made APIs in just a few minutes. All with just a with a simple configuration file.*
+Dozer is a **real-time data movement tool that leverages CDC to move data from various sources to multiple sinks.**
-## How it works
-Dozer pulls data from various sources like databases, data lakes, and data warehouses using Change Data Capture (CDC) and periodic polling mechanisms. This ensures up-to-date data ingestion in real-time or near-real-time.
-
-After capturing data, Dozer offers the possibility of combining, transforming and aggregating it
-using its own internal real-time transformation engine. It supports Streaming SQL, WebAssembly (coming soon) and TypeScript (coming soon), as well as ONNX for performing AI predictions in real-time.
-
-After processing, data is stored and indexed in a low-latency datastore (based on [LMDB](https://github.com/LMDB/lmdb)), queryable using REST and gRPC.
+Dozer is orders of magnitude faster than Debezium+Kafka and natively supports stateless transformations.
+It is primarily used for moving data into warehouses. In our own application, we move data to **Clickhouse** and build data APIs and integrations with LLMs.
## How to use it
-
-### ① Build
-A Dozer application consists of a YAML file that can be run locally using the Dozer Live UI or Dozer CLI. As YAML is edited,
-changes are immediately reflected on Dozer Live UI.
-
-![Screenshot](./images/dozer_live_screen1.png)
-
-### ② Test
-Dozer can run the entire infrastructure locally. You can inspect data flowing in in real time or use the built-it API explorer to query data through REST and gRPC. Dozer Live explorer also provides ready-made samples to integrate results into your front-end applications.
-
-![Screenshot](./images/dozer_live_screen2.png)
-
-### ③ Deploy
-Dozer applications can be self-hosted or deployed in the cloud with a single command. Dozer Cloud (coming soon) provides self-healing and monitoring capabilities, making sure your APIs are always available.
-
-
-## Supported Sources and Tranformation Engines
-Dozer currently supports a variety of source databases, data warehouses and object stores. Whenever possible, Dozer leverages Change Data Capture (CDC) to keep data always fresh. For sources that do not support CDC, periodic polling is used.
-
-Dozer transformations can be executed using Dozer's highly cutomizable streaming SQL engine, which provides UDF supports in WASM (coming soon), TypeScript (coming soon) and ONNX.
-
-Here is an overview of all supported source types and transformation engines:
-
-![Screenshot](./images/supported_sources.png)
-
-
-## Why Dozer ?
-As teams embark on the journey of implementing real-time data products, they invariably come across a host of challenges that can make the task seem daunting:
-
-1. **Integration with Various Systems**: Integrating with various data sources can present numerous technical hurdles and interoperability issues.
-
-2. **Managing Latency**: Ensuring low-latency data access, especially for customer-facing applications, can be a significant challenge.
-
-3. **Real-Time Data Transformation**: Managing real-time data transformations, especially when dealing with complex queries or large volumes of data, can be difficult and resource-intensive.
-
-4. **Maintaining Data Freshness**: Keeping the data up-to-date in real-time, particularly when it's sourced from multiple locations like databases, data lakes, or warehouses, can be a daunting task.
-
-4. **Scalability and High Availability**: Building a data application that can efficiently handle high-volume operations and remain reliable under heavy loads requires advanced architecture design and robust infrastructure.
-
-To address all the above issues, teams often find themselves stitching together multiple technologies and a significant amount of custom code. This could involve integrating diverse systems like Kafka for real-time data streaming, Redis for low-latency data access and caching, and Spark or Flink for processing and analyzing streaming data.
-
-![Complex Tools Setup](./images/tools.png)
-
-The complexity of such a setup can become overwhelming. Ensuring that these different technologies communicate effectively, maintaining them, and handling potential failure points requires extensive effort and expertise.
-
-This is where Dozer steps in, aiming to dramatically simplify this process. Dozer is designed as an all-in-one backend solution that integrates the capabilities of these disparate technologies into a single, streamlined tool. By doing so, Dozer offers the capacity to build an end-to-end real-time data product without the need to manage multiple technologies and extensive custom code.
-
-Dozer's goal is to empower a single engineer or a small team of engineers to fully manage the entire lifecycle of a Data Product!
-
-## Getting Started
-
-Follow the links below to get started with Dozer:
-
-- [Installation](https://getdozer.io/docs/installation)
-- [Build a sample application using NY Taxi dataset](https://getdozer.io/docs/getting_started)
-
-For a more comprehensive list of samples check out our [GitHub Samples repo](https://github.com/getdozer/dozer-samples)
+Dozer runs with a single configuration file like the following:
+```yaml
+app_name: dozer-bench
+version: 1
+connections:
+ - name: pg_1
+ config: !Postgres
+ user: user
+ password: postgres
+ host: localhost
+ port: 5432
+ database: customers
+sinks:
+ - name: customers
+ config: !Dummy
+ table_name: customers
+```
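+In this example the `!Postgres` connection is the CDC source and the `!Dummy` sink simply receives the `customers` table; in a real deployment the sink would typically be a warehouse such as Clickhouse.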
+
+Full documentation can be found [here](https://github.com/getdozer/dozer/blob/main/dozer-types/src/models/config.rs#L15)
+
+
+## Supported Sources
+
+| Connector | Extraction | Resuming | Enterprise |
+| -------------------- | ---------- | -------- | ------------------- |
+| Postgres | ✅ | ✅ | ✅ |
+| MySQL | ✅ | ✅ | ✅ |
+| Snowflake | ✅ | ✅ | ✅ |
+| Kafka | ✅ | 🚧 | ✅ |
+| MongoDB | ✅ | 🎯 | ✅ |
+| Amazon S3 | ✅ | 🎯 | ✅ |
+| Google Cloud Storage | ✅ | 🎯 | ✅ |
+| Oracle | ✅ | ✅ | **Enterprise Only** |
+| Aerospike | ✅ | ✅ | **Enterprise Only** |
+
+
+## Supported Sinks
+| Database | Connectivity | Enterprise |
+| ---------- | ------------ | ------------------- |
+| Clickhouse | ✅ | |
+| Postgres | ✅ | |
+| MySQL | ✅ | |
+| Big Query | ✅ | |
+| Oracle | ✅ | **Enterprise Only** |
+| Aerospike | ✅ | **Enterprise Only** |
\ No newline at end of file
diff --git a/dozer-cli/Cargo.toml b/dozer-cli/Cargo.toml
index 838c73dff8..cb0519af7e 100644
--- a/dozer-cli/Cargo.toml
+++ b/dozer-cli/Cargo.toml
@@ -16,10 +16,7 @@ dozer-core = { path = "../dozer-core" }
dozer-sql = { path = "../dozer-sql" }
dozer-types = { path = "../dozer-types" }
dozer-tracing = { path = "../dozer-tracing" }
-dozer-sink-aerospike = { path = "../dozer-sink-aerospike" }
dozer-sink-clickhouse = { path = "../dozer-sink-clickhouse" }
-dozer-sink-oracle = { path = "../dozer-sink-oracle" }
-
actix-web = "4.4.0"
async-trait = "0.1.74"
uuid = { version = "1.6.1", features = ["v4", "serde"] }
diff --git a/dozer-cli/src/errors.rs b/dozer-cli/src/errors.rs
index 0f24c67fe8..ecca284d63 100644
--- a/dozer-cli/src/errors.rs
+++ b/dozer-cli/src/errors.rs
@@ -88,6 +88,8 @@ pub enum OrchestrationError {
LockedNoLockFile,
#[error("Command was aborted")]
Aborted,
+ #[error("This feature is only supported in enterprise: {0}")]
+ UnsupportedFeature(String),
}
#[derive(Error, Debug)]
diff --git a/dozer-cli/src/pipeline/builder.rs b/dozer-cli/src/pipeline/builder.rs
index 9371d54e42..45e755a58e 100644
--- a/dozer-cli/src/pipeline/builder.rs
+++ b/dozer-cli/src/pipeline/builder.rs
@@ -24,9 +24,7 @@ use std::hash::Hash;
use tokio::runtime::Runtime;
use crate::pipeline::dummy_sink::DummySinkFactory;
-use dozer_sink_aerospike::AerospikeSinkFactory;
use dozer_sink_clickhouse::ClickhouseSinkFactory;
-use dozer_sink_oracle::OracleSinkFactory;
use super::source_builder::SourceBuilder;
use crate::errors::OrchestrationError;
@@ -246,35 +244,7 @@ impl<'a> PipelineBuilder<'a> {
id,
vec![(get_table_info(&config.table_name)?, DEFAULT_PORT_HANDLE)],
),
- SinkConfig::Aerospike(config) => {
- let connection = self
- .connections
- .iter()
- .find_map(|conn| match conn {
- Connection {
- config: ConnectionConfig::Aerospike(conn_config),
- name,
- } if name == &config.connection => Some(conn_config),
- _ => None,
- })
- .ok_or_else(|| {
- OrchestrationError::ConnectionNotFound(config.connection.clone())
- })?;
- let sink_factory = Box::new(AerospikeSinkFactory::new(
- connection.clone(),
- config.clone(),
- ));
- let table_infos = config
- .tables
- .iter()
- .enumerate()
- .map(|(port, table)| {
- let table_info = get_table_info(&table.source_table_name)?;
- Ok((table_info, port as PortHandle))
- })
- .collect::<Result<Vec<_>, OrchestrationError>>()?;
- add_sink_to_pipeline(&mut pipeline, sink_factory, id, table_infos);
- }
+
SinkConfig::Clickhouse(config) => {
let sink =
Box::new(ClickhouseSinkFactory::new(config.clone(), runtime.clone()));
@@ -286,28 +256,8 @@ impl<'a> PipelineBuilder<'a> {
vec![(table_info, DEFAULT_PORT_HANDLE)],
);
}
- SinkConfig::Oracle(config) => {
- let connection = self
- .connections
- .iter()
- .find_map(|conn| match conn {
- Connection {
- config: ConnectionConfig::Oracle(conn_config),
- name,
- } if name == &config.connection => Some(conn_config),
- _ => None,
- })
- .ok_or_else(|| {
- OrchestrationError::ConnectionNotFound(config.connection.clone())
- })?;
- let sink = Box::new(OracleSinkFactory::new(connection.clone(), config.clone()));
- let table_info = get_table_info(&config.table_name)?;
- add_sink_to_pipeline(
- &mut pipeline,
- sink,
- id,
- vec![(table_info, DEFAULT_PORT_HANDLE)],
- );
+ x => {
+ return Err(OrchestrationError::UnsupportedFeature(x.name()));
}
}
}
diff --git a/dozer-ingestion/Cargo.toml b/dozer-ingestion/Cargo.toml
index 4b9fcfe2dd..1525d8cc5f 100644
--- a/dozer-ingestion/Cargo.toml
+++ b/dozer-ingestion/Cargo.toml
@@ -19,9 +19,7 @@ dozer-ingestion-mysql = { path = "./mysql" }
dozer-ingestion-object-store = { path = "./object-store", optional = true }
dozer-ingestion-postgres = { path = "./postgres" }
dozer-ingestion-snowflake = { path = "./snowflake", optional = true }
-dozer-ingestion-aerospike = { path = "./aerospike" }
dozer-ingestion-webhook = { path = "./webhook" }
-dozer-ingestion-oracle = { path = "./oracle" }
tokio = { version = "1", features = ["full"] }
futures = "0.3.28"
diff --git a/dozer-ingestion/aerospike/Cargo.toml b/dozer-ingestion/aerospike/Cargo.toml
deleted file mode 100644
index 5b44eb6818..0000000000
--- a/dozer-ingestion/aerospike/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "dozer-ingestion-aerospike"
-version = "0.4.0"
-edition = "2021"
-license = "AGPL-3.0-or-later"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-dozer-ingestion-connector = { path = "../connector" }
-actix-web = "4.5.1"
-base64 = "0.21.7"
-dozer-sink-aerospike = { path = "../../dozer-sink-aerospike" }
diff --git a/dozer-ingestion/aerospike/src/connector.rs b/dozer-ingestion/aerospike/src/connector.rs
deleted file mode 100644
index c87fdef157..0000000000
--- a/dozer-ingestion/aerospike/src/connector.rs
+++ /dev/null
@@ -1,984 +0,0 @@
-use dozer_ingestion_connector::dozer_types::epoch::SourceTime;
-use dozer_ingestion_connector::dozer_types::errors::internal::BoxedError;
-use dozer_ingestion_connector::dozer_types::errors::types::DeserializationError;
-use dozer_ingestion_connector::dozer_types::event::Event;
-use dozer_ingestion_connector::dozer_types::json_types::serde_json_to_json_value;
-use dozer_ingestion_connector::dozer_types::log::{debug, error, info, trace, warn};
-use dozer_ingestion_connector::dozer_types::models::connection::AerospikeConnection;
-use dozer_ingestion_connector::dozer_types::models::ingestion_types::{
- IngestionMessage, TransactionInfo,
-};
-use dozer_ingestion_connector::dozer_types::node::{NodeHandle, OpIdentifier, SourceState};
-use dozer_ingestion_connector::dozer_types::types::Operation::Insert;
-use dozer_ingestion_connector::dozer_types::types::{Field, FieldDefinition, FieldType, Schema};
-use dozer_ingestion_connector::tokio::sync::broadcast::error::RecvError;
-use dozer_ingestion_connector::tokio::sync::broadcast::Receiver;
-use dozer_ingestion_connector::tokio::sync::{mpsc, oneshot};
-use dozer_ingestion_connector::{
- async_trait, dozer_types, tokio, Connector, Ingestor, SourceSchema, SourceSchemaResult,
- TableIdentifier, TableInfo,
-};
-use std::collections::HashMap;
-use std::ffi::{CStr, CString};
-use std::num::TryFromIntError;
-
-use std::time::Duration;
-
-use dozer_ingestion_connector::dozer_types::serde::Deserialize;
-
-use actix_web::dev::Server;
-use actix_web::post;
-use actix_web::web;
-use actix_web::App;
-use actix_web::HttpRequest;
-use actix_web::HttpServer;
-use actix_web::{get, HttpResponse};
-
-use dozer_ingestion_connector::dozer_types::rust_decimal::Decimal;
-use dozer_ingestion_connector::dozer_types::serde_json;
-use dozer_ingestion_connector::dozer_types::serde_json::Value;
-
-use base64::prelude::*;
-use dozer_ingestion_connector::dozer_types::chrono::{DateTime, FixedOffset, NaiveDateTime, Utc};
-
-use dozer_ingestion_connector::dozer_types::thiserror::{self, Error};
-use dozer_ingestion_connector::schema_parser::SchemaParser;
-
-use dozer_sink_aerospike::Client;
-
-#[derive(Debug, Error)]
-pub enum AerospikeConnectorError {
- #[error("Cannot start server: {0}")]
- CannotStartServer(#[from] std::io::Error),
-
- #[error("Set name is none. Key: {0:?}, {1:?}, {2:?}")]
- SetNameIsNone(
- Option<serde_json::Value>,
- Option<serde_json::Value>,
- Option<serde_json::Value>,
- ),
-
- #[error("PK is none: {0:?}, {1:?}, {2:?}")]
- PkIsNone(Option<serde_json::Value>, String, Option<serde_json::Value>),
-
- #[error("Invalid key value: {0:?}. Key is supposed to have 4 elements.")]
- InvalidKeyValue(Vec<Option<serde_json::Value>>),
-
- #[error("Unsupported type. Bin type {bin_type:?}, field type: {field_type:?}")]
- UnsupportedTypeForFieldType {
- bin_type: String,
- field_type: FieldType,
- },
-
- #[error("Unsupported type: {0}")]
- UnsupportedType(FieldType),
-
- #[error("Invalid timestamp: {0}")]
- InvalidTimestamp(i64),
-
- #[error("Invalid days: {0}")]
- InvalidDate(i64),
-
- #[error("Error decoding base64: {0}")]
- BytesDecodingError(#[from] base64::DecodeError),
-
- #[error("Error parsing float: {0}")]
- FloatParsingError(#[from] std::num::ParseFloatError),
-
- #[error("Error parsing int: {0}")]
- IntParsingError(#[from] std::num::ParseIntError),
-
- #[error("Error casting int: {0}")]
- IntCastError(#[from] TryFromIntError),
-
- #[error("Failed days number parsing")]
- ParsingDaysError,
-
- #[error("Failed timestamp parsing")]
- ParsingTimestampFailed,
-
- #[error("Failed point parsing")]
- ParsingPointFailed,
-
- #[error("Failed int parsing")]
- ParsingIntFailed,
-
- #[error("Failed uint parsing")]
- ParsingUIntFailed,
-
- #[error("Failed float parsing")]
- ParsingFloatFailed,
-
- #[error("Failed decimal parsing")]
- ParsingDecimalFailed(#[from] dozer_types::rust_decimal::Error),
-
- #[error("Schema not found: {0}")]
- SchemaNotFound(String),
-
- #[error("Failed parsing timestamp: {0}")]
- TimestampParsingError(#[from] dozer_ingestion_connector::dozer_types::chrono::ParseError),
-
- #[error("Key is neither string or int")]
- KeyNotSupported(Value),
-
- #[error("Failed to parse json")]
- JsonParsingFailed(#[from] DeserializationError),
-
- #[error("Failed to parse duration")]
- ParsingDurationFailed,
-}
-
-#[derive(Deserialize, Debug)]
-#[serde(crate = "dozer_types::serde")]
-pub struct AerospikeEvent {
- msg: String,
- key: Vec<Option<serde_json::Value>>,
- // gen: u32,
- // exp: u32,
- lut: u64,
- bins: Vec<Bin>,
-}
-
-#[derive(Deserialize, Debug)]
-#[serde(crate = "dozer_types::serde")]
-pub struct Bin {
- name: String,
- value: Option<serde_json::Value>,
- r#type: String,
-}
-
-#[derive(Debug)]
-pub struct AerospikeConnector {
- pub config: AerospikeConnection,
- node_handle: NodeHandle,
- event_receiver: Receiver<Event>,
-}
-
-impl AerospikeConnector {
- pub fn new(
- config: AerospikeConnection,
- node_handle: NodeHandle,
- event_receiver: Receiver<Event>,
- ) -> Self {
- Self {
- config,
- node_handle,
- event_receiver,
- }
- }
-
- fn start_server(&self, server_state: ServerState) -> Result<Server, AerospikeConnectorError> {
- let address = format!(
- "{}:{}",
- self.config.replication.server_address, self.config.replication.server_port
- );
-
- info!("Starting aerospike replication server on {}", address);
-
- Ok(HttpServer::new(move || {
- App::new()
- .app_data(web::JsonConfig::default().error_handler(|err, _req| {
- error!("Error parsing json: {:?}", err);
- actix_web::error::InternalError::from_response(
- "",
- HttpResponse::BadRequest()
- .content_type("application/json")
- .body(format!(r#"{{"error":"{}"}}"#, err)),
- )
- .into()
- }))
- .app_data(web::Data::new(server_state.clone()))
- .service(healthcheck)
- .service(healthcheck_batch)
- .service(event_request_handler)
- .service(batch_event_request_handler)
- })
- .bind(address)?
- .run())
- }
-
- async fn rewind(
- &self,
- client: &Client,
- dc_name: &str,
- namespace: &str,
- ) -> Result<bool, BoxedError> {
- unsafe {
- let request = CString::new(format!(
- "set-config:context=xdr;dc={dc_name};namespace={namespace};action=add;rewind=all"
- ))?;
-
- // Wait until the replication configuration is set.
- // It may take some time, so retrying until rewind returns ok.
- let mut response: *mut i8 = std::ptr::null_mut();
- client.info(&request, &mut response).map_err(Box::new)?;
-
- let string = CStr::from_ptr(response);
-
- let parts: Vec<&str> = string.to_str()?.trim().split('\t').collect();
-
- if let Some(status) = parts.get(1) {
- Ok(status.replace('\n', "") == *"ok")
- } else {
- Ok(false)
- }
- }
- }
-}
-
-#[derive(Debug)]
-struct PendingMessage {
- source_time: SourceTime,
- messages: Vec<IngestionMessage>,
- sender: oneshot::Sender<()>,
-}
-
-#[derive(Debug)]
-struct PendingOperationId {
- operation_id: u64,
- sender: oneshot::Sender<()>,
-}
-
-/// This loop assigns an operation id to each request and sends it to the ingestor.
-async fn ingestor_loop(
- mut message_receiver: mpsc::UnboundedReceiver<PendingMessage>,
- ingestor: Ingestor,
- operation_id_sender: mpsc::UnboundedSender<PendingOperationId>,
-) {
- let mut operation_id = 0;
- while let Some(message) = message_receiver.recv().await {
- let pending_operation_id = PendingOperationId {
- operation_id,
- sender: message.sender,
- };
-
- // Propagate panic in the pipeline event processor loop.
- operation_id_sender.send(pending_operation_id).unwrap();
-
- // Ignore the error, because the server can be down.
- for message in message.messages {
- let _ = ingestor.handle_message(message).await;
- }
- let _ = ingestor
- .handle_message(IngestionMessage::TransactionInfo(TransactionInfo::Commit {
- id: Some(OpIdentifier::new(0, operation_id)),
- source_time: Some(message.source_time),
- }))
- .await;
-
- operation_id += 1;
- }
-}
-
-/// This loop triggers the pending operation id that's before the event's payload.
-async fn pipeline_event_processor(
- node_handle: NodeHandle,
- mut operation_id_receiver: mpsc::UnboundedReceiver<PendingOperationId>,
- mut event_receiver: Receiver<Event>,
-) {
- let mut operation_id_from_pipeline = None;
- let mut pending_operation_id: Option<PendingOperationId> = None;
- loop {
- if operation_id_from_pipeline
- < pending_operation_id
- .as_ref()
- .map(|operation_id| operation_id.operation_id)
- {
- // We have pending operation id, wait for pipeline event.
- let event = match event_receiver.recv().await {
- Ok(event) => event,
- Err(RecvError::Closed) => {
- // Pipeline is down.
- return;
- }
- Err(RecvError::Lagged(_)) => {
- // Ignore lagged events.
- continue;
- }
- };
- if let Some(operation_id) = get_operation_id_from_event(&event, &node_handle) {
- operation_id_from_pipeline = Some(operation_id);
- }
- } else if let Some(pending) = pending_operation_id.take() {
- // This operation id is already confirmed by the pipeline.
- let _ = pending.sender.send(());
- } else {
- // Wait for the next operation id.
- let Some(pending) = operation_id_receiver.recv().await else {
- // Ingestor is down.
- return;
- };
- pending_operation_id = Some(pending);
- }
- }
-}
-
-fn get_operation_id_from_event(event: &Event, node_handle: &NodeHandle) -> Option<u64> {
- match event {
- Event::SinkFlushed { epoch, .. } => epoch
- .common_info
- .source_states
- .get(node_handle)
- .and_then(|state| match state {
- SourceState::Restartable(id) => Some(id.seq_in_tx),
- _ => None,
- }),
- }
-}
-
-fn map_error(error: AerospikeConnectorError) -> HttpResponse {
- error!("Aerospike ingestion error: {:?}", error);
- HttpResponse::InternalServerError().finish()
-}
-
-#[get("/")]
-async fn healthcheck(_req: HttpRequest) -> HttpResponse {
- HttpResponse::Ok().finish()
-}
-
-#[get("/batch")]
-async fn healthcheck_batch(_req: HttpRequest) -> HttpResponse {
- HttpResponse::Ok().finish()
-}
-
-#[post("/")]
-async fn event_request_handler(
- json: web::Json<AerospikeEvent>,
- data: web::Data<ServerState>,
-) -> HttpResponse {
- let event = json.into_inner();
- let state = data.into_inner();
-
- trace!(target: "aerospike_http_server", "Event data: {:?}", event);
- // TODO: Handle delete
- if event.msg != "write" {
- return HttpResponse::Ok().finish();
- }
-
- let source_time = SourceTime::new(event.lut, 1);
- let message = map_record(event, &state.tables_index_map);
-
- trace!(target: "aerospike_http_server", "Mapped message {:?}", message);
- match message {
- Ok(None) => HttpResponse::Ok().finish(),
- Ok(Some(message)) => {
- let (sender, receiver) = oneshot::channel::<()>();
- if let Err(e) = state.sender.send(PendingMessage {
- source_time,
- messages: vec![message],
- sender,
- }) {
- error!("Ingestor is down: {:?}", e);
- return HttpResponse::InternalServerError().finish();
- }
- if let Err(e) = receiver.await {
- error!("Pipeline event processor is down: {:?}", e);
- HttpResponse::InternalServerError().finish()
- } else {
- HttpResponse::Ok().finish()
- }
- }
- Err(e) => map_error(e),
- }
-}
-
-#[post("/batch")]
-async fn batch_event_request_handler(
- json: web::Json<Vec<AerospikeEvent>>,
- data: web::Data<ServerState>,
-) -> HttpResponse {
- let events = json.into_inner();
- let state = data.into_inner();
-
- debug!(target: "aerospike_http_server", "Aerospike events count {:?}", events.len());
- trace!(target: "aerospike_http_server", "Aerospike events {:?}", events);
-
- let mut min_lut = u64::MAX;
- let messages = match events
- .into_iter()
- .filter_map(|e| {
- let lut = e.lut;
- let msg = map_record(e, &state.tables_index_map).transpose()?;
- min_lut = min_lut.min(lut);
- Some(msg)
- })
- .collect::<Result<Vec<_>, AerospikeConnectorError>>()
- {
- Ok(msgs) => msgs,
- Err(e) => return map_error(e),
- };
-
- debug!(target: "aerospike_http_server", "Mapped {:?} messages", messages.len());
- trace!(target: "aerospike_http_server", "Mapped messages {:?}", messages);
-
- if !messages.is_empty() {
- let (sender, receiver) = oneshot::channel::<()>();
- if let Err(e) = state.sender.send(PendingMessage {
- messages,
- sender,
- source_time: SourceTime::new(min_lut, 1),
- }) {
- error!("Ingestor is down: {:?}", e);
- return HttpResponse::InternalServerError().finish();
- }
-
- if let Err(e) = receiver.await {
- error!("Pipeline event processor is down: {:?}", e);
- return HttpResponse::InternalServerError().finish();
- }
- }
-
- HttpResponse::Ok().finish()
-}
-
-#[derive(Clone, Debug)]
-struct TableIndexMap {
- table_index: usize,
- columns_map: HashMap<String, (usize, FieldType)>,
-}
-
-#[derive(Clone)]
-struct ServerState {
- tables_index_map: HashMap<String, TableIndexMap>,
- sender: mpsc::UnboundedSender<PendingMessage>,
-}
-
-#[async_trait]
-impl Connector for AerospikeConnector {
- fn types_mapping() -> Vec<(String, Option<FieldType>)>
- where
- Self: Sized,
- {
- vec![
- ("str".into(), Some(FieldType::Decimal)),
- ("bool".into(), Some(FieldType::Boolean)),
- ("int".into(), Some(FieldType::Int)),
- ("float".into(), Some(FieldType::Float)),
- ("blob".into(), Some(FieldType::Boolean)),
- ("list".into(), None),
- ("map".into(), None),
- ("geojson".into(), None),
- ]
- }
-
- async fn validate_connection(&mut self) -> Result<(), BoxedError> {
- Ok(())
- }
-
- async fn list_tables(&mut self) -> Result<Vec<TableIdentifier>, BoxedError> {
- Ok(self
- .config
- .sets
- .iter()
- .map(|set| TableIdentifier {
- schema: Some(self.config.namespace.clone()),
- name: set.to_string(),
- })
- .collect())
- }
-
- async fn validate_tables(&mut self, _tables: &[TableIdentifier]) -> Result<(), BoxedError> {
- Ok(())
- }
-
- async fn list_columns(
- &mut self,
- _tables: Vec<TableIdentifier>,
- ) -> Result<Vec<TableInfo>, BoxedError> {
- Ok(vec![])
- }
-
- async fn get_schemas(
- &mut self,
- table_infos: &[TableInfo],
- ) -> Result<Vec<SourceSchemaResult>, BoxedError> {
- let schemas: HashMap<String, SourceSchema> = match self.config.schemas.clone() {
- Some(schemas) => {
- let schema = SchemaParser::parse_config(&schemas)?;
- serde_json::from_str(&schema)?
- }
- None => table_infos
- .iter()
- .map(|table_info| {
- let table_name = table_info.name.clone();
- let primary_index = table_info
- .column_names
- .iter()
- .position(|n| n == "PK")
- .map_or(vec![], |i| vec![i]);
-
- (
- table_name,
- SourceSchema {
- schema: Schema {
- fields: table_info
- .column_names
- .iter()
- .map(|name| FieldDefinition {
- name: name.clone(),
- typ: if name == "inserted_at" {
- FieldType::Timestamp
- } else if name == "PK" {
- FieldType::UInt
- } else {
- FieldType::String
- },
- nullable: name != "PK",
- source: Default::default(),
- description: None,
- })
- .collect(),
- primary_index,
- },
- cdc_type: Default::default(),
- },
- )
- })
- .collect(),
- };
-
- Ok(table_infos
- .iter()
- .map(|table_info| {
- let table_name = table_info.name.clone();
- let schema = schemas
- .get(&table_name)
- .cloned()
- .ok_or(AerospikeConnectorError::SchemaNotFound(table_name.clone()))?;
-
- let filtered_schema = if table_info.column_names.is_empty() {
- schema
- } else {
- let primary_key_field_names: Vec<String> = schema
- .schema
- .primary_index
- .iter()
- .map(|idx| {
- schema
- .schema
- .fields
- .get(*idx)
- .map(|field| field.name.clone())
- .expect("Field should be present")
- })
- .collect();
-
- let filtered_fields: Vec<FieldDefinition> = schema
- .schema
- .fields
- .into_iter()
- .filter(|field| table_info.column_names.contains(&field.name))
- .collect();
-
- let new_primary_index = filtered_fields
- .iter()
- .enumerate()
- .filter_map(|(i, field)| {
- if primary_key_field_names.contains(&field.name) {
- Some(i)
- } else {
- None
- }
- })
- .collect();
-
- SourceSchema {
- schema: Schema {
- fields: filtered_fields,
- primary_index: new_primary_index,
- },
- cdc_type: Default::default(),
- }
- };
-
- Ok(filtered_schema)
- })
- .collect())
- }
-
- async fn serialize_state(&self) -> Result<Vec<u8>, BoxedError> {
- Ok(vec![])
- }
-
- async fn start(
- &mut self,
- ingestor: &Ingestor,
- tables: Vec<TableInfo>,
- last_checkpoint: Option<OpIdentifier>,
- ) -> Result<(), BoxedError> {
- let hosts = CString::new(self.config.hosts.as_str())?;
- let client = Client::new(&hosts).map_err(Box::new)?;
-
- if last_checkpoint.is_none() {
- let dc_name = self.config.replication.datacenter.clone();
- let namespace = self.config.namespace.clone();
-
- // To read data snapshot we need to rewind xdr stream.
- // Before rewinding we need to remove xdr configuration and then add it again.
- unsafe {
- let request = CString::new(format!(
- "set-config:context=xdr;dc={dc_name};namespace={namespace};action=remove"
- ))?;
- let mut response: *mut i8 = std::ptr::null_mut();
- client.info(&request, &mut response).map_err(Box::new)?;
- }
-
- loop {
- if self.rewind(&client, &dc_name, &namespace).await? {
- info!("Aerospike replication configuration set successfully");
- break;
- } else {
- warn!("Aerospike replication configuration set failed");
- tokio::time::sleep(Duration::from_secs(3)).await;
- }
- }
- }
-
- let mapped_schema = self.get_schemas(&tables).await?;
- ingestor
- .handle_message(IngestionMessage::TransactionInfo(
- TransactionInfo::SnapshottingStarted,
- ))
- .await?;
- ingestor
- .handle_message(IngestionMessage::TransactionInfo(
- TransactionInfo::SnapshottingDone { id: None },
- ))
- .await?;
-
- let tables_index_map: HashMap<String, TableIndexMap> = mapped_schema
- .into_iter()
- .enumerate()
- .map(|(table_index, schema)| {
- let columns_map: HashMap<String, (usize, FieldType)> = schema
- .expect("Schema should be present")
- .schema
- .fields
- .iter()
- .enumerate()
- .map(|(i, field)| (field.name.clone(), (i, field.typ)))
- .collect();
-
- (
- tables[table_index].name.clone(),
- TableIndexMap {
- table_index,
- columns_map,
- },
- )
- })
- .collect();
-
- let (message_sender, message_receiver) = mpsc::unbounded_channel();
- let (operation_id_sender, operation_id_receiver) = mpsc::unbounded_channel();
- let ingestor = ingestor.clone();
- tokio::spawn(async move {
- ingestor_loop(message_receiver, ingestor, operation_id_sender).await
- });
- let node_handle = self.node_handle.clone();
- let event_receiver = self.event_receiver.resubscribe();
- tokio::spawn(async move {
- pipeline_event_processor(node_handle, operation_id_receiver, event_receiver).await
- });
- let server_state = ServerState {
- tables_index_map: tables_index_map.clone(),
- sender: message_sender,
- };
-
- let _server = self.start_server(server_state)?.await;
-
- Ok(())
- }
-}
-
-fn map_record(
- event: AerospikeEvent,
- tables_map: &HashMap<String, TableIndexMap>,
-) -> Result<Option<IngestionMessage>, AerospikeConnectorError> {
- let key: [Option<serde_json::Value>; 4] = match event.key.try_into() {
- Ok(key) => key,
- Err(key) => return Err(AerospikeConnectorError::InvalidKeyValue(key)),
- };
- let [key0, set_name, key2, pk_in_key] = key;
- let Some(set_name) = set_name else {
- return Err(AerospikeConnectorError::SetNameIsNone(
- key0, key2, pk_in_key,
- ));
- };
-
- let table_name = match set_name {
- serde_json::Value::String(s) => s.clone(),
- _ => {
- return Err(AerospikeConnectorError::SetNameIsNone(
- key0, key2, pk_in_key,
- ))
- }
- };
-
- let Some(TableIndexMap {
- columns_map,
- table_index,
- }) = tables_map.get(&table_name)
- else {
- return Ok(None);
- };
-
- let mut fields = vec![Field::Null; columns_map.len()];
- if let Some((pk, _)) = columns_map.get("PK") {
- if let Some(pk_in_key) = pk_in_key {
- match pk_in_key {
- serde_json::Value::String(s) => {
- fields[*pk] = Field::String(s.clone());
- }
- serde_json::Value::Number(n) => {
- fields[*pk] = Field::UInt(
- n.as_u64()
- .ok_or(AerospikeConnectorError::ParsingUIntFailed)?,
- );
- }
- v => return Err(AerospikeConnectorError::KeyNotSupported(v)),
- }
- } else {
- return Err(AerospikeConnectorError::PkIsNone(key0, table_name, key2));
- }
- }
-
- if let Some((index, _)) = columns_map.get("inserted_at") {
- // Create a NaiveDateTime from the timestamp
- let naive = NaiveDateTime::from_timestamp_millis(event.lut as i64)
- .ok_or(AerospikeConnectorError::InvalidTimestamp(event.lut as i64))?;
-
- // Create a normal DateTime from the NaiveDateTime
- let datetime: DateTime<FixedOffset> =
- DateTime::<Utc>::from_naive_utc_and_offset(naive, Utc).fixed_offset();
-
- fields[*index] = Field::Timestamp(datetime);
- }
-
- for bin in event.bins {
- if let Some((i, typ)) = columns_map.get(bin.name.as_str()) {
- fields[*i] = match bin.value {
- Some(value) => map_value_to_field(bin.r#type.as_str(), value, *typ)?,
- None => Field::Null,
- };
- }
- }
-
- Ok(Some(IngestionMessage::OperationEvent {
- table_index: *table_index,
- op: Insert {
- new: dozer_types::types::Record::new(fields),
- },
- id: Some(OpIdentifier::new(event.lut, 0)),
- }))
-}
-
-pub(crate) fn map_value_to_field(
- bin_type: &str,
- mut value: Value,
- typ: FieldType,
-) -> Result<Field, AerospikeConnectorError> {
- if value.is_null() {
- return Ok(Field::Null);
- }
- let unsupported_type = || AerospikeConnectorError::UnsupportedTypeForFieldType {
- bin_type: bin_type.to_owned(),
- field_type: typ,
- };
- let check_type = |wanted_typ| {
- if bin_type == wanted_typ {
- Ok(())
- } else {
- Err(unsupported_type())
- }
- };
- match typ {
- FieldType::UInt => {
- check_type("int")?;
- let number = value.as_number().ok_or_else(unsupported_type)?;
- Ok(Field::UInt(number.as_u64().ok_or_else(|| {
- AerospikeConnectorError::ParsingUIntFailed
- })?))
- }
- FieldType::Int => {
- check_type("int")?;
- let number = value.as_number().ok_or_else(unsupported_type)?;
- Ok(Field::Int(number.as_i64().ok_or_else(|| {
- AerospikeConnectorError::ParsingIntFailed
- })?))
- }
- FieldType::Int8 => {
- check_type("int8")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::Int8(string.parse()?))
- }
- FieldType::U128 => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::U128(string.parse()?))
- }
- FieldType::I128 => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::I128(string.parse()?))
- }
- FieldType::Float => {
- check_type("float")?;
- let number = value.as_number().ok_or_else(unsupported_type)?;
- Ok(Field::Float(
- number
- .as_f64()
- .ok_or(AerospikeConnectorError::ParsingFloatFailed)?
- .into(),
- ))
- }
- FieldType::Boolean => {
- check_type("bool")?;
- Ok(Field::Boolean(
- value.as_bool().ok_or_else(unsupported_type)?,
- ))
- }
- FieldType::String => {
- check_type("str")?;
- Ok(Field::String(
- value.as_str().ok_or_else(unsupported_type)?.to_owned(),
- ))
- }
- FieldType::Text => {
- check_type("str")?;
- Ok(Field::Text(
- value.as_str().ok_or_else(unsupported_type)?.to_owned(),
- ))
- }
- FieldType::Binary => {
- check_type("blob")?;
- if bin_type != "blob" {
- return Err(unsupported_type());
- }
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::Binary(BASE64_STANDARD.decode(string)?))
- }
- FieldType::Decimal => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::Decimal(string.parse()?))
- }
- FieldType::Timestamp => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::Timestamp(DateTime::parse_from_rfc3339(string)?))
- }
- FieldType::Date => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- Ok(Field::Date(string.parse()?))
- }
- FieldType::Json => Ok(Field::Json(serde_json_to_json_value(value)?)),
- FieldType::Point => {
- check_type("geojson")?;
- let json = value.as_object_mut().ok_or_else(unsupported_type)?;
- if !json.get("type").is_some_and(|type_| type_ == "Point") {
- return Err(AerospikeConnectorError::ParsingPointFailed);
- }
- let Some(Value::Array(coords)) = json.remove("coordinates") else {
- return Err(AerospikeConnectorError::ParsingPointFailed);
- };
- let p: [Value; 2] = coords
- .try_into()
- .map_err(|_| AerospikeConnectorError::ParsingPointFailed)?;
- if let (Some(x), Some(y)) = (p[0].as_f64(), p[1].as_f64()) {
- Ok(Field::Point((x, y).into()))
- } else {
- Err(AerospikeConnectorError::ParsingPointFailed)
- }
- }
- FieldType::Duration => {
- check_type("str")?;
- let string = value.as_str().ok_or_else(unsupported_type)?;
- let duration = parse_duration(string)?;
- Ok(Field::Duration(dozer_types::types::DozerDuration(
- duration,
- dozer_types::types::TimeUnit::Nanoseconds,
- )))
- }
- }
-}
-
-fn parse_duration(string: &str) -> Result<Duration, AerospikeConnectorError> {
- let err = |_| AerospikeConnectorError::ParsingDurationFailed;
- if !string.get(0..2).is_some_and(|chars| chars == "PT") {
- return Err(AerospikeConnectorError::ParsingDurationFailed);
- }
- let string = &string[2..];
- let to_duration = |scale, number: &Decimal| -> Result<Duration, AerospikeConnectorError> {
- let as_secs: Decimal = number * Decimal::new(scale, 0);
- let secs = as_secs.try_into().map_err(err)?;
- let frac = as_secs.fract() * Decimal::new(1_000_000_000, 0);
- Ok(Duration::new(secs, frac.try_into().map_err(err)?))
- };
- let (hours, string) = parse_duration_part(string, 'H')?;
- let mut duration = to_duration(3600, &hours)?;
- if hours.is_integer() {
- let (mins, string) = parse_duration_part(string, 'M')?;
- duration += to_duration(60, &mins)?;
- if mins.is_integer() {
- let (secs, string) = parse_duration_part(string, 'S')?;
- duration += to_duration(1, &secs)?;
- if !string.is_empty() {
- return Err(AerospikeConnectorError::ParsingDurationFailed);
- }
- } else if !string.is_empty() {
- return Err(AerospikeConnectorError::ParsingDurationFailed);
- }
- } else if !string.is_empty() {
- return Err(AerospikeConnectorError::ParsingDurationFailed);
- }
- Ok(duration)
-}
-
-fn parse_duration_part(
- string: &str,
- delim: char,
-) -> Result<(Decimal, &str), AerospikeConnectorError> {
- let idx = string.find(delim);
- let value = idx
- .map_or(Ok(Decimal::ZERO), |idx| string[..idx].parse())
- .map_err(|_| AerospikeConnectorError::ParsingDurationFailed)?;
- if let Some(idx) = idx {
- Ok((value, &string[idx + 1..]))
- } else {
- Ok((value, string))
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_parse_duration() {
- assert_eq!(parse_duration("PT3H").unwrap(), Duration::new(3600 * 3, 0));
- assert_eq!(parse_duration("PT3M").unwrap(), Duration::new(60 * 3, 0));
- assert_eq!(parse_duration("PT3S").unwrap(), Duration::new(3, 0));
-
- assert_eq!(
- parse_duration("PT3H3S").unwrap(),
- Duration::new(3600 * 3 + 3, 0)
- );
-
- assert_eq!(
- parse_duration("PT3.2H").unwrap(),
- Duration::new(3600 * 3 + 12 * 60, 0)
- );
-
- assert!(parse_duration("PT3.2H2M").is_err());
- assert_eq!(
- parse_duration("PT0.000123S").unwrap(),
- Duration::new(0, 123_000)
- );
- }
-}
diff --git a/dozer-ingestion/aerospike/src/lib.rs b/dozer-ingestion/aerospike/src/lib.rs
deleted file mode 100644
index 4e1dde9fda..0000000000
--- a/dozer-ingestion/aerospike/src/lib.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-pub mod connector;
-
-#[cfg(test)]
-mod tests;
diff --git a/dozer-ingestion/aerospike/src/tests.rs b/dozer-ingestion/aerospike/src/tests.rs
deleted file mode 100644
index 04b1717871..0000000000
--- a/dozer-ingestion/aerospike/src/tests.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-use std::time::Duration;
-
-use crate::connector::map_value_to_field;
-use base64::prelude::BASE64_STANDARD;
-use base64::Engine;
-use dozer_ingestion_connector::dozer_types::ordered_float::OrderedFloat;
-use dozer_ingestion_connector::dozer_types::rust_decimal::Decimal;
-use dozer_ingestion_connector::dozer_types::serde_json::{json, Value};
-use dozer_ingestion_connector::dozer_types::types::{Field, FieldType};
-
-#[test]
-pub fn test_type_conversion() {
- assert_eq!(
- map_value_to_field("str", Value::Null, FieldType::UInt).unwrap(),
- Field::Null
- );
-
- assert_eq!(
- map_value_to_field("bool", Value::Bool(true), FieldType::Boolean).unwrap(),
- Field::Boolean(true)
- );
- assert!(map_value_to_field("str", Value::String("hello".into()), FieldType::Boolean).is_err());
-
- assert_eq!(
- map_value_to_field("int", json!(30), FieldType::UInt).unwrap(),
- (Field::UInt(30))
- );
- assert_eq!(
- map_value_to_field("int", json!(30), FieldType::Int).unwrap(),
- (Field::Int(30))
- );
- assert!(map_value_to_field("float", json!(30), FieldType::UInt).is_err());
-
- assert_eq!(
- map_value_to_field("float", json!(34.35), FieldType::Float).unwrap(),
- (Field::Float(OrderedFloat(34.35)))
- );
-
- assert_eq!(
- map_value_to_field("float", json!(30), FieldType::Float).unwrap(),
- (Field::Float(OrderedFloat(30.)))
- );
- assert!(map_value_to_field("int", json!(1), FieldType::Float).is_err());
-
- assert_eq!(
- map_value_to_field("str", json!("47"), FieldType::String).unwrap(),
- Field::String("47".to_string())
- );
- assert_eq!(
- map_value_to_field("str", json!("48"), FieldType::Text).unwrap(),
- Field::Text("48".to_string())
- );
-
- assert_eq!(
- map_value_to_field(
- "blob",
- json!(BASE64_STANDARD.encode(vec![52, 57])),
- FieldType::Binary
- )
- .unwrap(),
- Field::Binary(vec![52, 57])
- );
-
- assert_eq!(
- map_value_to_field("str", json!("30.42"), FieldType::Decimal).unwrap(),
- Field::Decimal(Decimal::new(3042, 2))
- );
-
- assert_eq!(
- map_value_to_field("str", json!("PT3.0012S"), FieldType::Duration).unwrap(),
- Field::Duration(
- dozer_ingestion_connector::dozer_types::types::DozerDuration(
- Duration::new(3, 1_200_000),
- dozer_ingestion_connector::dozer_types::types::TimeUnit::Nanoseconds
- )
- )
- );
-}
diff --git a/dozer-ingestion/oracle/Cargo.toml b/dozer-ingestion/oracle/Cargo.toml
deleted file mode 100644
index 2532eab1ad..0000000000
--- a/dozer-ingestion/oracle/Cargo.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-[package]
-name = "dozer-ingestion-oracle"
-version = "0.1.0"
-edition = "2021"
-license = "AGPL-3.0-or-later"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-dozer-ingestion-connector = { path = "../connector" }
-oracle = { version = "0.5.7", features = ["chrono", "stmt_without_lifetime"] }
-regex = "1.10.3"
-
-[dev-dependencies]
-env_logger = "0.11.1"
diff --git a/dozer-ingestion/oracle/src/connector/join.rs b/dozer-ingestion/oracle/src/connector/join.rs
deleted file mode 100644
index 0073463449..0000000000
--- a/dozer-ingestion/oracle/src/connector/join.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-use std::collections::{HashMap, HashSet};
-
-use super::listing::{Constraint, ConstraintColumn, TableColumn};
-
-#[derive(Debug)]
-pub struct Column {
- pub name: String,
- pub data_type: Option<String>,
- pub nullable: Option<String>,
- pub is_primary_key: bool,
- pub precision: Option<i64>,
- pub scale: Option<i64>,
-}
-
-pub fn join_columns_constraints(
- table_columns: Vec<TableColumn>,
- constraint_columns: Vec<ConstraintColumn>,
- constraints: Vec<Constraint>,
-) -> HashMap<(String, String), Vec<Column>> {
- let constraints = constraints.into_iter().collect::<HashSet<_>>();
- let mut all_primary_key_columns = HashSet::<(String, String, String)>::new();
- for constraint_column in constraint_columns {
- let Some(column_name) = constraint_column.column_name else {
- continue;
- };
- let constraint = Constraint {
- owner: Some(constraint_column.owner.clone()),
- constraint_name: Some(constraint_column.constraint_name),
- };
- if constraints.contains(&constraint) {
- all_primary_key_columns.insert((
- constraint_column.owner,
- constraint_column.table_name,
- column_name,
- ));
- }
- }
-
- let mut table_to_columns = HashMap::<(String, String), Vec<Column>>::new();
- for table_column in table_columns {
- let column_triple = (
- table_column.owner,
- table_column.table_name,
- table_column.column_name,
- );
- let is_primary_key = all_primary_key_columns.contains(&column_triple);
- let column = Column {
- name: column_triple.2,
- data_type: table_column.data_type,
- nullable: table_column.nullable,
- is_primary_key,
- precision: table_column.precision,
- scale: table_column.scale,
- };
- let table_pair = (column_triple.0, column_triple.1);
- table_to_columns.entry(table_pair).or_default().push(column);
- }
-
- table_to_columns
-}
diff --git a/dozer-ingestion/oracle/src/connector/listing.rs b/dozer-ingestion/oracle/src/connector/listing.rs
deleted file mode 100644
index 33abd6f2bc..0000000000
--- a/dozer-ingestion/oracle/src/connector/listing.rs
+++ /dev/null
@@ -1,132 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::debug;
-use oracle::Connection;
-
-use super::Error;
-
-#[derive(Debug, Clone)]
-pub struct TableColumn {
- pub owner: String,
- pub table_name: String,
- pub column_name: String,
- pub data_type: Option<String>,
- pub nullable: Option<String>,
- pub precision: Option<i64>,
- pub scale: Option<i64>,
-}
-
-impl TableColumn {
- pub fn list(connection: &Connection, schemas: &[String]) -> Result<Vec<TableColumn>, Error> {
- assert!(!schemas.is_empty());
- let sql = "
- SELECT OWNER, TABLE_NAME, COLUMN_NAME, DATA_TYPE, NULLABLE, DATA_PRECISION, DATA_SCALE
- FROM ALL_TAB_COLUMNS
- WHERE OWNER IN (SELECT COLUMN_VALUE FROM TABLE(:2))
- ";
- let schemas = super::string_collection(connection, schemas)?;
- debug!("{}, {}", sql, schemas);
- let rows = connection.query_as::<(
- String,
- String,
- String,
- Option<String>,
- Option<String>,
- Option<i64>,
- Option<i64>,
- )>(sql, &[&schemas])?;
-
- let mut columns = Vec::new();
- for row in rows {
- let (owner, table_name, column_name, data_type, nullable, precision, scale) = row?;
- let column = TableColumn {
- owner,
- table_name,
- column_name,
- data_type,
- nullable,
- precision,
- scale,
- };
- columns.push(column);
- }
- Ok(columns)
- }
-}
-
-#[derive(Debug, Clone)]
-pub struct ConstraintColumn {
- pub owner: String,
- pub constraint_name: String,
- pub table_name: String,
- pub column_name: Option<String>,
-}
-
-impl ConstraintColumn {
- pub fn list(
- connection: &Connection,
- schemas: &[String],
- ) -> Result<Vec<ConstraintColumn>, Error> {
- assert!(!schemas.is_empty());
- let sql = "
- SELECT
- OWNER,
- CONSTRAINT_NAME,
- TABLE_NAME,
- COLUMN_NAME
- FROM ALL_CONS_COLUMNS
- WHERE OWNER IN (SELECT COLUMN_VALUE FROM TABLE(:2))
- ";
- let schemas = super::string_collection(connection, schemas)?;
- debug!("{}, {}", sql, schemas);
- let rows =
- connection.query_as::<(String, String, String, Option<String>)>(sql, &[&schemas])?;
-
- let mut columns = Vec::new();
- for row in rows {
- let (owner, constraint_name, table_name, column_name) = row?;
- let column = ConstraintColumn {
- owner,
- constraint_name,
- table_name,
- column_name,
- };
- columns.push(column);
- }
- Ok(columns)
- }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct Constraint {
- pub owner: Option<String>,
- pub constraint_name: Option<String>,
-}
-
-impl Constraint {
- pub fn list(connection: &Connection, schemas: &[String]) -> Result<Vec<Constraint>, Error> {
- assert!(!schemas.is_empty());
- let sql = "
- SELECT
- OWNER,
- CONSTRAINT_NAME
- FROM ALL_CONSTRAINTS
- WHERE
- OWNER IN (SELECT COLUMN_VALUE FROM TABLE(:2))
- AND
- CONSTRAINT_TYPE = 'P'
- ";
- let schemas = super::string_collection(connection, schemas)?;
- debug!("{}, {}", sql, schemas);
- let rows = connection.query_as::<(Option<String>, Option<String>)>(sql, &[&schemas])?;
-
- let mut constraints = Vec::new();
- for row in rows {
- let (owner, constraint_name) = row?;
- let constraint = Constraint {
- owner,
- constraint_name,
- };
- constraints.push(constraint);
- }
- Ok(constraints)
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/mapping.rs b/dozer-ingestion/oracle/src/connector/mapping.rs
deleted file mode 100644
index e109cbbbfa..0000000000
--- a/dozer-ingestion/oracle/src/connector/mapping.rs
+++ /dev/null
@@ -1,242 +0,0 @@
-use std::{collections::HashMap, str::FromStr};
-
-use dozer_ingestion_connector::{
- dozer_types::{
- chrono::{DateTime, NaiveDate, Utc},
- log::warn,
- ordered_float::OrderedFloat,
- rust_decimal::Decimal,
- thiserror,
- types::{Field, FieldDefinition, FieldType, Record, Schema, SourceDefinition},
- },
- CdcType, SourceSchema,
-};
-use oracle::Row;
-
-use super::{join::Column, Error};
-
-#[derive(Debug, Clone, Copy)]
-pub struct MappedColumn {
- pub typ: FieldType,
- pub nullable: bool,
-}
-
-#[derive(Debug, Clone, thiserror::Error)]
-pub enum DataTypeError {
- #[error("unsupported data type: {0}")]
- UnsupportedDataType(String),
- #[error("column {schema}.{table_name}.{column_name} has null data type")]
- ColumnDataTypeIsNull {
- schema: String,
- table_name: String,
- column_name: String,
- },
-}
-
-fn map_data_type(
- schema: &str,
- table_name: &str,
- column_name: &str,
- data_type: Option<&str>,
- nullable: Option<&str>,
- precision: Option<i64>,
- scale: Option<i64>,
-) -> Result<MappedColumn, DataTypeError> {
- let data_type = data_type.ok_or_else(|| DataTypeError::ColumnDataTypeIsNull {
- schema: schema.to_string(),
- table_name: table_name.to_string(),
- column_name: column_name.to_string(),
- })?;
- let typ = if data_type.starts_with("TIMESTAMP") {
- FieldType::Timestamp
- } else {
- match data_type {
- "VARCHAR2" => Ok(FieldType::String),
- "NVARCHAR2" => Ok(FieldType::String),
- "INTEGER" => Ok(FieldType::I128),
- "NUMBER" => match (precision, scale) {
- (Some(precision), Some(0)) if precision <= 19 => Ok(FieldType::Int),
- (_, Some(0)) => Ok(FieldType::I128),
- _ => Ok(FieldType::Decimal),
- },
- "FLOAT" => Ok(FieldType::Float),
- "DATE" => Ok(FieldType::Date),
- "BINARY_FLOAT" => Ok(FieldType::Float),
- "BINARY_DOUBLE" => Ok(FieldType::Float),
- "RAW" => Ok(FieldType::Binary),
- "ROWID" => Ok(FieldType::String),
- "CHAR" => Ok(FieldType::String),
- "NCHAR" => Ok(FieldType::String),
- "CLOB" => Ok(FieldType::String),
- "NCLOB" => Ok(FieldType::String),
- "BLOB" => Ok(FieldType::Binary),
- other => Err(DataTypeError::UnsupportedDataType(other.to_string())),
- }?
- };
- let nullable = nullable != Some("N");
- Ok(MappedColumn { typ, nullable })
-}
-
-pub fn map_row(schema: &Schema, row: Row) -> Result<Record, Error> {
- if schema.fields.len() != row.sql_values().len() {
- return Err(Error::ColumnCountMismatch {
- expected: schema.fields.len(),
- actual: row.sql_values().len(),
- });
- }
-
- let values = schema
- .fields
- .iter()
- .enumerate()
- .map(|(index, field)| map_field(index, field, &row))
- .collect::<Result<Vec<_>, _>>()?;
- Ok(Record::new(values))
-}
-
-fn map_field(index: usize, field: &FieldDefinition, row: &Row) -> Result<Field, Error> {
- Ok(match (field.typ, field.nullable) {
- (FieldType::Int, true) => row
- .get::<_, Option<i64>>(index)?
- .map_or(Field::Null, Field::Int),
- (FieldType::Int, false) => Field::Int(row.get(index)?),
- (FieldType::UInt, true) => row
- .get::<_, Option<u64>>(index)?
- .map_or(Field::Null, Field::UInt),
- (FieldType::UInt, false) => Field::UInt(row.get(index)?),
- (FieldType::Float, true) => row
- .get::<_, Option<f64>>(index)?
- .map_or(Field::Null, |value| Field::Float(OrderedFloat(value))),
- (FieldType::Float, false) => Field::Float(OrderedFloat(row.get(index)?)),
- (FieldType::Decimal, true) => match row.get::<_, Option<String>>(index)? {
- Some(decimal) => Field::Decimal(Decimal::from_str(&decimal)?),
- None => Field::Null,
- },
- (FieldType::Decimal, false) => {
- Field::Decimal(Decimal::from_str(&row.get::<_, String>(index)?)?)
- }
- (FieldType::String, true) => row
- .get::<_, Option<String>>(index)?
- .map_or(Field::Null, Field::String),
- (FieldType::String, false) => Field::String(row.get(index)?),
- (FieldType::Binary, true) => row
- .get::<_, Option<Vec<u8>>>(index)?
- .map_or(Field::Null, Field::Binary),
- (FieldType::Binary, false) => Field::Binary(row.get(index)?),
- (FieldType::Date, true) => row
- .get::<_, Option<NaiveDate>>(index)?
- .map_or(Field::Null, Field::Date),
- (FieldType::Date, false) => Field::Date(row.get(index)?),
- (FieldType::Timestamp, true) => row
- .get::<_, Option<DateTime<Utc>>>(index)?
- .map_or(Field::Null, |value| Field::Timestamp(value.fixed_offset())),
- (FieldType::Timestamp, false) => {
- Field::Timestamp(row.get::<_, DateTime<Utc>>(index)?.fixed_offset())
- }
- _ => unreachable!(),
- })
-}
-
-#[derive(Debug, Clone)]
-pub struct MappedColumnResult {
- pub is_primary_key: bool,
- pub is_used: bool,
- pub map_result: Result,
-}
-
-pub type ColumnMap = HashMap<String, MappedColumnResult>;
-
-pub fn map_tables(
- tables: HashMap<(String, String), Vec<Column>>,
-) -> HashMap<(String, String), ColumnMap> {
- tables
- .into_iter()
- .map(|((schema, table_name), columns)| {
- let column_map = map_columns(&schema, &table_name, columns);
- ((schema, table_name), column_map)
- })
- .collect()
-}
-
-fn map_columns(schema: &str, table_name: &str, columns: Vec<Column>) -> ColumnMap {
- columns
- .into_iter()
- .map(|column| {
- let map_result = map_data_type(
- schema,
- table_name,
- &column.name,
- column.data_type.as_deref(),
- column.nullable.as_deref(),
- column.precision,
- column.scale,
- );
- (
- column.name,
- MappedColumnResult {
- is_primary_key: column.is_primary_key,
- is_used: false,
- map_result,
- },
- )
- })
- .collect()
-}
-
-pub fn decide_schema(
- connection: &str,
- schema: Option<String>,
- table_name: String,
- column_names: &[String],
- mut columns: ColumnMap,
-) -> Result<SourceSchema, Error> {
- let mut fields = vec![];
- let mut primary_index = vec![];
- for column_name in column_names {
- let Some(column) = columns.get_mut(column_name) else {
- return Err(Error::ColumnNotFound {
- schema,
- table_name,
- column_name: column_name.clone(),
- });
- };
-
- column.is_used = true;
- if column.is_primary_key {
- primary_index.push(fields.len());
- }
-
- match &column.map_result {
- Ok(column) => fields.push(FieldDefinition {
- name: column_name.clone(),
- typ: column.typ,
- nullable: column.nullable,
- source: SourceDefinition::Table {
- connection: connection.to_string(),
- name: table_name.clone(),
- },
- description: None,
- }),
- Err(err) => return Err(Error::DataType(err.clone())),
- }
- }
-
- if let Some((column_name, _)) = columns
- .iter()
- .find(|(_, column)| !column.is_used && column.is_primary_key)
- {
- warn!(
- "Primary key column {} of table {} in connection {} is not used. Dropping primary key.",
- column_name, table_name, connection
- );
- primary_index.clear();
- }
-
- Ok(SourceSchema {
- schema: Schema {
- fields,
- primary_index,
- },
- cdc_type: CdcType::OnlyPK, // Doesn't matter
- })
-}
diff --git a/dozer-ingestion/oracle/src/connector/mod.rs b/dozer-ingestion/oracle/src/connector/mod.rs
deleted file mode 100644
index 70ea0b617f..0000000000
--- a/dozer-ingestion/oracle/src/connector/mod.rs
+++ /dev/null
@@ -1,548 +0,0 @@
-use std::{
- collections::{HashMap, HashSet},
- num::ParseFloatError,
- sync::Arc,
- time::Duration,
-};
-
-use dozer_ingestion_connector::{
- dozer_types::{
- chrono,
- epoch::SourceTime,
- log::{debug, error},
- models::ingestion_types::{IngestionMessage, OracleReplicator, TransactionInfo},
- node::OpIdentifier,
- rust_decimal::{self, Decimal},
- thiserror,
- types::{FieldType, Operation, Schema},
- },
- Ingestor, SourceSchema, TableIdentifier, TableInfo,
-};
-use oracle::{
- sql_type::{Collection, ObjectType},
- Connection,
-};
-
-#[derive(Debug, Clone)]
-pub struct Connector {
- connection_name: String,
- connection: Arc<Connection>,
- username: String,
- batch_size: usize,
- replicator: OracleReplicator,
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum Error {
- #[error("oracle error: {0:?}")]
- Oracle(#[from] oracle::Error),
- #[error("pdb not found: {0}")]
- PdbNotFound(String),
- #[error("table not found: {0:?}")]
- TableNotFound(TableIdentifier),
- #[error("data type: {0}")]
- DataType(#[from] mapping::DataTypeError),
- #[error("column {schema:?}.{table_name}.{column_name} not found")]
- ColumnNotFound {
- schema: Option<String>,
- table_name: String,
- column_name: String,
- },
- #[error("column count mismatch: expected {expected}, actual {actual}")]
- ColumnCountMismatch { expected: usize, actual: usize },
- #[error("cannot convert Oracle number to decimal: {0}")]
- NumberToDecimal(#[from] rust_decimal::Error),
- #[error("insert failed to match: {0}")]
- InsertFailedToMatch(String),
- #[error("delete failed to match: {0}")]
- DeleteFailedToMatch(String),
- #[error("update failed to match: {0}")]
- UpdateFailedToMatch(String),
- #[error("field {0} not found")]
- FieldNotFound(String),
- #[error("null value for non-nullable field {0}")]
- NullValue(String),
- #[error("cannot parse float: {0}")]
- ParseFloat(#[from] ParseFloatError),
- #[error("cannot parse date time from {1}: {0}")]
- ParseDateTime(#[source] chrono::ParseError, String),
- #[error("got overflow float number {0}")]
- FloatOverflow(Decimal),
- #[error("got error when parsing uint {0}")]
- ParseUIntFailed(Decimal),
- #[error("got error when parsing int {0}")]
- ParseIntFailed(Decimal),
- #[error("type mismatch for {field}, expected {expected:?}, actual {actual:?}")]
- TypeMismatch {
- field: String,
- expected: FieldType,
- actual: FieldType,
- },
-}
-
-/// `oracle`'s `ToSql` implementation for `&str` uses `NVARCHAR2` type, which Oracle expects to be UTF16 encoded by default.
-/// Here we use `VARCHAR2` type instead, which Oracle expects to be UTF8 encoded by default.
-/// This is a macro because it references a temporary `OracleType`.
-macro_rules! str_to_sql {
- ($s:expr) => {
- // `s.len()` is the upper bound of `s.chars().count()`
- (
- &$s,
- &::oracle::sql_type::OracleType::Varchar2($s.len() as u32),
- )
- };
-}
-
-pub type Scn = u64;
-
-impl Connector {
- pub fn new(
- connection_name: String,
- username: String,
- password: &str,
- connect_string: &str,
- batch_size: usize,
- replicator: OracleReplicator,
- ) -> Result<Self, Error> {
- let connection = Connection::connect(&username, password, connect_string)?;
-
- Ok(Self {
- connection_name,
- connection: Arc::new(connection),
- username,
- batch_size,
- replicator,
- })
- }
-
- pub fn get_con_id(&mut self, pdb: &str) -> Result<u32, Error> {
- let sql = "SELECT CON_NAME_TO_ID(:1) FROM DUAL";
- let con_id = self
- .connection
- .query_row_as::<Option<u32>>(sql, &[&str_to_sql!(pdb)])?
- .ok_or_else(|| Error::PdbNotFound(pdb.to_string()));
- self.connection.commit()?;
- con_id
- }
-
- pub fn list_tables(&mut self, schemas: &[String]) -> Result<Vec<TableIdentifier>, Error> {
- let rows = if schemas.is_empty() {
- let sql = "SELECT OWNER, TABLE_NAME FROM ALL_TABLES";
- debug!("{}", sql);
- self.connection.query_as::<(String, String)>(sql, &[])?
- } else {
- let sql = "
- SELECT OWNER, TABLE_NAME
- FROM ALL_TABLES
- WHERE OWNER IN (SELECT COLUMN_VALUE FROM TABLE(:2))
- ";
- let owners = string_collection(&self.connection, schemas)?;
- debug!("{}, {}", sql, owners);
- self.connection
- .query_as::<(String, String)>(sql, &[&owners])?
- };
-
- let tables = rows
- .map(|row| {
- row.map(|(owner, table_name)| TableIdentifier {
- schema: Some(owner),
- name: table_name,
- })
- .map_err(Into::into)
- })
- .collect();
- self.connection.commit()?;
- tables
- }
-
- pub fn list_columns(&mut self, tables: Vec<TableIdentifier>) -> Result<Vec<TableInfo>, Error> {
- // List all tables and columns.
- let schemas = tables
- .iter()
- .map(|table| {
- table
- .schema
- .clone()
- .unwrap_or_else(|| self.username.clone())
- })
- .collect::<HashSet<_>>();
- let table_columns =
- listing::TableColumn::list(&self.connection, &schemas.into_iter().collect::<Vec<_>>())?;
- let mut table_to_columns = HashMap::<(String, String), Vec<String>>::new();
- for table_column in table_columns {
- let table_pair = (table_column.owner, table_column.table_name);
- table_to_columns
- .entry(table_pair)
- .or_default()
- .push(table_column.column_name);
- }
-
- // Collect columns for requested tables.
- let mut result = vec![];
- for table in tables {
- let schema = table
- .schema
- .clone()
- .unwrap_or_else(|| self.username.clone());
- let table_pair = (schema, table.name.clone());
- let column_names = table_to_columns
- .remove(&table_pair)
- .ok_or_else(|| Error::TableNotFound(table.clone()))?;
- result.push(TableInfo {
- schema: table.schema,
- name: table.name,
- column_names,
- });
- }
- self.connection.commit()?;
- Ok(result)
- }
-
- pub fn get_schemas(
- &mut self,
- table_infos: &[TableInfo],
- ) -> Result<Vec<Result<SourceSchema, Error>>, Error> {
- // Collect all tables and columns.
- let schemas = table_infos
- .iter()
- .map(|table| {
- table
- .schema
- .clone()
- .unwrap_or_else(|| self.username.clone())
- })
- .collect::<HashSet<_>>()
- .into_iter()
- .collect::<Vec<_>>();
- let table_columns = listing::TableColumn::list(&self.connection, &schemas)?;
- let constraint_columns =
- listing::ConstraintColumn::list(&self.connection, &schemas).unwrap();
- let constraints = listing::Constraint::list(&self.connection, &schemas).unwrap();
- let table_columns =
- join::join_columns_constraints(table_columns, constraint_columns, constraints);
-
- // Map all the columns.
- let mut table_columns = mapping::map_tables(table_columns);
-
- // Decide `SourceSchemaResult` for each `table_info`
- let mut result = vec![];
- for table_info in table_infos {
- let schema = table_info
- .schema
- .clone()
- .unwrap_or_else(|| self.username.clone());
- let table_pair = (schema, table_info.name.clone());
- let columns = table_columns.remove(&table_pair).ok_or_else(|| {
- Error::TableNotFound(TableIdentifier {
- schema: table_info.schema.clone(),
- name: table_info.name.clone(),
- })
- })?;
- result.push(mapping::decide_schema(
- &self.connection_name,
- table_info.schema.clone(),
- table_pair.1,
- &table_info.column_names,
- columns,
- ));
- }
- self.connection.commit()?;
-
- Ok(result)
- }
-
- pub fn snapshot(&mut self, ingestor: &Ingestor, tables: Vec<TableInfo>) -> Result<Scn, Error> {
- let schemas = self
- .get_schemas(&tables)?
- .into_iter()
- .collect::<Result<Vec<_>, _>>()?;
-
- let sql = "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE";
- debug!("{}", sql);
- self.connection.execute(sql, &[])?;
-
- for (table_index, (table, schema)) in tables.into_iter().zip(schemas).enumerate() {
- let columns = table.column_names.join(", ");
- let owner = table.schema.unwrap_or_else(|| self.username.clone());
- let sql = format!("SELECT {} FROM {}.{}", columns, owner, table.name);
- debug!("{}", sql);
- let rows = self.connection.query(&sql, &[])?;
-
- let mut batch = Vec::with_capacity(self.batch_size);
- for row in rows {
- batch.push(mapping::map_row(&schema.schema, row?)?);
- if batch.len() >= self.batch_size
- && ingestor
- .blocking_handle_message(IngestionMessage::OperationEvent {
- table_index,
- op: Operation::BatchInsert {
- new: std::mem::take(&mut batch),
- },
- id: None,
- })
- .is_err()
- {
- return self.get_scn_and_commit();
- }
- }
-
- if !batch.is_empty()
- && ingestor
- .blocking_handle_message(IngestionMessage::OperationEvent {
- table_index,
- op: Operation::BatchInsert { new: batch },
- id: None,
- })
- .is_err()
- {
- return self.get_scn_and_commit();
- }
- }
-
- self.get_scn_and_commit()
- }
-
- fn get_scn_and_commit(&mut self) -> Result<Scn, Error> {
- let sql = "SELECT DBMS_FLASHBACK.GET_SYSTEM_CHANGE_NUMBER() FROM DUAL";
- let scn = self.connection.query_row_as::<Scn>(sql, &[])?;
- self.connection.commit()?;
- Ok(scn)
- }
-
- pub fn replicate(
- &mut self,
- ingestor: &Ingestor,
- tables: Vec<TableInfo>,
- schemas: Vec<Schema>,
- checkpoint: Scn,
- con_id: Option<u32>,
- ) {
- match self.replicator {
- OracleReplicator::LogMiner {
- poll_interval_in_milliseconds,
- } => self.replicate_log_miner(
- ingestor,
- tables,
- schemas,
- checkpoint,
- con_id,
- Duration::from_millis(poll_interval_in_milliseconds),
- ),
- OracleReplicator::DozerLogReader => unimplemented!("dozer log reader"),
- }
- }
-
- fn replicate_log_miner(
- &mut self,
- ingestor: &Ingestor,
- tables: Vec<TableInfo>,
- schemas: Vec<Schema>,
- checkpoint: Scn,
- con_id: Option<u32>,
- poll_interval: Duration,
- ) {
- let start_scn = checkpoint + 1;
- let table_pair_to_index = tables
- .into_iter()
- .enumerate()
- .map(|(index, table)| {
- let schema = table.schema.unwrap_or_else(|| self.username.clone());
- ((schema, table.name), index)
- })
- .collect::<HashMap<_, _>>();
- let processor = replicate::Processor::new(start_scn, table_pair_to_index, schemas);
-
- let (sender, receiver) = std::sync::mpsc::sync_channel(100);
- let handle = {
- let connection = self.connection.clone();
- let ingestor = ingestor.clone();
- std::thread::spawn(move || {
- replicate::log_miner_loop(
- &connection,
- start_scn,
- con_id,
- poll_interval,
- sender,
- &ingestor,
- )
- })
- };
-
- for transaction in processor.process(receiver) {
- let transaction = match transaction {
- Ok(transaction) => transaction,
- Err(e) => {
- error!("Error during transaction processing: {e}");
- continue;
- }
- };
-
- for (seq, (table_index, op)) in transaction.operations.into_iter().enumerate() {
- if ingestor
- .blocking_handle_message(IngestionMessage::OperationEvent {
- table_index,
- op,
- id: Some(OpIdentifier::new(transaction.commit_scn, seq as u64)),
- })
- .is_err()
- {
- return;
- };
- }
-
- if ingestor
- .blocking_handle_message(IngestionMessage::TransactionInfo(
- TransactionInfo::Commit {
- id: Some(OpIdentifier::new(transaction.commit_scn, 0)),
- source_time: Some(SourceTime::from_chrono(
- &transaction.commit_timestamp,
- 1000,
- )),
- },
- ))
- .is_err()
- {
- return;
- }
- }
-
- handle.join().unwrap();
- }
-}
-
-mod join;
-mod listing;
-mod mapping;
-mod replicate;
-
-const TEMP_DOZER_TYPE_NAME: &str = "TEMP_DOZER_TYPE";
-
-fn temp_varray_of_vchar2(
- connection: &Connection,
- num_strings: usize,
- max_num_chars: usize,
-) -> Result<ObjectType, Error> {
- let sql = format!(
- "CREATE OR REPLACE TYPE {} AS VARRAY({}) OF VARCHAR2({})",
- TEMP_DOZER_TYPE_NAME, num_strings, max_num_chars
- );
- debug!("{}", sql);
- connection.execute(&sql, &[])?;
- connection
- .object_type(TEMP_DOZER_TYPE_NAME)
- .map_err(Into::into)
-}
-
-fn string_collection(connection: &Connection, strings: &[String]) -> Result<Collection, Error> {
- let temp_type = temp_varray_of_vchar2(
- connection,
- strings.len(),
- strings.iter().map(|s| s.len()).max().unwrap(),
- )?;
- let mut collection = temp_type.new_collection()?;
- for string in strings {
- collection.push(&str_to_sql!(*string))?;
- }
- Ok(collection)
-}
-
-#[cfg(test)]
-mod tests {
- #[test]
- #[ignore]
- fn test_connector() {
- use dozer_ingestion_connector::{
- dozer_types::models::ingestion_types::OracleReplicator, IngestionConfig, Ingestor,
- };
- use dozer_ingestion_connector::{
- dozer_types::{models::ingestion_types::IngestionMessage, types::Operation},
- IngestionIterator,
- };
- use std::time::Instant;
-
- fn row_count(message: &IngestionMessage) -> usize {
- match message {
- IngestionMessage::OperationEvent { op, .. } => match op {
- Operation::BatchInsert { new } => new.len(),
- Operation::Insert { .. } => 1,
- Operation::Delete { .. } => 1,
- Operation::Update { .. } => 1,
- },
- _ => 0,
- }
- }
-
- fn estimate_throughput(iterator: IngestionIterator) {
- let mut tic = None;
- let mut count = 0;
- let print_count_interval = 10_000;
- let mut count_mod_interval = 0;
- for message in iterator {
- if tic.is_none() {
- tic = Some(Instant::now());
- }
-
- count += row_count(&message);
- let new_count_mod_interval = count / print_count_interval;
- if new_count_mod_interval > count_mod_interval {
- count_mod_interval = new_count_mod_interval;
- println!("{} rows in {:?}", count, tic.unwrap().elapsed());
- }
- }
- println!("{} rows in {:?}", count, tic.unwrap().elapsed());
- println!(
- "Throughput: {} rows/s",
- count as f64 / tic.unwrap().elapsed().as_secs_f64()
- );
- }
-
- env_logger::init();
-
- let replicate_user = "DOZER";
- let data_user = "DOZER";
- let host = "database-1.cxtwfj9nkwtu.ap-southeast-1.rds.amazonaws.com";
- let sid = "ORCL";
-
- let mut connector = super::Connector::new(
- "oracle".into(),
- replicate_user.into(),
- "123",
- &format!("{}:{}/{}", host, 1521, sid),
- 100_000,
- OracleReplicator::DozerLogReader,
- )
- .unwrap();
- let tables = connector.list_tables(&[data_user.into()]).unwrap();
- let tables = connector.list_columns(tables).unwrap();
- let schemas = connector.get_schemas(&tables).unwrap();
- let schemas = schemas.into_iter().map(Result::unwrap).collect::<Vec<_>>();
- dbg!(&schemas);
- let (ingestor, iterator) = Ingestor::initialize_channel(IngestionConfig::default());
- let handle = {
- let tables = tables.clone();
- std::thread::spawn(move || connector.snapshot(&ingestor, tables))
- };
-
- estimate_throughput(iterator);
- let checkpoint = handle.join().unwrap().unwrap();
-
- let mut connector = super::Connector::new(
- "oracle".into(),
- replicate_user.into(),
- "123",
- &format!("{}:{}/{}", host, 1521, sid),
- 1,
- OracleReplicator::LogMiner {
- poll_interval_in_milliseconds: 1000,
- },
- )
- .unwrap();
- let (ingestor, iterator) = Ingestor::initialize_channel(IngestionConfig::default());
- let schemas = schemas.into_iter().map(|schema| schema.schema).collect();
- let handle = std::thread::spawn(move || {
- connector.replicate(&ingestor, tables, schemas, checkpoint, None)
- });
-
- estimate_throughput(iterator);
- handle.join().unwrap();
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/log/listing.rs b/dozer-ingestion/oracle/src/connector/replicate/log/listing.rs
deleted file mode 100644
index 533747130a..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/log/listing.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::{debug, warn};
-use oracle::Connection;
-
-use crate::connector::{Error, Scn};
-
-#[derive(Debug, Clone)]
-pub struct ArchivedLog {
- pub name: String,
- pub sequence: u32,
- pub first_change: Scn,
- pub next_change: Scn,
-}
-
-impl ArchivedLog {
- pub fn list(connection: &Connection, start_scn: Scn) -> Result<Vec<ArchivedLog>, Error> {
- let sql = "SELECT NAME, SEQUENCE#, FIRST_CHANGE#, NEXT_CHANGE# FROM V$ARCHIVED_LOG WHERE NEXT_CHANGE# > :start_scn AND STATUS = 'A' ORDER BY SEQUENCE# ASC";
- debug!("{}, {}", sql, start_scn);
- let rows = connection
- .query_as::<(String, u32, Scn, Scn)>(sql, &[&start_scn])
- .unwrap();
-
- let mut result = vec![];
- for row in rows {
- let (name, sequence, first_change, next_change) = row?;
- let log = ArchivedLog {
- name,
- sequence,
- first_change,
- next_change,
- };
- if is_continuous(result.last(), &log) {
- result.push(log);
- }
- }
-
- Ok(result)
- }
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct Log {
- pub group: u32,
- pub sequence: u32,
- pub first_change: Scn,
- pub next_change: Scn,
-}
-
-impl Log {
- pub fn list(connection: &Connection, start_scn: Scn) -> Result<Vec<Log>, Error> {
- let sql = "SELECT GROUP#, SEQUENCE#, FIRST_CHANGE#, NEXT_CHANGE# FROM V$LOG WHERE NEXT_CHANGE# > :start_scn ORDER BY SEQUENCE# ASC";
- debug!("{}, {}", sql, start_scn);
- let rows = connection
- .query_as::<(u32, u32, Scn, Scn)>(sql, &[&start_scn])
- .unwrap();
-
- let mut result = vec![];
- for row in rows {
- let (group, sequence, first_change, next_change) = row?;
- let log = Log {
- group,
- sequence,
- first_change,
- next_change,
- };
- if is_continuous(result.last(), &log) {
- result.push(log);
- }
- }
-
- Ok(result)
- }
-}
-
-#[derive(Debug, Clone)]
-pub struct LogFile {
- pub group: u32,
- pub member: String,
-}
-
-impl LogFile {
- pub fn list(connection: &Connection) -> Result<Vec<LogFile>, Error> {
- let sql = "SELECT GROUP#, MEMBER FROM V$LOGFILE WHERE STATUS IS NULL";
- debug!("{}", sql);
- let rows = connection.query_as::<(u32, String)>(sql, &[]).unwrap();
-
- let mut result = vec![];
- for row in rows {
- let (group, member) = row?;
- let log_file = LogFile { group, member };
- result.push(log_file);
- }
-
- Ok(result)
- }
-}
-
-pub trait HasLogIdentifier {
- fn sequence(&self) -> u32;
- fn first_change(&self) -> Scn;
- fn next_change(&self) -> Scn;
-}
-
-impl HasLogIdentifier for ArchivedLog {
- fn sequence(&self) -> u32 {
- self.sequence
- }
-
- fn first_change(&self) -> Scn {
- self.first_change
- }
-
- fn next_change(&self) -> Scn {
- self.next_change
- }
-}
-
-impl HasLogIdentifier for Log {
- fn sequence(&self) -> u32 {
- self.sequence
- }
-
- fn first_change(&self) -> Scn {
- self.first_change
- }
-
- fn next_change(&self) -> Scn {
- self.next_change
- }
-}
-
-pub fn is_continuous(
- last_log: Option<&impl HasLogIdentifier>,
- current_log: &impl HasLogIdentifier,
-) -> bool {
- let Some(last_log) = last_log else {
- return true;
- };
-
- let sequence_is_continuous = last_log.sequence() + 1 == current_log.sequence();
- let scn_is_continuous = last_log.next_change() == current_log.first_change();
-
- if sequence_is_continuous != scn_is_continuous {
- warn!(
- "Log {} has next change {}, but log {} has first change {}",
- last_log.sequence(),
- last_log.next_change(),
- current_log.sequence(),
- current_log.first_change()
- );
- }
- sequence_is_continuous && scn_is_continuous
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/log/merge.rs b/dozer-ingestion/oracle/src/connector/replicate/log/merge.rs
deleted file mode 100644
index c51f092bfc..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/log/merge.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use std::collections::HashMap;
-
-use oracle::Connection;
-
-use crate::connector::{Error, Scn};
-
-use super::listing::{is_continuous, ArchivedLog, Log, LogFile};
-
-pub fn list_and_join_online_log(
- connection: &Connection,
- start_scn: Scn,
-) -> Result<Vec<ArchivedLog>, Error> {
- let logs = Log::list(connection, start_scn)?;
- let log_files = LogFile::list(connection)?;
- let mut log_files = log_files
- .into_iter()
- .map(|log_file| (log_file.group, log_file.member))
- .collect::<HashMap<_, _>>();
-
- let mut result = vec![];
- for log in logs {
- if let Some(name) = log_files.remove(&log.group) {
- let archived_log = ArchivedLog {
- name,
- sequence: log.sequence,
- first_change: log.first_change,
- next_change: log.next_change,
- };
- result.push(archived_log);
- } else {
- // We only want continuous logs
- break;
- }
- }
-
- Ok(result)
-}
-
-pub fn list_and_merge_archived_log(
- connection: &Connection,
- start_scn: Scn,
- mut online_logs: Vec<ArchivedLog>,
-) -> Result<Vec<ArchivedLog>, Error> {
- let mut archived_logs = ArchivedLog::list(connection, start_scn)?;
- let first_continuous_online_log_index = online_logs
- .iter()
- .position(|log| is_continuous(archived_logs.last(), log));
- if let Some(index) = first_continuous_online_log_index {
- archived_logs.extend(online_logs.drain(index..));
- }
- Ok(archived_logs)
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/log/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/log/mod.rs
deleted file mode 100644
index aa0a15a098..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/log/mod.rs
+++ /dev/null
@@ -1,176 +0,0 @@
-use std::{sync::mpsc::SyncSender, time::Duration};
-
-use dozer_ingestion_connector::dozer_types::log::debug;
-use dozer_ingestion_connector::{
- dozer_types::{
- chrono::{DateTime, Utc},
- log::{error, info},
- },
- Ingestor,
-};
-use oracle::Connection;
-
-use crate::connector::{Error, Scn};
-
-mod listing;
-mod merge;
-mod redo;
-
-pub type TransactionId = [u8; 8];
-
-#[derive(Debug, Clone)]
-/// This is a raw row from V$LOGMNR_CONTENTS
-pub struct LogManagerContent {
- pub scn: Scn,
- pub timestamp: DateTime<Utc>,
- pub xid: TransactionId,
- pub pxid: TransactionId,
- pub operation_code: u8,
- pub seg_owner: Option<String>,
- pub table_name: Option<String>,
- pub rbasqn: u32,
- pub rbablk: u32,
- pub rbabyte: u16,
- pub sql_redo: Option<String>,
- pub csf: u8,
-}
-
-/// `ingestor` is only used for checking if ingestion has ended so we can break the loop.
-pub fn log_miner_loop(
- connection: &Connection,
- start_scn: Scn,
- con_id: Option<u32>,
- poll_interval: Duration,
- sender: SyncSender<LogManagerContent>,
- ingestor: &Ingestor,
-) {
- log_reader_loop(
- connection,
- start_scn,
- con_id,
- poll_interval,
- redo::LogMiner,
- sender,
- ingestor,
- )
-}
-
-fn log_reader_loop(
- connection: &Connection,
- mut start_scn: Scn,
- con_id: Option<u32>,
- poll_interval: Duration,
- reader: impl redo::RedoReader,
- sender: SyncSender<LogManagerContent>,
- ingestor: &Ingestor,
-) {
- #[derive(Debug, Clone, Copy)]
- struct LastRba {
- sqn: u32,
- blk: u32,
- byte: u16,
- }
- let mut last_rba: Option = None;
-
- loop {
- debug!(target: "oracle_replication", "Listing logs starting from SCN {}", start_scn);
- let mut logs = match list_logs(connection, start_scn) {
- Ok(logs) => logs,
- Err(e) => {
- if ingestor.is_closed() {
- return;
- }
- error!("Error listing logs: {}. Retrying.", e);
- continue;
- }
- };
-
- if logs.is_empty() {
- if ingestor.is_closed() {
- return;
- }
- info!("No logs found, retrying after {:?}", poll_interval);
- std::thread::sleep(poll_interval);
- continue;
- }
-
- 'replicate_logs: while !logs.is_empty() {
- let log = logs.remove(0);
- debug!(target: "oracle_replication",
- "Reading log {} ({}) ({}, {}), starting from {:?}",
- log.name, log.sequence, log.first_change, log.next_change, last_rba
- );
-
- let iterator = {
- let last_rba = last_rba.and_then(|last_rba| {
- if log.sequence == last_rba.sqn {
- Some((last_rba.blk, last_rba.byte))
- } else {
- None
- }
- });
- match reader.read(connection, &log.name, last_rba, con_id) {
- Ok(iterator) => iterator,
- Err(e) => {
- if ingestor.is_closed() {
- return;
- }
- error!("Error reading log {}: {}. Retrying.", log.name, e);
- break 'replicate_logs;
- }
- }
- };
-
- for content in iterator {
- let content = match content {
- Ok(content) => content,
- Err(e) => {
- if ingestor.is_closed() {
- return;
- }
- error!("Error reading log {}: {}. Retrying.", log.name, e);
- break 'replicate_logs;
- }
- };
- last_rba = Some(LastRba {
- sqn: content.rbasqn,
- blk: content.rbablk,
- byte: content.rbabyte,
- });
- if sender.send(content).is_err() {
- return;
- }
- }
-
- if logs.is_empty() {
- if ingestor.is_closed() {
- return;
- }
- debug!(target: "oracle_replication", "Read all logs, retrying after {:?}", poll_interval);
- std::thread::sleep(poll_interval);
- } else {
- // If there are more logs, we need to start from the next log's first change.
- start_scn = log.next_change;
- }
- }
- }
-}
-
-fn list_logs(connection: &Connection, start_scn: Scn) -> Result<Vec<listing::ArchivedLog>, Error> {
- let logs = merge::list_and_join_online_log(connection, start_scn)?;
- if !log_contains_scn(logs.first(), start_scn) {
- info!(
- "Online log is empty or doesn't contain start scn {}, listing and merging archived logs",
- start_scn
- );
- merge::list_and_merge_archived_log(connection, start_scn, logs)
- } else {
- Ok(logs)
- }
-}
-
-fn log_contains_scn(log: Option<&listing::ArchivedLog>, scn: Scn) -> bool {
- log.map_or(false, |log| {
- log.first_change <= scn && log.next_change > scn
- })
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/log/redo/log_miner.rs b/dozer-ingestion/oracle/src/connector/replicate/log/redo/log_miner.rs
deleted file mode 100644
index 675db2ba58..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/log/redo/log_miner.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, Utc},
- log::{error, trace},
-};
-use oracle::{Connection, ResultSet, RowValue};
-
-use crate::connector::{Error, Scn};
-
-use super::{LogManagerContent, RedoReader};
-
-#[derive(Debug, Clone, Copy)]
-pub struct LogMiner;
-
-#[derive(Debug)]
-pub struct LogMinerIter<'a> {
- result_set: ResultSet<'a, LogManagerContent>,
- connection: &'a Connection,
-}
-
-impl<'a> Drop for LogMinerIter<'a> {
- fn drop(&mut self) {
- let sql = "BEGIN DBMS_LOGMNR.END_LOGMNR; END;";
- trace!("{}", sql);
- if let Err(e) = self.connection.execute(sql, &[]) {
- error!("Failed to end log miner: {}", e);
- }
- }
-}
-
-impl<'a> Iterator for LogMinerIter<'a> {
- type Item = Result<LogManagerContent, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- self.result_set.next().map(|row| row.map_err(Into::into))
- }
-}
-
-impl RedoReader for LogMiner {
- type Iterator<'a> = LogMinerIter<'a>;
-
- fn read<'a>(
- &self,
- connection: &'a Connection,
- log_file_name: &str,
- last_rba: Option<(u32, u16)>,
- con_id: Option<u32>,
- ) -> Result<Self::Iterator<'a>, Error> {
- let sql =
- "BEGIN DBMS_LOGMNR.ADD_LOGFILE(LOGFILENAME => :name, OPTIONS => DBMS_LOGMNR.NEW); END;";
- trace!("{}, {}", sql, log_file_name);
- connection.execute(sql, &[&str_to_sql!(log_file_name)])?;
-
- let sql = "
- BEGIN
- DBMS_LOGMNR.START_LOGMNR(
- OPTIONS =>
- DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG +
- DBMS_LOGMNR.PRINT_PRETTY_SQL +
- DBMS_LOGMNR.NO_ROWID_IN_STMT
- );
- END;";
- trace!("{}", sql);
- connection.execute(sql, &[])?;
-
- let base_sql = "SELECT SCN, TIMESTAMP, XID, PXID, OPERATION_CODE, SEG_OWNER, TABLE_NAME, RBASQN, RBABLK, RBABYTE, SQL_REDO, CSF FROM V$LOGMNR_CONTENTS";
- let rba_filter = "(RBABLK > :last_blk OR (RBABLK = :last_blk AND RBABYTE > :last_byte))";
- let con_id_filter = "SRC_CON_ID = :con_id";
- let result_set = match (last_rba, con_id) {
- (Some((last_blk, last_byte)), Some(con_id)) => {
- let sql = format!("{} WHERE {} AND {}", base_sql, rba_filter, con_id_filter);
- trace!("{}, {}, {}, {}", sql, last_blk, last_byte, con_id);
- connection.query_as_named(
- &sql,
- &[
- ("last_blk", &last_blk),
- ("last_byte", &last_byte),
- ("con_id", &con_id),
- ],
- )
- }
- (Some((last_blk, last_byte)), None) => {
- let sql = format!("{} WHERE {}", base_sql, rba_filter);
- trace!("{}, {}, {}", sql, last_blk, last_byte);
- connection
- .query_as_named(&sql, &[("last_blk", &last_blk), ("last_byte", &last_byte)])
- }
- (None, Some(con_id)) => {
- let sql = format!("{} WHERE {}", base_sql, con_id_filter);
- trace!("{}, {}", sql, con_id);
- connection.query_as_named(&sql, &[("con_id", &con_id)])
- }
- (None, None) => {
- trace!("{}", base_sql);
- connection.query_as(base_sql, &[])
- }
- }?;
- Ok(LogMinerIter {
- result_set,
- connection,
- })
- }
-}
-
-impl RowValue for LogManagerContent {
- fn get(row: &oracle::Row) -> oracle::Result<Self> {
- let (
- scn,
- timestamp,
- xid,
- pxid,
- operation_code,
- seg_owner,
- table_name,
- rbasqn,
- rbablk,
- rbabyte,
- sql_redo,
- csf,
- ) = <(
- Scn,
- DateTime<Utc>,
- Vec<u8>,
- Vec<u8>,
- u8,
- Option<String>,
- Option<String>,
- u32,
- u32,
- u16,
- Option<String>,
- u8,
- ) as RowValue>::get(row)?;
- Ok(LogManagerContent {
- scn,
- timestamp,
- xid: xid.try_into().expect("xid must be 8 bytes"),
- pxid: pxid.try_into().expect("pxid must be 8 bytes"),
- operation_code,
- seg_owner,
- table_name,
- rbasqn,
- rbablk,
- rbabyte,
- sql_redo,
- csf,
- })
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/log/redo/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/log/redo/mod.rs
deleted file mode 100644
index 7d011dfdab..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/log/redo/mod.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-use oracle::Connection;
-
-use crate::connector::Error;
-
-/// Given a log file name, a redo reader emits `LogManagerContent` rows
-pub trait RedoReader {
- type Iterator<'a>: Iterator<Item = Result<LogManagerContent, Error>>;
-
- /// Reads the `LogManagerContent` rows that have:
- ///
- /// - scn >= start_scn
- /// - rba > last_rba.0 || (rba == last_rba.0 && rbabyte > last_rba.1)
- fn read<'a>(
- &self,
- connection: &'a Connection,
- log_file_name: &str,
- last_rba: Option<(u32, u16)>,
- con_id: Option<u32>,
- ) -> Result<Self::Iterator<'a>, Error>;
-}
-
-mod log_miner;
-
-pub use log_miner::LogMiner;
-
-use super::LogManagerContent;
diff --git a/dozer-ingestion/oracle/src/connector/replicate/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/mod.rs
deleted file mode 100644
index 284bfee75b..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/mod.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-mod log;
-mod transaction;
-
-pub use log::log_miner_loop;
-pub use transaction::Processor;
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/commit.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/commit.rs
deleted file mode 100644
index ea06f2c3db..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/commit.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, Utc},
- log::warn,
-};
-
-use crate::connector::{replicate::log::TransactionId, Scn};
-
-use super::{Transaction, TransactionForest};
-
-pub fn commit(
- xid: TransactionId,
- pxid: TransactionId,
- scn: Scn,
- timestamp: DateTime<Utc>,
- transaction_forest: &mut TransactionForest,
-) -> Option<Transaction> {
- let mut operations = vec![];
- transaction_forest.remove_subtree(xid, |_, ops| operations.extend(ops));
-
- if xid == pxid {
- // This is a top level transaction
- Some(Transaction {
- commit_scn: scn,
- commit_timestamp: timestamp,
- operations,
- })
- } else {
- // This is a sub transaction.
- let Some(parent_operations) = transaction_forest.get_mut(&pxid) else {
- warn!(
- "Parent transaction {:02X?} not found for sub transaction {:02X?}",
- pxid, xid
- );
- return None;
- };
- parent_operations.extend(operations);
- None
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/forest.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/forest.rs
deleted file mode 100644
index 97de993ed9..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/forest.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-use std::{
- collections::{hash_map::Entry, HashMap},
- hash::Hash,
-};
-
-#[derive(Debug, Clone)]
-struct Node<Id, T> {
- data: T,
- parent: Option<Id>,
- children: Vec<Id>,
-}
-
-impl<Id, T: Default> Default for Node<Id, T> {
- fn default() -> Self {
- Self {
- data: T::default(),
- parent: None,
- children: vec![],
- }
- }
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct Forest<Id, T> {
- nodes: HashMap<Id, Node<Id, T>>,
-}
-
-impl<Id: Eq + Hash, T> Forest<Id, T> {
- pub fn remove_subtree(&mut self, id: Id, mut f: impl FnMut(Id, T)) -> bool {
- let Some(node) = self.nodes.remove(&id) else {
- return false;
- };
- if let Some(parent) = node.parent.as_ref() {
- self.nodes
- .get_mut(parent)
- .unwrap()
- .children
- .retain(|child| child != &id);
- }
- let mut stack = vec![(id, node)];
- while let Some((id, node)) = stack.pop() {
- f(id, node.data);
- for child in node.children {
- let node = self.nodes.remove(&child).unwrap();
- stack.push((child, node));
- }
- }
- true
- }
-
- pub fn get_mut(&mut self, id: &Id) -> Option<&mut T> {
- self.nodes.get_mut(id).map(|node| &mut node.data)
- }
-}
-
-impl<Id: Eq + Hash, T: Default> Forest<Id, T> {
- pub fn insert_or_get_root(&mut self, id: Id) -> &mut T {
- &mut self.nodes.entry(id).or_default().data
- }
-}
-
-impl<Id: Clone + Eq + Hash, T: Default> Forest<Id, T> {
- pub fn insert_or_get_child(&mut self, parent: Id, child: Id) -> Option<&mut T> {
- if !self.nodes.contains_key(&parent) {
- return None;
- }
-
- let is_new_child = if let Entry::Vacant(entry) = self.nodes.entry(child.clone()) {
- entry.insert(Node {
- data: T::default(),
- parent: Some(parent.clone()),
- children: vec![],
- });
- true
- } else {
- false
- };
-
- if is_new_child {
- self.nodes
- .get_mut(&parent)
- .unwrap()
- .children
- .push(child.clone());
- }
-
- Some(&mut self.nodes.get_mut(&child).unwrap().data)
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_transaction_forest() {
- let mut forest = Forest::<u32, Vec<()>>::default();
- let node1 = forest.insert_or_get_root(1);
- assert_eq!(node1, &vec![]);
- node1.push(());
- assert_eq!(forest.insert_or_get_root(2), &vec![]);
- assert_eq!(forest.insert_or_get_child(0, 3), None);
- let node3 = forest.insert_or_get_child(1, 3).unwrap();
- assert_eq!(node3, &vec![]);
- node3.extend([(), ()]);
- let mut collected = vec![];
- forest.remove_subtree(1, |_, data| collected.extend(data));
- assert_eq!(collected.len(), 3);
- assert_eq!(forest.insert_or_get_root(1), &vec![]);
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/mod.rs
deleted file mode 100644
index bf54d3e1ef..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/mod.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, Utc},
- log::{trace, warn},
-};
-
-use crate::connector::{
- replicate::log::{LogManagerContent, TransactionId},
- Scn,
-};
-
-#[derive(Debug, Clone)]
-pub struct Transaction {
- pub commit_scn: Scn,
- pub commit_timestamp: DateTime<Utc>,
- pub operations: Vec<Operation>,
-}
-
-#[derive(Debug, Clone)]
-pub struct Operation {
- pub seg_owner: String,
- pub table_name: String,
- pub kind: OperationKind,
- pub sql_redo: String,
-}
-
-#[derive(Debug, Clone, Copy)]
-pub enum OperationKind {
- Insert,
- Delete,
- Update,
-}
-
-#[derive(Debug, Clone)]
-pub struct Aggregator {
- start_scn: Scn,
-}
-
-impl Aggregator {
- pub fn new(start_scn: Scn) -> Self {
- Self { start_scn }
- }
-
- pub fn process(
- &self,
- iterator: impl Iterator<Item = LogManagerContent>,
- ) -> impl Iterator<Item = Transaction> {
- Processor {
- iterator,
- start_scn: self.start_scn,
- transaction_forest: Default::default(),
- }
- }
-}
-
-type TransactionForest = forest::Forest<TransactionId, Vec<Operation>>;
-
-#[derive(Debug)]
-struct Processor<I: Iterator<Item = LogManagerContent>> {
- iterator: I,
- start_scn: Scn,
- transaction_forest: TransactionForest,
-}
-
-impl<I: Iterator<Item = LogManagerContent>> Iterator for Processor<I> {
- type Item = Transaction;
-
- fn next(&mut self) -> Option<Self::Item> {
- loop {
- let content = self.iterator.next()?;
-
- if content.operation_code == OP_CODE_COMMIT {
- if let Some(transaction) = commit::commit(
- content.xid,
- content.pxid,
- content.scn,
- content.timestamp,
- &mut self.transaction_forest,
- ) {
- if transaction.commit_scn >= self.start_scn {
- return Some(transaction);
- }
- }
- continue;
- }
-
- if content.operation_code == OP_CODE_ROLLBACK {
- self.transaction_forest
- .remove_subtree(content.xid, |_, _| ());
- continue;
- }
-
- let Some(seg_owner) = content.seg_owner else {
- continue;
- };
- let Some(table_name) = content.table_name else {
- continue;
- };
- let (kind, sql_redo) = match content.operation_code {
- OP_CODE_INSERT => (
- OperationKind::Insert,
- content.sql_redo.expect("insert must have redo"),
- ),
- OP_CODE_DELETE => (
- OperationKind::Delete,
- content.sql_redo.expect("delete must have redo"),
- ),
- OP_CODE_UPDATE => (
- OperationKind::Update,
- content.sql_redo.expect("update must have redo"),
- ),
- OP_CODE_DDL => {
- warn!("Ignoring DDL operation: {:?}", content.sql_redo);
- continue;
- }
- _ => {
- trace!("Ignoring operation: {:?}", content.sql_redo);
- continue;
- }
- };
- op::process_operation(
- content.xid,
- content.pxid,
- Operation {
- seg_owner,
- table_name,
- kind,
- sql_redo,
- },
- &mut self.transaction_forest,
- );
- }
- }
-}
-
-mod commit;
-mod forest;
-mod op;
-
-const OP_CODE_INSERT: u8 = 1;
-const OP_CODE_DELETE: u8 = 2;
-const OP_CODE_UPDATE: u8 = 3;
-const OP_CODE_DDL: u8 = 5;
-const OP_CODE_COMMIT: u8 = 7;
-const OP_CODE_ROLLBACK: u8 = 36;
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/op.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/op.rs
deleted file mode 100644
index ad393b4729..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/aggregate/op.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::warn;
-
-use crate::connector::replicate::log::TransactionId;
-
-use super::{Operation, TransactionForest};
-
-pub fn process_operation(
- xid: TransactionId,
- pxid: TransactionId,
- operation: Operation,
- transaction_forest: &mut TransactionForest,
-) {
- if xid == pxid {
- // This is a top level transaction
- transaction_forest.insert_or_get_root(xid).push(operation);
- } else {
- // This is a sub transaction.
- let Some(operations) = transaction_forest.insert_or_get_child(pxid, xid) else {
- warn!(
- "Parent transaction {:02X?} not found for sub transaction {:02X?}",
- pxid, xid
- );
- return;
- };
- operations.push(operation);
- }
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/csf.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/csf.rs
deleted file mode 100644
index 3fd5dec366..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/csf.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-//! Handles the Continuation SQL flag in V$LOGMNR_CONTENTS.
-
-use crate::connector::replicate::log::LogManagerContent;
-
-/// Output items are guaranteed to have CSF = 0.
-pub fn process(
- iterator: impl Iterator<Item = LogManagerContent>,
-) -> impl Iterator<Item = LogManagerContent> {
- Processor {
- iterator,
- pending: None,
- }
-}
-
-struct Processor<I: Iterator<Item = LogManagerContent>> {
- iterator: I,
- pending: Option<LogManagerContent>,
-}
-
-impl<I: Iterator<Item = LogManagerContent>> Iterator for Processor<I> {
- type Item = LogManagerContent;
-
- fn next(&mut self) -> Option<Self::Item> {
- loop {
- let content = self.iterator.next()?;
-
- if let Some(mut previous_content) = self.pending.take() {
- previous_content.sql_redo = match (previous_content.sql_redo, content.sql_redo) {
- (Some(mut previous), Some(current)) => {
- previous.push_str(&current);
- Some(previous)
- }
- (previous, current) => previous.or(current),
- };
- if content.csf == 0 {
- previous_content.csf = 0;
- return Some(previous_content);
- } else {
- self.pending = Some(previous_content);
- }
- } else if content.csf == 0 {
- return Some(content);
- } else {
- self.pending = Some(content);
- }
- }
- }
-}
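Note: the csf.rs processor removed above merges LogMiner continuation rows: when a row carries CSF = 1, its sql_redo fragment is buffered and fragments from subsequent rows are appended until a row with CSF = 0 completes the statement. A minimal standalone sketch of that merge rule follows; the `Fragment` struct is a hypothetical stand-in for LogManagerContent, keeping only the two fields the pass reads.

    // Hypothetical stand-in for LogManagerContent with just the fields the CSF pass uses.
    struct Fragment {
        sql_redo: Option<String>,
        csf: u8, // 1 = statement continues in the next row, 0 = statement complete
    }

    // Concatenate continuation fragments until a CSF = 0 row closes the statement.
    fn merge_csf(rows: Vec<Fragment>) -> Vec<String> {
        let mut complete = Vec::new();
        let mut pending: Option<String> = None;
        for row in rows {
            let mut sql = pending.take().unwrap_or_default();
            if let Some(part) = row.sql_redo {
                sql.push_str(&part);
            }
            if row.csf == 0 {
                complete.push(sql); // statement finished
            } else {
                pending = Some(sql); // keep accumulating
            }
        }
        complete
    }

    fn main() {
        let rows = vec![
            Fragment { sql_redo: Some("insert into \"HR\".\"EMPLOYEES\"".into()), csf: 1 },
            Fragment { sql_redo: Some(" values (1, 'A');".into()), csf: 0 },
        ];
        assert_eq!(merge_csf(rows).len(), 1);
    }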
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/map.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/map.rs
deleted file mode 100644
index 122ce3b5f9..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/map.rs
+++ /dev/null
@@ -1,161 +0,0 @@
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, ParseError, Utc},
- ordered_float::OrderedFloat,
- rust_decimal::prelude::ToPrimitive,
- types::{Field, FieldType, Operation, Record, Schema},
-};
-
-use crate::connector::Error;
-
-use super::{
- parse::{ParsedOperation, ParsedOperationKind, ParsedRow, ParsedTransaction, ParsedValue},
- Transaction,
-};
-
-#[derive(Debug, Clone)]
-pub struct Mapper {
- schemas: Vec<Schema>,
-}
-
-impl Mapper {
- pub fn new(schemas: Vec<Schema>) -> Self {
- Self { schemas }
- }
-
- pub fn process<'a>(
- &'a self,
- iterator: impl Iterator<Item = Result<ParsedTransaction, Error>> + 'a,
- ) -> impl Iterator<Item = Result<Transaction, Error>> + 'a {
- Processor {
- iterator,
- mapper: self,
- }
- }
-
- fn map(&self, operation: ParsedOperation) -> Result<(usize, Operation), Error> {
- let schema = &self.schemas[operation.table_index];
- Ok((
- operation.table_index,
- match operation.kind {
- ParsedOperationKind::Insert(row) => Operation::Insert {
- new: map_row(row, schema)?,
- },
- ParsedOperationKind::Delete(row) => Operation::Delete {
- old: map_row(row, schema)?,
- },
- ParsedOperationKind::Update { old, new } => Operation::Update {
- old: map_row(old, schema)?,
- new: map_row(new, schema)?,
- },
- },
- ))
- }
-}
-
-#[derive(Debug)]
-struct Processor<'a, I: Iterator<Item = Result<ParsedTransaction, Error>>> {
- iterator: I,
- mapper: &'a Mapper,
-}
-
-impl<'a, I: Iterator<Item = Result<ParsedTransaction, Error>>> Iterator for Processor<'a, I> {
- type Item = Result<Transaction, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- let transaction = match self.iterator.next()? {
- Ok(transaction) => transaction,
- Err(err) => return Some(Err(err)),
- };
-
- let mut operations = vec![];
- for operation in transaction.operations {
- match self.mapper.map(operation) {
- Ok(operation) => operations.push(operation),
- Err(err) => return Some(Err(err)),
- }
- }
-
- Some(Ok(Transaction {
- commit_scn: transaction.commit_scn,
- commit_timestamp: transaction.commit_timestamp,
- operations,
- }))
- }
-}
-
-fn map_row(mut row: ParsedRow, schema: &Schema) -> Result<Record, Error> {
- let mut values = vec![];
- for field in &schema.fields {
- let value = row
- .remove(&field.name)
- .ok_or_else(|| Error::FieldNotFound(field.name.clone()))?;
- values.push(map_value(value, field.typ, field.nullable, &field.name)?);
- }
-
- Ok(Record::new(values))
-}
-
-fn map_value(
- value: ParsedValue,
- typ: FieldType,
- nullable: bool,
- name: &str,
-) -> Result<Field, Error> {
- match (value, typ, nullable) {
- (ParsedValue::Null, _, false) => Err(Error::NullValue(name.to_string())),
- (ParsedValue::Null, _, true) => Ok(Field::Null),
- (ParsedValue::String(string), FieldType::Float, _) => {
- Ok(Field::Float(OrderedFloat(string.parse()?)))
- }
- (ParsedValue::Number(number), FieldType::Float, _) => Ok(Field::Float(OrderedFloat(
- number
- .to_f64()
- .ok_or_else(|| Error::FloatOverflow(number))?,
- ))),
- (ParsedValue::String(string), FieldType::Decimal, _) => Ok(Field::Decimal(string.parse()?)),
- (ParsedValue::Number(number), FieldType::Decimal, _) => Ok(Field::Decimal(number)),
- (ParsedValue::Number(number), FieldType::Int, _) => Ok(Field::Int(
- number
- .to_i64()
- .ok_or_else(|| Error::ParseIntFailed(number))?,
- )),
- (ParsedValue::Number(number), FieldType::UInt, _) => Ok(Field::UInt(
- number
- .to_u64()
- .ok_or_else(|| Error::ParseUIntFailed(number))?,
- )),
- (ParsedValue::String(string), FieldType::String, _) => Ok(Field::String(string)),
- (ParsedValue::Number(_), FieldType::String, _) => Err(Error::TypeMismatch {
- field: name.to_string(),
- expected: FieldType::String,
- actual: FieldType::Decimal,
- }),
- (_, FieldType::Binary, _) => unimplemented!("parse binary from redo sql"),
- (ParsedValue::String(string), FieldType::Date, _) => Ok(Field::Date(
- parse_date(&string).map_err(|e| Error::ParseDateTime(e, string))?,
- )),
- (ParsedValue::Number(_), FieldType::Date, _) => Err(Error::TypeMismatch {
- field: name.to_string(),
- expected: FieldType::Date,
- actual: FieldType::Decimal,
- }),
- (ParsedValue::String(string), FieldType::Timestamp, _) => Ok(Field::Timestamp(
- parse_date_time(&string).map_err(|e| Error::ParseDateTime(e, string))?,
- )),
- (ParsedValue::Number(_), FieldType::Timestamp, _) => Err(Error::TypeMismatch {
- field: name.to_string(),
- expected: FieldType::Timestamp,
- actual: FieldType::Decimal,
- }),
- _ => unreachable!(),
- }
-}
-
-fn parse_date(string: &str) -> Result<NaiveDate, ParseError> {
- NaiveDate::parse_from_str(string, "%d-%b-%y")
-}
-
-fn parse_date_time(string: &str) -> Result<DateTime<FixedOffset>, ParseError> {
- let date_time = NaiveDateTime::parse_from_str(string, "%d-%b-%y %I.%M.%S%.6f %p")?;
- Ok(DateTime::<Utc>::from_naive_utc_and_offset(date_time, Utc).fixed_offset())
-}
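The helpers above parse Oracle's default date/timestamp text (for example `28-JAN-24`, as seen in the update-parser test later in this diff) with the chrono format strings `%d-%b-%y` and `%d-%b-%y %I.%M.%S%.6f %p`. A small usage sketch, assuming chrono's case-insensitive month-name parsing and an illustrative timestamp value:

    use chrono::{NaiveDate, NaiveDateTime};

    fn main() {
        // Oracle DATE as it appears in redo SQL, e.g. '28-JAN-24'.
        let d = NaiveDate::parse_from_str("28-JAN-24", "%d-%b-%y").unwrap();
        assert_eq!(d, NaiveDate::from_ymd_opt(2024, 1, 28).unwrap());

        // Oracle TIMESTAMP rendering (fractional seconds plus AM/PM); the value is illustrative.
        let ts = NaiveDateTime::parse_from_str(
            "28-JAN-24 09.30.00.123456 AM",
            "%d-%b-%y %I.%M.%S%.6f %p",
        )
        .unwrap();
        println!("{ts}");
    }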
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/mod.rs
deleted file mode 100644
index d12afc6c81..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/mod.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-use std::collections::HashMap;
-
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, Utc},
- types::{Operation, Schema},
-};
-
-use crate::connector::{Error, Scn};
-
-use super::log::LogManagerContent;
-
-#[derive(Debug, Clone)]
-pub struct Transaction {
- pub commit_scn: Scn,
- pub commit_timestamp: DateTime<Utc>,
- pub operations: Vec<(usize, Operation)>,
-}
-
-#[derive(Debug, Clone)]
-pub struct Processor {
- aggregator: aggregate::Aggregator,
- parser: parse::Parser,
- mapper: map::Mapper,
-}
-
-impl Processor {
- pub fn new(
- start_scn: Scn,
- table_pair_to_index: HashMap<(String, String), usize>,
- schemas: Vec<Schema>,
- ) -> Self {
- Self {
- aggregator: aggregate::Aggregator::new(start_scn),
- parser: parse::Parser::new(table_pair_to_index),
- mapper: map::Mapper::new(schemas),
- }
- }
-
- pub fn process<'a>(
- &'a self,
- iterator: impl IntoIterator<Item = LogManagerContent> + 'a,
- ) -> impl Iterator<Item = Result<Transaction, Error>> + 'a {
- let csf = csf::process(iterator.into_iter());
- let transaction = self.aggregator.process(csf);
- let parse = self.parser.process(transaction);
- self.mapper.process(parse)
- }
-}
-
-mod aggregate;
-mod csf;
-mod map;
-mod parse;
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/delete.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/delete.rs
deleted file mode 100644
index 8a90dc8f8f..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/delete.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::warn;
-use regex::Regex;
-
-use crate::connector::Error;
-
-use super::{row, ParsedRow};
-
-#[derive(Debug, Clone)]
-pub struct Parser {
- regex: Regex,
- row_parser: row::Parser,
-}
-
-impl Parser {
- pub fn new() -> Self {
- let regex =
- Regex::new(r#"^delete from "((?:C##)?\w+)"\."(\w+)"\n *where\n(?s)(.+)$"#).unwrap();
- Self {
- regex,
- row_parser: row::Parser::new(" and", ";"),
- }
- }
-
- pub fn parse(&self, sql_redo: &str, table_pair: &(String, String)) -> Result<ParsedRow, Error> {
- let captures = self
- .regex
- .captures(sql_redo)
- .ok_or_else(|| Error::DeleteFailedToMatch(sql_redo.to_string()))?;
- let owner = captures.get(1).unwrap().as_str();
- let table_name = captures.get(2).unwrap().as_str();
- if owner != table_pair.0 || table_name != table_pair.1 {
- warn!(
- "Table name {}.{} doesn't match {}.{} in log content",
- owner, table_name, table_pair.0, table_pair.1
- );
- }
-
- self.row_parser.parse(captures.get(3).unwrap().as_str())
- }
-}
-
-#[test]
-fn test_parse() {
- let parser = Parser::new();
- let sql_redo = r#"delete from "HR"."EMPLOYEES"
- where
- "EMPLOYEE_ID" = 306 and
- "FIRST_NAME" = 'Nandini' and
- "LAST_NAME" = 'Shastry' and
- "EMAIL" = 'NSHASTRY' and
- "PHONE_NUMBER" = '1234567890' and
- "JOB_ID" = 'HR_REP' and
- "SALARY" = 120000 and
- "COMMISSION_PCT" = .05 and
- "MANAGER_ID" = 105 and
- "DEPARTMENT_ID" = 10;
- "#;
- let parsed = parser
- .parse(sql_redo, &("HR".to_string(), "EMPLOYEES".to_string()))
- .unwrap();
- assert_eq!(parsed.len(), 10);
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/insert.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/insert.rs
deleted file mode 100644
index a5ab56daf2..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/insert.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::warn;
-use regex::Regex;
-
-use crate::connector::Error;
-
-use super::{row, ParsedRow};
-
-#[derive(Debug, Clone)]
-pub struct Parser {
- regex: Regex,
- row_parser: row::Parser,
-}
-
-impl Parser {
- pub fn new() -> Self {
- let regex =
- Regex::new(r#"^insert into "((?:C##)?\w+)"\."(\w+)"\n *values\n(?s)(.+)$"#).unwrap();
- Self {
- regex,
- row_parser: row::Parser::new(",", ";"),
- }
- }
-
- pub fn parse(&self, sql_redo: &str, table_pair: &(String, String)) -> Result<ParsedRow, Error> {
- let captures = self
- .regex
- .captures(sql_redo)
- .ok_or_else(|| Error::InsertFailedToMatch(sql_redo.to_string()))?;
- let owner = captures.get(1).unwrap().as_str();
- let table_name = captures.get(2).unwrap().as_str();
- if owner != table_pair.0 || table_name != table_pair.1 {
- warn!(
- "Table name {}.{} doesn't match {}.{} in log content",
- owner, table_name, table_pair.0, table_pair.1
- );
- }
-
- self.row_parser.parse(captures.get(3).unwrap().as_str())
- }
-}
-
-#[test]
-fn test_parse() {
- let parser = Parser::new();
- let sql_redo = r#"insert into "HR"."EMPLOYEES"
- values
- "EMPLOYEE_ID" = 306,
- "FIRST_NAME" = 'Nandini',
- "LAST_NAME" = 'Shastry',
- "EMAIL" = 'NSHASTRY',
- "PHONE_NUMBER" = '1234567890',
- "JOB_ID" = 'HR_REP',
- "SALARY" = 120000,
- "COMMISSION_PCT" = .05,
- "MANAGER_ID" = 105,
- "NULL_FIELD" IS NULL,
- "DEPARTMENT_ID" = 10;
- "#;
- let parsed = parser
- .parse(sql_redo, &("HR".to_string(), "EMPLOYEES".to_string()))
- .unwrap();
- assert_eq!(parsed.len(), 11);
-}
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/mod.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/mod.rs
deleted file mode 100644
index 1bbb371ab8..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/mod.rs
+++ /dev/null
@@ -1,141 +0,0 @@
-use std::{collections::HashMap, str::FromStr};
-
-use dozer_ingestion_connector::dozer_types::{
- chrono::{DateTime, Utc},
- log::trace,
- rust_decimal::Decimal,
-};
-
-use crate::connector::{Error, Scn};
-
-use super::aggregate::{Operation, OperationKind, Transaction};
-
-#[derive(Debug, Clone)]
-pub struct ParsedTransaction {
- pub commit_scn: Scn,
- pub commit_timestamp: DateTime<Utc>,
- pub operations: Vec<ParsedOperation>,
-}
-
-#[derive(Debug, Clone)]
-pub struct ParsedOperation {
- pub table_index: usize,
- pub kind: ParsedOperationKind,
-}
-
-pub type ParsedRow = HashMap<String, ParsedValue>;
-
-#[derive(Debug, Clone)]
-pub enum ParsedOperationKind {
- Insert(ParsedRow),
- Delete(ParsedRow),
- Update { old: ParsedRow, new: ParsedRow },
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub enum ParsedValue {
- String(String),
- Number(Decimal),
- Null,
-}
-
-#[derive(Debug, Clone)]
-pub struct Parser {
- insert_parser: insert::Parser,
- delete_parser: delete::Parser,
- update_parser: update::Parser,
- table_pair_to_index: HashMap<(String, String), usize>,
-}
-
-impl Parser {
- pub fn new(table_pair_to_index: HashMap<(String, String), usize>) -> Self {
- Self {
- insert_parser: insert::Parser::new(),
- delete_parser: delete::Parser::new(),
- update_parser: update::Parser::new(),
- table_pair_to_index,
- }
- }
-
- pub fn process<'a>(
- &'a self,
- iterator: impl Iterator<Item = Transaction> + 'a,
- ) -> impl Iterator<Item = Result<ParsedTransaction, Error>> + 'a {
- Processor {
- iterator,
- parser: self,
- }
- }
-
- fn parse(&self, operation: Operation) -> Result<Option<ParsedOperation>, Error> {
- let table_pair = (operation.seg_owner, operation.table_name);
- let Some(&table_index) = self.table_pair_to_index.get(&table_pair) else {
- trace!(
- "Ignoring operation on table {}.{}",
- table_pair.0,
- table_pair.1
- );
- return Ok(None);
- };
-
- let kind = match operation.kind {
- OperationKind::Insert => ParsedOperationKind::Insert(
- self.insert_parser.parse(&operation.sql_redo, &table_pair)?,
- ),
- OperationKind::Delete => ParsedOperationKind::Delete(
- self.delete_parser.parse(&operation.sql_redo, &table_pair)?,
- ),
- OperationKind::Update => {
- let (old, new) = self.update_parser.parse(&operation.sql_redo, &table_pair)?;
- ParsedOperationKind::Update { old, new }
- }
- };
- Ok(Some(ParsedOperation { table_index, kind }))
- }
-}
-
-#[derive(Debug)]
-struct Processor<'a, I: Iterator<Item = Transaction>> {
- iterator: I,
- parser: &'a Parser,
-}
-
-impl<'a, I: Iterator<Item = Transaction>> Iterator for Processor<'a, I> {
- type Item = Result<ParsedTransaction, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- let transaction = self.iterator.next()?;
-
- let mut operations = vec![];
- for operation in transaction.operations {
- match self.parser.parse(operation) {
- Ok(Some(operation)) => operations.push(operation),
- Ok(None) => continue,
- Err(err) => return Some(Err(err)),
- }
- }
-
- Some(Ok(ParsedTransaction {
- commit_scn: transaction.commit_scn,
- commit_timestamp: transaction.commit_timestamp,
- operations,
- }))
- }
-}
-
-impl FromStr for ParsedValue {
- type Err = Error;
-
- fn from_str(s: &str) -> Result<Self, Self::Err> {
- if s.starts_with('\'') {
- Ok(ParsedValue::String(s[1..s.len() - 1].to_string()))
- } else {
- Ok(ParsedValue::Number(s.parse()?))
- }
- }
-}
-
-mod delete;
-mod insert;
-mod row;
-mod update;
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/row.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/row.rs
deleted file mode 100644
index fcafe6a5ed..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/row.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-use std::collections::HashMap;
-
-use regex::Regex;
-
-use crate::connector::Error;
-
-use super::{ParsedRow, ParsedValue};
-
-#[derive(Debug, Clone)]
-pub struct Parser {
- regex: Regex,
-}
-
-impl Parser {
- pub fn new(delimiter: &str, end: &str) -> Self {
- let regex = Regex::new(&format!(
- "\"(\\w+)\" (= (.+)|IS NULL)({} *\\n|{})",
- delimiter, end
- ))
- .unwrap();
- Self { regex }
- }
-
- pub fn parse(&self, values: &str) -> Result<ParsedRow, Error> {
- let mut result = HashMap::new();
- for cap in self.regex.captures_iter(values) {
- let column = cap.get(1).unwrap().as_str();
- let value = match cap.get(3) {
- Some(value) => value.as_str().parse()?,
- None => ParsedValue::Null,
- };
- result.insert(column.to_string(), value);
- }
- Ok(result)
- }
-}
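For reference, row::Parser above extracts `"COLUMN" = value` and `"COLUMN" IS NULL` pairs from redo SQL with a regex built from a pair delimiter and a terminator. A minimal sketch of the same pattern shape, instantiated with the `" and"` / `";"` arguments the delete parser uses (the input text below is illustrative):

    use regex::Regex;

    fn main() {
        // Same pattern shape as row::Parser::new(" and", ";") builds.
        let re = Regex::new(r#""(\w+)" (= (.+)|IS NULL)( and *\n|;)"#).unwrap();
        let clause = "\"EMPLOYEE_ID\" = 306 and\n\"FIRST_NAME\" = 'Nandini' and\n\"DEPARTMENT_ID\" = 10;";
        for cap in re.captures_iter(clause) {
            let column = &cap[1];
            // Capture group 3 is absent when the pair is `IS NULL`.
            let value = cap.get(3).map_or("NULL", |m| m.as_str());
            println!("{column} -> {value}");
        }
    }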
diff --git a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/update.rs b/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/update.rs
deleted file mode 100644
index e963938010..0000000000
--- a/dozer-ingestion/oracle/src/connector/replicate/transaction/parse/update.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-use dozer_ingestion_connector::dozer_types::log::warn;
-use regex::Regex;
-
-use crate::connector::Error;
-
-use super::{row, ParsedRow};
-
-#[derive(Debug, Clone)]
-pub struct Parser {
- regex: Regex,
- new_row_parser: row::Parser,
- old_row_parser: row::Parser,
-}
-
-impl Parser {
- pub fn new() -> Self {
- let regex = Regex::new(
- r#"^update "((?:C##)?\w+)"\."(\w+)"\n *set *\n *(?s)(.+) *where *\n(?s)(.+)$"#,
- )
- .unwrap();
- Self {
- regex,
- new_row_parser: row::Parser::new(",", "\n"),
- old_row_parser: row::Parser::new(" and", ";"),
- }
- }
-
- pub fn parse(
- &self,
- sql_redo: &str,
- table_pair: &(String, String),
- ) -> Result<(ParsedRow, ParsedRow), Error> {
- let captures = self
- .regex
- .captures(sql_redo)
- .ok_or_else(|| Error::UpdateFailedToMatch(sql_redo.to_string()))?;
- let owner = captures.get(1).unwrap().as_str();
- let table_name = captures.get(2).unwrap().as_str();
- if owner != table_pair.0 || table_name != table_pair.1 {
- warn!(
- "Table name {}.{} doesn't match {}.{} in log content",
- owner, table_name, table_pair.0, table_pair.1
- );
- }
-
- let mut new_row = self
- .new_row_parser
- .parse(captures.get(3).unwrap().as_str())?;
- let old_row = self
- .old_row_parser
- .parse(captures.get(4).unwrap().as_str())?;
- for (column, old_value) in old_row.iter() {
- if !new_row.contains_key(column) {
- new_row.insert(column.clone(), old_value.clone());
- }
- }
- Ok((old_row, new_row))
- }
-}
-
-#[test]
-fn test_parse() {
- use super::ParsedValue;
-
- let parser = Parser::new();
- let sql_redo = r#"update "DOZER"."TRANSACTIONS"
- set
- "TYPE" = 'REBATE'
- where
- "TRANSACTION_ID" = 12001 and
- "CUSTOMER_ID" = 63147 and
- "TYPE" = 'Withdrawal' and
- "AMOUNT" = 9691.34 and
- "CURRENCY" = 'USD' and
- "TRANSACTION_DATE" = '28-JAN-24' and
- "STATUS" = 'Completed' and
- "DESCRIPTION" = 'Yeah become language inside purpose.';
- "#;
- let (old, new) = parser
- .parse(sql_redo, &("HR".to_string(), "EMPLOYEES".to_string()))
- .unwrap();
- assert_eq!(old.len(), 8);
- assert_eq!(new.len(), 8);
- assert_eq!(
- old.get("TRANSACTION_ID").unwrap(),
- &ParsedValue::Number("12001".parse().unwrap())
- );
- assert_eq!(
- new.get("TRANSACTION_ID").unwrap(),
- &ParsedValue::Number("12001".parse().unwrap())
- );
- assert_eq!(
- old.get("TYPE").unwrap(),
- &ParsedValue::String("Withdrawal".to_string())
- );
- assert_eq!(
- new.get("TYPE").unwrap(),
- &ParsedValue::String("REBATE".to_string())
- );
-}
diff --git a/dozer-ingestion/oracle/src/lib.rs b/dozer-ingestion/oracle/src/lib.rs
deleted file mode 100644
index cf398cb097..0000000000
--- a/dozer-ingestion/oracle/src/lib.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-use dozer_ingestion_connector::{
- async_trait,
- dozer_types::{
- errors::internal::BoxedError,
- log::info,
- models::ingestion_types::{IngestionMessage, OracleConfig, TransactionInfo},
- node::OpIdentifier,
- types::FieldType,
- },
- tokio, Connector, Ingestor, SourceSchemaResult, TableIdentifier, TableInfo,
-};
-
-#[derive(Debug)]
-pub struct OracleConnector {
- connection_name: String,
- config: OracleConfig,
- connectors: Option<Connectors>,
-}
-
-#[derive(Debug, Clone)]
-struct Connectors {
- root_connector: connector::Connector,
- pdb_connector: connector::Connector,
- con_id: Option<u32>,
-}
-
-const DEFAULT_BATCH_SIZE: usize = 100_000;
-
-impl OracleConnector {
- pub fn new(connection_name: String, config: OracleConfig) -> Self {
- Self {
- connection_name,
- config,
- connectors: None,
- }
- }
-
- async fn ensure_connection(
- &mut self,
- force_reconnect: bool,
- ) -> Result<Connectors, connector::Error> {
- if self.connectors.is_none() || force_reconnect {
- let connection_name = self.connection_name.clone();
- let config = self.config.clone();
- let pdb = self.config.pdb.clone();
- self.connectors = Some(
- tokio::task::spawn_blocking(move || {
- let root_connect_string =
- format!("{}:{}/{}", config.host, config.port, config.sid);
- let batch_size = config.batch_size.unwrap_or(DEFAULT_BATCH_SIZE);
- let mut root_connector = connector::Connector::new(
- connection_name.clone(),
- config.user.clone(),
- &config.password,
- &root_connect_string,
- batch_size,
- config.replicator,
- )?;
-
- let (pdb_connector, con_id) = if let Some(pdb) = pdb {
- let pdb_connect_string = format!("{}:{}/{}", config.host, config.port, pdb);
- let pdb_connector = connector::Connector::new(
- connection_name,
- config.user.clone(),
- &config.password,
- &pdb_connect_string,
- batch_size,
- config.replicator,
- )?;
- let con_id = root_connector.get_con_id(&pdb)?;
- (pdb_connector, Some(con_id))
- } else {
- (root_connector.clone(), None)
- };
-
- Ok::<_, connector::Error>(Connectors {
- root_connector,
- pdb_connector,
- con_id,
- })
- })
- .await
- .unwrap()?,
- );
- }
- Ok(self.connectors.as_ref().unwrap().clone())
- }
-}
-
-#[async_trait]
-impl Connector for OracleConnector {
- fn types_mapping() -> Vec<(String, Option<FieldType>)>
- where
- Self: Sized,
- {
- todo!()
- }
-
- async fn validate_connection(&mut self) -> Result<(), BoxedError> {
- self.ensure_connection(false).await?;
- Ok(())
- }
-
- async fn list_tables(&mut self) -> Result<Vec<TableIdentifier>, BoxedError> {
- let mut connectors = self.ensure_connection(false).await?;
- let schemas = self.config.schemas.clone();
- tokio::task::spawn_blocking(move || connectors.pdb_connector.list_tables(&schemas))
- .await
- .unwrap()
- .map_err(Into::into)
- }
-
- async fn validate_tables(&mut self, tables: &[TableIdentifier]) -> Result<(), BoxedError> {
- self.list_columns(tables.to_vec()).await?;
- Ok(())
- }
-
- async fn list_columns(
- &mut self,
- tables: Vec<TableIdentifier>,
- ) -> Result<Vec<TableInfo>, BoxedError> {
- let mut connectors = self.ensure_connection(false).await?;
- tokio::task::spawn_blocking(move || connectors.pdb_connector.list_columns(tables))
- .await
- .unwrap()
- .map_err(Into::into)
- }
-
- async fn get_schemas(
- &mut self,
- table_infos: &[TableInfo],
- ) -> Result<Vec<SourceSchemaResult>, BoxedError> {
- let mut connectors = self.ensure_connection(false).await?;
- let table_infos = table_infos.to_vec();
- Ok(
- tokio::task::spawn_blocking(move || connectors.pdb_connector.get_schemas(&table_infos))
- .await
- .unwrap()?
- .into_iter()
- .map(|result| result.map_err(Into::into))
- .collect(),
- )
- }
-
- async fn serialize_state(&self) -> Result<Vec<u8>, BoxedError> {
- Ok(vec![])
- }
-
- async fn start(
- &mut self,
- ingestor: &Ingestor,
- tables: Vec<TableInfo>,
- last_checkpoint: Option<OpIdentifier>,
- ) -> Result<(), BoxedError> {
- let checkpoint = if let Some(last_checkpoint) = last_checkpoint {
- last_checkpoint.txid
- } else {
- info!("No checkpoint passed, starting snapshotting");
-
- let ingestor_clone = ingestor.clone();
- let tables = tables.clone();
- let mut connectors = self.ensure_connection(false).await?;
-
- if ingestor
- .handle_message(IngestionMessage::TransactionInfo(
- TransactionInfo::SnapshottingStarted,
- ))
- .await
- .is_err()
- {
- return Ok(());
- }
- let scn = tokio::task::spawn_blocking(move || {
- connectors.pdb_connector.snapshot(&ingestor_clone, tables)
- })
- .await
- .unwrap()?;
- ingestor
- .handle_message(IngestionMessage::TransactionInfo(
- TransactionInfo::SnapshottingDone {
- id: Some(OpIdentifier {
- txid: scn,
- seq_in_tx: 0,
- }),
- },
- ))
- .await?;
- scn
- };
-
- info!("Replicating from checkpoint: {}", checkpoint);
- let ingestor = ingestor.clone();
- let schemas = self.get_schemas(&tables).await?;
- let schemas = schemas
- .into_iter()
- .map(|schema| schema.map(|schema| schema.schema))
- .collect::<Result<Vec<_>, _>>()?;
- let mut connectors = self.ensure_connection(false).await?;
- tokio::task::spawn_blocking(move || {
- connectors.root_connector.replicate(
- &ingestor,
- tables,
- schemas,
- checkpoint,
- connectors.con_id,
- )
- })
- .await
- .unwrap();
-
- Ok(())
- }
-}
-
-mod connector;
diff --git a/dozer-ingestion/src/errors.rs b/dozer-ingestion/src/errors.rs
index 5f6646c4e2..8668bc6e20 100644
--- a/dozer-ingestion/src/errors.rs
+++ b/dozer-ingestion/src/errors.rs
@@ -33,6 +33,9 @@ pub enum ConnectorError {
#[error("javascript feature is not enabled")]
JavascrtiptFeatureNotEnabled,
+ #[error("{0}: This feature is only avaialble in enteprise. Please contact us.")]
+ FeatureNotEnabled(String),
+
#[error("{0} is not supported as a source connector")]
Unsupported(String),
}
diff --git a/dozer-ingestion/src/lib.rs b/dozer-ingestion/src/lib.rs
index 72090e531a..934429f0d3 100644
--- a/dozer-ingestion/src/lib.rs
+++ b/dozer-ingestion/src/lib.rs
@@ -1,6 +1,3 @@
-use std::sync::Arc;
-
-use dozer_ingestion_aerospike::connector::AerospikeConnector;
#[cfg(feature = "ethereum")]
use dozer_ingestion_connector::dozer_types::models::ingestion_types::EthProviderConfig;
use dozer_ingestion_connector::dozer_types::{
@@ -10,7 +7,6 @@ use dozer_ingestion_connector::dozer_types::{
connection::{Connection, ConnectionConfig},
ingestion_types::default_grpc_adapter,
},
- node::NodeHandle,
prettytable::Table,
};
#[cfg(feature = "datafusion")]
@@ -27,7 +23,6 @@ use dozer_ingestion_mongodb::MongodbConnector;
use dozer_ingestion_mysql::connector::{mysql_connection_opts_from_url, MySQLConnector};
#[cfg(feature = "datafusion")]
use dozer_ingestion_object_store::connector::ObjectStoreConnector;
-use dozer_ingestion_oracle::OracleConnector;
use dozer_ingestion_postgres::{
connection::helper::map_connection_config,
connector::{PostgresConfig, PostgresConnector},
@@ -36,6 +31,7 @@ use dozer_ingestion_postgres::{
use dozer_ingestion_snowflake::connector::SnowflakeConnector;
use dozer_ingestion_webhook::connector::WebhookConnector;
use errors::ConnectorError;
+use std::sync::Arc;
use tokio::runtime::Runtime;
pub mod errors;
@@ -157,15 +153,10 @@ pub fn get_connector(
runtime,
javascript_config,
))),
- ConnectionConfig::Aerospike(config) => Ok(Box::new(AerospikeConnector::new(
- config,
- NodeHandle::new(None, connection.name),
- event_hub.receiver,
- ))),
- ConnectionConfig::Oracle(oracle_config) => Ok(Box::new(OracleConnector::new(
- connection.name,
- oracle_config,
- ))),
+ ConnectionConfig::Aerospike(_) => {
+ Err(ConnectorError::FeatureNotEnabled("Aerospike".to_string()))
+ }
+ ConnectionConfig::Oracle(_) => Err(ConnectorError::FeatureNotEnabled("Oracle".to_string())),
}
}
diff --git a/dozer-sink-aerospike/Cargo.toml b/dozer-sink-aerospike/Cargo.toml
deleted file mode 100644
index 2f711eaf79..0000000000
--- a/dozer-sink-aerospike/Cargo.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-[package]
-name = "dozer-sink-aerospike"
-version = "0.1.0"
-edition = "2021"
-license = "AGPL-3.0-or-later"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-dozer-core = { path = "../dozer-core" }
-dozer-types = { path = "../dozer-types" }
-aerospike-client-sys = { path = "./aerospike-client-sys" }
-itertools = "0.12"
-smallvec = "1.13.1"
diff --git a/dozer-sink-aerospike/aerospike-client-sys/Cargo.lock b/dozer-sink-aerospike/aerospike-client-sys/Cargo.lock
deleted file mode 100644
index 7ae1b029d4..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/Cargo.lock
+++ /dev/null
@@ -1,7 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "aerospike-client-sys"
-version = "0.1.0"
diff --git a/dozer-sink-aerospike/aerospike-client-sys/Cargo.toml b/dozer-sink-aerospike/aerospike-client-sys/Cargo.toml
deleted file mode 100644
index ec99384aae..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/Cargo.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-[package]
-name = "aerospike-client-sys"
-version = "0.1.0"
-edition = "2021"
-license = "AGPL-3.0-or-later"
-
-[lib]
-doctest = false
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-
-[build-dependencies]
-bindgen = "0.69.4"
diff --git a/dozer-sink-aerospike/aerospike-client-sys/aerospike-client-c b/dozer-sink-aerospike/aerospike-client-sys/aerospike-client-c
deleted file mode 160000
index 029db7ac63..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/aerospike-client-c
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 029db7ac63ba3533150c359e0dec5a51e54914ab
diff --git a/dozer-sink-aerospike/aerospike-client-sys/aerospike_client.h b/dozer-sink-aerospike/aerospike-client-sys/aerospike_client.h
deleted file mode 100644
index c7716020e1..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/aerospike_client.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
diff --git a/dozer-sink-aerospike/aerospike-client-sys/build.rs b/dozer-sink-aerospike/aerospike-client-sys/build.rs
deleted file mode 100644
index cd6143c8fd..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/build.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-use core::panic;
-use std::{
- env, fs,
- path::{Path, PathBuf},
- process::Command,
-};
-
-fn cp_r(dir: &Path, dest: &Path) {
- for entry in fs::read_dir(dir).unwrap() {
- let entry = entry.unwrap();
- let path = entry.path();
- let dst = dest.join(path.file_name().expect("Failed to get filename of path"));
- if fs::metadata(&path).unwrap().is_file() {
- fs::copy(path, dst).unwrap();
- } else {
- fs::create_dir_all(&dst).unwrap();
- cp_r(&path, &dst);
- }
- }
-}
-fn main() {
- let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
- let build_dir = out_dir.join("build");
- fs::create_dir_all(&build_dir).unwrap();
- let output_dir = build_dir.join("out");
- let lib_dir = output_dir.join("lib");
- let include_dir = output_dir.join("include");
- let make_flags = vec!["TARGET_BASE=out"];
-
- let current_dir = env::current_dir().unwrap();
- let source_dir = current_dir.join("aerospike-client-c");
- cp_r(&source_dir, &build_dir);
-
- let mut make = Command::new("make");
- make.args(make_flags)
- .env("MAKEFLAGS", std::env::var("CARGO_MAKEFLAGS").unwrap())
- // The Makefile checks whether DEBUG is defined and cargo always sets it
- // (it's either DEBUG=false or DEBUG=true, but always defined). When DEBUG,
- // it tries to link against gcov, which we don't want
- .env_remove("DEBUG")
- .current_dir(build_dir);
- let out = make.output().unwrap();
- if !out.status.success() {
- panic!(
- "Building aerospike client failed with exit code {}.\nstout: {}\nstderr: {}",
- out.status.code().unwrap(),
- String::from_utf8_lossy(&out.stdout),
- String::from_utf8_lossy(&out.stderr),
- );
- }
- println!("cargo:rustc-link-search=native={}", lib_dir.display());
- println!("cargo:rustc-link-lib=static=aerospike");
- println!("cargo:rustc-link-lib=ssl");
- println!("cargo:rustc-link-lib=crypto");
- println!("cargo:rustc-link-lib=m");
- println!("cargo:rustc-link-lib=z");
- println!("cargo:rustc-link-lib=pthread");
-
- println!("cargo:rerun-if-changed=aerospike_client.h");
- println!("cargo:rerun-if-changed=aerospike-client-c");
- let bindings = bindgen::Builder::default()
- .header("aerospike_client.h")
- .allowlist_type("(as|aerospike)_.*")
- .allowlist_type("aerospike")
- .allowlist_function("(as|aerospike)_.*")
- .allowlist_var("(as|AS)_.*")
- .clang_arg(format!("-I{}", include_dir.to_str().unwrap()))
- .generate()
- .expect("Unable to generate bindings");
-
- bindings
- .write_to_file(out_dir.join("generated.rs"))
- .expect("Failed to write bindings");
-}
diff --git a/dozer-sink-aerospike/aerospike-client-sys/src/lib.rs b/dozer-sink-aerospike/aerospike-client-sys/src/lib.rs
deleted file mode 100644
index e67d2da84b..0000000000
--- a/dozer-sink-aerospike/aerospike-client-sys/src/lib.rs
+++ /dev/null
@@ -1,174 +0,0 @@
-#![allow(clippy::all)]
-#![allow(non_camel_case_types)]
-#![allow(non_upper_case_globals)]
-#![allow(non_snake_case)]
-
-include!(concat!(env!("OUT_DIR"), "/generated.rs"));
-
-#[macro_export]
-macro_rules! as_exp_build {
- ($func:ident $args:tt ) => {{
- let mut v = Vec::new();
- $crate::as_exp_build_inner!(v, $func $args);
- $crate::as_exp_compile(v.as_mut_ptr(), v.len() as u32)
- }}
-}
-
-#[macro_export]
-macro_rules! as_exp_build_inner {
- ($v:expr, as_exp_bin_int($bin_name:expr $(,)?)) => {{
- let bin_name: *const i8 = $bin_name;
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_BIN,
- count: 3,
- sz: 0,
- prev_va_args: 0,
- v: std::mem::zeroed(),
- });
- $crate::as_exp_build_inner!($v, as_exp_int($crate::as_exp_type_AS_EXP_TYPE_INT as i64));
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_VAL_RAWSTR,
- v: $crate::as_exp_entry__bindgen_ty_1 { str_val: bin_name },
- count: 0,
- sz: 0,
- prev_va_args: 0,
- });
- }};
- ($v:expr, as_exp_int($val:expr)) => {
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_VAL_INT,
- v: $crate::as_exp_entry__bindgen_ty_1 { int_val: $val },
- count: 0,
- sz: 0,
- prev_va_args: 0,
- })
- };
- ($v:expr, as_exp_uint($val:expr)) => {
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_VAL_UINT,
- v: $crate::as_exp_entry__bindgen_ty_1 { uint_val: $val },
- count: 0,
- sz: 0,
- prev_va_args: 0,
- })
- };
- ($v:expr, as_exp_cmp_eq($left_name:ident $left_args:tt, $right_name:ident $right_args:tt $(,)?)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_CMP_EQ,
- count: 3,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $crate::as_exp_build_inner!($v, $left_name $left_args);
- $crate::as_exp_build_inner!($v, $right_name $right_args);
- }};
- ($v:expr, as_exp_cmp_gt($left_name:ident $left_args:tt, $right_name:ident $right_args:tt $(,)?)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_CMP_GT,
- count: 3,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $crate::as_exp_build_inner!($v, $left_name $left_args);
- $crate::as_exp_build_inner!($v, $right_name $right_args);
- }};
- ($v:expr, as_exp_cmp_ge($left_name:ident $left_args:tt, $right_name:ident $right_args:tt $(,)?)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_CMP_GE,
- count: 3,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $crate::as_exp_build_inner!($v, $left_name $left_args);
- $crate::as_exp_build_inner!($v, $right_name $right_args);
- }};
- ($v:expr, as_exp_cmp_lt($left_name:ident $left_args:tt, $right_name:ident $right_args:tt $(,)?)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_CMP_LT,
- count: 3,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $crate::as_exp_build_inner!($v, $left_name $left_args);
- $crate::as_exp_build_inner!($v, $right_name $right_args);
- }};
- ($v:expr, as_exp_cmp_le($left_name:ident $left_args:tt, $right_name:ident $right_args:tt $(,)?)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_CMP_LE,
- count: 3,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $crate::as_exp_build_inner!($v, $left_name $left_args);
- $crate::as_exp_build_inner!($v, $right_name $right_args);
- }};
- ($v:expr, as_exp_and($($arg_name:ident $arg_args:tt),*)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_AND,
- count: 0,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $($crate::as_exp_build_inner!($v, $arg_name $arg_args));*;
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_END_OF_VA_ARGS,
- count: 0,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- }};
-($v:expr, as_exp_or($($arg_name:ident $arg_args:tt),*)) => {{
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_OR,
- count: 0,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- $($crate::as_exp_build_inner!($v, $arg_name $arg_args));*;
- $v.push($crate::as_exp_entry {
- op: $crate::as_exp_ops__AS_EXP_CODE_END_OF_VA_ARGS,
- count: 0,
- v: std::mem::zeroed(),
- sz: 0,
- prev_va_args: 0,
- });
- }};
-}
-
-#[cfg(test)]
-mod tests {
- use std::ffi::CString;
-
- use super::*;
-
- #[test]
- fn test_as_exp_build() {
- // Tested that this results in the same compiled expression as when
- // using the macros from the C library
- let bin_name = CString::new("bin_name").unwrap();
- unsafe {
- let exp = as_exp_build! {
- as_exp_and(
- as_exp_cmp_gt(
- as_exp_bin_int(bin_name.as_ptr()),
- as_exp_int(3)
- ),
- as_exp_cmp_lt(
- as_exp_bin_int(bin_name.as_ptr()),
- as_exp_int(8)
- )
- )
- };
- assert!(!exp.is_null());
- as_exp_destroy(exp);
- }
- }
-}
diff --git a/dozer-sink-aerospike/src/aerospike.rs b/dozer-sink-aerospike/src/aerospike.rs
deleted file mode 100644
index d6d59d9a1e..0000000000
--- a/dozer-sink-aerospike/src/aerospike.rs
+++ /dev/null
@@ -1,1252 +0,0 @@
-use std::time::Instant;
-use std::{
- alloc::{handle_alloc_error, Layout},
- ffi::{c_char, c_void, CStr, CString, NulError},
- fmt::Display,
- mem::MaybeUninit,
- ptr::{addr_of, addr_of_mut, NonNull},
- slice,
-};
-
-use itertools::Itertools;
-
-use aerospike_client_sys::*;
-use dozer_types::log::debug;
-use dozer_types::{
- chrono::{DateTime, NaiveDate},
- geo::{Coord, Point},
- json_types::{DestructuredJsonRef, JsonValue},
- ordered_float::OrderedFloat,
- rust_decimal::prelude::*,
- thiserror,
- types::{DozerDuration, DozerPoint, Field, Schema},
-};
-
-use crate::{denorm_dag::Error, AerospikeSinkError};
-
-#[derive(Debug)]
-pub struct BinNames {
- storage: Vec<CString>,
- _ptrs: Vec<*mut i8>,
-}
-
-unsafe impl Send for BinNames {}
-
-impl Clone for BinNames {
- fn clone(&self) -> Self {
- let storage = self.storage.clone();
- let ptrs = Self::make_ptrs(&storage);
- Self {
- storage,
- _ptrs: ptrs,
- }
- }
-}
-
-impl BinNames {
- fn make_ptrs(storage: &[CString]) -> Vec<*mut i8> {
- storage
- .iter()
- .map(|name| name.as_ptr() as *mut i8)
- .collect()
- }
-
- pub(crate) fn _len(&self) -> usize {
- self.storage.len()
- }
-
- pub(crate) unsafe fn _ptrs(&mut self) -> *mut *mut i8 {
- self._ptrs.as_mut_ptr()
- }
-
- pub(crate) fn names(&self) -> &[CString] {
- &self.storage
- }
-
- pub(crate) fn new<'a, I: IntoIterator<Item = &'a str>>(names: I) -> Result<Self, NulError> {
- let storage: Vec<CString> = names
- .into_iter()
- .map(CString::new)
- .collect::<Result<_, _>>()?;
- let ptrs = Self::make_ptrs(&storage);
- Ok(Self {
- storage,
- _ptrs: ptrs,
- })
- }
-}
-
-#[derive(Debug, thiserror::Error)]
-pub struct AerospikeError {
- pub(crate) code: i32,
- pub(crate) message: String,
-}
-
-impl AerospikeError {
- pub(crate) fn from_code(value: as_status) -> Self {
- let message = unsafe { as_error_string(value) };
-
- let message = unsafe { CStr::from_ptr(message) };
- // The message is ASCII (I think?), so this should not fail
- Self {
- code: value,
- message: message.to_str().unwrap().to_owned(),
- }
- }
-}
-
-impl From