diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 25172a991a..2ce624a9ce 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -3,7 +3,6 @@ name: Dozer Coverage on: workflow_dispatch: push: - branches: [release, release-dev, release-test] tags: - "v*.*.*" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c7be7ae4dc..17e468b454 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,5 +1,6 @@ name: Release on: + workflow_dispatch: push: branches: [release, release-dev, release-test, main] tags: @@ -28,7 +29,7 @@ jobs: prerelease: ${{ steps.version.outputs.prerelease }} steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Generate a changelog @@ -66,99 +67,62 @@ jobs: release-linux-aarch64: name: Release Linux binary for aarch64 - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20-16-cores needs: prepare - strategy: - fail-fast: false - matrix: - include: - - build: aarch64 - os: ubuntu-20-16-cores - target: aarch64-unknown-linux-gnu - linker: gcc-aarch64-linux-gnu - use-cross: true - file_name: dozer - asset_name: dozer-linux-aarch64 + env: + CARGO_TARGET: aarch64-unknown-linux-gnu + DEB_NAME: dozer-linux-aarch64 steps: - name: Checkout repository uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - target: ${{ matrix.target }} - components: rustfmt, clippy - - name: APT update - run: | - sudo apt update - - name: Install target specific tools - if: matrix.use-cross - run: | - sudo apt-get install -y ${{ matrix.linker }} - - name: Install Protoc - uses: arduino/setup-protoc@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Rust cache + uses: swatinem/rust-cache@v2 - - name: ⚡ Cache - uses: actions/cache@v3 + - name: Install toolchain + uses: dtolnay/rust-toolchain@master with: - path: | - ~/.cargo/bin/ - ~/.cargo/.crates.toml - ~/.cargo/.crates2.json - ~/.cargo/.package-cache - ~/.cargo/registry/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-release-${{ hashFiles('Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo-release-${{ hashFiles('Cargo.lock') }} - ${{ runner.os }}-cargo-release- + toolchain: stable + target: ${{ env.CARGO_TARGET }} - - name: Configure target aarch64 GNU - if: matrix.target == 'aarch64-unknown-linux-gnu' - run: | - echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config - echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config - echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV - - name: Cargo build - uses: actions-rs/cargo@v1 + - name: Install cross + uses: baptiste0928/cargo-install@v1 with: - command: build - use-cross: false - args: --release --target ${{ matrix.target }} --bin ${{ matrix.file_name }} - - name: List target output files - run: ls -lR ./target + crate: cross + cache-key: '${{ env.CARGO_TARGET }}' + + - name: Build dozer + run: cross build --profile=release --target ${{ env.CARGO_TARGET }} --bin dozer - name: Install cargo-deb - run: cargo install cargo-deb + uses: baptiste0928/cargo-install@v1 + with: + crate: cargo-deb + cache-key: '${{ env.CARGO_TARGET }}' - name: Compile deb file - run: cargo-deb -p dozer-cli --target ${{ matrix.target }} --output ./deb/${{matrix.asset_name}}.deb + run: cargo deb -p dozer-cli --target ${{ env.CARGO_TARGET }} --no-build --no-strip --output ./deb/${{ env.DEB_NAME }}.deb - name: 
Prepare release assets shell: bash run: | mkdir -p release cp {LICENSE,README.md,CHANGELOG.md} release/ 2> /dev/null || echo "Copy Failed...Ignoring.." - cp target/${{ matrix.target }}/release/${{matrix.file_name}} release/ + cp target/${{ env.CARGO_TARGET }}/release/dozer release/ - mv release/ ${{matrix.asset_name}}/ + mv release/ ${{ env.DEB_NAME }}/ - tar -czvf ${{matrix.asset_name}}.tar.gz ${{matrix.asset_name}}/ + tar -czvf ${{ env.DEB_NAME }}.tar.gz ${{ env.DEB_NAME }}/ - cp deb/${{matrix.asset_name}}.deb ${{matrix.asset_name}}/ + cp deb/${{ env.DEB_NAME }}.deb ${{ env.DEB_NAME }}/ - ls -l ${{matrix.asset_name}} + ls -l ${{ env.DEB_NAME }} - name: Upload the release uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ${{matrix.asset_name}}* + file: ${{ env.DEB_NAME }}* file_glob: true overwrite: true tag: ${{ needs.prepare.outputs.version }} @@ -169,10 +133,10 @@ jobs: - name: Set env variables env: VERSION: ${{ needs.prepare.outputs.version }} - RELEASE_NAME: ${{matrix.asset_name}}.tar.gz + RELEASE_NAME: ${{ env.DEB_NAME }}.tar.gz run: | echo "RELEASE_NAME=${{env.RELEASE_NAME}}" >> $GITHUB_ENV - echo "DEB_NAME=${{matrix.asset_name}}.deb" >> $GITHUB_ENV + echo "DEB_NAME=${{ env.DEB_NAME }}.deb" >> $GITHUB_ENV echo "VERSION=${{env.VERSION}}" >> $GITHUB_ENV echo "ARTIFACT_URL=https://${{ env.BUCKET_NAME }}.s3.ap-southeast-1.amazonaws.com/${{ env.VERSION }}/${{ env.RELEASE_NAME }}" >> $GITHUB_ENV diff --git a/Cargo.lock b/Cargo.lock index a0ee8949d1..7fa68fd146 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -482,9 +482,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773d18d72cd290f3f9e2149a714c8ac404b6c3fd614c684f0015449940fca899" +checksum = "b7104b9e9761613ae92fe770c741d6bbf1dbc791a0fe204400aebdd429875741" dependencies = [ "ahash 0.8.3", "arrow-arith", @@ -504,9 +504,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bc0da4b22ba63807fa2a74998e21209179c93c67856ae65d9218b81f3ef918" +checksum = "38e597a8e8efb8ff52c50eaf8f4d85124ce3c1bf20fab82f476d73739d9ab1c2" dependencies = [ "arrow-array", "arrow-buffer", @@ -519,9 +519,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9a0fd21121304cad96f307c938d861cb1e7f0c151b93047462cd9817d760fb" +checksum = "2a86d9c1473db72896bd2345ebb6b8ad75b8553ba390875c76708e8dc5c5492d" dependencies = [ "ahash 0.8.3", "arrow-buffer", @@ -536,9 +536,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" +checksum = "234b3b1c8ed00c874bf95972030ac4def6f58e02ea5a7884314388307fb3669b" dependencies = [ "half 2.3.1", "num", @@ -546,9 +546,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b94a0ce7d27abbb02e2ee4db770f593127610f57b32625b0bc6a1a90d65f085" +checksum = "22f61168b853c7faea8cea23a2169fdff9c82fb10ae5e2c07ad1cab8f6884931" dependencies = [ "arrow-array", "arrow-buffer", @@ -564,9 +564,9 @@ 
dependencies = [ [[package]] name = "arrow-csv" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3be10a00a43c4bf0d243c070754ebdde17c5d576b4928d9c3efbe3005a3853" +checksum = "10b545c114d9bf8569c84d2fbe2020ac4eea8db462c0a37d0b65f41a90d066fe" dependencies = [ "arrow-array", "arrow-buffer", @@ -583,9 +583,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" +checksum = "c6b6852635e7c43e5b242841c7470606ff0ee70eef323004cacc3ecedd33dd8f" dependencies = [ "arrow-buffer", "arrow-schema", @@ -595,9 +595,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a46da5e438a854e0386b38774da88a98782c0973c6dbc5c949ca4e02faf9b016" +checksum = "a66da9e16aecd9250af0ae9717ae8dd7ea0d8ca5a3e788fe3de9f4ee508da751" dependencies = [ "arrow-array", "arrow-buffer", @@ -609,9 +609,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f27a1fbc76553ad92dc1a9583e56b7058d8c418c4089b0b689f5b87e2da5e1" +checksum = "60ee0f9d8997f4be44a60ee5807443e396e025c23cf14d2b74ce56135cb04474" dependencies = [ "arrow-array", "arrow-buffer", @@ -620,7 +620,7 @@ dependencies = [ "arrow-schema", "chrono", "half 2.3.1", - "indexmap 1.9.2", + "indexmap 2.0.0", "lexical-core", "num", "serde", @@ -629,9 +629,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2373661f6c2233e18f6fa69c40999a9440231d1e8899be8bbbe73c7e24aa3b4" +checksum = "7fcab05410e6b241442abdab6e1035177dc082bdb6f17049a4db49faed986d63" dependencies = [ "arrow-array", "arrow-buffer", @@ -644,9 +644,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "377cd5158b7de4034a175e296726c40c3236e65d71d90a5dab2fb4fab526a8f4" +checksum = "91a847dd9eb0bacd7836ac63b3475c68b2210c2c96d0ec1b808237b973bd5d73" dependencies = [ "ahash 0.8.3", "arrow-array", @@ -659,18 +659,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9ed245bd2d7d97ad1457cb281d4296e8b593588758b8fec6d67b2b2b0f2265" +checksum = "54df8c47918eb634c20e29286e69494fdc20cafa5173eb6dad49c7f6acece733" dependencies = [ "serde", ] [[package]] name = "arrow-select" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc9bd6aebc565b1d04bae64a0f4dda3abc677190eb7d960471b1b20e1cebed0" +checksum = "941dbe481da043c4bd40c805a19ec2fc008846080c4953171b62bcad5ee5f7fb" dependencies = [ "arrow-array", "arrow-buffer", @@ -681,15 +681,16 @@ dependencies = [ [[package]] name = "arrow-string" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf2baea2ef53787332050decf7d71aca836a352e188c8ad062892405955d2b" +checksum = "359b2cd9e071d5a3bcf44679f9d85830afebc5b9c98a08019a570a65ae933e0f" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "num", 
"regex", "regex-syntax 0.7.4", ] @@ -789,9 +790,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9" +checksum = "fc6b3804dca60326e07205179847f17a4fce45af3a1106939177ad41ac08a6de" dependencies = [ "aws-credential-types", "aws-http", @@ -805,7 +806,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 1.9.0", + "fastrand", "hex", "http", "hyper", @@ -819,69 +820,77 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae" +checksum = "70a66ac8ef5fa9cf01c2d999f39d16812e90ec1467bd382cbbb74ba23ea86201" dependencies = [ "aws-smithy-async", "aws-smithy-types", - "fastrand 1.9.0", + "fastrand", "tokio", "tracing", "zeroize", ] [[package]] -name = "aws-endpoint" -version = "0.55.3" +name = "aws-http" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04" +checksum = "3e626370f9ba806ae4c439e49675fd871f5767b093075cdf4fef16cac42ba900" dependencies = [ + "aws-credential-types", "aws-smithy-http", "aws-smithy-types", "aws-types", + "bytes", "http", - "regex", + "http-body", + "lazy_static", + "percent-encoding", + "pin-project-lite", "tracing", ] [[package]] -name = "aws-http" -version = "0.55.3" +name = "aws-runtime" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44" +checksum = "07ac5cf0ff19c1bca0cea7932e11b239d1025a45696a4f44f72ea86e2b8bdd07" dependencies = [ "aws-credential-types", + "aws-http", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", "aws-smithy-http", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes", + "fastrand", "http", - "http-body", - "lazy_static", "percent-encoding", - "pin-project-lite", "tracing", + "uuid", ] [[package]] name = "aws-sdk-s3" -version = "0.28.0" +version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fba197193cbb4bcb6aad8d99796b2291f36fa89562ded5d4501363055b0de89f" +checksum = "c681fef332c3462634cd97fced8d1ac3cfdf790829bd7bfb4006cfba76712053" dependencies = [ "aws-credential-types", - "aws-endpoint", "aws-http", - "aws-sig-auth", + "aws-runtime", "aws-sigv4", "aws-smithy-async", "aws-smithy-checksums", "aws-smithy-client", "aws-smithy-eventstream", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-smithy-xml", "aws-types", @@ -892,82 +901,63 @@ dependencies = [ "percent-encoding", "regex", "tokio-stream", - "tower", "tracing", "url", ] [[package]] name = "aws-sdk-sso" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8b812340d86d4a766b2ca73f740dfd47a97c2dff0c06c8517a16d88241957e4" +checksum = "903f888ff190e64f6f5c83fb0f8d54f9c20481f1dc26359bb8896f5d99908949" dependencies = [ "aws-credential-types", - "aws-endpoint", "aws-http", - "aws-sig-auth", + "aws-runtime", "aws-smithy-async", "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", 
"aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", "http", "regex", "tokio-stream", - "tower", "tracing", ] [[package]] name = "aws-sdk-sts" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b" +checksum = "a47ad6bf01afc00423d781d464220bf69fb6a674ad6629cbbcb06d88cdc2be82" dependencies = [ "aws-credential-types", - "aws-endpoint", "aws-http", - "aws-sig-auth", + "aws-runtime", "aws-smithy-async", "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-smithy-xml", "aws-types", - "bytes", "http", "regex", - "tower", - "tracing", -] - -[[package]] -name = "aws-sig-auth" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61" -dependencies = [ - "aws-credential-types", - "aws-sigv4", - "aws-smithy-eventstream", - "aws-smithy-http", - "aws-types", - "http", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c" +checksum = "b7b28f4910bb956b7ab320b62e98096402354eca976c587d1eeccd523d9bac03" dependencies = [ "aws-smithy-eventstream", "aws-smithy-http", @@ -986,9 +976,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880" +checksum = "2cdb73f85528b9d19c23a496034ac53703955a59323d581c06aa27b4e4e247af" dependencies = [ "futures-util", "pin-project-lite", @@ -998,9 +988,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ed8b96d95402f3f6b8b57eb4e0e45ee365f78b1a924faf20ff6e97abf1eae6" +checksum = "afb15946af1b8d3beeff53ad991d9bff68ac22426b6d40372b958a75fa61eaed" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -1019,23 +1009,23 @@ dependencies = [ [[package]] name = "aws-smithy-client" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd" +checksum = "c27b2756264c82f830a91cb4d2d485b2d19ad5bea476d9a966e03d27f27ba59a" dependencies = [ "aws-smithy-async", "aws-smithy-http", "aws-smithy-http-tower", "aws-smithy-types", "bytes", - "fastrand 1.9.0", + "fastrand", "http", "http-body", "hyper", - "hyper-rustls 0.23.2", + "hyper-rustls 0.24.1", "lazy_static", "pin-project-lite", - "rustls 0.20.8", + "rustls 0.21.7", "tokio", "tower", "tracing", @@ -1043,9 +1033,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460c8da5110835e3d9a717c61f5556b20d03c32a1dec57f8fc559b360f733bb8" +checksum = "850233feab37b591b7377fd52063aa37af615687f5896807abe7f49bd4e1d25b" dependencies = [ "aws-smithy-types", "bytes", @@ -1054,9 +1044,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.55.3" +version = "0.56.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28" +checksum = "54cdcf365d8eee60686885f750a34c190e513677db58bbc466c44c588abf4199" dependencies = [ "aws-smithy-eventstream", "aws-smithy-types", @@ -1077,9 +1067,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-tower" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9" +checksum = "822de399d0ce62829a69dfa8c5cd08efdbe61a7426b953e2268f8b8b52a607bd" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -1093,50 +1083,88 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8" +checksum = "4fb1e7ab8fa7ad10c193af7ae56d2420989e9f4758bf03601a342573333ea34f" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-query" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d" +checksum = "28556a3902091c1f768a34f6c998028921bdab8d47d92586f363f14a4a32d047" dependencies = [ "aws-smithy-types", "urlencoding", ] +[[package]] +name = "aws-smithy-runtime" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "745e096b3553e7e0f40622aa04971ce52765af82bebdeeac53aa6fc82fe801e6" +dependencies = [ + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http", + "http-body", + "once_cell", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d0ae0c9cfd57944e9711ea610b48a963fb174a53aabacc08c5794a594b1d02" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "http", + "tokio", + "tracing", +] + [[package]] name = "aws-smithy-types" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8" +checksum = "d90dbc8da2f6be461fa3c1906b20af8f79d14968fe47f2b7d29d086f62a51728" dependencies = [ "base64-simd", "itoa", "num-integer", "ryu", + "serde", "time 0.3.20", ] [[package]] name = "aws-smithy-xml" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b" +checksum = "e01d2dedcdd8023043716cfeeb3c6c59f2d447fce365d8e194838891794b23b6" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.55.3" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829" +checksum = "85aa0451bf8af1bf22a4f028d5d28054507a14be43cb8ac0597a8471fba9edfe" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1257,6 +1285,27 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.65.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" 
+dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.29", +] + [[package]] name = "bindgen" version = "0.66.1" @@ -2265,9 +2314,9 @@ checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" [[package]] name = "datafusion" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96f6e4eb10bd3e6b709686858246466983e8c5354a928ff77ee34919aa60d00" +checksum = "45e3bb3a788d9fa793268e9cec2601d79831ed1be437ba74d1deb32b226ae734" dependencies = [ "ahash 0.8.3", "arrow", @@ -2284,13 +2333,13 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-row", "datafusion-sql", "flate2", "futures", "glob", + "half 2.3.1", "hashbrown 0.14.0", - "indexmap 1.9.2", + "indexmap 2.0.0", "itertools 0.11.0", "lazy_static", "log", @@ -2302,7 +2351,7 @@ dependencies = [ "pin-project-lite", "rand 0.8.5", "smallvec", - "sqlparser 0.35.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.36.1", "tempfile", "tokio", "tokio-util 0.7.8", @@ -2314,28 +2363,39 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e5fddcc0dd49bbe199e43aa406f39c46c790bb2a43c7b36a478e5f3f971235" +checksum = "0dd256483875270612d4fa439359bafa6f1760bae080ecb69eecc59a92b5016f" dependencies = [ "arrow", "arrow-array", + "async-compression", + "bytes", + "bzip2", "chrono", + "flate2", + "futures", "num_cpus", "object_store", "parquet", - "sqlparser 0.35.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.36.1", + "tokio", + "tokio-util 0.7.8", + "xz2", + "zstd", ] [[package]] name = "datafusion-execution" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfd50b6cb17acc78d2473c0d28014b8fd4e2e0a2c067c07645d6547b33b0aeeb" +checksum = "4973610d680bdc38f409a678c838d3873356cc6c29a543d1f56d7b4801e8d0a4" dependencies = [ + "arrow", "dashmap", "datafusion-common", "datafusion-expr", + "futures", "hashbrown 0.14.0", "log", "object_store", @@ -2347,24 +2407,24 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1a35dc2cd9eac18063d636f7ddf4f090fe1f34284d80192ac7ade38cc3c6991" +checksum = "7f3599f4cfcf22490f7b7d6d2fc70610ca8045b8bdcd99ef9d4309cf2b387537" dependencies = [ "ahash 0.8.3", "arrow", "datafusion-common", "lazy_static", - "sqlparser 0.35.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.36.1", "strum 0.25.0", "strum_macros 0.25.1", ] [[package]] name = "datafusion-optimizer" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5043afeb45ec1c0f45519e1eed6a477f2d30732e8f975d9cf9a75fba0ca716" +checksum = "f067401eea6a0967c83021e714746f9153368cca964d45c4a1a4f99869a1512f" dependencies = [ "arrow", "async-trait", @@ -2380,27 +2440,29 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cc892a24f4b829ee7718ad3950884c0346dbdf1517f3df153af4bcf54d8ca4d" +checksum = 
"964c19161288d374fe066535f84de37a1dab419e47a24e02f3a0ca6413744451" dependencies = [ "ahash 0.8.3", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", + "base64 0.21.0", "blake2", "blake3", "chrono", "datafusion-common", "datafusion-expr", - "datafusion-row", "half 2.3.1", "hashbrown 0.14.0", - "indexmap 1.9.2", + "hex", + "indexmap 2.0.0", "itertools 0.11.0", "lazy_static", "libc", + "log", "md-5 0.10.5", "paste", "petgraph 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2413,9 +2475,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40f7357464aeec3b530a3193dc23ca01ef20a26bad6c6dfe407ea7fb971701eb" +checksum = "4fde2768f10f1a5d47d164e0219ececb00f0dcd36f33079b656e03ad20e33c68" dependencies = [ "arrow", "chrono", @@ -2426,37 +2488,25 @@ dependencies = [ "prost 0.11.9", ] -[[package]] -name = "datafusion-row" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce75c660bbddfdd254109e668e5b5bd69df31ea26e3768e15cef0c68015e650e" -dependencies = [ - "arrow", - "datafusion-common", - "paste", - "rand 0.8.5", -] - [[package]] name = "datafusion-sql" -version = "27.0.0" +version = "30.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cab87e4933a452e0b7b3f0cbd0e760daf7d33fb54d09d70d3ffba229eaa652" +checksum = "5b0939df21e440efcb35078c22b0192c537f7a53ebf1a34288a3a134753dd364" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-expr", "log", - "sqlparser 0.35.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.36.1", ] [[package]] name = "deltalake" -version = "0.13.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0474f075bcff018c73453b289451afacf28421b1e3fb313a10a0930345b67971" +checksum = "39fbcd162d595e3b7e7af762b05abbdb14218615d24e8c40d23b1cfe1a408589" dependencies = [ "arrow", "arrow-array", @@ -2499,7 +2549,7 @@ dependencies = [ "rusoto_sts", "serde", "serde_json", - "sqlparser 0.35.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.36.1", "thiserror", "tokio", "url", @@ -2803,8 +2853,10 @@ dependencies = [ name = "dozer-recordstore" version = "0.1.0" dependencies = [ + "dozer-storage", "dozer-types", "slice-dst", + "tempdir", ] [[package]] @@ -2821,7 +2873,7 @@ dependencies = [ "enum_dispatch", "linked-hash-map", "metrics", - "multimap", + "multimap 0.9.0", "proptest", "regex", "tempdir", @@ -2841,7 +2893,7 @@ dependencies = [ "num-traits", "ort", "proptest", - "sqlparser 0.35.0 (git+https://github.com/getdozer/sqlparser-rs.git)", + "sqlparser 0.35.0", ] [[package]] @@ -2853,6 +2905,7 @@ dependencies = [ "lmdb-rkv-sys", "page_size", "pin-project", + "rocksdb", "tempdir", "tokio", ] @@ -2880,11 +2933,11 @@ dependencies = [ "futures", "libtest-mimic", "mongodb", - "multimap", + "multimap 0.9.0", "reqwest", "rusqlite", "sqllogictest", - "sqlparser 0.35.0 (git+https://github.com/getdozer/sqlparser-rs.git)", + "sqlparser 0.35.0", "tempdir", "tokio", "url", @@ -2920,7 +2973,7 @@ dependencies = [ "bytes", "chrono", "geo", - "indexmap 1.9.2", + "indexmap 2.0.0", "indicatif", "log", "ordered-float 3.9.1", @@ -2951,9 +3004,9 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" 
+checksum = "23d2f3407d9a573d666de4b5bdf10569d73ca9478087346697dcbae6244bfbcd" [[package]] name = "dynamodb_lock" @@ -3179,15 +3232,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fastrand" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] - [[package]] name = "fastrand" version = "2.0.0" @@ -3387,9 +3431,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -3418,9 +3462,9 @@ checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -3925,21 +3969,6 @@ dependencies = [ "webpki 0.21.4", ] -[[package]] -name = "hyper-rustls" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" -dependencies = [ - "http", - "hyper", - "log", - "rustls 0.20.8", - "rustls-native-certs 0.6.3", - "tokio", - "tokio-rustls 0.23.4", -] - [[package]] name = "hyper-rustls" version = "0.24.1" @@ -3949,7 +3978,9 @@ dependencies = [ "futures-util", "http", "hyper", + "log", "rustls 0.21.7", + "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.0", ] @@ -4111,7 +4142,6 @@ checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown 0.12.3", - "serde", ] [[package]] @@ -4122,6 +4152,7 @@ checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" dependencies = [ "equivalent", "hashbrown 0.14.0", + "serde", ] [[package]] @@ -4162,15 +4193,6 @@ dependencies = [ "libc", ] -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -4503,6 +4525,22 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +[[package]] +name = "librocksdb-sys" +version = "0.11.0+8.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" +dependencies = [ + "bindgen 0.65.1", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "lz4-sys", + "zstd-sys", +] + [[package]] name = "libsqlite3-sys" version = "0.25.2" @@ -4966,6 +5004,12 @@ name = "multimap" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + 
+[[package]] +name = "multimap" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70db9248a93dc36a36d9a47898caa007a32755c7ad140ec64eeeb50d5a730631" dependencies = [ "serde", ] @@ -5032,7 +5076,7 @@ checksum = "57349d5a326b437989b6ee4dc8f2f34b0cc131202748414712a8e7d98952fc8c" dependencies = [ "base64 0.21.0", "bigdecimal", - "bindgen", + "bindgen 0.66.1", "bitflags 2.4.0", "bitvec 1.0.1", "byteorder", @@ -5221,9 +5265,9 @@ dependencies = [ [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", "num-complex", @@ -5428,11 +5472,11 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openapiv3" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1a9f106eb0a780abd17ba9fca8e0843e3461630bcbe2af0ad4d5d3ba4e9aa4" +checksum = "75e56d5c441965b6425165b7e3223cc933ca469834f4a8b4786817a1f9dc4f13" dependencies = [ - "indexmap 1.9.2", + "indexmap 2.0.0", "serde", "serde_json", ] @@ -5695,9 +5739,9 @@ dependencies = [ [[package]] name = "parquet" -version = "42.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" +checksum = "49f9739b984380582bdb7749ae5b5d28839bce899212cf16465c1ac1f8b65d79" dependencies = [ "ahash 0.8.3", "arrow-array", @@ -6207,7 +6251,7 @@ dependencies = [ "heck", "itertools 0.11.0", "log", - "multimap", + "multimap 0.8.3", "once_cell", "petgraph 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "prettyplease", @@ -6570,9 +6614,9 @@ dependencies = [ [[package]] name = "rdkafka" -version = "0.32.2" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8733bc5dc0b192d1a4b28073f9bff1326ad9e4fecd4d9b025d6fc358d1c3e79" +checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" dependencies = [ "futures-channel", "futures-util", @@ -6588,9 +6632,9 @@ dependencies = [ [[package]] name = "rdkafka-sys" -version = "4.5.0+1.9.2" +version = "4.6.0+2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bb0676c2112342ac7165decdedbc4e7086c0af384479ccce534546b10687a5d" +checksum = "ad63c279fca41a27c231c450a2d2ad18288032e9cbb159ad16c9d96eba35aaaf" dependencies = [ "libc", "libz-sys", @@ -6836,6 +6880,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30" +[[package]] +name = "rocksdb" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" +dependencies = [ + "libc", + "librocksdb-sys", +] + [[package]] name = "rstar" version = "0.11.0" @@ -7160,15 +7214,15 @@ dependencies = [ [[package]] name = "rustyline" -version = "11.0.0" +version = "12.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfc8644681285d1fb67a467fb3021bfea306b99b4146b166a1fe3ada965eece" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cfg-if", "clipboard-win", - 
"dirs-next", "fd-lock", + "home", "libc", "log", "memchr", @@ -7767,20 +7821,20 @@ dependencies = [ [[package]] name = "sqlparser" version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" +source = "git+https://github.com/getdozer/sqlparser-rs.git#3dd4e9f14a9631c9707c40d7e497ffe0558a88cd" dependencies = [ + "bigdecimal", "log", - "sqlparser_derive", ] [[package]] name = "sqlparser" -version = "0.35.0" -source = "git+https://github.com/getdozer/sqlparser-rs.git#3dd4e9f14a9631c9707c40d7e497ffe0558a88cd" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" dependencies = [ - "bigdecimal", "log", + "sqlparser_derive", ] [[package]] @@ -7980,7 +8034,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" dependencies = [ "cfg-if", - "fastrand 2.0.0", + "fastrand", "redox_syscall 0.3.5", "rustix 0.38.11", "windows-sys 0.48.0", @@ -8503,9 +8557,9 @@ dependencies = [ [[package]] name = "tracing-actix-web" -version = "0.7.2" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4082e4d81173e0b7ad3cfb71e9eaef0dd0cbb7b139fdb56394f488a3b0760b23" +checksum = "5c0b08ce08cbde6a96fc1e4ebb8132053e53ec7a5cd27eef93ede6b73ebbda06" dependencies = [ "actix-web", "pin-project", diff --git a/Cross.toml b/Cross.toml new file mode 100644 index 0000000000..9554703875 --- /dev/null +++ b/Cross.toml @@ -0,0 +1,5 @@ +[target.aarch64-unknown-linux-gnu] +dockerfile = "./ci/Dockerfile.aarch64-unknown-linux-gnu" + +[target.x86_64-unknown-linux-gnu] +dockerfile = "./ci/Dockerfile.x86_64-unknown-linux-gnu" diff --git a/ci/Dockerfile.aarch64-unknown-linux-gnu b/ci/Dockerfile.aarch64-unknown-linux-gnu new file mode 100644 index 0000000000..3d51cfc25b --- /dev/null +++ b/ci/Dockerfile.aarch64-unknown-linux-gnu @@ -0,0 +1,16 @@ +FROM ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main@sha256:b4f5bf74812f9bb6516140d4b83d1f173c2d5ce0523f3e1c2253d99d851c734f + +ENV PKG_CONFIG_ALLOW_CROSS="true" + +RUN dpkg --add-architecture arm64 && \ + apt-get update && \ + apt-get install --assume-yes clang-8 libclang-8-dev binutils-aarch64-linux-gnu zlib1g-dev:arm64 unzip + +# INSTALL PROTOBUF +ENV PROTOBUF_FILE_NAME=protoc-3.18.2-linux-x86_64.zip +RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.18.2/${PROTOBUF_FILE_NAME} +ENV PROTOC_DIR=/usr/local/protoc +RUN unzip ${PROTOBUF_FILE_NAME} -d ${PROTOC_DIR} +RUN chmod -R a+xr ${PROTOC_DIR} +ENV PROTOC=${PROTOC_DIR}/bin/protoc +RUN rm ${PROTOBUF_FILE_NAME} diff --git a/ci/Dockerfile.x86_64-unknown-linux-gnu b/ci/Dockerfile.x86_64-unknown-linux-gnu new file mode 100644 index 0000000000..8bc56f4fb0 --- /dev/null +++ b/ci/Dockerfile.x86_64-unknown-linux-gnu @@ -0,0 +1,14 @@ +FROM ghcr.io/cross-rs/x86_64-unknown-linux-gnu:main@sha256:bf0cd3027befe882feb5a2b4040dc6dbdcb799b25c5338342a03163cea43da1b + +RUN apt-get update && \ + apt-get install --assume-yes clang libclang-dev binutils-aarch64-linux-gnu unzip + +# INSTALL PROTOBUF +ENV PROTOBUF_FILE_NAME=protoc-3.18.2-linux-x86_64.zip +RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.18.2/${PROTOBUF_FILE_NAME} +RUN unzip ${PROTOBUF_FILE_NAME} -d /usr/local/protoc +ENV PROTOC_DIR=/usr/local/protoc +RUN unzip ${PROTOBUF_FILE_NAME} -d ${PROTOC_DIR} +RUN 
chmod -R a+xr ${PROTOC_DIR}
+ENV PROTOC=${PROTOC_DIR}/bin/protoc
+RUN rm ${PROTOBUF_FILE_NAME}
diff --git a/ci/README.md b/ci/README.md
new file mode 100644
index 0000000000..7d00e3f1eb
--- /dev/null
+++ b/ci/README.md
@@ -0,0 +1,10 @@
+# Cross compilation
+
+We use [cross](https://github.com/cross-rs/cross) to work around a [bug](https://github.com/rust-lang/rust-bindgen/issues/1229) in `bindgen`.
+
+To test cross compilation locally:
+
+```bash
+cargo install cross
+cross build --target ${target} --bin dozer
+```
diff --git a/dozer-api/Cargo.toml b/dozer-api/Cargo.toml
index 894eb9c158..d8ef4d1d42 100644
--- a/dozer-api/Cargo.toml
+++ b/dozer-api/Cargo.toml
@@ -27,7 +27,7 @@ actix-web = { version = "4.2.1", default-features = false, features = [
 ] }
 actix-web-httpauth = "0.8.0"
 handlebars = "4.3.7"
-openapiv3 = "1.0.2"
+openapiv3 = "1.0.3"
 tonic-build = "0.10.0"
 tokio = { version = "1", features = ["full"] }
 prost-reflect = { version = "0.12.0", features = ["serde", "text-format"] }
@@ -39,7 +39,7 @@ tonic-web = "0.10.0"
 jsonwebtoken = "8.3.0"
 tokio-stream = "0.1.12"
 async-trait = "0.1.73"
-tracing-actix-web = "0.7.2"
+tracing-actix-web = "0.7.6"
 tower = "0.4.13"
 hyper = "0.14.24"
 tower-http = { version = "0.4", features = ["full"] }
diff --git a/dozer-api/src/grpc/client_server.rs b/dozer-api/src/grpc/client_server.rs
index 057d98b4ad..66ab37aa5b 100644
--- a/dozer-api/src/grpc/client_server.rs
+++ b/dozer-api/src/grpc/client_server.rs
@@ -6,6 +6,7 @@ use crate::grpc::auth::AuthService;
 use crate::grpc::grpc_web_middleware::enable_grpc_web;
 use crate::grpc::health::HealthService;
 use crate::grpc::{common, run_server, typed};
+use crate::shutdown::ShutdownReceiver;
 use crate::{errors::GrpcError, CacheEndpoint};
 use dozer_tracing::LabelsAndProgress;
 use dozer_types::grpc_types::health::health_check_response::ServingStatus;
@@ -91,7 +92,7 @@ impl ApiServer {
     pub async fn run(
         &self,
         cache_endpoints: Vec<Arc<CacheEndpoint>>,
-        shutdown: impl Future<Output = ()> + Send + 'static,
+        shutdown: ShutdownReceiver,
         operations_receiver: Option<Receiver<Operation>>,
         labels: LabelsAndProgress,
         default_max_num_records: usize,
diff --git a/dozer-api/src/grpc/internal/internal_pipeline_server.rs b/dozer-api/src/grpc/internal/internal_pipeline_server.rs
index aa935d3a67..0f5ddb27f2 100644
--- a/dozer-api/src/grpc/internal/internal_pipeline_server.rs
+++ b/dozer-api/src/grpc/internal/internal_pipeline_server.rs
@@ -26,6 +26,7 @@ use tokio::sync::Mutex;
 
 use crate::errors::GrpcError;
 use crate::grpc::run_server;
+use crate::shutdown::ShutdownReceiver;
 
 #[derive(Debug, Clone)]
 pub struct LogEndpoint {
@@ -173,7 +174,7 @@ async fn serialize_log_response(response: LogResponseFuture) -> Result,
     options: &AppGrpcOptions,
-    shutdown: impl Future<Output = ()> + Send + 'static,
+    shutdown: ShutdownReceiver,
 ) -> Result>, GrpcError> {
     let endpoints = endpoint_and_logs
         .into_iter()
diff --git a/dozer-api/src/grpc/mod.rs b/dozer-api/src/grpc/mod.rs
index 49f250ca21..aaa46b9ea7 100644
--- a/dozer-api/src/grpc/mod.rs
+++ b/dozer-api/src/grpc/mod.rs
@@ -11,23 +11,127 @@ mod shared_impl;
 pub mod typed;
 pub mod types_helper;
 
+use std::io;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
 use bytes::Bytes;
 pub use client_server::ApiServer;
 use dozer_types::errors::internal::BoxedError;
-use dozer_types::tonic::transport::server::{Router, Routes, TcpIncoming};
-use futures_util::{
-    stream::{AbortHandle, Abortable, Aborted},
-    Future,
+use dozer_types::tonic::transport::server::{
+    Connected, Router, Routes, TcpConnectInfo, TcpIncoming,
 };
+use futures_util::Future;
+use futures_util::StreamExt;
 pub use grpc_web_middleware::enable_grpc_web;
 use http::{Request, Response};
+use hyper::server::conn::AddrStream;
 use hyper::Body;
+use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
 use tower::{Layer, Service};
 
+use crate::shutdown::ShutdownReceiver;
+
+#[derive(Debug)]
+struct ShutdownAddrStream<F> {
+    inner: AddrStream,
+    state: ShutdownState<F>,
+}
+
+#[derive(Debug)]
+enum ShutdownState<F> {
+    SignalPending(F),
+    ShutdownPending,
+    Done,
+}
+
+impl<F: Future<Output = ()> + Unpin> ShutdownAddrStream<F> {
+    fn check_shutdown(&mut self, cx: &mut Context<'_>) -> Result<(), io::Error> {
+        match &mut self.state {
+            ShutdownState::SignalPending(signal) => {
+                if let Poll::Ready(()) = Pin::new(signal).poll(cx) {
+                    self.state = ShutdownState::ShutdownPending;
+                    self.check_shutdown(cx)
+                } else {
+                    Ok(())
+                }
+            }
+            ShutdownState::ShutdownPending => match Pin::new(&mut self.inner).poll_shutdown(cx) {
+                Poll::Ready(Ok(())) => {
+                    self.state = ShutdownState::Done;
+                    Ok(())
+                }
+                Poll::Ready(Err(e)) => Err(e),
+                Poll::Pending => Ok(()),
+            },
+            ShutdownState::Done => Ok(()),
+        }
+    }
+
+    fn poll_impl(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        func: fn(Pin<&mut AddrStream>, &mut Context<'_>) -> Poll<Result<(), io::Error>>,
+    ) -> Poll<Result<(), io::Error>> {
+        let this = Pin::into_inner(self);
+        if let Err(e) = this.check_shutdown(cx) {
+            return Poll::Ready(Err(e));
+        }
+
+        func(Pin::new(&mut this.inner), cx)
+    }
+}
+
+impl<F: Future<Output = ()> + Unpin> AsyncRead for ShutdownAddrStream<F> {
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        let this = Pin::into_inner(self);
+        if let Err(e) = this.check_shutdown(cx) {
+            return Poll::Ready(Err(e));
+        }
+
+        Pin::new(&mut this.inner).poll_read(cx, buf)
+    }
+}
+
+impl<F: Future<Output = ()> + Unpin> AsyncWrite for ShutdownAddrStream<F> {
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, io::Error>> {
+        let this = Pin::into_inner(self);
+        if let Err(e) = this.check_shutdown(cx) {
+            return Poll::Ready(Err(e));
+        }
+
+        Pin::new(&mut this.inner).poll_write(cx, buf)
+    }
+
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.poll_impl(cx, AsyncWrite::poll_flush)
+    }
+
+    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.poll_impl(cx, AsyncWrite::poll_shutdown)
+    }
+}
+
+impl<F> Connected for ShutdownAddrStream<F> {
+    type ConnectInfo = TcpConnectInfo;
+
+    fn connect_info(&self) -> Self::ConnectInfo {
+        self.inner.connect_info()
+    }
+}
+
 async fn run_server<L, ResBody>(
     server: Router<L>,
     incoming: TcpIncoming,
-    shutdown: impl Future<Output = ()> + Send + 'static,
+    shutdown: ShutdownReceiver,
 ) -> Result<(), dozer_types::tonic::transport::Error>
 where
     L: Layer<Routes>,
@@ -37,16 +141,17 @@ where
     ResBody: http_body::Body + Send + 'static,
     ResBody::Error: Into<BoxedError>,
 {
-    // Tonic graceful shutdown doesn't allow us to set a timeout, resulting in hanging if a client doesn't close the connection.
-    // So we just abort the server when the shutdown signal is received.
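// [Editor's note] The two removed comments above describe the old approach: the
// whole server future was aborted, because tonic's graceful shutdown alone could
// wait forever on idle client connections. In the replacement below, the signal
// is instead passed to `serve_with_incoming_shutdown`, which stops accepting new
// connections, while the `ShutdownAddrStream` wrapper above shuts down the
// connections that clients keep open.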
- let (abort_handle, abort_registration) = AbortHandle::new_pair(); - tokio::spawn(async move { - shutdown.await; - abort_handle.abort(); + let incoming = incoming.map(|stream| { + stream.map(|stream| { + let shutdown = shutdown.create_shutdown_future(); + ShutdownAddrStream { + inner: stream, + state: ShutdownState::SignalPending(Box::pin(shutdown)), + } + }) }); - match Abortable::new(server.serve_with_incoming(incoming), abort_registration).await { - Ok(result) => result, - Err(Aborted) => Ok(()), - } + server + .serve_with_incoming_shutdown(incoming, shutdown.create_shutdown_future()) + .await } diff --git a/dozer-api/src/lib.rs b/dozer-api/src/lib.rs index 76535c9a5d..fe984597dd 100644 --- a/dozer-api/src/lib.rs +++ b/dozer-api/src/lib.rs @@ -181,6 +181,7 @@ pub use actix_web_httpauth; pub use api_helper::API_LATENCY_HISTOGRAM_NAME; pub use api_helper::API_REQUEST_COUNTER_NAME; pub use async_trait; +pub mod shutdown; pub use dozer_types::tonic; use errors::ApiInitError; pub use openapiv3; diff --git a/dozer-cli/src/shutdown.rs b/dozer-api/src/shutdown.rs similarity index 97% rename from dozer-cli/src/shutdown.rs rename to dozer-api/src/shutdown.rs index d65b1d1cdf..dae29f7883 100644 --- a/dozer-cli/src/shutdown.rs +++ b/dozer-api/src/shutdown.rs @@ -3,7 +3,7 @@ use std::sync::{ Arc, }; -use futures::Future; +use futures_util::Future; use tokio::{ runtime::Runtime, sync::watch::{channel, Receiver, Sender}, diff --git a/dozer-cache/Cargo.toml b/dozer-cache/Cargo.toml index c4500e4351..6cf2211ec9 100644 --- a/dozer-cache/Cargo.toml +++ b/dozer-cache/Cargo.toml @@ -14,7 +14,7 @@ dozer-tracing = { path = "../dozer-tracing" } tokio = { version = "1", features = ["macros", "net", "rt-multi-thread"] } tempdir = "0.3.7" -futures = "0.3.26" +futures = "0.3.28" unicode-segmentation = "1.10.1" itertools = "0.10.5" roaring = "0.10.1" diff --git a/dozer-cli/Cargo.toml b/dozer-cli/Cargo.toml index c2f780eb31..326c5b001c 100644 --- a/dozer-cli/Cargo.toml +++ b/dozer-cli/Cargo.toml @@ -29,9 +29,9 @@ tonic = { version = "0.10.0", features = ["tls", "tls-roots"] } tokio-stream = "0.1.12" include_dir = "0.7.3" handlebars = "4.3.7" -rustyline = "11.0.0" +rustyline = "12.0.0" rustyline-derive = "0.9.0" -futures = "0.3.26" +futures = "0.3.28" page_size = "0.5.0" reqwest = { version = "0.11.20", features = [ "rustls-tls", diff --git a/dozer-cli/src/cli/cloud.rs b/dozer-cli/src/cli/cloud.rs index 8d0e020cec..a9bce50e12 100644 --- a/dozer-cli/src/cli/cloud.rs +++ b/dozer-cli/src/cli/cloud.rs @@ -93,9 +93,9 @@ pub struct LogCommandArgs { #[arg(short, long)] pub follow: bool, - /// The deployment to inspect + /// The version to inspect #[arg(short, long)] - pub deployment: Option, + pub version: Option, /// Ignore app logs #[arg(long, default_value = "false", action=ArgAction::SetTrue)] @@ -131,11 +131,6 @@ pub struct ListCommandArgs { #[derive(Debug, Clone, Subcommand)] pub enum VersionCommand { - /// Inspects the status of a version, compared to the current version if it's not current. - Status { - /// The version to inspect - version: u32, - }, /// Sets a version as the "current" version of the application /// /// Current version of an application can be visited without the "/v" prefix. 
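Aside: the `shutdown` module that moved from `dozer-cli` to `dozer-api` above is built on `tokio::sync::watch` (its watch imports are visible in the rename hunk). Below is a minimal sketch of that signal pattern, assuming only the watch-channel design and tokio's `sync`, `time`, `rt`, and `macros` features; the names are illustrative, not Dozer's actual `ShutdownSender`/`ShutdownReceiver` API:

```rust
use std::time::Duration;
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    // One sender, many cheap-to-clone receivers: every task watches the same flag.
    let (sender, receiver) = watch::channel(false);

    let mut task_rx = receiver.clone();
    let worker = tokio::spawn(async move {
        loop {
            tokio::select! {
                // Stand-in for real work (serving a connection, reading a source).
                _ = tokio::time::sleep(Duration::from_millis(50)) => {}
                // Resolves as soon as the sender publishes a new value.
                _ = task_rx.changed() => break,
            }
        }
    });

    // Flipping the flag asks every watcher to wind down.
    sender.send(true).expect("at least one receiver is alive");
    worker.await.expect("worker task panicked");
}
```

A watch channel suits shutdown because the latest value is always readable via `borrow()`, so even a task that checks late still sees the flag.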
@@ -143,6 +138,11 @@ pub enum VersionCommand { /// The version to set as current version: u32, }, + /// Creates or updates an alias to point at the given version + Alias { alias: String, version: u32 }, + /// Remove alias + #[command(name = "rm-alias", visible_alias = "rma")] + RmAlias { alias: String }, } #[derive(Debug, Clone, Subcommand)] diff --git a/dozer-cli/src/lib.rs b/dozer-cli/src/lib.rs index 8ed645d374..dfeb6a556d 100644 --- a/dozer-cli/src/lib.rs +++ b/dozer-cli/src/lib.rs @@ -2,13 +2,12 @@ pub mod cli; pub mod errors; pub mod live; pub mod pipeline; -pub mod shutdown; pub mod simple; +use dozer_api::shutdown::ShutdownSender; use dozer_core::{app::AppPipeline, errors::ExecutionError}; use dozer_sql::{builder::statement_to_pipeline, errors::PipelineError}; use dozer_types::log::debug; use errors::OrchestrationError; -use shutdown::ShutdownSender; use std::{ backtrace::{Backtrace, BacktraceStatus}, panic, process, @@ -68,6 +67,7 @@ pub fn wrapped_statement_to_pipeline(sql: &str) -> Result, diff --git a/dozer-cli/src/pipeline/source_builder.rs b/dozer-cli/src/pipeline/source_builder.rs index 03bc6d4d45..2d54f23e62 100644 --- a/dozer-cli/src/pipeline/source_builder.rs +++ b/dozer-cli/src/pipeline/source_builder.rs @@ -1,6 +1,6 @@ use crate::pipeline::connector_source::ConnectorSourceFactory; -use crate::shutdown::ShutdownReceiver; use crate::OrchestrationError; +use dozer_api::shutdown::ShutdownReceiver; use dozer_core::appsource::{AppSourceManager, AppSourceMappings}; use dozer_ingestion::connectors::TableInfo; diff --git a/dozer-cli/src/pipeline/tests/builder.rs b/dozer-cli/src/pipeline/tests/builder.rs index 46c050aebc..3e4e4bfb68 100644 --- a/dozer-cli/src/pipeline/tests/builder.rs +++ b/dozer-cli/src/pipeline/tests/builder.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::pipeline::source_builder::SourceBuilder; use crate::pipeline::PipelineBuilder; +use dozer_api::shutdown; use dozer_types::ingestion_types::{GrpcConfig, GrpcConfigSchemas}; use dozer_types::models::config::Config; @@ -83,7 +84,7 @@ fn load_multi_sources() { .unwrap(); let source_builder = SourceBuilder::new(grouped_connections, Default::default()); - let (_sender, shutdown_receiver) = crate::shutdown::new(&runtime); + let (_sender, shutdown_receiver) = shutdown::new(&runtime); let asm = runtime .block_on(source_builder.build_source_manager(&runtime, shutdown_receiver)) .unwrap(); diff --git a/dozer-cli/src/simple/cloud/mod.rs b/dozer-cli/src/simple/cloud/mod.rs index b76b2213a2..c8ea2673dc 100644 --- a/dozer-cli/src/simple/cloud/mod.rs +++ b/dozer-cli/src/simple/cloud/mod.rs @@ -2,4 +2,3 @@ pub mod deployer; pub mod login; pub mod monitor; pub mod progress_printer; -pub mod version; diff --git a/dozer-cli/src/simple/cloud/version.rs b/dozer-cli/src/simple/cloud/version.rs deleted file mode 100644 index 913380c3aa..0000000000 --- a/dozer-cli/src/simple/cloud/version.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::collections::HashMap; - -use dozer_api::rest::DOZER_SERVER_NAME_HEADER; -use dozer_cache::Phase; -use dozer_types::{ - prettytable::{row, table}, - serde_json, -}; - -use crate::errors::CloudError; - -#[derive(Debug)] -pub struct PathStatus { - count: usize, - phase: Phase, -} - -#[derive(Debug, Default)] -pub struct ServerStatus { - paths: HashMap, -} - -#[derive(Debug)] -pub struct VersionStatus { - servers: Vec, -} - -pub async fn get_version_status( - endpoint: &str, - version: u32, - api_available: i32, -) -> Result { - let (clients, paths) = probe_dozer_servers(endpoint, version, 
api_available).await?; - - let mut servers = vec![]; - for client in clients { - let mut server_status = ServerStatus::default(); - for path in &paths { - let count = client - .post(format!("{}/v{}{}/count", endpoint, version, path)) - .send() - .await? - .error_for_status()?; - let count = count.json::().await?; - - let phase = client - .post(format!("{}/v{}{}/phase", endpoint, version, path)) - .send() - .await? - .error_for_status()?; - let phase = phase.json::().await?; - - server_status - .paths - .insert(path.clone(), PathStatus { count, phase }); - } - servers.push(server_status) - } - - Ok(VersionStatus { servers }) -} - -/// Probe the servers to get a list of clients with sticky session cookie, and all the paths available. -async fn probe_dozer_servers( - endpoint: &str, - version: u32, - count_hint: i32, -) -> Result<(Vec, Vec), CloudError> { - let mut clients = HashMap::, reqwest::Client>::default(); - let mut paths = vec![]; - - // Try to visit the version endpoint many times to cover all the servers. - for _ in 0..count_hint * 5 { - let client = reqwest::Client::builder().cookie_store(true).build()?; - let response = client - .get(format!("{}/v{}/", endpoint, version)) - .send() - .await? - .error_for_status()?; - let server_name = response - .headers() - .get(DOZER_SERVER_NAME_HEADER) - .ok_or(CloudError::MissingResponseHeader)? - .as_bytes(); - - if clients.contains_key(server_name) { - continue; - } - clients.insert(server_name.to_vec(), client); - - if paths.is_empty() { - paths = response.json::>().await?; - } - } - - Ok((clients.into_values().collect(), paths)) -} - -pub fn version_status_table(status: &Result) -> String { - if let Ok(status) = status { - let mut table = table!(); - for server in &status.servers { - let mut server_table = table!(); - for (path, PathStatus { count, phase }) in &server.paths { - let phase = serde_json::to_string(phase).expect("Should always succeed"); - server_table.add_row(row![path, count, phase]); - } - table.add_row(row![server_table]); - } - table.to_string() - } else { - "Unavailable".to_string() - } -} - -pub fn version_is_up_to_date( - status: &Result, - current_status: &Result, -) -> bool { - if let Ok(status) = status { - if let Ok(current_status) = current_status { - version_is_up_to_date_impl(status, current_status) - } else { - true - } - } else { - false - } -} - -/// We say a version is up to date if any server of this version are up to date with any server of current version. -fn version_is_up_to_date_impl(status: &VersionStatus, current_status: &VersionStatus) -> bool { - for server in &status.servers { - for current_server in ¤t_status.servers { - if server_is_up_to_date(server, current_server) { - return true; - } - } - } - true -} - -/// We say a server is up to date if all paths are up to date. 
-fn server_is_up_to_date(status: &ServerStatus, current_status: &ServerStatus) -> bool { - for (path, status) in &status.paths { - if !path_is_up_to_date(status, current_status.paths.get(path)) { - return false; - } - } - true -} - -fn path_is_up_to_date(status: &PathStatus, current_status: Option<&PathStatus>) -> bool { - if let Some(current_status) = current_status { - status.phase == Phase::Streaming - || (current_status.phase == Phase::Snapshotting && status.count >= current_status.count) - } else { - true - } -} diff --git a/dozer-cli/src/simple/cloud_orchestrator.rs b/dozer-cli/src/simple/cloud_orchestrator.rs index 77b8a5fa0d..55f6254899 100644 --- a/dozer-cli/src/simple/cloud_orchestrator.rs +++ b/dozer-cli/src/simple/cloud_orchestrator.rs @@ -20,12 +20,12 @@ use dozer_types::grpc_types::api_explorer::api_explorer_service_client::ApiExplo use dozer_types::grpc_types::api_explorer::GetApiTokenRequest; use dozer_types::grpc_types::cloud::{ dozer_cloud_client::DozerCloudClient, CreateSecretRequest, DeleteAppRequest, - DeleteSecretRequest, GetEndpointCommandsSamplesRequest, GetSecretRequest, GetStatusRequest, - ListAppRequest, ListSecretsRequest, LogMessageRequest, UpdateSecretRequest, + DeleteSecretRequest, GetEndpointCommandsSamplesRequest, GetSecretRequest, ListAppRequest, + ListSecretsRequest, LogMessageRequest, UpdateSecretRequest, }; use dozer_types::grpc_types::cloud::{ - CreateAppRequest, DeploymentInfo, DeploymentStatusWithHealth, File, ListDeploymentRequest, - SetCurrentVersionRequest, + CreateAppRequest, DeploymentInfo, DeploymentStatus, File, GetAppRequest, ListDeploymentRequest, + RmAliasRequest, SetAliasRequest, SetCurrentVersionRequest, }; use dozer_types::log::info; use dozer_types::prettytable::{row, table}; @@ -35,7 +35,6 @@ use tonic::transport::Endpoint; use tower::ServiceBuilder; use super::cloud::login::LoginSvc; -use super::cloud::version::{get_version_status, version_is_up_to_date, version_status_table}; async fn establish_cloud_service_channel( cloud: &Cloud, cloud_config: &dozer_types::models::cloud::Cloud, @@ -230,63 +229,40 @@ impl CloudOrchestrator for SimpleOrchestrator { self.runtime.block_on(async move { let mut client = get_cloud_client(&cloud, cloud_config).await?; let response = client - .get_status(GetStatusRequest { app_id }) + .get_application(GetAppRequest { app_id }) .await .map_err(map_tonic_error)? 
.into_inner(); let mut table = table!(); + table.set_titles(row!["Deployment", "Version", "Status"]); - table.add_row(row!["Api endpoint", response.data_endpoint,]); - - let mut deployment_table = table!(); - deployment_table.set_titles(row![ - "Deployment", - "App", - "Api", - "Version", - "Phase", - "Error" - ]); - - for status in response.deployments.iter() { - let deployment = status.deployment.as_ref().expect("deployment is expected"); - fn mark(status: bool) -> &'static str { - if status { - "🟢" - } else { - "🟠" - } - } - - let mut version = "".to_string(); - for (loop_version, loop_deployment) in response.versions.iter() { - if loop_deployment == &deployment.deployment { - if Some(*loop_version) == response.current_version { - version = format!("v{loop_version} (current)"); - } else { - version = format!("v{loop_version}"); + for deployment in response.deployments.iter() { + fn deployment_status(status: i32) -> &'static str { + match status { + _ if status == DeploymentStatus::Pending as i32 => { + DeploymentStatus::Pending.as_str_name() } - break; + _ if status == DeploymentStatus::Running as i32 => { + DeploymentStatus::Running.as_str_name() + } + _ if status == DeploymentStatus::Success as i32 => { + DeploymentStatus::Success.as_str_name() + } + _ if status == DeploymentStatus::Failed as i32 => { + DeploymentStatus::Failed.as_str_name() + } + _ => "UNRECOGNIZED", } } - deployment_table.add_row(row![ - deployment.deployment, - format!("Deployment Status: {:?}", deployment.status), - format!("Version: {}", version), + table.add_row(row![ + deployment.deployment_id, + deployment.version, + deployment_status(deployment.status), ]); - for r in status.resources.iter() { - deployment_table.add_row(row![ - "", - format!("{}: {}", r.typ, mark(r.available == r.desired)), - format!("{}/{}", r.available.unwrap_or(0), r.desired.unwrap_or(0)), - ]); - } } - table.add_row(row!["Deployments", deployment_table]); - table.printstd(); Ok::<(), CloudError>(()) })?; @@ -316,18 +292,15 @@ impl CloudOrchestrator for SimpleOrchestrator { .map_err(map_tonic_error)? .into_inner(); - // Show log of the latest deployment for now. - let Some(deployment) = logs - .deployment - .or_else(|| latest_deployment(&res.deployments)) - else { - info!("No deployments found"); + // Show log of the latest version for now. + let Some(version) = logs.version.or_else(|| latest_version(&res.deployments)) else { + info!("No active version found"); return Ok(()); }; let mut response = client .on_log_message(LogMessageRequest { app_id, - deployment, + version, follow: logs.follow, include_build: !logs.ignore_build, include_app: !logs.ignore_app, @@ -526,67 +499,17 @@ impl SimpleOrchestrator { .set_current_version(SetCurrentVersionRequest { app_id, version }) .await?; } - VersionCommand::Status { version } => { - let status = client - .get_status(GetStatusRequest { app_id }) - .await - .map_err(map_tonic_error)? 
-                    .into_inner();
-                let Some(deployment) = status.versions.get(&version) else {
-                    info!("Version {} does not exist", version);
-                    return Ok(());
-                };
-                let api_available = get_api_available(&status.deployments, *deployment);
-
-                let version_status =
-                    get_version_status(&status.data_endpoint, version, api_available).await;
-                let mut table = table!();
-
-                if let Some(current_version) = status.current_version {
-                    if current_version != version {
-                        let current_api_available = get_api_available(
-                            &status.deployments,
-                            status.versions[&current_version],
-                        );
-
-                        table.add_row(row![
-                            format!("v{version}"),
-                            version_status_table(&version_status)
-                        ]);
-
-                        let current_version_status = get_version_status(
-                            &status.data_endpoint,
-                            current_version,
-                            current_api_available,
-                        )
-                        .await;
-                        table.add_row(row![
-                            format!("v{current_version} (current)"),
-                            version_status_table(&current_version_status)
-                        ]);
-
-                        table.printstd();
-
-                        if version_is_up_to_date(&version_status, &current_version_status) {
-                            info!("Version {} is up to date", version);
-                        } else {
-                            info!("Version {} is not up to date", version);
-                        }
-                    } else {
-                        table.add_row(row![
-                            format!("v{version} (current)"),
-                            version_status_table(&version_status)
-                        ]);
-                        table.printstd();
-                    }
-                } else {
-                    table.add_row(row![
-                        format!("v{version}"),
-                        version_status_table(&version_status)
-                    ]);
-                    table.printstd();
-                    info!("No current version");
-                };
+            VersionCommand::Alias { alias, version } => {
+                client
+                    .set_alias(SetAliasRequest {
+                        app_id,
+                        version,
+                        alias,
+                    })
+                    .await?;
+            }
+            VersionCommand::RmAlias { alias } => {
+                client.rm_alias(RmAliasRequest { app_id, alias }).await?;
+            }
         }
 
@@ -655,27 +578,6 @@ impl SimpleOrchestrator {
     }
 }
 
-fn latest_deployment(deployments: &[DeploymentInfo]) -> Option<u32> {
-    deployments.iter().map(|status| status.deployment).max()
-}
-
-fn get_api_available(deployments: &[DeploymentStatusWithHealth], deployment: u32) -> i32 {
-    let info = deployments
-        .iter()
-        .find(|status| {
-            status
-                .deployment
-                .as_ref()
-                .expect("deployment is expected")
-                .deployment
-                == deployment
-        })
-        .expect("Deployment should be found in deployments");
-
-    info.resources
-        .clone()
-        .into_iter()
-        .find(|r| r.typ == "api")
-        .and_then(|r| r.available)
-        .unwrap_or(1)
+fn latest_version(deployments: &[DeploymentInfo]) -> Option<u32> {
+    deployments.iter().map(|status| status.version).max()
 }
diff --git a/dozer-cli/src/simple/executor.rs b/dozer-cli/src/simple/executor.rs
index 05edb37231..b3158afb7e 100644
--- a/dozer-cli/src/simple/executor.rs
+++ b/dozer-cli/src/simple/executor.rs
@@ -1,11 +1,11 @@
 use dozer_api::grpc::internal::internal_pipeline_server::LogEndpoint;
+use dozer_api::shutdown::ShutdownReceiver;
 use dozer_cache::dozer_log::camino::Utf8Path;
 use dozer_cache::dozer_log::home_dir::{BuildPath, HomeDir};
 use dozer_cache::dozer_log::replication::Log;
-use dozer_core::checkpoint::OptionCheckpoint;
+use dozer_core::checkpoint::{CheckpointOptions, OptionCheckpoint};
 use dozer_tracing::LabelsAndProgress;
 use dozer_types::models::api_endpoint::ApiEndpoint;
-use dozer_types::models::app_config::DataStorage;
 use dozer_types::models::flags::Flags;
 use tokio::runtime::Runtime;
 use tokio::sync::Mutex;
@@ -16,7 +16,6 @@ use dozer_types::models::source::Source;
 use dozer_types::models::udf_config::UdfConfig;
 
 use crate::pipeline::PipelineBuilder;
-use crate::shutdown::ShutdownReceiver;
 use dozer_core::executor::{DagExecutor, ExecutorOptions};
 use dozer_types::models::connection::Connection;
 
@@ -47,7 +46,7 @@ impl<'a> Executor<'a> {
         sources: &'a [Source],
         sql:
Option<&'a str>, api_endpoints: &'a [ApiEndpoint], - storage_config: DataStorage, + checkpoint_options: CheckpointOptions, labels: LabelsAndProgress, udfs: &'a [UdfConfig], ) -> Result, OrchestrationError> { @@ -59,7 +58,7 @@ impl<'a> Executor<'a> { // Load pipeline checkpoint. let checkpoint = - OptionCheckpoint::new(build_path.data_dir.to_string(), storage_config).await?; + OptionCheckpoint::new(build_path.data_dir.to_string(), checkpoint_options).await?; let mut endpoint_and_logs = vec![]; for endpoint in api_endpoints { diff --git a/dozer-cli/src/simple/orchestrator.rs b/dozer-cli/src/simple/orchestrator.rs index 0f51a8b340..8b969a2bfb 100644 --- a/dozer-cli/src/simple/orchestrator.rs +++ b/dozer-cli/src/simple/orchestrator.rs @@ -2,14 +2,17 @@ use super::executor::{run_dag_executor, Executor}; use super::Contract; use crate::errors::OrchestrationError; use crate::pipeline::PipelineBuilder; -use crate::shutdown::ShutdownReceiver; use crate::simple::build; use crate::simple::helper::validate_config; -use crate::utils::{get_cache_manager_options, get_default_max_num_records, get_executor_options}; +use crate::utils::{ + get_cache_manager_options, get_checkpoint_options, get_default_max_num_records, + get_executor_options, +}; use crate::{flatten_join_handle, join_handle_map_err}; use dozer_api::auth::{Access, Authorizer}; use dozer_api::grpc::internal::internal_pipeline_server::start_internal_pipeline_server; +use dozer_api::shutdown::ShutdownReceiver; use dozer_api::{get_api_security, grpc, rest, CacheEndpoint}; use dozer_cache::cache::LmdbRwCacheManager; use dozer_cache::dozer_log::camino::Utf8PathBuf; @@ -157,11 +160,10 @@ impl SimpleOrchestrator { let grpc_handle = if grpc_config.enabled.unwrap_or(true) { let api_security = self.config.api.api_security.clone(); let grpc_server = grpc::ApiServer::new(grpc_config, api_security, flags); - let shutdown = shutdown.create_shutdown_future(); let grpc_server = grpc_server .run( cache_endpoints, - shutdown, + shutdown.clone(), operations_receiver, self.labels.clone(), default_max_num_records, @@ -226,7 +228,7 @@ impl SimpleOrchestrator { &self.config.sources, self.config.sql.as_deref(), &self.config.endpoints, - self.config.app.data_storage.clone(), + get_checkpoint_options(&self.config), self.labels.clone(), &self.config.udfs, ))?; @@ -244,7 +246,7 @@ impl SimpleOrchestrator { .block_on(start_internal_pipeline_server( endpoint_and_logs, app_grpc_config, - shutdown.create_shutdown_future(), + shutdown.clone(), )) .map_err(OrchestrationError::InternalServerFailed)?; diff --git a/dozer-cli/src/utils.rs b/dozer-cli/src/utils.rs index 6ec59d1b0a..60f5faef8f 100644 --- a/dozer-cli/src/utils.rs +++ b/dozer-cli/src/utils.rs @@ -1,6 +1,8 @@ use dozer_cache::cache::CacheManagerOptions; use dozer_core::{ - checkpoint::CheckpointFactoryOptions, epoch::EpochManagerOptions, executor::ExecutorOptions, + checkpoint::{CheckpointFactoryOptions, CheckpointOptions}, + epoch::EpochManagerOptions, + executor::ExecutorOptions, }; use dozer_types::{ constants::DEFAULT_DEFAULT_MAX_NUM_RECORDS, @@ -62,6 +64,14 @@ fn get_max_interval_before_persist_in_seconds(config: &Config) -> u64 { .unwrap_or_else(default_max_interval_before_persist_in_seconds) } +pub fn get_checkpoint_options(config: &Config) -> CheckpointOptions { + let app = &config.app; + CheckpointOptions { + data_storage: app.data_storage.clone(), + record_store: app.record_store, + } +} + fn get_checkpoint_factory_options(config: &Config) -> CheckpointFactoryOptions { CheckpointFactoryOptions { 
persist_queue_capacity: config diff --git a/dozer-core/src/checkpoint/mod.rs b/dozer-core/src/checkpoint/mod.rs index d0631c4147..aff19563fd 100644 --- a/dozer-core/src/checkpoint/mod.rs +++ b/dozer-core/src/checkpoint/mod.rs @@ -10,7 +10,7 @@ use dozer_recordstore::{ProcessorRecordStore, ProcessorRecordStoreDeserializer, use dozer_types::{ bincode, log::{error, info}, - models::app_config::DataStorage, + models::app_config::{DataStorage, RecordStore}, node::{NodeHandle, OpIdentifier, SourceStates, TableState}, parking_lot::Mutex, serde::{Deserialize, Serialize}, @@ -58,14 +58,21 @@ pub struct OptionCheckpoint { checkpoint: Option, } +#[derive(Debug, Clone, Default)] +pub struct CheckpointOptions { + pub data_storage: DataStorage, + pub record_store: RecordStore, +} + impl OptionCheckpoint { pub async fn new( checkpoint_dir: String, - storage_config: DataStorage, + options: CheckpointOptions, ) -> Result { let (storage, prefix) = - create_data_storage(storage_config, checkpoint_dir.to_string()).await?; - let (record_store, checkpoint) = read_record_store_slices(&*storage, &prefix).await?; + create_data_storage(options.data_storage, checkpoint_dir.to_string()).await?; + let (record_store, checkpoint) = + read_record_store_slices(&*storage, &prefix, options.record_store).await?; if let Some(checkpoint) = &checkpoint { info!( "Restored record store from {}th checkpoint, last epoch id is {}, processor states are stored in {}", @@ -289,8 +296,9 @@ impl Drop for CheckpointWriter { async fn read_record_store_slices( storage: &dyn Storage, factory_prefix: &str, + record_store: RecordStore, ) -> Result<(ProcessorRecordStoreDeserializer, Option), ExecutionError> { - let record_store = ProcessorRecordStoreDeserializer::new()?; + let record_store = ProcessorRecordStoreDeserializer::new(record_store)?; let record_store_prefix = record_store_prefix(factory_prefix); let mut last_checkpoint: Option = None; @@ -358,7 +366,7 @@ async fn read_record_store_slices( pub async fn create_checkpoint_for_test() -> (TempDir, OptionCheckpoint) { let temp_dir = TempDir::new("create_checkpoint_for_test").unwrap(); let checkpoint_dir = temp_dir.path().to_str().unwrap().to_string(); - let checkpoint = OptionCheckpoint::new(checkpoint_dir.clone(), DataStorage::Local) + let checkpoint = OptionCheckpoint::new(checkpoint_dir.clone(), Default::default()) .await .unwrap(); (temp_dir, checkpoint) @@ -371,7 +379,7 @@ pub async fn create_checkpoint_factory_for_test( // Create empty checkpoint storage. let temp_dir = TempDir::new("create_checkpoint_factory_for_test").unwrap(); let checkpoint_dir = temp_dir.path().to_str().unwrap().to_string(); - let checkpoint = OptionCheckpoint::new(checkpoint_dir.clone(), DataStorage::Local) + let checkpoint = OptionCheckpoint::new(checkpoint_dir.clone(), Default::default()) .await .unwrap(); let (checkpoint_factory, handle) = CheckpointFactory::new(checkpoint, Default::default()) @@ -404,7 +412,7 @@ pub async fn create_checkpoint_factory_for_test( handle.await.unwrap(); // Create a new factory that loads from the checkpoint. 
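The test helpers in this hunk can pass `Default::default()` where they previously passed `DataStorage::Local` because the new `CheckpointOptions` derives `Default` and both of its fields default to the local/in-memory backends. A reduced, self-contained model of that plumbing; the enum shapes are stand-ins for dozer's real config types in `dozer_types::models::app_config`, assuming `Local` and `InMemory` are their `#[default]` variants:

```rust
// Stand-ins for dozer's config enums; payloads are illustrative only.
#[derive(Debug, Clone, Default, PartialEq)]
enum DataStorage {
    #[default]
    Local,
    S3 { region: String, bucket_name: String },
}

#[derive(Debug, Clone, Copy, Default, PartialEq)]
enum RecordStore {
    #[default]
    InMemory,
    Rocksdb,
}

#[derive(Debug, Clone, Default, PartialEq)]
struct CheckpointOptions {
    data_storage: DataStorage,
    record_store: RecordStore,
}

fn main() {
    // `Default::default()` now expresses the old `DataStorage::Local` behaviour
    // while also picking the in-memory record store.
    let options = CheckpointOptions::default();
    assert_eq!(options.data_storage, DataStorage::Local);
    assert_eq!(options.record_store, RecordStore::InMemory);
    println!("{options:?}");
}
```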
- let checkpoint = OptionCheckpoint::new(checkpoint_dir, DataStorage::Local) + let checkpoint = OptionCheckpoint::new(checkpoint_dir, Default::default()) .await .unwrap(); let last_checkpoint = checkpoint.checkpoint.as_ref().unwrap(); diff --git a/dozer-core/src/executor/receiver_loop.rs b/dozer-core/src/executor/receiver_loop.rs index 2ab38b64be..cb4cd7c583 100644 --- a/dozer-core/src/executor/receiver_loop.rs +++ b/dozer-core/src/executor/receiver_loop.rs @@ -202,7 +202,7 @@ mod tests { #[test] fn receiver_loop_forwards_op() { let (mut test_loop, senders) = TestReceiverLoop::new(2); - let record_store = ProcessorRecordStore::new().unwrap(); + let record_store = ProcessorRecordStore::new(Default::default()).unwrap(); let record: ProcessorRecord = record_store .create_record(&Record::new(vec![Field::Int(1)])) .unwrap(); diff --git a/dozer-ingestion/Cargo.toml b/dozer-ingestion/Cargo.toml index 5864095470..9caf1b1653 100644 --- a/dozer-ingestion/Cargo.toml +++ b/dozer-ingestion/Cargo.toml @@ -12,7 +12,7 @@ dozer-types = { path = "../dozer-types" } dozer-log = { path = "../dozer-log" } tokio = { version = "1", features = ["full"] } -futures = "0.3.26" +futures = "0.3.28" # Postgres connector postgres-protocol = "0.6.4" postgres-types = { version = "0.2.4", features = [ @@ -25,11 +25,11 @@ tokio-postgres = { version = "0.7.7", features = [ "with-uuid-1", ] } # DataFusion connector -object_store = { version = "0.6", features = ["aws"] } +object_store = { version = "0.6.1", features = ["aws"] } # Eth connector web3 = { version = "0.18.0", optional = true } # Kafka connector -rdkafka = { version = "0.32.2", optional = true } +rdkafka = { version = "0.34.0", optional = true } # odbc connector odbc = { version = "0.17.0", optional = true } # Mongodb connector @@ -44,7 +44,7 @@ tonic-web = "0.10.0" tonic-reflection = "0.10.0" tower-http = { version = "0.4", features = ["full"] } prost-reflect = { version = "0.12.0", features = ["serde", "text-format"] } -deltalake = { version = "0.13.0", default-features = false, features = [ +deltalake = { version = "0.15.0", default-features = false, features = [ "s3", "datafusion", ] } @@ -77,7 +77,7 @@ rand = "0.8.5" hex-literal = "0.4.1" dozer-tracing = { path = "../dozer-tracing" } tempdir = "0.3.7" -parquet = "42.0.0" +parquet = "45.0.0" env_logger = "0.10.0" hex = "0.4.3" dozer-cli = { path = "../dozer-cli" } diff --git a/dozer-log/Cargo.toml b/dozer-log/Cargo.toml index 3ba6b335c8..0e1aa5375a 100644 --- a/dozer-log/Cargo.toml +++ b/dozer-log/Cargo.toml @@ -7,13 +7,13 @@ edition = "2021" [dependencies] -aws-config = "0.55.3" -aws-sdk-s3 = "0.28.0" -aws-smithy-http = "0.55.3" -aws-smithy-types = "0.55.3" +aws-config = "0.56.1" +aws-sdk-s3 = "0.31.2" +aws-smithy-http = "0.56.1" +aws-smithy-types = "0.56.1" camino = "1.1.4" -dozer-types = {path = "../dozer-types"} -dyn-clone = "1.0.11" +dozer-types = { path = "../dozer-types" } +dyn-clone = "1.0.14" futures-util = "0.3.28" nonzero_ext = "0.3.0" pin-project = "1.1.2" diff --git a/dozer-log/src/reader.rs b/dozer-log/src/reader.rs index 627c529b27..dff4965a9a 100644 --- a/dozer-log/src/reader.rs +++ b/dozer-log/src/reader.rs @@ -16,6 +16,7 @@ use dozer_types::models::api_endpoint::{ use dozer_types::tonic::transport::Channel; use dozer_types::tonic::Streaming; use dozer_types::{bincode, serde_json}; +use tokio::select; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::task::JoinHandle; use tokio_stream::wrappers::ReceiverStream; @@ -265,10 +266,27 @@ async fn call_get_log_once( } async fn log_reader_worker( + 
log_client: LogClient, + pos: u64, + options: LogReaderOptions, + op_sender: Sender, +) -> Result<(), ReaderError> { + select! { + _ = op_sender.closed() => { + debug!("Log reader thread quit because LogReader was dropped"); + Ok(()) + } + result = log_reader_worker_loop(log_client, pos, options, &op_sender) => { + result + } + } +} + +async fn log_reader_worker_loop( mut log_client: LogClient, mut pos: u64, options: LogReaderOptions, - op_sender: Sender, + op_sender: &Sender, ) -> Result<(), ReaderError> { loop { // Request ops. diff --git a/dozer-log/src/storage/s3.rs b/dozer-log/src/storage/s3.rs index b13b3ca5c6..d24564694b 100644 --- a/dozer-log/src/storage/s3.rs +++ b/dozer-log/src/storage/s3.rs @@ -228,7 +228,7 @@ impl Storage for S3Storage { } } -fn is_bucket_already_owned_by_you(error: &SdkError) -> bool { +fn is_bucket_already_owned_by_you(error: &SdkError) -> bool { if let SdkError::ServiceError(error) = error { error.err().is_bucket_already_owned_by_you() } else { diff --git a/dozer-recordstore/Cargo.toml b/dozer-recordstore/Cargo.toml index 7aec7fa7e2..a66b543d54 100644 --- a/dozer-recordstore/Cargo.toml +++ b/dozer-recordstore/Cargo.toml @@ -10,4 +10,6 @@ fuzz = ["dozer-types/arbitrary"] [dependencies] dozer-types = { path = "../dozer-types" } +dozer-storage = { path = "../dozer-storage" } slice-dst = { version = "1.5.1", default-features = false } +tempdir = "0.3.7" diff --git a/dozer-recordstore/src/in_memory/mod.rs b/dozer-recordstore/src/in_memory/mod.rs new file mode 100644 index 0000000000..a3866294cf --- /dev/null +++ b/dozer-recordstore/src/in_memory/mod.rs @@ -0,0 +1,496 @@ +//! [`RecordRef`] is a compact representation of a collection of [dozer_types::types::Field]s +//! There are two principles that make this representation more compact than `[Field]`: +//! 1. The fields and their types are stored as a Struct of Arrays instead of +//! and Array of Structs. This makes it possible to pack the discriminants +//! for the field types as a byte per field, instead of taking up a full word, +//! which is the case in [Field] (because the variant value must be aligned) +//! 2. The field values are stored packed. In a `[Field]` representation, each +//! field takes as much space as the largest enum variant in [Field] (plus its discriminant, +//! see (1.)). Instead, for the compact representation, we pack the values into +//! align_of::() sized slots. This way, a u64 takes only 8 bytes, whereas +//! a u128 can still use its 16 bytes. +use std::alloc::{dealloc, handle_alloc_error, Layout}; +use std::sync::Arc; +use std::{hash::Hash, ptr::NonNull}; + +use slice_dst::SliceWithHeader; + +use dozer_types::chrono::{DateTime, FixedOffset, NaiveDate}; +use dozer_types::json_types::JsonValue; +use dozer_types::ordered_float::OrderedFloat; +use dozer_types::rust_decimal::Decimal; +use dozer_types::types::{DozerDuration, DozerPoint}; +use dozer_types::{ + serde::{Deserialize, Serialize}, + types::{Field, FieldType}, +}; + +// The alignment of an enum is necessarily the maximum alignment of its variants +// (otherwise it would be unsound to read from it). +// So, by using the alignment of `Field` as the alignment of the values in our +// packed `RecordRef`, we ensure that all accesses are aligned. +// This wastes a little bit of memory for subsequent fields that have +// smaller minimum alignment and size (such as `bool`, which has size=1, align=1), +// but in practice this should be negligible compared to the added effort of +// packing these fields while keeping everything aligned. 
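The comment block above describes slot packing: each value is stored at its own type's alignment instead of in a `Field`-sized slot. The arithmetic is the classic align-up formula, which the `add_field_size` helper later in this file applies per field type. A freestanding sketch of the same computation:

```rust
use std::mem::{align_of, size_of};

/// Round `offset` up to the next multiple of `align` (a power of two).
fn align_up(offset: usize, align: usize) -> usize {
    (offset + align - 1) & !(align - 1)
}

/// Reserve space for one value of type `T` at the end of a packed buffer,
/// mirroring the `add_field_size` helper in this file.
fn add_field_size<T>(size: &mut usize) {
    *size = align_up(*size, align_of::<T>());
    *size += size_of::<T>();
}

fn main() {
    let mut packed = 0usize;
    add_field_size::<bool>(&mut packed); // 1 byte at offset 0
    add_field_size::<u64>(&mut packed);  // aligned up to 8, then 8 bytes
    add_field_size::<u128>(&mut packed); // aligned up to a multiple of its alignment, then 16 bytes
    assert_eq!(packed, 32);

    // An array-of-enums layout would pay the largest variant for every field;
    // here a bool alone would cost as much as the whole enum.
    println!("packed: {packed} bytes for bool + u64 + u128");
}
```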
+const MAX_ALIGN: usize = std::mem::align_of::<Field>();
+
+#[repr(transparent)]
+#[derive(Debug)]
+/// `repr(transparent)` inner struct so we can implement drop logic on it
+/// This is a `slice_dst` `SliceWithHeader` so we can make a fat Arc, saving a level
+/// of indirection and a pointer which would otherwise be needed for the field types
+struct RecordRefInner(SliceWithHeader<NonNull<u8>, Option<FieldType>>);
+
+unsafe impl Send for RecordRefInner {}
+unsafe impl Sync for RecordRefInner {}
+
+#[derive(Debug, Clone)]
+pub struct RecordRef(Arc<RecordRefInner>);
+
+impl PartialEq for RecordRef {
+    fn eq(&self, other: &Self) -> bool {
+        self.load() == other.load()
+    }
+}
+
+impl Eq for RecordRef {}
+
+impl Hash for RecordRef {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.load().hash(state)
+    }
+}
+
+impl<'de> Deserialize<'de> for RecordRef {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: dozer_types::serde::Deserializer<'de>,
+    {
+        let fields = Vec::<Field>::deserialize(deserializer)?;
+        let owned_fields: Vec<_> = fields.iter().map(FieldRef::cloned).collect();
+        Ok(Self::new(owned_fields))
+    }
+}
+impl Serialize for RecordRef {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: dozer_types::serde::Serializer,
+    {
+        self.load().serialize(serializer)
+    }
+}
+
+#[inline(always)]
+unsafe fn adjust_alignment<T>(ptr: *mut u8) -> *mut u8 {
+    ptr.add(ptr.align_offset(std::mem::align_of::<T>()))
+}
+/// # Safety
+/// ptr should be valid for writing a `T`,
+/// that is, ptr..ptr + size_of::<T> should be inside a single live allocation
+unsafe fn write<T>(ptr: *mut u8, value: T) -> *mut u8 {
+    let ptr = adjust_alignment::<T>(ptr) as *mut T;
+    ptr.write(value);
+    ptr.add(1) as *mut u8
+}
+
+/// # Safety
+/// ptr should be valid for reading a `T`,
+/// that is, ptr..ptr + size_of::<T> should be inside a single live allocation
+/// and the memory read should be initialized.
+/// The returned reference is only valid as long as pointed to memory is valid
+/// for reading.
+unsafe fn read_ref<'a, T>(ptr: *mut u8) -> (*mut u8, &'a T) {
+    let ptr = adjust_alignment::<T>(ptr) as *mut T;
+    let result = &*ptr;
+    (ptr.add(1) as *mut u8, result)
+}
+
+/// # Safety
+/// ptr should be valid for reading a `T`,
+/// that is, ptr..ptr + size_of::<T> should be inside a single live allocation
+/// and the memory read should be initialized.
+/// This takes ownership of the memory returned as `T`, which means dropping `T`
+/// may make future reads from `ptr` undefined behavior
+unsafe fn read<T>(ptr: *mut u8) -> (*mut u8, T) {
+    let ptr = adjust_alignment::<T>(ptr) as *mut T;
+    let result = ptr.read();
+    (ptr.add(1) as *mut u8, result)
+}
+
+/// # Safety
+/// `ptr` should be valid for reading the contents of a `Field` with the type
+/// corresponding to `field_type`.
+/// See `read_ref` +unsafe fn read_field_ref<'a>(ptr: *mut u8, field_type: FieldType) -> (*mut u8, FieldRef<'a>) { + match field_type { + FieldType::UInt => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::UInt(*value)) + } + FieldType::U128 => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::U128(*value)) + } + + FieldType::Int => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Int(*value)) + } + + FieldType::I128 => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::I128(*value)) + } + + FieldType::Float => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Float(*value)) + } + + FieldType::Boolean => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Boolean(*value)) + } + + FieldType::String => { + let (ptr, value): (_, &String) = read_ref(ptr); + (ptr, FieldRef::String(value)) + } + FieldType::Text => { + let (ptr, value): (_, &String) = read_ref(ptr); + (ptr, FieldRef::Text(value)) + } + FieldType::Binary => { + let (ptr, value): (_, &Vec) = read_ref(ptr); + (ptr, FieldRef::Binary(value)) + } + FieldType::Decimal => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Decimal(*value)) + } + FieldType::Timestamp => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Timestamp(*value)) + } + FieldType::Date => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Date(*value)) + } + FieldType::Json => { + let (ptr, value) = read_ref::(ptr); + (ptr, FieldRef::Json(value.to_owned())) + } + FieldType::Point => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Point(*value)) + } + FieldType::Duration => { + let (ptr, value) = read_ref(ptr); + (ptr, FieldRef::Duration(*value)) + } + } +} +unsafe fn read_field(ptr: *mut u8, field_type: FieldType) -> (*mut u8, Field) { + match field_type { + FieldType::UInt => { + let (ptr, value) = read(ptr); + (ptr, Field::UInt(value)) + } + FieldType::U128 => { + let (ptr, value) = read(ptr); + (ptr, Field::U128(value)) + } + + FieldType::Int => { + let (ptr, value) = read(ptr); + (ptr, Field::Int(value)) + } + + FieldType::I128 => { + let (ptr, value) = read(ptr); + (ptr, Field::I128(value)) + } + + FieldType::Float => { + let (ptr, value) = read(ptr); + (ptr, Field::Float(value)) + } + + FieldType::Boolean => { + let (ptr, value) = read(ptr); + (ptr, Field::Boolean(value)) + } + + FieldType::String => { + let (ptr, value) = read(ptr); + (ptr, Field::String(value)) + } + FieldType::Text => { + let (ptr, value) = read(ptr); + (ptr, Field::String(value)) + } + FieldType::Binary => { + let (ptr, value) = read(ptr); + (ptr, Field::Binary(value)) + } + FieldType::Decimal => { + let (ptr, value) = read(ptr); + (ptr, Field::Decimal(value)) + } + FieldType::Timestamp => { + let (ptr, value) = read(ptr); + (ptr, Field::Timestamp(value)) + } + FieldType::Date => { + let (ptr, value) = read(ptr); + (ptr, Field::Date(value)) + } + FieldType::Json => { + let (ptr, value) = read::(ptr); + (ptr, Field::Json(value)) + } + FieldType::Point => { + let (ptr, value) = read(ptr); + (ptr, Field::Point(value)) + } + FieldType::Duration => { + let (ptr, value) = read(ptr); + (ptr, Field::Duration(value)) + } + } +} + +#[inline(always)] +fn add_field_size(size: &mut usize) { + let align = std::mem::align_of::(); + // Align the start of the field + *size = (*size + (align - 1)) & !(align - 1); + *size += std::mem::size_of::(); +} +fn size(fields: &[Option]) -> usize { + let mut size = 0; + for field in fields.iter().flatten() { + match field { + FieldType::UInt => add_field_size::(&mut size), + FieldType::U128 => 
add_field_size::<u128>(&mut size),
+            FieldType::Int => add_field_size::<i64>(&mut size),
+            FieldType::I128 => add_field_size::<i128>(&mut size),
+            FieldType::Float => add_field_size::<OrderedFloat<f64>>(&mut size),
+            FieldType::Boolean => add_field_size::<bool>(&mut size),
+            FieldType::String => add_field_size::<String>(&mut size),
+            FieldType::Text => add_field_size::<String>(&mut size),
+            FieldType::Binary => add_field_size::<Vec<u8>>(&mut size),
+            FieldType::Decimal => add_field_size::<Decimal>(&mut size),
+            FieldType::Timestamp => add_field_size::<DateTime<FixedOffset>>(&mut size),
+            FieldType::Date => add_field_size::<NaiveDate>(&mut size),
+            FieldType::Json => add_field_size::<JsonValue>(&mut size),
+            FieldType::Point => add_field_size::<DozerPoint>(&mut size),
+            FieldType::Duration => add_field_size::<DozerDuration>(&mut size),
+        }
+    }
+    size
+}
+
+#[derive(Hash, Serialize, Deserialize, Debug, PartialEq, Eq)]
+#[serde(crate = "dozer_types::serde")]
+pub enum FieldRef<'a> {
+    UInt(u64),
+    U128(u128),
+    Int(i64),
+    I128(i128),
+    Float(OrderedFloat<f64>),
+    Boolean(bool),
+    String(&'a str),
+    Text(&'a str),
+    Binary(&'a [u8]),
+    Decimal(Decimal),
+    Timestamp(DateTime<FixedOffset>),
+    Date(NaiveDate),
+    Json(JsonValue),
+    Point(DozerPoint),
+    Duration(DozerDuration),
+    Null,
+}
+
+impl FieldRef<'_> {
+    pub fn cloned(&self) -> Field {
+        match self {
+            FieldRef::UInt(v) => Field::UInt(*v),
+            FieldRef::U128(v) => Field::U128(*v),
+            FieldRef::Int(v) => Field::Int(*v),
+            FieldRef::I128(v) => Field::I128(*v),
+            FieldRef::Float(v) => Field::Float(*v),
+            FieldRef::Boolean(v) => Field::Boolean(*v),
+            FieldRef::String(v) => Field::String((*v).to_owned()),
+            FieldRef::Text(v) => Field::Text((*v).to_owned()),
+            FieldRef::Binary(v) => Field::Binary((*v).to_vec()),
+            FieldRef::Decimal(v) => Field::Decimal(*v),
+            FieldRef::Timestamp(v) => Field::Timestamp(*v),
+            FieldRef::Date(v) => Field::Date(*v),
+            FieldRef::Json(v) => Field::Json(v.clone()),
+            FieldRef::Point(v) => Field::Point(*v),
+            FieldRef::Duration(v) => Field::Duration(*v),
+            FieldRef::Null => Field::Null,
+        }
+    }
+}
+
+impl RecordRef {
+    pub fn new(fields: Vec<Field>) -> Self {
+        let field_types = fields
+            .iter()
+            .map(|field| field.ty())
+            .collect::<Box<[Option<FieldType>]>>();
+        let size = size(&field_types);
+
+        let layout = Layout::from_size_align(size, MAX_ALIGN).unwrap();
+        // SAFETY: Everything is `ALIGN` byte aligned
+        let data = unsafe {
+            let data = std::alloc::alloc(layout);
+            if data.is_null() {
+                handle_alloc_error(layout);
+            }
+            data
+        };
+        // SAFETY: We checked for null above
+        let data = unsafe { NonNull::new_unchecked(data) };
+        let mut ptr = data.as_ptr();
+
+        // SAFETY:
+        // - ptr is non-null (we got it from a NonNull)
+        // - ptr is dereferencable (its memory range is large enough and not de-allocated)
+        //
+        unsafe {
+            for field in fields {
+                match field {
+                    Field::UInt(v) => ptr = write(ptr, v),
+                    Field::U128(v) => ptr = write(ptr, v),
+                    Field::Int(v) => ptr = write(ptr, v),
+                    Field::I128(v) => ptr = write(ptr, v),
+                    Field::Float(v) => ptr = write(ptr, v),
+                    Field::Boolean(v) => ptr = write(ptr, v),
+                    Field::String(v) => ptr = write(ptr, v),
+                    Field::Text(v) => ptr = write(ptr, v),
+                    Field::Binary(v) => ptr = write(ptr, v),
+                    Field::Decimal(v) => ptr = write(ptr, v),
+                    Field::Timestamp(v) => ptr = write(ptr, v),
+                    Field::Date(v) => ptr = write(ptr, v),
+                    Field::Json(v) => ptr = write(ptr, v),
+                    Field::Point(v) => ptr = write(ptr, v),
+                    Field::Duration(v) => ptr = write(ptr, v),
+                    Field::Null => (),
+                }
+            }
+        }
+        // SAFETY: This is valid, because inner is `repr(transparent)`
+        let arc = unsafe {
+            let arc = SliceWithHeader::from_slice::<Arc<_>>(data, &field_types);
+            std::mem::transmute(arc)
+        };
+        Self(arc)
+    }
+
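`RecordRef::new` above is the write half of the packed layout: one type tag per field in the `SliceWithHeader` tail, plus only the value's own aligned footprint in the data buffer. A rough, illustrative comparison against an array-of-enums layout, using a stand-in enum since `Field` itself is not reproduced here; exact sizes are compiler- and target-dependent:

```rust
use std::mem::size_of;

// Stand-in for dozer's `Field`: the largest variant dictates the element size.
#[allow(dead_code)]
enum MiniField {
    U64(u64),
    U128(u128),
    Text(String),
}

fn main() {
    // Array-of-enums: every element pays for the largest variant plus an
    // aligned discriminant (typically 32 bytes here, but target-dependent).
    println!("enum element size: {} bytes", size_of::<MiniField>());

    // Packed scheme: a small per-field tag lives in the Arc's tail slice,
    // and the data buffer holds only each value's own footprint
    // (plus alignment padding): 8 for a u64, 16 for a u128, 24 for a String.
    let packed_values = size_of::<u64>() + size_of::<u128>() + size_of::<String>();
    println!("packed values: {packed_values} bytes + 3 type tags");
}
```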
+ pub fn load(&self) -> Vec> { + self.0 + .field_types() + .iter() + .scan(self.0.data().as_ptr(), |ptr, field_type| { + let Some(field_type) = field_type else { + return Some(FieldRef::Null); + }; + + unsafe { + let (new_ptr, value) = read_field_ref(*ptr, *field_type); + *ptr = new_ptr; + Some(value) + } + }) + .collect() + } + + #[inline(always)] + pub fn id(&self) -> usize { + Arc::as_ptr(&self.0) as *const () as usize + } +} + +impl RecordRefInner { + #[inline(always)] + fn field_types(&self) -> &[Option] { + &self.0.slice + } + + #[inline(always)] + fn data(&self) -> NonNull { + self.0.header + } +} + +impl Drop for RecordRefInner { + fn drop(&mut self) { + let mut ptr = self.data().as_ptr(); + for field in self.field_types().iter().flatten() { + unsafe { + // Read owned so all field destructors run + ptr = read_field(ptr, *field).0; + } + } + // Then deallocate the field storage + unsafe { + dealloc( + self.data().as_ptr(), + Layout::from_size_align(size(self.field_types()), MAX_ALIGN).unwrap(), + ); + } + } +} + +mod store; +pub use store::{ProcessorRecordStore, ProcessorRecordStoreDeserializer, StoreRecord}; + +#[cfg(test)] +mod tests { + use dozer_types::types::Field; + + use super::RecordRef; + + #[test] + fn test_store_load() { + let fields = vec![ + Field::String("asdf".to_owned()), + Field::Int(23), + Field::Null, + Field::U128(234), + ]; + + let record = RecordRef::new(fields.clone()); + let loaded_fields: Vec<_> = record + .load() + .into_iter() + .map(|field| field.cloned()) + .collect(); + assert_eq!(&fields, &loaded_fields); + } + + #[test] + fn test_ser_de() { + let fields = vec![ + Field::String("asdf".to_owned()), + Field::Int(23), + Field::Null, + Field::U128(234), + ]; + + let record = RecordRef::new(fields.clone()); + + let bytes = dozer_types::bincode::serialize(&record).unwrap(); + let deserialized: RecordRef = dozer_types::bincode::deserialize(&bytes).unwrap(); + let loaded_fields: Vec<_> = deserialized + .load() + .into_iter() + .map(|field| field.cloned()) + .collect(); + assert_eq!(&fields, &loaded_fields); + } +} diff --git a/dozer-recordstore/src/store.rs b/dozer-recordstore/src/in_memory/store.rs similarity index 57% rename from dozer-recordstore/src/store.rs rename to dozer-recordstore/src/in_memory/store.rs index 496cdc19c1..0dc334b5c7 100644 --- a/dozer-recordstore/src/store.rs +++ b/dozer-recordstore/src/in_memory/store.rs @@ -3,33 +3,11 @@ use std::{ sync::{Arc, Weak}, }; -use dozer_types::{ - bincode, - errors::internal::BoxedError, - parking_lot::RwLock, - serde::{Deserialize, Serialize}, - thiserror::Error, - types::{Field, Lifetime, Record}, -}; - -use crate::RecordRefInner; +use dozer_types::{bincode, parking_lot::RwLock, types::Field}; -use super::{FieldRef, ProcessorRecord, RecordRef}; +use crate::RecordStoreError; -#[derive(Error, Debug)] -pub enum RecordStoreError { - #[error("Unable to deserialize type: {} - Reason: {}", typ, reason.to_string())] - DeserializationError { - typ: &'static str, - reason: BoxedError, - }, - - #[error("Unable to serialize type: {} - Reason: {}", typ, reason.to_string())] - SerializationError { - typ: &'static str, - reason: BoxedError, - }, -} +use super::{RecordRef, RecordRefInner}; pub trait StoreRecord { fn store_record(&self, record: &RecordRef) -> Result<(), RecordStoreError>; @@ -39,32 +17,6 @@ pub trait StoreRecord { self.store_record(&record)?; Ok(record) } - - fn load_ref<'a>( - &self, - record_ref: &'a RecordRef, - ) -> Result>, RecordStoreError> { - Ok(record_ref.load()) - } - - fn create_record(&self, 
record: &Record) -> Result { - let record_ref = self.create_ref(&record.values)?; - let mut processor_record = ProcessorRecord::new(Box::new([record_ref])); - processor_record.set_lifetime(record.lifetime.clone()); - Ok(processor_record) - } - - fn load_record(&self, processor_record: &ProcessorRecord) -> Result { - let mut record = Record::default(); - for record_ref in processor_record.values.iter() { - let fields = self.load_ref(record_ref)?; - record - .values - .extend(fields.iter().map(|field| field.cloned())); - } - record.set_lifetime(processor_record.get_lifetime()); - Ok(record) - } } #[derive(Debug)] @@ -111,19 +63,6 @@ impl ProcessorRecordStore { .get(&(record_ref.id())) .expect("RecordRef not found in ProcessorRecordStore") as u64 } - - pub fn serialize_record(&self, record: &ProcessorRecord) -> Result, bincode::Error> { - let ProcessorRecord { values, lifetime } = record; - let values = values - .iter() - .map(|value| self.serialize_ref(value)) - .collect(); - let record = ProcessorRecordForSerialization { - values, - lifetime: lifetime.clone(), - }; - bincode::serialize(&record) - } } impl StoreRecord for ProcessorRecordStore { @@ -184,22 +123,16 @@ impl ProcessorRecordStoreDeserializer { Ok(()) } - pub fn deserialize_ref(&self, index: u64) -> RecordRef { - self.inner.read().records[index as usize] + pub fn deserialize_ref(&self, index: u64) -> Result { + Ok(self + .inner + .read() + .records + .get(index as usize) + .ok_or(RecordStoreError::InMemoryRecordNotFound(index))? .as_ref() - .unwrap_or_else(|| { - panic!("RecordRef {index} not found in ProcessorRecordStoreDeserializer") - }) - .clone() - } - - pub fn deserialize_record(&self, data: &[u8]) -> Result { - let ProcessorRecordForSerialization { values, lifetime } = bincode::deserialize(data)?; - let values = values - .iter() - .map(|index| self.deserialize_ref(*index)) - .collect(); - Ok(ProcessorRecord { values, lifetime }) + .ok_or(RecordStoreError::InMemoryRecordNotFound(index))? 
+ .clone()) } pub fn into_record_store(self) -> ProcessorRecordStore { @@ -243,43 +176,10 @@ fn insert_record_pointer_to_index( debug_assert!(previous_index.is_none()); } -#[derive(Debug, Serialize, Deserialize)] -#[serde(crate = "dozer_types::serde")] -struct ProcessorRecordForSerialization { - values: Vec, - lifetime: Option>, -} - #[cfg(test)] mod tests { - use std::time::Duration; - - use dozer_types::types::Timestamp; - use super::*; - fn test_record() -> Record { - let mut record = Record::new(vec![ - Field::Int(1), - Field::Int(2), - Field::Int(3), - Field::Int(4), - ]); - record.lifetime = Some(Lifetime { - reference: Timestamp::parse_from_rfc3339("2020-01-01T00:13:00Z").unwrap(), - duration: Duration::from_secs(10), - }); - record - } - - #[test] - fn test_record_roundtrip() { - let record = test_record(); - let record_store = ProcessorRecordStore::new().unwrap(); - let processor_record = record_store.create_record(&record).unwrap(); - assert_eq!(record_store.load_record(&processor_record).unwrap(), record); - } - #[test] fn test_serialization_roundtrip() { let record_store = ProcessorRecordStore::new().unwrap(); @@ -304,21 +204,9 @@ mod tests { record_store.deserialize_and_extend(&data).unwrap(); let mut deserialized_record_refs = vec![]; for serialized_record_ref in serialized_record_refs { - deserialized_record_refs.push(record_store.deserialize_ref(serialized_record_ref)); + deserialized_record_refs + .push(record_store.deserialize_ref(serialized_record_ref).unwrap()); } assert_eq!(deserialized_record_refs, record_refs); } - - #[test] - fn test_record_serialization_roundtrip() { - let record_store = ProcessorRecordStore::new().unwrap(); - let record = record_store.create_record(&test_record()).unwrap(); - let serialized_record = record_store.serialize_record(&record).unwrap(); - let data = record_store.serialize_slice(0).unwrap().0; - - let record_store = ProcessorRecordStoreDeserializer::new().unwrap(); - record_store.deserialize_and_extend(&data).unwrap(); - let deserialized_record = record_store.deserialize_record(&serialized_record).unwrap(); - assert_eq!(deserialized_record, record); - } } diff --git a/dozer-recordstore/src/lib.rs b/dozer-recordstore/src/lib.rs index 3a2e203375..3bab157542 100644 --- a/dozer-recordstore/src/lib.rs +++ b/dozer-recordstore/src/lib.rs @@ -1,535 +1,312 @@ -//! [`RecordRef`] is a compact representation of a collection of [dozer_types::types::Field]s -//! There are two principles that make this representation more compact than `[Field]`: -//! 1. The fields and their types are stored as a Struct of Arrays instead of -//! and Array of Structs. This makes it possible to pack the discriminants -//! for the field types as a byte per field, instead of taking up a full word, -//! which is the case in [Field] (because the variant value must be aligned) -//! 2. The field values are stored packed. In a `[Field]` representation, each -//! field takes as much space as the largest enum variant in [Field] (plus its discriminant, -//! see (1.)). Instead, for the compact representation, we pack the values into -//! align_of::() sized slots. This way, a u64 takes only 8 bytes, whereas -//! a u128 can still use its 16 bytes. 
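From this point the old single-backend `lib.rs` is replaced by an enum that dispatches between the in-memory store and the new RocksDB store. A reduced sketch of that dispatch shape; the names mirror the diff, but the bodies are stand-ins rather than dozer's actual implementations:

```rust
use std::cell::RefCell;

#[derive(Debug, Clone, PartialEq)]
enum RecordRef {
    InMemory(usize), // stand-in for in_memory::RecordRef
    Rocksdb(u64),    // rocksdb refs are just row ids
}

trait StoreRecord {
    fn create_ref(&self, values: &[i64]) -> RecordRef;
    fn load_ref(&self, record_ref: &RecordRef) -> Vec<i64>;
}

struct InMemoryStore(RefCell<Vec<Vec<i64>>>);
struct RocksdbStore; // placeholder: the real store owns a RocksdbMap

enum ProcessorRecordStore {
    InMemory(InMemoryStore),
    Rocksdb(RocksdbStore),
}

impl StoreRecord for ProcessorRecordStore {
    fn create_ref(&self, values: &[i64]) -> RecordRef {
        match self {
            Self::InMemory(store) => {
                let mut records = store.0.borrow_mut();
                records.push(values.to_vec());
                RecordRef::InMemory(records.len() - 1)
            }
            // Elided: insert into the map and hand back the new id.
            Self::Rocksdb(_) => RecordRef::Rocksdb(0),
        }
    }

    fn load_ref(&self, record_ref: &RecordRef) -> Vec<i64> {
        // Pattern-match on (ref, store) pairs, as the diff does; a mismatch
        // is a programming error, hence the panic.
        match (record_ref, self) {
            (RecordRef::InMemory(i), Self::InMemory(store)) => store.0.borrow()[*i].clone(),
            (RecordRef::Rocksdb(_), Self::Rocksdb(_)) => vec![], // elided: map lookup
            _ => panic!("record ref does not belong to this store"),
        }
    }
}

fn main() {
    let store = ProcessorRecordStore::InMemory(InMemoryStore(RefCell::new(Vec::new())));
    let record_ref = store.create_ref(&[1, 2, 3]);
    assert_eq!(store.load_ref(&record_ref), vec![1, 2, 3]);
}
```

One difference worth noting: in the real diff an in-memory `RecordRef` can be loaded by either backend, since it carries its own data; this sketch treats every mismatch as an error for brevity.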
-use std::alloc::{dealloc, handle_alloc_error, Layout}; -use std::sync::Arc; -use std::{hash::Hash, ptr::NonNull}; - -use slice_dst::SliceWithHeader; - -use dozer_types::chrono::{DateTime, FixedOffset, NaiveDate}; -use dozer_types::json_types::JsonValue; -use dozer_types::ordered_float::OrderedFloat; -use dozer_types::rust_decimal::Decimal; -use dozer_types::types::{DozerDuration, DozerPoint}; use dozer_types::{ + bincode, + errors::internal::BoxedError, + models::app_config::RecordStore, serde::{Deserialize, Serialize}, - types::{Field, FieldType, Lifetime}, + thiserror::{self, Error}, + types::{Field, Lifetime, Record}, }; +use in_memory::{FieldRef, StoreRecord as _}; + +#[derive(Error, Debug)] +pub enum RecordStoreError { + #[error("Unable to deserialize type: {} - Reason: {}", typ, reason.to_string())] + DeserializationError { + typ: &'static str, + reason: BoxedError, + }, + + #[error("Unable to serialize type: {} - Reason: {}", typ, reason.to_string())] + SerializationError { + typ: &'static str, + reason: BoxedError, + }, + #[error("Failed to create tempdir: {0}")] + FailedToCreateTempDir(#[source] std::io::Error), + #[error("Storage error: {0}")] + Storage(#[from] dozer_storage::errors::StorageError), + #[error("In memory record not found {0}")] + InMemoryRecordNotFound(u64), + #[error("Rocksdb record not found: {0}")] + RocksdbRecordNotFound(u64), + #[error("Bincode error: {0}")] + Bincode(#[from] bincode::Error), +} -// The alignment of an enum is necessarily the maximum alignment of its variants -// (otherwise it would be unsound to read from it). -// So, by using the alignment of `Field` as the alignment of the values in our -// packed `RecordRef`, we ensure that all accesses are aligned. -// This wastes a little bit of memory for subsequent fields that have -// smaller minimum alignment and size (such as `bool`, which has size=1, align=1), -// but in practice this should be negligible compared to the added effort of -// packing these fields while keeping everything aligned. -const MAX_ALIGN: usize = std::mem::align_of::(); - -#[repr(transparent)] -#[derive(Debug)] -/// `repr(transparent)` inner struct so we can implement drop logic on it -/// This is a `slice_dst` `SliceWithHeader` so we can make a fat Arc, saving a level -/// of indirection and a pointer which would otherwise be needed for the field types -struct RecordRefInner(SliceWithHeader, Option>); - -unsafe impl Send for RecordRefInner {} -unsafe impl Sync for RecordRefInner {} +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum RecordRef { + InMemory(in_memory::RecordRef), + Rocksdb(u64), +} -#[derive(Debug, Clone)] -pub struct RecordRef(Arc); +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct ProcessorRecord { + /// All `Field`s in this record. The `Field`s are grouped by `Arc` to reduce memory usage. + /// This is a Box<[]> instead of a Vec to save space on storing the vec's capacity + values: Box<[RecordRef]>, -impl PartialEq for RecordRef { - fn eq(&self, other: &Self) -> bool { - self.load() == other.load() - } + /// Time To Live for this record. If the value is None, the record will never expire. 
+ lifetime: Option>, } -impl Eq for RecordRef {} - -impl Hash for RecordRef { - fn hash(&self, state: &mut H) { - self.load().hash(state) +impl ProcessorRecord { + pub fn new(values: Box<[RecordRef]>) -> Self { + Self { + values, + ..Default::default() + } } -} -impl<'de> Deserialize<'de> for RecordRef { - fn deserialize(deserializer: D) -> Result - where - D: dozer_types::serde::Deserializer<'de>, - { - let fields = Vec::::deserialize(deserializer)?; - let owned_fields: Vec<_> = fields.iter().map(FieldRef::cloned).collect(); - Ok(Self::new(owned_fields)) + pub fn get_lifetime(&self) -> Option { + self.lifetime.as_ref().map(|lifetime| *lifetime.clone()) } -} -impl Serialize for RecordRef { - fn serialize(&self, serializer: S) -> Result - where - S: dozer_types::serde::Serializer, - { - self.load().serialize(serializer) + pub fn set_lifetime(&mut self, lifetime: Option) { + self.lifetime = lifetime.map(Box::new); } -} - -#[inline(always)] -unsafe fn adjust_alignment(ptr: *mut u8) -> *mut u8 { - ptr.add(ptr.align_offset(std::mem::align_of::())) -} -/// # Safety -/// ptr should be valid for writing a `T`, -/// that is, ptr..ptr + size_of:: should be inside a single live allocation -unsafe fn write(ptr: *mut u8, value: T) -> *mut u8 { - let ptr = adjust_alignment::(ptr) as *mut T; - ptr.write(value); - ptr.add(1) as *mut u8 -} -/// # Safety -/// ptr should be valid for reading a `T`, -/// that is, ptr..ptr + size_of:: should be inside a single live allocation -/// and the memory read should be initialized. -/// The returned reference is only valid as long as pointed to memory is valid -/// for reading. -unsafe fn read_ref<'a, T>(ptr: *mut u8) -> (*mut u8, &'a T) { - let ptr = adjust_alignment::(ptr) as *mut T; - let result = &*ptr; - (ptr.add(1) as *mut u8, result) -} + pub fn values(&self) -> &[RecordRef] { + &self.values + } -/// # Safety -/// ptr should be valid for reading a `T`, -/// that is, ptr..ptr + size_of:: should be inside a single live allocation -/// and the memory read should be initialized. -/// This takes ownership of the memory returned as `T`, which means dropping `T` -/// may make future reads from `ptr` undefined behavior -unsafe fn read(ptr: *mut u8) -> (*mut u8, T) { - let ptr = adjust_alignment::(ptr) as *mut T; - let result = ptr.read(); - (ptr.add(1) as *mut u8, result) + pub fn appended(existing: &ProcessorRecord, additional: RecordRef) -> Self { + let mut values = Vec::with_capacity(existing.values().len() + 1); + values.extend_from_slice(existing.values()); + values.push(additional); + Self::new(values.into_boxed_slice()) + } } -/// # Safety -/// `ptr` should be valid for reading the contents of a `Field` with the type -/// corresponding to `field_type`. 
-/// See `read_ref` -unsafe fn read_field_ref<'a>(ptr: *mut u8, field_type: FieldType) -> (*mut u8, FieldRef<'a>) { - match field_type { - FieldType::UInt => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::UInt(*value)) - } - FieldType::U128 => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::U128(*value)) - } +pub trait StoreRecord { + fn create_ref(&self, values: &[Field]) -> Result; - FieldType::Int => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Int(*value)) - } + fn load_ref(&self, record_ref: &RecordRef) -> Result, RecordStoreError>; - FieldType::I128 => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::I128(*value)) - } + fn create_record(&self, record: &Record) -> Result { + let record_ref = self.create_ref(&record.values)?; + let mut processor_record = ProcessorRecord::new(Box::new([record_ref])); + processor_record.set_lifetime(record.lifetime.clone()); + Ok(processor_record) + } - FieldType::Float => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Float(*value)) + fn load_record(&self, processor_record: &ProcessorRecord) -> Result { + let mut record = Record::default(); + for record_ref in processor_record.values.iter() { + let fields = self.load_ref(record_ref)?; + record.values.extend(fields); } + record.set_lifetime(processor_record.get_lifetime()); + Ok(record) + } +} - FieldType::Boolean => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Boolean(*value)) - } +#[derive(Debug)] +pub enum ProcessorRecordStore { + InMemory(in_memory::ProcessorRecordStore), + Rocksdb(rocksdb::ProcessorRecordStore), +} - FieldType::String => { - let (ptr, value): (_, &String) = read_ref(ptr); - (ptr, FieldRef::String(value)) - } - FieldType::Text => { - let (ptr, value): (_, &String) = read_ref(ptr); - (ptr, FieldRef::Text(value)) - } - FieldType::Binary => { - let (ptr, value): (_, &Vec) = read_ref(ptr); - (ptr, FieldRef::Binary(value)) - } - FieldType::Decimal => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Decimal(*value)) - } - FieldType::Timestamp => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Timestamp(*value)) - } - FieldType::Date => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Date(*value)) - } - FieldType::Json => { - let (ptr, value) = read_ref::(ptr); - (ptr, FieldRef::Json(value.to_owned())) - } - FieldType::Point => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Point(*value)) - } - FieldType::Duration => { - let (ptr, value) = read_ref(ptr); - (ptr, FieldRef::Duration(*value)) +impl ProcessorRecordStore { + pub fn new(record_store: RecordStore) -> Result { + match record_store { + RecordStore::InMemory => Ok(Self::InMemory(in_memory::ProcessorRecordStore::new()?)), + RecordStore::Rocksdb(config) => { + Ok(Self::Rocksdb(rocksdb::ProcessorRecordStore::new(config)?)) + } } } -} -unsafe fn read_field(ptr: *mut u8, field_type: FieldType) -> (*mut u8, Field) { - match field_type { - FieldType::UInt => { - let (ptr, value) = read(ptr); - (ptr, Field::UInt(value)) - } - FieldType::U128 => { - let (ptr, value) = read(ptr); - (ptr, Field::U128(value)) - } - FieldType::Int => { - let (ptr, value) = read(ptr); - (ptr, Field::Int(value)) + pub fn num_records(&self) -> usize { + match self { + Self::InMemory(store) => store.num_records(), + Self::Rocksdb(store) => store.num_records(), } + } - FieldType::I128 => { - let (ptr, value) = read(ptr); - (ptr, Field::I128(value)) + pub fn serialize_slice(&self, start: usize) -> Result<(Vec, usize), RecordStoreError> { + match self { + Self::InMemory(store) => 
store.serialize_slice(start), + Self::Rocksdb(store) => store.serialize_slice(start), } + } - FieldType::Float => { - let (ptr, value) = read(ptr); - (ptr, Field::Float(value)) - } + pub fn serialize_record(&self, record: &ProcessorRecord) -> Result, bincode::Error> { + let ProcessorRecord { values, lifetime } = record; + let values = values + .iter() + .map(|value| match (value, self) { + (RecordRef::InMemory(record_ref), ProcessorRecordStore::InMemory(record_store)) => { + record_store.serialize_ref(record_ref) + } + (RecordRef::Rocksdb(record_ref), _) => *record_ref, + _ => panic!("In memory record ref cannot be serialized by rocksdb record store"), + }) + .collect(); + let record = ProcessorRecordForSerialization { + values, + lifetime: lifetime.clone(), + }; + bincode::serialize(&record) + } +} - FieldType::Boolean => { - let (ptr, value) = read(ptr); - (ptr, Field::Boolean(value)) +impl StoreRecord for ProcessorRecordStore { + fn create_ref(&self, values: &[Field]) -> Result { + match self { + Self::InMemory(store) => Ok(RecordRef::InMemory(store.create_ref(values)?)), + Self::Rocksdb(store) => Ok(RecordRef::Rocksdb(store.create_ref(values)?)), } + } - FieldType::String => { - let (ptr, value) = read(ptr); - (ptr, Field::String(value)) - } - FieldType::Text => { - let (ptr, value) = read(ptr); - (ptr, Field::String(value)) - } - FieldType::Binary => { - let (ptr, value) = read(ptr); - (ptr, Field::Binary(value)) - } - FieldType::Decimal => { - let (ptr, value) = read(ptr); - (ptr, Field::Decimal(value)) - } - FieldType::Timestamp => { - let (ptr, value) = read(ptr); - (ptr, Field::Timestamp(value)) - } - FieldType::Date => { - let (ptr, value) = read(ptr); - (ptr, Field::Date(value)) - } - FieldType::Json => { - let (ptr, value) = read::(ptr); - (ptr, Field::Json(value)) - } - FieldType::Point => { - let (ptr, value) = read(ptr); - (ptr, Field::Point(value)) - } - FieldType::Duration => { - let (ptr, value) = read(ptr); - (ptr, Field::Duration(value)) + fn load_ref(&self, record_ref: &RecordRef) -> Result, RecordStoreError> { + match (record_ref, self) { + (RecordRef::InMemory(record_ref), _) => Ok(load_in_memory_record_ref(record_ref)), + (RecordRef::Rocksdb(record_ref), ProcessorRecordStore::Rocksdb(record_store)) => { + Ok(record_store.load_ref(record_ref)?) 
+ } + _ => panic!("Rocksdb record ref cannot be loaded by in memory record store"), } } } -#[inline(always)] -fn add_field_size(size: &mut usize) { - let align = std::mem::align_of::(); - // Align the start of the field - *size = (*size + (align - 1)) & !(align - 1); - *size += std::mem::size_of::(); +#[derive(Debug)] +pub enum ProcessorRecordStoreDeserializer { + InMemory(in_memory::ProcessorRecordStoreDeserializer), + Rocksdb(rocksdb::ProcessorRecordStore), } -fn size(fields: &[Option]) -> usize { - let mut size = 0; - for field in fields.iter().flatten() { - match field { - FieldType::UInt => add_field_size::(&mut size), - FieldType::U128 => add_field_size::(&mut size), - FieldType::Int => add_field_size::(&mut size), - FieldType::I128 => add_field_size::(&mut size), - FieldType::Float => add_field_size::>(&mut size), - FieldType::Boolean => add_field_size::(&mut size), - FieldType::String => add_field_size::(&mut size), - FieldType::Text => add_field_size::(&mut size), - FieldType::Binary => add_field_size::>(&mut size), - FieldType::Decimal => add_field_size::(&mut size), - FieldType::Timestamp => add_field_size::>(&mut size), - FieldType::Date => add_field_size::(&mut size), - FieldType::Json => add_field_size::(&mut size), - FieldType::Point => add_field_size::(&mut size), - FieldType::Duration => add_field_size::(&mut size), + +impl ProcessorRecordStoreDeserializer { + pub fn new(record_store: RecordStore) -> Result { + match record_store { + RecordStore::InMemory => Ok(Self::InMemory( + in_memory::ProcessorRecordStoreDeserializer::new()?, + )), + RecordStore::Rocksdb(config) => { + Ok(Self::Rocksdb(rocksdb::ProcessorRecordStore::new(config)?)) + } } } - size -} -#[derive(Hash, Serialize, Deserialize, Debug, PartialEq, Eq)] -#[serde(crate = "dozer_types::serde")] -pub enum FieldRef<'a> { - UInt(u64), - U128(u128), - Int(i64), - I128(i128), - Float(OrderedFloat), - Boolean(bool), - String(&'a str), - Text(&'a str), - Binary(&'a [u8]), - Decimal(Decimal), - Timestamp(DateTime), - Date(NaiveDate), - Json(JsonValue), - Point(DozerPoint), - Duration(DozerDuration), - Null, -} - -impl FieldRef<'_> { - pub fn cloned(&self) -> Field { + pub fn deserialize_and_extend(&self, data: &[u8]) -> Result<(), RecordStoreError> { match self { - FieldRef::UInt(v) => Field::UInt(*v), - FieldRef::U128(v) => Field::U128(*v), - FieldRef::Int(v) => Field::Int(*v), - FieldRef::I128(v) => Field::I128(*v), - FieldRef::Float(v) => Field::Float(*v), - FieldRef::Boolean(v) => Field::Boolean(*v), - FieldRef::String(v) => Field::String((*v).to_owned()), - FieldRef::Text(v) => Field::Text((*v).to_owned()), - FieldRef::Binary(v) => Field::Binary((*v).to_vec()), - FieldRef::Decimal(v) => Field::Decimal(*v), - FieldRef::Timestamp(v) => Field::Timestamp(*v), - FieldRef::Date(v) => Field::Date(*v), - FieldRef::Json(v) => Field::Json(v.clone()), - FieldRef::Point(v) => Field::Point(*v), - FieldRef::Duration(v) => Field::Duration(*v), - FieldRef::Null => Field::Null, + Self::InMemory(store) => store.deserialize_and_extend(data), + Self::Rocksdb(store) => store.deserialize_and_extend(data), } } -} -impl RecordRef { - pub fn new(fields: Vec) -> Self { - let field_types = fields - .iter() - .map(|field| field.ty()) - .collect::]>>(); - let size = size(&field_types); - - let layout = Layout::from_size_align(size, MAX_ALIGN).unwrap(); - // SAFETY: Everything is `ALIGN` byte aligned - let data = unsafe { - let data = std::alloc::alloc(layout); - if data.is_null() { - handle_alloc_error(layout); - } - data - }; - // SAFETY: We 
checked for null above
-        let data = unsafe { NonNull::new_unchecked(data) };
-        let mut ptr = data.as_ptr();
-
-        // SAFETY:
-        // - ptr is non-null (we got it from a NonNull)
-        // - ptr is dereferencable (its memory range is large enough and not de-allocated)
-
-        unsafe {
-            for field in fields {
-                match field {
-                    Field::UInt(v) => ptr = write(ptr, v),
-                    Field::U128(v) => ptr = write(ptr, v),
-                    Field::Int(v) => ptr = write(ptr, v),
-                    Field::I128(v) => ptr = write(ptr, v),
-                    Field::Float(v) => ptr = write(ptr, v),
-                    Field::Boolean(v) => ptr = write(ptr, v),
-                    Field::String(v) => ptr = write(ptr, v),
-                    Field::Text(v) => ptr = write(ptr, v),
-                    Field::Binary(v) => ptr = write(ptr, v),
-                    Field::Decimal(v) => ptr = write(ptr, v),
-                    Field::Timestamp(v) => ptr = write(ptr, v),
-                    Field::Date(v) => ptr = write(ptr, v),
-                    Field::Json(v) => ptr = write(ptr, v),
-                    Field::Point(v) => ptr = write(ptr, v),
-                    Field::Duration(v) => ptr = write(ptr, v),
-                    Field::Null => (),
+    pub fn deserialize_record(&self, data: &[u8]) -> Result<ProcessorRecord, RecordStoreError> {
+        let ProcessorRecordForSerialization { values, lifetime } = bincode::deserialize(data)?;
+        let mut deserialized_values = Vec::with_capacity(values.len());
+        for value in values {
+            match self {
+                Self::InMemory(record_store) => {
+                    let record_ref = record_store.deserialize_ref(value)?;
+                    deserialized_values.push(RecordRef::InMemory(record_ref));
                 }
+                Self::Rocksdb(_) => deserialized_values.push(RecordRef::Rocksdb(value)),
             }
         }
-        // SAFETY: This is valid, because inner is `repr(transparent)`
-        let arc = unsafe {
-            let arc = SliceWithHeader::from_slice::<Arc<_>>(data, &field_types);
-            std::mem::transmute(arc)
-        };
-        Self(arc)
-    }
-
-    pub fn load(&self) -> Vec<FieldRef<'_>> {
-        self.0
-            .field_types()
-            .iter()
-            .scan(self.0.data().as_ptr(), |ptr, field_type| {
-                let Some(field_type) = field_type else {
-                    return Some(FieldRef::Null);
-                };
-
-                unsafe {
-                    let (new_ptr, value) = read_field_ref(*ptr, *field_type);
-                    *ptr = new_ptr;
-                    Some(value)
-                }
-            })
-            .collect()
+        Ok(ProcessorRecord {
+            values: deserialized_values.into(),
+            lifetime,
+        })
     }

-    #[inline(always)]
-    pub fn id(&self) -> usize {
-        Arc::as_ptr(&self.0) as *const () as usize
+    pub fn into_record_store(self) -> ProcessorRecordStore {
+        match self {
+            Self::InMemory(record_store) => {
+                ProcessorRecordStore::InMemory(record_store.into_record_store())
+            }
+            Self::Rocksdb(record_store) => ProcessorRecordStore::Rocksdb(record_store),
+        }
     }
 }

-impl RecordRefInner {
-    #[inline(always)]
-    fn field_types(&self) -> &[Option<FieldType>] {
-        &self.0.slice
+impl StoreRecord for ProcessorRecordStoreDeserializer {
+    fn create_ref(&self, values: &[Field]) -> Result<RecordRef, RecordStoreError> {
+        match self {
+            Self::InMemory(store) => Ok(RecordRef::InMemory(store.create_ref(values)?)),
+            Self::Rocksdb(store) => Ok(RecordRef::Rocksdb(store.create_ref(values)?)),
+        }
     }

-    #[inline(always)]
-    fn data(&self) -> NonNull<u8> {
-        self.0.header
+    fn load_ref(&self, record_ref: &RecordRef) -> Result<Vec<Field>, RecordStoreError> {
+        match (record_ref, self) {
+            (RecordRef::InMemory(record_ref), _) => Ok(load_in_memory_record_ref(record_ref)),
+            (
+                RecordRef::Rocksdb(record_ref),
+                ProcessorRecordStoreDeserializer::Rocksdb(record_store),
+            ) => Ok(record_store.load_ref(record_ref)?),
+            _ => panic!("Rocksdb record ref cannot be loaded by in memory record store"),
+        }
     }
 }

-impl Drop for RecordRefInner {
-    fn drop(&mut self) {
-        let mut ptr = self.data().as_ptr();
-        for field in self.field_types().iter().flatten() {
-            unsafe {
-                // Read owned so all field destructors run
-                ptr = read_field(ptr, *field).0;
-            }
-        }
-        // Then deallocate the field storage
-        unsafe {
-            dealloc(
-                self.data().as_ptr(),
-                Layout::from_size_align(size(self.field_types()), MAX_ALIGN).unwrap(),
-            );
-        }
-    }
+fn load_in_memory_record_ref(record_ref: &in_memory::RecordRef) -> Vec<Field> {
+    record_ref.load().iter().map(FieldRef::cloned).collect()
 }

-#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
-pub struct ProcessorRecord {
-    /// All `Field`s in this record. The `Field`s are grouped by `Arc` to reduce memory usage.
-    /// This is a Box<[]> instead of a Vec to save space on storing the vec's capacity
-    values: Box<[RecordRef]>,
+mod in_memory;
+mod rocksdb;

-    /// Time To Live for this record. If the value is None, the record will never expire.
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(crate = "dozer_types::serde")]
+struct ProcessorRecordForSerialization {
+    values: Vec<u64>,
     lifetime: Option<Box<Lifetime>>,
 }

-impl ProcessorRecord {
-    pub fn new(values: Box<[RecordRef]>) -> Self {
-        Self {
-            values,
-            ..Default::default()
-        }
-    }
-
-    pub fn get_lifetime(&self) -> Option<Lifetime> {
-        self.lifetime.as_ref().map(|lifetime| *lifetime.clone())
-    }
-    pub fn set_lifetime(&mut self, lifetime: Option<Lifetime>) {
-        self.lifetime = lifetime.map(Box::new);
+#[cfg(test)]
+mod tests {
+    use dozer_types::models::app_config::RocksdbConfig;
+    use std::time::Duration;
+
+    use dozer_types::types::Timestamp;
+
+    use super::*;
+
+    fn test_record() -> Record {
+        let mut record = Record::new(vec![
+            Field::Int(1),
+            Field::Int(2),
+            Field::Int(3),
+            Field::Int(4),
+        ]);
+        record.lifetime = Some(Lifetime {
+            reference: Timestamp::parse_from_rfc3339("2020-01-01T00:13:00Z").unwrap(),
+            duration: Duration::from_secs(10),
+        });
+        record
     }

-    pub fn values(&self) -> &[RecordRef] {
-        &self.values
+    fn test_record_roundtrip_impl(record_store_kind: RecordStore) {
+        let record = test_record();
+        let record_store = ProcessorRecordStore::new(record_store_kind).unwrap();
+        let processor_record = record_store.create_record(&record).unwrap();
+        assert_eq!(record_store.load_record(&processor_record).unwrap(), record);
     }

-    pub fn appended(existing: &ProcessorRecord, additional: RecordRef) -> Self {
-        let mut values = Vec::with_capacity(existing.values().len() + 1);
-        values.extend_from_slice(existing.values());
-        values.push(additional);
-        Self::new(values.into_boxed_slice())
+    #[test]
+    fn test_record_roundtrip() {
+        test_record_roundtrip_impl(RecordStore::InMemory);
+        test_record_roundtrip_impl(RecordStore::Rocksdb(RocksdbConfig::default()));
     }
-}
-
-mod store;
-pub use store::{
-    ProcessorRecordStore, ProcessorRecordStoreDeserializer, RecordStoreError, StoreRecord,
-};
-#[cfg(test)]
-mod tests {
-    use dozer_types::types::Field;
+    fn test_record_serialization_roundtrip_impl(record_store_kind: RecordStore) {
+        let record_store = ProcessorRecordStore::new(record_store_kind).unwrap();
+        let record = record_store.create_record(&test_record()).unwrap();
+        let serialized_record = record_store.serialize_record(&record).unwrap();
+        let data = record_store.serialize_slice(0).unwrap().0;

-    use crate::RecordRef;
-
-    #[test]
-    fn test_store_load() {
-        let fields = vec![
-            Field::String("asdf".to_owned()),
-            Field::Int(23),
-            Field::Null,
-            Field::U128(234),
-        ];
-
-        let record = RecordRef::new(fields.clone());
-        let loaded_fields: Vec<_> = record
-            .load()
-            .into_iter()
-            .map(|field| field.cloned())
-            .collect();
-        assert_eq!(&fields, &loaded_fields);
+        let record_store = ProcessorRecordStoreDeserializer::new(record_store_kind).unwrap();
+        record_store.deserialize_and_extend(&data).unwrap();
+        let deserialized_record = record_store.deserialize_record(&serialized_record).unwrap();
+        assert_eq!(deserialized_record, record);
     }

     #[test]
-    fn test_ser_de() {
-        let fields = vec![
-            Field::String("asdf".to_owned()),
-            Field::Int(23),
-            Field::Null,
-            Field::U128(234),
-        ];
-
-        let record = RecordRef::new(fields.clone());
-
-        let bytes = dozer_types::bincode::serialize(&record).unwrap();
-        let deserialized: RecordRef = dozer_types::bincode::deserialize(&bytes).unwrap();
-        let loaded_fields: Vec<_> = deserialized
-            .load()
-            .into_iter()
-            .map(|field| field.cloned())
-            .collect();
-        assert_eq!(&fields, &loaded_fields);
+    fn test_record_serialization_roundtrip() {
+        test_record_serialization_roundtrip_impl(RecordStore::InMemory);
+        // TODO: enable this test when serialization is implemented for rocksdb
+        // test_record_serialization_roundtrip_impl(RecordStore::Rocksdb);
     }
 }
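Both backends sit behind the `StoreRecord` trait, so operators never need to know which store is configured. A minimal sketch of what that buys, assuming the trait stays exported at the crate root as it was before this refactor (the helper function is illustrative, not part of the PR):

```rust
use dozer_recordstore::{RecordStoreError, StoreRecord};
use dozer_types::types::Field;

// Generic over the backend: both ProcessorRecordStore and
// ProcessorRecordStoreDeserializer implement StoreRecord.
fn ref_roundtrip<S: StoreRecord>(store: &S, values: &[Field]) -> Result<(), RecordStoreError> {
    let record_ref = store.create_ref(values)?; // write into whichever backend is configured
    let loaded = store.load_ref(&record_ref)?;  // read the values back as owned Fields
    assert_eq!(loaded, values);
    Ok(())
}
```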
diff --git a/dozer-recordstore/src/rocksdb.rs b/dozer-recordstore/src/rocksdb.rs
new file mode 100644
index 0000000000..f54f28d86f
--- /dev/null
+++ b/dozer-recordstore/src/rocksdb.rs
@@ -0,0 +1,55 @@
+use std::sync::atomic::AtomicU64;
+
+use dozer_storage::RocksdbMap;
+use dozer_types::models::app_config::RocksdbConfig;
+use dozer_types::types::Field;
+use tempdir::TempDir;
+
+use crate::RecordStoreError;
+
+#[derive(Debug)]
+pub struct ProcessorRecordStore {
+    _temp_dir: TempDir,
+    next_id: AtomicU64,
+    records: RocksdbMap<u64, Vec<Field>>,
+}
+
+impl ProcessorRecordStore {
+    pub fn new(config: RocksdbConfig) -> Result<Self, RecordStoreError> {
+        let temp_dir = TempDir::new("rocksdb_processor_record_store")
+            .map_err(RecordStoreError::FailedToCreateTempDir)?;
+        let records = RocksdbMap::<u64, Vec<Field>>::create(temp_dir.path(), config)?;
+
+        Ok(Self {
+            _temp_dir: temp_dir,
+            next_id: AtomicU64::new(0),
+            records,
+        })
+    }
+
+    pub fn num_records(&self) -> usize {
+        self.next_id.load(std::sync::atomic::Ordering::SeqCst) as usize
+    }
+
+    pub fn create_ref(&self, values: &[Field]) -> Result<u64, RecordStoreError> {
+        let id = self
+            .next_id
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+        self.records.insert(&id, values)?;
+        Ok(id)
+    }
+
+    pub fn load_ref(&self, record_ref: &u64) -> Result<Vec<Field>, RecordStoreError> {
+        self.records
+            .get(record_ref)?
+            .ok_or(RecordStoreError::RocksdbRecordNotFound(*record_ref))
+    }
+
+    pub fn serialize_slice(&self, _start: usize) -> Result<(Vec<u8>, usize), RecordStoreError> {
+        todo!("implement rocksdb record store checkpointing")
+    }
+
+    pub fn deserialize_and_extend(&self, _data: &[u8]) -> Result<(), RecordStoreError> {
+        todo!("implement rocksdb record store checkpointing")
+    }
+}
diff --git a/dozer-sql/Cargo.toml b/dozer-sql/Cargo.toml
index caf7c51bc9..3cccf0665a 100644
--- a/dozer-sql/Cargo.toml
+++ b/dozer-sql/Cargo.toml
@@ -18,7 +18,7 @@ ahash = "0.8.3"
 enum_dispatch = "0.3.11"
 linked-hash-map = { version = "0.5.6", features = ["serde_impl"] }
 metrics = "0.21.0"
-multimap = "0.8.3"
+multimap = "0.9.0"
 regex = "1.8.1"

 [dev-dependencies]
diff --git a/dozer-sql/src/expression/tests/test_common.rs b/dozer-sql/src/expression/tests/test_common.rs
index 7a1050ea6a..1ed845f599 100644
--- a/dozer-sql/src/expression/tests/test_common.rs
+++ b/dozer-sql/src/expression/tests/test_common.rs
@@ -19,7 +19,7 @@ impl ProcessorChannelForwarder for TestChannelForwarder {
 }

 pub(crate) fn run_fct(sql: &str, schema: Schema, input: Vec<Field>) -> Field {
-    let record_store = ProcessorRecordStoreDeserializer::new().unwrap();
+    let record_store = ProcessorRecordStoreDeserializer::new(Default::default()).unwrap();

     let select = get_select(sql).unwrap();
     let processor_factory =
diff --git a/dozer-sql/src/product/join/operator/table.rs b/dozer-sql/src/product/join/operator/table.rs
index 661c10c4a7..0d64a71612 100644
--- a/dozer-sql/src/product/join/operator/table.rs
+++ b/dozer-sql/src/product/join/operator/table.rs
@@ -326,7 +326,7 @@ mod tests {
             }],
             primary_index: vec![0],
         };
-        let record_store = ProcessorRecordStoreDeserializer::new().unwrap();
+        let record_store = ProcessorRecordStoreDeserializer::new(Default::default()).unwrap();
         let mut table = JoinTable::new(&schema, vec![0], &record_store, true, None).unwrap();

         let record = Record::new(vec![Field::Int(1)]);
diff --git a/dozer-sql/src/product/set/record_map/mod.rs b/dozer-sql/src/product/set/record_map/mod.rs
index 8e011342df..c8b4975656 100644
--- a/dozer-sql/src/product/set/record_map/mod.rs
+++ b/dozer-sql/src/product/set/record_map/mod.rs
@@ -170,7 +170,7 @@ mod tests {
     };

     fn test_map(mut map: CountingRecordMapEnum) {
-        let record_store = ProcessorRecordStore::new().unwrap();
+        let record_store = ProcessorRecordStore::new(Default::default()).unwrap();
         let make_record = |fields: Vec<Field>| -> ProcessorRecord {
             record_store.create_record(&Record::new(fields)).unwrap()
         };
diff --git a/dozer-sql/src/table_operator/tests/operator_test.rs b/dozer-sql/src/table_operator/tests/operator_test.rs
index a2771bb489..a73024d6b9 100644
--- a/dozer-sql/src/table_operator/tests/operator_test.rs
+++ b/dozer-sql/src/table_operator/tests/operator_test.rs
@@ -36,7 +36,7 @@ fn test_lifetime() {
     )
     .to_owned();

-    let record_store = ProcessorRecordStore::new().unwrap();
+    let record_store = ProcessorRecordStore::new(Default::default()).unwrap();
     let record = Record::new(vec![
         Field::Int(0),
         Field::Timestamp(DateTime::parse_from_rfc3339("2020-01-01T00:13:00Z").unwrap()),
diff --git a/dozer-sql/src/utils/serialize.rs b/dozer-sql/src/utils/serialize.rs
index 8fee0b271d..4e364f2759 100644
--- a/dozer-sql/src/utils/serialize.rs
+++ b/dozer-sql/src/utils/serialize.rs
@@ -47,6 +47,8 @@ pub enum DeserializationError {
     NotEnoughData { requested: usize, remaining: usize },
     #[error("bincode error: {0}")]
     Bincode(#[from] bincode::Error),
+    #[error("record store error: {0}")]
+    RecordStore(#[from] dozer_recordstore::RecordStoreError),
 }

 pub fn serialize_u64(value: u64, object: &mut Object) -> Result<(), SerializationError> {
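The `#[from]` on the new variant means record-store failures propagate through `?` with no explicit mapping. A hedged sketch of the pattern inside dozer-sql (the helper function and its signature are hypothetical, not taken from this PR):

```rust
// Inside dozer-sql: `?` converts RecordStoreError into DeserializationError
// via the From impl that thiserror derives for the #[from] variant above.
use dozer_recordstore::{ProcessorRecordStoreDeserializer, RecordRef, StoreRecord};
use dozer_types::types::Field;

use crate::utils::serialize::DeserializationError;

fn load_state_values(
    record_store: &ProcessorRecordStoreDeserializer,
    record_ref: &RecordRef,
) -> Result<Vec<Field>, DeserializationError> {
    Ok(record_store.load_ref(record_ref)?)
}
```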
diff --git a/dozer-sql/src/window/tests/operator_test.rs b/dozer-sql/src/window/tests/operator_test.rs
index 0b3d7d2e04..426cf808f1 100644
--- a/dozer-sql/src/window/tests/operator_test.rs
+++ b/dozer-sql/src/window/tests/operator_test.rs
@@ -9,7 +9,7 @@ use crate::window::operator::WindowType;

 #[test]
 fn test_hop() {
-    let record_store = ProcessorRecordStore::new().unwrap();
+    let record_store = ProcessorRecordStore::new(Default::default()).unwrap();
     let record = record_store
         .create_record(&Record::new(vec![
             Field::Int(0),
@@ -61,7 +61,7 @@ fn test_hop() {

 #[test]
 fn test_tumble() {
-    let record_store = ProcessorRecordStore::new().unwrap();
+    let record_store = ProcessorRecordStore::new(Default::default()).unwrap();
     let record = record_store
         .create_record(&Record::new(vec![
             Field::Int(0),
diff --git a/dozer-storage/Cargo.toml b/dozer-storage/Cargo.toml
index 56a57ff7eb..92c4a54529 100644
--- a/dozer-storage/Cargo.toml
+++ b/dozer-storage/Cargo.toml
@@ -13,6 +13,7 @@ lmdb-rkv = "0.14.0"
 lmdb-rkv-sys = "0.11.2"
 page_size = "0.5.0"
 pin-project = "1.1.0"
+rocksdb = "0.21.0"
 tokio = "1.28.2"

 [dev-dependencies]
diff --git a/dozer-storage/src/errors.rs b/dozer-storage/src/errors.rs
index 07f846ee69..5a76169c89 100644
--- a/dozer-storage/src/errors.rs
+++ b/dozer-storage/src/errors.rs
@@ -31,6 +31,9 @@ pub enum StorageError {
     // Error forwarding
     #[error("Lmdb error: {0}")]
     Lmdb(#[from] lmdb::Error),
+
+    #[error("Rocksdb error: {0}")]
+    Rocksdb(#[from] rocksdb::Error),
 }

 #[derive(Debug, Error)]
diff --git a/dozer-storage/src/lib.rs b/dozer-storage/src/lib.rs
index f8f32e8ce8..16b3962a78 100644
--- a/dozer-storage/src/lib.rs
+++ b/dozer-storage/src/lib.rs
@@ -17,6 +17,8 @@ mod lmdb_counter;
 pub use lmdb_counter::LmdbCounter;
 mod lmdb_option;
 pub use lmdb_option::LmdbOption;
+mod rocksdb_map;
+pub use rocksdb_map::RocksdbMap;

 #[cfg(test)]
 mod tests;
diff --git a/dozer-storage/src/lmdb_database/lmdb_val.rs b/dozer-storage/src/lmdb_database/lmdb_val.rs
index 6c50636811..044d5ae871 100644
--- a/dozer-storage/src/lmdb_database/lmdb_val.rs
+++ b/dozer-storage/src/lmdb_database/lmdb_val.rs
@@ -1,6 +1,6 @@
 use dozer_types::{
     borrow::{Borrow, Cow},
-    types::{IndexDefinition, Record, SchemaWithIndex},
+    types::{Field, IndexDefinition, Record, SchemaWithIndex},
 };

 use crate::errors::{InvalidBool, StorageError};
@@ -223,6 +223,34 @@ unsafe impl LmdbKey for String {
     const TYPE: LmdbKeyType = LmdbKeyType::VariableSize;
 }

+impl<'a> Encode<'a> for &'a [Field] {
+    fn encode(self) -> Result<Encoded<'a>, StorageError> {
+        dozer_types::bincode::serialize(self)
+            .map(Encoded::Vec)
+            .map_err(|e| StorageError::SerializationError {
+                typ: "[Field]",
+                reason: Box::new(e),
+            })
+    }
+}
+
+impl BorrowEncode for Vec<Field> {
+    type Encode<'a> = &'a [Field];
+}
+
+impl Decode for Vec<Field> {
+    fn decode(bytes: &[u8]) -> Result<Cow<Self>, StorageError> {
+        dozer_types::bincode::deserialize(bytes)
+            .map(Cow::Owned)
+            .map_err(|e| StorageError::DeserializationError {
+                typ: "Vec<Field>",
+                reason: Box::new(e),
+            })
+    }
+}
+
+unsafe impl LmdbVal for Vec<Field> {}
+
 impl<'a> Encode<'a> for &'a Record {
     fn encode(self) -> Result<Encoded<'a>, StorageError> {
         dozer_types::bincode::serialize(self)
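Both new impls delegate to bincode, so a value written through dozer-storage can be decoded with plain bincode and vice versa. A self-contained illustration of the wire format, using the `dozer_types::bincode` re-export directly rather than the trait plumbing:

```rust
use dozer_types::types::Field;

fn bincode_roundtrip() {
    let values = vec![Field::Int(7), Field::String("x".into()), Field::Null];
    // Same byte layout the Encode/Decode impls above produce and consume.
    let bytes = dozer_types::bincode::serialize(values.as_slice()).unwrap();
    let decoded: Vec<Field> = dozer_types::bincode::deserialize(&bytes).unwrap();
    assert_eq!(decoded, values);
}
```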
diff --git a/dozer-storage/src/rocksdb_map.rs b/dozer-storage/src/rocksdb_map.rs
new file mode 100644
index 0000000000..2075644ff9
--- /dev/null
+++ b/dozer-storage/src/rocksdb_map.rs
@@ -0,0 +1,81 @@
+use std::path::Path;
+
+use rocksdb::{BlockBasedOptions, Cache, Options, DB};
+
+use dozer_types::borrow::IntoOwned;
+use dozer_types::models::app_config::RocksdbConfig;
+
+use crate::{errors::StorageError, BorrowEncode, Encode, LmdbVal};
+
+#[derive(Debug)]
+pub struct RocksdbMap<K, V> {
+    db: DB,
+    _key: std::marker::PhantomData<K>,
+    _value: std::marker::PhantomData<V>,
+}
+
+impl<K: BorrowEncode, V: LmdbVal> RocksdbMap<K, V>
+where
+    for<'a> V::Borrowed<'a>: IntoOwned<V>,
+{
+    pub fn create(path: &Path, config: RocksdbConfig) -> Result<Self, StorageError> {
+        let mut options = Options::default();
+        options.create_if_missing(true);
+
+        if let Some(block_cache_size) = config.block_cache_size {
+            let mut block_options = BlockBasedOptions::default();
+            let cache = Cache::new_lru_cache(block_cache_size);
+            block_options.set_block_cache(&cache);
+
+            options.set_block_based_table_factory(&block_options);
+        }
+
+        let db = DB::open(&options, path)?;
+        Ok(Self {
+            db,
+            _key: std::marker::PhantomData,
+            _value: std::marker::PhantomData,
+        })
+    }
+
+    pub fn count(&self) -> Result<usize, StorageError> {
+        Ok(self
+            .db
+            .property_int_value("rocksdb.estimate-num-keys")?
+            .expect("rocksdb.estimate-num-keys") as usize)
+    }
+
+    pub fn get(&self, key: K::Encode<'_>) -> Result<Option<V>, StorageError> {
+        let key = key.encode()?;
+        let value = self.db.get_pinned(key)?;
+        if let Some(value) = value {
+            let value = V::decode(&value)?;
+            Ok(Some(value.into_owned()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn contains(&self, key: K::Encode<'_>) -> Result<bool, StorageError> {
+        let key = key.encode()?;
+        let value = self.db.get_pinned(key)?;
+        Ok(value.is_some())
+    }
+
+    pub fn insert(&self, key: K::Encode<'_>, value: V::Encode<'_>) -> Result<(), StorageError> {
+        let key = key.encode()?;
+        let value = value.encode()?;
+        self.db.put(key, value)?;
+        Ok(())
+    }
+
+    pub fn remove(&self, key: K::Encode<'_>) -> Result<(), StorageError> {
+        let key = key.encode()?;
+        self.db.delete(key)?;
+        Ok(())
+    }
+
+    pub fn flush(&self) -> Result<(), StorageError> {
+        self.db.flush().map_err(Into::into)
+    }
+}
diff --git a/dozer-tests/Cargo.toml b/dozer-tests/Cargo.toml
index 468427412b..7f865df08e 100644
--- a/dozer-tests/Cargo.toml
+++ b/dozer-tests/Cargo.toml
@@ -31,7 +31,7 @@ reqwest = { version = "0.11.20", features = ["json", "rustls-tls"], default-features = false }
 tokio = { version = "1.25.0", features = ["full", "rt"] }
 bson = { version = "2.7.0", optional = true }
 mongodb = { version = "2.6.1", optional = true }
-futures = { version = "0.3.26", optional = true }
+futures = { version = "0.3.28", optional = true }
 env_logger = "0.10.0"
 clap = { version = "4.4.1", features = ["derive"] }
 rusqlite = { version = "0.28.0", features = ["bundled", "column_decltype", "hooks"] }
@@ -40,7 +40,7 @@ dozer-core = { path = "../dozer-core" }
 dozer-sql = { path = "../dozer-sql" }
 tempdir = "0.3.7"
 walkdir = "2.3.2"
-multimap = "0.8.3"
+multimap = "0.9.0"
 ahash = "0.8.3"
 csv = "1.2"
 url = "2.4.1"
diff --git a/dozer-tests/src/tests/e2e/mod.rs b/dozer-tests/src/tests/e2e/mod.rs
index 7c9a5ed8c2..2ff27b9f3e 100644
--- a/dozer-tests/src/tests/e2e/mod.rs
+++ b/dozer-tests/src/tests/e2e/mod.rs
@@ -1,10 +1,10 @@
 use std::{future::Future, sync::Arc, thread::JoinHandle, time::Duration};

-use dozer_api::tonic::transport::Channel;
-use dozer_cli::{
+use dozer_api::{
     shutdown::{self, ShutdownSender},
-    simple::SimpleOrchestrator,
+    tonic::transport::Channel,
 };
+use dozer_cli::simple::SimpleOrchestrator;
 use dozer_types::{
     grpc_types::{
         common::common_grpc_service_client::CommonGrpcServiceClient,
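`RocksdbMap` mirrors the crate's LMDB wrappers: any `LmdbVal` type can be stored, which is exactly what the `Vec<Field>` impls above enable. A usage sketch, assuming the existing `u64` key encoding that the record store above relies on (error handling via `Box<dyn Error>` for brevity):

```rust
use dozer_storage::RocksdbMap;
use dozer_types::models::app_config::RocksdbConfig;
use dozer_types::types::Field;
use tempdir::TempDir;

fn map_usage() -> Result<(), Box<dyn std::error::Error>> {
    let dir = TempDir::new("rocksdb_map_example")?;
    let map: RocksdbMap<u64, Vec<Field>> =
        RocksdbMap::create(dir.path(), RocksdbConfig::default())?;

    let values = vec![Field::Int(10), Field::Null];
    map.insert(&1u64, values.as_slice())?; // keys encode from &u64, values from &[Field]
    assert!(map.contains(&1u64)?);
    assert_eq!(map.get(&1u64)?, Some(values));

    map.remove(&1u64)?;
    assert_eq!(map.get(&1u64)?, None);
    Ok(())
}
```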
diff --git a/dozer-types/Cargo.toml b/dozer-types/Cargo.toml
index 6be2ea21af..8f83c465dd 100644
--- a/dozer-types/Cargo.toml
+++ b/dozer-types/Cargo.toml
@@ -16,7 +16,7 @@ ahash = "0.8.3"
 thiserror = "1.0.48"
 parking_lot = "0.12"
 bytes = "1.4.0"
-indexmap = "1.9.2"
+indexmap = "2.0.0"
 ordered-float = { version = "3.9.1", features = ["serde"] }
 tracing = "0.1.36"
 log = "0.4.17"
@@ -28,8 +28,8 @@ pyo3 = { version = "0.18.1", optional = true }
 tonic = { version = "0.10.0" }
 prost-types = "0.12.0"
 prost = "0.12.0"
-arrow = { version = "42.0.0" }
-arrow-schema = { version = "42.0.0", features = ["serde"] }
+arrow = { version = "45.0.0" }
+arrow-schema = { version = "45.0.0", features = ["serde"] }
 tokio-postgres = { version = "0.7.7", features = [
     "with-chrono-0_4",
     "with-geo-types-0_7",
diff --git a/dozer-types/protos/cloud.proto b/dozer-types/protos/cloud.proto
index f329adcb56..e699458899 100644
--- a/dozer-types/protos/cloud.proto
+++ b/dozer-types/protos/cloud.proto
@@ -2,9 +2,9 @@ syntax = "proto3";
 package dozer.cloud;
 import "cloud_types.proto";
 import "cloud_notification.proto";
+import "cloud_infastructure.proto";
 import "google/protobuf/timestamp.proto";
 import "google/protobuf/empty.proto";
-
 service DozerCloud {
   rpc validate_connection(ConnectionRequest)
@@ -24,10 +24,10 @@ service DozerCloud {
   rpc update_application(UpdateAppRequest) returns (AppResponse);
   rpc delete_application(DeleteAppRequest) returns (DeleteAppResponse);
   rpc get_application(GetAppRequest) returns (AppResponse);
-  rpc stop_dozer(StopRequest) returns (StopResponse);
-  rpc get_status(GetStatusRequest) returns (GetStatusResponse);
   rpc list_deployments(ListDeploymentRequest) returns (ListDeploymentResponse);
   rpc list_versions(ListVersionsRequest) returns (ListVersionsResponse);
+  rpc set_alias(SetAliasRequest) returns (SetAliasResponse);
+  rpc rm_alias(RmAliasRequest) returns (RmAliasResponse);
   rpc set_current_version(SetCurrentVersionRequest) returns (SetCurrentVersionResponse);
   rpc list_files(ListFilesRequest) returns (ListFilesResponse);
   rpc get_configuration(GetConfigurationRequest) returns (GetConfigurationResponse);
@@ -52,6 +52,8 @@ service DozerCloud {
   rpc mark_notifications_as_read(MarkNotificationsRequest) returns (MarkNotificationsResponse);
   rpc get_endpoint_commands_samples(GetEndpointCommandsSamplesRequest) returns (GetEndpointCommandsSamplesResponse);
+
+  rpc get_infrastructure_info(GetInfastructureRequest) returns (GetInfastructureResponse);
 }

 service DozerPublic {
@@ -129,21 +131,7 @@ message DeleteAppRequest {
 }
 message DeleteAppResponse { bool success = 1; }

-message GetAppRequest { optional string app_id = 1; }
-
-message GetStatusRequest{
-  string app_id = 1;
-}
-message GetStatusResponse {
-  string data_endpoint = 1;
-  repeated DeploymentStatusWithHealth deployments = 2;
-  map<uint32, string> versions = 4;
-  optional uint32 current_version = 5;
-}
-message DeploymentStatusWithHealth {
-  DeploymentInfo deployment = 1;
-  repeated DeploymentResource resources = 2;
-}
+message GetAppRequest { string app_id = 1; }

 message GetResourcesRequest {
   string app_id = 1;
@@ -151,13 +139,14 @@
   optional uint32 version = 2;
 }
 message ResourcesResponse {
-  repeated DeploymentResource resources = 1;
+  uint32 version = 1;
+  optional DeploymentResource app = 2;
+  optional DeploymentResource api = 3;
 }

 message DeploymentResource {
   string name = 1;
-  // api, app
-  string typ = 2;
+
   string created_at = 3;

   optional int32 desired = 4;
@@ -167,9 +156,6 @@ message DeploymentResource {
   optional int32 unavailable = 6;
 }

-message StopRequest { string app_id = 1; }
-message StopResponse { bool success = 1; }
-
 enum DeploymentStatus {
   PENDING = 0;
   RUNNING = 1;
@@ -180,12 +166,12 @@
 message ListVersionsRequest { string app_id = 1; }
 message ListVersionsResponse {
   optional uint32 current_version = 4;
-  map<uint32, string> versions = 3;
+  repeated uint32 versions = 3;
 }

 message DeploymentInfo {
   string deployment_id = 1;
-  uint32 deployment = 2;
+  uint32 version = 2;
   DeploymentStatus status = 3;
   optional google.protobuf.Timestamp created_at = 4;
   optional google.protobuf.Timestamp updated_at = 5;
@@ -224,6 +210,21 @@ message SetCurrentVersionRequest {
 }

 message SetCurrentVersionResponse {}

+message SetAliasRequest {
+  string app_id = 1;
+  uint32 version = 2;
+  string alias = 3;
+}
+
+message SetAliasResponse {}
+
+message RmAliasRequest {
+  string app_id = 1;
+  string alias = 2;
+}
+
+message RmAliasResponse {}
+
 message ConnectionRequest {
   string yaml_content = 2;
 }
@@ -273,7 +274,7 @@ message FileInfo {

 message LogMessageRequest {
   string app_id = 1;
-  uint32 deployment = 2;
+  uint32 version = 2;
   bool follow = 3;
   bool include_build = 4;
   bool include_app = 5;
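On the client side, tonic generates snake_case methods for the new alias RPCs. A hedged sketch of pointing an alias at a version (the module path follows dozer's usual `grpc_types` layout but is assumed here, not verified against the generated code):

```rust
use dozer_types::grpc_types::cloud::{
    dozer_cloud_client::DozerCloudClient, SetAliasRequest,
};

async fn tag_version(addr: &'static str) -> Result<(), Box<dyn std::error::Error>> {
    let mut client = DozerCloudClient::connect(addr).await?;
    // Point the "staging" alias at version 3 of this app.
    client
        .set_alias(SetAliasRequest {
            app_id: "my-app".to_string(),
            version: 3,
            alias: "staging".to_string(),
        })
        .await?;
    Ok(())
}
```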
diff --git a/dozer-types/protos/cloud_infastructure.proto b/dozer-types/protos/cloud_infastructure.proto
new file mode 100644
index 0000000000..29d986dc2d
--- /dev/null
+++ b/dozer-types/protos/cloud_infastructure.proto
@@ -0,0 +1,62 @@
+syntax = "proto3";
+package dozer.cloud;
+
+message GetInfastructureRequest {
+  string app_id = 1;
+  uint32 version = 2;
+}
+
+message GetInfastructureResponse {
+  repeated PodInfo pods = 1;
+}
+message PodInfo {
+  string name = 1;
+  string created_at = 3;
+  repeated string labels = 4;
+  repeated ContainerInfo containers = 5;
+  optional string phase = 6;
+  optional string reason = 7;
+}
+message ContainerInfo {
+  string name = 1;
+  repeated ResourceInfo resources = 2;
+  optional string image = 3;
+  repeated string command = 4;
+  ContainerStatus status = 5;
+}
+message ContainerStatus {
+  optional string container_id = 1;
+  bool ready = 2;
+  uint32 restart_count = 3;
+  optional bool started = 4;
+  // detail about current state
+  optional ContainerState state = 5;
+  // Details about the container's last termination condition.
+  optional ContainerState last_state = 6;
+}
+
+message ContainerState {
+  optional ContainerStateRunning running = 1;
+  optional ContainerStateWaiting waiting = 2;
+  optional ContainerStateTerminated terminated = 3;
+}
+message ContainerStateTerminated {
+  optional string reason = 1;
+  optional string message = 2;
+  optional string finished_at = 3;
+  optional uint32 signal = 4;
+  uint32 exit_code = 5;
+}
+message ContainerStateRunning {
+  optional string started_at = 1;
+}
+message ContainerStateWaiting {
+  optional string reason = 1;
+  optional string message = 2;
+}
+message ResourceInfo {
+  string name = 1;
+  string limit = 2;
+  string usage = 3;
+  string request = 4;
+}
\ No newline at end of file
diff --git a/dozer-types/protos/cloud_notification.proto b/dozer-types/protos/cloud_notification.proto
index d5c01054a6..e08caa5618 100644
--- a/dozer-types/protos/cloud_notification.proto
+++ b/dozer-types/protos/cloud_notification.proto
@@ -23,7 +23,7 @@ enum Level {
 }

 message PodNotification {
-  uint32 deployment = 1;
+  uint32 version_instance = 1;
   oneof kind {
     ContainerTerminated containerTerminated = 2;
   }
diff --git a/dozer-types/protos/contract.proto b/dozer-types/protos/contract.proto
index 46545c5279..4e91c4d6c1 100644
--- a/dozer-types/protos/contract.proto
+++ b/dozer-types/protos/contract.proto
@@ -12,20 +12,20 @@ service ContractService {
   rpc GetProtos(CommonRequest) returns (ProtoResponse);
 }

-message CloudDeploymentId {
+message CloudVersionId {
   string app_id = 1;
-  uint32 deployment = 2;
+  uint32 version = 2;
 }

 message SourcesRequest {
   // Only used in cloud context.
-  optional CloudDeploymentId cloud_id = 1;
+  optional CloudVersionId cloud_id = 1;
   string connection_name = 2;
 }

 message CommonRequest {
   // Only used in cloud context.
-  optional CloudDeploymentId cloud_id = 1;
+  optional CloudVersionId cloud_id = 1;
 }

 message SchemasResponse {
diff --git a/dozer-types/src/models/app_config.rs b/dozer-types/src/models/app_config.rs
index dc1308b2f6..799229d419 100644
--- a/dozer-types/src/models/app_config.rs
+++ b/dozer-types/src/models/app_config.rs
@@ -37,6 +37,10 @@ pub struct AppConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     /// The maximum time in seconds before a new checkpoint is created. If there're no new records, no checkpoint will be created.
     pub max_interval_before_persist_in_seconds: Option<u64>,
+
+    #[serde(default, skip_serializing_if = "equal_default")]
+    /// The record store to use for the processors.
+    pub record_store: RecordStore,
 }

 #[derive(Debug, JsonSchema, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -54,6 +58,20 @@ pub struct S3Storage {
     pub bucket_name: String,
 }

+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
+#[serde(deny_unknown_fields)]
+pub enum RecordStore {
+    #[default]
+    InMemory,
+    Rocksdb(RocksdbConfig),
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
+#[serde(deny_unknown_fields)]
+pub struct RocksdbConfig {
+    pub block_cache_size: Option<usize>,
+}
+
 pub fn default_persist_queue_capacity() -> u32 {
     100
 }
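Since `RecordStore` derives `Default` with `InMemory` as the default variant, existing configs keep their behavior and the RocksDB store is strictly opt-in. A small illustration of selecting a backend in code (the helper function is illustrative; the 64 MiB cache size is an arbitrary example value):

```rust
use dozer_types::models::app_config::{RecordStore, RocksdbConfig};

fn pick_record_store(use_rocksdb: bool) -> RecordStore {
    if use_rocksdb {
        // Opt into the RocksDB-backed store with a 64 MiB block cache.
        RecordStore::Rocksdb(RocksdbConfig {
            block_cache_size: Some(64 * 1024 * 1024),
        })
    } else {
        // The default, and what `ProcessorRecordStore::new(Default::default())`
        // resolves to in the test updates earlier in this diff.
        RecordStore::default() // RecordStore::InMemory
    }
}
```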
diff --git a/json_schemas/dozer.json b/json_schemas/dozer.json
index d28aa9caaa..429b75ee03 100644
--- a/json_schemas/dozer.json
+++ b/json_schemas/dozer.json
@@ -326,6 +326,14 @@
           ],
           "format": "uint32",
           "minimum": 0.0
+        },
+        "record_store": {
+          "description": "The record store to use for the processors.",
+          "allOf": [
+            {
+              "$ref": "#/definitions/RecordStore"
+            }
+          ]
         }
       },
       "additionalProperties": false
@@ -1298,6 +1306,28 @@
         },
         "additionalProperties": false
       },
+      "RecordStore": {
+        "oneOf": [
+          {
+            "type": "string",
+            "enum": [
+              "InMemory"
+            ]
+          },
+          {
+            "type": "object",
+            "required": [
+              "Rocksdb"
+            ],
+            "properties": {
+              "Rocksdb": {
+                "$ref": "#/definitions/RocksdbConfig"
+              }
+            },
+            "additionalProperties": false
+          }
+        ]
+      },
       "RefreshConfig": {
         "type": "string",
         "enum": [
@@ -1336,6 +1366,20 @@
         },
         "additionalProperties": false
       },
+      "RocksdbConfig": {
+        "type": "object",
+        "properties": {
+          "block_cache_size": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "uint",
+            "minimum": 0.0
+          }
+        },
+        "additionalProperties": false
+      },
       "S3Details": {
         "type": "object",
         "required": [