diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c4d23842b27..516b949f784 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,7 +1,7 @@ -# A CODEOWNERS file uses a pattern that follows the same rules used in gitignore files. -# The pattern is followed by one or more GitHub usernames or team names using the -# standard @username or @org/team-name format. You can also refer to a user by an -# email address that has been added to their GitHub account, for example user@example.com - -* @dragonflyoss/nydus-reviewers -.github @dragonflyoss/nydus-maintainers +# A CODEOWNERS file uses a pattern that follows the same rules used in gitignore files. +# The pattern is followed by one or more GitHub usernames or team names using the +# standard @username or @org/team-name format. You can also refer to a user by an +# email address that has been added to their GitHub account, for example user@example.com + +* @dragonflyoss/nydus-reviewers +.github @dragonflyoss/nydus-maintainers diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ef3704fc2a1..5886f438a26 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,44 +1,44 @@ -## Additional Information -_The following information is very important in order to help us to help you. Omission of the following details may delay your support request or receive no attention at all._ - -### Version of nydus being used (nydusd --version) - - - -### Version of nydus-snapshotter being used (containerd-nydus-grpc --version) - - - -### Kernel information (uname -r) -_command result: uname -r_ - -### GNU/Linux Distribution, if applicable (cat /etc/os-release) -_command result: cat /etc/os-release_ - -### containerd-nydus-grpc command line used, if applicable (ps aux | grep containerd-nydus-grpc) -``` -``` - -### client command line used, if applicable (such as: nerdctl, docker, kubectl, ctr) -``` -``` - -### Screenshots (if applicable) - -## Details about issue - +## Additional Information +_The following information is very important in order to help us to help you. Omission of the following details may delay your support request or receive no attention at all._ + +### Version of nydus being used (nydusd --version) + + + +### Version of nydus-snapshotter being used (containerd-nydus-grpc --version) + + + +### Kernel information (uname -r) +_command result: uname -r_ + +### GNU/Linux Distribution, if applicable (cat /etc/os-release) +_command result: cat /etc/os-release_ + +### containerd-nydus-grpc command line used, if applicable (ps aux | grep containerd-nydus-grpc) +``` +``` + +### client command line used, if applicable (such as: nerdctl, docker, kubectl, ctr) +``` +``` + +### Screenshots (if applicable) + +## Details about issue + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index bae8c39011c..fb095080a83 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,21 +1,21 @@ -## Relevant Issue (if applicable) -_If there are Issues related to this PullRequest, please list it._ - -## Details -_Please describe the details of PullRequest._ - -## Types of changes - -_What types of changes does your PullRequest introduce? 
Put an `x` in all the boxes that apply:_ - -- [ ] Bug fix (non-breaking change which fixes an issue) -- [ ] New feature (non-breaking change which adds functionality) -- [ ] Breaking change (fix or feature that would cause existing functionality to change) -- [ ] Documentation Update (if none of the other choices apply) - -## Checklist - -_Go over all the following points, and put an `x` in all the boxes that apply._ - -- [ ] I have updated the documentation accordingly. +## Relevant Issue (if applicable) +_If there are Issues related to this PullRequest, please list it._ + +## Details +_Please describe the details of PullRequest._ + +## Types of changes + +_What types of changes does your PullRequest introduce? Put an `x` in all the boxes that apply:_ + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to change) +- [ ] Documentation Update (if none of the other choices apply) + +## Checklist + +_Go over all the following points, and put an `x` in all the boxes that apply._ + +- [ ] I have updated the documentation accordingly. - [ ] I have added tests to cover my changes. \ No newline at end of file diff --git a/.github/codecov.yml b/.github/codecov.yml index 6509389a5aa..83aeb0be2c4 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -1,23 +1,23 @@ -coverage: - status: - project: - default: - enabled: yes - target: auto # auto compares coverage to the previous base commit - # adjust accordingly based on how flaky your tests are - # this allows a 0.2% drop from the previous base commit coverage - threshold: 0.2% - patch: false - -comment: - layout: "reach, diff, flags, files" - behavior: default - require_changes: true # if true: only post the comment if coverage changes - -codecov: - require_ci_to_pass: false - notify: - wait_for_ci: true - -# When modifying this file, please validate using +coverage: + status: + project: + default: + enabled: yes + target: auto # auto compares coverage to the previous base commit + # adjust accordingly based on how flaky your tests are + # this allows a 0.2% drop from the previous base commit coverage + threshold: 0.2% + patch: false + +comment: + layout: "reach, diff, flags, files" + behavior: default + require_changes: true # if true: only post the comment if coverage changes + +codecov: + require_ci_to_pass: false + notify: + wait_for_ci: true + +# When modifying this file, please validate using # curl -X POST --data-binary @codecov.yml https://codecov.io/validate \ No newline at end of file diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c0875988517..ce08cb474d2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,329 +1,329 @@ -name: Benchmark - -on: - schedule: - # Run at 03:00 clock UTC on Monday and Wednesday - - cron: "0 03 * * 1,3" - pull_request: - paths: - - '.github/workflows/benchmark.yml' - workflow_dispatch: - -env: - CARGO_TERM_COLOR: always - -jobs: - contrib-build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Golang - uses: actions/setup-go@v5 - with: - go-version-file: 'go.work' - cache-dependency-path: "**/*.sum" - - name: Build Contrib - run: | - make -e DOCKER=false nydusify-release - - name: Upload Nydusify - uses: actions/upload-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd/nydusify - - nydus-build: - runs-on: 
ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: Linux-cargo-amd64 - - uses: dsherret/rust-toolchain-file@v1 - - name: Build Nydus - run: | - make release - - name: Upload Nydus Binaries - uses: actions/upload-artifact@v4 - with: - name: nydus-artifact - path: | - target/release/nydus-image - target/release/nydusd - - benchmark-description: - runs-on: ubuntu-latest - steps: - - name: Description - run: | - echo "## Benchmark Environment" > $GITHUB_STEP_SUMMARY - echo "| operating system | cpu | memory " >> $GITHUB_STEP_SUMMARY - echo "|:----------------:|:---:|:------ " >> $GITHUB_STEP_SUMMARY - echo "| ubuntu-22.04 | 2-core CPU (x86_64) | 7GB |" >> $GITHUB_STEP_SUMMARY - - benchmark-oci: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - strategy: - matrix: - include: - - image: wordpress - tag: 6.1.1 - - image: node - tag: 19.8 - - image: python - tag: 3.10.7 - - image: golang - tag: 1.19.3 - - image: ruby - tag: 3.1.3 - - image: amazoncorretto - tag: 8-al2022-jdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Environment - run: | - sudo bash misc/prepare.sh - - name: BenchMark Test - run: | - export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} - export BENCHMARK_MODE=oci - export BENCHMARK_METRIC_FILE=${{ matrix.image }}-oci.json - export SNAPSHOTTER=overlayfs - sudo -E make smoke-benchmark - - name: Save BenchMark Result - uses: actions/upload-artifact@v4 - with: - name: benchmark-oci-${{ matrix.image }} - path: smoke/${{ matrix.image }}-oci.json - - benchmark-fsversion-v5: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - strategy: - matrix: - include: - - image: wordpress - tag: 6.1.1 - - image: node - tag: 19.8 - - image: python - tag: 3.10.7 - - image: golang - tag: 1.19.3 - - image: ruby - tag: 3.1.3 - - image: amazoncorretto - tag: 8-al2022-jdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Environment - run: | - sudo bash misc/prepare.sh - - name: BenchMark Test - run: | - export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} - export BENCHMARK_MODE=fs-version-5 - export BENCHMARK_METRIC_FILE=${{ matrix.image }}-fsversion-v5.json - sudo -E make smoke-benchmark - - name: Save BenchMark Result - uses: actions/upload-artifact@v4 - with: - name: benchmark-fsversion-v5-${{ matrix.image }} - path: smoke/${{ matrix.image }}-fsversion-v5.json - - benchmark-fsversion-v6: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - strategy: - matrix: - include: - - image: wordpress - tag: 6.1.1 - - image: node - tag: 19.8 - - image: python - tag: 3.10.7 - - image: golang - tag: 1.19.3 - - image: ruby - tag: 3.1.3 - - image: amazoncorretto - tag: 8-al2022-jdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: 
actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Environment - run: | - sudo bash misc/prepare.sh - - name: BenchMark Test - run: | - export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} - export BENCHMARK_MODE=fs-version-6 - export BENCHMARK_METRIC_FILE=${{ matrix.image }}-fsversion-v6.json - sudo -E make smoke-benchmark - - name: Save BenchMark Result - uses: actions/upload-artifact@v4 - with: - name: benchmark-fsversion-v6-${{ matrix.image }} - path: smoke/${{ matrix.image }}-fsversion-v6.json - - benchmark-zran: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - strategy: - matrix: - include: - - image: wordpress - tag: 6.1.1 - - image: node - tag: 19.8 - - image: python - tag: 3.10.7 - - image: golang - tag: 1.19.3 - - image: ruby - tag: 3.1.3 - - image: amazoncorretto - tag: 8-al2022-jdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Environment - run: | - sudo bash misc/prepare.sh - - name: BenchMark Test - run: | - export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} - export BENCHMARK_MODE=zran - export BENCHMARK_METRIC_FILE=${{ matrix.image }}-zran.json - sudo -E make smoke-benchmark - - name: Save BenchMark Result - uses: actions/upload-artifact@v4 - with: - name: benchmark-zran-${{ matrix.image }} - path: smoke/${{ matrix.image }}-zran.json - - benchmark-result: - runs-on: ubuntu-latest - needs: [benchmark-oci, benchmark-fsversion-v5, benchmark-fsversion-v6, benchmark-zran] - strategy: - matrix: - include: - - image: wordpress - tag: 6.1.1 - - image: node - tag: 19.8 - - image: python - tag: 3.10.7 - - image: golang - tag: 1.19.3 - - image: ruby - tag: 3.1.3 - - image: amazoncorretto - tag: 8-al2022-jdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download benchmark-oci - uses: actions/download-artifact@v4 - with: - name: benchmark-oci-${{ matrix.image }} - path: benchmark-result - - name: Download benchmark-fsversion-v5 - uses: actions/download-artifact@v4 - with: - name: benchmark-fsversion-v5-${{ matrix.image }} - path: benchmark-result - - name: Download benchmark-fsversion-v6 - uses: actions/download-artifact@v4 - with: - name: benchmark-fsversion-v6-${{ matrix.image }} - path: benchmark-result - - name: Download benchmark-zran - uses: actions/download-artifact@v4 - with: - name: benchmark-zran-${{ matrix.image }} - path: benchmark-result - - name: Benchmark Summary - run: | - case ${{matrix.image}} in - "wordpress") - echo "### workload: wait the 80 port response" > $GITHUB_STEP_SUMMARY - ;; - "node") - echo "### workload: node index.js; wait the 80 port response" > $GITHUB_STEP_SUMMARY - ;; - "python") - echo "### workload: python -c 'print("hello")'" > $GITHUB_STEP_SUMMARY - ;; - "golang") - echo "### workload: go run main.go" > $GITHUB_STEP_SUMMARY - ;; - "ruby") - echo "### workload: ruby -e "puts \"hello\""" > $GITHUB_STEP_SUMMARY - ;; - "amazoncorretto") - echo "### workload: javac Main.java; java Main" > $GITHUB_STEP_SUMMARY - ;; - esac - cd benchmark-result - metric_files=( - "${{ matrix.image }}-oci.json" - "${{ matrix.image }}-fsversion-v5.json" - "${{ matrix.image }}-fsversion-v6.json" - "${{ matrix.image }}-zran.json" - ) - echo "| bench-result | e2e-time(s) | read-count | 
read-amount(MB) | image-size(MB) |convert-time(s)|" >> $GITHUB_STEP_SUMMARY - echo "|:-------------|:-----------:|:----------:|:---------------:|:--------------:|:-------------:|" >> $GITHUB_STEP_SUMMARY - for file in "${metric_files[@]}"; do - name=$(basename "$file" .json | sed 's/^[^-]*-\(.*\)$/\1/') - data=$(jq -r '. | "\(.e2e_time / 1e9) \(.read_count) \(.read_amount_total / (1024 * 1024)) \(.image_size / (1024 * 1024)) \(.conversion_elapsed / 1e9)"' "$file" | \ - awk '{ printf "%.2f | %.0f | %.2f | %.2f | %.2f", $1, $2, $3, $4, $5 }') - echo "| $name | $data |" >> $GITHUB_STEP_SUMMARY - done +name: Benchmark + +on: + schedule: + # Run at 03:00 clock UTC on Monday and Wednesday + - cron: "0 03 * * 1,3" + pull_request: + paths: + - '.github/workflows/benchmark.yml' + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + contrib-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: Build Contrib + run: | + make -e DOCKER=false nydusify-release + - name: Upload Nydusify + uses: actions/upload-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd/nydusify + + nydus-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: Linux-cargo-amd64 + - uses: dsherret/rust-toolchain-file@v1 + - name: Build Nydus + run: | + make release + - name: Upload Nydus Binaries + uses: actions/upload-artifact@v4 + with: + name: nydus-artifact + path: | + target/release/nydus-image + target/release/nydusd + + benchmark-description: + runs-on: ubuntu-latest + steps: + - name: Description + run: | + echo "## Benchmark Environment" > $GITHUB_STEP_SUMMARY + echo "| operating system | cpu | memory " >> $GITHUB_STEP_SUMMARY + echo "|:----------------:|:---:|:------ " >> $GITHUB_STEP_SUMMARY + echo "| ubuntu-22.04 | 2-core CPU (x86_64) | 7GB |" >> $GITHUB_STEP_SUMMARY + + benchmark-oci: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + strategy: + matrix: + include: + - image: wordpress + tag: 6.1.1 + - image: node + tag: 19.8 + - image: python + tag: 3.10.7 + - image: golang + tag: 1.19.3 + - image: ruby + tag: 3.1.3 + - image: amazoncorretto + tag: 8-al2022-jdk + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Environment + run: | + sudo bash misc/prepare.sh + - name: BenchMark Test + run: | + export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} + export BENCHMARK_MODE=oci + export BENCHMARK_METRIC_FILE=${{ matrix.image }}-oci.json + export SNAPSHOTTER=overlayfs + sudo -E make smoke-benchmark + - name: Save BenchMark Result + uses: actions/upload-artifact@v4 + with: + name: benchmark-oci-${{ matrix.image }} + path: smoke/${{ matrix.image }}-oci.json + + benchmark-fsversion-v5: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + strategy: + matrix: + include: + - image: wordpress + tag: 6.1.1 + - image: node + tag: 19.8 + - image: python + tag: 3.10.7 + - image: golang + tag: 1.19.3 + - image: ruby + tag: 3.1.3 + - image: amazoncorretto + tag: 8-al2022-jdk + steps: + - name: Checkout + 
uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Environment + run: | + sudo bash misc/prepare.sh + - name: BenchMark Test + run: | + export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} + export BENCHMARK_MODE=fs-version-5 + export BENCHMARK_METRIC_FILE=${{ matrix.image }}-fsversion-v5.json + sudo -E make smoke-benchmark + - name: Save BenchMark Result + uses: actions/upload-artifact@v4 + with: + name: benchmark-fsversion-v5-${{ matrix.image }} + path: smoke/${{ matrix.image }}-fsversion-v5.json + + benchmark-fsversion-v6: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + strategy: + matrix: + include: + - image: wordpress + tag: 6.1.1 + - image: node + tag: 19.8 + - image: python + tag: 3.10.7 + - image: golang + tag: 1.19.3 + - image: ruby + tag: 3.1.3 + - image: amazoncorretto + tag: 8-al2022-jdk + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Environment + run: | + sudo bash misc/prepare.sh + - name: BenchMark Test + run: | + export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} + export BENCHMARK_MODE=fs-version-6 + export BENCHMARK_METRIC_FILE=${{ matrix.image }}-fsversion-v6.json + sudo -E make smoke-benchmark + - name: Save BenchMark Result + uses: actions/upload-artifact@v4 + with: + name: benchmark-fsversion-v6-${{ matrix.image }} + path: smoke/${{ matrix.image }}-fsversion-v6.json + + benchmark-zran: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + strategy: + matrix: + include: + - image: wordpress + tag: 6.1.1 + - image: node + tag: 19.8 + - image: python + tag: 3.10.7 + - image: golang + tag: 1.19.3 + - image: ruby + tag: 3.1.3 + - image: amazoncorretto + tag: 8-al2022-jdk + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Environment + run: | + sudo bash misc/prepare.sh + - name: BenchMark Test + run: | + export BENCHMARK_TEST_IMAGE=${{ matrix.image }}:${{ matrix.tag }} + export BENCHMARK_MODE=zran + export BENCHMARK_METRIC_FILE=${{ matrix.image }}-zran.json + sudo -E make smoke-benchmark + - name: Save BenchMark Result + uses: actions/upload-artifact@v4 + with: + name: benchmark-zran-${{ matrix.image }} + path: smoke/${{ matrix.image }}-zran.json + + benchmark-result: + runs-on: ubuntu-latest + needs: [benchmark-oci, benchmark-fsversion-v5, benchmark-fsversion-v6, benchmark-zran] + strategy: + matrix: + include: + - image: wordpress + tag: 6.1.1 + - image: node + tag: 19.8 + - image: python + tag: 3.10.7 + - image: golang + tag: 1.19.3 + - image: ruby + tag: 3.1.3 + - image: amazoncorretto + tag: 8-al2022-jdk + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download benchmark-oci + uses: actions/download-artifact@v4 + with: + name: benchmark-oci-${{ matrix.image }} + path: benchmark-result + - name: Download benchmark-fsversion-v5 + uses: 
actions/download-artifact@v4 + with: + name: benchmark-fsversion-v5-${{ matrix.image }} + path: benchmark-result + - name: Download benchmark-fsversion-v6 + uses: actions/download-artifact@v4 + with: + name: benchmark-fsversion-v6-${{ matrix.image }} + path: benchmark-result + - name: Download benchmark-zran + uses: actions/download-artifact@v4 + with: + name: benchmark-zran-${{ matrix.image }} + path: benchmark-result + - name: Benchmark Summary + run: | + case ${{matrix.image}} in + "wordpress") + echo "### workload: wait the 80 port response" > $GITHUB_STEP_SUMMARY + ;; + "node") + echo "### workload: node index.js; wait the 80 port response" > $GITHUB_STEP_SUMMARY + ;; + "python") + echo "### workload: python -c 'print("hello")'" > $GITHUB_STEP_SUMMARY + ;; + "golang") + echo "### workload: go run main.go" > $GITHUB_STEP_SUMMARY + ;; + "ruby") + echo "### workload: ruby -e "puts \"hello\""" > $GITHUB_STEP_SUMMARY + ;; + "amazoncorretto") + echo "### workload: javac Main.java; java Main" > $GITHUB_STEP_SUMMARY + ;; + esac + cd benchmark-result + metric_files=( + "${{ matrix.image }}-oci.json" + "${{ matrix.image }}-fsversion-v5.json" + "${{ matrix.image }}-fsversion-v6.json" + "${{ matrix.image }}-zran.json" + ) + echo "| bench-result | e2e-time(s) | read-count | read-amount(MB) | image-size(MB) |convert-time(s)|" >> $GITHUB_STEP_SUMMARY + echo "|:-------------|:-----------:|:----------:|:---------------:|:--------------:|:-------------:|" >> $GITHUB_STEP_SUMMARY + for file in "${metric_files[@]}"; do + name=$(basename "$file" .json | sed 's/^[^-]*-\(.*\)$/\1/') + data=$(jq -r '. | "\(.e2e_time / 1e9) \(.read_count) \(.read_amount_total / (1024 * 1024)) \(.image_size / (1024 * 1024)) \(.conversion_elapsed / 1e9)"' "$file" | \ + awk '{ printf "%.2f | %.0f | %.2f | %.2f | %.2f", $1, $2, $3, $4, $5 }') + echo "| $name | $data |" >> $GITHUB_STEP_SUMMARY + done diff --git a/.github/workflows/convert.yml b/.github/workflows/convert.yml index 9e2497bc810..4532726d59e 100644 --- a/.github/workflows/convert.yml +++ b/.github/workflows/convert.yml @@ -1,389 +1,389 @@ -name: Convert & Check Images - -on: - schedule: - # Do conversion every day at 00:03 clock UTC - - cron: "3 0 * * *" - workflow_dispatch: - -env: - CARGO_TERM_COLOR: always - REGISTRY: ghcr.io - ORGANIZATION: ${{ github.repository }} - IMAGE_LIST_PATH: misc/top_images/image_list.txt - FSCK_PATCH_PATH: misc/top_images/fsck.patch - -jobs: - nydusify-build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Golang - uses: actions/setup-go@v5 - with: - go-version-file: 'go.work' - cache-dependency-path: "**/*.sum" - - name: Build Contrib - run: | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b /usr/local/bin v1.54.2 - make -e DOCKER=false nydusify-release - - name: Upload Nydusify - uses: actions/upload-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd/nydusify - - nydus-build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: Linux-cargo-amd64 - - uses: dsherret/rust-toolchain-file@v1 - - name: Build Nydus - run: | - make release - - name: Upload Nydus Binaries - uses: actions/upload-artifact@v4 - with: - name: nydus-artifact - path: | - target/release/nydus-image - target/release/nydusd - - fsck-erofs-build: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: 
actions/checkout@v4 - - name: Build fsck.erofs - run: | - sudo apt-get update && sudo apt-get install -y build-essential git autotools-dev automake libtool pkg-config uuid-dev liblz4-dev - git clone https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git - cd erofs-utils && git checkout v1.6 && git apply ../${{ env.FSCK_PATCH_PATH }} && ./autogen.sh && ./configure && make && cd .. - sudo cp erofs-utils/fsck/fsck.erofs /usr/local/bin/ - - name: Upload fsck.erofs - uses: actions/upload-artifact@v4 - with: - name: fsck-erofs-artifact - path: | - /usr/local/bin/fsck.erofs - - convert-zran: - runs-on: ubuntu-latest - needs: [nydusify-build, nydus-build, fsck-erofs-build] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Login ghcr registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: /usr/local/bin - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: /usr/local/bin - - name: Download fsck.erofs - uses: actions/download-artifact@v4 - with: - name: fsck-erofs-artifact - path: /usr/local/bin - - name: Convert and check zran images - run: | - sudo chmod +x /usr/local/bin/nydus* - sudo chmod +x /usr/local/bin/fsck.erofs - sudo docker run -d --restart=always -p 5000:5000 registry - sudo mkdir convert-zran - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - echo "converting $I:latest to $I:nydus-nightly-oci-ref" - ghcr_repo=${{ env.REGISTRY }}/${{ env.ORGANIZATION }} - - # push oci image to ghcr/local for zran reference - sudo docker pull $I:latest - sudo docker tag $I:latest $ghcr_repo/$I - sudo docker tag $I:latest localhost:5000/$I - sudo DOCKER_CONFIG=$HOME/.docker docker push $ghcr_repo/$I - sudo docker push localhost:5000/$I - - # for pre-built images - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --oci-ref \ - --source $ghcr_repo/$I \ - --target $ghcr_repo/$I:nydus-nightly-oci-ref \ - --platform linux/amd64,linux/arm64 - - # use local registry for speed - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --oci-ref \ - --source localhost:5000/$I \ - --target localhost:5000/$I:nydus-nightly-oci-ref \ - --platform linux/amd64,linux/arm64 \ - --output-json convert-zran/${I}.json - - # check zran image and referenced oci image - sudo rm -rf ./tmp - sudo DOCKER_CONFIG=$HOME/.docker nydusify check \ - --source localhost:5000/$I \ - --target localhost:5000/$I:nydus-nightly-oci-ref - - sudo fsck.erofs -d1 output/nydus_bootstrap - sudo rm -rf ./output - done - - name: Save Nydusify Metric - uses: actions/upload-artifact@v4 - with: - name: convert-zran-metric - path: convert-zran - - convert-native-v5: - runs-on: ubuntu-latest - needs: [nydusify-build, nydus-build] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Login ghcr registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: /usr/local/bin - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: /usr/local/bin - - name: Convert and check RAFS v5 images - run: | - sudo chmod +x /usr/local/bin/nydus* - sudo docker run -d --restart=always -p 5000:5000 registry - sudo mkdir 
convert-native-v5 - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - echo "converting $I:latest to $I:nydus-nightly-v5" - # for pre-built images - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v5 \ - --fs-version 5 \ - --platform linux/amd64,linux/arm64 - - # use local registry for speed - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v5 \ - --fs-version 5 \ - --platform linux/amd64,linux/arm64 \ - --output-json convert-native-v5/${I}.json - - sudo rm -rf ./tmp - sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v5 - done - - name: Save Nydusify Metric - uses: actions/upload-artifact@v4 - with: - name: convert-native-v5-metric - path: convert-native-v5 - - convert-native-v6: - runs-on: ubuntu-latest - needs: [nydusify-build, nydus-build, fsck-erofs-build] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Login ghcr registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: /usr/local/bin - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: /usr/local/bin - - name: Download fsck.erofs - uses: actions/download-artifact@v4 - with: - name: fsck-erofs-artifact - path: /usr/local/bin - - name: Convert and check RAFS v6 images - run: | - sudo chmod +x /usr/local/bin/nydus* - sudo chmod +x /usr/local/bin/fsck.erofs - sudo docker run -d --restart=always -p 5000:5000 registry - sudo mkdir convert-native-v6 - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - echo "converting $I:latest to $I:nydus-nightly-v6" - # for pre-built images - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v6 \ - --fs-version 6 \ - --platform linux/amd64,linux/arm64 - - # use local registry for speed - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v6 \ - --fs-version 6 \ - --platform linux/amd64,linux/arm64 \ - --output-json convert-native-v6/${I}.json - - sudo rm -rf ./tmp - sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v6 - - sudo fsck.erofs -d1 output/nydus_bootstrap - sudo rm -rf ./output - done - - name: Save Nydusify Metric - uses: actions/upload-artifact@v4 - with: - name: convert-native-v6-metric - path: convert-native-v6 - - convert-native-v6-batch: - runs-on: ubuntu-latest - needs: [nydusify-build, nydus-build, fsck-erofs-build] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Login ghcr registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: /usr/local/bin - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: /usr/local/bin - - name: Download fsck.erofs - uses: actions/download-artifact@v4 - with: - name: fsck-erofs-artifact - path: /usr/local/bin - - name: Convert and check RAFS v6 batch images - run: | - 
sudo chmod +x /usr/local/bin/nydus* - sudo chmod +x /usr/local/bin/fsck.erofs - sudo docker run -d --restart=always -p 5000:5000 registry - sudo mkdir convert-native-v6-batch - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - echo "converting $I:latest to $I:nydus-nightly-v6-batch" - # for pre-built images - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v6-batch \ - --fs-version 6 \ - --batch-size 0x100000 \ - --platform linux/amd64,linux/arm64 - - # use local registry for speed - sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ - --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v6-batch \ - --fs-version 6 \ - --batch-size 0x100000 \ - --platform linux/amd64,linux/arm64 \ - --output-json convert-native-v6-batch/${I}.json - - sudo rm -rf ./tmp - sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ - --target localhost:5000/$I:nydus-nightly-v6-batch - - sudo fsck.erofs -d1 output/nydus_bootstrap - sudo rm -rf ./output - done - - name: Save Nydusify Metric - uses: actions/upload-artifact@v4 - with: - name: convert-native-v6-batch-metric - path: convert-native-v6-batch - - convert-metric: - runs-on: ubuntu-latest - needs: [convert-zran, convert-native-v5, convert-native-v6, convert-native-v6-batch] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Download Zran Metric - uses: actions/download-artifact@v4 - with: - name: convert-zran-metric - path: convert-zran - - name: Download V5 Metric - uses: actions/download-artifact@v4 - with: - name: convert-native-v5-metric - path: convert-native-v5 - - name: Download V6 Metric - uses: actions/download-artifact@v4 - with: - name: convert-native-v6-metric - path: convert-native-v6 - - name: Download V6 Batch Metric - uses: actions/download-artifact@v4 - with: - name: convert-native-v6-batch-metric - path: convert-native-v6-batch - - name: Summary - run: | - echo "## Image Size(MB)" > $GITHUB_STEP_SUMMARY - echo "> Compare the size of OCI image and Nydus image." 
- echo "|image name|oci/nydus-zran|oci/nydus-v5|oci/nydus-v6|oci/nydus-batch|" >> $GITHUB_STEP_SUMMARY - echo "|:--------:|:------------:|:----------:|:----------:|:-------------:|" >> $GITHUB_STEP_SUMMARY - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - zranSourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-zran/${I}.json) / 1048576")") - zranTargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-zran/${I}.json) / 1048576")") - v5SourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v5/${I}.json) / 1048576")") - v5TargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v5/${I}.json) / 1048576")") - v6SourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v6/${I}.json) / 1048576")") - v6TargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v6/${I}.json) / 1048576")") - batchSourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v6-batch/${I}.json) / 1048576")") - batchTargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v6-batch/${I}.json) / 1048576")") - echo "|${I}:latest|${zranSourceImageSize}/${zranTargetImageSize}|${v5SourceImageSize}/${v5TargetImageSize}|${v6SourceImageSize}/${v6TargetImageSize}|${batchSourceImageSize}/${batchTargetImageSize}|" >> $GITHUB_STEP_SUMMARY - done - echo "## Conversion Time(ms)" >> $GITHUB_STEP_SUMMARY - echo "> Time elapsed to convert OCI image to Nydus image." - echo "|image name|nydus-zran|nydus-v5|nydus-v6|nydus-batch|" >> $GITHUB_STEP_SUMMARY - echo "|:---:|:--:|:-------:|:-------:|:-------:|" >> $GITHUB_STEP_SUMMARY - for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do - zranConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-zran/${I}.json) / 1000000")") - v5ConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v5/${I}.json) / 1000000")") - v6ConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v6/${I}.json) / 1000000")") - batchConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v6-batch/${I}.json) / 1000000")") - echo "|${I}:latest|${zranConversionElapsed}|${v5ConversionElapsed}|${v6ConversionElapsed}|${batchConversionElapsed}|" >> $GITHUB_STEP_SUMMARY - done - - uses: geekyeggo/delete-artifact@v2 - with: - name: '*' +name: Convert & Check Images + +on: + schedule: + # Do conversion every day at 00:03 clock UTC + - cron: "3 0 * * *" + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + REGISTRY: ghcr.io + ORGANIZATION: ${{ github.repository }} + IMAGE_LIST_PATH: misc/top_images/image_list.txt + FSCK_PATCH_PATH: misc/top_images/fsck.patch + +jobs: + nydusify-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: Build Contrib + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b /usr/local/bin v1.54.2 + make -e DOCKER=false nydusify-release + - name: Upload Nydusify + uses: actions/upload-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd/nydusify + + nydus-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + 
uses: actions/checkout@v4 + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: Linux-cargo-amd64 + - uses: dsherret/rust-toolchain-file@v1 + - name: Build Nydus + run: | + make release + - name: Upload Nydus Binaries + uses: actions/upload-artifact@v4 + with: + name: nydus-artifact + path: | + target/release/nydus-image + target/release/nydusd + + fsck-erofs-build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Build fsck.erofs + run: | + sudo apt-get update && sudo apt-get install -y build-essential git autotools-dev automake libtool pkg-config uuid-dev liblz4-dev + git clone https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git + cd erofs-utils && git checkout v1.6 && git apply ../${{ env.FSCK_PATCH_PATH }} && ./autogen.sh && ./configure && make && cd .. + sudo cp erofs-utils/fsck/fsck.erofs /usr/local/bin/ + - name: Upload fsck.erofs + uses: actions/upload-artifact@v4 + with: + name: fsck-erofs-artifact + path: | + /usr/local/bin/fsck.erofs + + convert-zran: + runs-on: ubuntu-latest + needs: [nydusify-build, nydus-build, fsck-erofs-build] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Login ghcr registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: /usr/local/bin + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: /usr/local/bin + - name: Download fsck.erofs + uses: actions/download-artifact@v4 + with: + name: fsck-erofs-artifact + path: /usr/local/bin + - name: Convert and check zran images + run: | + sudo chmod +x /usr/local/bin/nydus* + sudo chmod +x /usr/local/bin/fsck.erofs + sudo docker run -d --restart=always -p 5000:5000 registry + sudo mkdir convert-zran + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + echo "converting $I:latest to $I:nydus-nightly-oci-ref" + ghcr_repo=${{ env.REGISTRY }}/${{ env.ORGANIZATION }} + + # push oci image to ghcr/local for zran reference + sudo docker pull $I:latest + sudo docker tag $I:latest $ghcr_repo/$I + sudo docker tag $I:latest localhost:5000/$I + sudo DOCKER_CONFIG=$HOME/.docker docker push $ghcr_repo/$I + sudo docker push localhost:5000/$I + + # for pre-built images + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --oci-ref \ + --source $ghcr_repo/$I \ + --target $ghcr_repo/$I:nydus-nightly-oci-ref \ + --platform linux/amd64,linux/arm64 + + # use local registry for speed + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --oci-ref \ + --source localhost:5000/$I \ + --target localhost:5000/$I:nydus-nightly-oci-ref \ + --platform linux/amd64,linux/arm64 \ + --output-json convert-zran/${I}.json + + # check zran image and referenced oci image + sudo rm -rf ./tmp + sudo DOCKER_CONFIG=$HOME/.docker nydusify check \ + --source localhost:5000/$I \ + --target localhost:5000/$I:nydus-nightly-oci-ref + + sudo fsck.erofs -d1 output/nydus_bootstrap + sudo rm -rf ./output + done + - name: Save Nydusify Metric + uses: actions/upload-artifact@v4 + with: + name: convert-zran-metric + path: convert-zran + + convert-native-v5: + runs-on: ubuntu-latest + needs: [nydusify-build, nydus-build] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Login ghcr registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY 
}} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: /usr/local/bin + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: /usr/local/bin + - name: Convert and check RAFS v5 images + run: | + sudo chmod +x /usr/local/bin/nydus* + sudo docker run -d --restart=always -p 5000:5000 registry + sudo mkdir convert-native-v5 + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + echo "converting $I:latest to $I:nydus-nightly-v5" + # for pre-built images + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v5 \ + --fs-version 5 \ + --platform linux/amd64,linux/arm64 + + # use local registry for speed + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v5 \ + --fs-version 5 \ + --platform linux/amd64,linux/arm64 \ + --output-json convert-native-v5/${I}.json + + sudo rm -rf ./tmp + sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v5 + done + - name: Save Nydusify Metric + uses: actions/upload-artifact@v4 + with: + name: convert-native-v5-metric + path: convert-native-v5 + + convert-native-v6: + runs-on: ubuntu-latest + needs: [nydusify-build, nydus-build, fsck-erofs-build] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Login ghcr registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: /usr/local/bin + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: /usr/local/bin + - name: Download fsck.erofs + uses: actions/download-artifact@v4 + with: + name: fsck-erofs-artifact + path: /usr/local/bin + - name: Convert and check RAFS v6 images + run: | + sudo chmod +x /usr/local/bin/nydus* + sudo chmod +x /usr/local/bin/fsck.erofs + sudo docker run -d --restart=always -p 5000:5000 registry + sudo mkdir convert-native-v6 + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + echo "converting $I:latest to $I:nydus-nightly-v6" + # for pre-built images + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v6 \ + --fs-version 6 \ + --platform linux/amd64,linux/arm64 + + # use local registry for speed + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v6 \ + --fs-version 6 \ + --platform linux/amd64,linux/arm64 \ + --output-json convert-native-v6/${I}.json + + sudo rm -rf ./tmp + sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v6 + + sudo fsck.erofs -d1 output/nydus_bootstrap + sudo rm -rf ./output + done + - name: Save Nydusify Metric + uses: actions/upload-artifact@v4 + with: + name: convert-native-v6-metric + path: convert-native-v6 + + convert-native-v6-batch: + runs-on: ubuntu-latest + needs: [nydusify-build, nydus-build, fsck-erofs-build] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Login ghcr registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ 
github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: /usr/local/bin + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: /usr/local/bin + - name: Download fsck.erofs + uses: actions/download-artifact@v4 + with: + name: fsck-erofs-artifact + path: /usr/local/bin + - name: Convert and check RAFS v6 batch images + run: | + sudo chmod +x /usr/local/bin/nydus* + sudo chmod +x /usr/local/bin/fsck.erofs + sudo docker run -d --restart=always -p 5000:5000 registry + sudo mkdir convert-native-v6-batch + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + echo "converting $I:latest to $I:nydus-nightly-v6-batch" + # for pre-built images + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target ${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/$I:nydus-nightly-v6-batch \ + --fs-version 6 \ + --batch-size 0x100000 \ + --platform linux/amd64,linux/arm64 + + # use local registry for speed + sudo DOCKER_CONFIG=$HOME/.docker nydusify convert \ + --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v6-batch \ + --fs-version 6 \ + --batch-size 0x100000 \ + --platform linux/amd64,linux/arm64 \ + --output-json convert-native-v6-batch/${I}.json + + sudo rm -rf ./tmp + sudo DOCKER_CONFIG=$HOME/.docker nydusify check --source $I:latest \ + --target localhost:5000/$I:nydus-nightly-v6-batch + + sudo fsck.erofs -d1 output/nydus_bootstrap + sudo rm -rf ./output + done + - name: Save Nydusify Metric + uses: actions/upload-artifact@v4 + with: + name: convert-native-v6-batch-metric + path: convert-native-v6-batch + + convert-metric: + runs-on: ubuntu-latest + needs: [convert-zran, convert-native-v5, convert-native-v6, convert-native-v6-batch] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Download Zran Metric + uses: actions/download-artifact@v4 + with: + name: convert-zran-metric + path: convert-zran + - name: Download V5 Metric + uses: actions/download-artifact@v4 + with: + name: convert-native-v5-metric + path: convert-native-v5 + - name: Download V6 Metric + uses: actions/download-artifact@v4 + with: + name: convert-native-v6-metric + path: convert-native-v6 + - name: Download V6 Batch Metric + uses: actions/download-artifact@v4 + with: + name: convert-native-v6-batch-metric + path: convert-native-v6-batch + - name: Summary + run: | + echo "## Image Size(MB)" > $GITHUB_STEP_SUMMARY + echo "> Compare the size of OCI image and Nydus image." 
+ echo "|image name|oci/nydus-zran|oci/nydus-v5|oci/nydus-v6|oci/nydus-batch|" >> $GITHUB_STEP_SUMMARY + echo "|:--------:|:------------:|:----------:|:----------:|:-------------:|" >> $GITHUB_STEP_SUMMARY + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + zranSourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-zran/${I}.json) / 1048576")") + zranTargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-zran/${I}.json) / 1048576")") + v5SourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v5/${I}.json) / 1048576")") + v5TargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v5/${I}.json) / 1048576")") + v6SourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v6/${I}.json) / 1048576")") + v6TargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v6/${I}.json) / 1048576")") + batchSourceImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.SourceImageSize' convert-native-v6-batch/${I}.json) / 1048576")") + batchTargetImageSize=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.TargetImageSize' convert-native-v6-batch/${I}.json) / 1048576")") + echo "|${I}:latest|${zranSourceImageSize}/${zranTargetImageSize}|${v5SourceImageSize}/${v5TargetImageSize}|${v6SourceImageSize}/${v6TargetImageSize}|${batchSourceImageSize}/${batchTargetImageSize}|" >> $GITHUB_STEP_SUMMARY + done + echo "## Conversion Time(ms)" >> $GITHUB_STEP_SUMMARY + echo "> Time elapsed to convert OCI image to Nydus image." + echo "|image name|nydus-zran|nydus-v5|nydus-v6|nydus-batch|" >> $GITHUB_STEP_SUMMARY + echo "|:---:|:--:|:-------:|:-------:|:-------:|" >> $GITHUB_STEP_SUMMARY + for I in $(cat ${{ env.IMAGE_LIST_PATH }}); do + zranConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-zran/${I}.json) / 1000000")") + v5ConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v5/${I}.json) / 1000000")") + v6ConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v6/${I}.json) / 1000000")") + batchConversionElapsed=$(printf "%0.2f" "$(bc <<< "scale=2; $(jq -r '.ConversionElapsed' convert-native-v6-batch/${I}.json) / 1000000")") + echo "|${I}:latest|${zranConversionElapsed}|${v5ConversionElapsed}|${v6ConversionElapsed}|${batchConversionElapsed}|" >> $GITHUB_STEP_SUMMARY + done + - uses: geekyeggo/delete-artifact@v2 + with: + name: '*' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c3499a26353..c6221027967 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,209 +1,209 @@ -name: Release - -on: - push: - tags: - - v[0-9]+.[0-9]+.[0-9]+* - schedule: - # Run daily sanity check at 22:08 clock UTC - - cron: "8 22 * * *" - workflow_dispatch: - -env: - CARGO_TERM_COLOR: always - -jobs: - nydus-linux: - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64, arm64, ppc64le, riscv64] - steps: - - uses: actions/checkout@v4 - - name: Cache cargo - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} - - uses: dsherret/rust-toolchain-file@v1 - - name: Build nydus-rs - run: | - declare -A rust_target_map=( ["amd64"]="x86_64-unknown-linux-musl" ["arm64"]="aarch64-unknown-linux-musl" ["ppc64le"]="powerpc64le-unknown-linux-gnu" ["riscv64"]="riscv64gc-unknown-linux-gnu") 
- RUST_TARGET=${rust_target_map[${{ matrix.arch }}]} - cargo install --locked --version 0.2.4 cross - make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release - sudo mv target/$RUST_TARGET/release/nydusd nydusd - sudo mv target/$RUST_TARGET/release/nydus-image . - sudo mv target/$RUST_TARGET/release/nydusctl . - sudo cp -r misc/configs . - sudo chown -R $(id -un):$(id -gn) . ~/.cargo/ - - name: store-artifacts - uses: actions/upload-artifact@v4 - with: - name: nydus-artifacts-linux-${{ matrix.arch }} - path: | - nydusd - nydus-image - nydusctl - configs - - nydus-macos: - runs-on: macos-12 - strategy: - matrix: - arch: [amd64, arm64] - steps: - - uses: actions/checkout@v4 - - name: Cache cargo - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} - - uses: dsherret/rust-toolchain-file@v1 - - name: build - run: | - if [[ "${{matrix.arch}}" == "amd64" ]]; then - RUST_TARGET="x86_64-apple-darwin" - else - RUST_TARGET="aarch64-apple-darwin" - fi - cargo install --version 0.2.4 cross - rustup target add ${RUST_TARGET} - make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release - sudo mv target/$RUST_TARGET/release/nydusd nydusd - sudo cp -r misc/configs . - sudo chown -R $(id -un):$(id -gn) . ~/.cargo/ - - name: store-artifacts - uses: actions/upload-artifact@v4 - with: - name: nydus-artifacts-darwin-${{ matrix.arch }} - path: | - nydusctl - nydusd - nydus-image - configs - - contrib-linux: - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64, arm64, ppc64le, riscv64] - env: - DOCKER: false - steps: - - uses: actions/checkout@v4 - - name: Setup Golang - uses: actions/setup-go@v5 - with: - go-version-file: 'go.work' - cache-dependency-path: "**/*.sum" - - name: build contrib go components - run: | - make -e GOARCH=${{ matrix.arch }} contrib-release - sudo mv contrib/ctr-remote/bin/ctr-remote . - sudo mv contrib/nydusify/cmd/nydusify . - sudo mv contrib/nydus-overlayfs/bin/nydus-overlayfs . - - name: store-artifacts - uses: actions/upload-artifact@v4 - with: - name: nydus-artifacts-linux-${{ matrix.arch }}-contrib - path: | - ctr-remote - nydusify - nydus-overlayfs - containerd-nydus-grpc - - prepare-tarball-linux: - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64, arm64, ppc64le, riscv64] - os: [linux] - needs: [nydus-linux, contrib-linux] - steps: - - name: download artifacts - uses: actions/download-artifact@v4 - with: - pattern: nydus-artifacts-${{ matrix.os }}-${{ matrix.arch }}* - merge-multiple: true - path: nydus-static - - name: prepare release tarball - run: | - tag=$(echo $GITHUB_REF | cut -d/ -f3-) - tarball="nydus-static-$tag-${{ matrix.os }}-${{ matrix.arch }}.tgz" - chmod +x nydus-static/* - tar cf - nydus-static | gzip > ${tarball} - echo "tarball=${tarball}" >> $GITHUB_ENV - - shasum="$tarball.sha256sum" - sha256sum $tarball > $shasum - echo "tarball_shasum=${shasum}" >> $GITHUB_ENV - - name: store-artifacts - uses: actions/upload-artifact@v4 - with: - name: nydus-release-tarball-${{ matrix.os }}-${{ matrix.arch }} - path: | - ${{ env.tarball }} - ${{ env.tarball_shasum }} - - # use a seperate job for darwin because github action if: condition cannot handle && properly. 
- prepare-tarball-darwin: - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64, arm64] - os: [darwin] - needs: [nydus-macos] - steps: - - name: download artifacts - uses: actions/download-artifact@v4 - with: - name: nydus-artifacts-${{ matrix.os }}-${{ matrix.arch }} - path: nydus-static - - name: prepare release tarball - run: | - tag=$(echo $GITHUB_REF | cut -d/ -f3-) - tarball="nydus-static-$tag-${{ matrix.os }}-${{ matrix.arch }}.tgz" - chmod +x nydus-static/* - tar cf - nydus-static | gzip > ${tarball} - echo "tarball=${tarball}" >> $GITHUB_ENV - - shasum="$tarball.sha256sum" - sha256sum $tarball > $shasum - echo "tarball_shasum=${shasum}" >> $GITHUB_ENV - - name: store-artifacts - uses: actions/upload-artifact@v4 - with: - name: nydus-release-tarball-${{ matrix.os }}-${{ matrix.arch }} - path: | - ${{ env.tarball }} - ${{ env.tarball_shasum }} - - create-release: - runs-on: ubuntu-latest - needs: [prepare-tarball-linux, prepare-tarball-darwin] - steps: - - name: download artifacts - uses: actions/download-artifact@v4 - with: - pattern: nydus-release-tarball-* - merge-multiple: true - path: nydus-tarball - - name: prepare release env - run: | - echo "tarballs<> $GITHUB_ENV - for I in $(ls nydus-tarball);do echo "nydus-tarball/${I}" >> $GITHUB_ENV; done - echo "EOF" >> $GITHUB_ENV - tag=$(echo $GITHUB_REF | cut -d/ -f3-) - echo "tag=${tag}" >> $GITHUB_ENV - cat $GITHUB_ENV - - name: push release - if: github.event_name == 'push' - uses: softprops/action-gh-release@v1 - with: - name: "Nydus Image Service ${{ env.tag }}" - body: | - Binaries download mirror (sync within a few hours): https://registry.npmmirror.com/binary.html?path=nydus/${{ env.tag }}/ - generate_release_notes: true - files: | - ${{ env.tarballs }} +name: Release + +on: + push: + tags: + - v[0-9]+.[0-9]+.[0-9]+* + schedule: + # Run daily sanity check at 22:08 clock UTC + - cron: "8 22 * * *" + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + nydus-linux: + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64, arm64, ppc64le, riscv64] + steps: + - uses: actions/checkout@v4 + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} + - uses: dsherret/rust-toolchain-file@v1 + - name: Build nydus-rs + run: | + declare -A rust_target_map=( ["amd64"]="x86_64-unknown-linux-musl" ["arm64"]="aarch64-unknown-linux-musl" ["ppc64le"]="powerpc64le-unknown-linux-gnu" ["riscv64"]="riscv64gc-unknown-linux-gnu") + RUST_TARGET=${rust_target_map[${{ matrix.arch }}]} + cargo install --locked --version 0.2.4 cross + make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release + sudo mv target/$RUST_TARGET/release/nydusd nydusd + sudo mv target/$RUST_TARGET/release/nydus-image . + sudo mv target/$RUST_TARGET/release/nydusctl . + sudo cp -r misc/configs . + sudo chown -R $(id -un):$(id -gn) . 
~/.cargo/ + - name: store-artifacts + uses: actions/upload-artifact@v4 + with: + name: nydus-artifacts-linux-${{ matrix.arch }} + path: | + nydusd + nydus-image + nydusctl + configs + + nydus-macos: + runs-on: macos-12 + strategy: + matrix: + arch: [amd64, arm64] + steps: + - uses: actions/checkout@v4 + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} + - uses: dsherret/rust-toolchain-file@v1 + - name: build + run: | + if [[ "${{matrix.arch}}" == "amd64" ]]; then + RUST_TARGET="x86_64-apple-darwin" + else + RUST_TARGET="aarch64-apple-darwin" + fi + cargo install --version 0.2.4 cross + rustup target add ${RUST_TARGET} + make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release + sudo mv target/$RUST_TARGET/release/nydusd nydusd + sudo cp -r misc/configs . + sudo chown -R $(id -un):$(id -gn) . ~/.cargo/ + - name: store-artifacts + uses: actions/upload-artifact@v4 + with: + name: nydus-artifacts-darwin-${{ matrix.arch }} + path: | + nydusctl + nydusd + nydus-image + configs + + contrib-linux: + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64, arm64, ppc64le, riscv64] + env: + DOCKER: false + steps: + - uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: build contrib go components + run: | + make -e GOARCH=${{ matrix.arch }} contrib-release + sudo mv contrib/ctr-remote/bin/ctr-remote . + sudo mv contrib/nydusify/cmd/nydusify . + sudo mv contrib/nydus-overlayfs/bin/nydus-overlayfs . + - name: store-artifacts + uses: actions/upload-artifact@v4 + with: + name: nydus-artifacts-linux-${{ matrix.arch }}-contrib + path: | + ctr-remote + nydusify + nydus-overlayfs + containerd-nydus-grpc + + prepare-tarball-linux: + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64, arm64, ppc64le, riscv64] + os: [linux] + needs: [nydus-linux, contrib-linux] + steps: + - name: download artifacts + uses: actions/download-artifact@v4 + with: + pattern: nydus-artifacts-${{ matrix.os }}-${{ matrix.arch }}* + merge-multiple: true + path: nydus-static + - name: prepare release tarball + run: | + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tarball="nydus-static-$tag-${{ matrix.os }}-${{ matrix.arch }}.tgz" + chmod +x nydus-static/* + tar cf - nydus-static | gzip > ${tarball} + echo "tarball=${tarball}" >> $GITHUB_ENV + + shasum="$tarball.sha256sum" + sha256sum $tarball > $shasum + echo "tarball_shasum=${shasum}" >> $GITHUB_ENV + - name: store-artifacts + uses: actions/upload-artifact@v4 + with: + name: nydus-release-tarball-${{ matrix.os }}-${{ matrix.arch }} + path: | + ${{ env.tarball }} + ${{ env.tarball_shasum }} + + # use a seperate job for darwin because github action if: condition cannot handle && properly. 
+  prepare-tarball-darwin:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        arch: [amd64, arm64]
+        os: [darwin]
+    needs: [nydus-macos]
+    steps:
+      - name: download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: nydus-artifacts-${{ matrix.os }}-${{ matrix.arch }}
+          path: nydus-static
+      - name: prepare release tarball
+        run: |
+          tag=$(echo $GITHUB_REF | cut -d/ -f3-)
+          tarball="nydus-static-$tag-${{ matrix.os }}-${{ matrix.arch }}.tgz"
+          chmod +x nydus-static/*
+          tar cf - nydus-static | gzip > ${tarball}
+          echo "tarball=${tarball}" >> $GITHUB_ENV
+
+          shasum="$tarball.sha256sum"
+          sha256sum $tarball > $shasum
+          echo "tarball_shasum=${shasum}" >> $GITHUB_ENV
+      - name: store-artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: nydus-release-tarball-${{ matrix.os }}-${{ matrix.arch }}
+          path: |
+            ${{ env.tarball }}
+            ${{ env.tarball_shasum }}
+
+  create-release:
+    runs-on: ubuntu-latest
+    needs: [prepare-tarball-linux, prepare-tarball-darwin]
+    steps:
+      - name: download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: nydus-release-tarball-*
+          merge-multiple: true
+          path: nydus-tarball
+      - name: prepare release env
+        run: |
+          echo "tarballs<<EOF" >> $GITHUB_ENV
+          for I in $(ls nydus-tarball);do echo "nydus-tarball/${I}" >> $GITHUB_ENV; done
+          echo "EOF" >> $GITHUB_ENV
+          tag=$(echo $GITHUB_REF | cut -d/ -f3-)
+          echo "tag=${tag}" >> $GITHUB_ENV
+          cat $GITHUB_ENV
+      - name: push release
+        if: github.event_name == 'push'
+        uses: softprops/action-gh-release@v1
+        with:
+          name: "Nydus Image Service ${{ env.tag }}"
+          body: |
+            Binaries download mirror (sync within a few hours): https://registry.npmmirror.com/binary.html?path=nydus/${{ env.tag }}/
+          generate_release_notes: true
+          files: |
+            ${{ env.tarballs }}
diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml
index 8c36c6d7954..952b0a3c144 100644
--- a/.github/workflows/smoke.yml
+++ b/.github/workflows/smoke.yml
@@ -1,336 +1,336 @@
-name: Smoke Test
-
-on:
-  push:
-    branches: ["**", "stable/**"]
-    paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ]
-  pull_request:
-    branches: ["**", "stable/**"]
-    paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ]
-  schedule:
-    # Run daily sanity check at 03:00 clock UTC
-    - cron: "0 03 * * *"
-  workflow_dispatch:
-
-env:
-  CARGO_TERM_COLOR: always
-
-jobs:
-  contrib-build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        arch: [amd64, arm64, ppc64le, riscv64]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Setup Golang
-        uses: actions/setup-go@v5
-        with:
-          go-version-file: 'go.work'
-          cache-dependency-path: "**/*.sum"
-      - name: Build Contrib
-        run: |
-          make -e DOCKER=false GOARCH=${{ matrix.arch }} contrib-release
-      - name: Upload Nydusify
-        if: matrix.arch == 'amd64'
-        uses: actions/upload-artifact@v4
-        with:
-          name: nydusify-artifact
-          path: contrib/nydusify/cmd
-
-  contrib-lint:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        include:
-          - path: contrib/nydusify
-          - path: contrib/ctr-remote
-          - path: contrib/nydus-overlayfs
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Setup Golang
-        uses: actions/setup-go@v5
-        with:
-          go-version-file: 'go.work'
-          cache: false
-      - name: Lint
-        uses: golangci/golangci-lint-action@v4
-        with:
-          version: v1.56
-          working-directory: ${{ matrix.path }}
-          args: --timeout=30m --issues-exit-code=0
-
-  nydus-build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        arch: [amd64, arm64, ppc64le, riscv64]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: 
Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} - save-if: ${{ github.ref == 'refs/heads/master' }} - - uses: dsherret/rust-toolchain-file@v1 - - name: Build Nydus - run: | - declare -A rust_target_map=( ["amd64"]="x86_64-unknown-linux-musl" ["arm64"]="aarch64-unknown-linux-musl" ["ppc64le"]="powerpc64le-unknown-linux-gnu" ["riscv64"]="riscv64gc-unknown-linux-gnu") - RUST_TARGET=${rust_target_map[${{ matrix.arch }}]} - cargo install --locked --version 0.2.4 cross - make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release - sudo mv target/$RUST_TARGET/release/nydusd . - sudo mv target/$RUST_TARGET/release/nydus-image . - - name: Upload Nydus Binaries - if: matrix.arch == 'amd64' - uses: actions/upload-artifact@v4 - with: - name: nydus-artifact - path: | - nydus-image - nydusd - - nydusd-build-macos: - runs-on: macos-12 - strategy: - matrix: - arch: [amd64, arm64] - steps: - - uses: actions/checkout@v4 - - name: Cache cargo - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} - save-if: ${{ github.ref == 'refs/heads/master' }} - - uses: dsherret/rust-toolchain-file@v1 - - name: build - run: | - if [[ "${{matrix.arch}}" == "amd64" ]]; then - RUST_TARGET="x86_64-apple-darwin" - else - RUST_TARGET="aarch64-apple-darwin" - fi - cargo install --version 0.2.4 cross - rustup target add ${RUST_TARGET} - make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release - - nydus-integration-test: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Docker Cache - uses: jpribyl/action-docker-layer-caching@v0.1.0 - continue-on-error: true - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: | - target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Older Binaries - id: prepare-binaries - run: | - export NYDUS_STABLE_VERSION=$(curl https://api.github.com/repos/Dragonflyoss/nydus/releases/latest | jq -r '.tag_name') - - versions=(v0.1.0 ${NYDUS_STABLE_VERSION}) - version_archs=(v0.1.0-x86_64 ${NYDUS_STABLE_VERSION}-linux-amd64) - for i in ${!versions[@]}; do - version=${versions[$i]} - version_arch=${version_archs[$i]} - - wget -q https://github.com/dragonflyoss/nydus/releases/download/$version/nydus-static-$version_arch.tgz - sudo mkdir nydus-$version /usr/bin/nydus-$version - sudo tar xzf nydus-static-$version_arch.tgz -C nydus-$version - sudo cp -r nydus-$version/nydus-static/* /usr/bin/nydus-$version/ - done - - name: Setup Golang - uses: actions/setup-go@v5 - with: - go-version-file: 'go.work' - cache-dependency-path: "**/*.sum" - - name: Integration Test - run: | - sudo mkdir -p /usr/bin/nydus-latest /home/runner/work/workdir - sudo install -D -m 755 contrib/nydusify/cmd/nydusify /usr/bin/nydus-latest - sudo install -D -m 755 target/release/nydusd target/release/nydus-image /usr/bin/nydus-latest - sudo bash misc/prepare.sh - - export NYDUS_STABLE_VERSION=$(curl https://api.github.com/repos/Dragonflyoss/nydus/releases/latest | jq -r '.tag_name') - export NYDUS_STABLE_VERSION_EXPORT="${NYDUS_STABLE_VERSION//./_}" - - versions=(v0.1.0 ${NYDUS_STABLE_VERSION} latest) - version_exports=(v0_1_0 ${NYDUS_STABLE_VERSION_EXPORT} latest) - for i in ${!version_exports[@]}; do - version=${versions[$i]} - 
version_export=${version_exports[$i]} - export NYDUS_BUILDER_$version_export=/usr/bin/nydus-$version/nydus-image - export NYDUS_NYDUSD_$version_export=/usr/bin/nydus-$version/nydusd - export NYDUS_NYDUSIFY_$version_export=/usr/bin/nydus-$version/nydusify - done - - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b /usr/bin v1.54.2 - sudo -E make smoke-only - - nydus-unit-test: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: Linux-cargo-amd64 - save-if: ${{ github.ref == 'refs/heads/master' }} - - name: Install cargo nextest - uses: taiki-e/install-action@nextest - - name: Fscache Setup - run: sudo bash misc/fscache/setup.sh - - name: Unit Test - run: | - CARGO_HOME=${HOME}/.cargo - CARGO_BIN=$(which cargo) - sudo -E CARGO=${CARGO_BIN} make ut-nextest - - contrib-unit-test-coverage: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Golang - uses: actions/setup-go@v5 - with: - go-version-file: 'go.work' - cache-dependency-path: "**/*.sum" - - name: Unit Test - run: | - make -e DOCKER=false contrib-test - - name: Upload contrib coverage file - uses: actions/upload-artifact@v4 - with: - name: contrib-test-coverage-artifact - path: | - contrib/nydusify/coverage.txt - - nydus-unit-test-coverage: - runs-on: ubuntu-latest - env: - CARGO_TERM_COLOR: always - steps: - - uses: actions/checkout@v4 - - name: Rust Cache - uses: Swatinem/rust-cache@v2 - with: - cache-on-failure: true - shared-key: Linux-cargo-amd64 - save-if: ${{ github.ref == 'refs/heads/master' }} - - name: Install cargo-llvm-cov - uses: taiki-e/install-action@cargo-llvm-cov - - name: Fscache Setup - run: sudo bash misc/fscache/setup.sh - - name: Generate code coverage - run: | - CARGO_HOME=${HOME}/.cargo - CARGO_BIN=$(which cargo) - sudo -E CARGO=${CARGO_BIN} make coverage-codecov - - name: Upload nydus coverage file - uses: actions/upload-artifact@v4 - with: - name: nydus-test-coverage-artifact - path: | - codecov.json - - upload-coverage-to-codecov: - runs-on: ubuntu-latest - needs: [contrib-unit-test-coverage, nydus-unit-test-coverage] - steps: - - uses: actions/checkout@v4 - - name: Download nydus coverage file - uses: actions/download-artifact@v4 - with: - name: nydus-test-coverage-artifact - - name: Download contrib coverage file - uses: actions/download-artifact@v4 - with: - name: contrib-test-coverage-artifact - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - files: ./codecov.json,./coverage.txt - token: ${{ secrets.CODECOV_TOKEN }} - verbose: true - fail_ci_if_error: true - - nydus-cargo-deny: - name: cargo-deny - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - - uses: EmbarkStudios/cargo-deny-action@v1 - - performance-test: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - strategy: - matrix: - include: - - mode: fs-version-5 - - mode: fs-version-6 - - mode: zran - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Nydus Container Environment - run: | - sudo bash misc/prepare.sh - - name: Performance Test - run: | - export PERFORMANCE_TEST_MODE=${{ 
matrix.mode }} - sudo -E make smoke-performance - - takeover-test: - runs-on: ubuntu-latest - needs: [contrib-build, nydus-build] - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Download Nydus - uses: actions/download-artifact@v4 - with: - name: nydus-artifact - path: target/release - - name: Download Nydusify - uses: actions/download-artifact@v4 - with: - name: nydusify-artifact - path: contrib/nydusify/cmd - - name: Prepare Nydus Container Environment - run: | - sudo bash misc/prepare.sh takeover_test - - name: Takeover Test - run: | - export NEW_NYDUSD_BINARY_PATH=target/release/nydusd - sudo -E make smoke-takeover +name: Smoke Test + +on: + push: + branches: ["**", "stable/**"] + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] + pull_request: + branches: ["**", "stable/**"] + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.svg', '**/docs/**' ] + schedule: + # Run daily sanity check at 03:00 clock UTC + - cron: "0 03 * * *" + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + contrib-build: + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64, arm64, ppc64le, riscv64] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: Build Contrib + run: | + make -e DOCKER=false GOARCH=${{ matrix.arch }} contrib-release + - name: Upload Nydusify + if: matrix.arch == 'amd64' + uses: actions/upload-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + + contrib-lint: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - path: contrib/nydusify + - path: contrib/ctr-remote + - path: contrib/nydus-overlayfs + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache: false + - name: Lint + uses: golangci/golangci-lint-action@v4 + with: + version: v1.56 + working-directory: ${{ matrix.path }} + args: --timeout=30m --issues-exit-code=0 + + nydus-build: + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64, arm64, ppc64le, riscv64] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} + save-if: ${{ github.ref == 'refs/heads/master' }} + - uses: dsherret/rust-toolchain-file@v1 + - name: Build Nydus + run: | + declare -A rust_target_map=( ["amd64"]="x86_64-unknown-linux-musl" ["arm64"]="aarch64-unknown-linux-musl" ["ppc64le"]="powerpc64le-unknown-linux-gnu" ["riscv64"]="riscv64gc-unknown-linux-gnu") + RUST_TARGET=${rust_target_map[${{ matrix.arch }}]} + cargo install --locked --version 0.2.4 cross + make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release + sudo mv target/$RUST_TARGET/release/nydusd . + sudo mv target/$RUST_TARGET/release/nydus-image . 
+ - name: Upload Nydus Binaries + if: matrix.arch == 'amd64' + uses: actions/upload-artifact@v4 + with: + name: nydus-artifact + path: | + nydus-image + nydusd + + nydusd-build-macos: + runs-on: macos-12 + strategy: + matrix: + arch: [amd64, arm64] + steps: + - uses: actions/checkout@v4 + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: ${{ runner.os }}-cargo-${{ matrix.arch }} + save-if: ${{ github.ref == 'refs/heads/master' }} + - uses: dsherret/rust-toolchain-file@v1 + - name: build + run: | + if [[ "${{matrix.arch}}" == "amd64" ]]; then + RUST_TARGET="x86_64-apple-darwin" + else + RUST_TARGET="aarch64-apple-darwin" + fi + cargo install --version 0.2.4 cross + rustup target add ${RUST_TARGET} + make -e RUST_TARGET_STATIC=$RUST_TARGET -e CARGO=cross static-release + + nydus-integration-test: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Docker Cache + uses: jpribyl/action-docker-layer-caching@v0.1.0 + continue-on-error: true + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: | + target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Older Binaries + id: prepare-binaries + run: | + export NYDUS_STABLE_VERSION=$(curl https://api.github.com/repos/Dragonflyoss/nydus/releases/latest | jq -r '.tag_name') + + versions=(v0.1.0 ${NYDUS_STABLE_VERSION}) + version_archs=(v0.1.0-x86_64 ${NYDUS_STABLE_VERSION}-linux-amd64) + for i in ${!versions[@]}; do + version=${versions[$i]} + version_arch=${version_archs[$i]} + + wget -q https://github.com/dragonflyoss/nydus/releases/download/$version/nydus-static-$version_arch.tgz + sudo mkdir nydus-$version /usr/bin/nydus-$version + sudo tar xzf nydus-static-$version_arch.tgz -C nydus-$version + sudo cp -r nydus-$version/nydus-static/* /usr/bin/nydus-$version/ + done + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: Integration Test + run: | + sudo mkdir -p /usr/bin/nydus-latest /home/runner/work/workdir + sudo install -D -m 755 contrib/nydusify/cmd/nydusify /usr/bin/nydus-latest + sudo install -D -m 755 target/release/nydusd target/release/nydus-image /usr/bin/nydus-latest + sudo bash misc/prepare.sh + + export NYDUS_STABLE_VERSION=$(curl https://api.github.com/repos/Dragonflyoss/nydus/releases/latest | jq -r '.tag_name') + export NYDUS_STABLE_VERSION_EXPORT="${NYDUS_STABLE_VERSION//./_}" + + versions=(v0.1.0 ${NYDUS_STABLE_VERSION} latest) + version_exports=(v0_1_0 ${NYDUS_STABLE_VERSION_EXPORT} latest) + for i in ${!version_exports[@]}; do + version=${versions[$i]} + version_export=${version_exports[$i]} + export NYDUS_BUILDER_$version_export=/usr/bin/nydus-$version/nydus-image + export NYDUS_NYDUSD_$version_export=/usr/bin/nydus-$version/nydusd + export NYDUS_NYDUSIFY_$version_export=/usr/bin/nydus-$version/nydusify + done + + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sudo sh -s -- -b /usr/bin v1.54.2 + sudo -E make smoke-only + + nydus-unit-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: Linux-cargo-amd64 + save-if: ${{ github.ref == 'refs/heads/master' }} + - name: Install cargo nextest + uses: 
taiki-e/install-action@nextest + - name: Fscache Setup + run: sudo bash misc/fscache/setup.sh + - name: Unit Test + run: | + CARGO_HOME=${HOME}/.cargo + CARGO_BIN=$(which cargo) + sudo -E CARGO=${CARGO_BIN} make ut-nextest + + contrib-unit-test-coverage: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'go.work' + cache-dependency-path: "**/*.sum" + - name: Unit Test + run: | + make -e DOCKER=false contrib-test + - name: Upload contrib coverage file + uses: actions/upload-artifact@v4 + with: + name: contrib-test-coverage-artifact + path: | + contrib/nydusify/coverage.txt + + nydus-unit-test-coverage: + runs-on: ubuntu-latest + env: + CARGO_TERM_COLOR: always + steps: + - uses: actions/checkout@v4 + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + shared-key: Linux-cargo-amd64 + save-if: ${{ github.ref == 'refs/heads/master' }} + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + - name: Fscache Setup + run: sudo bash misc/fscache/setup.sh + - name: Generate code coverage + run: | + CARGO_HOME=${HOME}/.cargo + CARGO_BIN=$(which cargo) + sudo -E CARGO=${CARGO_BIN} make coverage-codecov + - name: Upload nydus coverage file + uses: actions/upload-artifact@v4 + with: + name: nydus-test-coverage-artifact + path: | + codecov.json + + upload-coverage-to-codecov: + runs-on: ubuntu-latest + needs: [contrib-unit-test-coverage, nydus-unit-test-coverage] + steps: + - uses: actions/checkout@v4 + - name: Download nydus coverage file + uses: actions/download-artifact@v4 + with: + name: nydus-test-coverage-artifact + - name: Download contrib coverage file + uses: actions/download-artifact@v4 + with: + name: contrib-test-coverage-artifact + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: ./codecov.json,./coverage.txt + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + fail_ci_if_error: true + + nydus-cargo-deny: + name: cargo-deny + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - uses: EmbarkStudios/cargo-deny-action@v1 + + performance-test: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + strategy: + matrix: + include: + - mode: fs-version-5 + - mode: fs-version-6 + - mode: zran + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Nydus Container Environment + run: | + sudo bash misc/prepare.sh + - name: Performance Test + run: | + export PERFORMANCE_TEST_MODE=${{ matrix.mode }} + sudo -E make smoke-performance + + takeover-test: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Download Nydus + uses: actions/download-artifact@v4 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v4 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Nydus Container Environment + run: | + sudo bash misc/prepare.sh takeover_test + - name: Takeover Test + run: | + export NEW_NYDUSD_BINARY_PATH=target/release/nydusd + sudo -E make smoke-takeover diff --git a/.gitignore b/.gitignore index 22e6f1f8e7e..1b4a56d057f 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ -**/target* -**/*.rs.bk -**/.vscode -.idea -.cargo -**/.pyc -__pycache__ -.DS_Store -go.work.sum +**/target* +**/*.rs.bk +**/.vscode +.idea +.cargo +**/.pyc +__pycache__ +.DS_Store +go.work.sum diff --git a/ADOPTERS.md b/ADOPTERS.md index c8104048a50..e1ec07577d9 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -1,16 +1,16 @@ -## CNCF Dragonfly Nydus Adopters - -A non-exhaustive list of Nydus adopters is provided below. -Please kindly share your experience about Nydus with us and help us to improve Nydus ❤️. - -**_[Alibaba Cloud](https://www.alibabacloud.com)_** - Aliyun serverless image pull time drops from 20 seconds to 0.8s seconds. - -**_[Ant Group](https://www.antgroup.com)_** - Serving large-scale clusters with millions of container creations each day. - -**_[ByteDance](https://www.bytedance.com)_** - Serving container image acceleration in Technical Infrastructure of ByteDance. - -**_[KuaiShou](https://www.kuaishou.com)_** - Starting to deploy millions of containers with Dragonfly and Nydus. - -**_[Yue Miao](https://www.laiyuemiao.com)_** - The startup time of micro service has been greatly improved, and reduced the network consumption. - -**_[CoreWeave](https://coreweave.com/)_** - Dramatically reduce the pull time of container image which embedded machine learning models. +## CNCF Dragonfly Nydus Adopters + +A non-exhaustive list of Nydus adopters is provided below. +Please kindly share your experience about Nydus with us and help us to improve Nydus ❤️. + +**_[Alibaba Cloud](https://www.alibabacloud.com)_** - Aliyun serverless image pull time drops from 20 seconds to 0.8s seconds. + +**_[Ant Group](https://www.antgroup.com)_** - Serving large-scale clusters with millions of container creations each day. + +**_[ByteDance](https://www.bytedance.com)_** - Serving container image acceleration in Technical Infrastructure of ByteDance. + +**_[KuaiShou](https://www.kuaishou.com)_** - Starting to deploy millions of containers with Dragonfly and Nydus. + +**_[Yue Miao](https://www.laiyuemiao.com)_** - The startup time of micro service has been greatly improved, and reduced the network consumption. + +**_[CoreWeave](https://coreweave.com/)_** - Dramatically reduce the pull time of container image which embedded machine learning models. 
diff --git a/Cargo.lock b/Cargo.lock index a5ea303314b..fec3b14a637 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,11 +226,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.73" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -1006,9 +1007,9 @@ checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" [[package]] name = "jobserver" -version = "0.1.24" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -1520,11 +1521,11 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.55" +version = "0.10.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cfg-if", "foreign-types", "libc", @@ -1552,18 +1553,18 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "111.25.0+1.1.1t" +version = "300.3.1+3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3173cd3626c43e3854b1b727422a276e568d9ec5fe8cec197822cf52cfb743d6" +checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.90" +version = "0.9.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", diff --git a/Cargo.toml b/Cargo.toml index fbd64232437..7b60c1e5670 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,129 +1,129 @@ -[package] -name = "nydus-rs" -# will be overridden by real git tag during cargo build -version = "0.0.0-git" -description = "Nydus Image Service" -authors = ["The Nydus Developers"] -license = "Apache-2.0 OR BSD-3-Clause" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -exclude = ["contrib/", "smoke/", "tests/"] -edition = "2021" -resolver = "2" -build = "build.rs" - -[profile.release] -panic = "abort" - -[[bin]] -name = "nydusctl" -path = "src/bin/nydusctl/main.rs" - -[[bin]] -name = "nydusd" -path = "src/bin/nydusd/main.rs" - -[[bin]] -name = "nydus-image" -path = "src/bin/nydus-image/main.rs" - -[lib] -name = "nydus" -path = "src/lib.rs" - -[dependencies] -anyhow = "1" -clap = { version = "4.0.18", features = ["derive", "cargo"] } -flexi_logger = { version = "0.25", features = ["compress"] } -fuse-backend-rs = "^0.12.0" -hex = "0.4.3" -hyper = "0.14.11" -hyperlocal = "0.8.0" -lazy_static = "1" -libc = "0.2" -log = "0.4.8" -log-panics = { version = "2.1.0", features = ["with-backtrace"] } -mio = { version = "0.8", features = ["os-poll", "os-ext"] } -nix = "0.24.0" -rlimit = "0.9.0" -rusqlite = { version = "0.30.0", features = ["bundled"] } -serde = { version = "1.0.110", features = ["serde_derive", 
"rc"] } -serde_json = "1.0.51" -tar = "0.4.40" -tokio = { version = "1.35.1", features = ["macros"] } - -# Build static linked openssl library -openssl = { version = "0.10.55", features = ["vendored"] } -# pin openssl-src to bring in fix for https://rustsec.org/advisories/RUSTSEC-2022-0032 -#openssl-src = { version = "111.22" } - -nydus-api = { version = "0.3.0", path = "api", features = [ - "error-backtrace", - "handler", -] } -nydus-builder = { version = "0.1.0", path = "builder" } -nydus-rafs = { version = "0.3.1", path = "rafs" } -nydus-service = { version = "0.3.0", path = "service", features = [ - "block-device", -] } -nydus-storage = { version = "0.6.3", path = "storage", features = [ - "prefetch-rate-limit", -] } -nydus-utils = { version = "0.4.2", path = "utils" } - -vhost = { version = "0.6.0", features = ["vhost-user-slave"], optional = true } -vhost-user-backend = { version = "0.8.0", optional = true } -virtio-bindings = { version = "0.1", features = [ - "virtio-v5_0_0", -], optional = true } -virtio-queue = { version = "0.7.0", optional = true } -vm-memory = { version = "0.10.0", features = ["backend-mmap"], optional = true } -vmm-sys-util = { version = "0.11.0", optional = true } - -[build-dependencies] -time = { version = "0.3.14", features = ["formatting"] } - -[dev-dependencies] -xattr = "1.0.1" -vmm-sys-util = "0.11.0" - -[features] -default = [ - "fuse-backend-rs/fusedev", - "backend-registry", - "backend-oss", - "backend-s3", - "backend-http-proxy", - "backend-localdisk", -] -virtiofs = [ - "nydus-service/virtiofs", - "vhost", - "vhost-user-backend", - "virtio-bindings", - "virtio-queue", - "vm-memory", - "vmm-sys-util", -] -block-nbd = ["nydus-service/block-nbd"] - -backend-http-proxy = ["nydus-storage/backend-http-proxy"] -backend-localdisk = [ - "nydus-storage/backend-localdisk", - "nydus-storage/backend-localdisk-gpt", -] -backend-oss = ["nydus-storage/backend-oss"] -backend-registry = ["nydus-storage/backend-registry"] -backend-s3 = ["nydus-storage/backend-s3"] - -[workspace] -members = [ - "api", - "builder", - "clib", - "rafs", - "storage", - "service", - "upgrade", - "utils", -] +[package] +name = "nydus-rs" +# will be overridden by real git tag during cargo build +version = "0.0.0-git" +description = "Nydus Image Service" +authors = ["The Nydus Developers"] +license = "Apache-2.0 OR BSD-3-Clause" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +exclude = ["contrib/", "smoke/", "tests/"] +edition = "2021" +resolver = "2" +build = "build.rs" + +[profile.release] +panic = "abort" + +[[bin]] +name = "nydusctl" +path = "src/bin/nydusctl/main.rs" + +[[bin]] +name = "nydusd" +path = "src/bin/nydusd/main.rs" + +[[bin]] +name = "nydus-image" +path = "src/bin/nydus-image/main.rs" + +[lib] +name = "nydus" +path = "src/lib.rs" + +[dependencies] +anyhow = "1" +clap = { version = "4.0.18", features = ["derive", "cargo"] } +flexi_logger = { version = "0.25", features = ["compress"] } +fuse-backend-rs = "^0.12.0" +hex = "0.4.3" +hyper = "0.14.11" +hyperlocal = "0.8.0" +lazy_static = "1" +libc = "0.2" +log = "0.4.8" +log-panics = { version = "2.1.0", features = ["with-backtrace"] } +mio = { version = "0.8", features = ["os-poll", "os-ext"] } +nix = "0.24.0" +rlimit = "0.9.0" +rusqlite = { version = "0.30.0", features = ["bundled"] } +serde = { version = "1.0.110", features = ["serde_derive", "rc"] } +serde_json = "1.0.51" +tar = "0.4.40" +tokio = { version = "1.35.1", features = ["macros"] } + +# Build static linked openssl library 
+openssl = { version = "0.10.55", features = ["vendored"] } +# pin openssl-src to bring in fix for https://rustsec.org/advisories/RUSTSEC-2022-0032 +#openssl-src = { version = "111.22" } + +nydus-api = { version = "0.3.0", path = "api", features = [ + "error-backtrace", + "handler", +] } +nydus-builder = { version = "0.1.0", path = "builder" } +nydus-rafs = { version = "0.3.1", path = "rafs" } +nydus-service = { version = "0.3.0", path = "service", features = [ + "block-device", +] } +nydus-storage = { version = "0.6.3", path = "storage", features = [ + "prefetch-rate-limit", +] } +nydus-utils = { version = "0.4.2", path = "utils" } + +vhost = { version = "0.6.0", features = ["vhost-user-slave"], optional = true } +vhost-user-backend = { version = "0.8.0", optional = true } +virtio-bindings = { version = "0.1", features = [ + "virtio-v5_0_0", +], optional = true } +virtio-queue = { version = "0.7.0", optional = true } +vm-memory = { version = "0.10.0", features = ["backend-mmap"], optional = true } +vmm-sys-util = { version = "0.11.0", optional = true } + +[build-dependencies] +time = { version = "0.3.14", features = ["formatting"] } + +[dev-dependencies] +xattr = "1.0.1" +vmm-sys-util = "0.11.0" + +[features] +default = [ + "fuse-backend-rs/fusedev", + "backend-registry", + "backend-oss", + "backend-s3", + "backend-http-proxy", + "backend-localdisk", +] +virtiofs = [ + "nydus-service/virtiofs", + "vhost", + "vhost-user-backend", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] +block-nbd = ["nydus-service/block-nbd"] + +backend-http-proxy = ["nydus-storage/backend-http-proxy"] +backend-localdisk = [ + "nydus-storage/backend-localdisk", + "nydus-storage/backend-localdisk-gpt", +] +backend-oss = ["nydus-storage/backend-oss"] +backend-registry = ["nydus-storage/backend-registry"] +backend-s3 = ["nydus-storage/backend-s3"] + +[workspace] +members = [ + "api", + "builder", + "clib", + "rafs", + "storage", + "service", + "upgrade", + "utils", +] diff --git a/Cross.toml b/Cross.toml index 8fc41c2395b..5ca6dca82a7 100644 --- a/Cross.toml +++ b/Cross.toml @@ -1,2 +1,2 @@ -[build] -pre-build = ["apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cmake"] +[build] +pre-build = ["apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cmake"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE index d6456956733..75b52484ea4 100644 --- a/LICENSE-APACHE +++ b/LICENSE-APACHE @@ -1,202 +1,202 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/LICENSE-BSD-3-Clause b/LICENSE-BSD-3-Clause index 4969c211da7..5d1d01fc06d 100644 --- a/LICENSE-BSD-3-Clause +++ b/LICENSE-BSD-3-Clause @@ -1,27 +1,27 @@ -Copyright 2022 The Nydus Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +Copyright 2022 The Nydus Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Makefile b/Makefile index 87bd8628a89..d77e1f23d4d 100644 --- a/Makefile +++ b/Makefile @@ -1,208 +1,208 @@ -all: release - -all-build: build contrib-build - -all-release: release contrib-release - -all-static-release: static-release docker-static contrib-release - -all-install: install contrib-install - -all-clean: clean contrib-clean - -TEST_WORKDIR_PREFIX ?= "/tmp" -INSTALL_DIR_PREFIX ?= "/usr/local/bin" -DOCKER ?= "true" - -CARGO ?= $(shell which cargo) -RUSTUP ?= $(shell which rustup) -CARGO_BUILD_GEARS = -v ~/.ssh/id_rsa:/root/.ssh/id_rsa -v ~/.cargo/git:/root/.cargo/git -v ~/.cargo/registry:/root/.cargo/registry -SUDO = $(shell which sudo) -CARGO_COMMON ?= - -EXCLUDE_PACKAGES = -UNAME_M := $(shell uname -m) -UNAME_S := $(shell uname -s) -STATIC_TARGET = $(UNAME_M)-unknown-linux-musl -ifeq ($(UNAME_S),Linux) - CARGO_COMMON += --features=virtiofs -ifeq ($(UNAME_M),ppc64le) - STATIC_TARGET = powerpc64le-unknown-linux-gnu -endif -ifeq ($(UNAME_M),riscv64) - STATIC_TARGET = riscv64gc-unknown-linux-gnu -endif -endif -ifeq ($(UNAME_S),Darwin) - EXCLUDE_PACKAGES += --exclude nydus-blobfs -ifeq ($(UNAME_M),amd64) - STATIC_TARGET = x86_64-apple-darwin -endif -ifeq ($(UNAME_M),arm64) - STATIC_TARGET = aarch64-apple-darwin -endif -endif -RUST_TARGET_STATIC ?= $(STATIC_TARGET) - -CTR-REMOTE_PATH = contrib/ctr-remote -NYDUSIFY_PATH = contrib/nydusify -NYDUS-OVERLAYFS_PATH = contrib/nydus-overlayfs - -current_dir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -env_go_path := $(shell go env GOPATH 2> /dev/null) -go_path := $(if $(env_go_path),$(env_go_path),"$(HOME)/go") - -# Functions - -# Func: build golang target in docker -# Args: -# $(1): The path where go build a golang project -# $(2): How to build the golang project -define build_golang - echo "Building target $@ by invoking: $(2)" - if [ $(DOCKER) = "true" ]; then \ - docker run --rm -v ${go_path}:/go -v ${current_dir}:/nydus-rs --workdir /nydus-rs/$(1) golang:1.21 $(2) ;\ - else \ - $(2) -C $(1); \ - fi -endef - -.PHONY: .release_version .format .musl_target .clean_libz_sys \ - all all-build all-release all-static-release build release static-release - -.release_version: - $(eval CARGO_BUILD_FLAGS += --release) - -.format: - ${CARGO} fmt -- --check - -.musl_target: - $(eval CARGO_BUILD_FLAGS += --target ${RUST_TARGET_STATIC}) - -# Workaround to clean up stale cache for libz-sys -.clean_libz_sys: - @${CARGO} clean --target ${RUST_TARGET_STATIC} -p libz-sys - @${CARGO} clean --target ${RUST_TARGET_STATIC} --release -p libz-sys - -# Targets that are exposed to developers and users. 
-build: .format - ${CARGO} build $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) - # Cargo will skip checking if it is already checked - ${CARGO} clippy --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) --bins --tests -- -Dwarnings --allow clippy::unnecessary_cast --allow clippy::needless_borrow - -release: .format .release_version build - -static-release: .clean_libz_sys .musl_target .format .release_version build - -clean: - ${CARGO} clean - -install: release - @sudo mkdir -m 755 -p $(INSTALL_DIR_PREFIX) - @sudo install -m 755 target/release/nydusd $(INSTALL_DIR_PREFIX)/nydusd - @sudo install -m 755 target/release/nydus-image $(INSTALL_DIR_PREFIX)/nydus-image - @sudo install -m 755 target/release/nydusctl $(INSTALL_DIR_PREFIX)/nydusctl - -# unit test -ut: .release_version - TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) RUST_BACKTRACE=1 ${CARGO} test --no-fail-fast --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 - -# you need install cargo nextest first from: https://nexte.st/book/pre-built-binaries.html -ut-nextest: .release_version - TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) RUST_BACKTRACE=1 ${CARGO} nextest run --no-fail-fast --filter-expr 'test(test) - test(integration)' --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) --test-threads 8 - -# install test dependencies -pre-coverage: - ${CARGO} +stable install cargo-llvm-cov --locked - ${RUSTUP} component add llvm-tools-preview - -# print unit test coverage to console -coverage: pre-coverage - TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) ${CARGO} llvm-cov --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 - -# write unit teset coverage to codecov.json, used for Github CI -coverage-codecov: - TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) ${CARGO} llvm-cov --codecov --output-path codecov.json --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 - -smoke-only: - make -C smoke test - -smoke-performance: - make -C smoke test-performance - -smoke-benchmark: - make -C smoke test-benchmark - -smoke-takeover: - make -C smoke test-takeover - -smoke: release smoke-only - -contrib-build: nydusify ctr-remote nydus-overlayfs - -contrib-release: nydusify-release ctr-remote-release \ - nydus-overlayfs-release - -contrib-test: nydusify-test ctr-remote-test \ - nydus-overlayfs-test - -contrib-lint: nydusify-lint ctr-remote-lint \ - nydus-overlayfs-lint - -contrib-clean: nydusify-clean ctr-remote-clean \ - nydus-overlayfs-clean - -contrib-install: - @sudo mkdir -m 755 -p $(INSTALL_DIR_PREFIX) - @sudo install -m 755 contrib/ctr-remote/bin/ctr-remote $(INSTALL_DIR_PREFIX)/ctr-remote - @sudo install -m 755 contrib/nydus-overlayfs/bin/nydus-overlayfs $(INSTALL_DIR_PREFIX)/nydus-overlayfs - @sudo install -m 755 contrib/nydusify/cmd/nydusify $(INSTALL_DIR_PREFIX)/nydusify - -nydusify: - $(call build_golang,${NYDUSIFY_PATH},make) - -nydusify-release: - $(call build_golang,${NYDUSIFY_PATH},make release) - -nydusify-test: - $(call build_golang,${NYDUSIFY_PATH},make test) - -nydusify-clean: - $(call build_golang,${NYDUSIFY_PATH},make clean) - -nydusify-lint: - $(call build_golang,${NYDUSIFY_PATH},make lint) - -ctr-remote: - $(call build_golang,${CTR-REMOTE_PATH},make) - -ctr-remote-release: - $(call build_golang,${CTR-REMOTE_PATH},make release) - -ctr-remote-test: - $(call build_golang,${CTR-REMOTE_PATH},make test) - -ctr-remote-clean: - 
$(call build_golang,${CTR-REMOTE_PATH},make clean) - -ctr-remote-lint: - $(call build_golang,${CTR-REMOTE_PATH},make lint) - -nydus-overlayfs: - $(call build_golang,${NYDUS-OVERLAYFS_PATH},make) - -nydus-overlayfs-release: - $(call build_golang,${NYDUS-OVERLAYFS_PATH},make release) - -nydus-overlayfs-test: - $(call build_golang,${NYDUS-OVERLAYFS_PATH},make test) - -nydus-overlayfs-clean: - $(call build_golang,${NYDUS-OVERLAYFS_PATH},make clean) - -nydus-overlayfs-lint: - $(call build_golang,${NYDUS-OVERLAYFS_PATH},make lint) - -docker-static: - docker build -t nydus-rs-static --build-arg RUST_TARGET=${RUST_TARGET_STATIC} misc/musl-static - docker run --rm ${CARGO_BUILD_GEARS} -e RUST_TARGET=${RUST_TARGET_STATIC} --workdir /nydus-rs -v ${current_dir}:/nydus-rs nydus-rs-static +all: release + +all-build: build contrib-build + +all-release: release contrib-release + +all-static-release: static-release docker-static contrib-release + +all-install: install contrib-install + +all-clean: clean contrib-clean + +TEST_WORKDIR_PREFIX ?= "/tmp" +INSTALL_DIR_PREFIX ?= "/usr/local/bin" +DOCKER ?= "true" + +CARGO ?= $(shell which cargo) +RUSTUP ?= $(shell which rustup) +CARGO_BUILD_GEARS = -v ~/.ssh/id_rsa:/root/.ssh/id_rsa -v ~/.cargo/git:/root/.cargo/git -v ~/.cargo/registry:/root/.cargo/registry +SUDO = $(shell which sudo) +CARGO_COMMON ?= + +EXCLUDE_PACKAGES = +UNAME_M := $(shell uname -m) +UNAME_S := $(shell uname -s) +STATIC_TARGET = $(UNAME_M)-unknown-linux-musl +ifeq ($(UNAME_S),Linux) + CARGO_COMMON += --features=virtiofs +ifeq ($(UNAME_M),ppc64le) + STATIC_TARGET = powerpc64le-unknown-linux-gnu +endif +ifeq ($(UNAME_M),riscv64) + STATIC_TARGET = riscv64gc-unknown-linux-gnu +endif +endif +ifeq ($(UNAME_S),Darwin) + EXCLUDE_PACKAGES += --exclude nydus-blobfs +ifeq ($(UNAME_M),amd64) + STATIC_TARGET = x86_64-apple-darwin +endif +ifeq ($(UNAME_M),arm64) + STATIC_TARGET = aarch64-apple-darwin +endif +endif +RUST_TARGET_STATIC ?= $(STATIC_TARGET) + +CTR-REMOTE_PATH = contrib/ctr-remote +NYDUSIFY_PATH = contrib/nydusify +NYDUS-OVERLAYFS_PATH = contrib/nydus-overlayfs + +current_dir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +env_go_path := $(shell go env GOPATH 2> /dev/null) +go_path := $(if $(env_go_path),$(env_go_path),"$(HOME)/go") + +# Functions + +# Func: build golang target in docker +# Args: +# $(1): The path where go build a golang project +# $(2): How to build the golang project +define build_golang + echo "Building target $@ by invoking: $(2)" + if [ $(DOCKER) = "true" ]; then \ + docker run --rm -v ${go_path}:/go -v ${current_dir}:/nydus-rs --workdir /nydus-rs/$(1) golang:1.21 $(2) ;\ + else \ + $(2) -C $(1); \ + fi +endef + +.PHONY: .release_version .format .musl_target .clean_libz_sys \ + all all-build all-release all-static-release build release static-release + +.release_version: + $(eval CARGO_BUILD_FLAGS += --release) + +.format: + ${CARGO} fmt -- --check + +.musl_target: + $(eval CARGO_BUILD_FLAGS += --target ${RUST_TARGET_STATIC}) + +# Workaround to clean up stale cache for libz-sys +.clean_libz_sys: + @${CARGO} clean --target ${RUST_TARGET_STATIC} -p libz-sys + @${CARGO} clean --target ${RUST_TARGET_STATIC} --release -p libz-sys + +# Targets that are exposed to developers and users. 
+build: .format + ${CARGO} build $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) + # Cargo will skip checking if it is already checked + ${CARGO} clippy --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) --bins --tests -- -Dwarnings --allow clippy::unnecessary_cast --allow clippy::needless_borrow + +release: .format .release_version build + +static-release: .clean_libz_sys .musl_target .format .release_version build + +clean: + ${CARGO} clean + +install: release + @sudo mkdir -m 755 -p $(INSTALL_DIR_PREFIX) + @sudo install -m 755 target/release/nydusd $(INSTALL_DIR_PREFIX)/nydusd + @sudo install -m 755 target/release/nydus-image $(INSTALL_DIR_PREFIX)/nydus-image + @sudo install -m 755 target/release/nydusctl $(INSTALL_DIR_PREFIX)/nydusctl + +# unit test +ut: .release_version + TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) RUST_BACKTRACE=1 ${CARGO} test --no-fail-fast --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 + +# you need install cargo nextest first from: https://nexte.st/book/pre-built-binaries.html +ut-nextest: .release_version + TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) RUST_BACKTRACE=1 ${CARGO} nextest run --no-fail-fast --filter-expr 'test(test) - test(integration)' --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) --test-threads 8 + +# install test dependencies +pre-coverage: + ${CARGO} +stable install cargo-llvm-cov --locked + ${RUSTUP} component add llvm-tools-preview + +# print unit test coverage to console +coverage: pre-coverage + TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) ${CARGO} llvm-cov --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 + +# write unit teset coverage to codecov.json, used for Github CI +coverage-codecov: + TEST_WORKDIR_PREFIX=$(TEST_WORKDIR_PREFIX) ${CARGO} llvm-cov --codecov --output-path codecov.json --workspace $(EXCLUDE_PACKAGES) $(CARGO_COMMON) $(CARGO_BUILD_FLAGS) -- --skip integration --nocapture --test-threads=8 + +smoke-only: + make -C smoke test + +smoke-performance: + make -C smoke test-performance + +smoke-benchmark: + make -C smoke test-benchmark + +smoke-takeover: + make -C smoke test-takeover + +smoke: release smoke-only + +contrib-build: nydusify ctr-remote nydus-overlayfs + +contrib-release: nydusify-release ctr-remote-release \ + nydus-overlayfs-release + +contrib-test: nydusify-test ctr-remote-test \ + nydus-overlayfs-test + +contrib-lint: nydusify-lint ctr-remote-lint \ + nydus-overlayfs-lint + +contrib-clean: nydusify-clean ctr-remote-clean \ + nydus-overlayfs-clean + +contrib-install: + @sudo mkdir -m 755 -p $(INSTALL_DIR_PREFIX) + @sudo install -m 755 contrib/ctr-remote/bin/ctr-remote $(INSTALL_DIR_PREFIX)/ctr-remote + @sudo install -m 755 contrib/nydus-overlayfs/bin/nydus-overlayfs $(INSTALL_DIR_PREFIX)/nydus-overlayfs + @sudo install -m 755 contrib/nydusify/cmd/nydusify $(INSTALL_DIR_PREFIX)/nydusify + +nydusify: + $(call build_golang,${NYDUSIFY_PATH},make) + +nydusify-release: + $(call build_golang,${NYDUSIFY_PATH},make release) + +nydusify-test: + $(call build_golang,${NYDUSIFY_PATH},make test) + +nydusify-clean: + $(call build_golang,${NYDUSIFY_PATH},make clean) + +nydusify-lint: + $(call build_golang,${NYDUSIFY_PATH},make lint) + +ctr-remote: + $(call build_golang,${CTR-REMOTE_PATH},make) + +ctr-remote-release: + $(call build_golang,${CTR-REMOTE_PATH},make release) + +ctr-remote-test: + $(call build_golang,${CTR-REMOTE_PATH},make test) + +ctr-remote-clean: + 
$(call build_golang,${CTR-REMOTE_PATH},make clean) + +ctr-remote-lint: + $(call build_golang,${CTR-REMOTE_PATH},make lint) + +nydus-overlayfs: + $(call build_golang,${NYDUS-OVERLAYFS_PATH},make) + +nydus-overlayfs-release: + $(call build_golang,${NYDUS-OVERLAYFS_PATH},make release) + +nydus-overlayfs-test: + $(call build_golang,${NYDUS-OVERLAYFS_PATH},make test) + +nydus-overlayfs-clean: + $(call build_golang,${NYDUS-OVERLAYFS_PATH},make clean) + +nydus-overlayfs-lint: + $(call build_golang,${NYDUS-OVERLAYFS_PATH},make lint) + +docker-static: + docker build -t nydus-rs-static --build-arg RUST_TARGET=${RUST_TARGET_STATIC} misc/musl-static + docker run --rm ${CARGO_BUILD_GEARS} -e RUST_TARGET=${RUST_TARGET_STATIC} --workdir /nydus-rs -v ${current_dir}:/nydus-rs nydus-rs-static diff --git a/README.md b/README.md index 3b54a8702a1..ac8861ae75a 100644 --- a/README.md +++ b/README.md @@ -1,172 +1,172 @@ -[**[⬇️ Download]**](https://github.com/dragonflyoss/nydus/releases) -[**[📖 Website]**](https://nydus.dev/) -[**[☸ Quick Start (Kubernetes)**]](https://github.com/containerd/nydus-snapshotter/blob/main/docs/run_nydus_in_kubernetes.md) -[**[🤓 Quick Start (nerdctl)**]](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md) -[**[❓ FAQs & Troubleshooting]**](https://github.com/dragonflyoss/nydus/wiki/FAQ) - -# Nydus: Dragonfly Container Image Service - -
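The Makefile above drives both the Rust workspace and the contrib Go tools, with the `DOCKER` variable selecting between containerized and local Go builds. A minimal usage sketch, with target names and variables taken from the Makefile itself (adjust to your environment):

```shell
# Optimized build of the Rust workspace binaries (installed later as
# nydusd, nydus-image and nydusctl by `make install`).
make release

# Statically linked build inside the musl builder image defined under
# misc/musl-static; RUST_TARGET_STATIC may be overridden for cross builds.
make docker-static

# Build a contrib Go tool without the golang:1.21 container, i.e. run
# `make release` directly inside contrib/nydusify (DOCKER defaults to "true").
make -e DOCKER=false nydusify-release
```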

- -[![Release Version](https://img.shields.io/github/v/release/dragonflyoss/nydus?style=flat)](https://github.com/dragonflyoss/nydus/releases) -[![License](https://img.shields.io/crates/l/nydus-rs)](https://crates.io/crates/nydus-rs) -[![Twitter](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Fdragonfly_oss)](https://twitter.com/dragonfly_oss) -[![Nydus Stars](https://img.shields.io/github/stars/dragonflyoss/nydus?label=Nydus%20Stars&style=social)](https://github.com/dragonflyoss/nydus) - -[![Smoke Test](https://github.com/dragonflyoss/nydus/actions/workflows/smoke.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/smoke.yml?query=event%3Aschedule) -[![Image Conversion](https://github.com/dragonflyoss/nydus/actions/workflows/convert.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/convert.yml?query=event%3Aschedule) -[![Release Test Daily](https://github.com/dragonflyoss/nydus/actions/workflows/release.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/release.yml?query=event%3Aschedule) -[![Benchmark](https://github.com/dragonflyoss/nydus/actions/workflows/benchmark.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/benchmark.yml?query=event%3Aschedule) -[![Coverage](https://codecov.io/gh/dragonflyoss/nydus/branch/master/graph/badge.svg)](https://codecov.io/gh/dragonflyoss/nydus) - -## Introduction -Nydus implements a content-addressable file system on the RAFS format, which enhances the current OCI image specification by improving container launch speed, image space and network bandwidth efficiency, and data integrity. - -The following Benchmarking results demonstrate that Nydus images significantly outperform OCI images in terms of container cold startup elapsed time on Containerd, particularly as the OCI image size increases. - -![Container Cold Startup](./misc/perf.jpg) - -## Principles - -***Provide Fast, Secure And Easy Access to Data Distribution*** - -- **Performance**: Second-level container startup speed, millisecond-level function computation code package loading speed. -- **Low Cost**: Written in memory-safed language `Rust`, numerous optimizations help improve memory, CPU, and network consumption. -- **Flexible**: Supports container runtimes such as [runC](https://github.com/opencontainers/runc) and [Kata](https://github.com/kata-containers), and provides [Confidential Containers](https://github.com/confidential-containers) and vulnerability scanning capabilities -- **Security**: End to end data integrity check, Supply Chain Attack can be detected and avoided at runtime. - -## Key features - -- **On-demand Load**: Container images/packages are downloaded on-demand in chunk unit to boost startup. -- **Chunk Deduplication**: Chunk level data de-duplication cross-layer or cross-image to reduce storage, transport, and memory cost. -- **Compatible with Ecosystem**: Storage backend support with Registry, OSS, NAS, Shared Disk, and [P2P service](https://d7y.io/). Compatible with the [OCI images](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-zran.md), and provide native [eStargz images](https://github.com/containerd/stargz-snapshotter) support. -- **Data Analyzability**: Record accesses, data layout optimization, prefetch, IO amplification, abnormal behavior detection. 
-- **POSIX Compatibility**: In-Kernel EROFS or FUSE filesystems together with overlayfs provide full POSIX compatibility -- **I/O optimization**: Use merged filesystem tree, data prefetching and User I/O amplification to reduce read latency and improve user I/O performance. - -## Ecosystem -### Nydus tools - -| Tool | Description | -| ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [nydusd](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusd.md) | Nydus user-space daemon, it processes all fscache/FUSE messages from the kernel and parses Nydus images to fullfil those requests | -| [nydus-image](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) | Convert a single layer of OCI format container image into a nydus format container image generating meta part file and data part file respectively | -| [nydusify](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusify.md) | It pulls OCI image down and unpack it, invokes `nydus-image create` to convert image and then pushes the converted image back to registry and data storage | -| [nydusctl](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) | Nydusd CLI client (`nydus-image inspect`), query daemon's working status/metrics and configure it | -| [ctr-remote](https://github.com/dragonflyoss/nydus/tree/master/contrib/ctr-remote) | An enhanced `containerd` CLI tool enable nydus support with `containerd` ctr | -| [nydus-docker-graphdriver](https://github.com/nydusaccelerator/docker-nydus-graphdriver) | [Experimental] Works as a `docker` remote graph driver to control how images and containers are stored and managed | -| [nydus-overlayfs](https://github.com/dragonflyoss/nydus/tree/master/contrib/nydus-overlayfs) | `Containerd` mount helper to invoke overlayfs mount with tweaking mount options a bit. 
So nydus prerequisites can be passed to vm-based runtime | -| [nydus-backend-proxy](./contrib/nydus-backend-proxy/README.md) | A simple HTTP server to serve local directory as a blob backend for nydusd | - -### Supported platforms - -| Type | Platform | Description | Status | -| ------------- | --------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | -| Storage | Registry/OSS/S3/NAS | Support for OCI-compatible distribution implementations such as Docker Hub, Harbor, Github GHCR, Aliyun ACR, NAS, and Aliyun OSS-like object storage service | ✅ | -| Storage/Build | [Harbor](https://github.com/goharbor/acceleration-service) | Provides a general service for Harbor to support acceleration image conversion based on kinds of accelerator like Nydus and eStargz etc | ✅ | -| Distribution | [Dragonfly](https://github.com/dragonflyoss/Dragonfly2) | Improve the runtime performance of Nydus image even further with the Dragonfly P2P data distribution system | ✅ | -| Build | [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md) | Provides the ability to build and export Nydus images directly from Dockerfile | ✅ | -| Build/Runtime | [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md) | The containerd client to build or run (requires nydus snapshotter) Nydus image | ✅ | -| Runtime | [Docker / Moby](https://github.com/dragonflyoss/nydus/blob/master/docs/docker-env-setup.md) | Run Nydus image in Docker container with containerd and nydus-snapshotter | ✅ | -| Runtime | [Kubernetes](https://github.com/containerd/nydus-snapshotter/blob/main/docs/run_nydus_in_kubernetes.md) | Run Nydus image using CRI interface | ✅ | -| Runtime | [Containerd](https://github.com/containerd/nydus-snapshotter) | Nydus Snapshotter, a containerd remote plugin to run Nydus image | ✅ | -| Runtime | [CRI-O / Podman](https://github.com/containers/nydus-storage-plugin) | Run Nydus image with CRI-O or Podman | 🚧 | -| Runtime | [KataContainers](https://github.com/kata-containers/kata-containers/blob/main/docs/design/kata-nydus-design.md) | Run Nydus image in KataContainers as a native solution | ✅ | -| Runtime | [EROFS](https://www.kernel.org/doc/html/latest/filesystems/erofs.html) | Run Nydus image directly in-kernel EROFS for even greater performance improvement | ✅ | - -## Build - -### Build Binary -```shell -# build debug binary -make -# build release binary -make release -# build static binary with docker -make docker-static -``` - -### Build Nydus Image - -Convert OCIv1 image to Nydus image: [Nydusify](./docs/nydusify.md), [Acceld](https://github.com/goharbor/acceleration-service) or [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md#build-nydus-image-using-nerdctl-image-convert). - -Build Nydus image from Dockerfile directly: [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md). - -Build Nydus layer from various sources: [Nydus Image Builder](./docs/nydus-image.md). 
- -#### Image prefetch optimization -To further reduce container startup time, a nydus image with a prefetch list can be built using the NRI plugin (containerd >=1.7): [Container Image Optimizer](https://github.com/containerd/nydus-snapshotter/blob/main/docs/optimize_nydus_image.md) - -## Run -### Quick Start - -For more details on how to lazily start a container with `nydus-snapshotter` and nydus image on Kubernetes nodes or locally use `nerdctl` rather than CRI, please refer to [Nydus Setup](./docs/containerd-env-setup.md) - -### Run Nydus Snapshotter - -Nydus-snapshotter is a non-core sub-project of containerd. - -Check out its code and tutorial from [Nydus-snapshotter repository](https://github.com/containerd/nydus-snapshotter). -It works as a `containerd` remote snapshotter to help setup container rootfs with nydus images, which handles nydus image format when necessary. When running without nydus images, it is identical to the containerd's builtin overlayfs snapshotter. - -### Run Nydusd Daemon - -Normally, users do not need to start `nydusd` by hand. It is started by `nydus-snapshotter` when a container rootfs is prepared. - -Run Nydusd Daemon to serve Nydus image: [Nydusd](./docs/nydusd.md). - -### Run Nydus with in-kernel EROFS filesystem - -In-kernel EROFS has been fully compatible with RAFS v6 image format since Linux 5.16. In other words, uncompressed RAFS v6 images can be mounted over block devices since then. - -Since [Linux 5.19](https://lwn.net/Articles/896140), EROFS has added a new file-based caching (fscache) backend. In this way, compressed RAFS v6 images can be mounted directly with fscache subsystem, even such images are partially available. `estargz` can be converted on the fly and mounted in this way too. - -Guide to running Nydus with fscache: [Nydus-fscache](./docs/nydus-fscache.md) - -### Run Nydus with Dragonfly P2P system - -Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P system, which can greatly reduce the network latency and the single point pressure of the registry server. Benchmarking results in the production environment demonstrate that using Dragonfly can reduce network latency by more than 80%, to understand the performance results and integration steps, please refer to the [nydus integration](https://d7y.io/docs/setup/integration/nydus). - -If you want to deploy Dragonfly and Nydus at the same time through Helm, please refer to the **[Quick Start](https://github.com/dragonflyoss/helm-charts/blob/main/INSTALL.md)**. - -### Run OCI image directly with Nydus - -Nydus is able to generate a tiny artifact called a `nydus zran` from an existing OCI image in the short time. This artifact can be used to accelerate the container boot time without the need for a full image conversion. For more information, please see the [documentation](./docs/nydus-zran.md). - -### Run with Docker(Moby) - -Nydus provides a variety of methods to support running on docker(Moby), please refer to [Nydus Setup for Docker(Moby) Environment](./docs/docker-env-setup.md) - -### Run with macOS - -Nydus can also run with macfuse(a.k.a osxfuse). For more details please read [nydus with macOS](./docs/nydus_with_macos.md). - -### Run eStargz image (with lazy pulling) - -The containerd remote snapshotter plugin [nydus-snapshotter](https://github.com/containerd/nydus-snapshotter) can be used to run nydus images, or to run [eStargz](https://github.com/containerd/stargz-snapshotter) images directly by appending `--enable-stargz` command line option. 
- -In the future, `zstd::chunked` can work in this way as well. - -### Run Nydus Service - -Using the key features of nydus as native in your project without preparing and invoking `nydusd` deliberately, [nydus-service](./service/README.md) helps to reuse the core services of nyuds. - -## Documentation - -Please visit [**Wiki**](https://github.com/dragonflyoss/nydus/wiki), or [**docs**](./docs) - -## Community - -Nydus aims to form a **vendor-neutral opensource** image distribution solution to all communities. -Questions, bug reports, technical discussion, feature requests and contribution are always welcomed! - -We're very pleased to hear your use cases any time. -Feel free to reach us via Slack or Dingtalk. - -- **Slack:** [Nydus Workspace](https://join.slack.com/t/nydusimageservice/shared_invite/zt-pz4qvl4y-WIh4itPNILGhPS8JqdFm_w) - -- **Twitter:** [@dragonfly_oss](https://twitter.com/dragonfly_oss) - -- **Dingtalk:** [34971767](https://qr.dingtalk.com/action/joingroup?code=v1,k1,ioWGzuDZEIO10Bf+/ohz4RcQqAkW0MtOwoG1nbbMxQg=&_dt_no_comment=1&origin=11) - - +[**[⬇️ Download]**](https://github.com/dragonflyoss/nydus/releases) +[**[📖 Website]**](https://nydus.dev/) +[**[☸ Quick Start (Kubernetes)**]](https://github.com/containerd/nydus-snapshotter/blob/main/docs/run_nydus_in_kubernetes.md) +[**[🤓 Quick Start (nerdctl)**]](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md) +[**[❓ FAQs & Troubleshooting]**](https://github.com/dragonflyoss/nydus/wiki/FAQ) + +# Nydus: Dragonfly Container Image Service + +

+ +[![Release Version](https://img.shields.io/github/v/release/dragonflyoss/nydus?style=flat)](https://github.com/dragonflyoss/nydus/releases) +[![License](https://img.shields.io/crates/l/nydus-rs)](https://crates.io/crates/nydus-rs) +[![Twitter](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Fdragonfly_oss)](https://twitter.com/dragonfly_oss) +[![Nydus Stars](https://img.shields.io/github/stars/dragonflyoss/nydus?label=Nydus%20Stars&style=social)](https://github.com/dragonflyoss/nydus) + +[![Smoke Test](https://github.com/dragonflyoss/nydus/actions/workflows/smoke.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/smoke.yml?query=event%3Aschedule) +[![Image Conversion](https://github.com/dragonflyoss/nydus/actions/workflows/convert.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/convert.yml?query=event%3Aschedule) +[![Release Test Daily](https://github.com/dragonflyoss/nydus/actions/workflows/release.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/release.yml?query=event%3Aschedule) +[![Benchmark](https://github.com/dragonflyoss/nydus/actions/workflows/benchmark.yml/badge.svg?event=schedule)](https://github.com/dragonflyoss/nydus/actions/workflows/benchmark.yml?query=event%3Aschedule) +[![Coverage](https://codecov.io/gh/dragonflyoss/nydus/branch/master/graph/badge.svg)](https://codecov.io/gh/dragonflyoss/nydus) + +## Introduction +Nydus implements a content-addressable file system on the RAFS format, which enhances the current OCI image specification by improving container launch speed, image space and network bandwidth efficiency, and data integrity. + +The following Benchmarking results demonstrate that Nydus images significantly outperform OCI images in terms of container cold startup elapsed time on Containerd, particularly as the OCI image size increases. + +![Container Cold Startup](./misc/perf.jpg) + +## Principles + +***Provide Fast, Secure And Easy Access to Data Distribution*** + +- **Performance**: Second-level container startup speed, millisecond-level function computation code package loading speed. +- **Low Cost**: Written in memory-safed language `Rust`, numerous optimizations help improve memory, CPU, and network consumption. +- **Flexible**: Supports container runtimes such as [runC](https://github.com/opencontainers/runc) and [Kata](https://github.com/kata-containers), and provides [Confidential Containers](https://github.com/confidential-containers) and vulnerability scanning capabilities +- **Security**: End to end data integrity check, Supply Chain Attack can be detected and avoided at runtime. + +## Key features + +- **On-demand Load**: Container images/packages are downloaded on-demand in chunk unit to boost startup. +- **Chunk Deduplication**: Chunk level data de-duplication cross-layer or cross-image to reduce storage, transport, and memory cost. +- **Compatible with Ecosystem**: Storage backend support with Registry, OSS, NAS, Shared Disk, and [P2P service](https://d7y.io/). Compatible with the [OCI images](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-zran.md), and provide native [eStargz images](https://github.com/containerd/stargz-snapshotter) support. +- **Data Analyzability**: Record accesses, data layout optimization, prefetch, IO amplification, abnormal behavior detection. 
+- **POSIX Compatibility**: In-Kernel EROFS or FUSE filesystems together with overlayfs provide full POSIX compatibility +- **I/O optimization**: Use merged filesystem tree, data prefetching and User I/O amplification to reduce read latency and improve user I/O performance. + +## Ecosystem +### Nydus tools + +| Tool | Description | +| ---------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [nydusd](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusd.md) | Nydus user-space daemon, it processes all fscache/FUSE messages from the kernel and parses Nydus images to fullfil those requests | +| [nydus-image](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) | Convert a single layer of OCI format container image into a nydus format container image generating meta part file and data part file respectively | +| [nydusify](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusify.md) | It pulls OCI image down and unpack it, invokes `nydus-image create` to convert image and then pushes the converted image back to registry and data storage | +| [nydusctl](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) | Nydusd CLI client (`nydus-image inspect`), query daemon's working status/metrics and configure it | +| [ctr-remote](https://github.com/dragonflyoss/nydus/tree/master/contrib/ctr-remote) | An enhanced `containerd` CLI tool enable nydus support with `containerd` ctr | +| [nydus-docker-graphdriver](https://github.com/nydusaccelerator/docker-nydus-graphdriver) | [Experimental] Works as a `docker` remote graph driver to control how images and containers are stored and managed | +| [nydus-overlayfs](https://github.com/dragonflyoss/nydus/tree/master/contrib/nydus-overlayfs) | `Containerd` mount helper to invoke overlayfs mount with tweaking mount options a bit. 
So nydus prerequisites can be passed to vm-based runtime | +| [nydus-backend-proxy](./contrib/nydus-backend-proxy/README.md) | A simple HTTP server to serve local directory as a blob backend for nydusd | + +### Supported platforms + +| Type | Platform | Description | Status | +| ------------- | --------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | +| Storage | Registry/OSS/S3/NAS | Support for OCI-compatible distribution implementations such as Docker Hub, Harbor, Github GHCR, Aliyun ACR, NAS, and Aliyun OSS-like object storage service | ✅ | +| Storage/Build | [Harbor](https://github.com/goharbor/acceleration-service) | Provides a general service for Harbor to support acceleration image conversion based on kinds of accelerator like Nydus and eStargz etc | ✅ | +| Distribution | [Dragonfly](https://github.com/dragonflyoss/Dragonfly2) | Improve the runtime performance of Nydus image even further with the Dragonfly P2P data distribution system | ✅ | +| Build | [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md) | Provides the ability to build and export Nydus images directly from Dockerfile | ✅ | +| Build/Runtime | [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md) | The containerd client to build or run (requires nydus snapshotter) Nydus image | ✅ | +| Runtime | [Docker / Moby](https://github.com/dragonflyoss/nydus/blob/master/docs/docker-env-setup.md) | Run Nydus image in Docker container with containerd and nydus-snapshotter | ✅ | +| Runtime | [Kubernetes](https://github.com/containerd/nydus-snapshotter/blob/main/docs/run_nydus_in_kubernetes.md) | Run Nydus image using CRI interface | ✅ | +| Runtime | [Containerd](https://github.com/containerd/nydus-snapshotter) | Nydus Snapshotter, a containerd remote plugin to run Nydus image | ✅ | +| Runtime | [CRI-O / Podman](https://github.com/containers/nydus-storage-plugin) | Run Nydus image with CRI-O or Podman | 🚧 | +| Runtime | [KataContainers](https://github.com/kata-containers/kata-containers/blob/main/docs/design/kata-nydus-design.md) | Run Nydus image in KataContainers as a native solution | ✅ | +| Runtime | [EROFS](https://www.kernel.org/doc/html/latest/filesystems/erofs.html) | Run Nydus image directly in-kernel EROFS for even greater performance improvement | ✅ | + +## Build + +### Build Binary +```shell +# build debug binary +make +# build release binary +make release +# build static binary with docker +make docker-static +``` + +### Build Nydus Image + +Convert OCIv1 image to Nydus image: [Nydusify](./docs/nydusify.md), [Acceld](https://github.com/goharbor/acceleration-service) or [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md#build-nydus-image-using-nerdctl-image-convert). + +Build Nydus image from Dockerfile directly: [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md). + +Build Nydus layer from various sources: [Nydus Image Builder](./docs/nydus-image.md). 
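As a concrete illustration of the conversion flow described above (pull the OCI image, convert it layer by layer with `nydus-image create`, then push the result back), a minimal sketch using `nydusify`; the image references are placeholders and the `convert` flags are assumed from the nydusify documentation linked above:

```shell
# Convert an existing OCI image into a Nydus image and push it to the
# same registry under a new tag (placeholder references).
nydusify convert \
  --source registry.example.com/app:latest \
  --target registry.example.com/app:latest-nydus
```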
+ +#### Image prefetch optimization +To further reduce container startup time, a nydus image with a prefetch list can be built using the NRI plugin (containerd >=1.7): [Container Image Optimizer](https://github.com/containerd/nydus-snapshotter/blob/main/docs/optimize_nydus_image.md) + +## Run +### Quick Start + +For more details on how to lazily start a container with `nydus-snapshotter` and nydus image on Kubernetes nodes or locally use `nerdctl` rather than CRI, please refer to [Nydus Setup](./docs/containerd-env-setup.md) + +### Run Nydus Snapshotter + +Nydus-snapshotter is a non-core sub-project of containerd. + +Check out its code and tutorial from [Nydus-snapshotter repository](https://github.com/containerd/nydus-snapshotter). +It works as a `containerd` remote snapshotter to help setup container rootfs with nydus images, which handles nydus image format when necessary. When running without nydus images, it is identical to the containerd's builtin overlayfs snapshotter. + +### Run Nydusd Daemon + +Normally, users do not need to start `nydusd` by hand. It is started by `nydus-snapshotter` when a container rootfs is prepared. + +Run Nydusd Daemon to serve Nydus image: [Nydusd](./docs/nydusd.md). + +### Run Nydus with in-kernel EROFS filesystem + +In-kernel EROFS has been fully compatible with RAFS v6 image format since Linux 5.16. In other words, uncompressed RAFS v6 images can be mounted over block devices since then. + +Since [Linux 5.19](https://lwn.net/Articles/896140), EROFS has added a new file-based caching (fscache) backend. In this way, compressed RAFS v6 images can be mounted directly with fscache subsystem, even such images are partially available. `estargz` can be converted on the fly and mounted in this way too. + +Guide to running Nydus with fscache: [Nydus-fscache](./docs/nydus-fscache.md) + +### Run Nydus with Dragonfly P2P system + +Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P system, which can greatly reduce the network latency and the single point pressure of the registry server. Benchmarking results in the production environment demonstrate that using Dragonfly can reduce network latency by more than 80%, to understand the performance results and integration steps, please refer to the [nydus integration](https://d7y.io/docs/setup/integration/nydus). + +If you want to deploy Dragonfly and Nydus at the same time through Helm, please refer to the **[Quick Start](https://github.com/dragonflyoss/helm-charts/blob/main/INSTALL.md)**. + +### Run OCI image directly with Nydus + +Nydus is able to generate a tiny artifact called a `nydus zran` from an existing OCI image in the short time. This artifact can be used to accelerate the container boot time without the need for a full image conversion. For more information, please see the [documentation](./docs/nydus-zran.md). + +### Run with Docker(Moby) + +Nydus provides a variety of methods to support running on docker(Moby), please refer to [Nydus Setup for Docker(Moby) Environment](./docs/docker-env-setup.md) + +### Run with macOS + +Nydus can also run with macfuse(a.k.a osxfuse). For more details please read [nydus with macOS](./docs/nydus_with_macos.md). + +### Run eStargz image (with lazy pulling) + +The containerd remote snapshotter plugin [nydus-snapshotter](https://github.com/containerd/nydus-snapshotter) can be used to run nydus images, or to run [eStargz](https://github.com/containerd/stargz-snapshotter) images directly by appending `--enable-stargz` command line option. 
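A minimal sketch of enabling eStargz lazy pulling, assuming the snapshotter binary is `containerd-nydus-grpc` and that it accepts the `--enable-stargz` option mentioned above; consult the nydus-snapshotter documentation for the exact flag set of your version:

```shell
# Hypothetical invocation: start the nydus snapshotter with eStargz
# lazy pulling enabled, then run eStargz images via containerd/nerdctl as usual.
containerd-nydus-grpc --enable-stargz
```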
+ +In the future, `zstd::chunked` can work in this way as well. + +### Run Nydus Service + +Using the key features of nydus as native in your project without preparing and invoking `nydusd` deliberately, [nydus-service](./service/README.md) helps to reuse the core services of nyuds. + +## Documentation + +Please visit [**Wiki**](https://github.com/dragonflyoss/nydus/wiki), or [**docs**](./docs) + +## Community + +Nydus aims to form a **vendor-neutral opensource** image distribution solution to all communities. +Questions, bug reports, technical discussion, feature requests and contribution are always welcomed! + +We're very pleased to hear your use cases any time. +Feel free to reach us via Slack or Dingtalk. + +- **Slack:** [Nydus Workspace](https://join.slack.com/t/nydusimageservice/shared_invite/zt-pz4qvl4y-WIh4itPNILGhPS8JqdFm_w) + +- **Twitter:** [@dragonfly_oss](https://twitter.com/dragonfly_oss) + +- **Dingtalk:** [34971767](https://qr.dingtalk.com/action/joingroup?code=v1,k1,ioWGzuDZEIO10Bf+/ohz4RcQqAkW0MtOwoG1nbbMxQg=&_dt_no_comment=1&origin=11) + + diff --git a/api/Cargo.toml b/api/Cargo.toml index 3bd8c03a057..5ce18e47e0f 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -1,31 +1,31 @@ -[package] -name = "nydus-api" -version = "0.3.1" -description = "APIs for Nydus Image Service" -authors = ["The Nydus Developers"] -license = "Apache-2.0 OR BSD-3-Clause" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -libc = "0.2" -log = "0.4.8" -serde_json = "1.0.53" -toml = "0.5" - -thiserror = "1.0.30" -backtrace = { version = "0.3", optional = true } -dbs-uhttp = { version = "0.3.0", optional = true } -http = { version = "0.2.1", optional = true } -lazy_static = { version = "1.4.0", optional = true } -mio = { version = "0.8", features = ["os-poll", "os-ext"], optional = true } -serde = { version = "1.0.110", features = ["rc", "serde_derive"] } -url = { version = "2.1.1", optional = true } - -[dev-dependencies] -vmm-sys-util = { version = "0.11" } - -[features] -error-backtrace = ["backtrace"] -handler = ["dbs-uhttp", "http", "lazy_static", "mio", "url"] +[package] +name = "nydus-api" +version = "0.3.1" +description = "APIs for Nydus Image Service" +authors = ["The Nydus Developers"] +license = "Apache-2.0 OR BSD-3-Clause" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +libc = "0.2" +log = "0.4.8" +serde_json = "1.0.53" +toml = "0.5" + +thiserror = "1.0.30" +backtrace = { version = "0.3", optional = true } +dbs-uhttp = { version = "0.3.0", optional = true } +http = { version = "0.2.1", optional = true } +lazy_static = { version = "1.4.0", optional = true } +mio = { version = "0.8", features = ["os-poll", "os-ext"], optional = true } +serde = { version = "1.0.110", features = ["rc", "serde_derive"] } +url = { version = "2.1.1", optional = true } + +[dev-dependencies] +vmm-sys-util = { version = "0.11" } + +[features] +error-backtrace = ["backtrace"] +handler = ["dbs-uhttp", "http", "lazy_static", "mio", "url"] diff --git a/api/README.md b/api/README.md index 0635fa32d3c..a72887d9848 100644 --- a/api/README.md +++ b/api/README.md @@ -1,17 +1,17 @@ -# nydus-api - -The `nydus-api` crate defines the Nydus Image Service APIs and related data structures. 
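Since api/Cargo.toml above declares optional features (`error-backtrace`, `handler`), a minimal sketch of pulling the crate into a downstream Rust project, assuming it is published to crates.io under the same name:

```shell
# Hypothetical: add the crate as a dependency and enable the optional
# HTTP handler feature declared in api/Cargo.toml.
cargo add nydus-api --features handler
```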
- -## Support - -**Platforms**: -- x86_64 -- aarch64 - -**Operating Systems**: -- Linux -- Darwin - -## License - -This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). +# nydus-api + +The `nydus-api` crate defines the Nydus Image Service APIs and related data structures. + +## Support + +**Platforms**: +- x86_64 +- aarch64 + +**Operating Systems**: +- Linux +- Darwin + +## License + +This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). diff --git a/api/openapi/nydus-api-v1.yaml b/api/openapi/nydus-api-v1.yaml index f6aed791ab6..0a7ea0b5616 100644 --- a/api/openapi/nydus-api-v1.yaml +++ b/api/openapi/nydus-api-v1.yaml @@ -1,509 +1,509 @@ -openapi: 3.0.2 -info: - description: - RESTful public-facing management API. The API is accessible through - HTTP calls on specific URLs carrying JSON modeled data. - license: - name: Apache 2.0 - url: http://www.apache.org/licenses/LICENSE-2.0.html - title: Nydus-rs API - version: 0.1.0 -servers: - - url: http://localhost/api/v1 -paths: - /daemon: - summary: Returns general information about a nydus-rs daemon - get: - operationId: describeDaemon - responses: - "200": - description: Daemon information - content: - application/json: - schema: - $ref: "#/components/schemas/DaemonInfo" - "500": - description: Internal Server Error - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - put: - operationId: configureDaemon - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/DaemonConf" - responses: - "204": - description: "Successfully configure the daemon!" - "500": - description: "Can't configure the daemon!" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - /daemon/events: - get: - operationId: getEvents - responses: - "200": - description: "Get events happened to nydusd" - content: - application/json: - schema: - $ref: "#/components/schemas/Events" - "500": - description: Nydus api server can't process this request. - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - /daemon/backend: - get: - operationId: queryFsBackend - responses: - "200": - description: "Query mounted file system backend" - content: - application/json: - schema: - $ref: "#/components/schemas/DaemonFsBackend" - "500": - description: Nydus api server can't process this request. - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - /daemon/exit: - put: - operationId: exitDaemon - responses: - "204": - description: "Let nydusd process exit" - "500": - description: Nydus api server can't process this request. - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - /mount: - post: - operationId: mountFsBackend - summary: Operations on nydus file system instances. 
- parameters: - - name: mountpoint - in: query - description: Which directory(mountpoint) in pseudo fs hierarchy to mount to - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/MountCmd" - required: true - responses: - "204": - description: The fs backend has already been successfully mounted - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Failed in mounting fs backend due to bad request - put: - operationId: remountFsBackend - parameters: - - name: mountpoint - in: query - description: Which directory(mountpoint) in pseudo fs hierarchy to mount to - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/MountCmd" - required: true - responses: - "204": - description: The mount update was successful - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: The mount update action cannot be executed due to bad input - summary: Updates a mount. - delete: - summary: Umount the specified file system backend - operationId: umountFsBackend - parameters: - - name: mountpoint - in: query - description: Which directory(mountpoint) in pseudo fs hierarchy to umount from - required: true - schema: - type: string - responses: - "204": - description: Operation - umount - is successful - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Umount operation is not done successfully. - /metrics: - get: - operationId: exportRafsMetrics - summary: Rafs filesystem level global metrics. - parameters: - - name: id - in: query - description: "Specify rafs id to get its metrics" - required: false - schema: - type: string - responses: - "200": - description: Rafs metrics export - content: - application/json: - schema: - $ref: "#/components/schemas/RafsMetrics" - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Perhaps no counter is found - /metrics/files: - get: - summary: Returns Rafs files' fop stats - operationId: exportRafsFilesMetrics - parameters: - - name: id - in: query - description: "Specify rafs id to get its all files metrics" - required: false - schema: - type: string - - name: latest - description: "The returned list represents all files that are ever read ignoring the frequency. The metics of each file will be cleared after this request." 
- in: query - required: false - schema: - type: boolean - responses: - "200": - content: - application/json: - schema: - oneOf: - - $ref: "#/components/schemas/RafsLatestReadFiles" - - $ref: "#/components/schemas/RafsFilesMetrics" - description: Rafs all opened files metrics export - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Internal Server Error - /metrics/pattern: - get: - operationId: exportRafsFilesAccessPattern - summary: Rafs files' access patterns - parameters: - - name: id - in: query - description: "Specify rafs id to get its all files access patterns" - required: false - schema: - type: string - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/RafsFilesAccessPatterns" - description: Rafs access pattern exporting - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Internal Server Error - /metrics/backend: - get: - parameters: - - name: id - in: query - description: It is equal to ID of rafs, the ID is also the mountpoint of backend fs. - required: false - schema: - type: string - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/RafsBackend" - description: Rafs storage backend metrics - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Internal Server Error - /metrics/blobcache: - get: - parameters: - - name: id - in: query - description: It is equal to ID of rafs, the ID is also the mountpoint of backend fs. - required: true - schema: - type: string - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/Blobcache" - description: Blobcache metrics - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Internal Server Error - /metrics/inflight: - get: - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/FuseInflight" - description: A set including what fuse requests are being handled. External manager can query this info to judge if request is hang - "500": - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - description: Internal Server Error - -components: - schemas: - DaemonInfo: - properties: - version: - type: object - properties: - package_ver: - type: string - git_commit: - type: string - build_time: - type: string - profile: - type: string - rustc: - type: string - id: - type: string - supervisor: - type: string - state: - type: string - enum: - - INIT - - RUNNING - - UPGRADING - - INTERRUPTED - - STOPPED - - UNKNOWN - backend_collection: - type: object - type: object - DaemonConf: - type: object - properties: - log_level: - type: string - enum: [trace, debug, info, warn, error] - DaemonFsBackend: - type: object - MountCmd: - type: object - properties: - fs_type: - type: string - source: - description: usually to be the metadata source - type: string - prefetch_files: - description: local file path which recorded files/directories to be prefetched and separated by newlines - type: string - config: - description: inline request, use to configure fs backend. 
- type: string - ErrorMsg: - type: object - properties: - code: - description: Nydus defined error code indicating certain error type - type: string - message: - description: Details about the error - type: string - RafsMetrics: - type: object - properties: - files_account_enabled: - type: boolean - measure_latency: - type: boolean - data_read: - type: integer - block_count_read: - type: array - items: - type: integer - fop_hits: - type: array - items: - type: integer - fop_errors: - type: array - items: - type: integer - fop_cumulative_latency_total: - type: array - items: - type: integer - read_latency_dist: - type: array - items: - type: integer - nr_opens: - type: integer - RafsFilesMetrics: - type: object - properties: - nr_open: - type: integer - total_fops: - type: integer - data_read: - type: integer - block_count_read: - type: array - items: - type: integer - fop_hits: - type: array - items: - type: integer - fop_errors: - type: array - items: - type: integer - RafsLatestReadFiles: - type: array - description: File ino array, [start,end] -- include inode from start to end, [ino] -- include inode ino - items: - type: array - items: - type: integer - RafsFilesAccessPatterns: - properties: - ino: - type: integer - description: File inode number to identify which file is against - nr_read: - type: integer - description: How many times a file is read regardless of io block size and request offset - first_access_time_secs: - type: integer - description: First time point at which this file is read. It's wall-time in unit of seconds - RafsBackend: - type: object - properties: - id: - type: string - backend_type: - type: string - read_count: - type: string - read_errors: - type: integer - read_amount_total: - type: integer - read_latency_dist: - type: array - items: - type: array - items: - type: integer - Blobcache: - type: object - properties: - id: - type: string - underlying_files: - type: string - store_path: - type: string - partial_hits: - type: integer - whole_hits: - type: integer - total: - type: integer - entries_count: - type: integer - prefetch_data_amount: - type: integer - prefetch_workers: - type: integer - prefetch_mr_count: - type: integer - prefetch_unmerged_chunks: - type: integer - FuseInflight: - type: array - items: - required: - - inode - - opcode - - unique - - timestamp_secs - type: object - properties: - inode: - type: integer - opcode: - type: integer - unique: - type: integer - timestamp_secs: - type: integer - Events: - type: object - properties: - max_errors: - type: integer - total_errors: - type: integer - max_size: - type: integer - errors: - type: array - items: - type: string +openapi: 3.0.2 +info: + description: + RESTful public-facing management API. The API is accessible through + HTTP calls on specific URLs carrying JSON modeled data. 
+ license: + name: Apache 2.0 + url: http://www.apache.org/licenses/LICENSE-2.0.html + title: Nydus-rs API + version: 0.1.0 +servers: + - url: http://localhost/api/v1 +paths: + /daemon: + summary: Returns general information about a nydus-rs daemon + get: + operationId: describeDaemon + responses: + "200": + description: Daemon information + content: + application/json: + schema: + $ref: "#/components/schemas/DaemonInfo" + "500": + description: Internal Server Error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + put: + operationId: configureDaemon + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/DaemonConf" + responses: + "204": + description: "Successfully configure the daemon!" + "500": + description: "Can't configure the daemon!" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + /daemon/events: + get: + operationId: getEvents + responses: + "200": + description: "Get events happened to nydusd" + content: + application/json: + schema: + $ref: "#/components/schemas/Events" + "500": + description: Nydus api server can't process this request. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + /daemon/backend: + get: + operationId: queryFsBackend + responses: + "200": + description: "Query mounted file system backend" + content: + application/json: + schema: + $ref: "#/components/schemas/DaemonFsBackend" + "500": + description: Nydus api server can't process this request. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + /daemon/exit: + put: + operationId: exitDaemon + responses: + "204": + description: "Let nydusd process exit" + "500": + description: Nydus api server can't process this request. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + /mount: + post: + operationId: mountFsBackend + summary: Operations on nydus file system instances. + parameters: + - name: mountpoint + in: query + description: Which directory(mountpoint) in pseudo fs hierarchy to mount to + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/MountCmd" + required: true + responses: + "204": + description: The fs backend has already been successfully mounted + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Failed in mounting fs backend due to bad request + put: + operationId: remountFsBackend + parameters: + - name: mountpoint + in: query + description: Which directory(mountpoint) in pseudo fs hierarchy to mount to + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/MountCmd" + required: true + responses: + "204": + description: The mount update was successful + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: The mount update action cannot be executed due to bad input + summary: Updates a mount. 
+ delete: + summary: Umount the specified file system backend + operationId: umountFsBackend + parameters: + - name: mountpoint + in: query + description: Which directory(mountpoint) in pseudo fs hierarchy to umount from + required: true + schema: + type: string + responses: + "204": + description: Operation - umount - is successful + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Umount operation is not done successfully. + /metrics: + get: + operationId: exportRafsMetrics + summary: Rafs filesystem level global metrics. + parameters: + - name: id + in: query + description: "Specify rafs id to get its metrics" + required: false + schema: + type: string + responses: + "200": + description: Rafs metrics export + content: + application/json: + schema: + $ref: "#/components/schemas/RafsMetrics" + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Perhaps no counter is found + /metrics/files: + get: + summary: Returns Rafs files' fop stats + operationId: exportRafsFilesMetrics + parameters: + - name: id + in: query + description: "Specify rafs id to get its all files metrics" + required: false + schema: + type: string + - name: latest + description: "The returned list represents all files that are ever read ignoring the frequency. The metics of each file will be cleared after this request." + in: query + required: false + schema: + type: boolean + responses: + "200": + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/RafsLatestReadFiles" + - $ref: "#/components/schemas/RafsFilesMetrics" + description: Rafs all opened files metrics export + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Internal Server Error + /metrics/pattern: + get: + operationId: exportRafsFilesAccessPattern + summary: Rafs files' access patterns + parameters: + - name: id + in: query + description: "Specify rafs id to get its all files access patterns" + required: false + schema: + type: string + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/RafsFilesAccessPatterns" + description: Rafs access pattern exporting + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Internal Server Error + /metrics/backend: + get: + parameters: + - name: id + in: query + description: It is equal to ID of rafs, the ID is also the mountpoint of backend fs. + required: false + schema: + type: string + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/RafsBackend" + description: Rafs storage backend metrics + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Internal Server Error + /metrics/blobcache: + get: + parameters: + - name: id + in: query + description: It is equal to ID of rafs, the ID is also the mountpoint of backend fs. + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Blobcache" + description: Blobcache metrics + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Internal Server Error + /metrics/inflight: + get: + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/FuseInflight" + description: A set including what fuse requests are being handled. 
External manager can query this info to judge if request is hang + "500": + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + description: Internal Server Error + +components: + schemas: + DaemonInfo: + properties: + version: + type: object + properties: + package_ver: + type: string + git_commit: + type: string + build_time: + type: string + profile: + type: string + rustc: + type: string + id: + type: string + supervisor: + type: string + state: + type: string + enum: + - INIT + - RUNNING + - UPGRADING + - INTERRUPTED + - STOPPED + - UNKNOWN + backend_collection: + type: object + type: object + DaemonConf: + type: object + properties: + log_level: + type: string + enum: [trace, debug, info, warn, error] + DaemonFsBackend: + type: object + MountCmd: + type: object + properties: + fs_type: + type: string + source: + description: usually to be the metadata source + type: string + prefetch_files: + description: local file path which recorded files/directories to be prefetched and separated by newlines + type: string + config: + description: inline request, use to configure fs backend. + type: string + ErrorMsg: + type: object + properties: + code: + description: Nydus defined error code indicating certain error type + type: string + message: + description: Details about the error + type: string + RafsMetrics: + type: object + properties: + files_account_enabled: + type: boolean + measure_latency: + type: boolean + data_read: + type: integer + block_count_read: + type: array + items: + type: integer + fop_hits: + type: array + items: + type: integer + fop_errors: + type: array + items: + type: integer + fop_cumulative_latency_total: + type: array + items: + type: integer + read_latency_dist: + type: array + items: + type: integer + nr_opens: + type: integer + RafsFilesMetrics: + type: object + properties: + nr_open: + type: integer + total_fops: + type: integer + data_read: + type: integer + block_count_read: + type: array + items: + type: integer + fop_hits: + type: array + items: + type: integer + fop_errors: + type: array + items: + type: integer + RafsLatestReadFiles: + type: array + description: File ino array, [start,end] -- include inode from start to end, [ino] -- include inode ino + items: + type: array + items: + type: integer + RafsFilesAccessPatterns: + properties: + ino: + type: integer + description: File inode number to identify which file is against + nr_read: + type: integer + description: How many times a file is read regardless of io block size and request offset + first_access_time_secs: + type: integer + description: First time point at which this file is read. 
It's wall-time in unit of seconds + RafsBackend: + type: object + properties: + id: + type: string + backend_type: + type: string + read_count: + type: string + read_errors: + type: integer + read_amount_total: + type: integer + read_latency_dist: + type: array + items: + type: array + items: + type: integer + Blobcache: + type: object + properties: + id: + type: string + underlying_files: + type: string + store_path: + type: string + partial_hits: + type: integer + whole_hits: + type: integer + total: + type: integer + entries_count: + type: integer + prefetch_data_amount: + type: integer + prefetch_workers: + type: integer + prefetch_mr_count: + type: integer + prefetch_unmerged_chunks: + type: integer + FuseInflight: + type: array + items: + required: + - inode + - opcode + - unique + - timestamp_secs + type: object + properties: + inode: + type: integer + opcode: + type: integer + unique: + type: integer + timestamp_secs: + type: integer + Events: + type: object + properties: + max_errors: + type: integer + total_errors: + type: integer + max_size: + type: integer + errors: + type: array + items: + type: string diff --git a/api/openapi/nydus-api-v2.yaml b/api/openapi/nydus-api-v2.yaml index daf6a50f0ad..14f2a8a855d 100644 --- a/api/openapi/nydus-api-v2.yaml +++ b/api/openapi/nydus-api-v2.yaml @@ -1,160 +1,160 @@ -openapi: "3.0.2" -info: - title: Nydus Service and Management APIs, version 2. - description: - This is the second version of RESTful Nydus service and management APIs to manage the global daemon and - individual services. - license: - name: Apache 2.0 - url: http://www.apache.org/licenses/LICENSE-2.0.html - version: "0.1" -servers: - - url: https://localhost/v2 -paths: - /daemon: - summary: Returns general information about the nydus daemon - get: - operationId: describeDaemon - responses: - "200": - description: Daemon information - content: - application/json: - schema: - $ref: "#/components/schemas/DaemonInfo" - "500": - description: Internal Server Error - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - put: - operationId: configureDaemon - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/DaemonConf" - responses: - "204": - description: "Successfully configure the daemon!" - "500": - description: "Can't configure the daemon!" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - /blobs: - summary: Manage cached blob objects - #################################################################### - get: - operationId: getBlobObject - responses: - "200": - description: Blob objects - content: - application/json: - schema: - $ref: "#/components/schemas/BlobObjectList" - "404": - description: "Blob object not found" - "500": - description: "Internal Server Error" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - put: - operationId: createBlobObject - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/BlobObjectConf" - responses: - "204": - description: "Successfully created the blob object!" - "500": - description: "Can't create the blob object!" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - delete: - operationId: deleteBlobObject - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/BlobObjectParam" - responses: - "204": - description: "Successfully deleted the blob object!" - "500": - description: "Can't delete the blob object!" 
- content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" - operationId: deleteBlobFile - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/BlobId" - responses: - "204": - description: "Successfully deleted the blob file!" - "500": - description: "Can't delete the blob file!" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorMsg" -################################################################ -components: - schemas: - DaemonInfo: - type: object - properties: - version: - type: object - properties: - package_ver: - type: string - git_commit: - type: string - build_time: - type: string - profile: - type: string - rustc: - type: string - id: - type: string - supervisor: - type: string - state: - type: string - enum: - - INIT - - RUNNING - - UPGRADING - - INTERRUPTED - - STOPPED - - UNKNOWN - DaemonConf: - type: object - properties: - log_level: - type: string - enum: [trace, debug, info, warn, error] - ErrorMsg: - type: object - properties: - code: - description: Nydus defined error code indicating certain error type - type: string - message: - description: Details about the error - type: string +openapi: "3.0.2" +info: + title: Nydus Service and Management APIs, version 2. + description: + This is the second version of RESTful Nydus service and management APIs to manage the global daemon and + individual services. + license: + name: Apache 2.0 + url: http://www.apache.org/licenses/LICENSE-2.0.html + version: "0.1" +servers: + - url: https://localhost/v2 +paths: + /daemon: + summary: Returns general information about the nydus daemon + get: + operationId: describeDaemon + responses: + "200": + description: Daemon information + content: + application/json: + schema: + $ref: "#/components/schemas/DaemonInfo" + "500": + description: Internal Server Error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + put: + operationId: configureDaemon + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/DaemonConf" + responses: + "204": + description: "Successfully configure the daemon!" + "500": + description: "Can't configure the daemon!" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + /blobs: + summary: Manage cached blob objects + #################################################################### + get: + operationId: getBlobObject + responses: + "200": + description: Blob objects + content: + application/json: + schema: + $ref: "#/components/schemas/BlobObjectList" + "404": + description: "Blob object not found" + "500": + description: "Internal Server Error" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + put: + operationId: createBlobObject + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/BlobObjectConf" + responses: + "204": + description: "Successfully created the blob object!" + "500": + description: "Can't create the blob object!" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + delete: + operationId: deleteBlobObject + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/BlobObjectParam" + responses: + "204": + description: "Successfully deleted the blob object!" + "500": + description: "Can't delete the blob object!" 
+ content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" + operationId: deleteBlobFile + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/BlobId" + responses: + "204": + description: "Successfully deleted the blob file!" + "500": + description: "Can't delete the blob file!" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorMsg" +################################################################ +components: + schemas: + DaemonInfo: + type: object + properties: + version: + type: object + properties: + package_ver: + type: string + git_commit: + type: string + build_time: + type: string + profile: + type: string + rustc: + type: string + id: + type: string + supervisor: + type: string + state: + type: string + enum: + - INIT + - RUNNING + - UPGRADING + - INTERRUPTED + - STOPPED + - UNKNOWN + DaemonConf: + type: object + properties: + log_level: + type: string + enum: [trace, debug, info, warn, error] + ErrorMsg: + type: object + properties: + code: + description: Nydus defined error code indicating certain error type + type: string + message: + description: Details about the error + type: string diff --git a/api/src/config.rs b/api/src/config.rs index 4823ec75485..4a4e8761fb5 100644 --- a/api/src/config.rs +++ b/api/src/config.rs @@ -1,2631 +1,2631 @@ -// Copyright 2022 Alibaba Cloud. All rights reserved. -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::convert::{TryFrom, TryInto}; -use std::fs; -use std::io::{Error, ErrorKind, Result}; -use std::path::Path; -use std::str::FromStr; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; - -use serde::Deserialize; -use serde_json::Value; - -/// Configuration file format version 2, based on Toml. -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct ConfigV2 { - /// Configuration file format version number, must be 2. - pub version: u32, - /// Identifier for the instance. - #[serde(default)] - pub id: String, - /// Configuration information for storage backend. - pub backend: Option, - /// Configuration information for local cache system. - pub cache: Option, - /// Configuration information for RAFS filesystem. - pub rafs: Option, - /// Overlay configuration information for the instance. - pub overlay: Option, - /// Internal runtime configuration. - #[serde(skip)] - pub internal: ConfigV2Internal, -} - -impl Default for ConfigV2 { - fn default() -> Self { - ConfigV2 { - version: 2, - id: String::new(), - backend: None, - cache: None, - rafs: None, - overlay: None, - internal: ConfigV2Internal::default(), - } - } -} - -impl ConfigV2 { - /// Create a new instance of `ConfigV2` object. - pub fn new(id: &str) -> Self { - ConfigV2 { - version: 2, - id: id.to_string(), - backend: None, - cache: None, - rafs: None, - overlay: None, - internal: ConfigV2Internal::default(), - } - } - - /// Create a new configuration object for `backend-localfs` and `filecache`. - pub fn new_localfs(id: &str, dir: &str) -> Result { - let content = format!( - r#" - version = 2 - id = "{}" - backend.type = "localfs" - backend.localfs.dir = "{}" - cache.type = "filecache" - cache.compressed = false - cache.validate = false - cache.filecache.work_dir = "{}" - "#, - id, dir, dir - ); - - Self::from_str(&content) - } - - /// Read configuration information from a file. 
- pub fn from_file>(path: P) -> Result { - let md = fs::metadata(path.as_ref())?; - if md.len() > 0x100000 { - return Err(Error::new( - ErrorKind::Other, - "configuration file size is too big", - )); - } - let content = fs::read_to_string(path)?; - Self::from_str(&content) - } - - /// Validate the configuration object. - pub fn validate(&self) -> bool { - if self.version != 2 { - return false; - } - if let Some(backend_cfg) = self.backend.as_ref() { - if !backend_cfg.validate() { - return false; - } - } - if let Some(cache_cfg) = self.cache.as_ref() { - if !cache_cfg.validate() { - return false; - } - } - if let Some(rafs_cfg) = self.rafs.as_ref() { - if !rafs_cfg.validate() { - return false; - } - } - - true - } - - /// Get configuration information for storage backend. - pub fn get_backend_config(&self) -> Result<&BackendConfigV2> { - self.backend.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidInput, - "no configuration information for backend", - ) - }) - } - - /// Get configuration information for cache subsystem. - pub fn get_cache_config(&self) -> Result<&CacheConfigV2> { - self.cache.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for cache", - ) - }) - } - - /// Get cache working directory. - pub fn get_cache_working_directory(&self) -> Result { - let cache = self.get_cache_config()?; - if cache.is_filecache() { - if let Some(c) = cache.file_cache.as_ref() { - return Ok(c.work_dir.clone()); - } - } else if cache.is_fscache() { - if let Some(c) = cache.fs_cache.as_ref() { - return Ok(c.work_dir.clone()); - } - } - - Err(Error::new( - ErrorKind::NotFound, - "no working directory configured", - )) - } - - /// Get configuration information for RAFS filesystem. - pub fn get_rafs_config(&self) -> Result<&RafsConfigV2> { - self.rafs.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidInput, - "no configuration information for rafs", - ) - }) - } - - /// Clone the object with all secrets removed. - pub fn clone_without_secrets(&self) -> Self { - let mut cfg = self.clone(); - - if let Some(backend_cfg) = cfg.backend.as_mut() { - if let Some(oss_cfg) = backend_cfg.oss.as_mut() { - oss_cfg.access_key_id = String::new(); - oss_cfg.access_key_secret = String::new(); - } - if let Some(registry_cfg) = backend_cfg.registry.as_mut() { - registry_cfg.auth = None; - registry_cfg.registry_token = None; - } - } - - cfg - } - - /// Check whether chunk digest validation is enabled or not. - pub fn is_chunk_validation_enabled(&self) -> bool { - let mut validation = if let Some(cache) = &self.cache { - cache.cache_validate - } else { - false - }; - if let Some(rafs) = &self.rafs { - if rafs.validate { - validation = true; - } - } - - validation - } - - /// Check whether fscache is enabled or not. - pub fn is_fs_cache(&self) -> bool { - if let Some(cache) = self.cache.as_ref() { - cache.fs_cache.is_some() - } else { - false - } - } - - /// Fill authorization for registry backend. 
- pub fn update_registry_auth_info(&mut self, auth: &Option) { - if let Some(auth) = auth { - if let Some(backend) = self.backend.as_mut() { - if let Some(registry) = backend.registry.as_mut() { - registry.auth = Some(auth.to_string()); - } - } - } - } -} - -impl FromStr for ConfigV2 { - type Err = std::io::Error; - - fn from_str(s: &str) -> Result { - if let Ok(v) = serde_json::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - if let Ok(v) = toml::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - if let Ok(v) = serde_json::from_str::(s) { - if let Ok(v) = ConfigV2::try_from(v) { - if v.validate() { - return Ok(v); - } - } - } - Err(Error::new( - ErrorKind::InvalidInput, - "failed to parse configuration information", - )) - } -} - -/// Configuration information for storage backend. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct BackendConfigV2 { - /// Type of storage backend. - #[serde(rename = "type")] - pub backend_type: String, - /// Configuration for local disk backend. - pub localdisk: Option, - /// Configuration for local filesystem backend. - pub localfs: Option, - /// Configuration for OSS backend. - pub oss: Option, - /// Configuration for S3 backend. - pub s3: Option, - /// Configuration for container registry backend. - pub registry: Option, - /// Configuration for local http proxy. - #[serde(rename = "http-proxy")] - pub http_proxy: Option, -} - -impl BackendConfigV2 { - /// Validate storage backend configuration. - pub fn validate(&self) -> bool { - match self.backend_type.as_str() { - "localdisk" => match self.localdisk.as_ref() { - Some(v) => { - if v.device_path.is_empty() { - return false; - } - } - None => return false, - }, - "localfs" => match self.localfs.as_ref() { - Some(v) => { - if v.blob_file.is_empty() && v.dir.is_empty() { - return false; - } - } - None => return false, - }, - "oss" => match self.oss.as_ref() { - Some(v) => { - if v.endpoint.is_empty() || v.bucket_name.is_empty() { - return false; - } - } - None => return false, - }, - "s3" => match self.s3.as_ref() { - Some(v) => { - if v.region.is_empty() || v.bucket_name.is_empty() { - return false; - } - } - None => return false, - }, - "registry" => match self.registry.as_ref() { - Some(v) => { - if v.host.is_empty() || v.repo.is_empty() { - return false; - } - } - None => return false, - }, - - "http-proxy" => match self.http_proxy.as_ref() { - Some(v) => { - let is_valid_unix_socket_path = |path: &str| { - let path = Path::new(path); - path.is_absolute() && path.exists() - }; - if v.addr.is_empty() - || !(v.addr.starts_with("http://") - || v.addr.starts_with("https://") - || is_valid_unix_socket_path(&v.addr)) - { - return false; - } - - // check if v.path is valid url path format - if Path::new(&v.path).join("any_blob_id").to_str().is_none() { - return false; - } - } - None => return false, - }, - _ => return false, - } - - true - } - - /// Get configuration information for localdisk - pub fn get_localdisk_config(&self) -> Result<&LocalDiskConfig> { - if &self.backend_type != "localdisk" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 'localdisk'", - )) - } else { - self.localdisk.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for localdisk", - ) - }) - } - } - - /// Get configuration information for localfs - pub fn 
get_localfs_config(&self) -> Result<&LocalFsConfig> { - if &self.backend_type != "localfs" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 'localfs'", - )) - } else { - self.localfs.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for localfs", - ) - }) - } - } - - /// Get configuration information for OSS - pub fn get_oss_config(&self) -> Result<&OssConfig> { - if &self.backend_type != "oss" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 'oss'", - )) - } else { - self.oss.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for OSS", - ) - }) - } - } - - /// Get configuration information for S3 - pub fn get_s3_config(&self) -> Result<&S3Config> { - if &self.backend_type != "s3" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 's3'", - )) - } else { - self.s3.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for s3", - ) - }) - } - } - - /// Get configuration information for Registry - pub fn get_registry_config(&self) -> Result<&RegistryConfig> { - if &self.backend_type != "registry" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 'registry'", - )) - } else { - self.registry.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for registry", - ) - }) - } - } - - /// Get configuration information for http proxy - pub fn get_http_proxy_config(&self) -> Result<&HttpProxyConfig> { - if &self.backend_type != "http-proxy" { - Err(Error::new( - ErrorKind::InvalidInput, - "backend type is not 'http-proxy'", - )) - } else { - self.http_proxy.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for http-proxy", - ) - }) - } - } -} - -/// Configuration information for localdisk storage backend. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct LocalDiskConfig { - /// Mounted block device path or original localdisk image file path. - #[serde(default)] - pub device_path: String, - /// Disable discover blob objects by scanning GPT table. - #[serde(default)] - pub disable_gpt: bool, -} - -/// Configuration information for localfs storage backend. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct LocalFsConfig { - /// Blob file to access. - #[serde(default)] - pub blob_file: String, - /// Dir to hold blob files. Used when 'blob_file' is not specified. - #[serde(default)] - pub dir: String, - /// Alternative dirs to search for blobs. - #[serde(default)] - pub alt_dirs: Vec, -} - -/// OSS configuration information to access blobs. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct OssConfig { - /// Oss http scheme, either 'http' or 'https' - #[serde(default = "default_http_scheme")] - pub scheme: String, - /// Oss endpoint - pub endpoint: String, - /// Oss bucket name - pub bucket_name: String, - /// Prefix object_prefix to OSS object key, for example the simulation of subdirectory: - /// - object_key: sha256:xxx - /// - object_prefix: nydus/ - /// - object_key with object_prefix: nydus/sha256:xxx - #[serde(default)] - pub object_prefix: String, - /// Oss access key - #[serde(default)] - pub access_key_id: String, - /// Oss secret - #[serde(default)] - pub access_key_secret: String, - /// Skip SSL certificate validation for HTTPS scheme. 
- #[serde(default)] - pub skip_verify: bool, - /// Drop the read request once http request timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub timeout: u32, - /// Drop the read request once http connection timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub connect_timeout: u32, - /// Retry count when read request failed. - #[serde(default)] - pub retry_limit: u8, - /// Enable HTTP proxy for the read request. - #[serde(default)] - pub proxy: ProxyConfig, - /// Enable mirrors for the read request. - #[serde(default)] - pub mirrors: Vec, -} - -/// S3 configuration information to access blobs. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct S3Config { - /// S3 http scheme, either 'http' or 'https' - #[serde(default = "default_http_scheme")] - pub scheme: String, - /// S3 endpoint - pub endpoint: String, - /// S3 region - pub region: String, - /// S3 bucket name - pub bucket_name: String, - /// Prefix object_prefix to S3 object key, for example the simulation of subdirectory: - /// - object_key: sha256:xxx - /// - object_prefix: nydus/ - /// - object_key with object_prefix: nydus/sha256:xxx - #[serde(default)] - pub object_prefix: String, - /// S3 access key - #[serde(default)] - pub access_key_id: String, - /// S3 secret - #[serde(default)] - pub access_key_secret: String, - /// Skip SSL certificate validation for HTTPS scheme. - #[serde(default)] - pub skip_verify: bool, - /// Drop the read request once http request timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub timeout: u32, - /// Drop the read request once http connection timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub connect_timeout: u32, - /// Retry count when read request failed. - #[serde(default)] - pub retry_limit: u8, - /// Enable HTTP proxy for the read request. - #[serde(default)] - pub proxy: ProxyConfig, - /// Enable mirrors for the read request. - #[serde(default)] - pub mirrors: Vec, -} - -/// Http proxy configuration information to access blobs. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct HttpProxyConfig { - /// Address of http proxy server, like `http://xxx.xxx` or `https://xxx.xxx` or `/path/to/unix.sock`. - pub addr: String, - /// Path to access the blobs, like `/<_namespace>/<_repo>/blobs`. - /// If the http proxy server is over unix socket, this field will be ignored. - #[serde(default)] - pub path: String, - /// Skip SSL certificate validation for HTTPS scheme. - #[serde(default)] - pub skip_verify: bool, - /// Drop the read request once http request timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub timeout: u32, - /// Drop the read request once http connection timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub connect_timeout: u32, - /// Retry count when read request failed. - #[serde(default)] - pub retry_limit: u8, - /// Enable HTTP proxy for the read request. - #[serde(default)] - pub proxy: ProxyConfig, - /// Enable mirrors for the read request. - #[serde(default)] - pub mirrors: Vec, -} - -/// Container registry configuration information to access blobs. 
-#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct RegistryConfig { - /// Registry http scheme, either 'http' or 'https' - #[serde(default = "default_http_scheme")] - pub scheme: String, - /// Registry url host - pub host: String, - /// Registry image name, like 'library/ubuntu' - pub repo: String, - /// Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. - #[serde(default)] - pub auth: Option, - /// Skip SSL certificate validation for HTTPS scheme. - #[serde(default)] - pub skip_verify: bool, - /// Drop the read request once http request timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub timeout: u32, - /// Drop the read request once http connection timeout, in seconds. - #[serde(default = "default_http_timeout")] - pub connect_timeout: u32, - /// Retry count when read request failed. - #[serde(default)] - pub retry_limit: u8, - /// The field is a bearer token to be sent to registry to authorize registry requests. - #[serde(default)] - pub registry_token: Option, - /// The http scheme to access blobs. It is used to workaround some P2P subsystem - /// that requires a different scheme than the registry. - #[serde(default)] - pub blob_url_scheme: String, - /// Redirect blob access to a different host regardless of the one specified in 'host'. - #[serde(default)] - pub blob_redirected_host: String, - /// Enable HTTP proxy for the read request. - #[serde(default)] - pub proxy: ProxyConfig, - /// Enable mirrors for the read request. - #[serde(default)] - pub mirrors: Vec, -} - -/// Configuration information for blob cache manager. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct CacheConfigV2 { - /// Type of blob cache: "blobcache", "fscache" or "dummy" - #[serde(default, rename = "type")] - pub cache_type: String, - /// Whether the data from the cache is compressed, not used anymore. - #[serde(default, rename = "compressed")] - pub cache_compressed: bool, - /// Whether to validate data read from the cache. - #[serde(default, rename = "validate")] - pub cache_validate: bool, - /// Configuration for blob level prefetch. - #[serde(default)] - pub prefetch: PrefetchConfigV2, - /// Configuration information for file cache - #[serde(rename = "filecache")] - pub file_cache: Option, - #[serde(rename = "fscache")] - /// Configuration information for fscache - pub fs_cache: Option, -} - -impl CacheConfigV2 { - /// Validate cache configuration information. - pub fn validate(&self) -> bool { - match self.cache_type.as_str() { - "blobcache" | "filecache" => { - if let Some(c) = self.file_cache.as_ref() { - if c.work_dir.is_empty() { - return false; - } - } else { - return false; - } - } - "fscache" => { - if let Some(c) = self.fs_cache.as_ref() { - if c.work_dir.is_empty() { - return false; - } - } else { - return false; - } - } - "" | "dummycache" => {} - _ => return false, - } - - if self.prefetch.enable { - if self.prefetch.batch_size > 0x10000000 { - return false; - } - if self.prefetch.threads_count == 0 || self.prefetch.threads_count > 1024 { - return false; - } - } - - true - } - - /// Check whether the cache type is `filecache` - pub fn is_filecache(&self) -> bool { - self.cache_type == "blobcache" || self.cache_type == "filecache" - } - - /// Check whether the cache type is `fscache` - pub fn is_fscache(&self) -> bool { - self.cache_type == "fscache" - } - - /// Get configuration information for file cache. 
- pub fn get_filecache_config(&self) -> Result<&FileCacheConfig> { - if self.is_filecache() { - self.file_cache.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidInput, - "no configuration information for filecache", - ) - }) - } else { - Err(Error::new( - ErrorKind::InvalidData, - "cache type is not 'filecache'", - )) - } - } - - /// Get configuration information for fscache. - pub fn get_fscache_config(&self) -> Result<&FsCacheConfig> { - if self.is_fscache() { - self.fs_cache.as_ref().ok_or_else(|| { - Error::new( - ErrorKind::InvalidData, - "no configuration information for fscache", - ) - }) - } else { - Err(Error::new( - ErrorKind::InvalidInput, - "cache type is not 'fscache'", - )) - } - } -} - -/// Configuration information for file cache. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct FileCacheConfig { - /// Working directory to store state and cached files. - #[serde(default = "default_work_dir")] - pub work_dir: String, - /// Deprecated: disable index mapping, keep it as false when possible. - #[serde(default)] - pub disable_indexed_map: bool, - /// Enable encryption data written to the cache file. - #[serde(default)] - pub enable_encryption: bool, - /// Enable convergent encryption for chunk deduplication. - #[serde(default)] - pub enable_convergent_encryption: bool, - /// Key for data encryption, a heximal representation of [u8; 32]. - #[serde(default)] - pub encryption_key: String, -} - -impl FileCacheConfig { - /// Get the working directory. - pub fn get_work_dir(&self) -> Result<&str> { - let path = fs::metadata(&self.work_dir) - .or_else(|_| { - fs::create_dir_all(&self.work_dir)?; - fs::metadata(&self.work_dir) - }) - .map_err(|e| { - log::error!("fail to stat filecache work_dir {}: {}", self.work_dir, e); - e - })?; - - if path.is_dir() { - Ok(&self.work_dir) - } else { - Err(Error::new( - ErrorKind::NotFound, - format!("filecache work_dir {} is not a directory", self.work_dir), - )) - } - } -} - -/// Configuration information for fscache. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct FsCacheConfig { - /// Working directory to store state and cached files. - #[serde(default = "default_work_dir")] - pub work_dir: String, -} - -impl FsCacheConfig { - /// Get the working directory. - pub fn get_work_dir(&self) -> Result<&str> { - let path = fs::metadata(&self.work_dir) - .or_else(|_| { - fs::create_dir_all(&self.work_dir)?; - fs::metadata(&self.work_dir) - }) - .map_err(|e| { - log::error!("fail to stat fscache work_dir {}: {}", self.work_dir, e); - e - })?; - - if path.is_dir() { - Ok(&self.work_dir) - } else { - Err(Error::new( - ErrorKind::NotFound, - format!("fscache work_dir {} is not a directory", self.work_dir), - )) - } - } -} - -/// Configuration information for RAFS filesystem. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct RafsConfigV2 { - /// Filesystem metadata cache mode. - #[serde(default = "default_rafs_mode")] - pub mode: String, - /// Amplified user IO request batch size to read data from remote storage backend / local cache. - #[serde(rename = "batch_size", default = "default_user_io_batch_size")] - pub user_io_batch_size: usize, - /// Whether to validate data digest. - #[serde(default)] - pub validate: bool, - /// Enable support of extended attributes. - #[serde(default)] - pub enable_xattr: bool, - /// Record file operation metrics for each file. 
- /// - /// Better to keep it off in production environment due to possible resource consumption. - #[serde(default)] - pub iostats_files: bool, - /// Record filesystem access pattern. - #[serde(default)] - pub access_pattern: bool, - /// Record file name if file access trace log. - #[serde(default)] - pub latest_read_files: bool, - /// Filesystem prefetching configuration. - #[serde(default)] - pub prefetch: PrefetchConfigV2, -} - -impl RafsConfigV2 { - /// Validate RAFS filesystem configuration information. - pub fn validate(&self) -> bool { - if self.mode != "direct" && self.mode != "cached" { - return false; - } - if self.user_io_batch_size > 0x10000000 { - return false; - } - if self.prefetch.enable { - if self.prefetch.batch_size > 0x10000000 { - return false; - } - if self.prefetch.threads_count == 0 || self.prefetch.threads_count > 1024 { - return false; - } - } - - true - } -} - -/// Configuration information for blob data prefetching. -#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] -pub struct PrefetchConfigV2 { - /// Whether to enable blob data prefetching. - pub enable: bool, - /// Number of data prefetching working threads. - #[serde(rename = "threads", default = "default_prefetch_threads_count")] - pub threads_count: usize, - /// The amplify batch size to prefetch data from backend. - #[serde(default = "default_prefetch_batch_size")] - pub batch_size: usize, - /// Network bandwidth rate limit in unit of Bytes and Zero means no limit. - #[serde(default)] - pub bandwidth_limit: u32, - /// Prefetch all data from backend. - #[serde(default)] - pub prefetch_all: bool, -} - -/// Configuration information for network proxy. -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct ProxyConfig { - /// Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. - #[serde(default)] - pub url: String, - /// Proxy health checking endpoint. - #[serde(default)] - pub ping_url: String, - /// Fallback to remote storage backend if proxy ping failed. - #[serde(default = "default_true")] - pub fallback: bool, - /// Interval for proxy health checking, in seconds. - #[serde(default = "default_check_interval")] - pub check_interval: u64, - /// Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. - #[serde(default)] - pub use_http: bool, - /// Elapsed time to pause proxy health check when the request is inactive, in seconds. - #[serde(default = "default_check_pause_elapsed")] - pub check_pause_elapsed: u64, -} - -impl Default for ProxyConfig { - fn default() -> Self { - Self { - url: String::new(), - ping_url: String::new(), - fallback: true, - check_interval: 5, - use_http: false, - check_pause_elapsed: 300, - } - } -} - -/// Configuration for registry mirror. -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct MirrorConfig { - /// Mirror server URL, for example http://127.0.0.1:65001. - pub host: String, - /// Ping URL to check mirror server health. - #[serde(default)] - pub ping_url: String, - /// HTTP request headers to be passed to mirror server. - #[serde(default)] - pub headers: HashMap, - /// Interval for mirror health checking, in seconds. - #[serde(default = "default_check_interval")] - pub health_check_interval: u64, - /// Maximum number of failures before marking a mirror as unusable. 
- #[serde(default = "default_failure_limit")] - pub failure_limit: u8, - /// Elapsed time to pause mirror health check when the request is inactive, in seconds. - #[serde(default = "default_check_pause_elapsed")] - pub health_check_pause_elapsed: u64, -} - -impl Default for MirrorConfig { - fn default() -> Self { - Self { - host: String::new(), - headers: HashMap::new(), - health_check_interval: 5, - failure_limit: 5, - ping_url: String::new(), - health_check_pause_elapsed: 300, - } - } -} - -/// Configuration information for a cached blob`. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct BlobCacheEntryConfigV2 { - /// Configuration file format version number, must be 2. - pub version: u32, - /// Identifier for the instance. - #[serde(default)] - pub id: String, - /// Configuration information for storage backend. - #[serde(default)] - pub backend: BackendConfigV2, - /// Configuration information for local cache system. - #[serde(default)] - pub cache: CacheConfigV2, - /// Optional file path for metadata blob. - #[serde(default)] - pub metadata_path: Option, -} - -impl BlobCacheEntryConfigV2 { - /// Read configuration information from a file. - pub fn from_file>(path: P) -> Result { - let md = fs::metadata(path.as_ref())?; - if md.len() > 0x100000 { - return Err(Error::new( - ErrorKind::InvalidInput, - "configuration file size is too big", - )); - } - let content = fs::read_to_string(path)?; - Self::from_str(&content) - } - - /// Validate the configuration object. - pub fn validate(&self) -> bool { - if self.version != 2 { - return false; - } - let config: ConfigV2 = self.into(); - config.validate() - } -} - -impl FromStr for BlobCacheEntryConfigV2 { - type Err = Error; - - fn from_str(s: &str) -> Result { - if let Ok(v) = serde_json::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - if let Ok(v) = toml::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - Err(Error::new( - ErrorKind::InvalidInput, - "failed to parse configuration information", - )) - } -} - -impl From<&BlobCacheEntryConfigV2> for ConfigV2 { - fn from(c: &BlobCacheEntryConfigV2) -> Self { - ConfigV2 { - version: c.version, - id: c.id.clone(), - backend: Some(c.backend.clone()), - cache: Some(c.cache.clone()), - rafs: None, - overlay: None, - internal: ConfigV2Internal::default(), - } - } -} - -/// Internal runtime configuration. -#[derive(Clone, Debug)] -pub struct ConfigV2Internal { - /// It's possible to access the raw or more blob objects. - pub blob_accessible: Arc, -} - -impl Default for ConfigV2Internal { - fn default() -> Self { - ConfigV2Internal { - blob_accessible: Arc::new(AtomicBool::new(false)), - } - } -} - -impl PartialEq for ConfigV2Internal { - fn eq(&self, other: &Self) -> bool { - self.blob_accessible() == other.blob_accessible() - } -} - -impl Eq for ConfigV2Internal {} - -impl ConfigV2Internal { - /// Get the auto-probe flag. - pub fn blob_accessible(&self) -> bool { - self.blob_accessible.load(Ordering::Relaxed) - } - - /// Set the auto-probe flag. - pub fn set_blob_accessible(&self, accessible: bool) { - self.blob_accessible.store(accessible, Ordering::Relaxed); - } -} - -/// Blob cache object type for nydus/rafs bootstrap blob. -pub const BLOB_CACHE_TYPE_META_BLOB: &str = "bootstrap"; -/// Blob cache object type for nydus/rafs data blob. 
-pub const BLOB_CACHE_TYPE_DATA_BLOB: &str = "datablob"; - -/// Configuration information for a cached blob. -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct BlobCacheEntry { - /// Type of blob object, bootstrap or data blob. - #[serde(rename = "type")] - pub blob_type: String, - /// Blob id. - #[serde(rename = "id")] - pub blob_id: String, - /// Configuration information to generate blob cache object. - #[serde(default, rename = "config")] - pub(crate) blob_config_legacy: Option, - /// Configuration information to generate blob cache object. - #[serde(default, rename = "config_v2")] - pub blob_config: Option, - /// Domain id for the blob, which is used to group cached blobs into management domains. - #[serde(default)] - pub domain_id: String, -} - -impl BlobCacheEntry { - pub fn prepare_configuration_info(&mut self) -> bool { - if self.blob_config.is_none() { - if let Some(legacy) = self.blob_config_legacy.as_ref() { - match legacy.try_into() { - Err(_) => return false, - Ok(v) => self.blob_config = Some(v), - } - } - } - - match self.blob_config.as_ref() { - None => false, - Some(cfg) => cfg.cache.validate() && cfg.backend.validate(), - } - } -} - -impl BlobCacheEntry { - /// Read configuration information from a file. - pub fn from_file>(path: P) -> Result { - let md = fs::metadata(path.as_ref())?; - if md.len() > 0x100000 { - return Err(Error::new( - ErrorKind::InvalidInput, - "configuration file size is too big", - )); - } - let content = fs::read_to_string(path)?; - Self::from_str(&content) - } - - /// Validate the configuration object. - pub fn validate(&self) -> bool { - if self.blob_type != BLOB_CACHE_TYPE_META_BLOB - && self.blob_type != BLOB_CACHE_TYPE_DATA_BLOB - { - log::warn!("invalid blob type {} for blob cache entry", self.blob_type); - return false; - } - if let Some(config) = self.blob_config.as_ref() { - if !config.validate() { - return false; - } - } - true - } -} - -impl FromStr for BlobCacheEntry { - type Err = Error; - - fn from_str(s: &str) -> Result { - if let Ok(v) = serde_json::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - if let Ok(v) = toml::from_str::(s) { - return if v.validate() { - Ok(v) - } else { - Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) - }; - } - Err(Error::new( - ErrorKind::InvalidInput, - "failed to parse configuration information", - )) - } -} - -/// Configuration information for a list of cached blob objects. -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct BlobCacheList { - /// List of blob configuration information. 
- pub blobs: Vec, -} - -fn default_true() -> bool { - true -} - -fn default_http_scheme() -> String { - "https".to_string() -} - -fn default_http_timeout() -> u32 { - 5 -} - -fn default_check_interval() -> u64 { - 5 -} - -fn default_check_pause_elapsed() -> u64 { - 300 -} - -fn default_failure_limit() -> u8 { - 5 -} - -fn default_work_dir() -> String { - ".".to_string() -} - -pub fn default_user_io_batch_size() -> usize { - 1024 * 1024 -} - -pub fn default_prefetch_batch_size() -> usize { - 1024 * 1024 -} - -fn default_prefetch_threads_count() -> usize { - 8 -} - -fn default_prefetch_all() -> bool { - true -} - -fn default_rafs_mode() -> String { - "direct".to_string() -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// For backward compatibility -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Configuration information for storage backend. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -struct BackendConfig { - /// Type of storage backend. - #[serde(rename = "type")] - pub backend_type: String, - /// Configuration for storage backend. - /// Possible value: `LocalFsConfig`, `RegistryConfig`, `OssConfig`, `LocalDiskConfig`. - #[serde(rename = "config")] - pub backend_config: Value, -} - -impl TryFrom<&BackendConfig> for BackendConfigV2 { - type Error = std::io::Error; - - fn try_from(value: &BackendConfig) -> std::result::Result { - let mut config = BackendConfigV2 { - backend_type: value.backend_type.clone(), - localdisk: None, - localfs: None, - oss: None, - s3: None, - registry: None, - http_proxy: None, - }; - - match value.backend_type.as_str() { - "localdisk" => { - config.localdisk = Some(serde_json::from_value(value.backend_config.clone())?); - } - "localfs" => { - config.localfs = Some(serde_json::from_value(value.backend_config.clone())?); - } - "oss" => { - config.oss = Some(serde_json::from_value(value.backend_config.clone())?); - } - "s3" => { - config.s3 = Some(serde_json::from_value(value.backend_config.clone())?); - } - "registry" => { - config.registry = Some(serde_json::from_value(value.backend_config.clone())?); - } - v => { - return Err(Error::new( - ErrorKind::InvalidInput, - format!("unsupported backend type '{}'", v), - )) - } - } - - Ok(config) - } -} - -/// Configuration information for blob cache manager. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -struct CacheConfig { - /// Type of blob cache: "blobcache", "fscache" or "" - #[serde(default, rename = "type")] - pub cache_type: String, - /// Whether the data from the cache is compressed, not used anymore. - #[serde(default, rename = "compressed")] - pub cache_compressed: bool, - /// Blob cache manager specific configuration: FileCacheConfig, FsCacheConfig. - #[serde(default, rename = "config")] - pub cache_config: Value, - /// Whether to validate data read from the cache. - #[serde(skip_serializing, skip_deserializing)] - pub cache_validate: bool, - /// Configuration for blob data prefetching. 
- #[serde(skip_serializing, skip_deserializing)] - pub prefetch_config: BlobPrefetchConfig, -} - -impl TryFrom<&CacheConfig> for CacheConfigV2 { - type Error = std::io::Error; - - fn try_from(v: &CacheConfig) -> std::result::Result { - let mut config = CacheConfigV2 { - cache_type: v.cache_type.clone(), - cache_compressed: v.cache_compressed, - cache_validate: v.cache_validate, - prefetch: (&v.prefetch_config).into(), - file_cache: None, - fs_cache: None, - }; - - match v.cache_type.as_str() { - "blobcache" | "filecache" => { - config.file_cache = Some(serde_json::from_value(v.cache_config.clone())?); - } - "fscache" => { - config.fs_cache = Some(serde_json::from_value(v.cache_config.clone())?); - } - "" | "dummycache" => {} - t => { - return Err(Error::new( - ErrorKind::InvalidInput, - format!("unsupported cache type '{}'", t), - )) - } - } - - Ok(config) - } -} - -/// Configuration information to create blob cache manager. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -struct FactoryConfig { - /// Id of the factory. - #[serde(default)] - pub id: String, - /// Configuration for storage backend. - pub backend: BackendConfig, - /// Configuration for blob cache manager. - #[serde(default)] - pub cache: CacheConfig, -} - -/// Rafs storage backend configuration information. -#[derive(Clone, Default, Deserialize)] -struct RafsConfig { - /// Configuration for storage subsystem. - pub device: FactoryConfig, - /// Filesystem working mode. - pub mode: String, - /// Whether to validate data digest before use. - #[serde(default)] - pub digest_validate: bool, - /// Io statistics. - #[serde(default)] - pub iostats_files: bool, - /// Filesystem prefetching configuration. - #[serde(default)] - pub fs_prefetch: FsPrefetchControl, - /// Enable extended attributes. - #[serde(default)] - pub enable_xattr: bool, - /// Record filesystem access pattern. - #[serde(default)] - pub access_pattern: bool, - /// Record file name if file access trace log. - #[serde(default)] - pub latest_read_files: bool, - // Amplified user IO request batch size to read data from remote storage backend / local cache. - // ZERO value means, amplifying user io is not enabled. - #[serde(rename = "amplify_io", default = "default_user_io_batch_size")] - pub user_io_batch_size: usize, -} - -impl TryFrom for ConfigV2 { - type Error = std::io::Error; - - fn try_from(v: RafsConfig) -> std::result::Result { - let backend: BackendConfigV2 = (&v.device.backend).try_into()?; - let mut cache: CacheConfigV2 = (&v.device.cache).try_into()?; - let rafs = RafsConfigV2 { - mode: v.mode, - user_io_batch_size: v.user_io_batch_size, - validate: v.digest_validate, - enable_xattr: v.enable_xattr, - iostats_files: v.iostats_files, - access_pattern: v.access_pattern, - latest_read_files: v.latest_read_files, - prefetch: v.fs_prefetch.into(), - }; - if !cache.prefetch.enable && rafs.prefetch.enable { - cache.prefetch = rafs.prefetch.clone(); - } - - Ok(ConfigV2 { - version: 2, - id: v.device.id, - backend: Some(backend), - cache: Some(cache), - rafs: Some(rafs), - overlay: None, - internal: ConfigV2Internal::default(), - }) - } -} - -/// Configuration information for filesystem data prefetch. -#[derive(Clone, Default, Deserialize)] -struct FsPrefetchControl { - /// Whether the filesystem layer data prefetch is enabled or not. - #[serde(default)] - pub enable: bool, - - /// How many working threads to prefetch data. 
- #[serde(default = "default_prefetch_threads_count")] - pub threads_count: usize, - - /// The amplify batch size to prefetch data from backend. - #[serde(rename = "merging_size", default = "default_prefetch_batch_size")] - pub batch_size: usize, - - /// Network bandwidth limitation for prefetching. - /// - /// In unit of Bytes. It sets a limit to prefetch bandwidth usage in order to - /// reduce congestion with normal user IO. - /// bandwidth_limit == 0 -- prefetch bandwidth ratelimit disabled - /// bandwidth_limit > 0 -- prefetch bandwidth ratelimit enabled. - /// Please note that if the value is less than Rafs chunk size, - /// it will be raised to the chunk size. - #[serde(default, rename = "bandwidth_rate")] - pub bandwidth_limit: u32, - - /// Whether to prefetch all filesystem data. - #[serde(default = "default_prefetch_all")] - pub prefetch_all: bool, -} - -impl From for PrefetchConfigV2 { - fn from(v: FsPrefetchControl) -> Self { - PrefetchConfigV2 { - enable: v.enable, - threads_count: v.threads_count, - batch_size: v.batch_size, - bandwidth_limit: v.bandwidth_limit, - prefetch_all: v.prefetch_all, - } - } -} - -/// Configuration information for blob data prefetching. -#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] -struct BlobPrefetchConfig { - /// Whether to enable blob data prefetching. - pub enable: bool, - /// Number of data prefetching working threads. - pub threads_count: usize, - /// The amplify batch size to prefetch data from backend. - #[serde(rename = "merging_size")] - pub batch_size: usize, - /// Network bandwidth rate limit in unit of Bytes and Zero means no limit. - #[serde(rename = "bandwidth_rate")] - pub bandwidth_limit: u32, -} - -impl From<&BlobPrefetchConfig> for PrefetchConfigV2 { - fn from(v: &BlobPrefetchConfig) -> Self { - PrefetchConfigV2 { - enable: v.enable, - threads_count: v.threads_count, - batch_size: v.batch_size, - bandwidth_limit: v.bandwidth_limit, - prefetch_all: true, - } - } -} - -/// Configuration information for a cached blob, corresponding to `FactoryConfig`. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub(crate) struct BlobCacheEntryConfig { - /// Identifier for the blob cache configuration: corresponding to `FactoryConfig::id`. - #[serde(default)] - id: String, - /// Type of storage backend, corresponding to `FactoryConfig::BackendConfig::backend_type`. - backend_type: String, - /// Configuration for storage backend, corresponding to `FactoryConfig::BackendConfig::backend_config`. - /// - /// Possible value: `LocalFsConfig`, `RegistryConfig`, `OssConfig`, `LocalDiskConfig`. - backend_config: Value, - /// Type of blob cache, corresponding to `FactoryConfig::CacheConfig::cache_type`. - /// - /// Possible value: "fscache", "filecache". - cache_type: String, - /// Configuration for blob cache, corresponding to `FactoryConfig::CacheConfig::cache_config`. - /// - /// Possible value: `FileCacheConfig`, `FsCacheConfig`. - cache_config: Value, - /// Configuration for data prefetch. - #[serde(default)] - prefetch_config: BlobPrefetchConfig, - /// Optional file path for metadata blobs. 
- #[serde(default)] - metadata_path: Option, -} - -impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 { - type Error = std::io::Error; - - fn try_from(v: &BlobCacheEntryConfig) -> std::result::Result { - let backend_config = BackendConfig { - backend_type: v.backend_type.clone(), - backend_config: v.backend_config.clone(), - }; - let cache_config = CacheConfig { - cache_type: v.cache_type.clone(), - cache_compressed: false, - cache_config: v.cache_config.clone(), - cache_validate: false, - prefetch_config: v.prefetch_config.clone(), - }; - Ok(BlobCacheEntryConfigV2 { - version: 2, - id: v.id.clone(), - backend: (&backend_config).try_into()?, - cache: (&cache_config).try_into()?, - metadata_path: v.metadata_path.clone(), - }) - } -} - -/// Configuration information for Overlay filesystem. -/// OverlayConfig is used to configure the writable layer(upper layer), -/// The filesystem will be writable when OverlayConfig is set. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct OverlayConfig { - pub upper_dir: String, - pub work_dir: String, -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{BlobCacheEntry, BLOB_CACHE_TYPE_META_BLOB}; - - #[test] - fn test_blob_prefetch_config() { - let config = BlobPrefetchConfig::default(); - assert!(!config.enable); - assert_eq!(config.threads_count, 0); - assert_eq!(config.batch_size, 0); - assert_eq!(config.bandwidth_limit, 0); - - let content = r#"{ - "enable": true, - "threads_count": 2, - "merging_size": 4, - "bandwidth_rate": 5 - }"#; - let config: BlobPrefetchConfig = serde_json::from_str(content).unwrap(); - assert!(config.enable); - assert_eq!(config.threads_count, 2); - assert_eq!(config.batch_size, 4); - assert_eq!(config.bandwidth_limit, 5); - - let config: PrefetchConfigV2 = (&config).into(); - assert!(config.enable); - assert_eq!(config.threads_count, 2); - assert_eq!(config.batch_size, 4); - assert_eq!(config.bandwidth_limit, 5); - assert!(config.prefetch_all); - } - - #[test] - fn test_file_cache_config() { - let config: FileCacheConfig = serde_json::from_str("{}").unwrap(); - assert_eq!(&config.work_dir, "."); - assert!(!config.disable_indexed_map); - - let config: FileCacheConfig = - serde_json::from_str("{\"work_dir\":\"/tmp\",\"disable_indexed_map\":true}").unwrap(); - assert_eq!(&config.work_dir, "/tmp"); - assert!(config.get_work_dir().is_ok()); - assert!(config.disable_indexed_map); - - let config: FileCacheConfig = - serde_json::from_str("{\"work_dir\":\"/proc/mounts\",\"disable_indexed_map\":true}") - .unwrap(); - assert!(config.get_work_dir().is_err()); - } - - #[test] - fn test_fs_cache_config() { - let config: FsCacheConfig = serde_json::from_str("{}").unwrap(); - assert_eq!(&config.work_dir, "."); - - let config: FileCacheConfig = serde_json::from_str("{\"work_dir\":\"/tmp\"}").unwrap(); - assert_eq!(&config.work_dir, "/tmp"); - assert!(config.get_work_dir().is_ok()); - - let config: FileCacheConfig = - serde_json::from_str("{\"work_dir\":\"/proc/mounts\"}").unwrap(); - assert!(config.get_work_dir().is_err()); - } - - #[test] - fn test_blob_cache_entry() { - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "prefetch_config": { - "enable": true, - "threads_count": 2, - "merging_size": 4, - "bandwidth_rate": 5 - }, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - let config: BlobCacheEntry = 
serde_json::from_str(content).unwrap(); - assert_eq!(&config.blob_type, BLOB_CACHE_TYPE_META_BLOB); - assert_eq!(&config.blob_id, "blob1"); - assert_eq!(&config.domain_id, "domain1"); - - let blob_config = config.blob_config_legacy.as_ref().unwrap(); - assert_eq!(blob_config.id, "cache1"); - assert_eq!(blob_config.backend_type, "localfs"); - assert_eq!(blob_config.cache_type, "fscache"); - assert!(blob_config.cache_config.is_object()); - assert!(blob_config.prefetch_config.enable); - assert_eq!(blob_config.prefetch_config.threads_count, 2); - assert_eq!(blob_config.prefetch_config.batch_size, 4); - assert_eq!( - blob_config.metadata_path.as_ref().unwrap().as_str(), - "/tmp/metadata1" - ); - - let blob_config: BlobCacheEntryConfigV2 = blob_config.try_into().unwrap(); - assert_eq!(blob_config.id, "cache1"); - assert_eq!(blob_config.backend.backend_type, "localfs"); - assert_eq!(blob_config.cache.cache_type, "fscache"); - assert!(blob_config.cache.fs_cache.is_some()); - assert!(blob_config.cache.prefetch.enable); - assert_eq!(blob_config.cache.prefetch.threads_count, 2); - assert_eq!(blob_config.cache.prefetch.batch_size, 4); - assert_eq!( - blob_config.metadata_path.as_ref().unwrap().as_str(), - "/tmp/metadata1" - ); - - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - let config: BlobCacheEntry = serde_json::from_str(content).unwrap(); - let blob_config = config.blob_config_legacy.as_ref().unwrap(); - assert!(!blob_config.prefetch_config.enable); - assert_eq!(blob_config.prefetch_config.threads_count, 0); - assert_eq!(blob_config.prefetch_config.batch_size, 0); - } - - #[test] - fn test_proxy_config() { - let content = r#"{ - "url": "foo.com", - "ping_url": "ping.foo.com", - "fallback": true - }"#; - let config: ProxyConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.url, "foo.com"); - assert_eq!(config.ping_url, "ping.foo.com"); - assert!(config.fallback); - assert_eq!(config.check_interval, 5); - } - - #[test] - fn test_oss_config() { - let content = r#"{ - "endpoint": "test", - "access_key_id": "test", - "access_key_secret": "test", - "bucket_name": "antsys-nydus", - "object_prefix":"nydus_v2/" - }"#; - let config: OssConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.scheme, "https"); - assert!(!config.skip_verify); - assert_eq!(config.timeout, 5); - assert_eq!(config.connect_timeout, 5); - } - - #[test] - fn test_s3_config() { - let content = r#"{ - "endpoint": "test", - "region": "us-east-1", - "access_key_id": "test", - "access_key_secret": "test", - "bucket_name": "antsys-nydus", - "object_prefix":"nydus_v2/" - }"#; - let config: OssConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.scheme, "https"); - assert!(!config.skip_verify); - assert_eq!(config.timeout, 5); - assert_eq!(config.connect_timeout, 5); - } - - #[test] - fn test_registry_config() { - let content = r#"{ - "scheme": "http", - "skip_verify": true, - "host": "my-registry:5000", - "repo": "test/repo", - "auth": "base64_encoded_auth", - "registry_token": "bearer_token", - "blob_redirected_host": "blob_redirected_host" - }"#; - let config: RegistryConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.scheme, "http"); - assert!(config.skip_verify); - } - - #[test] - fn test_localfs_config() { - let content = r#"{ - "blob_file": "blob_file", 
- "dir": "blob_dir", - "alt_dirs": ["dir1", "dir2"] - }"#; - let config: LocalFsConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.blob_file, "blob_file"); - assert_eq!(config.dir, "blob_dir"); - assert_eq!(config.alt_dirs, vec!["dir1", "dir2"]); - } - - #[test] - fn test_localdisk_config() { - let content = r#"{ - "device_path": "device_path" - }"#; - let config: LocalDiskConfig = serde_json::from_str(content).unwrap(); - assert_eq!(config.device_path, "device_path"); - } - - #[test] - fn test_backend_config() { - let config = BackendConfig { - backend_type: "localfs".to_string(), - backend_config: Default::default(), - }; - let str_val = serde_json::to_string(&config).unwrap(); - let config2 = serde_json::from_str(&str_val).unwrap(); - - assert_eq!(config, config2); - } - - #[test] - fn test_v2_version() { - let content = "version=2"; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_none()); - } - - #[test] - fn test_v2_backend() { - let content = r#"version=2 - [backend] - type = "localfs" - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_some()); - assert!(config.cache.is_none()); - - let backend = config.backend.as_ref().unwrap(); - assert_eq!(&backend.backend_type, "localfs"); - assert!(backend.localfs.is_none()); - assert!(backend.oss.is_none()); - assert!(backend.registry.is_none()); - } - - #[test] - fn test_v2_backend_localfs() { - let content = r#"version=2 - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_some()); - - let backend = config.backend.as_ref().unwrap(); - assert_eq!(&backend.backend_type, "localfs"); - assert!(backend.localfs.is_some()); - - let localfs = backend.localfs.as_ref().unwrap(); - assert_eq!(&localfs.blob_file, "/tmp/nydus.blob.data"); - assert_eq!(&localfs.dir, "/tmp"); - assert_eq!(&localfs.alt_dirs[0], "/var/nydus/cache"); - } - - #[test] - fn test_v2_backend_oss() { - let content = r#"version=2 - [backend] - type = "oss" - [backend.oss] - endpoint = "my_endpoint" - bucket_name = "my_bucket_name" - object_prefix = "my_object_prefix" - access_key_id = "my_access_key_id" - access_key_secret = "my_access_key_secret" - scheme = "http" - skip_verify = true - timeout = 10 - connect_timeout = 10 - retry_limit = 5 - [backend.oss.proxy] - url = "localhost:6789" - ping_url = "localhost:6789/ping" - fallback = true - check_interval = 10 - use_http = true - [[backend.oss.mirrors]] - host = "http://127.0.0.1:65001" - ping_url = "http://127.0.0.1:65001/ping" - health_check_interval = 10 - failure_limit = 10 - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_some()); - assert!(config.rafs.is_none()); - - let backend = config.backend.as_ref().unwrap(); - assert_eq!(&backend.backend_type, "oss"); - assert!(backend.oss.is_some()); - - let oss = backend.oss.as_ref().unwrap(); - assert_eq!(&oss.endpoint, "my_endpoint"); - assert_eq!(&oss.bucket_name, "my_bucket_name"); - assert_eq!(&oss.object_prefix, "my_object_prefix"); - assert_eq!(&oss.access_key_id, "my_access_key_id"); - assert_eq!(&oss.access_key_secret, "my_access_key_secret"); - assert_eq!(&oss.scheme, "http"); - assert!(oss.skip_verify); - assert_eq!(oss.timeout, 10); 
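// Editorial sketch, not part of the upstream tests: the same v2 TOML layout used
// for the OSS backend also covers the S3 backend; `[backend.s3]` must provide
// `endpoint`, `region` and `bucket_name` to deserialize, and the latter two must be
// non-empty to pass validation. All values below are illustrative.
fn _s3_toml_sketch() {
    let content = r#"version = 2
        [backend]
        type = "s3"
        [backend.s3]
        endpoint = "s3.example.com"
        region = "us-east-1"
        bucket_name = "my-bucket"
        object_prefix = "nydus/"
    "#;
    let config: ConfigV2 = toml::from_str(content).unwrap();
    assert!(config.validate());
    assert!(config.backend.as_ref().unwrap().s3.is_some());
}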
- assert_eq!(oss.connect_timeout, 10); - assert_eq!(oss.retry_limit, 5); - assert_eq!(&oss.proxy.url, "localhost:6789"); - assert_eq!(&oss.proxy.ping_url, "localhost:6789/ping"); - assert_eq!(oss.proxy.check_interval, 10); - assert!(oss.proxy.fallback); - assert!(oss.proxy.use_http); - - assert_eq!(oss.mirrors.len(), 1); - let mirror = &oss.mirrors[0]; - assert_eq!(mirror.host, "http://127.0.0.1:65001"); - assert_eq!(mirror.ping_url, "http://127.0.0.1:65001/ping"); - assert!(mirror.headers.is_empty()); - assert_eq!(mirror.health_check_interval, 10); - assert_eq!(mirror.failure_limit, 10); - } - - #[test] - fn test_v2_backend_registry() { - let content = r#"version=2 - [backend] - type = "registry" - [backend.registry] - scheme = "http" - host = "localhost" - repo = "nydus" - auth = "auth" - skip_verify = true - timeout = 10 - connect_timeout = 10 - retry_limit = 5 - registry_token = "bear_token" - blob_url_scheme = "https" - blob_redirected_host = "redirect.registry.com" - [backend.registry.proxy] - url = "localhost:6789" - ping_url = "localhost:6789/ping" - fallback = true - check_interval = 10 - use_http = true - [[backend.registry.mirrors]] - host = "http://127.0.0.1:65001" - ping_url = "http://127.0.0.1:65001/ping" - health_check_interval = 10 - failure_limit = 10 - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_some()); - assert!(config.rafs.is_none()); - - let backend = config.backend.as_ref().unwrap(); - assert_eq!(&backend.backend_type, "registry"); - assert!(backend.registry.is_some()); - - let registry = backend.registry.as_ref().unwrap(); - assert_eq!(®istry.scheme, "http"); - assert_eq!(®istry.host, "localhost"); - assert_eq!(®istry.repo, "nydus"); - assert_eq!(registry.auth.as_ref().unwrap(), "auth"); - assert!(registry.skip_verify); - assert_eq!(registry.timeout, 10); - assert_eq!(registry.connect_timeout, 10); - assert_eq!(registry.retry_limit, 5); - assert_eq!(registry.registry_token.as_ref().unwrap(), "bear_token"); - assert_eq!(registry.blob_url_scheme, "https"); - assert_eq!(registry.blob_redirected_host, "redirect.registry.com"); - - assert_eq!(®istry.proxy.url, "localhost:6789"); - assert_eq!(®istry.proxy.ping_url, "localhost:6789/ping"); - assert_eq!(registry.proxy.check_interval, 10); - assert!(registry.proxy.fallback); - assert!(registry.proxy.use_http); - - assert_eq!(registry.mirrors.len(), 1); - let mirror = ®istry.mirrors[0]; - assert_eq!(mirror.host, "http://127.0.0.1:65001"); - assert_eq!(mirror.ping_url, "http://127.0.0.1:65001/ping"); - assert!(mirror.headers.is_empty()); - assert_eq!(mirror.health_check_interval, 10); - assert_eq!(mirror.failure_limit, 10); - } - - #[test] - fn test_v2_cache() { - let content = r#"version=2 - [cache] - type = "filecache" - compressed = true - validate = true - [cache.filecache] - work_dir = "/tmp" - [cache.fscache] - work_dir = "./" - [cache.prefetch] - enable = true - threads = 8 - batch_size = 1000000 - bandwidth_limit = 10000000 - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_none()); - assert!(config.rafs.is_none()); - assert!(config.cache.is_some()); - - let cache = config.cache.as_ref().unwrap(); - assert_eq!(&cache.cache_type, "filecache"); - assert!(cache.cache_compressed); - assert!(cache.cache_validate); - let filecache = cache.file_cache.as_ref().unwrap(); - assert_eq!(&filecache.work_dir, "/tmp"); - let fscache = cache.fs_cache.as_ref().unwrap(); - 
assert_eq!(&fscache.work_dir, "./"); - - let prefetch = &cache.prefetch; - assert!(prefetch.enable); - assert_eq!(prefetch.threads_count, 8); - assert_eq!(prefetch.batch_size, 1000000); - assert_eq!(prefetch.bandwidth_limit, 10000000); - } - - #[test] - fn test_v2_rafs() { - let content = r#"version=2 - [rafs] - mode = "direct" - batch_size = 1000000 - validate = true - enable_xattr = true - iostats_files = true - access_pattern = true - latest_read_files = true - [rafs.prefetch] - enable = true - threads = 4 - batch_size = 1000000 - bandwidth_limit = 10000000 - prefetch_all = true - "#; - let config: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert!(config.backend.is_none()); - assert!(config.cache.is_none()); - assert!(config.rafs.is_some()); - - let rafs = config.rafs.as_ref().unwrap(); - assert_eq!(&rafs.mode, "direct"); - assert_eq!(rafs.user_io_batch_size, 1000000); - assert!(rafs.validate); - assert!(rafs.enable_xattr); - assert!(rafs.iostats_files); - assert!(rafs.access_pattern); - assert!(rafs.latest_read_files); - assert!(rafs.prefetch.enable); - assert_eq!(rafs.prefetch.threads_count, 4); - assert_eq!(rafs.prefetch.batch_size, 1000000); - assert_eq!(rafs.prefetch.bandwidth_limit, 10000000); - assert!(rafs.prefetch.prefetch_all) - } - - #[test] - fn test_v2_blob_cache_entry() { - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "filecache" - compressed = true - validate = true - [cache.filecache] - work_dir = "/tmp" - "#; - let config: BlobCacheEntryConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert_eq!(&config.id, "my_id"); - assert_eq!(config.metadata_path.as_ref().unwrap(), "meta_path"); - - let backend = &config.backend; - assert_eq!(&backend.backend_type, "localfs"); - assert!(backend.localfs.is_some()); - - let localfs = backend.localfs.as_ref().unwrap(); - assert_eq!(&localfs.blob_file, "/tmp/nydus.blob.data"); - assert_eq!(&localfs.dir, "/tmp"); - assert_eq!(&localfs.alt_dirs[0], "/var/nydus/cache"); - } - - #[test] - fn test_sample_config_file() { - let content = r#"{ - "device": { - "backend": { - "type": "localfs", - "config": { - "dir": "/tmp/AM7TxD/blobs", - "readahead": true - } - }, - "cache": { - "type": "blobcache", - "compressed": true, - "config": { - "work_dir": "/tmp/AM7TxD/cache" - } - } - }, - "mode": "cached", - "digest_validate": true, - "iostats_files": false - } - "#; - let config = ConfigV2::from_str(content).unwrap(); - assert_eq!(&config.id, ""); - } - - #[test] - fn test_snapshotter_sample_config() { - let content = r#" - { - "device": { - "backend": { - "type": "registry", - "config": { - "readahead": false, - "host": "localhost", - "repo": "vke/golang", - "auth": "", - "scheme": "https", - "proxy": { - "fallback": false - }, - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 2 - } - }, - "cache": { - "type": "blobcache", - "compressed": true, - "config": { - "work_dir": "/var/lib/containerd-nydus/cache", - "disable_indexed_map": false - } - } - }, - "mode": "direct", - "digest_validate": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "prefetch_all": true, - "threads_count": 8, - "merging_size": 1048576, - "bandwidth_rate": 0 - } - } - "#; - let config = ConfigV2::from_str(content).unwrap(); - assert_eq!(&config.id, ""); - } - - #[test] - fn 
test_backend_http_proxy_config() { - let config = - r#"{"version":2,"backend":{"type":"http-proxy","http-proxy":{"addr":"/tmp"}}}"#; - let config = ConfigV2::from_str(config).unwrap(); - let backend = config.backend.unwrap(); - assert_eq!(&backend.backend_type, "http-proxy"); - assert_eq!(&backend.http_proxy.unwrap().addr, "/tmp"); - } - - #[test] - fn test_new_localfs() { - let config = ConfigV2::new_localfs("id1", "./").unwrap(); - assert_eq!(&config.id, "id1"); - assert_eq!(config.backend.as_ref().unwrap().backend_type, "localfs"); - } - - #[test] - fn test_update_registry_auth_info() { - let config = r#" - { - "device": { - "id": "test", - "backend": { - "type": "registry", - "config": { - "readahead": false, - "host": "docker.io", - "repo": "library/nginx", - "scheme": "https", - "proxy": { - "fallback": false - }, - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 8 - } - } - }, - "mode": "direct", - "digest_validate": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 10, - "merging_size": 131072, - "bandwidth_rate": 10485760 - } - }"#; - - let mut rafs_config = ConfigV2::from_str(&config).unwrap(); - let test_auth = "test_auth".to_string(); - - rafs_config.update_registry_auth_info(&Some(test_auth.clone())); - - let backend = rafs_config.backend.unwrap(); - let registry = backend.registry.unwrap(); - let auth = registry.auth.unwrap(); - assert_eq!(auth, test_auth); - } - - #[test] - fn test_config2_error() { - let content_bad_version = r#"version=3 - "#; - let cfg: ConfigV2 = toml::from_str(content_bad_version).unwrap(); - assert!(!cfg.validate()); - let cfg = ConfigV2::new("id"); - assert!(cfg.get_backend_config().is_err()); - assert!(cfg.get_cache_config().is_err()); - assert!(cfg.get_rafs_config().is_err()); - assert!(cfg.get_cache_working_directory().is_err()); - - let content = r#"version=2 - [cache] - type = "filecache" - [cache.filecache] - work_dir = "/tmp" - "#; - let cfg: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(cfg.get_cache_working_directory().unwrap(), "/tmp"); - - let content = r#"version=2 - [cache] - type = "fscache" - [cache.fscache] - work_dir = "./foo" - "#; - let cfg: ConfigV2 = toml::from_str(content).unwrap(); - assert_eq!(cfg.get_cache_working_directory().unwrap(), "./foo"); - - let content = r#"version=2 - [cache] - type = "bar" - "#; - let cfg: ConfigV2 = toml::from_str(content).unwrap(); - assert!(cfg.get_cache_working_directory().is_err()); - - let content = r#" - foo-bar-xxxx - "#; - assert!(toml::from_str::(content).is_err()); - } - - #[test] - fn test_backend_config_valid() { - let mut cfg = BackendConfigV2 { - backend_type: "localdisk".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - cfg.localdisk = Some(LocalDiskConfig { - device_path: "".to_string(), - disable_gpt: true, - }); - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: "localfs".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: "oss".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: "s3".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: "register".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: "http-proxy".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = BackendConfigV2 { - backend_type: 
"foobar".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - } - - fn get_config(backend_type: &str) { - let mut cfg: BackendConfigV2 = BackendConfigV2::default(); - assert!(cfg.get_localdisk_config().is_err()); - - cfg.backend_type = backend_type.to_string(); - assert!(cfg.get_localdisk_config().is_err()); - } - - #[test] - fn test_get_config() { - get_config("localdisk"); - get_config("localfs"); - get_config("oss"); - get_config("s3"); - get_config("register"); - get_config("http-proxy"); - } - - #[test] - fn test_cache_config_valid() { - let cfg = CacheConfigV2 { - cache_type: "blobcache".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = CacheConfigV2 { - cache_type: "fscache".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - - let cfg = CacheConfigV2 { - cache_type: "dummycache".to_string(), - ..Default::default() - }; - assert!(cfg.validate()); - - let cfg = CacheConfigV2 { - cache_type: "foobar".to_string(), - ..Default::default() - }; - assert!(!cfg.validate()); - } - - #[test] - fn test_get_fscache_config() { - let mut cfg = CacheConfigV2::default(); - assert!(cfg.get_fscache_config().is_err()); - cfg.cache_type = "fscache".to_string(); - assert!(cfg.get_fscache_config().is_err()); - } - - #[test] - fn test_fscache_get_work_dir() { - let mut cfg = FsCacheConfig::default(); - assert!(cfg.get_work_dir().is_err()); - cfg.work_dir = ".".to_string(); - assert!(cfg.get_work_dir().is_ok()); - cfg.work_dir = "foobar".to_string(); - let res = cfg.get_work_dir().is_ok(); - fs::remove_dir_all("foobar").unwrap(); - assert!(res); - } - - #[test] - fn test_default_mirror_config() { - let cfg = MirrorConfig::default(); - assert_eq!(cfg.host, ""); - assert_eq!(cfg.health_check_interval, 5); - assert_eq!(cfg.failure_limit, 5); - assert_eq!(cfg.ping_url, ""); - } - - #[test] - fn test_config_v2_from_file() { - let content = r#"version=2 - [cache] - type = "filecache" - [cache.filecache] - work_dir = "/tmp" - "#; - if fs::write("test_config_v2_from_file.cfg", content).is_ok() { - let res = ConfigV2::from_file("test_config_v2_from_file.cfg").is_ok(); - fs::remove_file("test_config_v2_from_file.cfg").unwrap(); - assert!(res); - } else { - assert!(ConfigV2::from_file("test_config_v2_from_file.cfg").is_err()); - } - } - - #[test] - fn test_blob_cache_entry_v2_from_file() { - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "filecache" - compressed = true - validate = true - [cache.filecache] - work_dir = "/tmp" - "#; - if fs::write("test_blob_cache_entry_v2_from_file.cfg", content).is_ok() { - let res = - BlobCacheEntryConfigV2::from_file("test_blob_cache_entry_v2_from_file.cfg").is_ok(); - fs::remove_file("test_blob_cache_entry_v2_from_file.cfg").unwrap(); - assert!(res); - } else { - assert!(ConfigV2::from_file("test_blob_cache_entry_v2_from_file.cfg").is_err()); - } - } - - #[test] - fn test_blob_cache_valid() { - let err_version_content = r#"version=1"#; - - let config: BlobCacheEntryConfigV2 = toml::from_str(err_version_content).unwrap(); - assert!(!config.validate()); - - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "filecache" - compressed = true - validate = true - 
[cache.filecache] - work_dir = "/tmp" - "#; - - let config: BlobCacheEntryConfigV2 = toml::from_str(content).unwrap(); - assert!(config.validate()); - } - - #[test] - fn test_blob_from_str() { - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "filecache" - compressed = true - validate = true - [cache.filecache] - work_dir = "/tmp" - "#; - - let config: BlobCacheEntryConfigV2 = BlobCacheEntryConfigV2::from_str(content).unwrap(); - assert_eq!(config.version, 2); - assert_eq!(config.id, "my_id"); - assert_eq!(config.backend.localfs.unwrap().dir, "/tmp"); - assert_eq!(config.cache.file_cache.unwrap().work_dir, "/tmp"); - let content = r#" - { - "version": 2, - "id": "my_id", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp" - } - } - } - "#; - let config: BlobCacheEntryConfigV2 = BlobCacheEntryConfigV2::from_str(content).unwrap(); - - assert_eq!(config.version, 2); - assert_eq!(config.id, "my_id"); - assert_eq!(config.backend.localfs.unwrap().dir, "/tmp"); - - let content = r#"foobar"#; - assert!(BlobCacheEntryConfigV2::from_str(content).is_err()); - } - - #[test] - fn test_blob_cache_entry_from_file() { - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - if fs::write("test_blob_cache_entry_from_file.cfg", content).is_ok() { - let res = BlobCacheEntry::from_file("test_blob_cache_entry_from_file.cfg").is_ok(); - fs::remove_file("test_blob_cache_entry_from_file.cfg").unwrap(); - assert!(res); - } else { - assert!(ConfigV2::from_file("test_blob_cache_entry_from_file.cfg").is_err()); - } - } - - #[test] - fn test_blob_cache_entry_valid() { - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - let mut cfg = BlobCacheEntry::from_str(content).unwrap(); - cfg.blob_type = "foobar".to_string(); - assert!(!cfg.validate()); - - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "domain_id": "domain1" - }"#; - let cfg = BlobCacheEntry::from_str(content).unwrap(); - assert!(cfg.validate()); - } - - #[test] - fn test_blob_cache_entry_from_str() { - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - assert!(BlobCacheEntry::from_str(content).is_ok()); - let content = r#"{ - "type": "foobar", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "foobar", - "backend_config": {}, - "cache_type": "foobar", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - assert!(BlobCacheEntry::from_str(content).is_err()); - - let content = r#"foobar"#; - assert!(BlobCacheEntry::from_str(content).is_err()); - } - - #[test] - fn test_default_value() { - assert!(default_true()); - assert_eq!(default_failure_limit(), 5); - assert_eq!(default_prefetch_batch_size(), 1024 * 1024); - assert_eq!(default_prefetch_threads_count(), 8); - } 
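// Editorial sketch, not part of the upstream test suite: the legacy `CacheConfig`
// ("type"/"config" JSON layout) converts to `CacheConfigV2` through the `TryFrom`
// impl defined in this file; "blobcache" is mapped onto the v2 filecache slot.
// Field values are illustrative.
#[test]
fn test_cache_config_try_from_sketch() {
    let legacy = CacheConfig {
        cache_type: "blobcache".to_string(),
        cache_compressed: false,
        cache_config: serde_json::json!({ "work_dir": "/tmp" }),
        cache_validate: true,
        prefetch_config: BlobPrefetchConfig::default(),
    };
    let v2: CacheConfigV2 = (&legacy).try_into().unwrap();
    assert!(v2.is_filecache());
    assert!(v2.cache_validate);
    assert_eq!(v2.file_cache.as_ref().unwrap().work_dir, "/tmp");
}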
- - #[test] - fn test_backend_config_try_from() { - let config = BackendConfig { - backend_type: "localdisk".to_string(), - backend_config: serde_json::to_value(LocalDiskConfig::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_ok()); - - let config = BackendConfig { - backend_type: "localfs".to_string(), - backend_config: serde_json::to_value(LocalFsConfig::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_ok()); - - let config = BackendConfig { - backend_type: "oss".to_string(), - backend_config: serde_json::to_value(OssConfig::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_ok()); - - let config = BackendConfig { - backend_type: "s3".to_string(), - backend_config: serde_json::to_value(S3Config::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_ok()); - - let config = BackendConfig { - backend_type: "registry".to_string(), - backend_config: serde_json::to_value(RegistryConfig::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_ok()); - - let config = BackendConfig { - backend_type: "foobar".to_string(), - backend_config: serde_json::to_value(LocalDiskConfig::default()).unwrap(), - }; - assert!(BackendConfigV2::try_from(&config).is_err()); - } -} +// Copyright 2022 Alibaba Cloud. All rights reserved. +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::fs; +use std::io::{Error, ErrorKind, Result}; +use std::path::Path; +use std::str::FromStr; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use serde::Deserialize; +use serde_json::Value; + +/// Configuration file format version 2, based on Toml. +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ConfigV2 { + /// Configuration file format version number, must be 2. + pub version: u32, + /// Identifier for the instance. + #[serde(default)] + pub id: String, + /// Configuration information for storage backend. + pub backend: Option, + /// Configuration information for local cache system. + pub cache: Option, + /// Configuration information for RAFS filesystem. + pub rafs: Option, + /// Overlay configuration information for the instance. + pub overlay: Option, + /// Internal runtime configuration. + #[serde(skip)] + pub internal: ConfigV2Internal, +} + +impl Default for ConfigV2 { + fn default() -> Self { + ConfigV2 { + version: 2, + id: String::new(), + backend: None, + cache: None, + rafs: None, + overlay: None, + internal: ConfigV2Internal::default(), + } + } +} + +impl ConfigV2 { + /// Create a new instance of `ConfigV2` object. + pub fn new(id: &str) -> Self { + ConfigV2 { + version: 2, + id: id.to_string(), + backend: None, + cache: None, + rafs: None, + overlay: None, + internal: ConfigV2Internal::default(), + } + } + + /// Create a new configuration object for `backend-localfs` and `filecache`. + pub fn new_localfs(id: &str, dir: &str) -> Result { + let content = format!( + r#" + version = 2 + id = "{}" + backend.type = "localfs" + backend.localfs.dir = "{}" + cache.type = "filecache" + cache.compressed = false + cache.validate = false + cache.filecache.work_dir = "{}" + "#, + id, dir, dir + ); + + Self::from_str(&content) + } + + /// Read configuration information from a file. 
+ pub fn from_file>(path: P) -> Result { + let md = fs::metadata(path.as_ref())?; + if md.len() > 0x100000 { + return Err(Error::new( + ErrorKind::Other, + "configuration file size is too big", + )); + } + let content = fs::read_to_string(path)?; + Self::from_str(&content) + } + + /// Validate the configuration object. + pub fn validate(&self) -> bool { + if self.version != 2 { + return false; + } + if let Some(backend_cfg) = self.backend.as_ref() { + if !backend_cfg.validate() { + return false; + } + } + if let Some(cache_cfg) = self.cache.as_ref() { + if !cache_cfg.validate() { + return false; + } + } + if let Some(rafs_cfg) = self.rafs.as_ref() { + if !rafs_cfg.validate() { + return false; + } + } + + true + } + + /// Get configuration information for storage backend. + pub fn get_backend_config(&self) -> Result<&BackendConfigV2> { + self.backend.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidInput, + "no configuration information for backend", + ) + }) + } + + /// Get configuration information for cache subsystem. + pub fn get_cache_config(&self) -> Result<&CacheConfigV2> { + self.cache.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for cache", + ) + }) + } + + /// Get cache working directory. + pub fn get_cache_working_directory(&self) -> Result { + let cache = self.get_cache_config()?; + if cache.is_filecache() { + if let Some(c) = cache.file_cache.as_ref() { + return Ok(c.work_dir.clone()); + } + } else if cache.is_fscache() { + if let Some(c) = cache.fs_cache.as_ref() { + return Ok(c.work_dir.clone()); + } + } + + Err(Error::new( + ErrorKind::NotFound, + "no working directory configured", + )) + } + + /// Get configuration information for RAFS filesystem. + pub fn get_rafs_config(&self) -> Result<&RafsConfigV2> { + self.rafs.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidInput, + "no configuration information for rafs", + ) + }) + } + + /// Clone the object with all secrets removed. + pub fn clone_without_secrets(&self) -> Self { + let mut cfg = self.clone(); + + if let Some(backend_cfg) = cfg.backend.as_mut() { + if let Some(oss_cfg) = backend_cfg.oss.as_mut() { + oss_cfg.access_key_id = String::new(); + oss_cfg.access_key_secret = String::new(); + } + if let Some(registry_cfg) = backend_cfg.registry.as_mut() { + registry_cfg.auth = None; + registry_cfg.registry_token = None; + } + } + + cfg + } + + /// Check whether chunk digest validation is enabled or not. + pub fn is_chunk_validation_enabled(&self) -> bool { + let mut validation = if let Some(cache) = &self.cache { + cache.cache_validate + } else { + false + }; + if let Some(rafs) = &self.rafs { + if rafs.validate { + validation = true; + } + } + + validation + } + + /// Check whether fscache is enabled or not. + pub fn is_fs_cache(&self) -> bool { + if let Some(cache) = self.cache.as_ref() { + cache.fs_cache.is_some() + } else { + false + } + } + + /// Fill authorization for registry backend. 
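// Editorial sketch, not part of the upstream sources: `clone_without_secrets`
// above blanks the OSS access keys and drops registry auth/token, so the
// resulting copy can be logged or exported without leaking credentials.
#[allow(dead_code)]
fn clone_without_secrets_sketch(cfg: &ConfigV2) {
    let sanitized = cfg.clone_without_secrets();
    if let Some(backend) = sanitized.backend.as_ref() {
        if let Some(registry) = backend.registry.as_ref() {
            assert!(registry.auth.is_none());
            assert!(registry.registry_token.is_none());
        }
        if let Some(oss) = backend.oss.as_ref() {
            assert!(oss.access_key_id.is_empty());
            assert!(oss.access_key_secret.is_empty());
        }
    }
}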
+ pub fn update_registry_auth_info(&mut self, auth: &Option) { + if let Some(auth) = auth { + if let Some(backend) = self.backend.as_mut() { + if let Some(registry) = backend.registry.as_mut() { + registry.auth = Some(auth.to_string()); + } + } + } + } +} + +impl FromStr for ConfigV2 { + type Err = std::io::Error; + + fn from_str(s: &str) -> Result { + if let Ok(v) = serde_json::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + if let Ok(v) = toml::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + if let Ok(v) = serde_json::from_str::(s) { + if let Ok(v) = ConfigV2::try_from(v) { + if v.validate() { + return Ok(v); + } + } + } + Err(Error::new( + ErrorKind::InvalidInput, + "failed to parse configuration information", + )) + } +} + +/// Configuration information for storage backend. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct BackendConfigV2 { + /// Type of storage backend. + #[serde(rename = "type")] + pub backend_type: String, + /// Configuration for local disk backend. + pub localdisk: Option, + /// Configuration for local filesystem backend. + pub localfs: Option, + /// Configuration for OSS backend. + pub oss: Option, + /// Configuration for S3 backend. + pub s3: Option, + /// Configuration for container registry backend. + pub registry: Option, + /// Configuration for local http proxy. + #[serde(rename = "http-proxy")] + pub http_proxy: Option, +} + +impl BackendConfigV2 { + /// Validate storage backend configuration. + pub fn validate(&self) -> bool { + match self.backend_type.as_str() { + "localdisk" => match self.localdisk.as_ref() { + Some(v) => { + if v.device_path.is_empty() { + return false; + } + } + None => return false, + }, + "localfs" => match self.localfs.as_ref() { + Some(v) => { + if v.blob_file.is_empty() && v.dir.is_empty() { + return false; + } + } + None => return false, + }, + "oss" => match self.oss.as_ref() { + Some(v) => { + if v.endpoint.is_empty() || v.bucket_name.is_empty() { + return false; + } + } + None => return false, + }, + "s3" => match self.s3.as_ref() { + Some(v) => { + if v.region.is_empty() || v.bucket_name.is_empty() { + return false; + } + } + None => return false, + }, + "registry" => match self.registry.as_ref() { + Some(v) => { + if v.host.is_empty() || v.repo.is_empty() { + return false; + } + } + None => return false, + }, + + "http-proxy" => match self.http_proxy.as_ref() { + Some(v) => { + let is_valid_unix_socket_path = |path: &str| { + let path = Path::new(path); + path.is_absolute() && path.exists() + }; + if v.addr.is_empty() + || !(v.addr.starts_with("http://") + || v.addr.starts_with("https://") + || is_valid_unix_socket_path(&v.addr)) + { + return false; + } + + // check if v.path is valid url path format + if Path::new(&v.path).join("any_blob_id").to_str().is_none() { + return false; + } + } + None => return false, + }, + _ => return false, + } + + true + } + + /// Get configuration information for localdisk + pub fn get_localdisk_config(&self) -> Result<&LocalDiskConfig> { + if &self.backend_type != "localdisk" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 'localdisk'", + )) + } else { + self.localdisk.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for localdisk", + ) + }) + } + } + + /// Get configuration information for localfs + pub fn 
get_localfs_config(&self) -> Result<&LocalFsConfig> { + if &self.backend_type != "localfs" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 'localfs'", + )) + } else { + self.localfs.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for localfs", + ) + }) + } + } + + /// Get configuration information for OSS + pub fn get_oss_config(&self) -> Result<&OssConfig> { + if &self.backend_type != "oss" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 'oss'", + )) + } else { + self.oss.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for OSS", + ) + }) + } + } + + /// Get configuration information for S3 + pub fn get_s3_config(&self) -> Result<&S3Config> { + if &self.backend_type != "s3" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 's3'", + )) + } else { + self.s3.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for s3", + ) + }) + } + } + + /// Get configuration information for Registry + pub fn get_registry_config(&self) -> Result<&RegistryConfig> { + if &self.backend_type != "registry" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 'registry'", + )) + } else { + self.registry.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for registry", + ) + }) + } + } + + /// Get configuration information for http proxy + pub fn get_http_proxy_config(&self) -> Result<&HttpProxyConfig> { + if &self.backend_type != "http-proxy" { + Err(Error::new( + ErrorKind::InvalidInput, + "backend type is not 'http-proxy'", + )) + } else { + self.http_proxy.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for http-proxy", + ) + }) + } + } +} + +/// Configuration information for localdisk storage backend. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct LocalDiskConfig { + /// Mounted block device path or original localdisk image file path. + #[serde(default)] + pub device_path: String, + /// Disable discover blob objects by scanning GPT table. + #[serde(default)] + pub disable_gpt: bool, +} + +/// Configuration information for localfs storage backend. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct LocalFsConfig { + /// Blob file to access. + #[serde(default)] + pub blob_file: String, + /// Dir to hold blob files. Used when 'blob_file' is not specified. + #[serde(default)] + pub dir: String, + /// Alternative dirs to search for blobs. + #[serde(default)] + pub alt_dirs: Vec, +} + +/// OSS configuration information to access blobs. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct OssConfig { + /// Oss http scheme, either 'http' or 'https' + #[serde(default = "default_http_scheme")] + pub scheme: String, + /// Oss endpoint + pub endpoint: String, + /// Oss bucket name + pub bucket_name: String, + /// Prefix object_prefix to OSS object key, for example the simulation of subdirectory: + /// - object_key: sha256:xxx + /// - object_prefix: nydus/ + /// - object_key with object_prefix: nydus/sha256:xxx + #[serde(default)] + pub object_prefix: String, + /// Oss access key + #[serde(default)] + pub access_key_id: String, + /// Oss secret + #[serde(default)] + pub access_key_secret: String, + /// Skip SSL certificate validation for HTTPS scheme. 
+ #[serde(default)] + pub skip_verify: bool, + /// Drop the read request once http request timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub timeout: u32, + /// Drop the read request once http connection timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub connect_timeout: u32, + /// Retry count when read request failed. + #[serde(default)] + pub retry_limit: u8, + /// Enable HTTP proxy for the read request. + #[serde(default)] + pub proxy: ProxyConfig, + /// Enable mirrors for the read request. + #[serde(default)] + pub mirrors: Vec, +} + +/// S3 configuration information to access blobs. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct S3Config { + /// S3 http scheme, either 'http' or 'https' + #[serde(default = "default_http_scheme")] + pub scheme: String, + /// S3 endpoint + pub endpoint: String, + /// S3 region + pub region: String, + /// S3 bucket name + pub bucket_name: String, + /// Prefix object_prefix to S3 object key, for example the simulation of subdirectory: + /// - object_key: sha256:xxx + /// - object_prefix: nydus/ + /// - object_key with object_prefix: nydus/sha256:xxx + #[serde(default)] + pub object_prefix: String, + /// S3 access key + #[serde(default)] + pub access_key_id: String, + /// S3 secret + #[serde(default)] + pub access_key_secret: String, + /// Skip SSL certificate validation for HTTPS scheme. + #[serde(default)] + pub skip_verify: bool, + /// Drop the read request once http request timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub timeout: u32, + /// Drop the read request once http connection timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub connect_timeout: u32, + /// Retry count when read request failed. + #[serde(default)] + pub retry_limit: u8, + /// Enable HTTP proxy for the read request. + #[serde(default)] + pub proxy: ProxyConfig, + /// Enable mirrors for the read request. + #[serde(default)] + pub mirrors: Vec, +} + +/// Http proxy configuration information to access blobs. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct HttpProxyConfig { + /// Address of http proxy server, like `http://xxx.xxx` or `https://xxx.xxx` or `/path/to/unix.sock`. + pub addr: String, + /// Path to access the blobs, like `/<_namespace>/<_repo>/blobs`. + /// If the http proxy server is over unix socket, this field will be ignored. + #[serde(default)] + pub path: String, + /// Skip SSL certificate validation for HTTPS scheme. + #[serde(default)] + pub skip_verify: bool, + /// Drop the read request once http request timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub timeout: u32, + /// Drop the read request once http connection timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub connect_timeout: u32, + /// Retry count when read request failed. + #[serde(default)] + pub retry_limit: u8, + /// Enable HTTP proxy for the read request. + #[serde(default)] + pub proxy: ProxyConfig, + /// Enable mirrors for the read request. + #[serde(default)] + pub mirrors: Vec, +} + +/// Container registry configuration information to access blobs. 
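// Editorial sketch, not part of the upstream sources: `BackendConfigV2::validate`
// accepts an http-proxy backend only when `addr` is an http(s) URL or an existing
// absolute unix socket path. Addresses and the blob path below are illustrative.
#[allow(dead_code)]
fn http_proxy_backend_sketch() {
    let mut backend = BackendConfigV2 {
        backend_type: "http-proxy".to_string(),
        http_proxy: Some(HttpProxyConfig {
            addr: "http://127.0.0.1:8080".to_string(),
            path: "/namespace/repo/blobs".to_string(),
            ..Default::default()
        }),
        ..Default::default()
    };
    assert!(backend.validate());

    // A relative (or non-existent) unix socket path is rejected.
    backend.http_proxy.as_mut().unwrap().addr = "relative/unix.sock".to_string();
    assert!(!backend.validate());
}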
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegistryConfig { + /// Registry http scheme, either 'http' or 'https' + #[serde(default = "default_http_scheme")] + pub scheme: String, + /// Registry url host + pub host: String, + /// Registry image name, like 'library/ubuntu' + pub repo: String, + /// Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. + #[serde(default)] + pub auth: Option, + /// Skip SSL certificate validation for HTTPS scheme. + #[serde(default)] + pub skip_verify: bool, + /// Drop the read request once http request timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub timeout: u32, + /// Drop the read request once http connection timeout, in seconds. + #[serde(default = "default_http_timeout")] + pub connect_timeout: u32, + /// Retry count when read request failed. + #[serde(default)] + pub retry_limit: u8, + /// The field is a bearer token to be sent to registry to authorize registry requests. + #[serde(default)] + pub registry_token: Option, + /// The http scheme to access blobs. It is used to workaround some P2P subsystem + /// that requires a different scheme than the registry. + #[serde(default)] + pub blob_url_scheme: String, + /// Redirect blob access to a different host regardless of the one specified in 'host'. + #[serde(default)] + pub blob_redirected_host: String, + /// Enable HTTP proxy for the read request. + #[serde(default)] + pub proxy: ProxyConfig, + /// Enable mirrors for the read request. + #[serde(default)] + pub mirrors: Vec, +} + +/// Configuration information for blob cache manager. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct CacheConfigV2 { + /// Type of blob cache: "blobcache", "fscache" or "dummy" + #[serde(default, rename = "type")] + pub cache_type: String, + /// Whether the data from the cache is compressed, not used anymore. + #[serde(default, rename = "compressed")] + pub cache_compressed: bool, + /// Whether to validate data read from the cache. + #[serde(default, rename = "validate")] + pub cache_validate: bool, + /// Configuration for blob level prefetch. + #[serde(default)] + pub prefetch: PrefetchConfigV2, + /// Configuration information for file cache + #[serde(rename = "filecache")] + pub file_cache: Option, + #[serde(rename = "fscache")] + /// Configuration information for fscache + pub fs_cache: Option, +} + +impl CacheConfigV2 { + /// Validate cache configuration information. + pub fn validate(&self) -> bool { + match self.cache_type.as_str() { + "blobcache" | "filecache" => { + if let Some(c) = self.file_cache.as_ref() { + if c.work_dir.is_empty() { + return false; + } + } else { + return false; + } + } + "fscache" => { + if let Some(c) = self.fs_cache.as_ref() { + if c.work_dir.is_empty() { + return false; + } + } else { + return false; + } + } + "" | "dummycache" => {} + _ => return false, + } + + if self.prefetch.enable { + if self.prefetch.batch_size > 0x10000000 { + return false; + } + if self.prefetch.threads_count == 0 || self.prefetch.threads_count > 1024 { + return false; + } + } + + true + } + + /// Check whether the cache type is `filecache` + pub fn is_filecache(&self) -> bool { + self.cache_type == "blobcache" || self.cache_type == "filecache" + } + + /// Check whether the cache type is `fscache` + pub fn is_fscache(&self) -> bool { + self.cache_type == "fscache" + } + + /// Get configuration information for file cache. 
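// Editorial sketch, not part of the upstream sources: a "filecache" entry must
// carry a work_dir, and once prefetch is enabled it is bounded by `validate`
// (batch_size <= 0x10000000, 1..=1024 worker threads). Values are illustrative.
#[allow(dead_code)]
fn cache_config_v2_sketch() {
    let mut cache = CacheConfigV2 {
        cache_type: "filecache".to_string(),
        file_cache: Some(FileCacheConfig {
            work_dir: "/var/lib/nydus/cache".to_string(),
            ..Default::default()
        }),
        ..Default::default()
    };
    assert!(cache.validate());

    // An enabled prefetch section with zero worker threads fails validation.
    cache.prefetch.enable = true;
    cache.prefetch.threads_count = 0;
    assert!(!cache.validate());
}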
+ pub fn get_filecache_config(&self) -> Result<&FileCacheConfig> { + if self.is_filecache() { + self.file_cache.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidInput, + "no configuration information for filecache", + ) + }) + } else { + Err(Error::new( + ErrorKind::InvalidData, + "cache type is not 'filecache'", + )) + } + } + + /// Get configuration information for fscache. + pub fn get_fscache_config(&self) -> Result<&FsCacheConfig> { + if self.is_fscache() { + self.fs_cache.as_ref().ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + "no configuration information for fscache", + ) + }) + } else { + Err(Error::new( + ErrorKind::InvalidInput, + "cache type is not 'fscache'", + )) + } + } +} + +/// Configuration information for file cache. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct FileCacheConfig { + /// Working directory to store state and cached files. + #[serde(default = "default_work_dir")] + pub work_dir: String, + /// Deprecated: disable index mapping, keep it as false when possible. + #[serde(default)] + pub disable_indexed_map: bool, + /// Enable encryption data written to the cache file. + #[serde(default)] + pub enable_encryption: bool, + /// Enable convergent encryption for chunk deduplication. + #[serde(default)] + pub enable_convergent_encryption: bool, + /// Key for data encryption, a heximal representation of [u8; 32]. + #[serde(default)] + pub encryption_key: String, +} + +impl FileCacheConfig { + /// Get the working directory. + pub fn get_work_dir(&self) -> Result<&str> { + let path = fs::metadata(&self.work_dir) + .or_else(|_| { + fs::create_dir_all(&self.work_dir)?; + fs::metadata(&self.work_dir) + }) + .map_err(|e| { + log::error!("fail to stat filecache work_dir {}: {}", self.work_dir, e); + e + })?; + + if path.is_dir() { + Ok(&self.work_dir) + } else { + Err(Error::new( + ErrorKind::NotFound, + format!("filecache work_dir {} is not a directory", self.work_dir), + )) + } + } +} + +/// Configuration information for fscache. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct FsCacheConfig { + /// Working directory to store state and cached files. + #[serde(default = "default_work_dir")] + pub work_dir: String, +} + +impl FsCacheConfig { + /// Get the working directory. + pub fn get_work_dir(&self) -> Result<&str> { + let path = fs::metadata(&self.work_dir) + .or_else(|_| { + fs::create_dir_all(&self.work_dir)?; + fs::metadata(&self.work_dir) + }) + .map_err(|e| { + log::error!("fail to stat fscache work_dir {}: {}", self.work_dir, e); + e + })?; + + if path.is_dir() { + Ok(&self.work_dir) + } else { + Err(Error::new( + ErrorKind::NotFound, + format!("fscache work_dir {} is not a directory", self.work_dir), + )) + } + } +} + +/// Configuration information for RAFS filesystem. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct RafsConfigV2 { + /// Filesystem metadata cache mode. + #[serde(default = "default_rafs_mode")] + pub mode: String, + /// Amplified user IO request batch size to read data from remote storage backend / local cache. + #[serde(rename = "batch_size", default = "default_user_io_batch_size")] + pub user_io_batch_size: usize, + /// Whether to validate data digest. + #[serde(default)] + pub validate: bool, + /// Enable support of extended attributes. + #[serde(default)] + pub enable_xattr: bool, + /// Record file operation metrics for each file. 
+ /// + /// Better to keep it off in production environment due to possible resource consumption. + #[serde(default)] + pub iostats_files: bool, + /// Record filesystem access pattern. + #[serde(default)] + pub access_pattern: bool, + /// Record file name if file access trace log. + #[serde(default)] + pub latest_read_files: bool, + /// Filesystem prefetching configuration. + #[serde(default)] + pub prefetch: PrefetchConfigV2, +} + +impl RafsConfigV2 { + /// Validate RAFS filesystem configuration information. + pub fn validate(&self) -> bool { + if self.mode != "direct" && self.mode != "cached" { + return false; + } + if self.user_io_batch_size > 0x10000000 { + return false; + } + if self.prefetch.enable { + if self.prefetch.batch_size > 0x10000000 { + return false; + } + if self.prefetch.threads_count == 0 || self.prefetch.threads_count > 1024 { + return false; + } + } + + true + } +} + +/// Configuration information for blob data prefetching. +#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub struct PrefetchConfigV2 { + /// Whether to enable blob data prefetching. + pub enable: bool, + /// Number of data prefetching working threads. + #[serde(rename = "threads", default = "default_prefetch_threads_count")] + pub threads_count: usize, + /// The amplify batch size to prefetch data from backend. + #[serde(default = "default_prefetch_batch_size")] + pub batch_size: usize, + /// Network bandwidth rate limit in unit of Bytes and Zero means no limit. + #[serde(default)] + pub bandwidth_limit: u32, + /// Prefetch all data from backend. + #[serde(default)] + pub prefetch_all: bool, +} + +/// Configuration information for network proxy. +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProxyConfig { + /// Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. + #[serde(default)] + pub url: String, + /// Proxy health checking endpoint. + #[serde(default)] + pub ping_url: String, + /// Fallback to remote storage backend if proxy ping failed. + #[serde(default = "default_true")] + pub fallback: bool, + /// Interval for proxy health checking, in seconds. + #[serde(default = "default_check_interval")] + pub check_interval: u64, + /// Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. + #[serde(default)] + pub use_http: bool, + /// Elapsed time to pause proxy health check when the request is inactive, in seconds. + #[serde(default = "default_check_pause_elapsed")] + pub check_pause_elapsed: u64, +} + +impl Default for ProxyConfig { + fn default() -> Self { + Self { + url: String::new(), + ping_url: String::new(), + fallback: true, + check_interval: 5, + use_http: false, + check_pause_elapsed: 300, + } + } +} + +/// Configuration for registry mirror. +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct MirrorConfig { + /// Mirror server URL, for example http://127.0.0.1:65001. + pub host: String, + /// Ping URL to check mirror server health. + #[serde(default)] + pub ping_url: String, + /// HTTP request headers to be passed to mirror server. + #[serde(default)] + pub headers: HashMap, + /// Interval for mirror health checking, in seconds. + #[serde(default = "default_check_interval")] + pub health_check_interval: u64, + /// Maximum number of failures before marking a mirror as unusable. 
+ #[serde(default = "default_failure_limit")] + pub failure_limit: u8, + /// Elapsed time to pause mirror health check when the request is inactive, in seconds. + #[serde(default = "default_check_pause_elapsed")] + pub health_check_pause_elapsed: u64, +} + +impl Default for MirrorConfig { + fn default() -> Self { + Self { + host: String::new(), + headers: HashMap::new(), + health_check_interval: 5, + failure_limit: 5, + ping_url: String::new(), + health_check_pause_elapsed: 300, + } + } +} + +/// Configuration information for a cached blob`. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct BlobCacheEntryConfigV2 { + /// Configuration file format version number, must be 2. + pub version: u32, + /// Identifier for the instance. + #[serde(default)] + pub id: String, + /// Configuration information for storage backend. + #[serde(default)] + pub backend: BackendConfigV2, + /// Configuration information for local cache system. + #[serde(default)] + pub cache: CacheConfigV2, + /// Optional file path for metadata blob. + #[serde(default)] + pub metadata_path: Option, +} + +impl BlobCacheEntryConfigV2 { + /// Read configuration information from a file. + pub fn from_file>(path: P) -> Result { + let md = fs::metadata(path.as_ref())?; + if md.len() > 0x100000 { + return Err(Error::new( + ErrorKind::InvalidInput, + "configuration file size is too big", + )); + } + let content = fs::read_to_string(path)?; + Self::from_str(&content) + } + + /// Validate the configuration object. + pub fn validate(&self) -> bool { + if self.version != 2 { + return false; + } + let config: ConfigV2 = self.into(); + config.validate() + } +} + +impl FromStr for BlobCacheEntryConfigV2 { + type Err = Error; + + fn from_str(s: &str) -> Result { + if let Ok(v) = serde_json::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + if let Ok(v) = toml::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + Err(Error::new( + ErrorKind::InvalidInput, + "failed to parse configuration information", + )) + } +} + +impl From<&BlobCacheEntryConfigV2> for ConfigV2 { + fn from(c: &BlobCacheEntryConfigV2) -> Self { + ConfigV2 { + version: c.version, + id: c.id.clone(), + backend: Some(c.backend.clone()), + cache: Some(c.cache.clone()), + rafs: None, + overlay: None, + internal: ConfigV2Internal::default(), + } + } +} + +/// Internal runtime configuration. +#[derive(Clone, Debug)] +pub struct ConfigV2Internal { + /// It's possible to access the raw or more blob objects. + pub blob_accessible: Arc, +} + +impl Default for ConfigV2Internal { + fn default() -> Self { + ConfigV2Internal { + blob_accessible: Arc::new(AtomicBool::new(false)), + } + } +} + +impl PartialEq for ConfigV2Internal { + fn eq(&self, other: &Self) -> bool { + self.blob_accessible() == other.blob_accessible() + } +} + +impl Eq for ConfigV2Internal {} + +impl ConfigV2Internal { + /// Get the auto-probe flag. + pub fn blob_accessible(&self) -> bool { + self.blob_accessible.load(Ordering::Relaxed) + } + + /// Set the auto-probe flag. + pub fn set_blob_accessible(&self, accessible: bool) { + self.blob_accessible.store(accessible, Ordering::Relaxed); + } +} + +/// Blob cache object type for nydus/rafs bootstrap blob. +pub const BLOB_CACHE_TYPE_META_BLOB: &str = "bootstrap"; +/// Blob cache object type for nydus/rafs data blob. 
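// Editorial sketch, not part of the upstream sources: a `BlobCacheEntry` (defined
// below) that only carries the legacy `config` section can be promoted to the v2
// layout with `prepare_configuration_info`, which fills `blob_config` from
// `blob_config_legacy`. The JSON document and paths are illustrative.
#[allow(dead_code)]
fn blob_cache_entry_promotion_sketch() {
    let content = r#"{
        "type": "bootstrap",
        "id": "blob1",
        "config": {
            "id": "cache1",
            "backend_type": "localfs",
            "backend_config": { "dir": "/var/lib/nydus/blobs" },
            "cache_type": "filecache",
            "cache_config": { "work_dir": "/var/lib/nydus/cache" }
        },
        "domain_id": "domain1"
    }"#;
    let mut entry = BlobCacheEntry::from_str(content).unwrap();
    assert!(entry.blob_config.is_none());
    assert!(entry.prepare_configuration_info());
    let v2 = entry.blob_config.as_ref().unwrap();
    assert!(v2.backend.validate() && v2.cache.validate());
}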
+pub const BLOB_CACHE_TYPE_DATA_BLOB: &str = "datablob"; + +/// Configuration information for a cached blob. +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct BlobCacheEntry { + /// Type of blob object, bootstrap or data blob. + #[serde(rename = "type")] + pub blob_type: String, + /// Blob id. + #[serde(rename = "id")] + pub blob_id: String, + /// Configuration information to generate blob cache object. + #[serde(default, rename = "config")] + pub(crate) blob_config_legacy: Option, + /// Configuration information to generate blob cache object. + #[serde(default, rename = "config_v2")] + pub blob_config: Option, + /// Domain id for the blob, which is used to group cached blobs into management domains. + #[serde(default)] + pub domain_id: String, +} + +impl BlobCacheEntry { + pub fn prepare_configuration_info(&mut self) -> bool { + if self.blob_config.is_none() { + if let Some(legacy) = self.blob_config_legacy.as_ref() { + match legacy.try_into() { + Err(_) => return false, + Ok(v) => self.blob_config = Some(v), + } + } + } + + match self.blob_config.as_ref() { + None => false, + Some(cfg) => cfg.cache.validate() && cfg.backend.validate(), + } + } +} + +impl BlobCacheEntry { + /// Read configuration information from a file. + pub fn from_file>(path: P) -> Result { + let md = fs::metadata(path.as_ref())?; + if md.len() > 0x100000 { + return Err(Error::new( + ErrorKind::InvalidInput, + "configuration file size is too big", + )); + } + let content = fs::read_to_string(path)?; + Self::from_str(&content) + } + + /// Validate the configuration object. + pub fn validate(&self) -> bool { + if self.blob_type != BLOB_CACHE_TYPE_META_BLOB + && self.blob_type != BLOB_CACHE_TYPE_DATA_BLOB + { + log::warn!("invalid blob type {} for blob cache entry", self.blob_type); + return false; + } + if let Some(config) = self.blob_config.as_ref() { + if !config.validate() { + return false; + } + } + true + } +} + +impl FromStr for BlobCacheEntry { + type Err = Error; + + fn from_str(s: &str) -> Result { + if let Ok(v) = serde_json::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + if let Ok(v) = toml::from_str::(s) { + return if v.validate() { + Ok(v) + } else { + Err(Error::new(ErrorKind::InvalidInput, "invalid configuration")) + }; + } + Err(Error::new( + ErrorKind::InvalidInput, + "failed to parse configuration information", + )) + } +} + +/// Configuration information for a list of cached blob objects. +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct BlobCacheList { + /// List of blob configuration information. 
+ pub blobs: Vec, +} + +fn default_true() -> bool { + true +} + +fn default_http_scheme() -> String { + "https".to_string() +} + +fn default_http_timeout() -> u32 { + 5 +} + +fn default_check_interval() -> u64 { + 5 +} + +fn default_check_pause_elapsed() -> u64 { + 300 +} + +fn default_failure_limit() -> u8 { + 5 +} + +fn default_work_dir() -> String { + ".".to_string() +} + +pub fn default_user_io_batch_size() -> usize { + 1024 * 1024 +} + +pub fn default_prefetch_batch_size() -> usize { + 1024 * 1024 +} + +fn default_prefetch_threads_count() -> usize { + 8 +} + +fn default_prefetch_all() -> bool { + true +} + +fn default_rafs_mode() -> String { + "direct".to_string() +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// For backward compatibility +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Configuration information for storage backend. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +struct BackendConfig { + /// Type of storage backend. + #[serde(rename = "type")] + pub backend_type: String, + /// Configuration for storage backend. + /// Possible value: `LocalFsConfig`, `RegistryConfig`, `OssConfig`, `LocalDiskConfig`. + #[serde(rename = "config")] + pub backend_config: Value, +} + +impl TryFrom<&BackendConfig> for BackendConfigV2 { + type Error = std::io::Error; + + fn try_from(value: &BackendConfig) -> std::result::Result { + let mut config = BackendConfigV2 { + backend_type: value.backend_type.clone(), + localdisk: None, + localfs: None, + oss: None, + s3: None, + registry: None, + http_proxy: None, + }; + + match value.backend_type.as_str() { + "localdisk" => { + config.localdisk = Some(serde_json::from_value(value.backend_config.clone())?); + } + "localfs" => { + config.localfs = Some(serde_json::from_value(value.backend_config.clone())?); + } + "oss" => { + config.oss = Some(serde_json::from_value(value.backend_config.clone())?); + } + "s3" => { + config.s3 = Some(serde_json::from_value(value.backend_config.clone())?); + } + "registry" => { + config.registry = Some(serde_json::from_value(value.backend_config.clone())?); + } + v => { + return Err(Error::new( + ErrorKind::InvalidInput, + format!("unsupported backend type '{}'", v), + )) + } + } + + Ok(config) + } +} + +/// Configuration information for blob cache manager. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +struct CacheConfig { + /// Type of blob cache: "blobcache", "fscache" or "" + #[serde(default, rename = "type")] + pub cache_type: String, + /// Whether the data from the cache is compressed, not used anymore. + #[serde(default, rename = "compressed")] + pub cache_compressed: bool, + /// Blob cache manager specific configuration: FileCacheConfig, FsCacheConfig. + #[serde(default, rename = "config")] + pub cache_config: Value, + /// Whether to validate data read from the cache. + #[serde(skip_serializing, skip_deserializing)] + pub cache_validate: bool, + /// Configuration for blob data prefetching. 
+ #[serde(skip_serializing, skip_deserializing)] + pub prefetch_config: BlobPrefetchConfig, +} + +impl TryFrom<&CacheConfig> for CacheConfigV2 { + type Error = std::io::Error; + + fn try_from(v: &CacheConfig) -> std::result::Result { + let mut config = CacheConfigV2 { + cache_type: v.cache_type.clone(), + cache_compressed: v.cache_compressed, + cache_validate: v.cache_validate, + prefetch: (&v.prefetch_config).into(), + file_cache: None, + fs_cache: None, + }; + + match v.cache_type.as_str() { + "blobcache" | "filecache" => { + config.file_cache = Some(serde_json::from_value(v.cache_config.clone())?); + } + "fscache" => { + config.fs_cache = Some(serde_json::from_value(v.cache_config.clone())?); + } + "" | "dummycache" => {} + t => { + return Err(Error::new( + ErrorKind::InvalidInput, + format!("unsupported cache type '{}'", t), + )) + } + } + + Ok(config) + } +} + +/// Configuration information to create blob cache manager. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +struct FactoryConfig { + /// Id of the factory. + #[serde(default)] + pub id: String, + /// Configuration for storage backend. + pub backend: BackendConfig, + /// Configuration for blob cache manager. + #[serde(default)] + pub cache: CacheConfig, +} + +/// Rafs storage backend configuration information. +#[derive(Clone, Default, Deserialize)] +struct RafsConfig { + /// Configuration for storage subsystem. + pub device: FactoryConfig, + /// Filesystem working mode. + pub mode: String, + /// Whether to validate data digest before use. + #[serde(default)] + pub digest_validate: bool, + /// Io statistics. + #[serde(default)] + pub iostats_files: bool, + /// Filesystem prefetching configuration. + #[serde(default)] + pub fs_prefetch: FsPrefetchControl, + /// Enable extended attributes. + #[serde(default)] + pub enable_xattr: bool, + /// Record filesystem access pattern. + #[serde(default)] + pub access_pattern: bool, + /// Record file name if file access trace log. + #[serde(default)] + pub latest_read_files: bool, + // Amplified user IO request batch size to read data from remote storage backend / local cache. + // ZERO value means, amplifying user io is not enabled. + #[serde(rename = "amplify_io", default = "default_user_io_batch_size")] + pub user_io_batch_size: usize, +} + +impl TryFrom for ConfigV2 { + type Error = std::io::Error; + + fn try_from(v: RafsConfig) -> std::result::Result { + let backend: BackendConfigV2 = (&v.device.backend).try_into()?; + let mut cache: CacheConfigV2 = (&v.device.cache).try_into()?; + let rafs = RafsConfigV2 { + mode: v.mode, + user_io_batch_size: v.user_io_batch_size, + validate: v.digest_validate, + enable_xattr: v.enable_xattr, + iostats_files: v.iostats_files, + access_pattern: v.access_pattern, + latest_read_files: v.latest_read_files, + prefetch: v.fs_prefetch.into(), + }; + if !cache.prefetch.enable && rafs.prefetch.enable { + cache.prefetch = rafs.prefetch.clone(); + } + + Ok(ConfigV2 { + version: 2, + id: v.device.id, + backend: Some(backend), + cache: Some(cache), + rafs: Some(rafs), + overlay: None, + internal: ConfigV2Internal::default(), + }) + } +} + +/// Configuration information for filesystem data prefetch. +#[derive(Clone, Default, Deserialize)] +struct FsPrefetchControl { + /// Whether the filesystem layer data prefetch is enabled or not. + #[serde(default)] + pub enable: bool, + + /// How many working threads to prefetch data. 
+ #[serde(default = "default_prefetch_threads_count")] + pub threads_count: usize, + + /// The amplify batch size to prefetch data from backend. + #[serde(rename = "merging_size", default = "default_prefetch_batch_size")] + pub batch_size: usize, + + /// Network bandwidth limitation for prefetching. + /// + /// In unit of Bytes. It sets a limit to prefetch bandwidth usage in order to + /// reduce congestion with normal user IO. + /// bandwidth_limit == 0 -- prefetch bandwidth ratelimit disabled + /// bandwidth_limit > 0 -- prefetch bandwidth ratelimit enabled. + /// Please note that if the value is less than Rafs chunk size, + /// it will be raised to the chunk size. + #[serde(default, rename = "bandwidth_rate")] + pub bandwidth_limit: u32, + + /// Whether to prefetch all filesystem data. + #[serde(default = "default_prefetch_all")] + pub prefetch_all: bool, +} + +impl From for PrefetchConfigV2 { + fn from(v: FsPrefetchControl) -> Self { + PrefetchConfigV2 { + enable: v.enable, + threads_count: v.threads_count, + batch_size: v.batch_size, + bandwidth_limit: v.bandwidth_limit, + prefetch_all: v.prefetch_all, + } + } +} + +/// Configuration information for blob data prefetching. +#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] +struct BlobPrefetchConfig { + /// Whether to enable blob data prefetching. + pub enable: bool, + /// Number of data prefetching working threads. + pub threads_count: usize, + /// The amplify batch size to prefetch data from backend. + #[serde(rename = "merging_size")] + pub batch_size: usize, + /// Network bandwidth rate limit in unit of Bytes and Zero means no limit. + #[serde(rename = "bandwidth_rate")] + pub bandwidth_limit: u32, +} + +impl From<&BlobPrefetchConfig> for PrefetchConfigV2 { + fn from(v: &BlobPrefetchConfig) -> Self { + PrefetchConfigV2 { + enable: v.enable, + threads_count: v.threads_count, + batch_size: v.batch_size, + bandwidth_limit: v.bandwidth_limit, + prefetch_all: true, + } + } +} + +/// Configuration information for a cached blob, corresponding to `FactoryConfig`. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub(crate) struct BlobCacheEntryConfig { + /// Identifier for the blob cache configuration: corresponding to `FactoryConfig::id`. + #[serde(default)] + id: String, + /// Type of storage backend, corresponding to `FactoryConfig::BackendConfig::backend_type`. + backend_type: String, + /// Configuration for storage backend, corresponding to `FactoryConfig::BackendConfig::backend_config`. + /// + /// Possible value: `LocalFsConfig`, `RegistryConfig`, `OssConfig`, `LocalDiskConfig`. + backend_config: Value, + /// Type of blob cache, corresponding to `FactoryConfig::CacheConfig::cache_type`. + /// + /// Possible value: "fscache", "filecache". + cache_type: String, + /// Configuration for blob cache, corresponding to `FactoryConfig::CacheConfig::cache_config`. + /// + /// Possible value: `FileCacheConfig`, `FsCacheConfig`. + cache_config: Value, + /// Configuration for data prefetch. + #[serde(default)] + prefetch_config: BlobPrefetchConfig, + /// Optional file path for metadata blobs. 
+ #[serde(default)] + metadata_path: Option, +} + +impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 { + type Error = std::io::Error; + + fn try_from(v: &BlobCacheEntryConfig) -> std::result::Result { + let backend_config = BackendConfig { + backend_type: v.backend_type.clone(), + backend_config: v.backend_config.clone(), + }; + let cache_config = CacheConfig { + cache_type: v.cache_type.clone(), + cache_compressed: false, + cache_config: v.cache_config.clone(), + cache_validate: false, + prefetch_config: v.prefetch_config.clone(), + }; + Ok(BlobCacheEntryConfigV2 { + version: 2, + id: v.id.clone(), + backend: (&backend_config).try_into()?, + cache: (&cache_config).try_into()?, + metadata_path: v.metadata_path.clone(), + }) + } +} + +/// Configuration information for Overlay filesystem. +/// OverlayConfig is used to configure the writable layer(upper layer), +/// The filesystem will be writable when OverlayConfig is set. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct OverlayConfig { + pub upper_dir: String, + pub work_dir: String, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{BlobCacheEntry, BLOB_CACHE_TYPE_META_BLOB}; + + #[test] + fn test_blob_prefetch_config() { + let config = BlobPrefetchConfig::default(); + assert!(!config.enable); + assert_eq!(config.threads_count, 0); + assert_eq!(config.batch_size, 0); + assert_eq!(config.bandwidth_limit, 0); + + let content = r#"{ + "enable": true, + "threads_count": 2, + "merging_size": 4, + "bandwidth_rate": 5 + }"#; + let config: BlobPrefetchConfig = serde_json::from_str(content).unwrap(); + assert!(config.enable); + assert_eq!(config.threads_count, 2); + assert_eq!(config.batch_size, 4); + assert_eq!(config.bandwidth_limit, 5); + + let config: PrefetchConfigV2 = (&config).into(); + assert!(config.enable); + assert_eq!(config.threads_count, 2); + assert_eq!(config.batch_size, 4); + assert_eq!(config.bandwidth_limit, 5); + assert!(config.prefetch_all); + } + + #[test] + fn test_file_cache_config() { + let config: FileCacheConfig = serde_json::from_str("{}").unwrap(); + assert_eq!(&config.work_dir, "."); + assert!(!config.disable_indexed_map); + + let config: FileCacheConfig = + serde_json::from_str("{\"work_dir\":\"/tmp\",\"disable_indexed_map\":true}").unwrap(); + assert_eq!(&config.work_dir, "/tmp"); + assert!(config.get_work_dir().is_ok()); + assert!(config.disable_indexed_map); + + let config: FileCacheConfig = + serde_json::from_str("{\"work_dir\":\"/proc/mounts\",\"disable_indexed_map\":true}") + .unwrap(); + assert!(config.get_work_dir().is_err()); + } + + #[test] + fn test_fs_cache_config() { + let config: FsCacheConfig = serde_json::from_str("{}").unwrap(); + assert_eq!(&config.work_dir, "."); + + let config: FileCacheConfig = serde_json::from_str("{\"work_dir\":\"/tmp\"}").unwrap(); + assert_eq!(&config.work_dir, "/tmp"); + assert!(config.get_work_dir().is_ok()); + + let config: FileCacheConfig = + serde_json::from_str("{\"work_dir\":\"/proc/mounts\"}").unwrap(); + assert!(config.get_work_dir().is_err()); + } + + #[test] + fn test_blob_cache_entry() { + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "localfs", + "backend_config": {}, + "cache_type": "fscache", + "cache_config": {}, + "prefetch_config": { + "enable": true, + "threads_count": 2, + "merging_size": 4, + "bandwidth_rate": 5 + }, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + let config: BlobCacheEntry = 
serde_json::from_str(content).unwrap(); + assert_eq!(&config.blob_type, BLOB_CACHE_TYPE_META_BLOB); + assert_eq!(&config.blob_id, "blob1"); + assert_eq!(&config.domain_id, "domain1"); + + let blob_config = config.blob_config_legacy.as_ref().unwrap(); + assert_eq!(blob_config.id, "cache1"); + assert_eq!(blob_config.backend_type, "localfs"); + assert_eq!(blob_config.cache_type, "fscache"); + assert!(blob_config.cache_config.is_object()); + assert!(blob_config.prefetch_config.enable); + assert_eq!(blob_config.prefetch_config.threads_count, 2); + assert_eq!(blob_config.prefetch_config.batch_size, 4); + assert_eq!( + blob_config.metadata_path.as_ref().unwrap().as_str(), + "/tmp/metadata1" + ); + + let blob_config: BlobCacheEntryConfigV2 = blob_config.try_into().unwrap(); + assert_eq!(blob_config.id, "cache1"); + assert_eq!(blob_config.backend.backend_type, "localfs"); + assert_eq!(blob_config.cache.cache_type, "fscache"); + assert!(blob_config.cache.fs_cache.is_some()); + assert!(blob_config.cache.prefetch.enable); + assert_eq!(blob_config.cache.prefetch.threads_count, 2); + assert_eq!(blob_config.cache.prefetch.batch_size, 4); + assert_eq!( + blob_config.metadata_path.as_ref().unwrap().as_str(), + "/tmp/metadata1" + ); + + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "localfs", + "backend_config": {}, + "cache_type": "fscache", + "cache_config": {}, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + let config: BlobCacheEntry = serde_json::from_str(content).unwrap(); + let blob_config = config.blob_config_legacy.as_ref().unwrap(); + assert!(!blob_config.prefetch_config.enable); + assert_eq!(blob_config.prefetch_config.threads_count, 0); + assert_eq!(blob_config.prefetch_config.batch_size, 0); + } + + #[test] + fn test_proxy_config() { + let content = r#"{ + "url": "foo.com", + "ping_url": "ping.foo.com", + "fallback": true + }"#; + let config: ProxyConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.url, "foo.com"); + assert_eq!(config.ping_url, "ping.foo.com"); + assert!(config.fallback); + assert_eq!(config.check_interval, 5); + } + + #[test] + fn test_oss_config() { + let content = r#"{ + "endpoint": "test", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/" + }"#; + let config: OssConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.scheme, "https"); + assert!(!config.skip_verify); + assert_eq!(config.timeout, 5); + assert_eq!(config.connect_timeout, 5); + } + + #[test] + fn test_s3_config() { + let content = r#"{ + "endpoint": "test", + "region": "us-east-1", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/" + }"#; + let config: OssConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.scheme, "https"); + assert!(!config.skip_verify); + assert_eq!(config.timeout, 5); + assert_eq!(config.connect_timeout, 5); + } + + #[test] + fn test_registry_config() { + let content = r#"{ + "scheme": "http", + "skip_verify": true, + "host": "my-registry:5000", + "repo": "test/repo", + "auth": "base64_encoded_auth", + "registry_token": "bearer_token", + "blob_redirected_host": "blob_redirected_host" + }"#; + let config: RegistryConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.scheme, "http"); + assert!(config.skip_verify); + } + + #[test] + fn test_localfs_config() { + let content = r#"{ + "blob_file": "blob_file", 
+ "dir": "blob_dir", + "alt_dirs": ["dir1", "dir2"] + }"#; + let config: LocalFsConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.blob_file, "blob_file"); + assert_eq!(config.dir, "blob_dir"); + assert_eq!(config.alt_dirs, vec!["dir1", "dir2"]); + } + + #[test] + fn test_localdisk_config() { + let content = r#"{ + "device_path": "device_path" + }"#; + let config: LocalDiskConfig = serde_json::from_str(content).unwrap(); + assert_eq!(config.device_path, "device_path"); + } + + #[test] + fn test_backend_config() { + let config = BackendConfig { + backend_type: "localfs".to_string(), + backend_config: Default::default(), + }; + let str_val = serde_json::to_string(&config).unwrap(); + let config2 = serde_json::from_str(&str_val).unwrap(); + + assert_eq!(config, config2); + } + + #[test] + fn test_v2_version() { + let content = "version=2"; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_none()); + } + + #[test] + fn test_v2_backend() { + let content = r#"version=2 + [backend] + type = "localfs" + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_some()); + assert!(config.cache.is_none()); + + let backend = config.backend.as_ref().unwrap(); + assert_eq!(&backend.backend_type, "localfs"); + assert!(backend.localfs.is_none()); + assert!(backend.oss.is_none()); + assert!(backend.registry.is_none()); + } + + #[test] + fn test_v2_backend_localfs() { + let content = r#"version=2 + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_some()); + + let backend = config.backend.as_ref().unwrap(); + assert_eq!(&backend.backend_type, "localfs"); + assert!(backend.localfs.is_some()); + + let localfs = backend.localfs.as_ref().unwrap(); + assert_eq!(&localfs.blob_file, "/tmp/nydus.blob.data"); + assert_eq!(&localfs.dir, "/tmp"); + assert_eq!(&localfs.alt_dirs[0], "/var/nydus/cache"); + } + + #[test] + fn test_v2_backend_oss() { + let content = r#"version=2 + [backend] + type = "oss" + [backend.oss] + endpoint = "my_endpoint" + bucket_name = "my_bucket_name" + object_prefix = "my_object_prefix" + access_key_id = "my_access_key_id" + access_key_secret = "my_access_key_secret" + scheme = "http" + skip_verify = true + timeout = 10 + connect_timeout = 10 + retry_limit = 5 + [backend.oss.proxy] + url = "localhost:6789" + ping_url = "localhost:6789/ping" + fallback = true + check_interval = 10 + use_http = true + [[backend.oss.mirrors]] + host = "http://127.0.0.1:65001" + ping_url = "http://127.0.0.1:65001/ping" + health_check_interval = 10 + failure_limit = 10 + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_some()); + assert!(config.rafs.is_none()); + + let backend = config.backend.as_ref().unwrap(); + assert_eq!(&backend.backend_type, "oss"); + assert!(backend.oss.is_some()); + + let oss = backend.oss.as_ref().unwrap(); + assert_eq!(&oss.endpoint, "my_endpoint"); + assert_eq!(&oss.bucket_name, "my_bucket_name"); + assert_eq!(&oss.object_prefix, "my_object_prefix"); + assert_eq!(&oss.access_key_id, "my_access_key_id"); + assert_eq!(&oss.access_key_secret, "my_access_key_secret"); + assert_eq!(&oss.scheme, "http"); + assert!(oss.skip_verify); + assert_eq!(oss.timeout, 10); 
+ assert_eq!(oss.connect_timeout, 10); + assert_eq!(oss.retry_limit, 5); + assert_eq!(&oss.proxy.url, "localhost:6789"); + assert_eq!(&oss.proxy.ping_url, "localhost:6789/ping"); + assert_eq!(oss.proxy.check_interval, 10); + assert!(oss.proxy.fallback); + assert!(oss.proxy.use_http); + + assert_eq!(oss.mirrors.len(), 1); + let mirror = &oss.mirrors[0]; + assert_eq!(mirror.host, "http://127.0.0.1:65001"); + assert_eq!(mirror.ping_url, "http://127.0.0.1:65001/ping"); + assert!(mirror.headers.is_empty()); + assert_eq!(mirror.health_check_interval, 10); + assert_eq!(mirror.failure_limit, 10); + } + + #[test] + fn test_v2_backend_registry() { + let content = r#"version=2 + [backend] + type = "registry" + [backend.registry] + scheme = "http" + host = "localhost" + repo = "nydus" + auth = "auth" + skip_verify = true + timeout = 10 + connect_timeout = 10 + retry_limit = 5 + registry_token = "bear_token" + blob_url_scheme = "https" + blob_redirected_host = "redirect.registry.com" + [backend.registry.proxy] + url = "localhost:6789" + ping_url = "localhost:6789/ping" + fallback = true + check_interval = 10 + use_http = true + [[backend.registry.mirrors]] + host = "http://127.0.0.1:65001" + ping_url = "http://127.0.0.1:65001/ping" + health_check_interval = 10 + failure_limit = 10 + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_some()); + assert!(config.rafs.is_none()); + + let backend = config.backend.as_ref().unwrap(); + assert_eq!(&backend.backend_type, "registry"); + assert!(backend.registry.is_some()); + + let registry = backend.registry.as_ref().unwrap(); + assert_eq!(®istry.scheme, "http"); + assert_eq!(®istry.host, "localhost"); + assert_eq!(®istry.repo, "nydus"); + assert_eq!(registry.auth.as_ref().unwrap(), "auth"); + assert!(registry.skip_verify); + assert_eq!(registry.timeout, 10); + assert_eq!(registry.connect_timeout, 10); + assert_eq!(registry.retry_limit, 5); + assert_eq!(registry.registry_token.as_ref().unwrap(), "bear_token"); + assert_eq!(registry.blob_url_scheme, "https"); + assert_eq!(registry.blob_redirected_host, "redirect.registry.com"); + + assert_eq!(®istry.proxy.url, "localhost:6789"); + assert_eq!(®istry.proxy.ping_url, "localhost:6789/ping"); + assert_eq!(registry.proxy.check_interval, 10); + assert!(registry.proxy.fallback); + assert!(registry.proxy.use_http); + + assert_eq!(registry.mirrors.len(), 1); + let mirror = ®istry.mirrors[0]; + assert_eq!(mirror.host, "http://127.0.0.1:65001"); + assert_eq!(mirror.ping_url, "http://127.0.0.1:65001/ping"); + assert!(mirror.headers.is_empty()); + assert_eq!(mirror.health_check_interval, 10); + assert_eq!(mirror.failure_limit, 10); + } + + #[test] + fn test_v2_cache() { + let content = r#"version=2 + [cache] + type = "filecache" + compressed = true + validate = true + [cache.filecache] + work_dir = "/tmp" + [cache.fscache] + work_dir = "./" + [cache.prefetch] + enable = true + threads = 8 + batch_size = 1000000 + bandwidth_limit = 10000000 + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_none()); + assert!(config.rafs.is_none()); + assert!(config.cache.is_some()); + + let cache = config.cache.as_ref().unwrap(); + assert_eq!(&cache.cache_type, "filecache"); + assert!(cache.cache_compressed); + assert!(cache.cache_validate); + let filecache = cache.file_cache.as_ref().unwrap(); + assert_eq!(&filecache.work_dir, "/tmp"); + let fscache = cache.fs_cache.as_ref().unwrap(); + 
assert_eq!(&fscache.work_dir, "./"); + + let prefetch = &cache.prefetch; + assert!(prefetch.enable); + assert_eq!(prefetch.threads_count, 8); + assert_eq!(prefetch.batch_size, 1000000); + assert_eq!(prefetch.bandwidth_limit, 10000000); + } + + #[test] + fn test_v2_rafs() { + let content = r#"version=2 + [rafs] + mode = "direct" + batch_size = 1000000 + validate = true + enable_xattr = true + iostats_files = true + access_pattern = true + latest_read_files = true + [rafs.prefetch] + enable = true + threads = 4 + batch_size = 1000000 + bandwidth_limit = 10000000 + prefetch_all = true + "#; + let config: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert!(config.backend.is_none()); + assert!(config.cache.is_none()); + assert!(config.rafs.is_some()); + + let rafs = config.rafs.as_ref().unwrap(); + assert_eq!(&rafs.mode, "direct"); + assert_eq!(rafs.user_io_batch_size, 1000000); + assert!(rafs.validate); + assert!(rafs.enable_xattr); + assert!(rafs.iostats_files); + assert!(rafs.access_pattern); + assert!(rafs.latest_read_files); + assert!(rafs.prefetch.enable); + assert_eq!(rafs.prefetch.threads_count, 4); + assert_eq!(rafs.prefetch.batch_size, 1000000); + assert_eq!(rafs.prefetch.bandwidth_limit, 10000000); + assert!(rafs.prefetch.prefetch_all) + } + + #[test] + fn test_v2_blob_cache_entry() { + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "filecache" + compressed = true + validate = true + [cache.filecache] + work_dir = "/tmp" + "#; + let config: BlobCacheEntryConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert_eq!(&config.id, "my_id"); + assert_eq!(config.metadata_path.as_ref().unwrap(), "meta_path"); + + let backend = &config.backend; + assert_eq!(&backend.backend_type, "localfs"); + assert!(backend.localfs.is_some()); + + let localfs = backend.localfs.as_ref().unwrap(); + assert_eq!(&localfs.blob_file, "/tmp/nydus.blob.data"); + assert_eq!(&localfs.dir, "/tmp"); + assert_eq!(&localfs.alt_dirs[0], "/var/nydus/cache"); + } + + #[test] + fn test_sample_config_file() { + let content = r#"{ + "device": { + "backend": { + "type": "localfs", + "config": { + "dir": "/tmp/AM7TxD/blobs", + "readahead": true + } + }, + "cache": { + "type": "blobcache", + "compressed": true, + "config": { + "work_dir": "/tmp/AM7TxD/cache" + } + } + }, + "mode": "cached", + "digest_validate": true, + "iostats_files": false + } + "#; + let config = ConfigV2::from_str(content).unwrap(); + assert_eq!(&config.id, ""); + } + + #[test] + fn test_snapshotter_sample_config() { + let content = r#" + { + "device": { + "backend": { + "type": "registry", + "config": { + "readahead": false, + "host": "localhost", + "repo": "vke/golang", + "auth": "", + "scheme": "https", + "proxy": { + "fallback": false + }, + "timeout": 5, + "connect_timeout": 5, + "retry_limit": 2 + } + }, + "cache": { + "type": "blobcache", + "compressed": true, + "config": { + "work_dir": "/var/lib/containerd-nydus/cache", + "disable_indexed_map": false + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "prefetch_all": true, + "threads_count": 8, + "merging_size": 1048576, + "bandwidth_rate": 0 + } + } + "#; + let config = ConfigV2::from_str(content).unwrap(); + assert_eq!(&config.id, ""); + } + + #[test] + fn 
test_backend_http_proxy_config() { + let config = + r#"{"version":2,"backend":{"type":"http-proxy","http-proxy":{"addr":"/tmp"}}}"#; + let config = ConfigV2::from_str(config).unwrap(); + let backend = config.backend.unwrap(); + assert_eq!(&backend.backend_type, "http-proxy"); + assert_eq!(&backend.http_proxy.unwrap().addr, "/tmp"); + } + + #[test] + fn test_new_localfs() { + let config = ConfigV2::new_localfs("id1", "./").unwrap(); + assert_eq!(&config.id, "id1"); + assert_eq!(config.backend.as_ref().unwrap().backend_type, "localfs"); + } + + #[test] + fn test_update_registry_auth_info() { + let config = r#" + { + "device": { + "id": "test", + "backend": { + "type": "registry", + "config": { + "readahead": false, + "host": "docker.io", + "repo": "library/nginx", + "scheme": "https", + "proxy": { + "fallback": false + }, + "timeout": 5, + "connect_timeout": 5, + "retry_limit": 8 + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 10, + "merging_size": 131072, + "bandwidth_rate": 10485760 + } + }"#; + + let mut rafs_config = ConfigV2::from_str(&config).unwrap(); + let test_auth = "test_auth".to_string(); + + rafs_config.update_registry_auth_info(&Some(test_auth.clone())); + + let backend = rafs_config.backend.unwrap(); + let registry = backend.registry.unwrap(); + let auth = registry.auth.unwrap(); + assert_eq!(auth, test_auth); + } + + #[test] + fn test_config2_error() { + let content_bad_version = r#"version=3 + "#; + let cfg: ConfigV2 = toml::from_str(content_bad_version).unwrap(); + assert!(!cfg.validate()); + let cfg = ConfigV2::new("id"); + assert!(cfg.get_backend_config().is_err()); + assert!(cfg.get_cache_config().is_err()); + assert!(cfg.get_rafs_config().is_err()); + assert!(cfg.get_cache_working_directory().is_err()); + + let content = r#"version=2 + [cache] + type = "filecache" + [cache.filecache] + work_dir = "/tmp" + "#; + let cfg: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(cfg.get_cache_working_directory().unwrap(), "/tmp"); + + let content = r#"version=2 + [cache] + type = "fscache" + [cache.fscache] + work_dir = "./foo" + "#; + let cfg: ConfigV2 = toml::from_str(content).unwrap(); + assert_eq!(cfg.get_cache_working_directory().unwrap(), "./foo"); + + let content = r#"version=2 + [cache] + type = "bar" + "#; + let cfg: ConfigV2 = toml::from_str(content).unwrap(); + assert!(cfg.get_cache_working_directory().is_err()); + + let content = r#" + foo-bar-xxxx + "#; + assert!(toml::from_str::(content).is_err()); + } + + #[test] + fn test_backend_config_valid() { + let mut cfg = BackendConfigV2 { + backend_type: "localdisk".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + cfg.localdisk = Some(LocalDiskConfig { + device_path: "".to_string(), + disable_gpt: true, + }); + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: "localfs".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: "oss".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: "s3".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: "register".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: "http-proxy".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = BackendConfigV2 { + backend_type: 
"foobar".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + } + + fn get_config(backend_type: &str) { + let mut cfg: BackendConfigV2 = BackendConfigV2::default(); + assert!(cfg.get_localdisk_config().is_err()); + + cfg.backend_type = backend_type.to_string(); + assert!(cfg.get_localdisk_config().is_err()); + } + + #[test] + fn test_get_config() { + get_config("localdisk"); + get_config("localfs"); + get_config("oss"); + get_config("s3"); + get_config("register"); + get_config("http-proxy"); + } + + #[test] + fn test_cache_config_valid() { + let cfg = CacheConfigV2 { + cache_type: "blobcache".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = CacheConfigV2 { + cache_type: "fscache".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + + let cfg = CacheConfigV2 { + cache_type: "dummycache".to_string(), + ..Default::default() + }; + assert!(cfg.validate()); + + let cfg = CacheConfigV2 { + cache_type: "foobar".to_string(), + ..Default::default() + }; + assert!(!cfg.validate()); + } + + #[test] + fn test_get_fscache_config() { + let mut cfg = CacheConfigV2::default(); + assert!(cfg.get_fscache_config().is_err()); + cfg.cache_type = "fscache".to_string(); + assert!(cfg.get_fscache_config().is_err()); + } + + #[test] + fn test_fscache_get_work_dir() { + let mut cfg = FsCacheConfig::default(); + assert!(cfg.get_work_dir().is_err()); + cfg.work_dir = ".".to_string(); + assert!(cfg.get_work_dir().is_ok()); + cfg.work_dir = "foobar".to_string(); + let res = cfg.get_work_dir().is_ok(); + fs::remove_dir_all("foobar").unwrap(); + assert!(res); + } + + #[test] + fn test_default_mirror_config() { + let cfg = MirrorConfig::default(); + assert_eq!(cfg.host, ""); + assert_eq!(cfg.health_check_interval, 5); + assert_eq!(cfg.failure_limit, 5); + assert_eq!(cfg.ping_url, ""); + } + + #[test] + fn test_config_v2_from_file() { + let content = r#"version=2 + [cache] + type = "filecache" + [cache.filecache] + work_dir = "/tmp" + "#; + if fs::write("test_config_v2_from_file.cfg", content).is_ok() { + let res = ConfigV2::from_file("test_config_v2_from_file.cfg").is_ok(); + fs::remove_file("test_config_v2_from_file.cfg").unwrap(); + assert!(res); + } else { + assert!(ConfigV2::from_file("test_config_v2_from_file.cfg").is_err()); + } + } + + #[test] + fn test_blob_cache_entry_v2_from_file() { + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "filecache" + compressed = true + validate = true + [cache.filecache] + work_dir = "/tmp" + "#; + if fs::write("test_blob_cache_entry_v2_from_file.cfg", content).is_ok() { + let res = + BlobCacheEntryConfigV2::from_file("test_blob_cache_entry_v2_from_file.cfg").is_ok(); + fs::remove_file("test_blob_cache_entry_v2_from_file.cfg").unwrap(); + assert!(res); + } else { + assert!(ConfigV2::from_file("test_blob_cache_entry_v2_from_file.cfg").is_err()); + } + } + + #[test] + fn test_blob_cache_valid() { + let err_version_content = r#"version=1"#; + + let config: BlobCacheEntryConfigV2 = toml::from_str(err_version_content).unwrap(); + assert!(!config.validate()); + + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "filecache" + compressed = true + validate = true + 
[cache.filecache] + work_dir = "/tmp" + "#; + + let config: BlobCacheEntryConfigV2 = toml::from_str(content).unwrap(); + assert!(config.validate()); + } + + #[test] + fn test_blob_from_str() { + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "filecache" + compressed = true + validate = true + [cache.filecache] + work_dir = "/tmp" + "#; + + let config: BlobCacheEntryConfigV2 = BlobCacheEntryConfigV2::from_str(content).unwrap(); + assert_eq!(config.version, 2); + assert_eq!(config.id, "my_id"); + assert_eq!(config.backend.localfs.unwrap().dir, "/tmp"); + assert_eq!(config.cache.file_cache.unwrap().work_dir, "/tmp"); + let content = r#" + { + "version": 2, + "id": "my_id", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp" + } + } + } + "#; + let config: BlobCacheEntryConfigV2 = BlobCacheEntryConfigV2::from_str(content).unwrap(); + + assert_eq!(config.version, 2); + assert_eq!(config.id, "my_id"); + assert_eq!(config.backend.localfs.unwrap().dir, "/tmp"); + + let content = r#"foobar"#; + assert!(BlobCacheEntryConfigV2::from_str(content).is_err()); + } + + #[test] + fn test_blob_cache_entry_from_file() { + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "localfs", + "backend_config": {}, + "cache_type": "fscache", + "cache_config": {}, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + if fs::write("test_blob_cache_entry_from_file.cfg", content).is_ok() { + let res = BlobCacheEntry::from_file("test_blob_cache_entry_from_file.cfg").is_ok(); + fs::remove_file("test_blob_cache_entry_from_file.cfg").unwrap(); + assert!(res); + } else { + assert!(ConfigV2::from_file("test_blob_cache_entry_from_file.cfg").is_err()); + } + } + + #[test] + fn test_blob_cache_entry_valid() { + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "localfs", + "backend_config": {}, + "cache_type": "fscache", + "cache_config": {}, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + let mut cfg = BlobCacheEntry::from_str(content).unwrap(); + cfg.blob_type = "foobar".to_string(); + assert!(!cfg.validate()); + + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "domain_id": "domain1" + }"#; + let cfg = BlobCacheEntry::from_str(content).unwrap(); + assert!(cfg.validate()); + } + + #[test] + fn test_blob_cache_entry_from_str() { + let content = r#"{ + "type": "bootstrap", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "localfs", + "backend_config": {}, + "cache_type": "fscache", + "cache_config": {}, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + assert!(BlobCacheEntry::from_str(content).is_ok()); + let content = r#"{ + "type": "foobar", + "id": "blob1", + "config": { + "id": "cache1", + "backend_type": "foobar", + "backend_config": {}, + "cache_type": "foobar", + "cache_config": {}, + "metadata_path": "/tmp/metadata1" + }, + "domain_id": "domain1" + }"#; + assert!(BlobCacheEntry::from_str(content).is_err()); + + let content = r#"foobar"#; + assert!(BlobCacheEntry::from_str(content).is_err()); + } + + #[test] + fn test_default_value() { + assert!(default_true()); + assert_eq!(default_failure_limit(), 5); + assert_eq!(default_prefetch_batch_size(), 1024 * 1024); + assert_eq!(default_prefetch_threads_count(), 8); + } 
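+
+    // Illustrative sketch: exercises the `TryFrom<RafsConfig> for ConfigV2`
+    // conversion above, which clones the RAFS-level `fs_prefetch` settings into
+    // the cache prefetch configuration when the legacy cache section defines no
+    // prefetch of its own. The test name, the JSON snippet and the concrete
+    // values are hypothetical; the partially filled backend/cache sections
+    // assume the same serde defaults the sample-config tests above rely on.
+    #[test]
+    fn test_v1_prefetch_propagation_sketch() {
+        let content = r#"{
+            "device": {
+                "backend": {
+                    "type": "localfs",
+                    "config": { "dir": "/tmp" }
+                },
+                "cache": {
+                    "type": "blobcache",
+                    "config": { "work_dir": "/tmp" }
+                }
+            },
+            "mode": "direct",
+            "fs_prefetch": {
+                "enable": true,
+                "threads_count": 4
+            }
+        }"#;
+        let v1: RafsConfig = serde_json::from_str(content).unwrap();
+        let v2 = ConfigV2::try_from(v1).unwrap();
+
+        // The legacy cache section carried no prefetch block, so the RAFS
+        // prefetch settings are expected to be propagated into the cache.
+        let cache = v2.cache.as_ref().unwrap();
+        assert!(cache.prefetch.enable);
+        assert_eq!(cache.prefetch.threads_count, 4);
+
+        let rafs = v2.rafs.as_ref().unwrap();
+        assert_eq!(&rafs.mode, "direct");
+        // "amplify_io" was omitted, so the default user IO batch size applies.
+        assert_eq!(rafs.user_io_batch_size, default_user_io_batch_size());
+    }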
+ + #[test] + fn test_backend_config_try_from() { + let config = BackendConfig { + backend_type: "localdisk".to_string(), + backend_config: serde_json::to_value(LocalDiskConfig::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_ok()); + + let config = BackendConfig { + backend_type: "localfs".to_string(), + backend_config: serde_json::to_value(LocalFsConfig::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_ok()); + + let config = BackendConfig { + backend_type: "oss".to_string(), + backend_config: serde_json::to_value(OssConfig::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_ok()); + + let config = BackendConfig { + backend_type: "s3".to_string(), + backend_config: serde_json::to_value(S3Config::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_ok()); + + let config = BackendConfig { + backend_type: "registry".to_string(), + backend_config: serde_json::to_value(RegistryConfig::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_ok()); + + let config = BackendConfig { + backend_type: "foobar".to_string(), + backend_config: serde_json::to_value(LocalDiskConfig::default()).unwrap(), + }; + assert!(BackendConfigV2::try_from(&config).is_err()); + } +} diff --git a/api/src/error.rs b/api/src/error.rs index 277c0cc00ba..199b606f2f8 100644 --- a/api/src/error.rs +++ b/api/src/error.rs @@ -1,104 +1,104 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::Debug; - -/// Display error messages with line number, file path and optional backtrace. -pub fn make_error( - err: std::io::Error, - _raw: impl Debug, - _file: &str, - _line: u32, -) -> std::io::Error { - #[cfg(feature = "error-backtrace")] - { - if let Ok(val) = std::env::var("RUST_BACKTRACE") { - if val.trim() != "0" { - error!("Stack:\n{:?}", backtrace::Backtrace::new()); - error!("Error:\n\t{:?}\n\tat {}:{}", _raw, _file, _line); - return err; - } - } - error!( - "Error:\n\t{:?}\n\tat {}:{}\n\tnote: enable `RUST_BACKTRACE=1` env to display a backtrace", - _raw, _file, _line - ); - } - err -} - -/// Define error macro like `x!()` or `x!(err)`. -/// Note: The `x!()` macro will convert any origin error (Os, Simple, Custom) to Custom error. -macro_rules! define_error_macro { - ($fn:ident, $err:expr) => { - #[macro_export] - macro_rules! $fn { - () => { - std::io::Error::new($err.kind(), format!("{}: {}:{}", $err, file!(), line!())) - }; - ($raw:expr) => { - $crate::error::make_error($err, &$raw, file!(), line!()) - }; - } - }; -} - -/// Define error macro for libc error codes -macro_rules! define_libc_error_macro { - ($fn:ident, $code:ident) => { - define_error_macro!($fn, std::io::Error::from_raw_os_error(libc::$code)); - }; -} - -// TODO: Add format string support -// Add more libc error macro here if necessary -define_libc_error_macro!(einval, EINVAL); -define_libc_error_macro!(enoent, ENOENT); -define_libc_error_macro!(ebadf, EBADF); -define_libc_error_macro!(eacces, EACCES); -define_libc_error_macro!(enotdir, ENOTDIR); -define_libc_error_macro!(eisdir, EISDIR); -define_libc_error_macro!(ealready, EALREADY); -define_libc_error_macro!(enosys, ENOSYS); -define_libc_error_macro!(epipe, EPIPE); -define_libc_error_macro!(eio, EIO); - -/// Return EINVAL error with formatted error message. -#[macro_export] -macro_rules! bail_einval { - ($($arg:tt)*) => {{ - return Err(einval!(format!($($arg)*))) - }} -} - -/// Return EIO error with formatted error message. 
-#[macro_export] -macro_rules! bail_eio { - ($($arg:tt)*) => {{ - return Err(eio!(format!($($arg)*))) - }} -} - -// Add more custom error macro here if necessary -define_error_macro!(last_error, std::io::Error::last_os_error()); -define_error_macro!(eother, std::io::Error::new(std::io::ErrorKind::Other, "")); - -#[cfg(test)] -mod tests { - fn check_size(size: usize) -> std::io::Result<()> { - if size > 0x1000 { - return Err(einval!()); - } - - Ok(()) - } - - #[test] - fn test_einval() { - assert_eq!( - check_size(0x2000).unwrap_err().kind(), - std::io::Error::from_raw_os_error(libc::EINVAL).kind() - ); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::Debug; + +/// Display error messages with line number, file path and optional backtrace. +pub fn make_error( + err: std::io::Error, + _raw: impl Debug, + _file: &str, + _line: u32, +) -> std::io::Error { + #[cfg(feature = "error-backtrace")] + { + if let Ok(val) = std::env::var("RUST_BACKTRACE") { + if val.trim() != "0" { + error!("Stack:\n{:?}", backtrace::Backtrace::new()); + error!("Error:\n\t{:?}\n\tat {}:{}", _raw, _file, _line); + return err; + } + } + error!( + "Error:\n\t{:?}\n\tat {}:{}\n\tnote: enable `RUST_BACKTRACE=1` env to display a backtrace", + _raw, _file, _line + ); + } + err +} + +/// Define error macro like `x!()` or `x!(err)`. +/// Note: The `x!()` macro will convert any origin error (Os, Simple, Custom) to Custom error. +macro_rules! define_error_macro { + ($fn:ident, $err:expr) => { + #[macro_export] + macro_rules! $fn { + () => { + std::io::Error::new($err.kind(), format!("{}: {}:{}", $err, file!(), line!())) + }; + ($raw:expr) => { + $crate::error::make_error($err, &$raw, file!(), line!()) + }; + } + }; +} + +/// Define error macro for libc error codes +macro_rules! define_libc_error_macro { + ($fn:ident, $code:ident) => { + define_error_macro!($fn, std::io::Error::from_raw_os_error(libc::$code)); + }; +} + +// TODO: Add format string support +// Add more libc error macro here if necessary +define_libc_error_macro!(einval, EINVAL); +define_libc_error_macro!(enoent, ENOENT); +define_libc_error_macro!(ebadf, EBADF); +define_libc_error_macro!(eacces, EACCES); +define_libc_error_macro!(enotdir, ENOTDIR); +define_libc_error_macro!(eisdir, EISDIR); +define_libc_error_macro!(ealready, EALREADY); +define_libc_error_macro!(enosys, ENOSYS); +define_libc_error_macro!(epipe, EPIPE); +define_libc_error_macro!(eio, EIO); + +/// Return EINVAL error with formatted error message. +#[macro_export] +macro_rules! bail_einval { + ($($arg:tt)*) => {{ + return Err(einval!(format!($($arg)*))) + }} +} + +/// Return EIO error with formatted error message. +#[macro_export] +macro_rules! bail_eio { + ($($arg:tt)*) => {{ + return Err(eio!(format!($($arg)*))) + }} +} + +// Add more custom error macro here if necessary +define_error_macro!(last_error, std::io::Error::last_os_error()); +define_error_macro!(eother, std::io::Error::new(std::io::ErrorKind::Other, "")); + +#[cfg(test)] +mod tests { + fn check_size(size: usize) -> std::io::Result<()> { + if size > 0x1000 { + return Err(einval!()); + } + + Ok(()) + } + + #[test] + fn test_einval() { + assert_eq!( + check_size(0x2000).unwrap_err().kind(), + std::io::Error::from_raw_os_error(libc::EINVAL).kind() + ); + } +} diff --git a/api/src/http.rs b/api/src/http.rs index 71d66f0bd81..557b53bf619 100644 --- a/api/src/http.rs +++ b/api/src/http.rs @@ -1,271 +1,271 @@ -// Copyright 2022 Alibaba Cloud. All rights reserved. 
-// Copyright 2020 Ant Group. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io; -use std::sync::mpsc::{RecvError, SendError}; - -use serde::Deserialize; -use serde_json::Error as SerdeError; -use thiserror::Error; - -use crate::BlobCacheEntry; - -/// Errors related to Metrics. -#[derive(Error, Debug)] -pub enum MetricsError { - #[error("no counter found for the metric")] - NoCounter, - #[error("failed to serialize metric: {0:?}")] - Serialize(#[source] SerdeError), -} - -/// Mount a filesystem. -#[derive(Clone, Deserialize, Debug)] -pub struct ApiMountCmd { - /// Path to source of the filesystem. - pub source: String, - /// Type of filesystem. - #[serde(default)] - pub fs_type: String, - /// Configuration for the filesystem. - pub config: String, - /// List of files to prefetch. - #[serde(default)] - pub prefetch_files: Option>, -} - -/// Umount a mounted filesystem. -#[derive(Clone, Deserialize, Debug)] -pub struct ApiUmountCmd { - /// Path of mountpoint. - pub mountpoint: String, -} - -/// Set/update daemon configuration. -#[derive(Clone, Deserialize, Debug)] -pub struct DaemonConf { - /// Logging level: Off, Error, Warn, Info, Debug, Trace. - pub log_level: String, -} - -/// Identifier for cached blob objects. -/// -/// Domains are used to control the blob sharing scope. All blobs associated with the same domain -/// will be shared/reused, but blobs associated with different domains are isolated. -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct BlobCacheObjectId { - /// Domain identifier for the object. - #[serde(default)] - pub domain_id: String, - /// Blob identifier for the object. - #[serde(default)] - pub blob_id: String, -} - -#[derive(Debug)] -pub enum ApiRequest { - /// Set daemon configuration. - ConfigureDaemon(DaemonConf), - /// Get daemon information. - GetDaemonInfo, - /// Get daemon global events. - GetEvents, - /// Stop the daemon. - Exit, - /// Start the daemon. - Start, - /// Send fuse fd to new daemon. - SendFuseFd, - /// Take over fuse fd from old daemon instance. - TakeoverFuseFd, - - // Filesystem Related - /// Mount a filesystem. - Mount(String, ApiMountCmd), - /// Remount a filesystem. - Remount(String, ApiMountCmd), - /// Unmount a filesystem. - Umount(String), - - /// Get storage backend metrics. - ExportBackendMetrics(Option), - /// Get blob cache metrics. - ExportBlobcacheMetrics(Option), - - // Nydus API v1 requests - /// Get filesystem global metrics. - ExportFsGlobalMetrics(Option), - /// Get filesystem access pattern log. - ExportFsAccessPatterns(Option), - /// Get filesystem backend information. - ExportFsBackendInfo(String), - /// Get filesystem file metrics. - ExportFsFilesMetrics(Option, bool), - /// Get information about filesystem inflight requests. - ExportFsInflightMetrics, - - // Nydus API v2 - /// Get daemon information excluding filesystem backends. - GetDaemonInfoV2, - /// Create a blob cache entry - CreateBlobObject(Box), - /// Get information about blob cache entries - GetBlobObject(BlobCacheObjectId), - /// Delete a blob cache entry - DeleteBlobObject(BlobCacheObjectId), - /// Delete a blob cache file - DeleteBlobFile(String), -} - -/// Kinds for daemon related error messages. -#[derive(Debug)] -pub enum DaemonErrorKind { - /// Service not ready yet. - NotReady, - /// Generic errors. - Other(String), - /// Message serialization/deserialization related errors. - Serde(SerdeError), - /// Unexpected event type. 
- UnexpectedEvent(String), - /// Can't upgrade the daemon. - UpgradeManager(String), - /// Unsupported requests. - Unsupported, -} - -/// Kinds for metrics related error messages. -#[derive(Debug)] -pub enum MetricsErrorKind { - /// Generic daemon related errors. - Daemon(DaemonErrorKind), - /// Errors related to metrics implementation. - Stats(MetricsError), -} - -#[derive(Error, Debug)] -#[allow(clippy::large_enum_variant)] -pub enum ApiError { - #[error("daemon internal error: {0:?}")] - DaemonAbnormal(DaemonErrorKind), - #[error("daemon events error: {0}")] - Events(String), - #[error("metrics error: {0:?}")] - Metrics(MetricsErrorKind), - #[error("failed to mount filesystem: {0:?}")] - MountFilesystem(DaemonErrorKind), - #[error("failed to send request to the API service: {0:?}")] - RequestSend(#[from] SendError>), - #[error("failed to parse response payload type")] - ResponsePayloadType, - #[error("failed to receive response from the API service: {0:?}")] - ResponseRecv(#[from] RecvError), - #[error("failed to wake up the daemon: {0:?}")] - Wakeup(#[source] io::Error), -} - -/// Specialized `std::result::Result` for API replies. -pub type ApiResult = std::result::Result; - -#[derive(Serialize)] -pub enum ApiResponsePayload { - /// Filesystem backend metrics. - BackendMetrics(String), - /// Blobcache metrics. - BlobcacheMetrics(String), - /// Daemon version, configuration and status information in json. - DaemonInfo(String), - /// No data is sent on the channel. - Empty, - /// Global error events. - Events(String), - - /// Filesystem global metrics, v1. - FsGlobalMetrics(String), - /// Filesystem per-file metrics, v1. - FsFilesMetrics(String), - /// Filesystem access pattern trace log, v1. - FsFilesPatterns(String), - // Filesystem Backend Information, v1. - FsBackendInfo(String), - // Filesystem Inflight Requests, v1. - FsInflightMetrics(String), - - /// List of blob objects, v2 - BlobObjectList(String), -} - -/// Specialized version of [`std::result::Result`] for value returned by backend services. -pub type ApiResponse = std::result::Result; - -/// HTTP error messages sent back to the clients. -/// -/// The `HttpError` object will be sent back to client with `format!("{:?}", http_error)`. -/// So unfortunately it implicitly becomes parts of the API, please keep it stable. -#[derive(Debug)] -pub enum HttpError { - // Daemon common related errors - /// Invalid HTTP request - BadRequest, - /// Failed to configure the daemon. - Configure(ApiError), - /// Failed to query information about daemon. - DaemonInfo(ApiError), - /// Failed to query global events. - Events(ApiError), - /// No handler registered for HTTP request URI - NoRoute, - /// Failed to parse HTTP request message body - ParseBody(SerdeError), - /// Query parameter is missed from the HTTP request. - QueryString(String), - - /// Failed to mount filesystem. - Mount(ApiError), - /// Failed to remount filesystem. - Upgrade(ApiError), - - // Metrics related errors - /// Failed to get backend metrics. - BackendMetrics(ApiError), - /// Failed to get blobcache metrics. - BlobcacheMetrics(ApiError), - - // Filesystem related errors (v1) - /// Failed to get filesystem backend information - FsBackendInfo(ApiError), - /// Failed to get filesystem per-file metrics. - FsFilesMetrics(ApiError), - /// Failed to get global metrics. - GlobalMetrics(ApiError), - /// Failed to get information about inflight request - InflightMetrics(ApiError), - /// Failed to get filesystem file access trace. 
- Pattern(ApiError), - - // Blob cache management related errors (v2) - /// Failed to create blob object - CreateBlobObject(ApiError), - /// Failed to delete blob object - DeleteBlobObject(ApiError), - /// Failed to delete blob file - DeleteBlobFile(ApiError), - /// Failed to list existing blob objects - GetBlobObjects(ApiError), -} - -#[derive(Serialize, Debug)] -pub(crate) struct ErrorMessage { - pub code: String, - pub message: String, -} - -impl From for Vec { - fn from(msg: ErrorMessage) -> Self { - // Safe to unwrap since `ErrorMessage` must succeed in serialization - serde_json::to_vec(&msg).unwrap() - } -} +// Copyright 2022 Alibaba Cloud. All rights reserved. +// Copyright 2020 Ant Group. All rights reserved. +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::sync::mpsc::{RecvError, SendError}; + +use serde::Deserialize; +use serde_json::Error as SerdeError; +use thiserror::Error; + +use crate::BlobCacheEntry; + +/// Errors related to Metrics. +#[derive(Error, Debug)] +pub enum MetricsError { + #[error("no counter found for the metric")] + NoCounter, + #[error("failed to serialize metric: {0:?}")] + Serialize(#[source] SerdeError), +} + +/// Mount a filesystem. +#[derive(Clone, Deserialize, Debug)] +pub struct ApiMountCmd { + /// Path to source of the filesystem. + pub source: String, + /// Type of filesystem. + #[serde(default)] + pub fs_type: String, + /// Configuration for the filesystem. + pub config: String, + /// List of files to prefetch. + #[serde(default)] + pub prefetch_files: Option>, +} + +/// Umount a mounted filesystem. +#[derive(Clone, Deserialize, Debug)] +pub struct ApiUmountCmd { + /// Path of mountpoint. + pub mountpoint: String, +} + +/// Set/update daemon configuration. +#[derive(Clone, Deserialize, Debug)] +pub struct DaemonConf { + /// Logging level: Off, Error, Warn, Info, Debug, Trace. + pub log_level: String, +} + +/// Identifier for cached blob objects. +/// +/// Domains are used to control the blob sharing scope. All blobs associated with the same domain +/// will be shared/reused, but blobs associated with different domains are isolated. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct BlobCacheObjectId { + /// Domain identifier for the object. + #[serde(default)] + pub domain_id: String, + /// Blob identifier for the object. + #[serde(default)] + pub blob_id: String, +} + +#[derive(Debug)] +pub enum ApiRequest { + /// Set daemon configuration. + ConfigureDaemon(DaemonConf), + /// Get daemon information. + GetDaemonInfo, + /// Get daemon global events. + GetEvents, + /// Stop the daemon. + Exit, + /// Start the daemon. + Start, + /// Send fuse fd to new daemon. + SendFuseFd, + /// Take over fuse fd from old daemon instance. + TakeoverFuseFd, + + // Filesystem Related + /// Mount a filesystem. + Mount(String, ApiMountCmd), + /// Remount a filesystem. + Remount(String, ApiMountCmd), + /// Unmount a filesystem. + Umount(String), + + /// Get storage backend metrics. + ExportBackendMetrics(Option), + /// Get blob cache metrics. + ExportBlobcacheMetrics(Option), + + // Nydus API v1 requests + /// Get filesystem global metrics. + ExportFsGlobalMetrics(Option), + /// Get filesystem access pattern log. + ExportFsAccessPatterns(Option), + /// Get filesystem backend information. + ExportFsBackendInfo(String), + /// Get filesystem file metrics. + ExportFsFilesMetrics(Option, bool), + /// Get information about filesystem inflight requests. 
+ ExportFsInflightMetrics, + + // Nydus API v2 + /// Get daemon information excluding filesystem backends. + GetDaemonInfoV2, + /// Create a blob cache entry + CreateBlobObject(Box), + /// Get information about blob cache entries + GetBlobObject(BlobCacheObjectId), + /// Delete a blob cache entry + DeleteBlobObject(BlobCacheObjectId), + /// Delete a blob cache file + DeleteBlobFile(String), +} + +/// Kinds for daemon related error messages. +#[derive(Debug)] +pub enum DaemonErrorKind { + /// Service not ready yet. + NotReady, + /// Generic errors. + Other(String), + /// Message serialization/deserialization related errors. + Serde(SerdeError), + /// Unexpected event type. + UnexpectedEvent(String), + /// Can't upgrade the daemon. + UpgradeManager(String), + /// Unsupported requests. + Unsupported, +} + +/// Kinds for metrics related error messages. +#[derive(Debug)] +pub enum MetricsErrorKind { + /// Generic daemon related errors. + Daemon(DaemonErrorKind), + /// Errors related to metrics implementation. + Stats(MetricsError), +} + +#[derive(Error, Debug)] +#[allow(clippy::large_enum_variant)] +pub enum ApiError { + #[error("daemon internal error: {0:?}")] + DaemonAbnormal(DaemonErrorKind), + #[error("daemon events error: {0}")] + Events(String), + #[error("metrics error: {0:?}")] + Metrics(MetricsErrorKind), + #[error("failed to mount filesystem: {0:?}")] + MountFilesystem(DaemonErrorKind), + #[error("failed to send request to the API service: {0:?}")] + RequestSend(#[from] SendError>), + #[error("failed to parse response payload type")] + ResponsePayloadType, + #[error("failed to receive response from the API service: {0:?}")] + ResponseRecv(#[from] RecvError), + #[error("failed to wake up the daemon: {0:?}")] + Wakeup(#[source] io::Error), +} + +/// Specialized `std::result::Result` for API replies. +pub type ApiResult = std::result::Result; + +#[derive(Serialize)] +pub enum ApiResponsePayload { + /// Filesystem backend metrics. + BackendMetrics(String), + /// Blobcache metrics. + BlobcacheMetrics(String), + /// Daemon version, configuration and status information in json. + DaemonInfo(String), + /// No data is sent on the channel. + Empty, + /// Global error events. + Events(String), + + /// Filesystem global metrics, v1. + FsGlobalMetrics(String), + /// Filesystem per-file metrics, v1. + FsFilesMetrics(String), + /// Filesystem access pattern trace log, v1. + FsFilesPatterns(String), + // Filesystem Backend Information, v1. + FsBackendInfo(String), + // Filesystem Inflight Requests, v1. + FsInflightMetrics(String), + + /// List of blob objects, v2 + BlobObjectList(String), +} + +/// Specialized version of [`std::result::Result`] for value returned by backend services. +pub type ApiResponse = std::result::Result; + +/// HTTP error messages sent back to the clients. +/// +/// The `HttpError` object will be sent back to client with `format!("{:?}", http_error)`. +/// So unfortunately it implicitly becomes parts of the API, please keep it stable. +#[derive(Debug)] +pub enum HttpError { + // Daemon common related errors + /// Invalid HTTP request + BadRequest, + /// Failed to configure the daemon. + Configure(ApiError), + /// Failed to query information about daemon. + DaemonInfo(ApiError), + /// Failed to query global events. + Events(ApiError), + /// No handler registered for HTTP request URI + NoRoute, + /// Failed to parse HTTP request message body + ParseBody(SerdeError), + /// Query parameter is missed from the HTTP request. + QueryString(String), + + /// Failed to mount filesystem. 
+ Mount(ApiError), + /// Failed to remount filesystem. + Upgrade(ApiError), + + // Metrics related errors + /// Failed to get backend metrics. + BackendMetrics(ApiError), + /// Failed to get blobcache metrics. + BlobcacheMetrics(ApiError), + + // Filesystem related errors (v1) + /// Failed to get filesystem backend information + FsBackendInfo(ApiError), + /// Failed to get filesystem per-file metrics. + FsFilesMetrics(ApiError), + /// Failed to get global metrics. + GlobalMetrics(ApiError), + /// Failed to get information about inflight request + InflightMetrics(ApiError), + /// Failed to get filesystem file access trace. + Pattern(ApiError), + + // Blob cache management related errors (v2) + /// Failed to create blob object + CreateBlobObject(ApiError), + /// Failed to delete blob object + DeleteBlobObject(ApiError), + /// Failed to delete blob file + DeleteBlobFile(ApiError), + /// Failed to list existing blob objects + GetBlobObjects(ApiError), +} + +#[derive(Serialize, Debug)] +pub(crate) struct ErrorMessage { + pub code: String, + pub message: String, +} + +impl From for Vec { + fn from(msg: ErrorMessage) -> Self { + // Safe to unwrap since `ErrorMessage` must succeed in serialization + serde_json::to_vec(&msg).unwrap() + } +} diff --git a/api/src/http_endpoint_common.rs b/api/src/http_endpoint_common.rs index 24ee21744f6..4f47ff6853b 100644 --- a/api/src/http_endpoint_common.rs +++ b/api/src/http_endpoint_common.rs @@ -1,197 +1,197 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 - -use dbs_uhttp::{Method, Request, Response}; - -use crate::http::{ApiError, ApiRequest, ApiResponse, ApiResponsePayload, HttpError}; -use crate::http_handler::{ - error_response, extract_query_part, parse_body, success_response, translate_status_code, - EndpointHandler, HttpResult, -}; - -// Convert an ApiResponse to a HTTP response. -// -// API server has successfully processed the request, but can't fulfill that. Therefore, -// a `error_response` is generated whose status code is 4XX or 5XX. With error response, -// it still returns Ok(error_response) to http request handling framework, which means -// nydusd api server receives the request and try handle it, even the request can't be fulfilled. -fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { - match api_resp { - Ok(r) => { - use ApiResponsePayload::*; - match r { - Empty => success_response(None), - Events(d) => success_response(Some(d)), - BackendMetrics(d) => success_response(Some(d)), - BlobcacheMetrics(d) => success_response(Some(d)), - _ => panic!("Unexpected response message from API service"), - } - } - Err(e) => { - let status_code = translate_status_code(&e); - error_response(op(e), status_code) - } - } -} -// Global daemon control requests. -/// Start the daemon. -pub struct StartHandler {} -impl EndpointHandler for StartHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Put, None) => { - let r = kicker(ApiRequest::Start); - Ok(convert_to_response(r, HttpError::Configure)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Stop the daemon. 
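Before moving on through the endpoint handlers, one note on the `ErrorMessage` struct at the end of `api/src/http.rs` above: it is the JSON body every failed request gets back, and because its `message` field is just the `Debug` rendering of an `HttpError`, the enum is effectively part of the wire format (hence the "please keep it stable" comment). A purely illustrative sketch of that shape, using a local mirror of the crate-private struct:

```rust
// Illustration only: ErrorMessage itself is pub(crate), so this mirrors its shape.
use serde::Serialize;

#[derive(Serialize)]
struct ErrorMessageMirror {
    code: String,
    message: String,
}

fn main() {
    // An unknown URI is answered with HTTP 404 and HttpError::NoRoute,
    // whose Debug rendering is simply "NoRoute".
    let msg = ErrorMessageMirror {
        code: "UNDEFINED".to_string(),
        message: "NoRoute".to_string(),
    };
    // Prints: {"code":"UNDEFINED","message":"NoRoute"}
    println!("{}", serde_json::to_string(&msg).unwrap());
}
```

On the daemon side this is exactly what `error_response()` produces before attaching the body to the HTTP response.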
-pub struct ExitHandler {} -impl EndpointHandler for ExitHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Put, None) => { - let r = kicker(ApiRequest::Exit); - Ok(convert_to_response(r, HttpError::Configure)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get daemon global events. -pub struct EventsHandler {} -impl EndpointHandler for EventsHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let r = kicker(ApiRequest::GetEvents); - Ok(convert_to_response(r, HttpError::Events)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -// Metrics related requests. -/// Get storage backend metrics. -pub struct MetricsBackendHandler {} -impl EndpointHandler for MetricsBackendHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let id = extract_query_part(req, "id"); - let r = kicker(ApiRequest::ExportBackendMetrics(id)); - Ok(convert_to_response(r, HttpError::BackendMetrics)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get blob cache metrics. -pub struct MetricsBlobcacheHandler {} -impl EndpointHandler for MetricsBlobcacheHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let id = extract_query_part(req, "id"); - let r = kicker(ApiRequest::ExportBlobcacheMetrics(id)); - Ok(convert_to_response(r, HttpError::BlobcacheMetrics)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Mount a filesystem. -pub struct MountHandler {} -impl EndpointHandler for MountHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - let mountpoint = extract_query_part(req, "mountpoint").ok_or_else(|| { - HttpError::QueryString("'mountpoint' should be specified in query string".to_string()) - })?; - match (req.method(), req.body.as_ref()) { - (Method::Post, Some(body)) => { - let cmd = parse_body(body)?; - let r = kicker(ApiRequest::Mount(mountpoint, cmd)); - Ok(convert_to_response(r, HttpError::Mount)) - } - (Method::Put, Some(body)) => { - let cmd = parse_body(body)?; - let r = kicker(ApiRequest::Remount(mountpoint, cmd)); - Ok(convert_to_response(r, HttpError::Mount)) - } - (Method::Delete, None) => { - let r = kicker(ApiRequest::Umount(mountpoint)); - Ok(convert_to_response(r, HttpError::Mount)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Send fuse fd to new daemon. -pub struct SendFuseFdHandler {} -impl EndpointHandler for SendFuseFdHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Put, None) => { - let r = kicker(ApiRequest::SendFuseFd); - Ok(convert_to_response(r, HttpError::Upgrade)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Take over fuse fd from old daemon instance. 
-pub struct TakeoverFuseFdHandler {} -impl EndpointHandler for TakeoverFuseFdHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Put, None) => { - let r = kicker(ApiRequest::TakeoverFuseFd); - Ok(convert_to_response(r, HttpError::Upgrade)) - } - _ => Err(HttpError::BadRequest), - } - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use dbs_uhttp::{Method, Request, Response}; + +use crate::http::{ApiError, ApiRequest, ApiResponse, ApiResponsePayload, HttpError}; +use crate::http_handler::{ + error_response, extract_query_part, parse_body, success_response, translate_status_code, + EndpointHandler, HttpResult, +}; + +// Convert an ApiResponse to a HTTP response. +// +// API server has successfully processed the request, but can't fulfill that. Therefore, +// a `error_response` is generated whose status code is 4XX or 5XX. With error response, +// it still returns Ok(error_response) to http request handling framework, which means +// nydusd api server receives the request and try handle it, even the request can't be fulfilled. +fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { + match api_resp { + Ok(r) => { + use ApiResponsePayload::*; + match r { + Empty => success_response(None), + Events(d) => success_response(Some(d)), + BackendMetrics(d) => success_response(Some(d)), + BlobcacheMetrics(d) => success_response(Some(d)), + _ => panic!("Unexpected response message from API service"), + } + } + Err(e) => { + let status_code = translate_status_code(&e); + error_response(op(e), status_code) + } + } +} +// Global daemon control requests. +/// Start the daemon. +pub struct StartHandler {} +impl EndpointHandler for StartHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Put, None) => { + let r = kicker(ApiRequest::Start); + Ok(convert_to_response(r, HttpError::Configure)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Stop the daemon. +pub struct ExitHandler {} +impl EndpointHandler for ExitHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Put, None) => { + let r = kicker(ApiRequest::Exit); + Ok(convert_to_response(r, HttpError::Configure)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get daemon global events. +pub struct EventsHandler {} +impl EndpointHandler for EventsHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let r = kicker(ApiRequest::GetEvents); + Ok(convert_to_response(r, HttpError::Events)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +// Metrics related requests. +/// Get storage backend metrics. 
+pub struct MetricsBackendHandler {} +impl EndpointHandler for MetricsBackendHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let id = extract_query_part(req, "id"); + let r = kicker(ApiRequest::ExportBackendMetrics(id)); + Ok(convert_to_response(r, HttpError::BackendMetrics)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get blob cache metrics. +pub struct MetricsBlobcacheHandler {} +impl EndpointHandler for MetricsBlobcacheHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let id = extract_query_part(req, "id"); + let r = kicker(ApiRequest::ExportBlobcacheMetrics(id)); + Ok(convert_to_response(r, HttpError::BlobcacheMetrics)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Mount a filesystem. +pub struct MountHandler {} +impl EndpointHandler for MountHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + let mountpoint = extract_query_part(req, "mountpoint").ok_or_else(|| { + HttpError::QueryString("'mountpoint' should be specified in query string".to_string()) + })?; + match (req.method(), req.body.as_ref()) { + (Method::Post, Some(body)) => { + let cmd = parse_body(body)?; + let r = kicker(ApiRequest::Mount(mountpoint, cmd)); + Ok(convert_to_response(r, HttpError::Mount)) + } + (Method::Put, Some(body)) => { + let cmd = parse_body(body)?; + let r = kicker(ApiRequest::Remount(mountpoint, cmd)); + Ok(convert_to_response(r, HttpError::Mount)) + } + (Method::Delete, None) => { + let r = kicker(ApiRequest::Umount(mountpoint)); + Ok(convert_to_response(r, HttpError::Mount)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Send fuse fd to new daemon. +pub struct SendFuseFdHandler {} +impl EndpointHandler for SendFuseFdHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Put, None) => { + let r = kicker(ApiRequest::SendFuseFd); + Ok(convert_to_response(r, HttpError::Upgrade)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Take over fuse fd from old daemon instance. +pub struct TakeoverFuseFdHandler {} +impl EndpointHandler for TakeoverFuseFdHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Put, None) => { + let r = kicker(ApiRequest::TakeoverFuseFd); + Ok(convert_to_response(r, HttpError::Upgrade)) + } + _ => Err(HttpError::BadRequest), + } + } +} diff --git a/api/src/http_endpoint_v1.rs b/api/src/http_endpoint_v1.rs index bf7ef7496d4..3adff648f12 100644 --- a/api/src/http_endpoint_v1.rs +++ b/api/src/http_endpoint_v1.rs @@ -1,168 +1,168 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 - -//! Nydus API v1. - -use dbs_uhttp::{Method, Request, Response}; - -use crate::http::{ApiError, ApiRequest, ApiResponse, ApiResponsePayload, HttpError}; -use crate::http_handler::{ - error_response, extract_query_part, parse_body, success_response, translate_status_code, - EndpointHandler, HttpResult, -}; - -/// HTTP URI prefix for API v1. 
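Every handler in this file follows the same contract: pattern-match on `(method, body)`, translate the HTTP request into an `ApiRequest`, hand it to the `kicker` closure, and wrap the reply or map the error to an `HttpError` variant. As a hedged illustration of that contract (not part of this diff), a hypothetical out-of-tree handler could look like the sketch below. It builds the `Response` directly because helpers such as `convert_to_response` are private to the crate, and actually serving it would still require adding a route to `HTTP_ROUTES` inside `api/src/http_handler.rs`.

```rust
// Hypothetical handler, illustration only. EndpointHandler, HttpResult and the
// API types are re-exported by nydus-api when its `handler` feature is enabled.
use dbs_uhttp::{Body, Method, Request, Response, StatusCode, Version};
use nydus_api::{
    ApiRequest, ApiResponse, ApiResponsePayload, EndpointHandler, HttpError, HttpResult,
};

pub struct PingHandler {}

impl EndpointHandler for PingHandler {
    fn handle_request(
        &self,
        req: &Request,
        kicker: &dyn Fn(ApiRequest) -> ApiResponse,
    ) -> HttpResult {
        match (req.method(), req.body.as_ref()) {
            // Reuse GetDaemonInfo as a liveness probe; a dedicated endpoint
            // would add its own ApiRequest variant instead.
            (Method::Get, None) => match kicker(ApiRequest::GetDaemonInfo) {
                Ok(ApiResponsePayload::DaemonInfo(info)) => {
                    let mut resp = Response::new(Version::Http11, StatusCode::OK);
                    resp.set_body(Body::new(info));
                    Ok(resp)
                }
                Ok(_) => Err(HttpError::BadRequest),
                Err(e) => Err(HttpError::DaemonInfo(e)),
            },
            _ => Err(HttpError::BadRequest),
        }
    }
}
```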
-pub const HTTP_ROOT_V1: &str = "/api/v1"; - -// Convert an ApiResponse to a HTTP response. -// -// API server has successfully processed the request, but can't fulfill that. Therefore, -// a `error_response` is generated whose status code is 4XX or 5XX. With error response, -// it still returns Ok(error_response) to http request handling framework, which means -// nydusd api server receives the request and try handle it, even the request can't be fulfilled. -fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { - match api_resp { - Ok(r) => { - use ApiResponsePayload::*; - match r { - Empty => success_response(None), - DaemonInfo(d) => success_response(Some(d)), - FsGlobalMetrics(d) => success_response(Some(d)), - FsFilesMetrics(d) => success_response(Some(d)), - FsFilesPatterns(d) => success_response(Some(d)), - FsBackendInfo(d) => success_response(Some(d)), - FsInflightMetrics(d) => success_response(Some(d)), - _ => panic!("Unexpected response message from API service"), - } - } - Err(e) => { - let status_code = translate_status_code(&e); - error_response(op(e), status_code) - } - } -} - -/// Get daemon information and set daemon configuration. -pub struct InfoHandler {} -impl EndpointHandler for InfoHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let r = kicker(ApiRequest::GetDaemonInfo); - Ok(convert_to_response(r, HttpError::DaemonInfo)) - } - (Method::Put, Some(body)) => { - let conf = parse_body(body)?; - let r = kicker(ApiRequest::ConfigureDaemon(conf)); - Ok(convert_to_response(r, HttpError::Configure)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get filesystem backend information. -pub struct FsBackendInfo {} -impl EndpointHandler for FsBackendInfo { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let mountpoint = extract_query_part(req, "mountpoint").ok_or_else(|| { - HttpError::QueryString( - "'mountpoint' should be specified in query string".to_string(), - ) - })?; - let r = kicker(ApiRequest::ExportFsBackendInfo(mountpoint)); - Ok(convert_to_response(r, HttpError::FsBackendInfo)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get filesystem global metrics. -pub struct MetricsFsGlobalHandler {} -impl EndpointHandler for MetricsFsGlobalHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let id = extract_query_part(req, "id"); - let r = kicker(ApiRequest::ExportFsGlobalMetrics(id)); - Ok(convert_to_response(r, HttpError::GlobalMetrics)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get filesystem access pattern log. -pub struct MetricsFsAccessPatternHandler {} -impl EndpointHandler for MetricsFsAccessPatternHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let id = extract_query_part(req, "id"); - let r = kicker(ApiRequest::ExportFsAccessPatterns(id)); - Ok(convert_to_response(r, HttpError::Pattern)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get filesystem file metrics. 
-pub struct MetricsFsFilesHandler {} -impl EndpointHandler for MetricsFsFilesHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let id = extract_query_part(req, "id"); - let latest_read_files = extract_query_part(req, "latest") - .map_or(false, |b| b.parse::().unwrap_or(false)); - let r = kicker(ApiRequest::ExportFsFilesMetrics(id, latest_read_files)); - Ok(convert_to_response(r, HttpError::FsFilesMetrics)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// Get information about filesystem inflight requests. -pub struct MetricsFsInflightHandler {} -impl EndpointHandler for MetricsFsInflightHandler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let r = kicker(ApiRequest::ExportFsInflightMetrics); - Ok(convert_to_response(r, HttpError::InflightMetrics)) - } - _ => Err(HttpError::BadRequest), - } - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//! Nydus API v1. + +use dbs_uhttp::{Method, Request, Response}; + +use crate::http::{ApiError, ApiRequest, ApiResponse, ApiResponsePayload, HttpError}; +use crate::http_handler::{ + error_response, extract_query_part, parse_body, success_response, translate_status_code, + EndpointHandler, HttpResult, +}; + +/// HTTP URI prefix for API v1. +pub const HTTP_ROOT_V1: &str = "/api/v1"; + +// Convert an ApiResponse to a HTTP response. +// +// API server has successfully processed the request, but can't fulfill that. Therefore, +// a `error_response` is generated whose status code is 4XX or 5XX. With error response, +// it still returns Ok(error_response) to http request handling framework, which means +// nydusd api server receives the request and try handle it, even the request can't be fulfilled. +fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { + match api_resp { + Ok(r) => { + use ApiResponsePayload::*; + match r { + Empty => success_response(None), + DaemonInfo(d) => success_response(Some(d)), + FsGlobalMetrics(d) => success_response(Some(d)), + FsFilesMetrics(d) => success_response(Some(d)), + FsFilesPatterns(d) => success_response(Some(d)), + FsBackendInfo(d) => success_response(Some(d)), + FsInflightMetrics(d) => success_response(Some(d)), + _ => panic!("Unexpected response message from API service"), + } + } + Err(e) => { + let status_code = translate_status_code(&e); + error_response(op(e), status_code) + } + } +} + +/// Get daemon information and set daemon configuration. +pub struct InfoHandler {} +impl EndpointHandler for InfoHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let r = kicker(ApiRequest::GetDaemonInfo); + Ok(convert_to_response(r, HttpError::DaemonInfo)) + } + (Method::Put, Some(body)) => { + let conf = parse_body(body)?; + let r = kicker(ApiRequest::ConfigureDaemon(conf)); + Ok(convert_to_response(r, HttpError::Configure)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get filesystem backend information. 
+pub struct FsBackendInfo {} +impl EndpointHandler for FsBackendInfo { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let mountpoint = extract_query_part(req, "mountpoint").ok_or_else(|| { + HttpError::QueryString( + "'mountpoint' should be specified in query string".to_string(), + ) + })?; + let r = kicker(ApiRequest::ExportFsBackendInfo(mountpoint)); + Ok(convert_to_response(r, HttpError::FsBackendInfo)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get filesystem global metrics. +pub struct MetricsFsGlobalHandler {} +impl EndpointHandler for MetricsFsGlobalHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let id = extract_query_part(req, "id"); + let r = kicker(ApiRequest::ExportFsGlobalMetrics(id)); + Ok(convert_to_response(r, HttpError::GlobalMetrics)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get filesystem access pattern log. +pub struct MetricsFsAccessPatternHandler {} +impl EndpointHandler for MetricsFsAccessPatternHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let id = extract_query_part(req, "id"); + let r = kicker(ApiRequest::ExportFsAccessPatterns(id)); + Ok(convert_to_response(r, HttpError::Pattern)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get filesystem file metrics. +pub struct MetricsFsFilesHandler {} +impl EndpointHandler for MetricsFsFilesHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let id = extract_query_part(req, "id"); + let latest_read_files = extract_query_part(req, "latest") + .map_or(false, |b| b.parse::().unwrap_or(false)); + let r = kicker(ApiRequest::ExportFsFilesMetrics(id, latest_read_files)); + Ok(convert_to_response(r, HttpError::FsFilesMetrics)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// Get information about filesystem inflight requests. +pub struct MetricsFsInflightHandler {} +impl EndpointHandler for MetricsFsInflightHandler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let r = kicker(ApiRequest::ExportFsInflightMetrics); + Ok(convert_to_response(r, HttpError::InflightMetrics)) + } + _ => Err(HttpError::BadRequest), + } + } +} diff --git a/api/src/http_endpoint_v2.rs b/api/src/http_endpoint_v2.rs index 5d14f1cdb81..170179b7765 100644 --- a/api/src/http_endpoint_v2.rs +++ b/api/src/http_endpoint_v2.rs @@ -1,112 +1,112 @@ -// Copyright 2022 Alibaba Cloud. All rights reserved. -// Copyright 2020 Ant Group. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 - -//! Nydus API v2. 
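All of the v1 handlers above are reached over nydusd's Unix domain socket with plain HTTP/1.1 (the v2 endpoints that follow work the same way, just under `/api/v2`). A minimal client-side sketch, assuming a placeholder socket path; the real path is whatever API socket the daemon was started with:

```rust
// Minimal sketch: query the v1 global metrics endpoint over the API socket.
// The socket path below is a placeholder, not something defined by this diff.
use std::io::{Read, Write};
use std::os::unix::net::UnixStream;

fn main() -> std::io::Result<()> {
    let mut stream = UnixStream::connect("/tmp/nydusd-api.sock")?;

    // MetricsFsGlobalHandler answers GET /api/v1/metrics; the optional `id`
    // query parameter selects one filesystem instance via extract_query_part().
    let request = "GET /api/v1/metrics HTTP/1.1\r\nHost: localhost\r\nAccept: */*\r\n\r\n";
    stream.write_all(request.as_bytes())?;

    // Read a single chunk of the response; enough for an illustration.
    let mut buf = vec![0u8; 64 * 1024];
    let n = stream.read(&mut buf)?;
    print!("{}", String::from_utf8_lossy(&buf[..n]));
    Ok(())
}
```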
- -use crate::BlobCacheEntry; -use dbs_uhttp::{Method, Request, Response}; - -use crate::http::{ - ApiError, ApiRequest, ApiResponse, ApiResponsePayload, BlobCacheObjectId, HttpError, -}; -use crate::http_handler::{ - error_response, extract_query_part, parse_body, success_response, translate_status_code, - EndpointHandler, HttpResult, -}; - -/// HTTP URI prefix for API v2. -pub const HTTP_ROOT_V2: &str = "/api/v2"; - -// Convert an ApiResponse to a HTTP response. -// -// API server has successfully processed the request, but can't fulfill that. Therefore, -// a `error_response` is generated whose status code is 4XX or 5XX. With error response, -// it still returns Ok(error_response) to http request handling framework, which means -// nydusd api server receives the request and try handle it, even the request can't be fulfilled. -fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { - match api_resp { - Ok(r) => { - use ApiResponsePayload::*; - match r { - Empty => success_response(None), - DaemonInfo(d) => success_response(Some(d)), - BlobObjectList(d) => success_response(Some(d)), - _ => panic!("Unexpected response message from API service"), - } - } - Err(e) => { - let status_code = translate_status_code(&e); - error_response(op(e), status_code) - } - } -} - -/// Get daemon information and set daemon configuration. -pub struct InfoV2Handler {} -impl EndpointHandler for InfoV2Handler { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - let r = kicker(ApiRequest::GetDaemonInfoV2); - Ok(convert_to_response(r, HttpError::DaemonInfo)) - } - (Method::Put, Some(body)) => { - let conf = parse_body(body)?; - let r = kicker(ApiRequest::ConfigureDaemon(conf)); - Ok(convert_to_response(r, HttpError::Configure)) - } - _ => Err(HttpError::BadRequest), - } - } -} - -/// List blob objects managed by the blob cache manager. -pub struct BlobObjectListHandlerV2 {} -impl EndpointHandler for BlobObjectListHandlerV2 { - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult { - match (req.method(), req.body.as_ref()) { - (Method::Get, None) => { - if let Some(domain_id) = extract_query_part(req, "domain_id") { - let blob_id = extract_query_part(req, "blob_id").unwrap_or_default(); - let param = BlobCacheObjectId { domain_id, blob_id }; - let r = kicker(ApiRequest::GetBlobObject(param)); - return Ok(convert_to_response(r, HttpError::GetBlobObjects)); - } - Err(HttpError::BadRequest) - } - (Method::Put, Some(body)) => { - let mut conf: Box = parse_body(body)?; - if !conf.prepare_configuration_info() { - return Err(HttpError::BadRequest); - } - let r = kicker(ApiRequest::CreateBlobObject(conf)); - Ok(convert_to_response(r, HttpError::CreateBlobObject)) - } - (Method::Delete, None) => { - if let Some(domain_id) = extract_query_part(req, "domain_id") { - let blob_id = extract_query_part(req, "blob_id").unwrap_or_default(); - let param = BlobCacheObjectId { domain_id, blob_id }; - let r = kicker(ApiRequest::DeleteBlobObject(param)); - return Ok(convert_to_response(r, HttpError::DeleteBlobObject)); - } - if let Some(blob_id) = extract_query_part(req, "blob_id") { - let r = kicker(ApiRequest::DeleteBlobFile(blob_id)); - return Ok(convert_to_response(r, HttpError::DeleteBlobFile)); - } - Err(HttpError::BadRequest) - } - _ => Err(HttpError::BadRequest), - } - } -} +// Copyright 2022 Alibaba Cloud. 
All rights reserved. +// Copyright 2020 Ant Group. All rights reserved. +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//! Nydus API v2. + +use crate::BlobCacheEntry; +use dbs_uhttp::{Method, Request, Response}; + +use crate::http::{ + ApiError, ApiRequest, ApiResponse, ApiResponsePayload, BlobCacheObjectId, HttpError, +}; +use crate::http_handler::{ + error_response, extract_query_part, parse_body, success_response, translate_status_code, + EndpointHandler, HttpResult, +}; + +/// HTTP URI prefix for API v2. +pub const HTTP_ROOT_V2: &str = "/api/v2"; + +// Convert an ApiResponse to a HTTP response. +// +// API server has successfully processed the request, but can't fulfill that. Therefore, +// a `error_response` is generated whose status code is 4XX or 5XX. With error response, +// it still returns Ok(error_response) to http request handling framework, which means +// nydusd api server receives the request and try handle it, even the request can't be fulfilled. +fn convert_to_response HttpError>(api_resp: ApiResponse, op: O) -> Response { + match api_resp { + Ok(r) => { + use ApiResponsePayload::*; + match r { + Empty => success_response(None), + DaemonInfo(d) => success_response(Some(d)), + BlobObjectList(d) => success_response(Some(d)), + _ => panic!("Unexpected response message from API service"), + } + } + Err(e) => { + let status_code = translate_status_code(&e); + error_response(op(e), status_code) + } + } +} + +/// Get daemon information and set daemon configuration. +pub struct InfoV2Handler {} +impl EndpointHandler for InfoV2Handler { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + let r = kicker(ApiRequest::GetDaemonInfoV2); + Ok(convert_to_response(r, HttpError::DaemonInfo)) + } + (Method::Put, Some(body)) => { + let conf = parse_body(body)?; + let r = kicker(ApiRequest::ConfigureDaemon(conf)); + Ok(convert_to_response(r, HttpError::Configure)) + } + _ => Err(HttpError::BadRequest), + } + } +} + +/// List blob objects managed by the blob cache manager. 
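The `BlobObjectListHandlerV2` defined next multiplexes three verbs on `/api/v2/blobs`: `GET` and `DELETE` select entries through the `domain_id`/`blob_id` query parameters, while `PUT` creates an entry from a `BlobCacheEntry` JSON body whose schema is defined elsewhere in the crate and not shown in this diff. A hedged client sketch, with a placeholder socket path and the entry JSON read from a caller-supplied file:

```rust
// Hedged sketch: create, then delete, a blob cache entry through /api/v2/blobs.
// The socket path and entry.json are placeholders supplied by the caller; the
// BlobCacheEntry JSON schema is defined elsewhere in the nydus-api crate.
use std::fs;
use std::io::{Read, Write};
use std::os::unix::net::UnixStream;

fn send(req: &str) -> std::io::Result<String> {
    let mut stream = UnixStream::connect("/tmp/nydusd-api.sock")?;
    stream.write_all(req.as_bytes())?;
    let mut buf = vec![0u8; 64 * 1024];
    let n = stream.read(&mut buf)?;
    Ok(String::from_utf8_lossy(&buf[..n]).into_owned())
}

fn main() -> std::io::Result<()> {
    // PUT /api/v2/blobs with a BlobCacheEntry body creates the entry.
    let body = fs::read_to_string("entry.json")?;
    let put = format!(
        "PUT /api/v2/blobs HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
        body.len(),
        body
    );
    println!("{}", send(&put)?);

    // DELETE with a domain_id (and optionally blob_id) removes cache entries;
    // DELETE with only blob_id falls through to the blob-file deletion branch.
    // "domain1" and "blob1" are placeholder identifiers.
    println!(
        "{}",
        send("DELETE /api/v2/blobs?domain_id=domain1&blob_id=blob1 HTTP/1.1\r\nHost: localhost\r\n\r\n")?
    );
    Ok(())
}
```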
+pub struct BlobObjectListHandlerV2 {} +impl EndpointHandler for BlobObjectListHandlerV2 { + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult { + match (req.method(), req.body.as_ref()) { + (Method::Get, None) => { + if let Some(domain_id) = extract_query_part(req, "domain_id") { + let blob_id = extract_query_part(req, "blob_id").unwrap_or_default(); + let param = BlobCacheObjectId { domain_id, blob_id }; + let r = kicker(ApiRequest::GetBlobObject(param)); + return Ok(convert_to_response(r, HttpError::GetBlobObjects)); + } + Err(HttpError::BadRequest) + } + (Method::Put, Some(body)) => { + let mut conf: Box = parse_body(body)?; + if !conf.prepare_configuration_info() { + return Err(HttpError::BadRequest); + } + let r = kicker(ApiRequest::CreateBlobObject(conf)); + Ok(convert_to_response(r, HttpError::CreateBlobObject)) + } + (Method::Delete, None) => { + if let Some(domain_id) = extract_query_part(req, "domain_id") { + let blob_id = extract_query_part(req, "blob_id").unwrap_or_default(); + let param = BlobCacheObjectId { domain_id, blob_id }; + let r = kicker(ApiRequest::DeleteBlobObject(param)); + return Ok(convert_to_response(r, HttpError::DeleteBlobObject)); + } + if let Some(blob_id) = extract_query_part(req, "blob_id") { + let r = kicker(ApiRequest::DeleteBlobFile(blob_id)); + return Ok(convert_to_response(r, HttpError::DeleteBlobFile)); + } + Err(HttpError::BadRequest) + } + _ => Err(HttpError::BadRequest), + } + } +} diff --git a/api/src/http_handler.rs b/api/src/http_handler.rs index 36a181a6be4..7de4541664c 100644 --- a/api/src/http_handler.rs +++ b/api/src/http_handler.rs @@ -1,410 +1,410 @@ -use std::collections::HashMap; -use std::io::{Error, ErrorKind, Result}; -use std::os::unix::io::AsRawFd; -use std::path::PathBuf; -use std::sync::mpsc::{Receiver, Sender}; -use std::sync::Arc; -use std::time::SystemTime; -use std::{fs, thread}; - -use dbs_uhttp::{Body, HttpServer, MediaType, Request, Response, ServerError, StatusCode, Version}; - -use http::uri::Uri; -use mio::unix::SourceFd; -use mio::{Events, Interest, Poll, Token, Waker}; -use serde::Deserialize; -use url::Url; - -use crate::http::{ - ApiError, ApiRequest, ApiResponse, DaemonErrorKind, ErrorMessage, HttpError, MetricsError, - MetricsErrorKind, -}; -use crate::http_endpoint_common::{ - EventsHandler, ExitHandler, MetricsBackendHandler, MetricsBlobcacheHandler, MountHandler, - SendFuseFdHandler, StartHandler, TakeoverFuseFdHandler, -}; -use crate::http_endpoint_v1::{ - FsBackendInfo, InfoHandler, MetricsFsAccessPatternHandler, MetricsFsFilesHandler, - MetricsFsGlobalHandler, MetricsFsInflightHandler, HTTP_ROOT_V1, -}; -use crate::http_endpoint_v2::{BlobObjectListHandlerV2, InfoV2Handler, HTTP_ROOT_V2}; - -const EXIT_TOKEN: Token = Token(usize::MAX); -const REQUEST_TOKEN: Token = Token(1); - -/// Specialized version of [`std::result::Result`] for value returned by [`EndpointHandler`]. -pub type HttpResult = std::result::Result; - -/// Get query parameter with `key` from the HTTP request. -pub fn extract_query_part(req: &Request, key: &str) -> Option { - // Splicing req.uri with "http:" prefix might look weird, but since it depends on - // crate `Url` to generate query_pairs HashMap, which is working on top of Url not Uri. - // Better that we can add query part support to Micro-http in the future. But - // right now, below way makes it easy to obtain query parts from uri. 
- let http_prefix = format!("http:{}", req.uri().get_abs_path()); - let url = Url::parse(&http_prefix) - .map_err(|e| { - error!("api: can't parse request {:?}", e); - e - }) - .ok()?; - - for (k, v) in url.query_pairs() { - if k == key { - trace!("api: got query param {}={}", k, v); - return Some(v.into_owned()); - } - } - None -} - -/// Parse HTTP request body. -pub(crate) fn parse_body<'a, F: Deserialize<'a>>(b: &'a Body) -> std::result::Result { - serde_json::from_slice::(b.raw()).map_err(HttpError::ParseBody) -} - -/// Translate ApiError message to HTTP status code. -pub(crate) fn translate_status_code(e: &ApiError) -> StatusCode { - match e { - ApiError::DaemonAbnormal(kind) | ApiError::MountFilesystem(kind) => match kind { - DaemonErrorKind::NotReady => StatusCode::ServiceUnavailable, - DaemonErrorKind::Unsupported => StatusCode::NotImplemented, - DaemonErrorKind::UnexpectedEvent(_) => StatusCode::BadRequest, - _ => StatusCode::InternalServerError, - }, - ApiError::Metrics(MetricsErrorKind::Stats(MetricsError::NoCounter)) => StatusCode::NotFound, - _ => StatusCode::InternalServerError, - } -} - -/// Generate a successful HTTP response message. -pub(crate) fn success_response(body: Option) -> Response { - if let Some(body) = body { - let mut r = Response::new(Version::Http11, StatusCode::OK); - r.set_body(Body::new(body)); - r - } else { - Response::new(Version::Http11, StatusCode::NoContent) - } -} - -/// Generate a HTTP error response message with status code and error message. -pub(crate) fn error_response(error: HttpError, status: StatusCode) -> Response { - let mut response = Response::new(Version::Http11, status); - let err_msg = ErrorMessage { - code: "UNDEFINED".to_string(), - message: format!("{:?}", error), - }; - response.set_body(Body::new(err_msg)); - response -} - -/// Trait for HTTP endpoints to handle HTTP requests. -pub trait EndpointHandler: Sync + Send { - /// Handles an HTTP request. - /// - /// The main responsibilities of the handlers includes: - /// - parse and validate incoming request message - /// - send the request to subscriber - /// - wait response from the subscriber - /// - generate HTTP result - fn handle_request( - &self, - req: &Request, - kicker: &dyn Fn(ApiRequest) -> ApiResponse, - ) -> HttpResult; -} - -/// Struct to route HTTP requests to corresponding registered endpoint handlers. -pub struct HttpRoutes { - /// routes is a hash table mapping endpoint URIs to their endpoint handlers. - pub routes: HashMap>, -} - -macro_rules! endpoint_v1 { - ($path:expr) => { - format!("{}{}", HTTP_ROOT_V1, $path) - }; -} - -macro_rules! endpoint_v2 { - ($path:expr) => { - format!("{}{}", HTTP_ROOT_V2, $path) - }; -} - -lazy_static! { - /// HTTP_ROUTES contain all the nydusd HTTP routes. 
- pub static ref HTTP_ROUTES: HttpRoutes = { - let mut r = HttpRoutes { - routes: HashMap::new(), - }; - - // Common - r.routes.insert(endpoint_v1!("/daemon/events"), Box::new(EventsHandler{})); - r.routes.insert(endpoint_v1!("/daemon/exit"), Box::new(ExitHandler{})); - r.routes.insert(endpoint_v1!("/daemon/start"), Box::new(StartHandler{})); - r.routes.insert(endpoint_v1!("/daemon/fuse/sendfd"), Box::new(SendFuseFdHandler{})); - r.routes.insert(endpoint_v1!("/daemon/fuse/takeover"), Box::new(TakeoverFuseFdHandler{})); - r.routes.insert(endpoint_v1!("/mount"), Box::new(MountHandler{})); - r.routes.insert(endpoint_v1!("/metrics/backend"), Box::new(MetricsBackendHandler{})); - r.routes.insert(endpoint_v1!("/metrics/blobcache"), Box::new(MetricsBlobcacheHandler{})); - - // Nydus API, v1 - r.routes.insert(endpoint_v1!("/daemon"), Box::new(InfoHandler{})); - r.routes.insert(endpoint_v1!("/daemon/backend"), Box::new(FsBackendInfo{})); - r.routes.insert(endpoint_v1!("/metrics"), Box::new(MetricsFsGlobalHandler{})); - r.routes.insert(endpoint_v1!("/metrics/files"), Box::new(MetricsFsFilesHandler{})); - r.routes.insert(endpoint_v1!("/metrics/inflight"), Box::new(MetricsFsInflightHandler{})); - r.routes.insert(endpoint_v1!("/metrics/pattern"), Box::new(MetricsFsAccessPatternHandler{})); - - // Nydus API, v2 - r.routes.insert(endpoint_v2!("/daemon"), Box::new(InfoV2Handler{})); - r.routes.insert(endpoint_v2!("/blobs"), Box::new(BlobObjectListHandlerV2{})); - - r - }; -} - -fn kick_api_server( - to_api: &Sender>, - from_api: &Receiver, - request: ApiRequest, -) -> ApiResponse { - to_api.send(Some(request)).map_err(ApiError::RequestSend)?; - from_api.recv().map_err(ApiError::ResponseRecv)? -} - -// Example: -// <-- GET / -// --> GET / 200 835ms 746b - -fn trace_api_begin(request: &dbs_uhttp::Request) { - debug!("<--- {:?} {:?}", request.method(), request.uri()); -} - -fn trace_api_end(response: &dbs_uhttp::Response, method: dbs_uhttp::Method, recv_time: SystemTime) { - let elapse = SystemTime::now().duration_since(recv_time); - debug!( - "---> {:?} Status Code: {:?}, Elapse: {:?}, Body Size: {:?}", - method, - response.status(), - elapse, - response.content_length() - ); -} - -fn exit_api_server(to_api: &Sender>) { - if to_api.send(None).is_err() { - error!("failed to send stop request api server"); - } -} - -fn handle_http_request( - request: &Request, - to_api: &Sender>, - from_api: &Receiver, -) -> Response { - let begin_time = SystemTime::now(); - trace_api_begin(request); - - // Micro http should ensure that req path is legal. - let uri_parsed = request.uri().get_abs_path().parse::(); - let mut response = match uri_parsed { - Ok(uri) => match HTTP_ROUTES.routes.get(uri.path()) { - Some(route) => route - .handle_request(request, &|r| kick_api_server(to_api, from_api, r)) - .unwrap_or_else(|err| error_response(err, StatusCode::BadRequest)), - None => error_response(HttpError::NoRoute, StatusCode::NotFound), - }, - Err(e) => { - error!("Failed parse URI, {}", e); - error_response(HttpError::BadRequest, StatusCode::BadRequest) - } - }; - response.set_server("Nydus API"); - response.set_content_type(MediaType::ApplicationJson); - - trace_api_end(&response, request.method(), begin_time); - - response -} - -/// Start a HTTP server to serve API requests. -/// -/// Start a HTTP server parsing http requests and send to nydus API server a concrete -/// request to operate nydus or fetch working status. -/// The HTTP server sends request by `to_api` channel and wait for response from `from_api` channel. 
-pub fn start_http_thread( - path: &str, - to_api: Sender>, - from_api: Receiver, -) -> Result<(thread::JoinHandle>, Arc)> { - // Try to remove existed unix domain socket - let _ = fs::remove_file(path); - let socket_path = PathBuf::from(path); - - let mut poll = Poll::new()?; - let waker = Arc::new(Waker::new(poll.registry(), EXIT_TOKEN)?); - let waker2 = waker.clone(); - let mut server = HttpServer::new(socket_path).map_err(|e| { - if let ServerError::IOError(e) = e { - e - } else { - Error::new(ErrorKind::Other, format!("{:?}", e)) - } - })?; - poll.registry().register( - &mut SourceFd(&server.epoll().as_raw_fd()), - REQUEST_TOKEN, - Interest::READABLE, - )?; - - let thread = thread::Builder::new() - .name("nydus-http-server".to_string()) - .spawn(move || { - // Must start the server successfully or just die by panic - server.start_server().unwrap(); - info!("http server started"); - - let mut events = Events::with_capacity(100); - let mut do_exit = false; - loop { - match poll.poll(&mut events, None) { - Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, - Err(e) => { - error!("http server poll events failed, {}", e); - exit_api_server(&to_api); - return Err(e); - } - Ok(_) => {} - } - - for event in &events { - match event.token() { - EXIT_TOKEN => do_exit = true, - REQUEST_TOKEN => match server.requests() { - Ok(request_vec) => { - for server_request in request_vec { - let reply = server_request.process(|request| { - handle_http_request(request, &to_api, &from_api) - }); - // Ignore error when sending response - server.respond(reply).unwrap_or_else(|e| { - error!("HTTP server error on response: {}", e) - }); - } - } - Err(e) => { - error!("HTTP server error on retrieving incoming request: {}", e); - } - }, - _ => unreachable!("unknown poll token."), - } - } - - if do_exit { - exit_api_server(&to_api); - break; - } - } - - info!("http-server thread exits"); - // Keep the Waker alive to match the lifetime of the poll loop above - drop(waker2); - Ok(()) - })?; - - Ok((thread, waker)) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::sync::mpsc::channel; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_http_api_routes_v1() { - assert!(HTTP_ROUTES.routes.get("/api/v1/daemon").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/events").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/backend").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/start").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/exit").is_some()); - assert!(HTTP_ROUTES - .routes - .get("/api/v1/daemon/fuse/sendfd") - .is_some()); - assert!(HTTP_ROUTES - .routes - .get("/api/v1/daemon/fuse/takeover") - .is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/mount").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/metrics").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/files").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/pattern").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/backend").is_some()); - assert!(HTTP_ROUTES - .routes - .get("/api/v1/metrics/blobcache") - .is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/inflight").is_some()); - } - - #[test] - fn test_http_api_routes_v2() { - assert!(HTTP_ROUTES.routes.get("/api/v2/daemon").is_some()); - assert!(HTTP_ROUTES.routes.get("/api/v2/blobs").is_some()); - } - - #[test] - fn test_kick_api_server() { - let (to_api, from_route) = channel(); - let (to_route, from_api) = channel(); - let request = ApiRequest::GetDaemonInfo; - 
let thread = thread::spawn(move || match kick_api_server(&to_api, &from_api, request) { - Err(reply) => matches!(reply, ApiError::ResponsePayloadType), - Ok(_) => panic!("unexpected reply message"), - }); - let req2 = from_route.recv().unwrap(); - matches!(req2.as_ref().unwrap(), ApiRequest::GetDaemonInfo); - let reply: ApiResponse = Err(ApiError::ResponsePayloadType); - to_route.send(reply).unwrap(); - thread.join().unwrap(); - - let (to_api, from_route) = channel(); - let (to_route, from_api) = channel(); - drop(to_route); - let request = ApiRequest::GetDaemonInfo; - assert!(kick_api_server(&to_api, &from_api, request).is_err()); - drop(from_route); - let request = ApiRequest::GetDaemonInfo; - assert!(kick_api_server(&to_api, &from_api, request).is_err()); - } - - #[test] - fn test_extract_query_part() { - let req = Request::try_from( - b"GET http://localhost/api/v1/daemon?arg1=test HTTP/1.0\r\n\r\n", - None, - ) - .unwrap(); - let arg1 = extract_query_part(&req, "arg1").unwrap(); - assert_eq!(arg1, "test"); - assert!(extract_query_part(&req, "arg2").is_none()); - } - - #[test] - fn test_start_http_thread() { - let tmpdir = TempFile::new().unwrap(); - let path = tmpdir.as_path().to_str().unwrap(); - let (to_api, from_route) = channel(); - let (_to_route, from_api) = channel(); - let (thread, waker) = start_http_thread(path, to_api, from_api).unwrap(); - waker.wake().unwrap(); - - let msg = from_route.recv().unwrap(); - assert!(msg.is_none()); - let _ = thread.join().unwrap(); - } -} +use std::collections::HashMap; +use std::io::{Error, ErrorKind, Result}; +use std::os::unix::io::AsRawFd; +use std::path::PathBuf; +use std::sync::mpsc::{Receiver, Sender}; +use std::sync::Arc; +use std::time::SystemTime; +use std::{fs, thread}; + +use dbs_uhttp::{Body, HttpServer, MediaType, Request, Response, ServerError, StatusCode, Version}; + +use http::uri::Uri; +use mio::unix::SourceFd; +use mio::{Events, Interest, Poll, Token, Waker}; +use serde::Deserialize; +use url::Url; + +use crate::http::{ + ApiError, ApiRequest, ApiResponse, DaemonErrorKind, ErrorMessage, HttpError, MetricsError, + MetricsErrorKind, +}; +use crate::http_endpoint_common::{ + EventsHandler, ExitHandler, MetricsBackendHandler, MetricsBlobcacheHandler, MountHandler, + SendFuseFdHandler, StartHandler, TakeoverFuseFdHandler, +}; +use crate::http_endpoint_v1::{ + FsBackendInfo, InfoHandler, MetricsFsAccessPatternHandler, MetricsFsFilesHandler, + MetricsFsGlobalHandler, MetricsFsInflightHandler, HTTP_ROOT_V1, +}; +use crate::http_endpoint_v2::{BlobObjectListHandlerV2, InfoV2Handler, HTTP_ROOT_V2}; + +const EXIT_TOKEN: Token = Token(usize::MAX); +const REQUEST_TOKEN: Token = Token(1); + +/// Specialized version of [`std::result::Result`] for value returned by [`EndpointHandler`]. +pub type HttpResult = std::result::Result; + +/// Get query parameter with `key` from the HTTP request. +pub fn extract_query_part(req: &Request, key: &str) -> Option { + // Splicing req.uri with "http:" prefix might look weird, but since it depends on + // crate `Url` to generate query_pairs HashMap, which is working on top of Url not Uri. + // Better that we can add query part support to Micro-http in the future. But + // right now, below way makes it easy to obtain query parts from uri. 
+ let http_prefix = format!("http:{}", req.uri().get_abs_path()); + let url = Url::parse(&http_prefix) + .map_err(|e| { + error!("api: can't parse request {:?}", e); + e + }) + .ok()?; + + for (k, v) in url.query_pairs() { + if k == key { + trace!("api: got query param {}={}", k, v); + return Some(v.into_owned()); + } + } + None +} + +/// Parse HTTP request body. +pub(crate) fn parse_body<'a, F: Deserialize<'a>>(b: &'a Body) -> std::result::Result { + serde_json::from_slice::(b.raw()).map_err(HttpError::ParseBody) +} + +/// Translate ApiError message to HTTP status code. +pub(crate) fn translate_status_code(e: &ApiError) -> StatusCode { + match e { + ApiError::DaemonAbnormal(kind) | ApiError::MountFilesystem(kind) => match kind { + DaemonErrorKind::NotReady => StatusCode::ServiceUnavailable, + DaemonErrorKind::Unsupported => StatusCode::NotImplemented, + DaemonErrorKind::UnexpectedEvent(_) => StatusCode::BadRequest, + _ => StatusCode::InternalServerError, + }, + ApiError::Metrics(MetricsErrorKind::Stats(MetricsError::NoCounter)) => StatusCode::NotFound, + _ => StatusCode::InternalServerError, + } +} + +/// Generate a successful HTTP response message. +pub(crate) fn success_response(body: Option) -> Response { + if let Some(body) = body { + let mut r = Response::new(Version::Http11, StatusCode::OK); + r.set_body(Body::new(body)); + r + } else { + Response::new(Version::Http11, StatusCode::NoContent) + } +} + +/// Generate a HTTP error response message with status code and error message. +pub(crate) fn error_response(error: HttpError, status: StatusCode) -> Response { + let mut response = Response::new(Version::Http11, status); + let err_msg = ErrorMessage { + code: "UNDEFINED".to_string(), + message: format!("{:?}", error), + }; + response.set_body(Body::new(err_msg)); + response +} + +/// Trait for HTTP endpoints to handle HTTP requests. +pub trait EndpointHandler: Sync + Send { + /// Handles an HTTP request. + /// + /// The main responsibilities of the handlers includes: + /// - parse and validate incoming request message + /// - send the request to subscriber + /// - wait response from the subscriber + /// - generate HTTP result + fn handle_request( + &self, + req: &Request, + kicker: &dyn Fn(ApiRequest) -> ApiResponse, + ) -> HttpResult; +} + +/// Struct to route HTTP requests to corresponding registered endpoint handlers. +pub struct HttpRoutes { + /// routes is a hash table mapping endpoint URIs to their endpoint handlers. + pub routes: HashMap>, +} + +macro_rules! endpoint_v1 { + ($path:expr) => { + format!("{}{}", HTTP_ROOT_V1, $path) + }; +} + +macro_rules! endpoint_v2 { + ($path:expr) => { + format!("{}{}", HTTP_ROOT_V2, $path) + }; +} + +lazy_static! { + /// HTTP_ROUTES contain all the nydusd HTTP routes. 
+ pub static ref HTTP_ROUTES: HttpRoutes = { + let mut r = HttpRoutes { + routes: HashMap::new(), + }; + + // Common + r.routes.insert(endpoint_v1!("/daemon/events"), Box::new(EventsHandler{})); + r.routes.insert(endpoint_v1!("/daemon/exit"), Box::new(ExitHandler{})); + r.routes.insert(endpoint_v1!("/daemon/start"), Box::new(StartHandler{})); + r.routes.insert(endpoint_v1!("/daemon/fuse/sendfd"), Box::new(SendFuseFdHandler{})); + r.routes.insert(endpoint_v1!("/daemon/fuse/takeover"), Box::new(TakeoverFuseFdHandler{})); + r.routes.insert(endpoint_v1!("/mount"), Box::new(MountHandler{})); + r.routes.insert(endpoint_v1!("/metrics/backend"), Box::new(MetricsBackendHandler{})); + r.routes.insert(endpoint_v1!("/metrics/blobcache"), Box::new(MetricsBlobcacheHandler{})); + + // Nydus API, v1 + r.routes.insert(endpoint_v1!("/daemon"), Box::new(InfoHandler{})); + r.routes.insert(endpoint_v1!("/daemon/backend"), Box::new(FsBackendInfo{})); + r.routes.insert(endpoint_v1!("/metrics"), Box::new(MetricsFsGlobalHandler{})); + r.routes.insert(endpoint_v1!("/metrics/files"), Box::new(MetricsFsFilesHandler{})); + r.routes.insert(endpoint_v1!("/metrics/inflight"), Box::new(MetricsFsInflightHandler{})); + r.routes.insert(endpoint_v1!("/metrics/pattern"), Box::new(MetricsFsAccessPatternHandler{})); + + // Nydus API, v2 + r.routes.insert(endpoint_v2!("/daemon"), Box::new(InfoV2Handler{})); + r.routes.insert(endpoint_v2!("/blobs"), Box::new(BlobObjectListHandlerV2{})); + + r + }; +} + +fn kick_api_server( + to_api: &Sender>, + from_api: &Receiver, + request: ApiRequest, +) -> ApiResponse { + to_api.send(Some(request)).map_err(ApiError::RequestSend)?; + from_api.recv().map_err(ApiError::ResponseRecv)? +} + +// Example: +// <-- GET / +// --> GET / 200 835ms 746b + +fn trace_api_begin(request: &dbs_uhttp::Request) { + debug!("<--- {:?} {:?}", request.method(), request.uri()); +} + +fn trace_api_end(response: &dbs_uhttp::Response, method: dbs_uhttp::Method, recv_time: SystemTime) { + let elapse = SystemTime::now().duration_since(recv_time); + debug!( + "---> {:?} Status Code: {:?}, Elapse: {:?}, Body Size: {:?}", + method, + response.status(), + elapse, + response.content_length() + ); +} + +fn exit_api_server(to_api: &Sender>) { + if to_api.send(None).is_err() { + error!("failed to send stop request api server"); + } +} + +fn handle_http_request( + request: &Request, + to_api: &Sender>, + from_api: &Receiver, +) -> Response { + let begin_time = SystemTime::now(); + trace_api_begin(request); + + // Micro http should ensure that req path is legal. + let uri_parsed = request.uri().get_abs_path().parse::(); + let mut response = match uri_parsed { + Ok(uri) => match HTTP_ROUTES.routes.get(uri.path()) { + Some(route) => route + .handle_request(request, &|r| kick_api_server(to_api, from_api, r)) + .unwrap_or_else(|err| error_response(err, StatusCode::BadRequest)), + None => error_response(HttpError::NoRoute, StatusCode::NotFound), + }, + Err(e) => { + error!("Failed parse URI, {}", e); + error_response(HttpError::BadRequest, StatusCode::BadRequest) + } + }; + response.set_server("Nydus API"); + response.set_content_type(MediaType::ApplicationJson); + + trace_api_end(&response, request.method(), begin_time); + + response +} + +/// Start a HTTP server to serve API requests. +/// +/// Start a HTTP server parsing http requests and send to nydus API server a concrete +/// request to operate nydus or fetch working status. +/// The HTTP server sends request by `to_api` channel and wait for response from `from_api` channel. 
+pub fn start_http_thread( + path: &str, + to_api: Sender>, + from_api: Receiver, +) -> Result<(thread::JoinHandle>, Arc)> { + // Try to remove existed unix domain socket + let _ = fs::remove_file(path); + let socket_path = PathBuf::from(path); + + let mut poll = Poll::new()?; + let waker = Arc::new(Waker::new(poll.registry(), EXIT_TOKEN)?); + let waker2 = waker.clone(); + let mut server = HttpServer::new(socket_path).map_err(|e| { + if let ServerError::IOError(e) = e { + e + } else { + Error::new(ErrorKind::Other, format!("{:?}", e)) + } + })?; + poll.registry().register( + &mut SourceFd(&server.epoll().as_raw_fd()), + REQUEST_TOKEN, + Interest::READABLE, + )?; + + let thread = thread::Builder::new() + .name("nydus-http-server".to_string()) + .spawn(move || { + // Must start the server successfully or just die by panic + server.start_server().unwrap(); + info!("http server started"); + + let mut events = Events::with_capacity(100); + let mut do_exit = false; + loop { + match poll.poll(&mut events, None) { + Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, + Err(e) => { + error!("http server poll events failed, {}", e); + exit_api_server(&to_api); + return Err(e); + } + Ok(_) => {} + } + + for event in &events { + match event.token() { + EXIT_TOKEN => do_exit = true, + REQUEST_TOKEN => match server.requests() { + Ok(request_vec) => { + for server_request in request_vec { + let reply = server_request.process(|request| { + handle_http_request(request, &to_api, &from_api) + }); + // Ignore error when sending response + server.respond(reply).unwrap_or_else(|e| { + error!("HTTP server error on response: {}", e) + }); + } + } + Err(e) => { + error!("HTTP server error on retrieving incoming request: {}", e); + } + }, + _ => unreachable!("unknown poll token."), + } + } + + if do_exit { + exit_api_server(&to_api); + break; + } + } + + info!("http-server thread exits"); + // Keep the Waker alive to match the lifetime of the poll loop above + drop(waker2); + Ok(()) + })?; + + Ok((thread, waker)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::mpsc::channel; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_http_api_routes_v1() { + assert!(HTTP_ROUTES.routes.get("/api/v1/daemon").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/events").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/backend").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/start").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/daemon/exit").is_some()); + assert!(HTTP_ROUTES + .routes + .get("/api/v1/daemon/fuse/sendfd") + .is_some()); + assert!(HTTP_ROUTES + .routes + .get("/api/v1/daemon/fuse/takeover") + .is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/mount").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/metrics").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/files").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/pattern").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/backend").is_some()); + assert!(HTTP_ROUTES + .routes + .get("/api/v1/metrics/blobcache") + .is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v1/metrics/inflight").is_some()); + } + + #[test] + fn test_http_api_routes_v2() { + assert!(HTTP_ROUTES.routes.get("/api/v2/daemon").is_some()); + assert!(HTTP_ROUTES.routes.get("/api/v2/blobs").is_some()); + } + + #[test] + fn test_kick_api_server() { + let (to_api, from_route) = channel(); + let (to_route, from_api) = channel(); + let request = ApiRequest::GetDaemonInfo; + 
let thread = thread::spawn(move || match kick_api_server(&to_api, &from_api, request) { + Err(reply) => matches!(reply, ApiError::ResponsePayloadType), + Ok(_) => panic!("unexpected reply message"), + }); + let req2 = from_route.recv().unwrap(); + matches!(req2.as_ref().unwrap(), ApiRequest::GetDaemonInfo); + let reply: ApiResponse = Err(ApiError::ResponsePayloadType); + to_route.send(reply).unwrap(); + thread.join().unwrap(); + + let (to_api, from_route) = channel(); + let (to_route, from_api) = channel(); + drop(to_route); + let request = ApiRequest::GetDaemonInfo; + assert!(kick_api_server(&to_api, &from_api, request).is_err()); + drop(from_route); + let request = ApiRequest::GetDaemonInfo; + assert!(kick_api_server(&to_api, &from_api, request).is_err()); + } + + #[test] + fn test_extract_query_part() { + let req = Request::try_from( + b"GET http://localhost/api/v1/daemon?arg1=test HTTP/1.0\r\n\r\n", + None, + ) + .unwrap(); + let arg1 = extract_query_part(&req, "arg1").unwrap(); + assert_eq!(arg1, "test"); + assert!(extract_query_part(&req, "arg2").is_none()); + } + + #[test] + fn test_start_http_thread() { + let tmpdir = TempFile::new().unwrap(); + let path = tmpdir.as_path().to_str().unwrap(); + let (to_api, from_route) = channel(); + let (_to_route, from_api) = channel(); + let (thread, waker) = start_http_thread(path, to_api, from_api).unwrap(); + waker.wake().unwrap(); + + let msg = from_route.recv().unwrap(); + assert!(msg.is_none()); + let _ = thread.join().unwrap(); + } +} diff --git a/api/src/lib.rs b/api/src/lib.rs index 531d62ded93..1b2891d802f 100644 --- a/api/src/lib.rs +++ b/api/src/lib.rs @@ -1,47 +1,47 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! APIs for the Nydus Image Service -//! -//! The `nydus-api` crate defines API and related data structures for Nydus Image Service. -//! All data structures used by the API are encoded in JSON format. - -#[cfg_attr(feature = "handler", macro_use)] -extern crate log; -#[macro_use] -extern crate serde; -#[cfg(feature = "handler")] -#[macro_use] -extern crate lazy_static; - -pub mod config; -pub use config::*; -#[macro_use] -pub mod error; -pub mod http; -pub use self::http::*; - -#[cfg(feature = "handler")] -pub(crate) mod http_endpoint_common; -#[cfg(feature = "handler")] -pub(crate) mod http_endpoint_v1; -#[cfg(feature = "handler")] -pub(crate) mod http_endpoint_v2; -#[cfg(feature = "handler")] -pub(crate) mod http_handler; - -#[cfg(feature = "handler")] -pub use http_handler::{ - extract_query_part, start_http_thread, EndpointHandler, HttpResult, HttpRoutes, HTTP_ROUTES, -}; - -/// Application build and version information. -#[derive(Serialize, Clone)] -pub struct BuildTimeInfo { - pub package_ver: String, - pub git_commit: String, - pub build_time: String, - pub profile: String, - pub rustc: String, -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! APIs for the Nydus Image Service +//! +//! The `nydus-api` crate defines API and related data structures for Nydus Image Service. +//! All data structures used by the API are encoded in JSON format. 
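Putting the pieces together: with the `handler` feature enabled, `start_http_thread()` owns the HTTP side of the two mpsc channels, and the daemon's job is to drain `ApiRequest`s and answer each one with an `ApiResponse`. A minimal sketch of that daemon-side loop, with a placeholder socket path and trivial payloads:

```rust
// Minimal daemon-side sketch, assuming nydus-api is built with the `handler`
// feature. The socket path and the JSON payload below are placeholders.
use std::sync::mpsc::channel;

use nydus_api::{
    start_http_thread, ApiError, ApiRequest, ApiResponse, ApiResponsePayload, DaemonErrorKind,
};

fn main() -> std::io::Result<()> {
    // HTTP thread -> daemon requests, daemon -> HTTP thread responses.
    let (to_api, from_http) = channel();
    let (to_http, from_api) = channel();
    let (thread, waker) = start_http_thread("/tmp/nydusd-api.sock", to_api, from_api)?;

    // The HTTP thread sends `None` through the channel when it shuts down.
    while let Ok(Some(request)) = from_http.recv() {
        let exiting = matches!(request, ApiRequest::Exit);
        let reply: ApiResponse = match request {
            ApiRequest::GetDaemonInfo => Ok(ApiResponsePayload::DaemonInfo(
                r#"{"version":"0.0.0"}"#.to_string(),
            )),
            ApiRequest::Exit => Ok(ApiResponsePayload::Empty),
            // A real daemon dispatches every variant; the sketch rejects the rest.
            _ => Err(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)),
        };
        if to_http.send(reply).is_err() {
            break;
        }
        if exiting {
            // Ask the HTTP thread to leave its poll loop; it will then send the
            // final `None` that terminates this loop.
            waker.wake()?;
        }
    }

    thread.join().unwrap()
}
```

This mirrors what the crate's own `test_start_http_thread` does, except that the test stops the thread directly through the returned `Waker` instead of routing an `Exit` request.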
+ +#[cfg_attr(feature = "handler", macro_use)] +extern crate log; +#[macro_use] +extern crate serde; +#[cfg(feature = "handler")] +#[macro_use] +extern crate lazy_static; + +pub mod config; +pub use config::*; +#[macro_use] +pub mod error; +pub mod http; +pub use self::http::*; + +#[cfg(feature = "handler")] +pub(crate) mod http_endpoint_common; +#[cfg(feature = "handler")] +pub(crate) mod http_endpoint_v1; +#[cfg(feature = "handler")] +pub(crate) mod http_endpoint_v2; +#[cfg(feature = "handler")] +pub(crate) mod http_handler; + +#[cfg(feature = "handler")] +pub use http_handler::{ + extract_query_part, start_http_thread, EndpointHandler, HttpResult, HttpRoutes, HTTP_ROUTES, +}; + +/// Application build and version information. +#[derive(Serialize, Clone)] +pub struct BuildTimeInfo { + pub package_ver: String, + pub git_commit: String, + pub build_time: String, + pub profile: String, + pub rustc: String, +} diff --git a/build.rs b/build.rs index 0e4c0451a4f..db5123ed383 100644 --- a/build.rs +++ b/build.rs @@ -1,64 +1,64 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::ffi::OsString; -use std::process::Command; -use std::str::FromStr; -use std::{ffi, io}; - -fn get_version_from_cmd(executable: &ffi::OsStr) -> io::Result { - let output = Command::new(executable).arg("-V").output()?; - let mut v = String::from_utf8(output.stdout).unwrap(); - v.pop(); // remove newline - Ok(v) -} - -fn get_git_commit_hash() -> String { - let commit = Command::new("git") - .arg("rev-parse") - .arg("--verify") - .arg("HEAD") - .output(); - if let Ok(commit_output) = commit { - if let Some(commit) = String::from_utf8_lossy(&commit_output.stdout) - .lines() - .next() - { - return commit.to_string(); - } - } - "unknown".to_string() -} - -fn get_git_commit_version() -> String { - let tag = Command::new("git").args(["describe", "--tags"]).output(); - if let Ok(tag) = tag { - if let Some(tag) = String::from_utf8_lossy(&tag.stdout).lines().next() { - return tag.to_string(); - } - } - "unknown".to_string() -} - -fn main() { - let rustc_ver = if let Ok(p) = std::env::var("RUSTC") { - let rustc = OsString::from_str(&p).unwrap(); - get_version_from_cmd(&rustc).unwrap() - } else { - "".to_string() - }; - let profile = std::env::var("PROFILE").unwrap_or_else(|_| "".to_string()); - let build_time = time::OffsetDateTime::now_utc() - .format(&time::format_description::well_known::Iso8601::DEFAULT) - .unwrap(); - let git_commit_hash = get_git_commit_hash(); - let git_commit_version = get_git_commit_version(); - - println!("cargo:rerun-if-changed=../git/HEAD"); - println!("cargo:rustc-env=RUSTC_VERSION={}", rustc_ver); - println!("cargo:rustc-env=PROFILE={}", profile); - println!("cargo:rustc-env=BUILT_TIME_UTC={}", build_time); - println!("cargo:rustc-env=GIT_COMMIT_HASH={}", git_commit_hash); - println!("cargo:rustc-env=GIT_COMMIT_VERSION={}", git_commit_version); -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::ffi::OsString; +use std::process::Command; +use std::str::FromStr; +use std::{ffi, io}; + +fn get_version_from_cmd(executable: &ffi::OsStr) -> io::Result { + let output = Command::new(executable).arg("-V").output()?; + let mut v = String::from_utf8(output.stdout).unwrap(); + v.pop(); // remove newline + Ok(v) +} + +fn get_git_commit_hash() -> String { + let commit = Command::new("git") + .arg("rev-parse") + .arg("--verify") + .arg("HEAD") + .output(); + if let Ok(commit_output) = commit { + if let Some(commit) = String::from_utf8_lossy(&commit_output.stdout) + .lines() + .next() + { + return commit.to_string(); + } + } + "unknown".to_string() +} + +fn get_git_commit_version() -> String { + let tag = Command::new("git").args(["describe", "--tags"]).output(); + if let Ok(tag) = tag { + if let Some(tag) = String::from_utf8_lossy(&tag.stdout).lines().next() { + return tag.to_string(); + } + } + "unknown".to_string() +} + +fn main() { + let rustc_ver = if let Ok(p) = std::env::var("RUSTC") { + let rustc = OsString::from_str(&p).unwrap(); + get_version_from_cmd(&rustc).unwrap() + } else { + "".to_string() + }; + let profile = std::env::var("PROFILE").unwrap_or_else(|_| "".to_string()); + let build_time = time::OffsetDateTime::now_utc() + .format(&time::format_description::well_known::Iso8601::DEFAULT) + .unwrap(); + let git_commit_hash = get_git_commit_hash(); + let git_commit_version = get_git_commit_version(); + + println!("cargo:rerun-if-changed=../git/HEAD"); + println!("cargo:rustc-env=RUSTC_VERSION={}", rustc_ver); + println!("cargo:rustc-env=PROFILE={}", profile); + println!("cargo:rustc-env=BUILT_TIME_UTC={}", build_time); + println!("cargo:rustc-env=GIT_COMMIT_HASH={}", git_commit_hash); + println!("cargo:rustc-env=GIT_COMMIT_VERSION={}", git_commit_version); +} diff --git a/builder/Cargo.toml b/builder/Cargo.toml index fa76a36f947..e99932f5f34 100644 --- a/builder/Cargo.toml +++ b/builder/Cargo.toml @@ -1,33 +1,33 @@ -[package] -name = "nydus-builder" -version = "0.1.0" -description = "Nydus Image Builder" -authors = ["The Nydus Developers"] -license = "Apache-2.0" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -anyhow = "1.0.35" -base64 = "0.21" -hex = "0.4.3" -indexmap = "2" -libc = "0.2" -log = "0.4" -nix = "0.24" -serde = { version = "1.0.110", features = ["serde_derive", "rc"] } -serde_json = "1.0.53" -sha2 = "0.10.2" -tar = "0.4.40" -vmm-sys-util = "0.11.0" -xattr = "1.0.1" - -nydus-api = { version = "0.3", path = "../api" } -nydus-rafs = { version = "0.3", path = "../rafs" } -nydus-storage = { version = "0.6", path = "../storage", features = ["backend-localfs"] } -nydus-utils = { version = "0.4", path = "../utils" } - -[package.metadata.docs.rs] -all-features = true -targets = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu", "aarch64-apple-darwin"] +[package] +name = "nydus-builder" +version = "0.1.0" +description = "Nydus Image Builder" +authors = ["The Nydus Developers"] +license = "Apache-2.0" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +anyhow = "1.0.35" +base64 = "0.21" +hex = "0.4.3" +indexmap = "2" +libc = "0.2" +log = "0.4" +nix = "0.24" +serde = { version = "1.0.110", features = ["serde_derive", "rc"] } +serde_json = "1.0.53" +sha2 = "0.10.2" +tar = "0.4.40" +vmm-sys-util = "0.11.0" +xattr = "1.0.1" + +nydus-api = { version = "0.3", path = "../api" } 
+nydus-rafs = { version = "0.3", path = "../rafs" } +nydus-storage = { version = "0.6", path = "../storage", features = ["backend-localfs"] } +nydus-utils = { version = "0.4", path = "../utils" } + +[package.metadata.docs.rs] +all-features = true +targets = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu", "aarch64-apple-darwin"] diff --git a/builder/src/chunkdict_generator.rs b/builder/src/chunkdict_generator.rs index 4f7ab105d2b..75fbaa6c6ca 100644 --- a/builder/src/chunkdict_generator.rs +++ b/builder/src/chunkdict_generator.rs @@ -1,280 +1,280 @@ -// Copyright (C) 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Generate Chunkdict RAFS bootstrap. -//! ------------------------------------------------------------------------------------------------- -//! Bug 1: Inconsistent Chunk Size Leading to Blob Size Less Than 4K(v6_block_size) -//! Description: The size of chunks is not consistent, which results in the possibility that a blob, -//! composed of a group of these chunks, may be less than 4K(v6_block_size) in size. -//! This inconsistency leads to a failure in passing the size check. -//! ------------------------------------------------------------------------------------------------- -//! Bug 2: Incorrect Chunk Number Calculation Due to Premature Check Logic -//! Description: The current logic for calculating the chunk number is based on the formula size/chunk size. -//! However, this approach is flawed as it precedes the actual check which accounts for chunk statistics. -//! Consequently, this leads to inaccurate counting of chunk numbers. - -use super::core::node::{ChunkSource, NodeInfo}; -use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree}; -use crate::core::node::Node; -use crate::NodeChunk; -use anyhow::Result; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::RafsXAttrs; -use nydus_storage::meta::BlobChunkInfoV1Ondisk; -use nydus_utils::compress::Algorithm; -use nydus_utils::digest::RafsDigest; -use std::ffi::OsString; -use std::mem::size_of; -use std::path::PathBuf; -use std::str::FromStr; -use std::sync::Arc; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ChunkdictChunkInfo { - pub image_reference: String, - pub version: String, - pub chunk_blob_id: String, - pub chunk_digest: String, - pub chunk_compressed_size: u32, - pub chunk_uncompressed_size: u32, - pub chunk_compressed_offset: u64, - pub chunk_uncompressed_offset: u64, -} - -pub struct ChunkdictBlobInfo { - pub blob_id: String, - pub blob_compressed_size: u64, - pub blob_uncompressed_size: u64, - pub blob_compressor: String, - pub blob_meta_ci_compressed_size: u64, - pub blob_meta_ci_uncompressed_size: u64, - pub blob_meta_ci_offset: u64, -} - -/// Struct to generate chunkdict RAFS bootstrap. -pub struct Generator {} - -impl Generator { - // Generate chunkdict RAFS bootstrap. - pub fn generate( - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - chunkdict_chunks_origin: Vec, - chunkdict_blobs: Vec, - ) -> Result { - // Validate and remove chunks whose belonged blob sizes are smaller than a block. - let mut chunkdict_chunks = chunkdict_chunks_origin.to_vec(); - Self::validate_and_remove_chunks(ctx, &mut chunkdict_chunks); - // Build root tree. - let mut tree = Self::build_root_tree(ctx)?; - - // Build child tree. 
- let child = Self::build_child_tree(ctx, blob_mgr, &chunkdict_chunks, &chunkdict_blobs)?; - let result = vec![child]; - tree.children = result; - - Self::validate_tree(&tree)?; - - // Build bootstrap. - let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; - let mut bootstrap = Bootstrap::new(tree)?; - bootstrap.build(ctx, &mut bootstrap_ctx)?; - - let blob_table = blob_mgr.to_blob_table(ctx)?; - let storage = &mut bootstrap_mgr.bootstrap_storage; - bootstrap.dump(ctx, storage, &mut bootstrap_ctx, &blob_table)?; - - BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) - } - - /// Validate tree. - fn validate_tree(tree: &Tree) -> Result<()> { - let pre = &mut |t: &Tree| -> Result<()> { - let node = t.lock_node(); - debug!("chunkdict tree: "); - debug!("inode: {}", node); - for chunk in &node.chunks { - debug!("\t chunk: {}", chunk); - } - Ok(()) - }; - tree.walk_dfs_pre(pre)?; - debug!("chunkdict tree is valid."); - Ok(()) - } - - /// Validates and removes chunks with a total uncompressed size smaller than the block size limit. - fn validate_and_remove_chunks(ctx: &mut BuildContext, chunkdict: &mut Vec) { - let mut chunk_sizes = std::collections::HashMap::new(); - - // Accumulate the uncompressed size for each chunk_blob_id. - for chunk in chunkdict.iter() { - *chunk_sizes.entry(chunk.chunk_blob_id.clone()).or_insert(0) += - chunk.chunk_uncompressed_size as u64; - } - // Find all chunk_blob_ids with a total uncompressed size > v6_block_size. - let small_chunks: Vec = chunk_sizes - .into_iter() - .filter(|&(_, size)| size < ctx.v6_block_size()) - .inspect(|(id, _)| { - eprintln!( - "Warning: Blob with id '{}' is smaller than {} bytes.", - id, - ctx.v6_block_size() - ) - }) - .map(|(id, _)| id) - .collect(); - - // Retain only chunks with chunk_blob_id that has a total uncompressed size > v6_block_size. - chunkdict.retain(|chunk| !small_chunks.contains(&chunk.chunk_blob_id)); - } - - /// Build the root tree. - pub fn build_root_tree(ctx: &mut BuildContext) -> Result { - let mut inode = InodeWrapper::new(ctx.fs_version); - inode.set_ino(1); - inode.set_uid(1000); - inode.set_gid(1000); - inode.set_projid(0); - inode.set_mode(0o660 | libc::S_IFDIR as u32); - inode.set_nlink(3); - inode.set_name_size("/".len()); - inode.set_rdev(0); - inode.set_blocks(256); - let node_info = NodeInfo { - explicit_uidgid: true, - src_dev: 0, - src_ino: 0, - rdev: 0, - source: PathBuf::from("/"), - path: PathBuf::from("/"), - target: PathBuf::from("/"), - target_vec: vec![OsString::from("/")], - symlink: None, - xattrs: RafsXAttrs::default(), - v6_force_extended_inode: true, - }; - let root_node = Node::new(inode, node_info, 0); - let tree = Tree::new(root_node); - Ok(tree) - } - - /// Build the child tree. 
- fn build_child_tree( - ctx: &mut BuildContext, - blob_mgr: &mut BlobManager, - chunkdict_chunks: &[ChunkdictChunkInfo], - chunkdict_blobs: &[ChunkdictBlobInfo], - ) -> Result { - let mut inode = InodeWrapper::new(ctx.fs_version); - inode.set_ino(2); - inode.set_uid(0); - inode.set_gid(0); - inode.set_projid(0); - inode.set_mode(0o660 | libc::S_IFREG as u32); - inode.set_nlink(1); - inode.set_name_size("chunkdict".len()); - inode.set_rdev(0); - inode.set_blocks(256); - let node_info = NodeInfo { - explicit_uidgid: true, - src_dev: 0, - src_ino: 1, - rdev: 0, - source: PathBuf::from("/"), - path: PathBuf::from("/chunkdict"), - target: PathBuf::from("/chunkdict"), - target_vec: vec![OsString::from("/"), OsString::from("/chunkdict")], - symlink: None, - xattrs: RafsXAttrs::new(), - v6_force_extended_inode: true, - }; - let mut node = Node::new(inode, node_info, 0); - - // Insert chunks. - Self::insert_chunks(ctx, blob_mgr, &mut node, chunkdict_chunks, chunkdict_blobs)?; - let node_size: u64 = node - .chunks - .iter() - .map(|chunk| chunk.inner.uncompressed_size() as u64) - .sum(); - node.inode.set_size(node_size); - - // Update child count. - node.inode.set_child_count(node.chunks.len() as u32); - let child = Tree::new(node); - child - .lock_node() - .v5_set_dir_size(ctx.fs_version, &child.children); - Ok(child) - } - - /// Insert chunks. - fn insert_chunks( - ctx: &mut BuildContext, - blob_mgr: &mut BlobManager, - node: &mut Node, - chunkdict_chunks: &[ChunkdictChunkInfo], - chunkdict_blobs: &[ChunkdictBlobInfo], - ) -> Result<()> { - for (index, chunk_info) in chunkdict_chunks.iter().enumerate() { - let chunk_size: u32 = chunk_info.chunk_compressed_size; - let file_offset = index as u64 * chunk_size as u64; - let mut chunk = ChunkWrapper::new(ctx.fs_version); - - // Update blob context. - let (blob_index, blob_ctx) = - blob_mgr.get_or_cerate_blob_for_chunkdict(ctx, &chunk_info.chunk_blob_id)?; - let chunk_uncompressed_size = chunk_info.chunk_uncompressed_size; - let pre_d_offset = blob_ctx.current_uncompressed_offset; - blob_ctx.uncompressed_blob_size = pre_d_offset + chunk_uncompressed_size as u64; - blob_ctx.current_uncompressed_offset += chunk_uncompressed_size as u64; - - blob_ctx.blob_meta_header.set_ci_uncompressed_size( - blob_ctx.blob_meta_header.ci_uncompressed_size() - + size_of::() as u64, - ); - blob_ctx.blob_meta_header.set_ci_compressed_size( - blob_ctx.blob_meta_header.ci_uncompressed_size() - + size_of::() as u64, - ); - let chunkdict_blob_info = chunkdict_blobs - .iter() - .find(|blob| blob.blob_id == chunk_info.chunk_blob_id) - .unwrap(); - blob_ctx.blob_compressor = - Algorithm::from_str(chunkdict_blob_info.blob_compressor.as_str())?; - blob_ctx - .blob_meta_header - .set_ci_uncompressed_size(chunkdict_blob_info.blob_meta_ci_uncompressed_size); - blob_ctx - .blob_meta_header - .set_ci_compressed_size(chunkdict_blob_info.blob_meta_ci_compressed_size); - blob_ctx - .blob_meta_header - .set_ci_compressed_offset(chunkdict_blob_info.blob_meta_ci_offset); - blob_ctx.blob_meta_header.set_ci_compressor(Algorithm::Zstd); - - // Update chunk context. 
- let chunk_index = blob_ctx.alloc_chunk_index()?; - chunk.set_blob_index(blob_index); - chunk.set_index(chunk_index); - chunk.set_file_offset(file_offset); - chunk.set_compressed_size(chunk_info.chunk_compressed_size); - chunk.set_compressed_offset(chunk_info.chunk_compressed_offset); - chunk.set_uncompressed_size(chunk_info.chunk_uncompressed_size); - chunk.set_uncompressed_offset(chunk_info.chunk_uncompressed_offset); - chunk.set_id(RafsDigest::from_string(&chunk_info.chunk_digest)); - - node.chunks.push(NodeChunk { - source: ChunkSource::Build, - inner: Arc::new(chunk.clone()), - }); - } - Ok(()) - } -} +// Copyright (C) 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Generate Chunkdict RAFS bootstrap. +//! ------------------------------------------------------------------------------------------------- +//! Bug 1: Inconsistent Chunk Size Leading to Blob Size Less Than 4K(v6_block_size) +//! Description: The size of chunks is not consistent, which results in the possibility that a blob, +//! composed of a group of these chunks, may be less than 4K(v6_block_size) in size. +//! This inconsistency leads to a failure in passing the size check. +//! ------------------------------------------------------------------------------------------------- +//! Bug 2: Incorrect Chunk Number Calculation Due to Premature Check Logic +//! Description: The current logic for calculating the chunk number is based on the formula size/chunk size. +//! However, this approach is flawed as it precedes the actual check which accounts for chunk statistics. +//! Consequently, this leads to inaccurate counting of chunk numbers. + +use super::core::node::{ChunkSource, NodeInfo}; +use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree}; +use crate::core::node::Node; +use crate::NodeChunk; +use anyhow::Result; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_storage::meta::BlobChunkInfoV1Ondisk; +use nydus_utils::compress::Algorithm; +use nydus_utils::digest::RafsDigest; +use std::ffi::OsString; +use std::mem::size_of; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ChunkdictChunkInfo { + pub image_reference: String, + pub version: String, + pub chunk_blob_id: String, + pub chunk_digest: String, + pub chunk_compressed_size: u32, + pub chunk_uncompressed_size: u32, + pub chunk_compressed_offset: u64, + pub chunk_uncompressed_offset: u64, +} + +pub struct ChunkdictBlobInfo { + pub blob_id: String, + pub blob_compressed_size: u64, + pub blob_uncompressed_size: u64, + pub blob_compressor: String, + pub blob_meta_ci_compressed_size: u64, + pub blob_meta_ci_uncompressed_size: u64, + pub blob_meta_ci_offset: u64, +} + +/// Struct to generate chunkdict RAFS bootstrap. +pub struct Generator {} + +impl Generator { + // Generate chunkdict RAFS bootstrap. + pub fn generate( + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + chunkdict_chunks_origin: Vec, + chunkdict_blobs: Vec, + ) -> Result { + // Validate and remove chunks whose belonged blob sizes are smaller than a block. + let mut chunkdict_chunks = chunkdict_chunks_origin.to_vec(); + Self::validate_and_remove_chunks(ctx, &mut chunkdict_chunks); + // Build root tree. + let mut tree = Self::build_root_tree(ctx)?; + + // Build child tree. 
+ let child = Self::build_child_tree(ctx, blob_mgr, &chunkdict_chunks, &chunkdict_blobs)?; + let result = vec![child]; + tree.children = result; + + Self::validate_tree(&tree)?; + + // Build bootstrap. + let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; + let mut bootstrap = Bootstrap::new(tree)?; + bootstrap.build(ctx, &mut bootstrap_ctx)?; + + let blob_table = blob_mgr.to_blob_table(ctx)?; + let storage = &mut bootstrap_mgr.bootstrap_storage; + bootstrap.dump(ctx, storage, &mut bootstrap_ctx, &blob_table)?; + + BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) + } + + /// Validate tree. + fn validate_tree(tree: &Tree) -> Result<()> { + let pre = &mut |t: &Tree| -> Result<()> { + let node = t.lock_node(); + debug!("chunkdict tree: "); + debug!("inode: {}", node); + for chunk in &node.chunks { + debug!("\t chunk: {}", chunk); + } + Ok(()) + }; + tree.walk_dfs_pre(pre)?; + debug!("chunkdict tree is valid."); + Ok(()) + } + + /// Validates and removes chunks with a total uncompressed size smaller than the block size limit. + fn validate_and_remove_chunks(ctx: &mut BuildContext, chunkdict: &mut Vec) { + let mut chunk_sizes = std::collections::HashMap::new(); + + // Accumulate the uncompressed size for each chunk_blob_id. + for chunk in chunkdict.iter() { + *chunk_sizes.entry(chunk.chunk_blob_id.clone()).or_insert(0) += + chunk.chunk_uncompressed_size as u64; + } + // Find all chunk_blob_ids with a total uncompressed size > v6_block_size. + let small_chunks: Vec = chunk_sizes + .into_iter() + .filter(|&(_, size)| size < ctx.v6_block_size()) + .inspect(|(id, _)| { + eprintln!( + "Warning: Blob with id '{}' is smaller than {} bytes.", + id, + ctx.v6_block_size() + ) + }) + .map(|(id, _)| id) + .collect(); + + // Retain only chunks with chunk_blob_id that has a total uncompressed size > v6_block_size. + chunkdict.retain(|chunk| !small_chunks.contains(&chunk.chunk_blob_id)); + } + + /// Build the root tree. + pub fn build_root_tree(ctx: &mut BuildContext) -> Result { + let mut inode = InodeWrapper::new(ctx.fs_version); + inode.set_ino(1); + inode.set_uid(1000); + inode.set_gid(1000); + inode.set_projid(0); + inode.set_mode(0o660 | libc::S_IFDIR as u32); + inode.set_nlink(3); + inode.set_name_size("/".len()); + inode.set_rdev(0); + inode.set_blocks(256); + let node_info = NodeInfo { + explicit_uidgid: true, + src_dev: 0, + src_ino: 0, + rdev: 0, + source: PathBuf::from("/"), + path: PathBuf::from("/"), + target: PathBuf::from("/"), + target_vec: vec![OsString::from("/")], + symlink: None, + xattrs: RafsXAttrs::default(), + v6_force_extended_inode: true, + }; + let root_node = Node::new(inode, node_info, 0); + let tree = Tree::new(root_node); + Ok(tree) + } + + /// Build the child tree. 
+ fn build_child_tree( + ctx: &mut BuildContext, + blob_mgr: &mut BlobManager, + chunkdict_chunks: &[ChunkdictChunkInfo], + chunkdict_blobs: &[ChunkdictBlobInfo], + ) -> Result { + let mut inode = InodeWrapper::new(ctx.fs_version); + inode.set_ino(2); + inode.set_uid(0); + inode.set_gid(0); + inode.set_projid(0); + inode.set_mode(0o660 | libc::S_IFREG as u32); + inode.set_nlink(1); + inode.set_name_size("chunkdict".len()); + inode.set_rdev(0); + inode.set_blocks(256); + let node_info = NodeInfo { + explicit_uidgid: true, + src_dev: 0, + src_ino: 1, + rdev: 0, + source: PathBuf::from("/"), + path: PathBuf::from("/chunkdict"), + target: PathBuf::from("/chunkdict"), + target_vec: vec![OsString::from("/"), OsString::from("/chunkdict")], + symlink: None, + xattrs: RafsXAttrs::new(), + v6_force_extended_inode: true, + }; + let mut node = Node::new(inode, node_info, 0); + + // Insert chunks. + Self::insert_chunks(ctx, blob_mgr, &mut node, chunkdict_chunks, chunkdict_blobs)?; + let node_size: u64 = node + .chunks + .iter() + .map(|chunk| chunk.inner.uncompressed_size() as u64) + .sum(); + node.inode.set_size(node_size); + + // Update child count. + node.inode.set_child_count(node.chunks.len() as u32); + let child = Tree::new(node); + child + .lock_node() + .v5_set_dir_size(ctx.fs_version, &child.children); + Ok(child) + } + + /// Insert chunks. + fn insert_chunks( + ctx: &mut BuildContext, + blob_mgr: &mut BlobManager, + node: &mut Node, + chunkdict_chunks: &[ChunkdictChunkInfo], + chunkdict_blobs: &[ChunkdictBlobInfo], + ) -> Result<()> { + for (index, chunk_info) in chunkdict_chunks.iter().enumerate() { + let chunk_size: u32 = chunk_info.chunk_compressed_size; + let file_offset = index as u64 * chunk_size as u64; + let mut chunk = ChunkWrapper::new(ctx.fs_version); + + // Update blob context. + let (blob_index, blob_ctx) = + blob_mgr.get_or_cerate_blob_for_chunkdict(ctx, &chunk_info.chunk_blob_id)?; + let chunk_uncompressed_size = chunk_info.chunk_uncompressed_size; + let pre_d_offset = blob_ctx.current_uncompressed_offset; + blob_ctx.uncompressed_blob_size = pre_d_offset + chunk_uncompressed_size as u64; + blob_ctx.current_uncompressed_offset += chunk_uncompressed_size as u64; + + blob_ctx.blob_meta_header.set_ci_uncompressed_size( + blob_ctx.blob_meta_header.ci_uncompressed_size() + + size_of::() as u64, + ); + blob_ctx.blob_meta_header.set_ci_compressed_size( + blob_ctx.blob_meta_header.ci_uncompressed_size() + + size_of::() as u64, + ); + let chunkdict_blob_info = chunkdict_blobs + .iter() + .find(|blob| blob.blob_id == chunk_info.chunk_blob_id) + .unwrap(); + blob_ctx.blob_compressor = + Algorithm::from_str(chunkdict_blob_info.blob_compressor.as_str())?; + blob_ctx + .blob_meta_header + .set_ci_uncompressed_size(chunkdict_blob_info.blob_meta_ci_uncompressed_size); + blob_ctx + .blob_meta_header + .set_ci_compressed_size(chunkdict_blob_info.blob_meta_ci_compressed_size); + blob_ctx + .blob_meta_header + .set_ci_compressed_offset(chunkdict_blob_info.blob_meta_ci_offset); + blob_ctx.blob_meta_header.set_ci_compressor(Algorithm::Zstd); + + // Update chunk context. 
+ let chunk_index = blob_ctx.alloc_chunk_index()?; + chunk.set_blob_index(blob_index); + chunk.set_index(chunk_index); + chunk.set_file_offset(file_offset); + chunk.set_compressed_size(chunk_info.chunk_compressed_size); + chunk.set_compressed_offset(chunk_info.chunk_compressed_offset); + chunk.set_uncompressed_size(chunk_info.chunk_uncompressed_size); + chunk.set_uncompressed_offset(chunk_info.chunk_uncompressed_offset); + chunk.set_id(RafsDigest::from_string(&chunk_info.chunk_digest)); + + node.chunks.push(NodeChunk { + source: ChunkSource::Build, + inner: Arc::new(chunk.clone()), + }); + } + Ok(()) + } +} diff --git a/builder/src/compact.rs b/builder/src/compact.rs index 3ff27eeac69..6fc40b33df2 100644 --- a/builder/src/compact.rs +++ b/builder/src/compact.rs @@ -1,1319 +1,1319 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::io::Write; -use std::mem; -use std::ops::Deref; -use std::path::PathBuf; -use std::sync::Arc; - -use anyhow::{bail, ensure, Result}; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::{RafsSuper, RafsVersion}; -use nydus_storage::backend::BlobBackend; -use nydus_storage::utils::alloc_buf; -use nydus_utils::digest::RafsDigest; -use nydus_utils::{digest, try_round_up_4k}; -use serde::{Deserialize, Serialize}; -use sha2::Digest; - -use crate::core::context::Artifact; - -use super::core::blob::Blob; -use super::core::bootstrap::Bootstrap; -use super::{ - ArtifactStorage, ArtifactWriter, BlobContext, BlobManager, BootstrapManager, BuildContext, - BuildOutput, ChunkDict, ConversionType, Features, Tree, TreeNode, WhiteoutSpec, -}; - -const DEFAULT_COMPACT_BLOB_SIZE: usize = 10 * 1024 * 1024; -const DEFAULT_MAX_COMPACT_SIZE: usize = 100 * 1024 * 1024; - -const fn default_compact_blob_size() -> usize { - DEFAULT_COMPACT_BLOB_SIZE -} - -const fn default_max_compact_size() -> usize { - DEFAULT_MAX_COMPACT_SIZE -} - -#[derive(Clone, Deserialize, Serialize)] -pub struct Config { - /// rebuild blobs whose used_ratio < min_used_ratio - /// used_ratio = (compress_size of all chunks which are referenced by bootstrap) / blob_compress_size - /// available value: 0-99, 0 means disable - /// hint: it's better to disable this option when there are some shared blobs - /// for example: build-cache - #[serde(default)] - min_used_ratio: u8, - /// we compact blobs whose size are less than compact_blob_size - #[serde(default = "default_compact_blob_size")] - compact_blob_size: usize, - /// size of compacted blobs should not be larger than max_compact_size - #[serde(default = "default_max_compact_size")] - max_compact_size: usize, - /// if number of blobs >= layers_to_compact, do compact - /// 0 means always try compact - #[serde(default)] - layers_to_compact: usize, - /// local blobs dir, may haven't upload to backend yet - /// what's more, new blobs will output to this dir - /// name of blob file should be equal to blob_id - blobs_dir: String, -} - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -enum ChunkKey { - // Chunk digest for RAFS v5, may be extended to support RAFS v6 in future. 
- Digest(RafsDigest), - // (blob_idx, compress_offset) for RAFS v6 only - Offset(u32, u64), -} - -impl ChunkKey { - fn from(c: &ChunkWrapper) -> Self { - match c { - ChunkWrapper::V5(_) => Self::Digest(*c.id()), - ChunkWrapper::V6(_) => Self::Offset(c.blob_index(), c.compressed_offset()), - ChunkWrapper::Ref(_) => unimplemented!("unsupport ChunkWrapper::Ref(c)"), - } - } -} - -#[derive(Clone, Debug)] -struct ChunkSet { - chunks: HashMap, - total_size: usize, -} - -impl ChunkSet { - fn new() -> Self { - Self { - chunks: Default::default(), - total_size: 0, - } - } - - fn add_chunk(&mut self, chunk: &ChunkWrapper) { - let key = ChunkKey::from(chunk); - if let Entry::Vacant(e) = self.chunks.entry(key) { - e.insert(chunk.clone()); - self.total_size += chunk.compressed_size() as usize; - } - } - - fn get_chunk(&self, key: &ChunkKey) -> Option<&ChunkWrapper> { - self.chunks.get(key) - } - - fn merge(&mut self, other: Self) { - for (_, c) in other.chunks.iter() { - self.add_chunk(c); - } - } - - #[allow(clippy::too_many_arguments)] - fn dump( - &self, - build_ctx: &BuildContext, - blob_storage: ArtifactStorage, - ori_blob_ids: &[String], - new_blob_ctx: &mut BlobContext, - new_blob_idx: u32, - aligned_chunk: bool, - backend: &Arc, - ) -> Result> { - let mut blob_writer = ArtifactWriter::new(blob_storage)?; - let mut chunks = self.chunks.values().collect::>(); - // sort chunks first, don't break order in original blobs - chunks.sort_by(|a, b| { - if (*a).blob_index() == (*b).blob_index() { - (*a).compressed_offset().cmp(&(*b).compressed_offset()) - } else { - (*a).blob_index().cmp(&(*b).blob_index()) - } - }); - - let mut changed_chunks = Vec::new(); - for chunk in chunks { - let blob_idx = chunk.blob_index(); - // get data from backend - // todo: merge download requests - let reader = backend - .get_reader(&ori_blob_ids[blob_idx as usize]) - .expect("get blob err"); - let mut buf = alloc_buf(chunk.compressed_size() as usize); - reader - .read(&mut buf, chunk.compressed_offset()) - .expect("read blob data err"); - blob_writer.write_all(&buf)?; - - let mut new_chunk = chunk.clone(); - // file offset field is useless - new_chunk.set_index(new_blob_ctx.chunk_count); - new_chunk.set_blob_index(new_blob_idx); - new_chunk.set_compressed_offset(new_blob_ctx.current_compressed_offset); - new_chunk.set_uncompressed_offset(new_blob_ctx.current_uncompressed_offset); - new_blob_ctx.add_chunk_meta_info(&new_chunk, None)?; - // insert change ops - changed_chunks.push((chunk.clone(), new_chunk)); - - new_blob_ctx.blob_hash.update(&buf); - new_blob_ctx.chunk_count += 1; - new_blob_ctx.current_compressed_offset += chunk.compressed_size() as u64; - new_blob_ctx.compressed_blob_size += chunk.compressed_size() as u64; - - let aligned_size = if aligned_chunk { - try_round_up_4k(chunk.uncompressed_size()).unwrap() - } else { - chunk.uncompressed_size() as u64 - }; - new_blob_ctx.current_uncompressed_offset += aligned_size; - new_blob_ctx.uncompressed_blob_size += aligned_size; - } - new_blob_ctx.blob_id = format!("{:x}", new_blob_ctx.blob_hash.clone().finalize()); - - // dump blob meta for v6 - Blob::dump_meta_data(build_ctx, new_blob_ctx, &mut blob_writer)?; - let blob_id = new_blob_ctx.blob_id(); - blob_writer.finalize(blob_id)?; - - Ok(changed_chunks) - } -} - -#[derive(Clone, Debug, Default)] -enum State { - ChunkDict, - /// delete this blob - Delete, - #[default] - Invalid, - Original(ChunkSet), - /// output chunks as a new blob file - Rebuild(ChunkSet), -} - -impl State { - fn is_rebuild(&self) -> bool { - 
matches!(self, Self::Rebuild(_)) - } - - fn is_from_dict(&self) -> bool { - matches!(self, Self::ChunkDict) - } - - fn is_invalid(&self) -> bool { - matches!(self, Self::Invalid) - } - - fn merge_blob(&mut self, other: Self) -> Result<()> { - let merge_cs = match other { - State::Original(cs) => cs, - State::Rebuild(cs) => cs, - _ => bail!("invalid state"), - }; - match self { - State::Rebuild(cs) => { - cs.merge(merge_cs); - } - _ => bail!("invalid state"), - } - Ok(()) - } - - fn chunk_total_size(&self) -> Result { - Ok(match self { - State::Original(cs) => cs.total_size, - State::Rebuild(cs) => cs.total_size, - _ => bail!("invalid state"), - }) - } -} - -#[inline] -fn apply_chunk_change(from: &ChunkWrapper, to: &mut ChunkWrapper) -> Result<()> { - ensure!( - to.uncompressed_size() == from.uncompressed_size(), - "different uncompress size" - ); - ensure!( - to.compressed_size() == from.compressed_size(), - "different compressed size" - ); - - to.set_blob_index(from.blob_index()); - to.set_index(from.index()); - to.set_uncompressed_offset(from.uncompressed_offset()); - to.set_compressed_offset(from.compressed_offset()); - Ok(()) -} - -/// RAFS blob compactor to compact multiple small blobs into one blob. -pub struct BlobCompactor { - /// v5 or v6 - version: RafsVersion, - /// states - states: Vec, - /// original blobs - ori_blob_mgr: BlobManager, - /// new blobs - new_blob_mgr: BlobManager, - /// chunk --> list - c2nodes: HashMap>, - /// original blob index --> list - b2nodes: HashMap>, - /// blobs backend - backend: Arc, -} - -impl BlobCompactor { - /// Create a new instance of [BlobCompactor]. - fn new( - version: RafsVersion, - ori_blob_mgr: BlobManager, - backend: Arc, - digester: digest::Algorithm, - bootstrap: &Bootstrap, - ) -> Result { - let ori_blobs_number = ori_blob_mgr.len(); - let mut compactor = Self { - version, - states: vec![Default::default(); ori_blobs_number], - ori_blob_mgr, - new_blob_mgr: BlobManager::new(digester), - c2nodes: HashMap::new(), - b2nodes: HashMap::new(), - backend, - }; - compactor.load_chunk_dict_blobs(); - compactor.load_and_dedup_chunks(bootstrap)?; - Ok(compactor) - } - - fn is_v6(&self) -> bool { - self.version.is_v6() - } - - fn load_and_dedup_chunks(&mut self, bootstrap: &Bootstrap) -> Result<()> { - let mut all_chunks = ChunkSet::new(); - let chunk_dict = self.get_chunk_dict(); - - let cb = &mut |n: &Tree| -> Result<()> { - let mut node = n.lock_node(); - for chunk_idx in 0..node.chunks.len() { - let chunk = &mut node.chunks[chunk_idx]; - let chunk_key = ChunkKey::from(&chunk.inner); - - if self.states[chunk.inner.blob_index() as usize].is_from_dict() { - // dedup by chunk dict - if let Some(c) = - chunk_dict.get_chunk(chunk.inner.id(), chunk.inner.uncompressed_size()) - { - let mut chunk_inner = chunk.inner.deref().clone(); - apply_chunk_change(c, &mut chunk_inner)?; - chunk.inner = Arc::new(chunk_inner); - } else if let Some(c) = all_chunks.get_chunk(&chunk_key) { - let mut chunk_inner = chunk.inner.deref().clone(); - apply_chunk_change(c, &mut chunk_inner)?; - chunk.inner = Arc::new(chunk_inner); - } else { - all_chunks.add_chunk(&chunk.inner); - // add to per blob ChunkSet - let blob_index = chunk.inner.blob_index() as usize; - if self.states[blob_index].is_invalid() { - self.states[blob_index] = State::Original(ChunkSet::new()); - } - if let State::Original(cs) = &mut self.states[blob_index] { - cs.add_chunk(&chunk.inner); - } - } - } - - // construct blobs/chunk --> nodes index map - self.c2nodes - .entry(chunk_key) - .or_default() - 
                    .push((n.node.clone(), chunk_idx));
-                self.b2nodes
-                    .entry(chunk.inner.blob_index())
-                    .or_default()
-                    .push((n.node.clone(), chunk_idx));
-            }
-            Ok(())
-        };
-
-        bootstrap.tree.walk_bfs(false, cb)
-    }
-
-    fn get_chunk_dict(&self) -> Arc<dyn ChunkDict> {
-        self.ori_blob_mgr.get_chunk_dict()
-    }
-
-    fn load_chunk_dict_blobs(&mut self) {
-        let chunk_dict = self.get_chunk_dict();
-        let blobs = chunk_dict.get_blobs();
-        for i in 0..blobs.len() {
-            if let Some(real_blob_idx) = chunk_dict.get_real_blob_idx(i as u32) {
-                self.states[real_blob_idx as usize] = State::ChunkDict;
-            }
-        }
-    }
-
-    fn apply_blob_move(&mut self, from: u32, to: u32) -> Result<()> {
-        if let Some(idx_list) = self.b2nodes.get(&from) {
-            for (n, chunk_idx) in idx_list.iter() {
-                let mut node = n.lock().unwrap();
-                ensure!(
-                    node.chunks[*chunk_idx].inner.blob_index() == from,
-                    "unexpected blob_index of chunk"
-                );
-                node.chunks[*chunk_idx].set_blob_index(to);
-            }
-        }
-        Ok(())
-    }
-
-    fn apply_chunk_change(&mut self, c: &(ChunkWrapper, ChunkWrapper)) -> Result<()> {
-        if let Some(chunks) = self.c2nodes.get(&ChunkKey::from(&c.0)) {
-            for (n, chunk_idx) in chunks.iter() {
-                let mut node = n.lock().unwrap();
-                let chunk = &mut node.chunks[*chunk_idx];
-                let mut chunk_inner = chunk.inner.deref().clone();
-                apply_chunk_change(&c.1, &mut chunk_inner)?;
-                chunk.inner = Arc::new(chunk_inner);
-            }
-        }
-        Ok(())
-    }
-
-    fn delete_unused_blobs(&mut self) {
-        for i in 0..self.states.len() {
-            if self.states[i].is_invalid() {
-                info!(
-                    "compactor: delete unused blob {}",
-                    self.ori_blob_mgr.get_blob(i).unwrap().blob_id
-                );
-                self.states[i] = State::Delete;
-            }
-        }
-    }
-
-    fn prepare_to_rebuild(&mut self, idx: usize) -> Result<()> {
-        if !self.states[idx].is_rebuild() {
-            return Ok(());
-        }
-
-        let mut old = State::Invalid;
-        mem::swap(&mut self.states[idx], &mut old);
-        if let State::Original(cs) = old {
-            self.states[idx] = State::Rebuild(cs);
-        } else {
-            mem::swap(&mut self.states[idx], &mut old);
-            bail!("invalid state");
-        }
-
-        Ok(())
-    }
-
-    fn try_rebuild_blobs(&mut self, ratio: u8) -> Result<()> {
-        for idx in 0..self.ori_blob_mgr.len() {
-            let blob_info = self.ori_blob_mgr.get_blob(idx).unwrap();
-            let used_ratio = match &self.states[idx] {
-                State::Original(cs) => {
-                    let compressed_blob_size = if blob_info.compressed_blob_size == 0 {
-                        let reader = match self.backend.get_reader(&blob_info.blob_id) {
-                            Ok(r) => r,
-                            Err(e) => bail!("compactor: failed to get blob reader, {}", e),
-                        };
-                        match reader.blob_size() {
-                            Ok(sz) => sz,
-                            Err(e) => bail!("compactor: failed to get blob size, {}", e),
-                        }
-                    } else {
-                        blob_info.compressed_blob_size
-                    };
-                    (cs.total_size * 100 / compressed_blob_size as usize) as u8
-                }
-                _ => 100_u8,
-            };
-
-            info!(
-                "compactor: original blob size {}, used data ratio {}%",
-                blob_info.blob_id, used_ratio
-            );
-            if used_ratio < ratio {
-                self.prepare_to_rebuild(idx)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn merge_blob(&mut self, from: usize, to: usize) -> Result<()> {
-        let mut old = State::Delete;
-        mem::swap(&mut self.states[from], &mut old);
-        self.states[to].merge_blob(old)
-    }
-
-    /// use greedy algorithm to merge small blobs(<low) into one blob(<max)
-    fn try_merge_blobs(&mut self, low: usize, max: usize) -> Result<()> {
-        let mut need_merge_blobs = Vec::new();
-        for idx in 0..self.states.len() {
-            let blob_info = self.ori_blob_mgr.get_blob(idx).unwrap();
-            match &self.states[idx] {
-                State::Original(cs) => {
-                    let blob_size = if blob_info.compressed_blob_size == 0 {
-                        cs.total_size
-                    } else {
-                        blob_info.compressed_blob_size as usize
-                    };
-                    if blob_size < low {
-                        info!(
-                            "compactor: try to merge blob {}
size {}", - blob_info.blob_id, blob_size - ); - need_merge_blobs.push((idx, blob_size)); - } - } - State::Rebuild(cs) => { - if cs.total_size < low { - info!( - "compactor: try to merge blob {} size {}", - blob_info.blob_id, cs.total_size - ); - need_merge_blobs.push((idx, cs.total_size)); - } - } - _ => {} - } - } - // sort by size - need_merge_blobs.sort_by(|(_, len1), (_, len2)| len1.cmp(len2)); - // try merge - if need_merge_blobs.len() < 2 { - return Ok(()); - } - - let mut merge_to = need_merge_blobs[0].0; - for (blob_idx, _) in need_merge_blobs.iter().skip(1) { - let before_size = self.states[merge_to].chunk_total_size()?; - let append_size = self.states[*blob_idx].chunk_total_size()?; - if before_size + append_size <= max { - self.prepare_to_rebuild(merge_to)?; - self.merge_blob(*blob_idx, merge_to)?; - } else { - merge_to = *blob_idx; - } - } - - Ok(()) - } - - fn original_blob_ids(&self) -> Vec { - self.ori_blob_mgr - .get_blobs() - .into_iter() - .map(|blob| blob.blob_id.clone()) - .collect() - } - - fn dump_new_blobs( - &mut self, - build_ctx: &BuildContext, - dir: &str, - aligned_chunk: bool, - ) -> Result<()> { - let ori_blob_ids = self.original_blob_ids(); - ensure!(self.states.len() == self.ori_blob_mgr.len()); - - for idx in 0..self.states.len() { - match &self.states[idx] { - State::Original(_) | State::ChunkDict => { - info!("compactor: keep original data blob {}", ori_blob_ids[idx]); - // already exists, no need to dump - let ctx = self.ori_blob_mgr.take_blob(idx); - let blob_idx = self.new_blob_mgr.alloc_index()?; - if blob_idx != idx as u32 { - self.apply_blob_move(idx as u32, blob_idx)?; - } - self.new_blob_mgr.add_blob(ctx); - } - State::Delete => { - info!("compactor: delete compacted blob {}", ori_blob_ids[idx]); - } - State::Rebuild(cs) => { - let blob_storage = ArtifactStorage::FileDir(PathBuf::from(dir)); - let mut blob_ctx = BlobContext::new( - String::from(""), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - blob_ctx.set_meta_info_enabled(self.is_v6()); - let blob_idx = self.new_blob_mgr.alloc_index()?; - let new_chunks = cs.dump( - build_ctx, - blob_storage, - &ori_blob_ids, - &mut blob_ctx, - blob_idx, - aligned_chunk, - &self.backend, - )?; - for change_chunk in new_chunks.iter() { - self.apply_chunk_change(change_chunk)?; - } - info!("compactor: successfully rebuild blob {}", blob_ctx.blob_id); - self.new_blob_mgr.add_blob(blob_ctx); - } - State::Invalid => bail!("compactor: invalid state for blob {}", ori_blob_ids[idx]), - } - } - - Ok(()) - } - - fn do_compact(&mut self, cfg: &Config) -> Result<()> { - self.delete_unused_blobs(); - self.try_rebuild_blobs(cfg.min_used_ratio)?; - self.try_merge_blobs(cfg.compact_blob_size, cfg.max_compact_size)?; - Ok(()) - } - - /// Compact multiple small data blobs into one to reduce number of blobs. 
-    pub fn compact(
-        rs: RafsSuper,
-        d_bootstrap: PathBuf,
-        chunk_dict: Option<Arc<dyn ChunkDict>>,
-        backend: Arc<dyn BlobBackend + Send + Sync>,
-        cfg: &Config,
-    ) -> Result<Option<BuildOutput>> {
-        let mut build_ctx = BuildContext::new(
-            "".to_string(),
-            false,
-            0,
-            rs.meta.get_compressor(),
-            rs.meta.get_digester(),
-            rs.meta.explicit_uidgid(),
-            WhiteoutSpec::None,
-            ConversionType::DirectoryToRafs,
-            PathBuf::from(""),
-            Default::default(),
-            None,
-            false,
-            Features::new(),
-            false,
-        );
-        let mut bootstrap_mgr =
-            BootstrapManager::new(Some(ArtifactStorage::SingleFile(d_bootstrap)), None);
-        let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
-        let mut ori_blob_mgr = BlobManager::new(rs.meta.get_digester());
-        ori_blob_mgr.extend_from_blob_table(&build_ctx, rs.superblock.get_blob_infos())?;
-        if let Some(dict) = chunk_dict {
-            ori_blob_mgr.set_chunk_dict(dict);
-            ori_blob_mgr.extend_from_chunk_dict(&build_ctx)?;
-        }
-        if ori_blob_mgr.len() < cfg.layers_to_compact {
-            return Ok(None);
-        }
-
-        let tree = Tree::from_bootstrap(&rs, &mut ())?;
-        let mut bootstrap = Bootstrap::new(tree)?;
-        let mut compactor = Self::new(
-            build_ctx.fs_version,
-            ori_blob_mgr,
-            backend.clone(),
-            rs.meta.get_digester(),
-            &bootstrap,
-        )?;
-        compactor.do_compact(cfg)?;
-        compactor.dump_new_blobs(&build_ctx, &cfg.blobs_dir, build_ctx.aligned_chunk)?;
-        if compactor.new_blob_mgr.is_empty() {
-            info!("compactor: no chance to compact data blobs");
-            return Ok(None);
-        }
-
-        info!("compactor: successfully compacted blob");
-        // blobs have already been dumped, dump bootstrap only
-        let blob_table = compactor.new_blob_mgr.to_blob_table(&build_ctx)?;
-        bootstrap.build(&mut build_ctx, &mut bootstrap_ctx)?;
-        bootstrap.dump(
-            &mut build_ctx,
-            &mut bootstrap_mgr.bootstrap_storage,
-            &mut bootstrap_ctx,
-            &blob_table,
-        )?;
-
-        Ok(Some(BuildOutput::new(
-            &compactor.new_blob_mgr,
-            &bootstrap_mgr.bootstrap_storage,
-        )?))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::core::node::Node;
-    use crate::HashChunkDict;
-    use crate::{NodeChunk, Overlay};
-
-    use super::*;
-    use nydus_api::ConfigV2;
-    use nydus_rafs::metadata::RafsSuperConfig;
-    use nydus_storage::backend::{BackendResult, BlobReader};
-    use nydus_storage::device::v5::BlobV5ChunkInfo;
-    use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobFeatures};
-    use nydus_storage::RAFS_DEFAULT_CHUNK_SIZE;
-    use nydus_utils::crypt::Algorithm;
-    use nydus_utils::metrics::BackendMetrics;
-    use nydus_utils::{compress, crypt};
-    use std::any::Any;
-    use vmm_sys_util::tempdir::TempDir;
-    use vmm_sys_util::tempfile::TempFile;
-
-    #[doc(hidden)]
-    #[macro_export]
-    macro_rules!
impl_getter { - ($G: ident, $F: ident, $U: ty) => { - fn $G(&self) -> $U { - self.$F - } - }; - } - - #[derive(Default, Clone)] - struct MockChunkInfo { - pub block_id: RafsDigest, - pub blob_index: u32, - pub flags: BlobChunkFlags, - pub compress_size: u32, - pub uncompress_size: u32, - pub compress_offset: u64, - pub uncompress_offset: u64, - pub file_offset: u64, - pub index: u32, - #[allow(unused)] - pub reserved: u32, - } - - impl BlobChunkInfo for MockChunkInfo { - fn chunk_id(&self) -> &RafsDigest { - &self.block_id - } - fn id(&self) -> u32 { - self.index - } - fn is_compressed(&self) -> bool { - self.flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn is_batch(&self) -> bool { - self.flags.contains(BlobChunkFlags::BATCH) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, blob_index, u32); - impl_getter!(compressed_offset, compress_offset, u64); - impl_getter!(compressed_size, compress_size, u32); - impl_getter!(uncompressed_offset, uncompress_offset, u64); - impl_getter!(uncompressed_size, uncompress_size, u32); - } - - impl BlobV5ChunkInfo for MockChunkInfo { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_getter!(index, index, u32); - impl_getter!(file_offset, file_offset, u64); - impl_getter!(flags, flags, BlobChunkFlags); - } - - struct MockBackend { - pub metrics: Arc, - } - - impl BlobReader for MockBackend { - fn blob_size(&self) -> BackendResult { - Ok(1) - } - - fn try_read(&self, buf: &mut [u8], _offset: u64) -> BackendResult { - let mut i = 0; - while i < buf.len() { - buf[i] = i as u8; - i += 1; - } - Ok(i) - } - - fn metrics(&self) -> &BackendMetrics { - // Safe because nydusd must have backend attached with id, only image builder can no id - // but use backend instance to upload blob. - &self.metrics - } - } - - unsafe impl Send for MockBackend {} - unsafe impl Sync for MockBackend {} - - impl BlobBackend for MockBackend { - fn shutdown(&self) {} - - fn metrics(&self) -> &BackendMetrics { - // Safe because nydusd must have backend attached with id, only image builder can no id - // but use backend instance to upload blob. 
- &self.metrics - } - - fn get_reader(&self, _blob_id: &str) -> BackendResult> { - Ok(Arc::new(MockBackend { - metrics: self.metrics.clone(), - })) - } - } - - #[test] - #[should_panic = "not implemented: unsupport ChunkWrapper::Ref(c)"] - fn test_chunk_key_from() { - let cw = ChunkWrapper::new(RafsVersion::V5); - matches!(ChunkKey::from(&cw), ChunkKey::Digest(_)); - - let cw = ChunkWrapper::new(RafsVersion::V6); - matches!(ChunkKey::from(&cw), ChunkKey::Offset(_, _)); - - let chunk = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 2, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x800, - uncompress_offset: 0x1000, - file_offset: 0x1000, - index: 1, - reserved: 0, - }) as Arc; - let cw = ChunkWrapper::Ref(chunk); - ChunkKey::from(&cw); - } - - #[test] - fn test_chunk_set() { - let mut chunk_set1 = ChunkSet::new(); - - let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); - chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); - chunk_wrapper1.set_compressed_size(8); - let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); - chunk_wrapper2.set_compressed_size(16); - - chunk_set1.add_chunk(&chunk_wrapper1); - chunk_set1.add_chunk(&chunk_wrapper2); - assert_eq!(chunk_set1.total_size, 24); - - let chunk_key2 = ChunkKey::from(&chunk_wrapper2); - assert_eq!( - format!("{:?}", Some(chunk_wrapper2)), - format!("{:?}", chunk_set1.get_chunk(&chunk_key2)) - ); - - let mut chunk_wrapper3 = ChunkWrapper::new(RafsVersion::V5); - chunk_wrapper3.set_id(RafsDigest { data: [3u8; 32] }); - chunk_wrapper3.set_compressed_size(32); - - let mut chunk_set2 = ChunkSet::new(); - chunk_set2.add_chunk(&chunk_wrapper3); - chunk_set2.merge(chunk_set1); - assert_eq!(chunk_set2.total_size, 56); - assert_eq!(chunk_set2.chunks.len(), 3); - - let build_ctx = BuildContext::default(); - let tmp_file = TempFile::new().unwrap(); - let blob_storage = ArtifactStorage::SingleFile(PathBuf::from(tmp_file.as_path())); - let cipher_object = Algorithm::Aes256Xts.new_cipher().unwrap(); - let mut new_blob_ctx = BlobContext::new( - "blob_id".to_owned(), - 0, - BlobFeatures::all(), - compress::Algorithm::Lz4Block, - digest::Algorithm::Sha256, - crypt::Algorithm::Aes256Xts, - Arc::new(cipher_object), - None, - ); - let ori_blob_ids = ["1".to_owned(), "2".to_owned()]; - let backend = Arc::new(MockBackend { - metrics: BackendMetrics::new("id", "backend_type"), - }) as Arc; - - let mut res = chunk_set2 - .dump( - &build_ctx, - blob_storage, - &ori_blob_ids, - &mut new_blob_ctx, - 0, - true, - &backend, - ) - .unwrap(); - - res.sort_by(|a, b| a.0.id().data.cmp(&b.0.id().data)); - - assert_eq!(res.len(), 3); - assert_eq!( - format!("{:?}", res[0].1.id()), - format!("{:?}", RafsDigest { data: [0u8; 32] }) - ); - assert_eq!( - format!("{:?}", res[1].1.id()), - format!("{:?}", RafsDigest { data: [1u8; 32] }) - ); - assert_eq!( - format!("{:?}", res[2].1.id()), - format!("{:?}", RafsDigest { data: [3u8; 32] }) - ); - } - - #[test] - fn test_state() { - let state = State::Rebuild(ChunkSet::new()); - assert!(state.is_rebuild()); - let state = State::ChunkDict; - assert!(state.is_from_dict()); - let state = State::default(); - assert!(state.is_invalid()); - - let mut chunk_set1 = ChunkSet::new(); - let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); - chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); - chunk_wrapper1.set_compressed_size(8); - chunk_set1.add_chunk(&chunk_wrapper1); - let mut state1 = State::Original(chunk_set1); - 
assert_eq!(state1.chunk_total_size().unwrap(), 8); - - let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); - chunk_wrapper2.set_compressed_size(16); - let mut chunk_set2 = ChunkSet::new(); - chunk_set2.add_chunk(&chunk_wrapper2); - let mut state2 = State::Rebuild(chunk_set2); - assert_eq!(state2.chunk_total_size().unwrap(), 16); - - assert!(state1.merge_blob(state2.clone()).is_err()); - assert!(state2.merge_blob(state1).is_ok()); - assert!(state2.merge_blob(State::Invalid).is_err()); - - assert_eq!(state2.chunk_total_size().unwrap(), 24); - assert!(State::Delete.chunk_total_size().is_err()); - } - - #[test] - fn test_apply_chunk_change() { - let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); - chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); - chunk_wrapper1.set_uncompressed_size(8); - chunk_wrapper1.set_compressed_size(8); - - let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); - chunk_wrapper2.set_uncompressed_size(16); - chunk_wrapper2.set_compressed_size(16); - - assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_err()); - chunk_wrapper2.set_uncompressed_size(8); - assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_err()); - - chunk_wrapper2.set_compressed_size(8); - chunk_wrapper1.set_blob_index(0x10); - chunk_wrapper1.set_index(0x20); - chunk_wrapper1.set_uncompressed_offset(0x30); - chunk_wrapper1.set_compressed_offset(0x40); - assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_ok()); - assert_eq!(chunk_wrapper2.blob_index(), 0x10); - assert_eq!(chunk_wrapper2.index(), 0x20); - assert_eq!(chunk_wrapper2.uncompressed_offset(), 0x30); - assert_eq!(chunk_wrapper2.compressed_offset(), 0x40); - } - - fn create_blob_compactor() -> Result { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); - let path = source_path.to_str().unwrap(); - let rafs_config = RafsSuperConfig { - version: RafsVersion::V5, - compressor: compress::Algorithm::Lz4Block, - digester: digest::Algorithm::Blake3, - chunk_size: 0x100000, - batch_size: 0, - explicit_uidgid: true, - is_tarfs_mode: false, - }; - let dict = - HashChunkDict::from_commandline_arg(path, Arc::new(ConfigV2::default()), &rafs_config) - .unwrap(); - - let mut ori_blob_mgr = BlobManager::new(digest::Algorithm::Sha256); - ori_blob_mgr.set_chunk_dict(dict); - - let backend = Arc::new(MockBackend { - metrics: BackendMetrics::new("id", "backend_type"), - }); - - let tmpdir = TempDir::new()?; - let tmpfile = TempFile::new_in(tmpdir.as_path())?; - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - )?; - let tree = Tree::new(node); - let bootstrap = Bootstrap::new(tree)?; - - BlobCompactor::new( - RafsVersion::V6, - ori_blob_mgr, - backend, - digest::Algorithm::Sha256, - &bootstrap, - ) - } - - #[test] - fn test_blob_compactor_new() { - let compactor = create_blob_compactor(); - assert!(compactor.is_ok()); - assert!(compactor.unwrap().is_v6()); - } - - #[test] - fn test_blob_compactor_load_chunk_dict_blobs() { - let mut compactor = create_blob_compactor().unwrap(); - let chunk_dict = compactor.get_chunk_dict(); - let n = chunk_dict.get_blobs().len(); - for i in 0..n { - chunk_dict.set_real_blob_idx(i as u32, i as u32); - } - compactor.states = vec![State::default(); n + 1]; - 
compactor.load_chunk_dict_blobs(); - - assert_eq!(compactor.states.len(), n + 1); - assert!(compactor.states[0].is_from_dict()); - assert!(compactor.states[n >> 1].is_from_dict()); - assert!(compactor.states[n - 1].is_from_dict()); - assert!(!compactor.states[n].is_from_dict()); - } - - fn blob_compactor_load_and_dedup_chunks() -> Result { - let mut compactor = create_blob_compactor()?; - - let mut chunk1 = ChunkWrapper::new(RafsVersion::V5); - chunk1.set_id(RafsDigest { data: [1u8; 32] }); - chunk1.set_uncompressed_size(0); - chunk1.set_compressed_offset(0x11); - chunk1.set_blob_index(1); - let node_chunk1 = NodeChunk { - source: crate::ChunkSource::Dict, - inner: Arc::new(chunk1.clone()), - }; - let mut chunk2 = ChunkWrapper::new(RafsVersion::V6); - chunk2.set_id(RafsDigest { data: [2u8; 32] }); - chunk2.set_uncompressed_size(0x20); - chunk2.set_compressed_offset(0x22); - chunk2.set_blob_index(2); - let node_chunk2 = NodeChunk { - source: crate::ChunkSource::Dict, - inner: Arc::new(chunk2.clone()), - }; - let mut chunk3 = ChunkWrapper::new(RafsVersion::V6); - chunk3.set_id(RafsDigest { data: [3u8; 32] }); - chunk3.set_uncompressed_size(0x20); - chunk3.set_compressed_offset(0x22); - chunk3.set_blob_index(2); - let node_chunk3 = NodeChunk { - source: crate::ChunkSource::Dict, - inner: Arc::new(chunk3.clone()), - }; - - let mut chunk_dict = HashChunkDict::new(digest::Algorithm::Sha256); - chunk_dict.add_chunk( - Arc::new(ChunkWrapper::new(RafsVersion::V5)), - digest::Algorithm::Sha256, - ); - chunk_dict.add_chunk(Arc::new(chunk1.clone()), digest::Algorithm::Sha256); - compactor.ori_blob_mgr.set_chunk_dict(Arc::new(chunk_dict)); - - compactor.states = vec![State::ChunkDict; 5]; - - let tmpdir = TempDir::new()?; - let tmpfile = TempFile::new_in(tmpdir.as_path())?; - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - )?; - let mut tree = Tree::new(node); - let tmpfile2 = TempFile::new_in(tmpdir.as_path())?; - let mut node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile2.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - )?; - node.chunks.push(node_chunk1); - node.chunks.push(node_chunk2); - node.chunks.push(node_chunk3); - let tree2 = Tree::new(node); - tree.insert_child(tree2); - - let bootstrap = Bootstrap::new(tree)?; - - assert!(compactor.load_and_dedup_chunks(&bootstrap).is_ok()); - assert_eq!(compactor.c2nodes.len(), 2); - assert_eq!(compactor.b2nodes.len(), 2); - - let chunk_key1 = ChunkKey::from(&chunk1); - assert!(compactor.c2nodes.get(&chunk_key1).is_some()); - assert_eq!(compactor.c2nodes.get(&chunk_key1).unwrap().len(), 1); - assert!(compactor.b2nodes.get(&chunk2.blob_index()).is_some()); - assert_eq!( - compactor.b2nodes.get(&chunk2.blob_index()).unwrap().len(), - 2 - ); - - Ok(compactor) - } - - #[test] - fn test_blob_compactor_load_and_dedup_chunks() { - assert!(blob_compactor_load_and_dedup_chunks().is_ok()); - } - - #[test] - fn test_blob_compactor_dump_new_blobs() { - let tmp_dir = TempDir::new().unwrap(); - let build_ctx = BuildContext::new( - "build_ctx".to_string(), - false, - 0, - compress::Algorithm::Lz4Block, - digest::Algorithm::Sha256, - true, - WhiteoutSpec::None, - ConversionType::DirectoryToRafs, - PathBuf::from(tmp_dir.as_path()), - Default::default(), - None, - false, - Features::new(), - false, - ); - - let mut compactor = 
blob_compactor_load_and_dedup_chunks().unwrap(); - - let blob_ctx1 = BlobContext::new( - "blob_id1".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx2 = BlobContext::new( - "blob_id2".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx3 = BlobContext::new( - "blob_id3".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx4 = BlobContext::new( - "blob_id4".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx5 = BlobContext::new( - "blob_id5".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - compactor.ori_blob_mgr.add_blob(blob_ctx1); - compactor.ori_blob_mgr.add_blob(blob_ctx2); - compactor.ori_blob_mgr.add_blob(blob_ctx3); - compactor.ori_blob_mgr.add_blob(blob_ctx4); - compactor.ori_blob_mgr.add_blob(blob_ctx5); - - compactor.states[0] = State::Invalid; - - let tmp_dir = TempDir::new().unwrap(); - let dir = tmp_dir.as_path().to_str().unwrap(); - assert!(compactor.dump_new_blobs(&build_ctx, dir, true).is_err()); - - compactor.states = vec![ - State::Delete, - State::ChunkDict, - State::Original(ChunkSet::new()), - State::Rebuild(ChunkSet::new()), - State::Delete, - ]; - assert!(compactor.dump_new_blobs(&build_ctx, dir, true).is_ok()); - assert_eq!(compactor.ori_blob_mgr.len(), 3); - } - - #[test] - fn test_blob_compactor_do_compact() { - let mut compactor = blob_compactor_load_and_dedup_chunks().unwrap(); - - let tmp_dir = TempDir::new().unwrap(); - let build_ctx = BuildContext::new( - "build_ctx".to_string(), - false, - 0, - compress::Algorithm::Lz4Block, - digest::Algorithm::Sha256, - true, - WhiteoutSpec::None, - ConversionType::DirectoryToRafs, - PathBuf::from(tmp_dir.as_path()), - Default::default(), - None, - false, - Features::new(), - false, - ); - let mut blob_ctx1 = BlobContext::new( - "blob_id1".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - blob_ctx1.compressed_blob_size = 2; - let mut blob_ctx2 = BlobContext::new( - "blob_id2".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - blob_ctx2.compressed_blob_size = 0; - let blob_ctx3 = BlobContext::new( - "blob_id3".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx4 = BlobContext::new( - "blob_id4".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - let blob_ctx5 = BlobContext::new( - "blob_id5".to_owned(), - 0, - build_ctx.blob_features, - build_ctx.compressor, - build_ctx.digester, - build_ctx.cipher, - Default::default(), - None, - ); - compactor.ori_blob_mgr.add_blob(blob_ctx1); - compactor.ori_blob_mgr.add_blob(blob_ctx2); - compactor.ori_blob_mgr.add_blob(blob_ctx3); - compactor.ori_blob_mgr.add_blob(blob_ctx4); - compactor.ori_blob_mgr.add_blob(blob_ctx5); - - let mut chunk_set1 = ChunkSet::new(); - 
chunk_set1.total_size = 4; - let mut chunk_set2 = ChunkSet::new(); - chunk_set2.total_size = 6; - let mut chunk_set3 = ChunkSet::new(); - chunk_set3.total_size = 5; - - compactor.states = vec![ - State::Original(chunk_set1), - State::Original(chunk_set2), - State::Rebuild(chunk_set3), - State::ChunkDict, - State::Invalid, - ]; - - let cfg = Config { - min_used_ratio: 50, - compact_blob_size: 10, - max_compact_size: 8, - layers_to_compact: 0, - blobs_dir: "blobs_dir".to_string(), - }; - - assert!(compactor.do_compact(&cfg).is_ok()); - assert!(!compactor.states.last().unwrap().is_invalid()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::io::Write; +use std::mem; +use std::ops::Deref; +use std::path::PathBuf; +use std::sync::Arc; + +use anyhow::{bail, ensure, Result}; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::{RafsSuper, RafsVersion}; +use nydus_storage::backend::BlobBackend; +use nydus_storage::utils::alloc_buf; +use nydus_utils::digest::RafsDigest; +use nydus_utils::{digest, try_round_up_4k}; +use serde::{Deserialize, Serialize}; +use sha2::Digest; + +use crate::core::context::Artifact; + +use super::core::blob::Blob; +use super::core::bootstrap::Bootstrap; +use super::{ + ArtifactStorage, ArtifactWriter, BlobContext, BlobManager, BootstrapManager, BuildContext, + BuildOutput, ChunkDict, ConversionType, Features, Tree, TreeNode, WhiteoutSpec, +}; + +const DEFAULT_COMPACT_BLOB_SIZE: usize = 10 * 1024 * 1024; +const DEFAULT_MAX_COMPACT_SIZE: usize = 100 * 1024 * 1024; + +const fn default_compact_blob_size() -> usize { + DEFAULT_COMPACT_BLOB_SIZE +} + +const fn default_max_compact_size() -> usize { + DEFAULT_MAX_COMPACT_SIZE +} + +#[derive(Clone, Deserialize, Serialize)] +pub struct Config { + /// rebuild blobs whose used_ratio < min_used_ratio + /// used_ratio = (compress_size of all chunks which are referenced by bootstrap) / blob_compress_size + /// available value: 0-99, 0 means disable + /// hint: it's better to disable this option when there are some shared blobs + /// for example: build-cache + #[serde(default)] + min_used_ratio: u8, + /// we compact blobs whose size are less than compact_blob_size + #[serde(default = "default_compact_blob_size")] + compact_blob_size: usize, + /// size of compacted blobs should not be larger than max_compact_size + #[serde(default = "default_max_compact_size")] + max_compact_size: usize, + /// if number of blobs >= layers_to_compact, do compact + /// 0 means always try compact + #[serde(default)] + layers_to_compact: usize, + /// local blobs dir, may haven't upload to backend yet + /// what's more, new blobs will output to this dir + /// name of blob file should be equal to blob_id + blobs_dir: String, +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +enum ChunkKey { + // Chunk digest for RAFS v5, may be extended to support RAFS v6 in future. 
+ Digest(RafsDigest), + // (blob_idx, compress_offset) for RAFS v6 only + Offset(u32, u64), +} + +impl ChunkKey { + fn from(c: &ChunkWrapper) -> Self { + match c { + ChunkWrapper::V5(_) => Self::Digest(*c.id()), + ChunkWrapper::V6(_) => Self::Offset(c.blob_index(), c.compressed_offset()), + ChunkWrapper::Ref(_) => unimplemented!("unsupport ChunkWrapper::Ref(c)"), + } + } +} + +#[derive(Clone, Debug)] +struct ChunkSet { + chunks: HashMap, + total_size: usize, +} + +impl ChunkSet { + fn new() -> Self { + Self { + chunks: Default::default(), + total_size: 0, + } + } + + fn add_chunk(&mut self, chunk: &ChunkWrapper) { + let key = ChunkKey::from(chunk); + if let Entry::Vacant(e) = self.chunks.entry(key) { + e.insert(chunk.clone()); + self.total_size += chunk.compressed_size() as usize; + } + } + + fn get_chunk(&self, key: &ChunkKey) -> Option<&ChunkWrapper> { + self.chunks.get(key) + } + + fn merge(&mut self, other: Self) { + for (_, c) in other.chunks.iter() { + self.add_chunk(c); + } + } + + #[allow(clippy::too_many_arguments)] + fn dump( + &self, + build_ctx: &BuildContext, + blob_storage: ArtifactStorage, + ori_blob_ids: &[String], + new_blob_ctx: &mut BlobContext, + new_blob_idx: u32, + aligned_chunk: bool, + backend: &Arc, + ) -> Result> { + let mut blob_writer = ArtifactWriter::new(blob_storage)?; + let mut chunks = self.chunks.values().collect::>(); + // sort chunks first, don't break order in original blobs + chunks.sort_by(|a, b| { + if (*a).blob_index() == (*b).blob_index() { + (*a).compressed_offset().cmp(&(*b).compressed_offset()) + } else { + (*a).blob_index().cmp(&(*b).blob_index()) + } + }); + + let mut changed_chunks = Vec::new(); + for chunk in chunks { + let blob_idx = chunk.blob_index(); + // get data from backend + // todo: merge download requests + let reader = backend + .get_reader(&ori_blob_ids[blob_idx as usize]) + .expect("get blob err"); + let mut buf = alloc_buf(chunk.compressed_size() as usize); + reader + .read(&mut buf, chunk.compressed_offset()) + .expect("read blob data err"); + blob_writer.write_all(&buf)?; + + let mut new_chunk = chunk.clone(); + // file offset field is useless + new_chunk.set_index(new_blob_ctx.chunk_count); + new_chunk.set_blob_index(new_blob_idx); + new_chunk.set_compressed_offset(new_blob_ctx.current_compressed_offset); + new_chunk.set_uncompressed_offset(new_blob_ctx.current_uncompressed_offset); + new_blob_ctx.add_chunk_meta_info(&new_chunk, None)?; + // insert change ops + changed_chunks.push((chunk.clone(), new_chunk)); + + new_blob_ctx.blob_hash.update(&buf); + new_blob_ctx.chunk_count += 1; + new_blob_ctx.current_compressed_offset += chunk.compressed_size() as u64; + new_blob_ctx.compressed_blob_size += chunk.compressed_size() as u64; + + let aligned_size = if aligned_chunk { + try_round_up_4k(chunk.uncompressed_size()).unwrap() + } else { + chunk.uncompressed_size() as u64 + }; + new_blob_ctx.current_uncompressed_offset += aligned_size; + new_blob_ctx.uncompressed_blob_size += aligned_size; + } + new_blob_ctx.blob_id = format!("{:x}", new_blob_ctx.blob_hash.clone().finalize()); + + // dump blob meta for v6 + Blob::dump_meta_data(build_ctx, new_blob_ctx, &mut blob_writer)?; + let blob_id = new_blob_ctx.blob_id(); + blob_writer.finalize(blob_id)?; + + Ok(changed_chunks) + } +} + +#[derive(Clone, Debug, Default)] +enum State { + ChunkDict, + /// delete this blob + Delete, + #[default] + Invalid, + Original(ChunkSet), + /// output chunks as a new blob file + Rebuild(ChunkSet), +} + +impl State { + fn is_rebuild(&self) -> bool { + 
matches!(self, Self::Rebuild(_)) + } + + fn is_from_dict(&self) -> bool { + matches!(self, Self::ChunkDict) + } + + fn is_invalid(&self) -> bool { + matches!(self, Self::Invalid) + } + + fn merge_blob(&mut self, other: Self) -> Result<()> { + let merge_cs = match other { + State::Original(cs) => cs, + State::Rebuild(cs) => cs, + _ => bail!("invalid state"), + }; + match self { + State::Rebuild(cs) => { + cs.merge(merge_cs); + } + _ => bail!("invalid state"), + } + Ok(()) + } + + fn chunk_total_size(&self) -> Result { + Ok(match self { + State::Original(cs) => cs.total_size, + State::Rebuild(cs) => cs.total_size, + _ => bail!("invalid state"), + }) + } +} + +#[inline] +fn apply_chunk_change(from: &ChunkWrapper, to: &mut ChunkWrapper) -> Result<()> { + ensure!( + to.uncompressed_size() == from.uncompressed_size(), + "different uncompress size" + ); + ensure!( + to.compressed_size() == from.compressed_size(), + "different compressed size" + ); + + to.set_blob_index(from.blob_index()); + to.set_index(from.index()); + to.set_uncompressed_offset(from.uncompressed_offset()); + to.set_compressed_offset(from.compressed_offset()); + Ok(()) +} + +/// RAFS blob compactor to compact multiple small blobs into one blob. +pub struct BlobCompactor { + /// v5 or v6 + version: RafsVersion, + /// states + states: Vec, + /// original blobs + ori_blob_mgr: BlobManager, + /// new blobs + new_blob_mgr: BlobManager, + /// chunk --> list + c2nodes: HashMap>, + /// original blob index --> list + b2nodes: HashMap>, + /// blobs backend + backend: Arc, +} + +impl BlobCompactor { + /// Create a new instance of [BlobCompactor]. + fn new( + version: RafsVersion, + ori_blob_mgr: BlobManager, + backend: Arc, + digester: digest::Algorithm, + bootstrap: &Bootstrap, + ) -> Result { + let ori_blobs_number = ori_blob_mgr.len(); + let mut compactor = Self { + version, + states: vec![Default::default(); ori_blobs_number], + ori_blob_mgr, + new_blob_mgr: BlobManager::new(digester), + c2nodes: HashMap::new(), + b2nodes: HashMap::new(), + backend, + }; + compactor.load_chunk_dict_blobs(); + compactor.load_and_dedup_chunks(bootstrap)?; + Ok(compactor) + } + + fn is_v6(&self) -> bool { + self.version.is_v6() + } + + fn load_and_dedup_chunks(&mut self, bootstrap: &Bootstrap) -> Result<()> { + let mut all_chunks = ChunkSet::new(); + let chunk_dict = self.get_chunk_dict(); + + let cb = &mut |n: &Tree| -> Result<()> { + let mut node = n.lock_node(); + for chunk_idx in 0..node.chunks.len() { + let chunk = &mut node.chunks[chunk_idx]; + let chunk_key = ChunkKey::from(&chunk.inner); + + if self.states[chunk.inner.blob_index() as usize].is_from_dict() { + // dedup by chunk dict + if let Some(c) = + chunk_dict.get_chunk(chunk.inner.id(), chunk.inner.uncompressed_size()) + { + let mut chunk_inner = chunk.inner.deref().clone(); + apply_chunk_change(c, &mut chunk_inner)?; + chunk.inner = Arc::new(chunk_inner); + } else if let Some(c) = all_chunks.get_chunk(&chunk_key) { + let mut chunk_inner = chunk.inner.deref().clone(); + apply_chunk_change(c, &mut chunk_inner)?; + chunk.inner = Arc::new(chunk_inner); + } else { + all_chunks.add_chunk(&chunk.inner); + // add to per blob ChunkSet + let blob_index = chunk.inner.blob_index() as usize; + if self.states[blob_index].is_invalid() { + self.states[blob_index] = State::Original(ChunkSet::new()); + } + if let State::Original(cs) = &mut self.states[blob_index] { + cs.add_chunk(&chunk.inner); + } + } + } + + // construct blobs/chunk --> nodes index map + self.c2nodes + .entry(chunk_key) + .or_default() + 
.push((n.node.clone(), chunk_idx)); + self.b2nodes + .entry(chunk.inner.blob_index()) + .or_default() + .push((n.node.clone(), chunk_idx)); + } + Ok(()) + }; + + bootstrap.tree.walk_bfs(false, cb) + } + + fn get_chunk_dict(&self) -> Arc { + self.ori_blob_mgr.get_chunk_dict() + } + + fn load_chunk_dict_blobs(&mut self) { + let chunk_dict = self.get_chunk_dict(); + let blobs = chunk_dict.get_blobs(); + for i in 0..blobs.len() { + if let Some(real_blob_idx) = chunk_dict.get_real_blob_idx(i as u32) { + self.states[real_blob_idx as usize] = State::ChunkDict; + } + } + } + + fn apply_blob_move(&mut self, from: u32, to: u32) -> Result<()> { + if let Some(idx_list) = self.b2nodes.get(&from) { + for (n, chunk_idx) in idx_list.iter() { + let mut node = n.lock().unwrap(); + ensure!( + node.chunks[*chunk_idx].inner.blob_index() == from, + "unexpected blob_index of chunk" + ); + node.chunks[*chunk_idx].set_blob_index(to); + } + } + Ok(()) + } + + fn apply_chunk_change(&mut self, c: &(ChunkWrapper, ChunkWrapper)) -> Result<()> { + if let Some(chunks) = self.c2nodes.get(&ChunkKey::from(&c.0)) { + for (n, chunk_idx) in chunks.iter() { + let mut node = n.lock().unwrap(); + let chunk = &mut node.chunks[*chunk_idx]; + let mut chunk_inner = chunk.inner.deref().clone(); + apply_chunk_change(&c.1, &mut chunk_inner)?; + chunk.inner = Arc::new(chunk_inner); + } + } + Ok(()) + } + + fn delete_unused_blobs(&mut self) { + for i in 0..self.states.len() { + if self.states[i].is_invalid() { + info!( + "compactor: delete unused blob {}", + self.ori_blob_mgr.get_blob(i).unwrap().blob_id + ); + self.states[i] = State::Delete; + } + } + } + + fn prepare_to_rebuild(&mut self, idx: usize) -> Result<()> { + if !self.states[idx].is_rebuild() { + return Ok(()); + } + + let mut old = State::Invalid; + mem::swap(&mut self.states[idx], &mut old); + if let State::Original(cs) = old { + self.states[idx] = State::Rebuild(cs); + } else { + mem::swap(&mut self.states[idx], &mut old); + bail!("invalid state"); + } + + Ok(()) + } + + fn try_rebuild_blobs(&mut self, ratio: u8) -> Result<()> { + for idx in 0..self.ori_blob_mgr.len() { + let blob_info = self.ori_blob_mgr.get_blob(idx).unwrap(); + let used_ratio = match &self.states[idx] { + State::Original(cs) => { + let compressed_blob_size = if blob_info.compressed_blob_size == 0 { + let reader = match self.backend.get_reader(&blob_info.blob_id) { + Ok(r) => r, + Err(e) => bail!("compactor: failed to get blob reader, {}", e), + }; + match reader.blob_size() { + Ok(sz) => sz, + Err(e) => bail!("compactor: failed to get blob size, {}", e), + } + } else { + blob_info.compressed_blob_size + }; + (cs.total_size * 100 / compressed_blob_size as usize) as u8 + } + _ => 100_u8, + }; + + info!( + "compactor: original blob size {}, used data ratio {}%", + blob_info.blob_id, used_ratio + ); + if used_ratio < ratio { + self.prepare_to_rebuild(idx)?; + } + } + + Ok(()) + } + + fn merge_blob(&mut self, from: usize, to: usize) -> Result<()> { + let mut old = State::Delete; + mem::swap(&mut self.states[from], &mut old); + self.states[to].merge_blob(old) + } + + /// use greedy algorithm to merge small blobs( Result<()> { + let mut need_merge_blobs = Vec::new(); + for idx in 0..self.states.len() { + let blob_info = self.ori_blob_mgr.get_blob(idx).unwrap(); + match &self.states[idx] { + State::Original(cs) => { + let blob_size = if blob_info.compressed_blob_size == 0 { + cs.total_size + } else { + blob_info.compressed_blob_size as usize + }; + if blob_size < low { + info!( + "compactor: try to merge blob {} 
size {}", + blob_info.blob_id, blob_size + ); + need_merge_blobs.push((idx, blob_size)); + } + } + State::Rebuild(cs) => { + if cs.total_size < low { + info!( + "compactor: try to merge blob {} size {}", + blob_info.blob_id, cs.total_size + ); + need_merge_blobs.push((idx, cs.total_size)); + } + } + _ => {} + } + } + // sort by size + need_merge_blobs.sort_by(|(_, len1), (_, len2)| len1.cmp(len2)); + // try merge + if need_merge_blobs.len() < 2 { + return Ok(()); + } + + let mut merge_to = need_merge_blobs[0].0; + for (blob_idx, _) in need_merge_blobs.iter().skip(1) { + let before_size = self.states[merge_to].chunk_total_size()?; + let append_size = self.states[*blob_idx].chunk_total_size()?; + if before_size + append_size <= max { + self.prepare_to_rebuild(merge_to)?; + self.merge_blob(*blob_idx, merge_to)?; + } else { + merge_to = *blob_idx; + } + } + + Ok(()) + } + + fn original_blob_ids(&self) -> Vec { + self.ori_blob_mgr + .get_blobs() + .into_iter() + .map(|blob| blob.blob_id.clone()) + .collect() + } + + fn dump_new_blobs( + &mut self, + build_ctx: &BuildContext, + dir: &str, + aligned_chunk: bool, + ) -> Result<()> { + let ori_blob_ids = self.original_blob_ids(); + ensure!(self.states.len() == self.ori_blob_mgr.len()); + + for idx in 0..self.states.len() { + match &self.states[idx] { + State::Original(_) | State::ChunkDict => { + info!("compactor: keep original data blob {}", ori_blob_ids[idx]); + // already exists, no need to dump + let ctx = self.ori_blob_mgr.take_blob(idx); + let blob_idx = self.new_blob_mgr.alloc_index()?; + if blob_idx != idx as u32 { + self.apply_blob_move(idx as u32, blob_idx)?; + } + self.new_blob_mgr.add_blob(ctx); + } + State::Delete => { + info!("compactor: delete compacted blob {}", ori_blob_ids[idx]); + } + State::Rebuild(cs) => { + let blob_storage = ArtifactStorage::FileDir(PathBuf::from(dir)); + let mut blob_ctx = BlobContext::new( + String::from(""), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + blob_ctx.set_meta_info_enabled(self.is_v6()); + let blob_idx = self.new_blob_mgr.alloc_index()?; + let new_chunks = cs.dump( + build_ctx, + blob_storage, + &ori_blob_ids, + &mut blob_ctx, + blob_idx, + aligned_chunk, + &self.backend, + )?; + for change_chunk in new_chunks.iter() { + self.apply_chunk_change(change_chunk)?; + } + info!("compactor: successfully rebuild blob {}", blob_ctx.blob_id); + self.new_blob_mgr.add_blob(blob_ctx); + } + State::Invalid => bail!("compactor: invalid state for blob {}", ori_blob_ids[idx]), + } + } + + Ok(()) + } + + fn do_compact(&mut self, cfg: &Config) -> Result<()> { + self.delete_unused_blobs(); + self.try_rebuild_blobs(cfg.min_used_ratio)?; + self.try_merge_blobs(cfg.compact_blob_size, cfg.max_compact_size)?; + Ok(()) + } + + /// Compact multiple small data blobs into one to reduce number of blobs. 
+ pub fn compact( + rs: RafsSuper, + d_bootstrap: PathBuf, + chunk_dict: Option>, + backend: Arc, + cfg: &Config, + ) -> Result> { + let mut build_ctx = BuildContext::new( + "".to_string(), + false, + 0, + rs.meta.get_compressor(), + rs.meta.get_digester(), + rs.meta.explicit_uidgid(), + WhiteoutSpec::None, + ConversionType::DirectoryToRafs, + PathBuf::from(""), + Default::default(), + None, + false, + Features::new(), + false, + ); + let mut bootstrap_mgr = + BootstrapManager::new(Some(ArtifactStorage::SingleFile(d_bootstrap)), None); + let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; + let mut ori_blob_mgr = BlobManager::new(rs.meta.get_digester()); + ori_blob_mgr.extend_from_blob_table(&build_ctx, rs.superblock.get_blob_infos())?; + if let Some(dict) = chunk_dict { + ori_blob_mgr.set_chunk_dict(dict); + ori_blob_mgr.extend_from_chunk_dict(&build_ctx)?; + } + if ori_blob_mgr.len() < cfg.layers_to_compact { + return Ok(None); + } + + let tree = Tree::from_bootstrap(&rs, &mut ())?; + let mut bootstrap = Bootstrap::new(tree)?; + let mut compactor = Self::new( + build_ctx.fs_version, + ori_blob_mgr, + backend.clone(), + rs.meta.get_digester(), + &bootstrap, + )?; + compactor.do_compact(cfg)?; + compactor.dump_new_blobs(&build_ctx, &cfg.blobs_dir, build_ctx.aligned_chunk)?; + if compactor.new_blob_mgr.is_empty() { + info!("compactor: no chance to compact data blobs"); + return Ok(None); + } + + info!("compactor: successfully compacted blob"); + // blobs have already been dumped, dump bootstrap only + let blob_table = compactor.new_blob_mgr.to_blob_table(&build_ctx)?; + bootstrap.build(&mut build_ctx, &mut bootstrap_ctx)?; + bootstrap.dump( + &mut build_ctx, + &mut bootstrap_mgr.bootstrap_storage, + &mut bootstrap_ctx, + &blob_table, + )?; + + Ok(Some(BuildOutput::new( + &compactor.new_blob_mgr, + &bootstrap_mgr.bootstrap_storage, + )?)) + } +} + +#[cfg(test)] +mod tests { + use crate::core::node::Node; + use crate::HashChunkDict; + use crate::{NodeChunk, Overlay}; + + use super::*; + use nydus_api::ConfigV2; + use nydus_rafs::metadata::RafsSuperConfig; + use nydus_storage::backend::{BackendResult, BlobReader}; + use nydus_storage::device::v5::BlobV5ChunkInfo; + use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobFeatures}; + use nydus_storage::RAFS_DEFAULT_CHUNK_SIZE; + use nydus_utils::crypt::Algorithm; + use nydus_utils::metrics::BackendMetrics; + use nydus_utils::{compress, crypt}; + use std::any::Any; + use vmm_sys_util::tempdir::TempDir; + use vmm_sys_util::tempfile::TempFile; + + #[doc(hidden)] + #[macro_export] + macro_rules! 
impl_getter { + ($G: ident, $F: ident, $U: ty) => { + fn $G(&self) -> $U { + self.$F + } + }; + } + + #[derive(Default, Clone)] + struct MockChunkInfo { + pub block_id: RafsDigest, + pub blob_index: u32, + pub flags: BlobChunkFlags, + pub compress_size: u32, + pub uncompress_size: u32, + pub compress_offset: u64, + pub uncompress_offset: u64, + pub file_offset: u64, + pub index: u32, + #[allow(unused)] + pub reserved: u32, + } + + impl BlobChunkInfo for MockChunkInfo { + fn chunk_id(&self) -> &RafsDigest { + &self.block_id + } + fn id(&self) -> u32 { + self.index + } + fn is_compressed(&self) -> bool { + self.flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn is_batch(&self) -> bool { + self.flags.contains(BlobChunkFlags::BATCH) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, blob_index, u32); + impl_getter!(compressed_offset, compress_offset, u64); + impl_getter!(compressed_size, compress_size, u32); + impl_getter!(uncompressed_offset, uncompress_offset, u64); + impl_getter!(uncompressed_size, uncompress_size, u32); + } + + impl BlobV5ChunkInfo for MockChunkInfo { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_getter!(index, index, u32); + impl_getter!(file_offset, file_offset, u64); + impl_getter!(flags, flags, BlobChunkFlags); + } + + struct MockBackend { + pub metrics: Arc, + } + + impl BlobReader for MockBackend { + fn blob_size(&self) -> BackendResult { + Ok(1) + } + + fn try_read(&self, buf: &mut [u8], _offset: u64) -> BackendResult { + let mut i = 0; + while i < buf.len() { + buf[i] = i as u8; + i += 1; + } + Ok(i) + } + + fn metrics(&self) -> &BackendMetrics { + // Safe because nydusd must have backend attached with id, only image builder can no id + // but use backend instance to upload blob. + &self.metrics + } + } + + unsafe impl Send for MockBackend {} + unsafe impl Sync for MockBackend {} + + impl BlobBackend for MockBackend { + fn shutdown(&self) {} + + fn metrics(&self) -> &BackendMetrics { + // Safe because nydusd must have backend attached with id, only image builder can no id + // but use backend instance to upload blob. 
+ &self.metrics + } + + fn get_reader(&self, _blob_id: &str) -> BackendResult> { + Ok(Arc::new(MockBackend { + metrics: self.metrics.clone(), + })) + } + } + + #[test] + #[should_panic = "not implemented: unsupport ChunkWrapper::Ref(c)"] + fn test_chunk_key_from() { + let cw = ChunkWrapper::new(RafsVersion::V5); + matches!(ChunkKey::from(&cw), ChunkKey::Digest(_)); + + let cw = ChunkWrapper::new(RafsVersion::V6); + matches!(ChunkKey::from(&cw), ChunkKey::Offset(_, _)); + + let chunk = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 2, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x800, + uncompress_offset: 0x1000, + file_offset: 0x1000, + index: 1, + reserved: 0, + }) as Arc; + let cw = ChunkWrapper::Ref(chunk); + ChunkKey::from(&cw); + } + + #[test] + fn test_chunk_set() { + let mut chunk_set1 = ChunkSet::new(); + + let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); + chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); + chunk_wrapper1.set_compressed_size(8); + let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); + chunk_wrapper2.set_compressed_size(16); + + chunk_set1.add_chunk(&chunk_wrapper1); + chunk_set1.add_chunk(&chunk_wrapper2); + assert_eq!(chunk_set1.total_size, 24); + + let chunk_key2 = ChunkKey::from(&chunk_wrapper2); + assert_eq!( + format!("{:?}", Some(chunk_wrapper2)), + format!("{:?}", chunk_set1.get_chunk(&chunk_key2)) + ); + + let mut chunk_wrapper3 = ChunkWrapper::new(RafsVersion::V5); + chunk_wrapper3.set_id(RafsDigest { data: [3u8; 32] }); + chunk_wrapper3.set_compressed_size(32); + + let mut chunk_set2 = ChunkSet::new(); + chunk_set2.add_chunk(&chunk_wrapper3); + chunk_set2.merge(chunk_set1); + assert_eq!(chunk_set2.total_size, 56); + assert_eq!(chunk_set2.chunks.len(), 3); + + let build_ctx = BuildContext::default(); + let tmp_file = TempFile::new().unwrap(); + let blob_storage = ArtifactStorage::SingleFile(PathBuf::from(tmp_file.as_path())); + let cipher_object = Algorithm::Aes256Xts.new_cipher().unwrap(); + let mut new_blob_ctx = BlobContext::new( + "blob_id".to_owned(), + 0, + BlobFeatures::all(), + compress::Algorithm::Lz4Block, + digest::Algorithm::Sha256, + crypt::Algorithm::Aes256Xts, + Arc::new(cipher_object), + None, + ); + let ori_blob_ids = ["1".to_owned(), "2".to_owned()]; + let backend = Arc::new(MockBackend { + metrics: BackendMetrics::new("id", "backend_type"), + }) as Arc; + + let mut res = chunk_set2 + .dump( + &build_ctx, + blob_storage, + &ori_blob_ids, + &mut new_blob_ctx, + 0, + true, + &backend, + ) + .unwrap(); + + res.sort_by(|a, b| a.0.id().data.cmp(&b.0.id().data)); + + assert_eq!(res.len(), 3); + assert_eq!( + format!("{:?}", res[0].1.id()), + format!("{:?}", RafsDigest { data: [0u8; 32] }) + ); + assert_eq!( + format!("{:?}", res[1].1.id()), + format!("{:?}", RafsDigest { data: [1u8; 32] }) + ); + assert_eq!( + format!("{:?}", res[2].1.id()), + format!("{:?}", RafsDigest { data: [3u8; 32] }) + ); + } + + #[test] + fn test_state() { + let state = State::Rebuild(ChunkSet::new()); + assert!(state.is_rebuild()); + let state = State::ChunkDict; + assert!(state.is_from_dict()); + let state = State::default(); + assert!(state.is_invalid()); + + let mut chunk_set1 = ChunkSet::new(); + let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); + chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); + chunk_wrapper1.set_compressed_size(8); + chunk_set1.add_chunk(&chunk_wrapper1); + let mut state1 = State::Original(chunk_set1); + 
assert_eq!(state1.chunk_total_size().unwrap(), 8); + + let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); + chunk_wrapper2.set_compressed_size(16); + let mut chunk_set2 = ChunkSet::new(); + chunk_set2.add_chunk(&chunk_wrapper2); + let mut state2 = State::Rebuild(chunk_set2); + assert_eq!(state2.chunk_total_size().unwrap(), 16); + + assert!(state1.merge_blob(state2.clone()).is_err()); + assert!(state2.merge_blob(state1).is_ok()); + assert!(state2.merge_blob(State::Invalid).is_err()); + + assert_eq!(state2.chunk_total_size().unwrap(), 24); + assert!(State::Delete.chunk_total_size().is_err()); + } + + #[test] + fn test_apply_chunk_change() { + let mut chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); + chunk_wrapper1.set_id(RafsDigest { data: [1u8; 32] }); + chunk_wrapper1.set_uncompressed_size(8); + chunk_wrapper1.set_compressed_size(8); + + let mut chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); + chunk_wrapper2.set_uncompressed_size(16); + chunk_wrapper2.set_compressed_size(16); + + assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_err()); + chunk_wrapper2.set_uncompressed_size(8); + assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_err()); + + chunk_wrapper2.set_compressed_size(8); + chunk_wrapper1.set_blob_index(0x10); + chunk_wrapper1.set_index(0x20); + chunk_wrapper1.set_uncompressed_offset(0x30); + chunk_wrapper1.set_compressed_offset(0x40); + assert!(apply_chunk_change(&chunk_wrapper1, &mut chunk_wrapper2).is_ok()); + assert_eq!(chunk_wrapper2.blob_index(), 0x10); + assert_eq!(chunk_wrapper2.index(), 0x20); + assert_eq!(chunk_wrapper2.uncompressed_offset(), 0x30); + assert_eq!(chunk_wrapper2.compressed_offset(), 0x40); + } + + fn create_blob_compactor() -> Result { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); + let path = source_path.to_str().unwrap(); + let rafs_config = RafsSuperConfig { + version: RafsVersion::V5, + compressor: compress::Algorithm::Lz4Block, + digester: digest::Algorithm::Blake3, + chunk_size: 0x100000, + batch_size: 0, + explicit_uidgid: true, + is_tarfs_mode: false, + }; + let dict = + HashChunkDict::from_commandline_arg(path, Arc::new(ConfigV2::default()), &rafs_config) + .unwrap(); + + let mut ori_blob_mgr = BlobManager::new(digest::Algorithm::Sha256); + ori_blob_mgr.set_chunk_dict(dict); + + let backend = Arc::new(MockBackend { + metrics: BackendMetrics::new("id", "backend_type"), + }); + + let tmpdir = TempDir::new()?; + let tmpfile = TempFile::new_in(tmpdir.as_path())?; + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + )?; + let tree = Tree::new(node); + let bootstrap = Bootstrap::new(tree)?; + + BlobCompactor::new( + RafsVersion::V6, + ori_blob_mgr, + backend, + digest::Algorithm::Sha256, + &bootstrap, + ) + } + + #[test] + fn test_blob_compactor_new() { + let compactor = create_blob_compactor(); + assert!(compactor.is_ok()); + assert!(compactor.unwrap().is_v6()); + } + + #[test] + fn test_blob_compactor_load_chunk_dict_blobs() { + let mut compactor = create_blob_compactor().unwrap(); + let chunk_dict = compactor.get_chunk_dict(); + let n = chunk_dict.get_blobs().len(); + for i in 0..n { + chunk_dict.set_real_blob_idx(i as u32, i as u32); + } + compactor.states = vec![State::default(); n + 1]; + 
compactor.load_chunk_dict_blobs(); + + assert_eq!(compactor.states.len(), n + 1); + assert!(compactor.states[0].is_from_dict()); + assert!(compactor.states[n >> 1].is_from_dict()); + assert!(compactor.states[n - 1].is_from_dict()); + assert!(!compactor.states[n].is_from_dict()); + } + + fn blob_compactor_load_and_dedup_chunks() -> Result { + let mut compactor = create_blob_compactor()?; + + let mut chunk1 = ChunkWrapper::new(RafsVersion::V5); + chunk1.set_id(RafsDigest { data: [1u8; 32] }); + chunk1.set_uncompressed_size(0); + chunk1.set_compressed_offset(0x11); + chunk1.set_blob_index(1); + let node_chunk1 = NodeChunk { + source: crate::ChunkSource::Dict, + inner: Arc::new(chunk1.clone()), + }; + let mut chunk2 = ChunkWrapper::new(RafsVersion::V6); + chunk2.set_id(RafsDigest { data: [2u8; 32] }); + chunk2.set_uncompressed_size(0x20); + chunk2.set_compressed_offset(0x22); + chunk2.set_blob_index(2); + let node_chunk2 = NodeChunk { + source: crate::ChunkSource::Dict, + inner: Arc::new(chunk2.clone()), + }; + let mut chunk3 = ChunkWrapper::new(RafsVersion::V6); + chunk3.set_id(RafsDigest { data: [3u8; 32] }); + chunk3.set_uncompressed_size(0x20); + chunk3.set_compressed_offset(0x22); + chunk3.set_blob_index(2); + let node_chunk3 = NodeChunk { + source: crate::ChunkSource::Dict, + inner: Arc::new(chunk3.clone()), + }; + + let mut chunk_dict = HashChunkDict::new(digest::Algorithm::Sha256); + chunk_dict.add_chunk( + Arc::new(ChunkWrapper::new(RafsVersion::V5)), + digest::Algorithm::Sha256, + ); + chunk_dict.add_chunk(Arc::new(chunk1.clone()), digest::Algorithm::Sha256); + compactor.ori_blob_mgr.set_chunk_dict(Arc::new(chunk_dict)); + + compactor.states = vec![State::ChunkDict; 5]; + + let tmpdir = TempDir::new()?; + let tmpfile = TempFile::new_in(tmpdir.as_path())?; + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + )?; + let mut tree = Tree::new(node); + let tmpfile2 = TempFile::new_in(tmpdir.as_path())?; + let mut node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile2.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + )?; + node.chunks.push(node_chunk1); + node.chunks.push(node_chunk2); + node.chunks.push(node_chunk3); + let tree2 = Tree::new(node); + tree.insert_child(tree2); + + let bootstrap = Bootstrap::new(tree)?; + + assert!(compactor.load_and_dedup_chunks(&bootstrap).is_ok()); + assert_eq!(compactor.c2nodes.len(), 2); + assert_eq!(compactor.b2nodes.len(), 2); + + let chunk_key1 = ChunkKey::from(&chunk1); + assert!(compactor.c2nodes.get(&chunk_key1).is_some()); + assert_eq!(compactor.c2nodes.get(&chunk_key1).unwrap().len(), 1); + assert!(compactor.b2nodes.get(&chunk2.blob_index()).is_some()); + assert_eq!( + compactor.b2nodes.get(&chunk2.blob_index()).unwrap().len(), + 2 + ); + + Ok(compactor) + } + + #[test] + fn test_blob_compactor_load_and_dedup_chunks() { + assert!(blob_compactor_load_and_dedup_chunks().is_ok()); + } + + #[test] + fn test_blob_compactor_dump_new_blobs() { + let tmp_dir = TempDir::new().unwrap(); + let build_ctx = BuildContext::new( + "build_ctx".to_string(), + false, + 0, + compress::Algorithm::Lz4Block, + digest::Algorithm::Sha256, + true, + WhiteoutSpec::None, + ConversionType::DirectoryToRafs, + PathBuf::from(tmp_dir.as_path()), + Default::default(), + None, + false, + Features::new(), + false, + ); + + let mut compactor = 
blob_compactor_load_and_dedup_chunks().unwrap(); + + let blob_ctx1 = BlobContext::new( + "blob_id1".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx2 = BlobContext::new( + "blob_id2".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx3 = BlobContext::new( + "blob_id3".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx4 = BlobContext::new( + "blob_id4".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx5 = BlobContext::new( + "blob_id5".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + compactor.ori_blob_mgr.add_blob(blob_ctx1); + compactor.ori_blob_mgr.add_blob(blob_ctx2); + compactor.ori_blob_mgr.add_blob(blob_ctx3); + compactor.ori_blob_mgr.add_blob(blob_ctx4); + compactor.ori_blob_mgr.add_blob(blob_ctx5); + + compactor.states[0] = State::Invalid; + + let tmp_dir = TempDir::new().unwrap(); + let dir = tmp_dir.as_path().to_str().unwrap(); + assert!(compactor.dump_new_blobs(&build_ctx, dir, true).is_err()); + + compactor.states = vec![ + State::Delete, + State::ChunkDict, + State::Original(ChunkSet::new()), + State::Rebuild(ChunkSet::new()), + State::Delete, + ]; + assert!(compactor.dump_new_blobs(&build_ctx, dir, true).is_ok()); + assert_eq!(compactor.ori_blob_mgr.len(), 3); + } + + #[test] + fn test_blob_compactor_do_compact() { + let mut compactor = blob_compactor_load_and_dedup_chunks().unwrap(); + + let tmp_dir = TempDir::new().unwrap(); + let build_ctx = BuildContext::new( + "build_ctx".to_string(), + false, + 0, + compress::Algorithm::Lz4Block, + digest::Algorithm::Sha256, + true, + WhiteoutSpec::None, + ConversionType::DirectoryToRafs, + PathBuf::from(tmp_dir.as_path()), + Default::default(), + None, + false, + Features::new(), + false, + ); + let mut blob_ctx1 = BlobContext::new( + "blob_id1".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + blob_ctx1.compressed_blob_size = 2; + let mut blob_ctx2 = BlobContext::new( + "blob_id2".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + blob_ctx2.compressed_blob_size = 0; + let blob_ctx3 = BlobContext::new( + "blob_id3".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx4 = BlobContext::new( + "blob_id4".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + let blob_ctx5 = BlobContext::new( + "blob_id5".to_owned(), + 0, + build_ctx.blob_features, + build_ctx.compressor, + build_ctx.digester, + build_ctx.cipher, + Default::default(), + None, + ); + compactor.ori_blob_mgr.add_blob(blob_ctx1); + compactor.ori_blob_mgr.add_blob(blob_ctx2); + compactor.ori_blob_mgr.add_blob(blob_ctx3); + compactor.ori_blob_mgr.add_blob(blob_ctx4); + compactor.ori_blob_mgr.add_blob(blob_ctx5); + + let mut chunk_set1 = ChunkSet::new(); + 
chunk_set1.total_size = 4; + let mut chunk_set2 = ChunkSet::new(); + chunk_set2.total_size = 6; + let mut chunk_set3 = ChunkSet::new(); + chunk_set3.total_size = 5; + + compactor.states = vec![ + State::Original(chunk_set1), + State::Original(chunk_set2), + State::Rebuild(chunk_set3), + State::ChunkDict, + State::Invalid, + ]; + + let cfg = Config { + min_used_ratio: 50, + compact_blob_size: 10, + max_compact_size: 8, + layers_to_compact: 0, + blobs_dir: "blobs_dir".to_string(), + }; + + assert!(compactor.do_compact(&cfg).is_ok()); + assert!(!compactor.states.last().unwrap().is_invalid()); + } +} diff --git a/builder/src/core/blob.rs b/builder/src/core/blob.rs index cc8e4d56737..1b9111ac116 100644 --- a/builder/src/core/blob.rs +++ b/builder/src/core/blob.rs @@ -1,345 +1,345 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::borrow::Cow; -use std::slice; - -use anyhow::{Context, Result}; -use nydus_rafs::metadata::RAFS_MAX_CHUNK_SIZE; -use nydus_storage::device::BlobFeatures; -use nydus_storage::meta::{toc, BlobMetaChunkArray}; -use nydus_utils::digest::{self, DigestHasher, RafsDigest}; -use nydus_utils::{compress, crypt}; -use sha2::digest::Digest; - -use super::layout::BlobLayout; -use super::node::Node; -use crate::core::context::Artifact; -use crate::{BlobContext, BlobManager, BuildContext, ConversionType, Feature}; - -/// Generator for RAFS data blob. -pub(crate) struct Blob {} - -impl Blob { - /// Dump blob file and generate chunks - pub(crate) fn dump( - ctx: &BuildContext, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, - ) -> Result<()> { - match ctx.conversion_type { - ConversionType::DirectoryToRafs => { - let mut chunk_data_buf = vec![0u8; RAFS_MAX_CHUNK_SIZE as usize]; - let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&ctx.prefetch)?; - for (idx, node) in inodes.iter().enumerate() { - let mut node = node.lock().unwrap(); - let size = node - .dump_node_data(ctx, blob_mgr, blob_writer, &mut chunk_data_buf) - .context("failed to dump blob chunks")?; - if idx < prefetch_entries { - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - blob_ctx.blob_prefetch_size += size; - } - } - } - Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; - } - ConversionType::TarToRafs - | ConversionType::TargzToRafs - | ConversionType::EStargzToRafs => { - Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; - } - ConversionType::TarToTarfs - | ConversionType::TarToRef - | ConversionType::TargzToRef - | ConversionType::EStargzToRef => { - // Use `sha256(tarball)` as `blob_id` for ref-type conversions. 
- if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - if let Some(zran) = &ctx.blob_zran_generator { - let reader = zran.lock().unwrap().reader(); - blob_ctx.compressed_blob_size = reader.get_data_size(); - if blob_ctx.blob_id.is_empty() { - let hash = reader.get_data_digest(); - blob_ctx.blob_id = format!("{:x}", hash.finalize()); - } - } else if let Some(tar_reader) = &ctx.blob_tar_reader { - blob_ctx.compressed_blob_size = tar_reader.position(); - if ctx.conversion_type == ConversionType::TarToTarfs { - blob_ctx.uncompressed_blob_size = blob_ctx.compressed_blob_size; - } - if blob_ctx.blob_id.is_empty() { - let hash = tar_reader.get_hash_object(); - blob_ctx.blob_id = format!("{:x}", hash.finalize()); - } - } - } - Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; - } - ConversionType::EStargzIndexToRef => { - Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; - } - ConversionType::TarToStargz - | ConversionType::DirectoryToTargz - | ConversionType::DirectoryToStargz - | ConversionType::TargzToStargz => { - unimplemented!() - } - } - - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - blob_ctx.set_blob_prefetch_size(ctx); - } - - Ok(()) - } - - fn finalize_blob_data( - ctx: &BuildContext, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, - ) -> Result<()> { - // Dump buffered batch chunk data if exists. - if let Some(ref batch) = ctx.blob_batch_generator { - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - let mut batch = batch.lock().unwrap(); - if !batch.chunk_data_buf_is_empty() { - let (_, compressed_size, _) = Node::write_chunk_data( - &ctx, - blob_ctx, - blob_writer, - batch.chunk_data_buf(), - )?; - batch.add_context(compressed_size); - batch.clear_chunk_data_buf(); - } - } - } - - if !ctx.blob_features.contains(BlobFeatures::SEPARATE) - && (ctx.blob_inline_meta || ctx.features.is_enabled(Feature::BlobToc)) - { - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - blob_ctx.write_tar_header( - blob_writer, - toc::TOC_ENTRY_BLOB_RAW, - blob_ctx.compressed_blob_size, - )?; - if ctx.features.is_enabled(Feature::BlobToc) { - let blob_digest = RafsDigest { - data: blob_ctx.blob_hash.clone().finalize().into(), - }; - blob_ctx.entry_list.add( - toc::TOC_ENTRY_BLOB_RAW, - compress::Algorithm::None, - blob_digest, - blob_ctx.compressed_offset(), - blob_ctx.compressed_blob_size, - blob_ctx.uncompressed_blob_size, - )?; - } - } - } - - Ok(()) - } - - fn get_compression_algorithm_for_meta(ctx: &BuildContext) -> compress::Algorithm { - if ctx.conversion_type.is_to_ref() { - compress::Algorithm::Zstd - } else { - ctx.compressor - } - } - - pub(crate) fn dump_meta_data( - ctx: &BuildContext, - blob_ctx: &mut BlobContext, - blob_writer: &mut dyn Artifact, - ) -> Result<()> { - // Dump blob meta for v6 when it has chunks or bootstrap is to be inlined. - if !blob_ctx.blob_meta_info_enabled || blob_ctx.uncompressed_blob_size == 0 { - return Ok(()); - } - - // Prepare blob meta information data. 
- let encrypt = ctx.cipher != crypt::Algorithm::None; - let cipher_obj = &blob_ctx.cipher_object; - let cipher_ctx = &blob_ctx.cipher_ctx; - let blob_meta_info = &blob_ctx.blob_meta_info; - let mut ci_data = blob_meta_info.as_byte_slice(); - let mut inflate_buf = Vec::new(); - let mut header = blob_ctx.blob_meta_header; - if let Some(ref zran) = ctx.blob_zran_generator { - let (inflate_data, inflate_count) = zran.lock().unwrap().to_vec()?; - header.set_ci_zran_count(inflate_count); - header.set_ci_zran_offset(ci_data.len() as u64); - header.set_ci_zran_size(inflate_data.len() as u64); - header.set_ci_zran(true); - header.set_separate_blob(true); - inflate_buf = [ci_data, &inflate_data].concat(); - ci_data = &inflate_buf; - } else if let Some(ref batch) = ctx.blob_batch_generator { - let (inflate_data, inflate_count) = batch.lock().unwrap().to_vec()?; - header.set_ci_zran_count(inflate_count); - header.set_ci_zran_offset(ci_data.len() as u64); - header.set_ci_zran_size(inflate_data.len() as u64); - header.set_ci_batch(true); - inflate_buf = [ci_data, &inflate_data].concat(); - ci_data = &inflate_buf; - } else if ctx.blob_tar_reader.is_some() { - header.set_separate_blob(true); - }; - let mut compressor = Self::get_compression_algorithm_for_meta(ctx); - let (compressed_data, compressed) = compress::compress(ci_data, compressor) - .with_context(|| "failed to compress blob chunk info array".to_string())?; - if !compressed { - compressor = compress::Algorithm::None; - } - - let encrypted_ci_data = - crypt::encrypt_with_context(&compressed_data, cipher_obj, cipher_ctx, encrypt)?; - let compressed_offset = blob_writer.pos()?; - let compressed_size = encrypted_ci_data.len() as u64; - let uncompressed_size = ci_data.len() as u64; - - header.set_ci_compressor(compressor); - header.set_ci_entries(blob_meta_info.len() as u32); - header.set_ci_compressed_offset(compressed_offset); - header.set_ci_compressed_size(compressed_size as u64); - header.set_ci_uncompressed_size(uncompressed_size as u64); - header.set_aligned(true); - match blob_meta_info { - BlobMetaChunkArray::V1(_) => header.set_chunk_info_v2(false), - BlobMetaChunkArray::V2(_) => header.set_chunk_info_v2(true), - } - if ctx.features.is_enabled(Feature::BlobToc) && blob_ctx.chunk_count > 0 { - header.set_inlined_chunk_digest(true); - } - - blob_ctx.blob_meta_header = header; - if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { - blob_cache.write_blob_meta(ci_data, &header)?; - } - let encrypted_header = - crypt::encrypt_with_context(header.as_bytes(), cipher_obj, cipher_ctx, encrypt)?; - let header_size = encrypted_header.len(); - - // Write blob meta data and header - match encrypted_ci_data { - Cow::Owned(v) => blob_ctx.write_data(blob_writer, &v)?, - Cow::Borrowed(v) => { - let buf = v.to_vec(); - blob_ctx.write_data(blob_writer, &buf)?; - } - } - blob_ctx.write_data(blob_writer, &encrypted_header)?; - - // Write tar header for `blob.meta`. - if ctx.blob_inline_meta || ctx.features.is_enabled(Feature::BlobToc) { - blob_ctx.write_tar_header( - blob_writer, - toc::TOC_ENTRY_BLOB_META, - compressed_size + header_size as u64, - )?; - } - - // Generate ToC entry for `blob.meta` and write chunk digest array. 
- if ctx.features.is_enabled(Feature::BlobToc) { - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - let ci_data = if ctx.blob_features.contains(BlobFeatures::BATCH) - || ctx.blob_features.contains(BlobFeatures::ZRAN) - { - inflate_buf.as_slice() - } else { - blob_ctx.blob_meta_info.as_byte_slice() - }; - hasher.digest_update(ci_data); - blob_ctx.entry_list.add( - toc::TOC_ENTRY_BLOB_META, - compressor, - hasher.digest_finalize(), - compressed_offset, - compressed_size as u64, - uncompressed_size as u64, - )?; - - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - hasher.digest_update(header.as_bytes()); - blob_ctx.entry_list.add( - toc::TOC_ENTRY_BLOB_META_HEADER, - compress::Algorithm::None, - hasher.digest_finalize(), - compressed_offset + compressed_size, - header_size as u64, - header_size as u64, - )?; - - let buf = unsafe { - slice::from_raw_parts( - blob_ctx.blob_chunk_digest.as_ptr() as *const u8, - blob_ctx.blob_chunk_digest.len() * 32, - ) - }; - assert!(!buf.is_empty()); - // The chunk digest array is almost incompressible, no need for compression. - let digest = RafsDigest::from_buf(buf, digest::Algorithm::Sha256); - let compressed_offset = blob_writer.pos()?; - let size = buf.len() as u64; - blob_writer.write_all(buf)?; - blob_ctx.write_tar_header(blob_writer, toc::TOC_ENTRY_BLOB_DIGEST, size)?; - blob_ctx.entry_list.add( - toc::TOC_ENTRY_BLOB_DIGEST, - compress::Algorithm::None, - digest, - compressed_offset, - size, - size, - )?; - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_default_compression_algorithm_for_meta_ci() { - let mut ctx = BuildContext::default(); - - //TarToRef - ctx = BuildContext { - conversion_type: ConversionType::TarToRef, - ..ctx - }; - let compressor = Blob::get_compression_algorithm_for_meta(&ctx); - assert_eq!(compressor, compress::Algorithm::Zstd); - - //EStargzIndexToRef - ctx = BuildContext { - conversion_type: ConversionType::EStargzIndexToRef, - ..ctx - }; - let compressor = Blob::get_compression_algorithm_for_meta(&ctx); - assert_eq!(compressor, compress::Algorithm::Zstd); - - //TargzToRef - ctx = BuildContext { - conversion_type: ConversionType::TargzToRef, - ..ctx - }; - let compressor = Blob::get_compression_algorithm_for_meta(&ctx); - assert_eq!(compressor, compress::Algorithm::Zstd); - - //TarToRef - ctx = BuildContext { - conversion_type: ConversionType::TarToRef, - ..ctx - }; - let compressor = Blob::get_compression_algorithm_for_meta(&ctx); - assert_eq!(compressor, compress::Algorithm::Zstd); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::borrow::Cow; +use std::slice; + +use anyhow::{Context, Result}; +use nydus_rafs::metadata::RAFS_MAX_CHUNK_SIZE; +use nydus_storage::device::BlobFeatures; +use nydus_storage::meta::{toc, BlobMetaChunkArray}; +use nydus_utils::digest::{self, DigestHasher, RafsDigest}; +use nydus_utils::{compress, crypt}; +use sha2::digest::Digest; + +use super::layout::BlobLayout; +use super::node::Node; +use crate::core::context::Artifact; +use crate::{BlobContext, BlobManager, BuildContext, ConversionType, Feature}; + +/// Generator for RAFS data blob. 
+pub(crate) struct Blob {} + +impl Blob { + /// Dump blob file and generate chunks + pub(crate) fn dump( + ctx: &BuildContext, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, + ) -> Result<()> { + match ctx.conversion_type { + ConversionType::DirectoryToRafs => { + let mut chunk_data_buf = vec![0u8; RAFS_MAX_CHUNK_SIZE as usize]; + let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&ctx.prefetch)?; + for (idx, node) in inodes.iter().enumerate() { + let mut node = node.lock().unwrap(); + let size = node + .dump_node_data(ctx, blob_mgr, blob_writer, &mut chunk_data_buf) + .context("failed to dump blob chunks")?; + if idx < prefetch_entries { + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + blob_ctx.blob_prefetch_size += size; + } + } + } + Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; + } + ConversionType::TarToRafs + | ConversionType::TargzToRafs + | ConversionType::EStargzToRafs => { + Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; + } + ConversionType::TarToTarfs + | ConversionType::TarToRef + | ConversionType::TargzToRef + | ConversionType::EStargzToRef => { + // Use `sha256(tarball)` as `blob_id` for ref-type conversions. + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + if let Some(zran) = &ctx.blob_zran_generator { + let reader = zran.lock().unwrap().reader(); + blob_ctx.compressed_blob_size = reader.get_data_size(); + if blob_ctx.blob_id.is_empty() { + let hash = reader.get_data_digest(); + blob_ctx.blob_id = format!("{:x}", hash.finalize()); + } + } else if let Some(tar_reader) = &ctx.blob_tar_reader { + blob_ctx.compressed_blob_size = tar_reader.position(); + if ctx.conversion_type == ConversionType::TarToTarfs { + blob_ctx.uncompressed_blob_size = blob_ctx.compressed_blob_size; + } + if blob_ctx.blob_id.is_empty() { + let hash = tar_reader.get_hash_object(); + blob_ctx.blob_id = format!("{:x}", hash.finalize()); + } + } + } + Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; + } + ConversionType::EStargzIndexToRef => { + Self::finalize_blob_data(ctx, blob_mgr, blob_writer)?; + } + ConversionType::TarToStargz + | ConversionType::DirectoryToTargz + | ConversionType::DirectoryToStargz + | ConversionType::TargzToStargz => { + unimplemented!() + } + } + + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + blob_ctx.set_blob_prefetch_size(ctx); + } + + Ok(()) + } + + fn finalize_blob_data( + ctx: &BuildContext, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, + ) -> Result<()> { + // Dump buffered batch chunk data if exists. 
+ if let Some(ref batch) = ctx.blob_batch_generator { + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + let mut batch = batch.lock().unwrap(); + if !batch.chunk_data_buf_is_empty() { + let (_, compressed_size, _) = Node::write_chunk_data( + &ctx, + blob_ctx, + blob_writer, + batch.chunk_data_buf(), + )?; + batch.add_context(compressed_size); + batch.clear_chunk_data_buf(); + } + } + } + + if !ctx.blob_features.contains(BlobFeatures::SEPARATE) + && (ctx.blob_inline_meta || ctx.features.is_enabled(Feature::BlobToc)) + { + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + blob_ctx.write_tar_header( + blob_writer, + toc::TOC_ENTRY_BLOB_RAW, + blob_ctx.compressed_blob_size, + )?; + if ctx.features.is_enabled(Feature::BlobToc) { + let blob_digest = RafsDigest { + data: blob_ctx.blob_hash.clone().finalize().into(), + }; + blob_ctx.entry_list.add( + toc::TOC_ENTRY_BLOB_RAW, + compress::Algorithm::None, + blob_digest, + blob_ctx.compressed_offset(), + blob_ctx.compressed_blob_size, + blob_ctx.uncompressed_blob_size, + )?; + } + } + } + + Ok(()) + } + + fn get_compression_algorithm_for_meta(ctx: &BuildContext) -> compress::Algorithm { + if ctx.conversion_type.is_to_ref() { + compress::Algorithm::Zstd + } else { + ctx.compressor + } + } + + pub(crate) fn dump_meta_data( + ctx: &BuildContext, + blob_ctx: &mut BlobContext, + blob_writer: &mut dyn Artifact, + ) -> Result<()> { + // Dump blob meta for v6 when it has chunks or bootstrap is to be inlined. + if !blob_ctx.blob_meta_info_enabled || blob_ctx.uncompressed_blob_size == 0 { + return Ok(()); + } + + // Prepare blob meta information data. + let encrypt = ctx.cipher != crypt::Algorithm::None; + let cipher_obj = &blob_ctx.cipher_object; + let cipher_ctx = &blob_ctx.cipher_ctx; + let blob_meta_info = &blob_ctx.blob_meta_info; + let mut ci_data = blob_meta_info.as_byte_slice(); + let mut inflate_buf = Vec::new(); + let mut header = blob_ctx.blob_meta_header; + if let Some(ref zran) = ctx.blob_zran_generator { + let (inflate_data, inflate_count) = zran.lock().unwrap().to_vec()?; + header.set_ci_zran_count(inflate_count); + header.set_ci_zran_offset(ci_data.len() as u64); + header.set_ci_zran_size(inflate_data.len() as u64); + header.set_ci_zran(true); + header.set_separate_blob(true); + inflate_buf = [ci_data, &inflate_data].concat(); + ci_data = &inflate_buf; + } else if let Some(ref batch) = ctx.blob_batch_generator { + let (inflate_data, inflate_count) = batch.lock().unwrap().to_vec()?; + header.set_ci_zran_count(inflate_count); + header.set_ci_zran_offset(ci_data.len() as u64); + header.set_ci_zran_size(inflate_data.len() as u64); + header.set_ci_batch(true); + inflate_buf = [ci_data, &inflate_data].concat(); + ci_data = &inflate_buf; + } else if ctx.blob_tar_reader.is_some() { + header.set_separate_blob(true); + }; + let mut compressor = Self::get_compression_algorithm_for_meta(ctx); + let (compressed_data, compressed) = compress::compress(ci_data, compressor) + .with_context(|| "failed to compress blob chunk info array".to_string())?; + if !compressed { + compressor = compress::Algorithm::None; + } + + let encrypted_ci_data = + crypt::encrypt_with_context(&compressed_data, cipher_obj, cipher_ctx, encrypt)?; + let compressed_offset = blob_writer.pos()?; + let compressed_size = encrypted_ci_data.len() as u64; + let uncompressed_size = ci_data.len() as u64; + + header.set_ci_compressor(compressor); + header.set_ci_entries(blob_meta_info.len() as u32); + header.set_ci_compressed_offset(compressed_offset); + 
header.set_ci_compressed_size(compressed_size as u64); + header.set_ci_uncompressed_size(uncompressed_size as u64); + header.set_aligned(true); + match blob_meta_info { + BlobMetaChunkArray::V1(_) => header.set_chunk_info_v2(false), + BlobMetaChunkArray::V2(_) => header.set_chunk_info_v2(true), + } + if ctx.features.is_enabled(Feature::BlobToc) && blob_ctx.chunk_count > 0 { + header.set_inlined_chunk_digest(true); + } + + blob_ctx.blob_meta_header = header; + if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { + blob_cache.write_blob_meta(ci_data, &header)?; + } + let encrypted_header = + crypt::encrypt_with_context(header.as_bytes(), cipher_obj, cipher_ctx, encrypt)?; + let header_size = encrypted_header.len(); + + // Write blob meta data and header + match encrypted_ci_data { + Cow::Owned(v) => blob_ctx.write_data(blob_writer, &v)?, + Cow::Borrowed(v) => { + let buf = v.to_vec(); + blob_ctx.write_data(blob_writer, &buf)?; + } + } + blob_ctx.write_data(blob_writer, &encrypted_header)?; + + // Write tar header for `blob.meta`. + if ctx.blob_inline_meta || ctx.features.is_enabled(Feature::BlobToc) { + blob_ctx.write_tar_header( + blob_writer, + toc::TOC_ENTRY_BLOB_META, + compressed_size + header_size as u64, + )?; + } + + // Generate ToC entry for `blob.meta` and write chunk digest array. + if ctx.features.is_enabled(Feature::BlobToc) { + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + let ci_data = if ctx.blob_features.contains(BlobFeatures::BATCH) + || ctx.blob_features.contains(BlobFeatures::ZRAN) + { + inflate_buf.as_slice() + } else { + blob_ctx.blob_meta_info.as_byte_slice() + }; + hasher.digest_update(ci_data); + blob_ctx.entry_list.add( + toc::TOC_ENTRY_BLOB_META, + compressor, + hasher.digest_finalize(), + compressed_offset, + compressed_size as u64, + uncompressed_size as u64, + )?; + + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + hasher.digest_update(header.as_bytes()); + blob_ctx.entry_list.add( + toc::TOC_ENTRY_BLOB_META_HEADER, + compress::Algorithm::None, + hasher.digest_finalize(), + compressed_offset + compressed_size, + header_size as u64, + header_size as u64, + )?; + + let buf = unsafe { + slice::from_raw_parts( + blob_ctx.blob_chunk_digest.as_ptr() as *const u8, + blob_ctx.blob_chunk_digest.len() * 32, + ) + }; + assert!(!buf.is_empty()); + // The chunk digest array is almost incompressible, no need for compression. 
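Editorial aside on the step that follows: the `unsafe { slice::from_raw_parts(...) }` view treats the chunk digest vector as one contiguous byte buffer of `len() * 32` bytes. Assuming each entry is a plain 32-byte array (as the `* 32` implies), the same flattening can be written safely; a small self-contained sketch:

```rust
/// Flatten a list of 32-byte digests into one contiguous buffer, byte-for-byte
/// equivalent to the raw-parts view taken over the digest vector.
fn flatten_digests(digests: &[[u8; 32]]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(digests.len() * 32);
    for d in digests {
        buf.extend_from_slice(d);
    }
    buf
}

fn main() {
    let digests = vec![[0xaa_u8; 32], [0xbb_u8; 32]];
    let buf = flatten_digests(&digests);
    assert_eq!(buf.len(), digests.len() * 32);
    assert_eq!(&buf[..32], &[0xaa_u8; 32][..]);
    assert_eq!(&buf[32..], &[0xbb_u8; 32][..]);
}
```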
+ let digest = RafsDigest::from_buf(buf, digest::Algorithm::Sha256); + let compressed_offset = blob_writer.pos()?; + let size = buf.len() as u64; + blob_writer.write_all(buf)?; + blob_ctx.write_tar_header(blob_writer, toc::TOC_ENTRY_BLOB_DIGEST, size)?; + blob_ctx.entry_list.add( + toc::TOC_ENTRY_BLOB_DIGEST, + compress::Algorithm::None, + digest, + compressed_offset, + size, + size, + )?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_compression_algorithm_for_meta_ci() { + let mut ctx = BuildContext::default(); + + //TarToRef + ctx = BuildContext { + conversion_type: ConversionType::TarToRef, + ..ctx + }; + let compressor = Blob::get_compression_algorithm_for_meta(&ctx); + assert_eq!(compressor, compress::Algorithm::Zstd); + + //EStargzIndexToRef + ctx = BuildContext { + conversion_type: ConversionType::EStargzIndexToRef, + ..ctx + }; + let compressor = Blob::get_compression_algorithm_for_meta(&ctx); + assert_eq!(compressor, compress::Algorithm::Zstd); + + //TargzToRef + ctx = BuildContext { + conversion_type: ConversionType::TargzToRef, + ..ctx + }; + let compressor = Blob::get_compression_algorithm_for_meta(&ctx); + assert_eq!(compressor, compress::Algorithm::Zstd); + + //TarToRef + ctx = BuildContext { + conversion_type: ConversionType::TarToRef, + ..ctx + }; + let compressor = Blob::get_compression_algorithm_for_meta(&ctx); + assert_eq!(compressor, compress::Algorithm::Zstd); + } +} diff --git a/builder/src/core/bootstrap.rs b/builder/src/core/bootstrap.rs index 22805bd3c03..f85d4fae652 100644 --- a/builder/src/core/bootstrap.rs +++ b/builder/src/core/bootstrap.rs @@ -1,212 +1,212 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use anyhow::{Context, Error, Result}; -use nydus_utils::digest::{self, RafsDigest}; -use std::ops::Deref; - -use nydus_rafs::metadata::layout::{RafsBlobTable, RAFS_V5_ROOT_INODE}; -use nydus_rafs::metadata::{RafsSuper, RafsSuperConfig, RafsSuperFlags}; - -use crate::{ArtifactStorage, BlobManager, BootstrapContext, BootstrapManager, BuildContext, Tree}; - -/// RAFS bootstrap/meta builder. -pub struct Bootstrap { - pub(crate) tree: Tree, -} - -impl Bootstrap { - /// Create a new instance of [Bootstrap]. - pub fn new(tree: Tree) -> Result { - Ok(Self { tree }) - } - - /// Build the final view of the RAFS filesystem meta from the hierarchy `tree`. - pub fn build( - &mut self, - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - ) -> Result<()> { - // Special handling of the root inode - let mut root_node = self.tree.lock_node(); - assert!(root_node.is_dir()); - let index = bootstrap_ctx.generate_next_ino(); - // 0 is reserved and 1 also matches RAFS_V5_ROOT_INODE. - assert_eq!(index, RAFS_V5_ROOT_INODE); - root_node.index = index; - root_node.inode.set_ino(index); - ctx.prefetch.insert(&self.tree.node, root_node.deref()); - bootstrap_ctx.inode_map.insert( - ( - root_node.layer_idx, - root_node.info.src_ino, - root_node.info.src_dev, - ), - vec![self.tree.node.clone()], - ); - drop(root_node); - - Self::build_rafs(ctx, bootstrap_ctx, &mut self.tree)?; - if ctx.fs_version.is_v6() { - let root_offset = self.tree.node.lock().unwrap().v6_offset; - Self::v6_update_dirents(&self.tree, root_offset); - } - - Ok(()) - } - - /// Dump the RAFS filesystem meta information to meta blob. 
- pub fn dump( - &mut self, - ctx: &mut BuildContext, - bootstrap_storage: &mut Option, - bootstrap_ctx: &mut BootstrapContext, - blob_table: &RafsBlobTable, - ) -> Result<()> { - match blob_table { - RafsBlobTable::V5(table) => self.v5_dump(ctx, bootstrap_ctx, table)?, - RafsBlobTable::V6(table) => self.v6_dump(ctx, bootstrap_ctx, table)?, - } - - if let Some(ArtifactStorage::FileDir(p)) = bootstrap_storage { - let bootstrap_data = bootstrap_ctx.writer.as_bytes()?; - let digest = RafsDigest::from_buf(&bootstrap_data, digest::Algorithm::Sha256); - let name = digest.to_string(); - bootstrap_ctx.writer.finalize(Some(name.clone()))?; - *bootstrap_storage = Some(ArtifactStorage::SingleFile(p.join(name))); - Ok(()) - } else { - bootstrap_ctx.writer.finalize(Some(String::default())) - } - } - - /// Traverse node tree, set inode index, ino, child_index and child_count etc according to the - /// RAFS metadata format, then store to nodes collection. - fn build_rafs( - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - tree: &mut Tree, - ) -> Result<()> { - let parent_node = tree.node.clone(); - let mut parent_node = parent_node.lock().unwrap(); - let parent_ino = parent_node.inode.ino(); - let block_size = ctx.v6_block_size(); - - // In case of multi-layer building, it's possible that the parent node is not a directory. - if parent_node.is_dir() { - parent_node - .inode - .set_child_count(tree.children.len() as u32); - if ctx.fs_version.is_v5() { - parent_node - .inode - .set_child_index(bootstrap_ctx.get_next_ino() as u32); - } else if ctx.fs_version.is_v6() { - // Layout directory entries for v6. - let d_size = parent_node.v6_dirent_size(ctx, tree)?; - parent_node.v6_set_dir_offset(bootstrap_ctx, d_size, block_size)?; - } - } - - let mut dirs: Vec<&mut Tree> = Vec::new(); - for child in tree.children.iter_mut() { - let child_node = child.node.clone(); - let mut child_node = child_node.lock().unwrap(); - let index = bootstrap_ctx.generate_next_ino(); - child_node.index = index; - if ctx.fs_version.is_v5() { - child_node.inode.set_parent(parent_ino); - } - - // Handle hardlink. - // All hardlink nodes' ino and nlink should be the same. - // We need to find hardlink node index list in the layer where the node is located - // because the real_ino may be different among different layers, - let mut v6_hardlink_offset: Option = None; - let key = ( - child_node.layer_idx, - child_node.info.src_ino, - child_node.info.src_dev, - ); - if let Some(indexes) = bootstrap_ctx.inode_map.get_mut(&key) { - let nlink = indexes.len() as u32 + 1; - // Update nlink for previous hardlink inodes - for n in indexes.iter() { - n.lock().unwrap().inode.set_nlink(nlink); - } - - let (first_ino, first_offset) = { - let first_node = indexes[0].lock().unwrap(); - (first_node.inode.ino(), first_node.v6_offset) - }; - // set offset for rafs v6 hardlinks - v6_hardlink_offset = Some(first_offset); - child_node.inode.set_nlink(nlink); - child_node.inode.set_ino(first_ino); - indexes.push(child.node.clone()); - } else { - child_node.inode.set_ino(index); - child_node.inode.set_nlink(1); - // Store inode real ino - bootstrap_ctx - .inode_map - .insert(key, vec![child.node.clone()]); - } - - // update bootstrap_ctx.offset for rafs v6 non-dir nodes. 
- if !child_node.is_dir() && ctx.fs_version.is_v6() { - child_node.v6_set_offset(bootstrap_ctx, v6_hardlink_offset, block_size)?; - } - ctx.prefetch.insert(&child.node, child_node.deref()); - if child_node.is_dir() { - dirs.push(child); - } - } - - // According to filesystem semantics, a parent directory should have nlink equal to - // the number of its child directories plus 2. - if parent_node.is_dir() { - parent_node.inode.set_nlink((2 + dirs.len()) as u32); - } - for dir in dirs { - Self::build_rafs(ctx, bootstrap_ctx, dir)?; - } - - Ok(()) - } - - /// Load a parent RAFS bootstrap and return the `Tree` object representing the filesystem. - pub fn load_parent_bootstrap( - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - ) -> Result { - let rs = if let Some(path) = bootstrap_mgr.f_parent_path.as_ref() { - RafsSuper::load_from_file(path, ctx.configuration.clone(), false).map(|(rs, _)| rs)? - } else { - return Err(Error::msg("bootstrap context's parent bootstrap is null")); - }; - - let config = RafsSuperConfig { - compressor: ctx.compressor, - digester: ctx.digester, - chunk_size: ctx.chunk_size, - batch_size: ctx.batch_size, - explicit_uidgid: ctx.explicit_uidgid, - version: ctx.fs_version, - is_tarfs_mode: rs.meta.flags.contains(RafsSuperFlags::TARTFS_MODE), - }; - config.check_compatibility(&rs.meta)?; - - // Reuse lower layer blob table, - // we need to append the blob entry of upper layer to the table - blob_mgr.extend_from_blob_table(ctx, rs.superblock.get_blob_infos())?; - - // Build node tree of lower layer from a bootstrap file, and add chunks - // of lower node to layered_chunk_dict for chunk deduplication on next. - Tree::from_bootstrap(&rs, &mut blob_mgr.layered_chunk_dict) - .context("failed to build tree from bootstrap") - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::{Context, Error, Result}; +use nydus_utils::digest::{self, RafsDigest}; +use std::ops::Deref; + +use nydus_rafs::metadata::layout::{RafsBlobTable, RAFS_V5_ROOT_INODE}; +use nydus_rafs::metadata::{RafsSuper, RafsSuperConfig, RafsSuperFlags}; + +use crate::{ArtifactStorage, BlobManager, BootstrapContext, BootstrapManager, BuildContext, Tree}; + +/// RAFS bootstrap/meta builder. +pub struct Bootstrap { + pub(crate) tree: Tree, +} + +impl Bootstrap { + /// Create a new instance of [Bootstrap]. + pub fn new(tree: Tree) -> Result { + Ok(Self { tree }) + } + + /// Build the final view of the RAFS filesystem meta from the hierarchy `tree`. + pub fn build( + &mut self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + ) -> Result<()> { + // Special handling of the root inode + let mut root_node = self.tree.lock_node(); + assert!(root_node.is_dir()); + let index = bootstrap_ctx.generate_next_ino(); + // 0 is reserved and 1 also matches RAFS_V5_ROOT_INODE. 
+ assert_eq!(index, RAFS_V5_ROOT_INODE); + root_node.index = index; + root_node.inode.set_ino(index); + ctx.prefetch.insert(&self.tree.node, root_node.deref()); + bootstrap_ctx.inode_map.insert( + ( + root_node.layer_idx, + root_node.info.src_ino, + root_node.info.src_dev, + ), + vec![self.tree.node.clone()], + ); + drop(root_node); + + Self::build_rafs(ctx, bootstrap_ctx, &mut self.tree)?; + if ctx.fs_version.is_v6() { + let root_offset = self.tree.node.lock().unwrap().v6_offset; + Self::v6_update_dirents(&self.tree, root_offset); + } + + Ok(()) + } + + /// Dump the RAFS filesystem meta information to meta blob. + pub fn dump( + &mut self, + ctx: &mut BuildContext, + bootstrap_storage: &mut Option, + bootstrap_ctx: &mut BootstrapContext, + blob_table: &RafsBlobTable, + ) -> Result<()> { + match blob_table { + RafsBlobTable::V5(table) => self.v5_dump(ctx, bootstrap_ctx, table)?, + RafsBlobTable::V6(table) => self.v6_dump(ctx, bootstrap_ctx, table)?, + } + + if let Some(ArtifactStorage::FileDir(p)) = bootstrap_storage { + let bootstrap_data = bootstrap_ctx.writer.as_bytes()?; + let digest = RafsDigest::from_buf(&bootstrap_data, digest::Algorithm::Sha256); + let name = digest.to_string(); + bootstrap_ctx.writer.finalize(Some(name.clone()))?; + *bootstrap_storage = Some(ArtifactStorage::SingleFile(p.join(name))); + Ok(()) + } else { + bootstrap_ctx.writer.finalize(Some(String::default())) + } + } + + /// Traverse node tree, set inode index, ino, child_index and child_count etc according to the + /// RAFS metadata format, then store to nodes collection. + fn build_rafs( + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + tree: &mut Tree, + ) -> Result<()> { + let parent_node = tree.node.clone(); + let mut parent_node = parent_node.lock().unwrap(); + let parent_ino = parent_node.inode.ino(); + let block_size = ctx.v6_block_size(); + + // In case of multi-layer building, it's possible that the parent node is not a directory. + if parent_node.is_dir() { + parent_node + .inode + .set_child_count(tree.children.len() as u32); + if ctx.fs_version.is_v5() { + parent_node + .inode + .set_child_index(bootstrap_ctx.get_next_ino() as u32); + } else if ctx.fs_version.is_v6() { + // Layout directory entries for v6. + let d_size = parent_node.v6_dirent_size(ctx, tree)?; + parent_node.v6_set_dir_offset(bootstrap_ctx, d_size, block_size)?; + } + } + + let mut dirs: Vec<&mut Tree> = Vec::new(); + for child in tree.children.iter_mut() { + let child_node = child.node.clone(); + let mut child_node = child_node.lock().unwrap(); + let index = bootstrap_ctx.generate_next_ino(); + child_node.index = index; + if ctx.fs_version.is_v5() { + child_node.inode.set_parent(parent_ino); + } + + // Handle hardlink. + // All hardlink nodes' ino and nlink should be the same. 
+ // We need to find hardlink node index list in the layer where the node is located + // because the real_ino may be different among different layers, + let mut v6_hardlink_offset: Option = None; + let key = ( + child_node.layer_idx, + child_node.info.src_ino, + child_node.info.src_dev, + ); + if let Some(indexes) = bootstrap_ctx.inode_map.get_mut(&key) { + let nlink = indexes.len() as u32 + 1; + // Update nlink for previous hardlink inodes + for n in indexes.iter() { + n.lock().unwrap().inode.set_nlink(nlink); + } + + let (first_ino, first_offset) = { + let first_node = indexes[0].lock().unwrap(); + (first_node.inode.ino(), first_node.v6_offset) + }; + // set offset for rafs v6 hardlinks + v6_hardlink_offset = Some(first_offset); + child_node.inode.set_nlink(nlink); + child_node.inode.set_ino(first_ino); + indexes.push(child.node.clone()); + } else { + child_node.inode.set_ino(index); + child_node.inode.set_nlink(1); + // Store inode real ino + bootstrap_ctx + .inode_map + .insert(key, vec![child.node.clone()]); + } + + // update bootstrap_ctx.offset for rafs v6 non-dir nodes. + if !child_node.is_dir() && ctx.fs_version.is_v6() { + child_node.v6_set_offset(bootstrap_ctx, v6_hardlink_offset, block_size)?; + } + ctx.prefetch.insert(&child.node, child_node.deref()); + if child_node.is_dir() { + dirs.push(child); + } + } + + // According to filesystem semantics, a parent directory should have nlink equal to + // the number of its child directories plus 2. + if parent_node.is_dir() { + parent_node.inode.set_nlink((2 + dirs.len()) as u32); + } + for dir in dirs { + Self::build_rafs(ctx, bootstrap_ctx, dir)?; + } + + Ok(()) + } + + /// Load a parent RAFS bootstrap and return the `Tree` object representing the filesystem. + pub fn load_parent_bootstrap( + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + ) -> Result { + let rs = if let Some(path) = bootstrap_mgr.f_parent_path.as_ref() { + RafsSuper::load_from_file(path, ctx.configuration.clone(), false).map(|(rs, _)| rs)? + } else { + return Err(Error::msg("bootstrap context's parent bootstrap is null")); + }; + + let config = RafsSuperConfig { + compressor: ctx.compressor, + digester: ctx.digester, + chunk_size: ctx.chunk_size, + batch_size: ctx.batch_size, + explicit_uidgid: ctx.explicit_uidgid, + version: ctx.fs_version, + is_tarfs_mode: rs.meta.flags.contains(RafsSuperFlags::TARTFS_MODE), + }; + config.check_compatibility(&rs.meta)?; + + // Reuse lower layer blob table, + // we need to append the blob entry of upper layer to the table + blob_mgr.extend_from_blob_table(ctx, rs.superblock.get_blob_infos())?; + + // Build node tree of lower layer from a bootstrap file, and add chunks + // of lower node to layered_chunk_dict for chunk deduplication on next. + Tree::from_bootstrap(&rs, &mut blob_mgr.layered_chunk_dict) + .context("failed to build tree from bootstrap") + } +} diff --git a/builder/src/core/chunk_dict.rs b/builder/src/core/chunk_dict.rs index 3bade44c62b..3cf0d475549 100644 --- a/builder/src/core/chunk_dict.rs +++ b/builder/src/core/chunk_dict.rs @@ -1,280 +1,280 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::{BTreeMap, HashMap}; -use std::mem::size_of; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; - -use anyhow::{bail, Context, Result}; -use nydus_api::ConfigV2; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::layout::v5::RafsV5ChunkInfo; -use nydus_rafs::metadata::{RafsSuper, RafsSuperConfig}; -use nydus_storage::device::BlobInfo; -use nydus_utils::digest::{self, RafsDigest}; - -use crate::Tree; - -#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct DigestWithBlobIndex(pub RafsDigest, pub u32); - -/// Trait to manage chunk cache for chunk deduplication. -pub trait ChunkDict: Sync + Send + 'static { - /// Add a chunk into the cache. - fn add_chunk(&mut self, chunk: Arc, digester: digest::Algorithm); - - /// Get a cached chunk from the cache. - fn get_chunk(&self, digest: &RafsDigest, uncompressed_size: u32) -> Option<&Arc>; - - /// Get all `BlobInfo` objects referenced by cached chunks. - fn get_blobs(&self) -> Vec>; - - /// Get the `BlobInfo` object with inner index `idx`. - fn get_blob_by_inner_idx(&self, idx: u32) -> Option<&Arc>; - - /// Associate an external index with the inner index. - fn set_real_blob_idx(&self, inner_idx: u32, out_idx: u32); - - /// Get the external index associated with an inner index. - fn get_real_blob_idx(&self, inner_idx: u32) -> Option; - - /// Get the digest algorithm used to generate chunk digest. - fn digester(&self) -> digest::Algorithm; -} - -impl ChunkDict for () { - fn add_chunk(&mut self, _chunk: Arc, _digester: digest::Algorithm) {} - - fn get_chunk( - &self, - _digest: &RafsDigest, - _uncompressed_size: u32, - ) -> Option<&Arc> { - None - } - - fn get_blobs(&self) -> Vec> { - Vec::new() - } - - fn get_blob_by_inner_idx(&self, _idx: u32) -> Option<&Arc> { - None - } - - fn set_real_blob_idx(&self, _inner_idx: u32, _out_idx: u32) { - panic!("()::set_real_blob_idx() should not be invoked"); - } - - fn get_real_blob_idx(&self, inner_idx: u32) -> Option { - Some(inner_idx) - } - - fn digester(&self) -> digest::Algorithm { - digest::Algorithm::Sha256 - } -} - -/// An implementation of [ChunkDict] based on [HashMap]. -pub struct HashChunkDict { - m: HashMap, AtomicU32)>, - blobs: Vec>, - blob_idx_m: Mutex>, - digester: digest::Algorithm, -} - -impl ChunkDict for HashChunkDict { - fn add_chunk(&mut self, chunk: Arc, digester: digest::Algorithm) { - if self.digester == digester { - if let Some(e) = self.m.get(chunk.id()) { - e.1.fetch_add(1, Ordering::AcqRel); - } else { - self.m - .insert(chunk.id().to_owned(), (chunk, AtomicU32::new(1))); - } - } - } - - fn get_chunk(&self, digest: &RafsDigest, uncompressed_size: u32) -> Option<&Arc> { - if let Some((chunk, _)) = self.m.get(digest) { - if chunk.uncompressed_size() == 0 || chunk.uncompressed_size() == uncompressed_size { - return Some(chunk); - } - } - None - } - - fn get_blobs(&self) -> Vec> { - self.blobs.clone() - } - - fn get_blob_by_inner_idx(&self, idx: u32) -> Option<&Arc> { - self.blobs.get(idx as usize) - } - - fn set_real_blob_idx(&self, inner_idx: u32, out_idx: u32) { - self.blob_idx_m.lock().unwrap().insert(inner_idx, out_idx); - } - - fn get_real_blob_idx(&self, inner_idx: u32) -> Option { - self.blob_idx_m.lock().unwrap().get(&inner_idx).copied() - } - - fn digester(&self) -> digest::Algorithm { - self.digester - } -} - -impl HashChunkDict { - /// Create a new instance of [HashChunkDict]. 
- pub fn new(digester: digest::Algorithm) -> Self { - HashChunkDict { - m: Default::default(), - blobs: vec![], - blob_idx_m: Mutex::new(Default::default()), - digester, - } - } - - /// Get an immutable reference to the internal `HashMap`. - pub fn hashmap(&self) -> &HashMap, AtomicU32)> { - &self.m - } - - /// Parse commandline argument for chunk dictionary and load chunks into the dictionary. - pub fn from_commandline_arg( - arg: &str, - config: Arc, - rafs_config: &RafsSuperConfig, - ) -> Result> { - let file_path = parse_chunk_dict_arg(arg)?; - HashChunkDict::from_bootstrap_file(&file_path, config, rafs_config) - .map(|d| Arc::new(d) as Arc) - } - - /// Load chunks from the RAFS filesystem into the chunk dictionary. - pub fn from_bootstrap_file( - path: &Path, - config: Arc, - rafs_config: &RafsSuperConfig, - ) -> Result { - let (rs, _) = RafsSuper::load_from_file(path, config, true) - .with_context(|| format!("failed to open bootstrap file {:?}", path))?; - let mut d = HashChunkDict { - m: HashMap::new(), - blobs: rs.superblock.get_blob_infos(), - blob_idx_m: Mutex::new(BTreeMap::new()), - digester: rafs_config.digester, - }; - - rafs_config.check_compatibility(&rs.meta)?; - if rs.meta.is_v5() || rs.meta.has_inlined_chunk_digest() { - Tree::from_bootstrap(&rs, &mut d).context("failed to build tree from bootstrap")?; - } else if rs.meta.is_v6() { - d.load_chunk_table(&rs) - .context("failed to load chunk table")?; - } else { - unimplemented!() - } - - Ok(d) - } - - fn load_chunk_table(&mut self, rs: &RafsSuper) -> Result<()> { - let size = rs.meta.chunk_table_size as usize; - if size == 0 || self.digester != rs.meta.get_digester() { - return Ok(()); - } - - let unit_size = size_of::(); - if size % unit_size != 0 { - return Err(std::io::Error::from_raw_os_error(libc::EINVAL)).with_context(|| { - format!( - "load_chunk_table: invalid rafs v6 chunk table size {}", - size - ) - }); - } - - for idx in 0..(size / unit_size) { - let chunk = rs.superblock.get_chunk_info(idx)?; - let chunk_info = Arc::new(ChunkWrapper::from_chunk_info(chunk)); - self.add_chunk(chunk_info, self.digester); - } - - Ok(()) - } -} - -/// Parse a chunk dictionary argument string. 
-/// -/// # Argument -/// `arg` may be in inform of: -/// - type=path: type of external source and corresponding path -/// - path: type default to "bootstrap" -/// -/// for example: -/// bootstrap=image.boot -/// image.boot -/// ~/image/image.boot -/// boltdb=/var/db/dict.db (not supported yet) -pub fn parse_chunk_dict_arg(arg: &str) -> Result { - let (file_type, file_path) = match arg.find('=') { - None => ("bootstrap", arg), - Some(idx) => (&arg[0..idx], &arg[idx + 1..]), - }; - - debug!("parse chunk dict argument {}={}", file_type, file_path); - - match file_type { - "bootstrap" => Ok(PathBuf::from(file_path)), - _ => bail!("invalid chunk dict type {}", file_type), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use nydus_rafs::metadata::RafsVersion; - use nydus_utils::{compress, digest}; - use std::path::PathBuf; - - #[test] - fn test_null_dict() { - let mut dict = Box::new(()) as Box; - - let chunk = Arc::new(ChunkWrapper::new(RafsVersion::V5)); - dict.add_chunk(chunk.clone(), digest::Algorithm::Sha256); - assert!(dict.get_chunk(chunk.id(), 0).is_none()); - assert_eq!(dict.get_blobs().len(), 0); - assert_eq!(dict.get_real_blob_idx(5).unwrap(), 5); - } - - #[test] - fn test_chunk_dict() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); - let path = source_path.to_str().unwrap(); - let rafs_config = RafsSuperConfig { - version: RafsVersion::V5, - compressor: compress::Algorithm::Lz4Block, - digester: digest::Algorithm::Blake3, - chunk_size: 0x100000, - batch_size: 0, - explicit_uidgid: true, - is_tarfs_mode: false, - }; - let dict = - HashChunkDict::from_commandline_arg(path, Arc::new(ConfigV2::default()), &rafs_config) - .unwrap(); - - assert!(dict.get_chunk(&RafsDigest::default(), 0).is_none()); - assert_eq!(dict.get_blobs().len(), 18); - dict.set_real_blob_idx(0, 10); - assert_eq!(dict.get_real_blob_idx(0), Some(10)); - assert_eq!(dict.get_real_blob_idx(1), None); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::{BTreeMap, HashMap}; +use std::mem::size_of; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use nydus_api::ConfigV2; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::layout::v5::RafsV5ChunkInfo; +use nydus_rafs::metadata::{RafsSuper, RafsSuperConfig}; +use nydus_storage::device::BlobInfo; +use nydus_utils::digest::{self, RafsDigest}; + +use crate::Tree; + +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct DigestWithBlobIndex(pub RafsDigest, pub u32); + +/// Trait to manage chunk cache for chunk deduplication. +pub trait ChunkDict: Sync + Send + 'static { + /// Add a chunk into the cache. + fn add_chunk(&mut self, chunk: Arc, digester: digest::Algorithm); + + /// Get a cached chunk from the cache. + fn get_chunk(&self, digest: &RafsDigest, uncompressed_size: u32) -> Option<&Arc>; + + /// Get all `BlobInfo` objects referenced by cached chunks. + fn get_blobs(&self) -> Vec>; + + /// Get the `BlobInfo` object with inner index `idx`. + fn get_blob_by_inner_idx(&self, idx: u32) -> Option<&Arc>; + + /// Associate an external index with the inner index. + fn set_real_blob_idx(&self, inner_idx: u32, out_idx: u32); + + /// Get the external index associated with an inner index. 
+ fn get_real_blob_idx(&self, inner_idx: u32) -> Option; + + /// Get the digest algorithm used to generate chunk digest. + fn digester(&self) -> digest::Algorithm; +} + +impl ChunkDict for () { + fn add_chunk(&mut self, _chunk: Arc, _digester: digest::Algorithm) {} + + fn get_chunk( + &self, + _digest: &RafsDigest, + _uncompressed_size: u32, + ) -> Option<&Arc> { + None + } + + fn get_blobs(&self) -> Vec> { + Vec::new() + } + + fn get_blob_by_inner_idx(&self, _idx: u32) -> Option<&Arc> { + None + } + + fn set_real_blob_idx(&self, _inner_idx: u32, _out_idx: u32) { + panic!("()::set_real_blob_idx() should not be invoked"); + } + + fn get_real_blob_idx(&self, inner_idx: u32) -> Option { + Some(inner_idx) + } + + fn digester(&self) -> digest::Algorithm { + digest::Algorithm::Sha256 + } +} + +/// An implementation of [ChunkDict] based on [HashMap]. +pub struct HashChunkDict { + m: HashMap, AtomicU32)>, + blobs: Vec>, + blob_idx_m: Mutex>, + digester: digest::Algorithm, +} + +impl ChunkDict for HashChunkDict { + fn add_chunk(&mut self, chunk: Arc, digester: digest::Algorithm) { + if self.digester == digester { + if let Some(e) = self.m.get(chunk.id()) { + e.1.fetch_add(1, Ordering::AcqRel); + } else { + self.m + .insert(chunk.id().to_owned(), (chunk, AtomicU32::new(1))); + } + } + } + + fn get_chunk(&self, digest: &RafsDigest, uncompressed_size: u32) -> Option<&Arc> { + if let Some((chunk, _)) = self.m.get(digest) { + if chunk.uncompressed_size() == 0 || chunk.uncompressed_size() == uncompressed_size { + return Some(chunk); + } + } + None + } + + fn get_blobs(&self) -> Vec> { + self.blobs.clone() + } + + fn get_blob_by_inner_idx(&self, idx: u32) -> Option<&Arc> { + self.blobs.get(idx as usize) + } + + fn set_real_blob_idx(&self, inner_idx: u32, out_idx: u32) { + self.blob_idx_m.lock().unwrap().insert(inner_idx, out_idx); + } + + fn get_real_blob_idx(&self, inner_idx: u32) -> Option { + self.blob_idx_m.lock().unwrap().get(&inner_idx).copied() + } + + fn digester(&self) -> digest::Algorithm { + self.digester + } +} + +impl HashChunkDict { + /// Create a new instance of [HashChunkDict]. + pub fn new(digester: digest::Algorithm) -> Self { + HashChunkDict { + m: Default::default(), + blobs: vec![], + blob_idx_m: Mutex::new(Default::default()), + digester, + } + } + + /// Get an immutable reference to the internal `HashMap`. + pub fn hashmap(&self) -> &HashMap, AtomicU32)> { + &self.m + } + + /// Parse commandline argument for chunk dictionary and load chunks into the dictionary. + pub fn from_commandline_arg( + arg: &str, + config: Arc, + rafs_config: &RafsSuperConfig, + ) -> Result> { + let file_path = parse_chunk_dict_arg(arg)?; + HashChunkDict::from_bootstrap_file(&file_path, config, rafs_config) + .map(|d| Arc::new(d) as Arc) + } + + /// Load chunks from the RAFS filesystem into the chunk dictionary. 
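Aside for illustration: `HashChunkDict` above deduplicates by chunk digest, keeping an `AtomicU32` hit counter per entry and only reusing a chunk when the uncompressed size also fits. A condensed, std-only sketch of that shape follows; the `Digest`/`Chunk` types are placeholders, not the crate's `RafsDigest`/`ChunkWrapper`.

```rust
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

type Digest = [u8; 32]; // placeholder for RafsDigest
struct Chunk {
    digest: Digest,
    uncompressed_size: u32,
}

#[derive(Default)]
struct DedupDict {
    m: HashMap<Digest, (Arc<Chunk>, AtomicU32)>,
}

impl DedupDict {
    /// Insert a chunk, or bump the hit counter if the digest is already cached.
    fn add_chunk(&mut self, chunk: Arc<Chunk>) {
        if let Some(e) = self.m.get(&chunk.digest) {
            e.1.fetch_add(1, Ordering::AcqRel);
        } else {
            self.m.insert(chunk.digest, (chunk, AtomicU32::new(1)));
        }
    }

    /// Reuse a cached chunk only when the uncompressed size matches as well.
    fn get_chunk(&self, digest: &Digest, uncompressed_size: u32) -> Option<&Arc<Chunk>> {
        match self.m.get(digest) {
            Some((c, _)) if c.uncompressed_size == uncompressed_size => Some(c),
            _ => None,
        }
    }
}

fn main() {
    let mut dict = DedupDict::default();
    let chunk = Arc::new(Chunk { digest: [1u8; 32], uncompressed_size: 4096 });
    dict.add_chunk(chunk.clone());
    dict.add_chunk(chunk); // second sighting only bumps the counter
    assert!(dict.get_chunk(&[1u8; 32], 4096).is_some());
    assert!(dict.get_chunk(&[1u8; 32], 8192).is_none());
}
```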
+ pub fn from_bootstrap_file( + path: &Path, + config: Arc, + rafs_config: &RafsSuperConfig, + ) -> Result { + let (rs, _) = RafsSuper::load_from_file(path, config, true) + .with_context(|| format!("failed to open bootstrap file {:?}", path))?; + let mut d = HashChunkDict { + m: HashMap::new(), + blobs: rs.superblock.get_blob_infos(), + blob_idx_m: Mutex::new(BTreeMap::new()), + digester: rafs_config.digester, + }; + + rafs_config.check_compatibility(&rs.meta)?; + if rs.meta.is_v5() || rs.meta.has_inlined_chunk_digest() { + Tree::from_bootstrap(&rs, &mut d).context("failed to build tree from bootstrap")?; + } else if rs.meta.is_v6() { + d.load_chunk_table(&rs) + .context("failed to load chunk table")?; + } else { + unimplemented!() + } + + Ok(d) + } + + fn load_chunk_table(&mut self, rs: &RafsSuper) -> Result<()> { + let size = rs.meta.chunk_table_size as usize; + if size == 0 || self.digester != rs.meta.get_digester() { + return Ok(()); + } + + let unit_size = size_of::(); + if size % unit_size != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)).with_context(|| { + format!( + "load_chunk_table: invalid rafs v6 chunk table size {}", + size + ) + }); + } + + for idx in 0..(size / unit_size) { + let chunk = rs.superblock.get_chunk_info(idx)?; + let chunk_info = Arc::new(ChunkWrapper::from_chunk_info(chunk)); + self.add_chunk(chunk_info, self.digester); + } + + Ok(()) + } +} + +/// Parse a chunk dictionary argument string. +/// +/// # Argument +/// `arg` may be in inform of: +/// - type=path: type of external source and corresponding path +/// - path: type default to "bootstrap" +/// +/// for example: +/// bootstrap=image.boot +/// image.boot +/// ~/image/image.boot +/// boltdb=/var/db/dict.db (not supported yet) +pub fn parse_chunk_dict_arg(arg: &str) -> Result { + let (file_type, file_path) = match arg.find('=') { + None => ("bootstrap", arg), + Some(idx) => (&arg[0..idx], &arg[idx + 1..]), + }; + + debug!("parse chunk dict argument {}={}", file_type, file_path); + + match file_type { + "bootstrap" => Ok(PathBuf::from(file_path)), + _ => bail!("invalid chunk dict type {}", file_type), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nydus_rafs::metadata::RafsVersion; + use nydus_utils::{compress, digest}; + use std::path::PathBuf; + + #[test] + fn test_null_dict() { + let mut dict = Box::new(()) as Box; + + let chunk = Arc::new(ChunkWrapper::new(RafsVersion::V5)); + dict.add_chunk(chunk.clone(), digest::Algorithm::Sha256); + assert!(dict.get_chunk(chunk.id(), 0).is_none()); + assert_eq!(dict.get_blobs().len(), 0); + assert_eq!(dict.get_real_blob_idx(5).unwrap(), 5); + } + + #[test] + fn test_chunk_dict() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); + let path = source_path.to_str().unwrap(); + let rafs_config = RafsSuperConfig { + version: RafsVersion::V5, + compressor: compress::Algorithm::Lz4Block, + digester: digest::Algorithm::Blake3, + chunk_size: 0x100000, + batch_size: 0, + explicit_uidgid: true, + is_tarfs_mode: false, + }; + let dict = + HashChunkDict::from_commandline_arg(path, Arc::new(ConfigV2::default()), &rafs_config) + .unwrap(); + + assert!(dict.get_chunk(&RafsDigest::default(), 0).is_none()); + assert_eq!(dict.get_blobs().len(), 18); + dict.set_real_blob_idx(0, 10); + assert_eq!(dict.get_real_blob_idx(0), Some(10)); + assert_eq!(dict.get_real_blob_idx(1), None); + } +} diff --git 
a/builder/src/core/context.rs b/builder/src/core/context.rs index eb7a77728c8..dded3590197 100644 --- a/builder/src/core/context.rs +++ b/builder/src/core/context.rs @@ -1,1575 +1,1575 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Struct to maintain context information for the image builder. - -use std::any::Any; -use std::borrow::Cow; -use std::collections::{HashMap, VecDeque}; -use std::convert::TryFrom; -use std::fs::{remove_file, rename, File, OpenOptions}; -use std::io::{BufWriter, Cursor, Read, Seek, Write}; -use std::mem::size_of; -use std::os::unix::fs::FileTypeExt; -use std::path::{Display, Path, PathBuf}; -use std::str::FromStr; -use std::sync::{Arc, Mutex}; -use std::{fmt, fs}; - -use anyhow::{anyhow, Context, Error, Result}; -use nydus_utils::crypt::{self, Cipher, CipherContext}; -use sha2::{Digest, Sha256}; -use tar::{EntryType, Header}; -use vmm_sys_util::tempfile::TempFile; - -use nydus_api::ConfigV2; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::layout::v5::RafsV5BlobTable; -use nydus_rafs::metadata::layout::v6::{ - RafsV6BlobTable, EROFS_BLOCK_SIZE_4096, EROFS_INODE_SLOT_SIZE, -}; -use nydus_rafs::metadata::layout::RafsBlobTable; -use nydus_rafs::metadata::{Inode, RAFS_DEFAULT_CHUNK_SIZE}; -use nydus_rafs::metadata::{RafsSuperFlags, RafsVersion}; -use nydus_rafs::RafsIoWrite; -use nydus_storage::device::{BlobFeatures, BlobInfo}; -use nydus_storage::factory::BlobFactory; -use nydus_storage::meta::toc::{TocEntryList, TocLocation}; -use nydus_storage::meta::{ - toc, BatchContextGenerator, BlobChunkInfoV2Ondisk, BlobCompressionContextHeader, - BlobMetaChunkArray, BlobMetaChunkInfo, ZranContextGenerator, -}; -use nydus_utils::digest::DigestData; -use nydus_utils::{compress, digest, div_round_up, round_down, try_round_up_4k, BufReaderInfo}; - -use super::node::ChunkSource; -use crate::core::tree::TreeNode; -use crate::{ChunkDict, Feature, Features, HashChunkDict, Prefetch, PrefetchPolicy, WhiteoutSpec}; - -// TODO: select BufWriter capacity by performance testing. -pub const BUF_WRITER_CAPACITY: usize = 2 << 17; - -/// Filesystem conversion type supported by RAFS builder. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum ConversionType { - DirectoryToRafs, - DirectoryToStargz, - DirectoryToTargz, - EStargzToRafs, - EStargzToRef, - EStargzIndexToRef, - TargzToRafs, - TargzToStargz, - TargzToRef, - TarToStargz, - TarToRafs, - TarToRef, - TarToTarfs, -} - -impl Default for ConversionType { - fn default() -> Self { - Self::DirectoryToRafs - } -} - -impl FromStr for ConversionType { - type Err = Error; - fn from_str(s: &str) -> Result { - match s { - "dir-rafs" => Ok(Self::DirectoryToRafs), - "dir-stargz" => Ok(Self::DirectoryToStargz), - "dir-targz" => Ok(Self::DirectoryToTargz), - "estargz-rafs" => Ok(Self::EStargzToRafs), - "estargz-ref" => Ok(Self::EStargzToRef), - "estargztoc-ref" => Ok(Self::EStargzIndexToRef), - "targz-rafs" => Ok(Self::TargzToRafs), - "targz-stargz" => Ok(Self::TargzToStargz), - "targz-ref" => Ok(Self::TargzToRef), - "tar-rafs" => Ok(Self::TarToRafs), - "tar-stargz" => Ok(Self::TarToStargz), - "tar-tarfs" => Ok(Self::TarToTarfs), - // kept for backward compatibility - "directory" => Ok(Self::DirectoryToRafs), - "stargz_index" => Ok(Self::EStargzIndexToRef), - _ => Err(anyhow!("invalid conversion type")), - } - } -} - -impl fmt::Display for ConversionType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ConversionType::DirectoryToRafs => write!(f, "dir-rafs"), - ConversionType::DirectoryToStargz => write!(f, "dir-stargz"), - ConversionType::DirectoryToTargz => write!(f, "dir-targz"), - ConversionType::EStargzToRafs => write!(f, "estargz-rafs"), - ConversionType::EStargzToRef => write!(f, "estargz-ref"), - ConversionType::EStargzIndexToRef => write!(f, "estargztoc-ref"), - ConversionType::TargzToRafs => write!(f, "targz-rafs"), - ConversionType::TargzToStargz => write!(f, "targz-ref"), - ConversionType::TargzToRef => write!(f, "targz-ref"), - ConversionType::TarToRafs => write!(f, "tar-rafs"), - ConversionType::TarToRef => write!(f, "tar-ref"), - ConversionType::TarToStargz => write!(f, "tar-stargz"), - ConversionType::TarToTarfs => write!(f, "tar-tarfs"), - } - } -} - -impl ConversionType { - /// Check whether the generated image references the original OCI image data. - pub fn is_to_ref(&self) -> bool { - matches!( - self, - ConversionType::EStargzToRef - | ConversionType::EStargzIndexToRef - | ConversionType::TargzToRef - | ConversionType::TarToRef - | ConversionType::TarToTarfs - ) - } -} - -/// Filesystem based storage configuration for artifacts. -#[derive(Debug, Clone)] -pub enum ArtifactStorage { - // Won't rename user's specification - SingleFile(PathBuf), - // Will rename it from tmp file as user didn't specify a name. - FileDir(PathBuf), -} - -impl ArtifactStorage { - /// Show file path to store the generated artifacts. - pub fn display(&self) -> Display { - match self { - ArtifactStorage::SingleFile(p) => p.display(), - ArtifactStorage::FileDir(p) => p.display(), - } - } -} - -impl Default for ArtifactStorage { - fn default() -> Self { - Self::SingleFile(PathBuf::new()) - } -} - -/// ArtifactMemoryWriter provides a writer to allow writing bootstrap -/// data to a byte slice in memory. 
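Illustrative aside: the `FromStr` and `Display` implementations for `ConversionType` above are intended to be inverses, and a round-trip check is a cheap guard against drift (for example, as written above `TargzToStargz` is rendered as "targz-ref", which `FromStr` maps back to `TargzToRef`). A self-contained sketch of such a check on a stand-in enum:

```rust
use std::fmt;
use std::str::FromStr;

// Stand-in enum; the real ConversionType has many more variants.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Conv {
    DirToRafs,
    TargzToStargz,
}

impl FromStr for Conv {
    type Err = String;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "dir-rafs" => Ok(Self::DirToRafs),
            "targz-stargz" => Ok(Self::TargzToStargz),
            _ => Err(format!("invalid conversion type {}", s)),
        }
    }
}

impl fmt::Display for Conv {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::DirToRafs => write!(f, "dir-rafs"),
            Self::TargzToStargz => write!(f, "targz-stargz"),
        }
    }
}

fn main() {
    // Round-trip every variant: Display then FromStr must give back the variant.
    for v in [Conv::DirToRafs, Conv::TargzToStargz] {
        let parsed: Conv = v.to_string().parse().expect("Display/FromStr drifted");
        assert_eq!(parsed, v);
    }
}
```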
-struct ArtifactMemoryWriter(Cursor>); - -impl Default for ArtifactMemoryWriter { - fn default() -> Self { - Self(Cursor::new(Vec::new())) - } -} - -impl RafsIoWrite for ArtifactMemoryWriter { - fn as_any(&self) -> &dyn Any { - &self.0 - } - - fn as_bytes(&mut self) -> std::io::Result> { - self.0.set_position(0); - Ok(Cow::Borrowed(self.0.get_ref().as_slice())) - } -} - -impl Seek for ArtifactMemoryWriter { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { - self.0.seek(pos) - } -} - -impl Write for ArtifactMemoryWriter { - fn write(&mut self, bytes: &[u8]) -> std::io::Result { - self.0.write(bytes) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.0.flush() - } -} - -struct ArtifactFileWriter(pub ArtifactWriter); - -impl ArtifactFileWriter { - pub fn finalize(&mut self, name: Option) -> Result<()> { - self.0.finalize(name) - } -} - -impl RafsIoWrite for ArtifactFileWriter { - fn as_any(&self) -> &dyn Any { - &self.0 - } - - fn finalize(&mut self, name: Option) -> Result<()> { - self.0.finalize(name) - } - - fn as_bytes(&mut self) -> std::io::Result> { - self.0.file.flush()?; - self.0.reader.seek_offset(0)?; - - let mut buf = Vec::new(); - self.0.reader.read_to_end(&mut buf)?; - - Ok(Cow::Owned(buf)) - } -} - -impl ArtifactFileWriter { - pub fn set_len(&mut self, s: u64) -> std::io::Result<()> { - self.0.file.get_mut().set_len(s) - } -} - -impl Seek for ArtifactFileWriter { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { - self.0.file.seek(pos) - } -} - -impl Write for ArtifactFileWriter { - fn write(&mut self, bytes: &[u8]) -> std::io::Result { - self.0.write(bytes) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.0.flush() - } -} - -pub trait Artifact: Write { - fn pos(&self) -> Result; - fn finalize(&mut self, name: Option) -> Result<()>; -} - -#[derive(Default)] -pub struct NoopArtifactWriter { - pos: usize, -} - -impl Write for NoopArtifactWriter { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.pos += buf.len(); - Ok(buf.len()) - } - - fn flush(&mut self) -> std::io::Result<()> { - Ok(()) - } -} - -impl Artifact for NoopArtifactWriter { - fn pos(&self) -> Result { - Ok(self.pos as u64) - } - - fn finalize(&mut self, _name: Option) -> Result<()> { - Ok(()) - } -} - -/// ArtifactWriter provides a writer to allow writing bootstrap -/// or blob data to a single file or in a directory. -pub struct ArtifactWriter { - pos: usize, - file: BufWriter, - reader: File, - storage: ArtifactStorage, - // Keep this because tmp file will be removed automatically when it is dropped. - // But we will rename/link the tmp file before it is removed. - tmp_file: Option, -} - -impl Write for ArtifactWriter { - fn write(&mut self, bytes: &[u8]) -> std::io::Result { - let n = self.file.write(bytes)?; - self.pos += n; - Ok(n) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.file.flush() - } -} - -impl ArtifactWriter { - /// Create a new instance of [ArtifactWriter] from a [ArtifactStorage] configuration object. - pub fn new(storage: ArtifactStorage) -> Result { - match storage { - ArtifactStorage::SingleFile(ref p) => { - let mut opener = &mut OpenOptions::new(); - opener = opener.write(true).create(true); - if let Ok(md) = fs::metadata(p) { - let ty = md.file_type(); - // Make it as the writer side of FIFO file, no truncate flag because it has - // been created by the reader side. 
- if !ty.is_fifo() { - opener = opener.truncate(true); - } - } - let b = BufWriter::with_capacity( - BUF_WRITER_CAPACITY, - opener - .open(p) - .with_context(|| format!("failed to open file {}", p.display()))?, - ); - let reader = OpenOptions::new() - .read(true) - .open(p) - .with_context(|| format!("failed to open file {}", p.display()))?; - Ok(Self { - pos: 0, - file: b, - reader, - storage, - tmp_file: None, - }) - } - ArtifactStorage::FileDir(ref p) => { - // Better we can use open(2) O_TMPFILE, but for compatibility sake, we delay this job. - // TODO: Blob dir existence? - let tmp = TempFile::new_in(p) - .with_context(|| format!("failed to create temp file in {}", p.display()))?; - let tmp2 = tmp.as_file().try_clone()?; - let reader = OpenOptions::new() - .read(true) - .open(tmp.as_path()) - .with_context(|| format!("failed to open file {}", tmp.as_path().display()))?; - Ok(Self { - pos: 0, - file: BufWriter::with_capacity(BUF_WRITER_CAPACITY, tmp2), - reader, - storage, - tmp_file: Some(tmp), - }) - } - } - } -} - -impl Artifact for ArtifactWriter { - /// Get the current write position. - fn pos(&self) -> Result { - Ok(self.pos as u64) - } - - /// Finalize the metadata/data blob. - /// - /// When `name` is None, it means that the blob is empty and should be removed. - fn finalize(&mut self, name: Option) -> Result<()> { - self.file.flush()?; - - if let Some(n) = name { - if let ArtifactStorage::FileDir(s) = &self.storage { - let path = Path::new(s).join(n); - if !path.exists() { - if let Some(tmp_file) = &self.tmp_file { - rename(tmp_file.as_path(), &path).with_context(|| { - format!( - "failed to rename blob {:?} to {:?}", - tmp_file.as_path(), - path - ) - })?; - } - } - } - } else if let ArtifactStorage::SingleFile(s) = &self.storage { - if let Ok(md) = s.metadata() { - if md.is_file() { - remove_file(s).with_context(|| format!("failed to remove blob {:?}", s))?; - } - } - } - - Ok(()) - } -} - -pub struct BlobCacheGenerator { - blob_data: Mutex, - blob_meta: Mutex, -} - -impl BlobCacheGenerator { - pub fn new(storage: ArtifactStorage) -> Result { - Ok(BlobCacheGenerator { - blob_data: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage.clone())?)), - blob_meta: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)), - }) - } - - pub fn write_blob_meta( - &self, - data: &[u8], - header: &BlobCompressionContextHeader, - ) -> Result<()> { - let mut guard = self.blob_meta.lock().unwrap(); - let aligned_uncompressed_size = try_round_up_4k(data.len() as u64).ok_or(anyhow!( - format!("invalid input {} for try_round_up_4k", data.len()) - ))?; - guard.set_len( - aligned_uncompressed_size + size_of::() as u64, - )?; - guard - .write_all(data) - .context("failed to write blob meta data")?; - guard.seek(std::io::SeekFrom::Start(aligned_uncompressed_size))?; - guard - .write_all(header.as_bytes()) - .context("failed to write blob meta header")?; - Ok(()) - } - - pub fn write_blob_data( - &self, - chunk_data: &[u8], - chunk_info: &ChunkWrapper, - aligned_d_size: u32, - ) -> Result<()> { - let mut guard = self.blob_data.lock().unwrap(); - let curr_pos = guard.seek(std::io::SeekFrom::End(0))?; - if curr_pos < chunk_info.uncompressed_offset() + aligned_d_size as u64 { - guard.set_len(chunk_info.uncompressed_offset() + aligned_d_size as u64)?; - } - - guard.seek(std::io::SeekFrom::Start(chunk_info.uncompressed_offset()))?; - guard - .write_all(&chunk_data) - .context("failed to write blob cache")?; - Ok(()) - } - - pub fn finalize(&self, name: &str) -> Result<()> { - let 
blob_data_name = format!("{}.blob.data", name); - let mut guard = self.blob_data.lock().unwrap(); - guard.finalize(Some(blob_data_name))?; - drop(guard); - - let blob_meta_name = format!("{}.blob.meta", name); - let mut guard = self.blob_meta.lock().unwrap(); - guard.finalize(Some(blob_meta_name)) - } -} - -/// BlobContext is used to hold the blob information of a layer during build. -pub struct BlobContext { - /// Blob id (user specified or sha256(blob)). - pub blob_id: String, - pub blob_hash: Sha256, - pub blob_compressor: compress::Algorithm, - pub blob_digester: digest::Algorithm, - pub blob_cipher: crypt::Algorithm, - pub blob_prefetch_size: u64, - /// Whether to generate blob metadata information. - pub blob_meta_info_enabled: bool, - /// Data chunks stored in the data blob, for v6. - pub blob_meta_info: BlobMetaChunkArray, - /// Blob metadata header stored in the data blob, for v6 - pub blob_meta_header: BlobCompressionContextHeader, - /// Blob chunk digest array. - pub blob_chunk_digest: Vec, - - /// Final compressed blob file size. - pub compressed_blob_size: u64, - /// Final expected blob cache file size. - pub uncompressed_blob_size: u64, - - /// Current blob offset cursor for writing to disk file. - pub current_compressed_offset: u64, - pub current_uncompressed_offset: u64, - - /// The number of counts in a blob by the index of blob table. - pub chunk_count: u32, - /// Chunk slice size. - pub chunk_size: u32, - /// Whether the blob is from chunk dict. - pub chunk_source: ChunkSource, - - // SHA256 digest of blob ToC content, including the toc tar header. - // It's all zero for blobs with inlined-meta. - pub blob_toc_digest: [u8; 32], - // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and - // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special - // handling. - pub blob_meta_digest: [u8; 32], - // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. - pub blob_meta_size: u64, - // Size of blob ToC content, it's zero for blobs with inlined-meta. - pub blob_toc_size: u32, - - pub entry_list: toc::TocEntryList, - /// Cipher to encrypt the RAFS blobs. - pub cipher_object: Arc, - pub cipher_ctx: Option, -} - -impl BlobContext { - /// Create a new instance of [BlobContext]. 
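Aside on the blob-cache path above: `write_blob_meta` rounds the uncompressed meta size up to a 4 KiB boundary via `try_round_up_4k` and then places the compression-context header at that aligned offset. Assuming the usual power-of-two rounding with an overflow check, the arithmetic looks like this (a sketch, not the crate's implementation):

```rust
/// Round `v` up to the next multiple of 4096, returning None on overflow.
/// A sketch of what a helper like `try_round_up_4k` is expected to compute.
fn try_round_up_4k(v: u64) -> Option<u64> {
    const ALIGN: u64 = 4096;
    v.checked_add(ALIGN - 1).map(|n| n & !(ALIGN - 1))
}

fn main() {
    assert_eq!(try_round_up_4k(0), Some(0));
    assert_eq!(try_round_up_4k(1), Some(4096));
    assert_eq!(try_round_up_4k(4096), Some(4096));
    assert_eq!(try_round_up_4k(4097), Some(8192));
    assert_eq!(try_round_up_4k(u64::MAX), None);
}
```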
- #[allow(clippy::too_many_arguments)] - pub fn new( - blob_id: String, - blob_offset: u64, - features: BlobFeatures, - compressor: compress::Algorithm, - digester: digest::Algorithm, - cipher: crypt::Algorithm, - cipher_object: Arc, - cipher_ctx: Option, - ) -> Self { - let blob_meta_info = if features.contains(BlobFeatures::CHUNK_INFO_V2) { - BlobMetaChunkArray::new_v2() - } else { - BlobMetaChunkArray::new_v1() - }; - let mut blob_ctx = Self { - blob_id, - blob_hash: Sha256::new(), - blob_compressor: compressor, - blob_digester: digester, - blob_cipher: cipher, - blob_prefetch_size: 0, - blob_meta_info_enabled: false, - blob_meta_info, - blob_meta_header: BlobCompressionContextHeader::default(), - blob_chunk_digest: Vec::new(), - - compressed_blob_size: 0, - uncompressed_blob_size: 0, - - current_compressed_offset: blob_offset, - current_uncompressed_offset: 0, - - chunk_count: 0, - chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, - chunk_source: ChunkSource::Build, - - blob_toc_digest: [0u8; 32], - blob_meta_digest: [0u8; 32], - blob_meta_size: 0, - blob_toc_size: 0, - - entry_list: toc::TocEntryList::new(), - cipher_object, - cipher_ctx, - }; - - blob_ctx - .blob_meta_header - .set_aligned(features.contains(BlobFeatures::ALIGNED)); - blob_ctx - .blob_meta_header - .set_inlined_fs_meta(features.contains(BlobFeatures::INLINED_FS_META)); - blob_ctx - .blob_meta_header - .set_chunk_info_v2(features.contains(BlobFeatures::CHUNK_INFO_V2)); - blob_ctx - .blob_meta_header - .set_ci_batch(features.contains(BlobFeatures::BATCH)); - blob_ctx - .blob_meta_header - .set_ci_zran(features.contains(BlobFeatures::ZRAN)); - blob_ctx - .blob_meta_header - .set_separate_blob(features.contains(BlobFeatures::SEPARATE)); - blob_ctx - .blob_meta_header - .set_inlined_chunk_digest(features.contains(BlobFeatures::INLINED_CHUNK_DIGEST)); - blob_ctx - .blob_meta_header - .set_has_tar_header(features.contains(BlobFeatures::HAS_TAR_HEADER)); - blob_ctx - .blob_meta_header - .set_has_toc(features.contains(BlobFeatures::HAS_TOC)); - blob_ctx - .blob_meta_header - .set_cap_tar_toc(features.contains(BlobFeatures::CAP_TAR_TOC)); - blob_ctx - .blob_meta_header - .set_tarfs(features.contains(BlobFeatures::TARFS)); - blob_ctx - .blob_meta_header - .set_encrypted(features.contains(BlobFeatures::ENCRYPTED)); - blob_ctx - .blob_meta_header - .set_is_chunkdict_generated(features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED)); - - blob_ctx - } - - /// Create a new instance of [BlobContext] from `BlobInfo` object. - pub fn from(ctx: &BuildContext, blob: &BlobInfo, chunk_source: ChunkSource) -> Result { - let mut compressed_blob_size = blob.compressed_size(); - let mut blob_meta_size = blob.blob_meta_size(); - let mut toc_size = blob.blob_toc_size(); - let mut blob_meta_digest = blob.blob_meta_digest().to_owned(); - let mut toc_digest = blob.blob_toc_digest().to_owned(); - let mut blob_id = blob.raw_blob_id().to_string(); - let mut features = blob.features(); - - // Fixes up blob info objects from inlined-meta blobs. 
- if chunk_source == ChunkSource::Dict || chunk_source == ChunkSource::Parent { - if features.contains(BlobFeatures::INLINED_FS_META) { - features &= !BlobFeatures::INLINED_FS_META; - - if !features.contains(BlobFeatures::SEPARATE) { - blob_id = blob.blob_id(); - } - - if ctx.configuration.internal.blob_accessible() { - let backend_config = ctx.configuration.get_backend_config().map_err(|e| { - anyhow!("failed to get backend storage configuration, {}", e) - })?; - let blob_mgr = BlobFactory::new_backend(backend_config, "fix-inlined-meta")?; - - if features.contains(BlobFeatures::SEPARATE) { - if let Ok(digest) = blob.get_blob_meta_id() { - let reader = blob_mgr.get_reader(&digest).map_err(|e| { - anyhow!("failed to get reader for blob {}, {}", digest, e) - })?; - let size = reader - .blob_size() - .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?; - if let Ok(v) = hex::decode(digest) { - if v.len() == 32 { - blob_meta_digest.copy_from_slice(&v[..32]); - blob_meta_size = size; - } - } - if blob.has_feature(BlobFeatures::HAS_TOC) { - if let Ok(toc) = TocEntryList::read_from_blob::( - reader.as_ref(), - None, - &TocLocation::default(), - ) { - toc_digest = toc.toc_digest().data; - toc_size = toc.toc_size(); - } - } - } - } else { - let reader = blob_mgr.get_reader(&blob_id).map_err(|e| { - anyhow!("failed to get reader for blob {}, {}", blob_id, e) - })?; - compressed_blob_size = reader - .blob_size() - .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?; - if blob.has_feature(BlobFeatures::HAS_TOC) { - if let Ok(toc) = TocEntryList::read_from_blob::( - reader.as_ref(), - None, - &TocLocation::default(), - ) { - toc_digest = toc.toc_digest().data; - toc_size = toc.toc_size(); - } - } - } - } else if features.contains(BlobFeatures::SEPARATE) { - if let Ok(digest) = blob.get_blob_meta_id() { - if let Ok(v) = hex::decode(digest) { - if v.len() == 32 { - blob_meta_digest.copy_from_slice(&v[..32]); - } - } - } - } - } else if !blob.has_feature(BlobFeatures::CAP_TAR_TOC) - && !ctx.configuration.internal.blob_accessible() - { - blob_id = blob.blob_id(); - } - } - - let (cipher, cipher_object, cipher_ctx) = blob.get_cipher_info(); - - let mut blob_ctx = Self::new( - blob_id, - 0, - features, - blob.compressor(), - blob.digester(), - cipher, - cipher_object, - cipher_ctx, - ); - blob_ctx.blob_prefetch_size = blob.prefetch_size(); - blob_ctx.chunk_count = blob.chunk_count(); - blob_ctx.uncompressed_blob_size = blob.uncompressed_size(); - blob_ctx.compressed_blob_size = compressed_blob_size; - blob_ctx.chunk_size = blob.chunk_size(); - blob_ctx.chunk_source = chunk_source; - blob_ctx.blob_meta_digest = blob_meta_digest; - blob_ctx.blob_meta_size = blob_meta_size; - blob_ctx.blob_toc_digest = toc_digest; - blob_ctx.blob_toc_size = toc_size; - - if blob.meta_ci_is_valid() { - blob_ctx - .blob_meta_header - .set_ci_compressor(blob.meta_ci_compressor()); - blob_ctx.blob_meta_header.set_ci_entries(blob.chunk_count()); - blob_ctx - .blob_meta_header - .set_ci_compressed_offset(blob.meta_ci_offset()); - blob_ctx - .blob_meta_header - .set_ci_compressed_size(blob.meta_ci_compressed_size()); - blob_ctx - .blob_meta_header - .set_ci_uncompressed_size(blob.meta_ci_uncompressed_size()); - blob_ctx.blob_meta_info_enabled = true; - } - - Ok(blob_ctx) - } - - /// Set chunk size for the blob. 
- pub fn set_chunk_size(&mut self, chunk_size: u32) { - self.chunk_size = chunk_size; - } - - // TODO: check the logic to reset prefetch size - pub fn set_blob_prefetch_size(&mut self, ctx: &BuildContext) { - if (self.uncompressed_blob_size > 0 - || (ctx.conversion_type == ConversionType::EStargzIndexToRef - && !self.blob_id.is_empty())) - && ctx.prefetch.policy != PrefetchPolicy::Blob - { - self.blob_prefetch_size = 0; - } - } - - pub fn set_meta_info_enabled(&mut self, enable: bool) { - self.blob_meta_info_enabled = enable; - } - - pub fn set_cipher_info( - &mut self, - cipher_object: Arc, - cipher_ctx: Option, - ) { - self.cipher_object = cipher_object; - self.cipher_ctx = cipher_ctx; - } - - pub fn add_chunk_meta_info( - &mut self, - chunk: &ChunkWrapper, - chunk_info: Option, - ) -> Result<()> { - if self.blob_meta_info_enabled { - assert_eq!(chunk.index() as usize, self.blob_meta_info.len()); - match &self.blob_meta_info { - BlobMetaChunkArray::V1(_) => { - self.blob_meta_info.add_v1( - chunk.compressed_offset(), - chunk.compressed_size(), - chunk.uncompressed_offset(), - chunk.uncompressed_size(), - ); - self.blob_chunk_digest.push(chunk.id().data); - } - BlobMetaChunkArray::V2(_) => { - if let Some(mut info) = chunk_info { - info.set_uncompressed_offset(chunk.uncompressed_offset()); - self.blob_meta_info.add_v2_info(info); - } else { - self.blob_meta_info.add_v2( - chunk.compressed_offset(), - chunk.compressed_size(), - chunk.uncompressed_offset(), - chunk.uncompressed_size(), - chunk.is_compressed(), - chunk.is_encrypted(), - chunk.is_batch(), - 0, - ); - } - self.blob_chunk_digest.push(chunk.id().data); - } - } - } - - Ok(()) - } - - /// Allocate a count index sequentially in a blob. - pub fn alloc_chunk_index(&mut self) -> Result { - let index = self.chunk_count; - - // Rafs v6 only supports 24 bit chunk id. - if index >= 0xff_ffff { - Err(Error::msg( - "the number of chunks in blob exceeds the u32 limit", - )) - } else { - self.chunk_count += 1; - Ok(index) - } - } - - /// Get blob id if the blob has some chunks. - pub fn blob_id(&mut self) -> Option { - if self.uncompressed_blob_size > 0 { - Some(self.blob_id.to_string()) - } else { - None - } - } - - /// Helper to write data to blob and update blob hash. - pub fn write_data(&mut self, blob_writer: &mut dyn Artifact, data: &[u8]) -> Result<()> { - blob_writer.write_all(data)?; - self.blob_hash.update(data); - Ok(()) - } - - /// Helper to write a tar header to blob and update blob hash. - pub fn write_tar_header( - &mut self, - blob_writer: &mut dyn Artifact, - name: &str, - size: u64, - ) -> Result
{ - // The `inline-bootstrap` option merges the blob and bootstrap into one - // file. We need some header to index the location of the blob and bootstrap, - // write_tar_header uses tar header that arranges the data as follows: - // data | tar_header | data | tar_header - // This is a tar-like structure, except that we put the tar header after the - // data. The advantage is that we do not need to determine the size of the data - // first, so that we can write the blob data by stream without seek to improve - // the performance of the blob dump by using fifo. - - let mut header = Header::new_gnu(); - header.set_path(Path::new(name))?; - header.set_entry_type(EntryType::Regular); - header.set_size(size); - // The checksum must be set to ensure that the tar reader implementation - // in golang can correctly parse the header. - header.set_cksum(); - - blob_writer.write_all(header.as_bytes())?; - self.blob_hash.update(header.as_bytes()); - Ok(header) - } - - /// Get offset of compressed blob, since current_compressed_offset - /// is always >= compressed_blob_size, we can safely subtract here. - pub fn compressed_offset(&self) -> u64 { - assert!(self.current_compressed_offset >= self.compressed_blob_size); - self.current_compressed_offset - self.compressed_blob_size - } -} - -/// BlobManager stores all blob related information during build. -pub struct BlobManager { - /// Some layers may not have a blob (only have metadata), so Option - /// is used here, the vector index will be as the layer index. - /// - /// We can get blob index for a layer by using: - /// `self.blobs.iter().flatten().collect()[layer_index];` - blobs: Vec, - current_blob_index: Option, - /// Chunk dictionary to hold chunks from an extra chunk dict file. - /// Used for chunk data de-duplication within the whole image. - pub(crate) global_chunk_dict: Arc, - /// Chunk dictionary to hold chunks from all layers. - /// Used for chunk data de-duplication between layers (with `--parent-bootstrap`) - /// or within layer (with `--inline-bootstrap`). - pub(crate) layered_chunk_dict: HashChunkDict, -} - -impl BlobManager { - /// Create a new instance of [BlobManager]. - pub fn new(digester: digest::Algorithm) -> Self { - Self { - blobs: Vec::new(), - current_blob_index: None, - global_chunk_dict: Arc::new(()), - layered_chunk_dict: HashChunkDict::new(digester), - } - } - - fn new_blob_ctx(ctx: &BuildContext) -> Result { - let (cipher_object, cipher_ctx) = match ctx.cipher { - crypt::Algorithm::None => (Default::default(), None), - crypt::Algorithm::Aes128Xts => { - let key = crypt::Cipher::generate_random_key(ctx.cipher)?; - let iv = crypt::Cipher::generate_random_iv()?; - let cipher_ctx = CipherContext::new(key, iv, false, ctx.cipher)?; - ( - ctx.cipher.new_cipher().ok().unwrap_or_default(), - Some(cipher_ctx), - ) - } - _ => { - return Err(anyhow!(format!( - "cipher algorithm {:?} does not support", - ctx.cipher - ))) - } - }; - let mut blob_ctx = BlobContext::new( - ctx.blob_id.clone(), - ctx.blob_offset, - ctx.blob_features, - ctx.compressor, - ctx.digester, - ctx.cipher, - Arc::new(cipher_object), - cipher_ctx, - ); - blob_ctx.set_chunk_size(ctx.chunk_size); - blob_ctx.set_meta_info_enabled( - ctx.fs_version == RafsVersion::V6 && ctx.conversion_type != ConversionType::TarToTarfs, - ); - - Ok(blob_ctx) - } - - /// Get the current blob object or create one if no current blob available. 
- pub fn get_or_create_current_blob( - &mut self, - ctx: &BuildContext, - ) -> Result<(u32, &mut BlobContext)> { - if self.current_blob_index.is_none() { - let blob_ctx = Self::new_blob_ctx(ctx)?; - self.current_blob_index = Some(self.alloc_index()?); - self.add_blob(blob_ctx); - } - // Safe to unwrap because the blob context has been added. - Ok(self.get_current_blob().unwrap()) - } - - /// Get the current blob object. - pub fn get_current_blob(&mut self) -> Option<(u32, &mut BlobContext)> { - if let Some(idx) = self.current_blob_index { - Some((idx, &mut self.blobs[idx as usize])) - } else { - None - } - } - - /// Get or cerate blob for chunkdict, this is used for chunk deduplication. - pub fn get_or_cerate_blob_for_chunkdict( - &mut self, - ctx: &BuildContext, - id: &str, - ) -> Result<(u32, &mut BlobContext)> { - if self.get_blob_idx_by_id(id).is_none() { - let blob_ctx = Self::new_blob_ctx(ctx)?; - self.current_blob_index = Some(self.alloc_index()?); - self.add_blob(blob_ctx); - } else { - self.current_blob_index = self.get_blob_idx_by_id(id); - } - let (_, blob_ctx) = self.get_current_blob().unwrap(); - if blob_ctx.blob_id.is_empty() { - blob_ctx.blob_id = id.to_string(); - } - // Safe to unwrap because the blob context has been added. - Ok(self.get_current_blob().unwrap()) - } - - /// Determine if the given blob has been created. - pub fn has_blob(&self, blob_id: &str) -> bool { - self.get_blob_idx_by_id(blob_id).is_some() - } - - /// Set the global chunk dictionary for chunk deduplication. - pub fn set_chunk_dict(&mut self, dict: Arc) { - self.global_chunk_dict = dict - } - - /// Get the global chunk dictionary for chunk deduplication. - pub fn get_chunk_dict(&self) -> Arc { - self.global_chunk_dict.clone() - } - - /// Allocate a blob index sequentially. - /// - /// This should be paired with Self::add() and keep in consistence. - pub fn alloc_index(&self) -> Result { - // Rafs v6 only supports 256 blobs. - u8::try_from(self.blobs.len()) - .map(|v| v as u32) - .with_context(|| Error::msg("too many blobs")) - } - - /// Get number of blobs managed by the manager. - pub fn len(&self) -> usize { - self.blobs.len() - } - - /// Check whether there's managed blobs. - pub fn is_empty(&self) -> bool { - self.blobs.is_empty() - } - - /// Add a blob context to manager - /// - /// This should be paired with Self::alloc_index() and keep in consistence. - pub fn add_blob(&mut self, blob_ctx: BlobContext) { - self.blobs.push(blob_ctx); - } - - /// Get all blob contexts (include the blob context that does not have a blob). - pub fn get_blobs(&self) -> Vec<&BlobContext> { - self.blobs.iter().collect() - } - - pub fn get_blob(&self, idx: usize) -> Option<&BlobContext> { - self.blobs.get(idx) - } - - pub fn take_blob(&mut self, idx: usize) -> BlobContext { - self.blobs.remove(idx) - } - - pub fn get_last_blob(&self) -> Option<&BlobContext> { - self.blobs.last() - } - - pub fn get_blob_idx_by_id(&self, id: &str) -> Option { - for (idx, blob) in self.blobs.iter().enumerate() { - if blob.blob_id.eq(id) { - return Some(idx as u32); - } - } - None - } - - pub fn get_blob_ids(&self) -> Vec { - self.blobs.iter().map(|b| b.blob_id.to_owned()).collect() - } - - /// Prepend all blobs from `blob_table` to the blob manager. 
- pub fn extend_from_blob_table( - &mut self, - ctx: &BuildContext, - blob_table: Vec>, - ) -> Result<()> { - let mut blobs: Vec = Vec::new(); - for blob in blob_table.iter() { - let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Parent)?; - blobs.push(ctx); - } - if let Some(curr) = self.current_blob_index { - self.current_blob_index = Some(curr + blobs.len() as u32); - blobs.append(&mut self.blobs); - } else { - assert!(self.blobs.is_empty()); - } - self.blobs = blobs; - Ok(()) - } - - /// Import all blobs from the global chunk dictionary for later chunk deduplication. - /// - /// The order to import blobs from parent bootstrap and chunk dictionary is important. - /// All blobs from parent bootstrap must be imported first, otherwise we need to fix blob index - /// of chunks from parent bootstrap. - pub fn extend_from_chunk_dict(&mut self, ctx: &BuildContext) -> Result<()> { - let blobs = self.global_chunk_dict.get_blobs(); - - for blob in blobs.iter() { - if let Some(real_idx) = self.get_blob_idx_by_id(&blob.blob_id()) { - self.global_chunk_dict - .set_real_blob_idx(blob.blob_index(), real_idx); - } else { - let idx = self.alloc_index()?; - let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Dict)?; - self.add_blob(ctx); - self.global_chunk_dict - .set_real_blob_idx(blob.blob_index(), idx); - } - } - - Ok(()) - } - - /// Generate a [RafsBlobTable] from all blobs managed by the manager. - pub fn to_blob_table(&self, build_ctx: &BuildContext) -> Result { - let mut blob_table = match build_ctx.fs_version { - RafsVersion::V5 => RafsBlobTable::V5(RafsV5BlobTable::new()), - RafsVersion::V6 => RafsBlobTable::V6(RafsV6BlobTable::new()), - }; - - for ctx in &self.blobs { - let blob_id = ctx.blob_id.clone(); - let blob_prefetch_size = u32::try_from(ctx.blob_prefetch_size)?; - let chunk_count = ctx.chunk_count; - let decompressed_blob_size = ctx.uncompressed_blob_size; - let compressed_blob_size = ctx.compressed_blob_size; - let mut flags = RafsSuperFlags::empty(); - match &mut blob_table { - RafsBlobTable::V5(table) => { - let blob_features = BlobFeatures::from_bits(ctx.blob_meta_header.features()) - .ok_or_else(|| anyhow!("invalid blob features"))?; - flags |= RafsSuperFlags::from(ctx.blob_compressor); - flags |= RafsSuperFlags::from(ctx.blob_digester); - table.add( - blob_id, - 0, - blob_prefetch_size, - ctx.chunk_size, - chunk_count, - decompressed_blob_size, - compressed_blob_size, - blob_features, - flags, - build_ctx.is_chunkdict_generated, - ); - } - RafsBlobTable::V6(table) => { - flags |= RafsSuperFlags::from(ctx.blob_compressor); - flags |= RafsSuperFlags::from(ctx.blob_digester); - flags |= RafsSuperFlags::from(ctx.blob_cipher); - table.add( - blob_id, - 0, - blob_prefetch_size, - ctx.chunk_size, - chunk_count, - decompressed_blob_size, - compressed_blob_size, - flags, - ctx.blob_meta_digest, - ctx.blob_toc_digest, - ctx.blob_meta_size, - ctx.blob_toc_size, - build_ctx.is_chunkdict_generated, - ctx.blob_meta_header, - ctx.cipher_object.clone(), - ctx.cipher_ctx.clone(), - ); - } - } - } - - Ok(blob_table) - } -} - -/// BootstrapContext is used to hold in memory data of bootstrap during build. -pub struct BootstrapContext { - /// This build has a parent bootstrap. - pub layered: bool, - /// Cache node index for hardlinks, HashMap<(layer_index, real_inode, dev), Vec>. 
- pub(crate) inode_map: HashMap<(u16, Inode, u64), Vec>, - /// Current position to write in f_bootstrap - pub(crate) offset: u64, - pub(crate) writer: Box, - /// Not fully used blocks - pub(crate) v6_available_blocks: Vec>, - - next_ino: Inode, -} - -impl BootstrapContext { - /// Create a new instance of [BootstrapContext]. - pub fn new(storage: Option, layered: bool) -> Result { - let writer = if let Some(storage) = storage { - Box::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)) as Box - } else { - Box::::default() as Box - }; - - Ok(Self { - layered, - inode_map: HashMap::new(), - next_ino: 1, - offset: EROFS_BLOCK_SIZE_4096, - writer, - v6_available_blocks: vec![ - VecDeque::new(); - EROFS_BLOCK_SIZE_4096 as usize / EROFS_INODE_SLOT_SIZE - ], - }) - } - - /// Align the write position. - pub fn align_offset(&mut self, align_size: u64) { - if self.offset % align_size > 0 { - self.offset = div_round_up(self.offset, align_size) * align_size; - } - } - - /// Get the next available inode number. - pub(crate) fn get_next_ino(&self) -> Inode { - self.next_ino - } - - /// Generate next inode number. - pub(crate) fn generate_next_ino(&mut self) -> Inode { - let ino = self.next_ino; - self.next_ino += 1; - ino - } - - // Only used to allocate space for metadata(inode / inode + inline data). - // Try to find an used block with no less than `size` space left. - // If found it, return the offset where we can store data. - // If not, return 0. - pub(crate) fn allocate_available_block(&mut self, size: u64, block_size: u64) -> u64 { - if size >= block_size { - return 0; - } - - let min_idx = div_round_up(size, EROFS_INODE_SLOT_SIZE as u64) as usize; - let max_idx = div_round_up(block_size, EROFS_INODE_SLOT_SIZE as u64) as usize; - - for idx in min_idx..max_idx { - let blocks = &mut self.v6_available_blocks[idx]; - if let Some(mut offset) = blocks.pop_front() { - offset += block_size - (idx * EROFS_INODE_SLOT_SIZE) as u64; - self.append_available_block( - offset + (min_idx * EROFS_INODE_SLOT_SIZE) as u64, - block_size, - ); - return offset; - } - } - - 0 - } - - // Append the block that `offset` belongs to corresponding deque. - pub(crate) fn append_available_block(&mut self, offset: u64, block_size: u64) { - if offset % block_size != 0 { - let avail = block_size - offset % block_size; - let idx = avail as usize / EROFS_INODE_SLOT_SIZE; - self.v6_available_blocks[idx].push_back(round_down(offset, block_size)); - } - } -} - -/// BootstrapManager is used to hold the parent bootstrap reader and create new bootstrap context. -pub struct BootstrapManager { - pub(crate) f_parent_path: Option, - pub(crate) bootstrap_storage: Option, -} - -impl BootstrapManager { - /// Create a new instance of [BootstrapManager] - pub fn new(bootstrap_storage: Option, f_parent_path: Option) -> Self { - Self { - f_parent_path: f_parent_path.map(PathBuf::from), - bootstrap_storage, - } - } - - /// Create a new instance of [BootstrapContext] - pub fn create_ctx(&self) -> Result { - BootstrapContext::new(self.bootstrap_storage.clone(), self.f_parent_path.is_some()) - } -} - -pub struct BuildContext { - /// Blob id (user specified or sha256(blob)). - pub blob_id: String, - - /// When filling local blobcache file, chunks are arranged as per the - /// `decompress_offset` within chunk info. Therefore, provide a new flag - /// to image tool thus to align chunks in blob with 4k size. - pub aligned_chunk: bool, - /// Add a offset for compressed blob. - pub blob_offset: u64, - /// Blob chunk compress flag. 
- pub compressor: compress::Algorithm, - /// Inode and chunk digest algorithm flag. - pub digester: digest::Algorithm, - /// Blob encryption algorithm flag. - pub cipher: crypt::Algorithm, - /// Save host uid gid in each inode. - pub explicit_uidgid: bool, - /// whiteout spec: overlayfs or oci - pub whiteout_spec: WhiteoutSpec, - /// Chunk slice size. - pub chunk_size: u32, - /// Batch chunk data size. - pub batch_size: u32, - /// Version number of output metadata and data blob. - pub fs_version: RafsVersion, - /// Whether any directory/file has extended attributes. - pub has_xattr: bool, - - /// Format conversion type. - pub conversion_type: ConversionType, - /// Path of source to build the image from: - /// - Directory: `source_path` should be a directory path - /// - StargzIndex: `source_path` should be a stargz index json file path - pub source_path: PathBuf, - - /// Track file/chunk prefetch state. - pub prefetch: Prefetch, - - /// Storage writing blob to single file or a directory. - pub blob_storage: Option, - pub blob_zran_generator: Option>>, - pub blob_batch_generator: Option>, - pub blob_tar_reader: Option>, - pub blob_features: BlobFeatures, - pub blob_inline_meta: bool, - - pub features: Features, - pub configuration: Arc, - /// Generate the blob cache and blob meta - pub blob_cache_generator: Option, - - /// Whether is chunkdict. - pub is_chunkdict_generated: bool, -} - -impl BuildContext { - #[allow(clippy::too_many_arguments)] - pub fn new( - blob_id: String, - aligned_chunk: bool, - blob_offset: u64, - compressor: compress::Algorithm, - digester: digest::Algorithm, - explicit_uidgid: bool, - whiteout_spec: WhiteoutSpec, - conversion_type: ConversionType, - source_path: PathBuf, - prefetch: Prefetch, - blob_storage: Option, - blob_inline_meta: bool, - features: Features, - encrypt: bool, - ) -> Self { - // It's a flag for images built with new nydus-image 2.2 and newer. 
- let mut blob_features = BlobFeatures::CAP_TAR_TOC; - if blob_inline_meta { - blob_features |= BlobFeatures::INLINED_FS_META; - blob_features |= BlobFeatures::HAS_TAR_HEADER; - }; - if features.is_enabled(Feature::BlobToc) { - blob_features |= BlobFeatures::HAS_TOC; - blob_features |= BlobFeatures::HAS_TAR_HEADER; - } - if conversion_type == ConversionType::TarToTarfs { - blob_features |= BlobFeatures::TARFS; - } - - let cipher = if encrypt { - crypt::Algorithm::Aes128Xts - } else { - crypt::Algorithm::None - }; - BuildContext { - blob_id, - aligned_chunk, - blob_offset, - compressor, - digester, - cipher, - explicit_uidgid, - whiteout_spec, - - chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, - batch_size: 0, - fs_version: RafsVersion::default(), - - conversion_type, - source_path, - - prefetch, - blob_storage, - blob_zran_generator: None, - blob_batch_generator: None, - blob_tar_reader: None, - blob_features, - blob_inline_meta, - has_xattr: false, - - features, - configuration: Arc::new(ConfigV2::default()), - blob_cache_generator: None, - is_chunkdict_generated: false, - } - } - - pub fn set_fs_version(&mut self, fs_version: RafsVersion) { - self.fs_version = fs_version; - } - - pub fn set_chunk_size(&mut self, chunk_size: u32) { - self.chunk_size = chunk_size; - } - - pub fn set_batch_size(&mut self, batch_size: u32) { - self.batch_size = batch_size; - } - - pub fn set_configuration(&mut self, config: Arc) { - self.configuration = config; - } - - pub fn set_is_chunkdict(&mut self, is_chunkdict: bool) { - self.is_chunkdict_generated = is_chunkdict; - } -} - -impl Default for BuildContext { - fn default() -> Self { - Self { - blob_id: String::new(), - aligned_chunk: false, - blob_offset: 0, - compressor: compress::Algorithm::default(), - digester: digest::Algorithm::default(), - cipher: crypt::Algorithm::None, - explicit_uidgid: true, - whiteout_spec: WhiteoutSpec::default(), - - chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, - batch_size: 0, - fs_version: RafsVersion::default(), - - conversion_type: ConversionType::default(), - source_path: PathBuf::new(), - - prefetch: Prefetch::default(), - blob_storage: None, - blob_zran_generator: None, - blob_batch_generator: None, - blob_tar_reader: None, - blob_features: BlobFeatures::empty(), - has_xattr: true, - blob_inline_meta: false, - features: Features::new(), - configuration: Arc::new(ConfigV2::default()), - blob_cache_generator: None, - is_chunkdict_generated: false, - } - } -} - -/// BuildOutput represents the output in this build. -#[derive(Default, Debug, Clone)] -pub struct BuildOutput { - /// Blob ids in the blob table of bootstrap. - pub blobs: Vec, - /// The size of output blob in this build. - pub blob_size: Option, - /// File path for the metadata blob. - pub bootstrap_path: Option, -} - -impl fmt::Display for BuildOutput { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!( - f, - "meta blob path: {}", - self.bootstrap_path.as_deref().unwrap_or("") - )?; - writeln!( - f, - "data blob size: 0x{:x}", - self.blob_size.unwrap_or_default() - )?; - write!(f, "data blobs: {:?}", self.blobs)?; - Ok(()) - } -} - -impl BuildOutput { - /// Create a new instance of [BuildOutput]. 
- pub fn new( - blob_mgr: &BlobManager, - bootstrap_storage: &Option, - ) -> Result { - let blobs = blob_mgr.get_blob_ids(); - let blob_size = blob_mgr.get_last_blob().map(|b| b.compressed_blob_size); - let bootstrap_path = if let Some(ArtifactStorage::SingleFile(p)) = bootstrap_storage { - Some(p.display().to_string()) - } else { - None - }; - - Ok(Self { - blobs, - blob_size, - bootstrap_path, - }) - } -} - -#[cfg(test)] -mod tests { - use std::sync::atomic::AtomicBool; - - use nydus_api::{BackendConfigV2, ConfigV2Internal, LocalFsConfig}; - - use super::*; - - #[test] - fn test_blob_context_from() { - let mut blob = BlobInfo::new( - 1, - "blob_id".to_string(), - 16, - 8, - 4, - 2, - BlobFeatures::INLINED_FS_META | BlobFeatures::SEPARATE | BlobFeatures::HAS_TOC, - ); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - assert!(blob - .set_blob_id_from_meta_path(source_path.as_path()) - .is_ok()); - blob.set_blob_meta_size(2); - blob.set_blob_toc_size(2); - blob.set_blob_meta_digest([32u8; 32]); - blob.set_blob_toc_digest([64u8; 32]); - blob.set_blob_meta_info(1, 2, 4, 8); - - let mut ctx = BuildContext::default(); - ctx.configuration.internal.set_blob_accessible(true); - let config = ConfigV2 { - version: 2, - backend: Some(BackendConfigV2 { - backend_type: "localfs".to_owned(), - localdisk: None, - localfs: Some(LocalFsConfig { - blob_file: source_path.to_str().unwrap().to_owned(), - dir: "/tmp".to_owned(), - alt_dirs: vec!["/var/nydus/cache".to_owned()], - }), - oss: None, - s3: None, - registry: None, - http_proxy: None, - }), - id: "id".to_owned(), - cache: None, - rafs: None, - overlay: None, - internal: ConfigV2Internal { - blob_accessible: Arc::new(AtomicBool::new(true)), - }, - }; - ctx.set_configuration(config.into()); - - let chunk_source = ChunkSource::Dict; - - let blob_ctx = BlobContext::from(&ctx, &blob, chunk_source); - - assert!(blob_ctx.is_ok()); - let blob_ctx = blob_ctx.unwrap(); - assert_eq!(blob_ctx.uncompressed_blob_size, 16); - assert!(blob_ctx.blob_meta_info_enabled); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Struct to maintain context information for the image builder. 
+ +use std::any::Any; +use std::borrow::Cow; +use std::collections::{HashMap, VecDeque}; +use std::convert::TryFrom; +use std::fs::{remove_file, rename, File, OpenOptions}; +use std::io::{BufWriter, Cursor, Read, Seek, Write}; +use std::mem::size_of; +use std::os::unix::fs::FileTypeExt; +use std::path::{Display, Path, PathBuf}; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; +use std::{fmt, fs}; + +use anyhow::{anyhow, Context, Error, Result}; +use nydus_utils::crypt::{self, Cipher, CipherContext}; +use sha2::{Digest, Sha256}; +use tar::{EntryType, Header}; +use vmm_sys_util::tempfile::TempFile; + +use nydus_api::ConfigV2; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::layout::v5::RafsV5BlobTable; +use nydus_rafs::metadata::layout::v6::{ + RafsV6BlobTable, EROFS_BLOCK_SIZE_4096, EROFS_INODE_SLOT_SIZE, +}; +use nydus_rafs::metadata::layout::RafsBlobTable; +use nydus_rafs::metadata::{Inode, RAFS_DEFAULT_CHUNK_SIZE}; +use nydus_rafs::metadata::{RafsSuperFlags, RafsVersion}; +use nydus_rafs::RafsIoWrite; +use nydus_storage::device::{BlobFeatures, BlobInfo}; +use nydus_storage::factory::BlobFactory; +use nydus_storage::meta::toc::{TocEntryList, TocLocation}; +use nydus_storage::meta::{ + toc, BatchContextGenerator, BlobChunkInfoV2Ondisk, BlobCompressionContextHeader, + BlobMetaChunkArray, BlobMetaChunkInfo, ZranContextGenerator, +}; +use nydus_utils::digest::DigestData; +use nydus_utils::{compress, digest, div_round_up, round_down, try_round_up_4k, BufReaderInfo}; + +use super::node::ChunkSource; +use crate::core::tree::TreeNode; +use crate::{ChunkDict, Feature, Features, HashChunkDict, Prefetch, PrefetchPolicy, WhiteoutSpec}; + +// TODO: select BufWriter capacity by performance testing. +pub const BUF_WRITER_CAPACITY: usize = 2 << 17; + +/// Filesystem conversion type supported by RAFS builder. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ConversionType { + DirectoryToRafs, + DirectoryToStargz, + DirectoryToTargz, + EStargzToRafs, + EStargzToRef, + EStargzIndexToRef, + TargzToRafs, + TargzToStargz, + TargzToRef, + TarToStargz, + TarToRafs, + TarToRef, + TarToTarfs, +} + +impl Default for ConversionType { + fn default() -> Self { + Self::DirectoryToRafs + } +} + +impl FromStr for ConversionType { + type Err = Error; + fn from_str(s: &str) -> Result { + match s { + "dir-rafs" => Ok(Self::DirectoryToRafs), + "dir-stargz" => Ok(Self::DirectoryToStargz), + "dir-targz" => Ok(Self::DirectoryToTargz), + "estargz-rafs" => Ok(Self::EStargzToRafs), + "estargz-ref" => Ok(Self::EStargzToRef), + "estargztoc-ref" => Ok(Self::EStargzIndexToRef), + "targz-rafs" => Ok(Self::TargzToRafs), + "targz-stargz" => Ok(Self::TargzToStargz), + "targz-ref" => Ok(Self::TargzToRef), + "tar-rafs" => Ok(Self::TarToRafs), + "tar-stargz" => Ok(Self::TarToStargz), + "tar-tarfs" => Ok(Self::TarToTarfs), + // kept for backward compatibility + "directory" => Ok(Self::DirectoryToRafs), + "stargz_index" => Ok(Self::EStargzIndexToRef), + _ => Err(anyhow!("invalid conversion type")), + } + } +} + +impl fmt::Display for ConversionType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConversionType::DirectoryToRafs => write!(f, "dir-rafs"), + ConversionType::DirectoryToStargz => write!(f, "dir-stargz"), + ConversionType::DirectoryToTargz => write!(f, "dir-targz"), + ConversionType::EStargzToRafs => write!(f, "estargz-rafs"), + ConversionType::EStargzToRef => write!(f, "estargz-ref"), + ConversionType::EStargzIndexToRef => write!(f, "estargztoc-ref"), + ConversionType::TargzToRafs => write!(f, "targz-rafs"), + ConversionType::TargzToStargz => write!(f, "targz-ref"), + ConversionType::TargzToRef => write!(f, "targz-ref"), + ConversionType::TarToRafs => write!(f, "tar-rafs"), + ConversionType::TarToRef => write!(f, "tar-ref"), + ConversionType::TarToStargz => write!(f, "tar-stargz"), + ConversionType::TarToTarfs => write!(f, "tar-tarfs"), + } + } +} + +impl ConversionType { + /// Check whether the generated image references the original OCI image data. + pub fn is_to_ref(&self) -> bool { + matches!( + self, + ConversionType::EStargzToRef + | ConversionType::EStargzIndexToRef + | ConversionType::TargzToRef + | ConversionType::TarToRef + | ConversionType::TarToTarfs + ) + } +} + +/// Filesystem based storage configuration for artifacts. +#[derive(Debug, Clone)] +pub enum ArtifactStorage { + // Won't rename user's specification + SingleFile(PathBuf), + // Will rename it from tmp file as user didn't specify a name. + FileDir(PathBuf), +} + +impl ArtifactStorage { + /// Show file path to store the generated artifacts. + pub fn display(&self) -> Display { + match self { + ArtifactStorage::SingleFile(p) => p.display(), + ArtifactStorage::FileDir(p) => p.display(), + } + } +} + +impl Default for ArtifactStorage { + fn default() -> Self { + Self::SingleFile(PathBuf::new()) + } +} + +/// ArtifactMemoryWriter provides a writer to allow writing bootstrap +/// data to a byte slice in memory. 
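Editor's note: the `FromStr` and `Display` implementations above define the stable command-line names for each conversion type, with two legacy aliases ("directory", "stargz_index") kept for compatibility. A minimal usage sketch follows; `parse_conversion_type` is a hypothetical helper and assumes the items defined in this file are in scope.

```rust
use std::str::FromStr;

// Hypothetical helper: map a CLI argument to a ConversionType and report
// whether the result references the original OCI data instead of copying it.
fn parse_conversion_type(arg: &str) -> anyhow::Result<ConversionType> {
    let ty = ConversionType::from_str(arg)?; // e.g. "dir-rafs", "tar-tarfs", "estargztoc-ref"
    if ty.is_to_ref() {
        println!("{} produces an image that references the source blob", ty);
    }
    Ok(ty)
}
```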
+struct ArtifactMemoryWriter(Cursor>); + +impl Default for ArtifactMemoryWriter { + fn default() -> Self { + Self(Cursor::new(Vec::new())) + } +} + +impl RafsIoWrite for ArtifactMemoryWriter { + fn as_any(&self) -> &dyn Any { + &self.0 + } + + fn as_bytes(&mut self) -> std::io::Result> { + self.0.set_position(0); + Ok(Cow::Borrowed(self.0.get_ref().as_slice())) + } +} + +impl Seek for ArtifactMemoryWriter { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.0.seek(pos) + } +} + +impl Write for ArtifactMemoryWriter { + fn write(&mut self, bytes: &[u8]) -> std::io::Result { + self.0.write(bytes) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +struct ArtifactFileWriter(pub ArtifactWriter); + +impl ArtifactFileWriter { + pub fn finalize(&mut self, name: Option) -> Result<()> { + self.0.finalize(name) + } +} + +impl RafsIoWrite for ArtifactFileWriter { + fn as_any(&self) -> &dyn Any { + &self.0 + } + + fn finalize(&mut self, name: Option) -> Result<()> { + self.0.finalize(name) + } + + fn as_bytes(&mut self) -> std::io::Result> { + self.0.file.flush()?; + self.0.reader.seek_offset(0)?; + + let mut buf = Vec::new(); + self.0.reader.read_to_end(&mut buf)?; + + Ok(Cow::Owned(buf)) + } +} + +impl ArtifactFileWriter { + pub fn set_len(&mut self, s: u64) -> std::io::Result<()> { + self.0.file.get_mut().set_len(s) + } +} + +impl Seek for ArtifactFileWriter { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.0.file.seek(pos) + } +} + +impl Write for ArtifactFileWriter { + fn write(&mut self, bytes: &[u8]) -> std::io::Result { + self.0.write(bytes) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +pub trait Artifact: Write { + fn pos(&self) -> Result; + fn finalize(&mut self, name: Option) -> Result<()>; +} + +#[derive(Default)] +pub struct NoopArtifactWriter { + pos: usize, +} + +impl Write for NoopArtifactWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.pos += buf.len(); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl Artifact for NoopArtifactWriter { + fn pos(&self) -> Result { + Ok(self.pos as u64) + } + + fn finalize(&mut self, _name: Option) -> Result<()> { + Ok(()) + } +} + +/// ArtifactWriter provides a writer to allow writing bootstrap +/// or blob data to a single file or in a directory. +pub struct ArtifactWriter { + pos: usize, + file: BufWriter, + reader: File, + storage: ArtifactStorage, + // Keep this because tmp file will be removed automatically when it is dropped. + // But we will rename/link the tmp file before it is removed. + tmp_file: Option, +} + +impl Write for ArtifactWriter { + fn write(&mut self, bytes: &[u8]) -> std::io::Result { + let n = self.file.write(bytes)?; + self.pos += n; + Ok(n) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.file.flush() + } +} + +impl ArtifactWriter { + /// Create a new instance of [ArtifactWriter] from a [ArtifactStorage] configuration object. + pub fn new(storage: ArtifactStorage) -> Result { + match storage { + ArtifactStorage::SingleFile(ref p) => { + let mut opener = &mut OpenOptions::new(); + opener = opener.write(true).create(true); + if let Ok(md) = fs::metadata(p) { + let ty = md.file_type(); + // Make it as the writer side of FIFO file, no truncate flag because it has + // been created by the reader side. 
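Editor's note: every blob or bootstrap sink in this file goes through the small `Artifact` trait above, which is just `Write` plus a position cursor and a `finalize` step. A minimal sketch of that contract using `NoopArtifactWriter`, which only counts bytes and is handy for dry runs; `measure_only` is a hypothetical function, not part of the diff.

```rust
use std::io::Write;

// Write data into the no-op sink: bytes are counted but never stored.
fn measure_only(data: &[u8]) -> anyhow::Result<u64> {
    let mut writer = NoopArtifactWriter::default();
    writer.write_all(data)?; // provided by std::io::Write
    writer.finalize(None)?;  // nothing to rename or remove
    writer.pos()             // total number of bytes "written"
}
```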
+ if !ty.is_fifo() { + opener = opener.truncate(true); + } + } + let b = BufWriter::with_capacity( + BUF_WRITER_CAPACITY, + opener + .open(p) + .with_context(|| format!("failed to open file {}", p.display()))?, + ); + let reader = OpenOptions::new() + .read(true) + .open(p) + .with_context(|| format!("failed to open file {}", p.display()))?; + Ok(Self { + pos: 0, + file: b, + reader, + storage, + tmp_file: None, + }) + } + ArtifactStorage::FileDir(ref p) => { + // Better we can use open(2) O_TMPFILE, but for compatibility sake, we delay this job. + // TODO: Blob dir existence? + let tmp = TempFile::new_in(p) + .with_context(|| format!("failed to create temp file in {}", p.display()))?; + let tmp2 = tmp.as_file().try_clone()?; + let reader = OpenOptions::new() + .read(true) + .open(tmp.as_path()) + .with_context(|| format!("failed to open file {}", tmp.as_path().display()))?; + Ok(Self { + pos: 0, + file: BufWriter::with_capacity(BUF_WRITER_CAPACITY, tmp2), + reader, + storage, + tmp_file: Some(tmp), + }) + } + } + } +} + +impl Artifact for ArtifactWriter { + /// Get the current write position. + fn pos(&self) -> Result { + Ok(self.pos as u64) + } + + /// Finalize the metadata/data blob. + /// + /// When `name` is None, it means that the blob is empty and should be removed. + fn finalize(&mut self, name: Option) -> Result<()> { + self.file.flush()?; + + if let Some(n) = name { + if let ArtifactStorage::FileDir(s) = &self.storage { + let path = Path::new(s).join(n); + if !path.exists() { + if let Some(tmp_file) = &self.tmp_file { + rename(tmp_file.as_path(), &path).with_context(|| { + format!( + "failed to rename blob {:?} to {:?}", + tmp_file.as_path(), + path + ) + })?; + } + } + } + } else if let ArtifactStorage::SingleFile(s) = &self.storage { + if let Ok(md) = s.metadata() { + if md.is_file() { + remove_file(s).with_context(|| format!("failed to remove blob {:?}", s))?; + } + } + } + + Ok(()) + } +} + +pub struct BlobCacheGenerator { + blob_data: Mutex, + blob_meta: Mutex, +} + +impl BlobCacheGenerator { + pub fn new(storage: ArtifactStorage) -> Result { + Ok(BlobCacheGenerator { + blob_data: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage.clone())?)), + blob_meta: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)), + }) + } + + pub fn write_blob_meta( + &self, + data: &[u8], + header: &BlobCompressionContextHeader, + ) -> Result<()> { + let mut guard = self.blob_meta.lock().unwrap(); + let aligned_uncompressed_size = try_round_up_4k(data.len() as u64).ok_or(anyhow!( + format!("invalid input {} for try_round_up_4k", data.len()) + ))?; + guard.set_len( + aligned_uncompressed_size + size_of::() as u64, + )?; + guard + .write_all(data) + .context("failed to write blob meta data")?; + guard.seek(std::io::SeekFrom::Start(aligned_uncompressed_size))?; + guard + .write_all(header.as_bytes()) + .context("failed to write blob meta header")?; + Ok(()) + } + + pub fn write_blob_data( + &self, + chunk_data: &[u8], + chunk_info: &ChunkWrapper, + aligned_d_size: u32, + ) -> Result<()> { + let mut guard = self.blob_data.lock().unwrap(); + let curr_pos = guard.seek(std::io::SeekFrom::End(0))?; + if curr_pos < chunk_info.uncompressed_offset() + aligned_d_size as u64 { + guard.set_len(chunk_info.uncompressed_offset() + aligned_d_size as u64)?; + } + + guard.seek(std::io::SeekFrom::Start(chunk_info.uncompressed_offset()))?; + guard + .write_all(&chunk_data) + .context("failed to write blob cache")?; + Ok(()) + } + + pub fn finalize(&self, name: &str) -> Result<()> { + let 
blob_data_name = format!("{}.blob.data", name); + let mut guard = self.blob_data.lock().unwrap(); + guard.finalize(Some(blob_data_name))?; + drop(guard); + + let blob_meta_name = format!("{}.blob.meta", name); + let mut guard = self.blob_meta.lock().unwrap(); + guard.finalize(Some(blob_meta_name)) + } +} + +/// BlobContext is used to hold the blob information of a layer during build. +pub struct BlobContext { + /// Blob id (user specified or sha256(blob)). + pub blob_id: String, + pub blob_hash: Sha256, + pub blob_compressor: compress::Algorithm, + pub blob_digester: digest::Algorithm, + pub blob_cipher: crypt::Algorithm, + pub blob_prefetch_size: u64, + /// Whether to generate blob metadata information. + pub blob_meta_info_enabled: bool, + /// Data chunks stored in the data blob, for v6. + pub blob_meta_info: BlobMetaChunkArray, + /// Blob metadata header stored in the data blob, for v6 + pub blob_meta_header: BlobCompressionContextHeader, + /// Blob chunk digest array. + pub blob_chunk_digest: Vec, + + /// Final compressed blob file size. + pub compressed_blob_size: u64, + /// Final expected blob cache file size. + pub uncompressed_blob_size: u64, + + /// Current blob offset cursor for writing to disk file. + pub current_compressed_offset: u64, + pub current_uncompressed_offset: u64, + + /// The number of counts in a blob by the index of blob table. + pub chunk_count: u32, + /// Chunk slice size. + pub chunk_size: u32, + /// Whether the blob is from chunk dict. + pub chunk_source: ChunkSource, + + // SHA256 digest of blob ToC content, including the toc tar header. + // It's all zero for blobs with inlined-meta. + pub blob_toc_digest: [u8; 32], + // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and + // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special + // handling. + pub blob_meta_digest: [u8; 32], + // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. + pub blob_meta_size: u64, + // Size of blob ToC content, it's zero for blobs with inlined-meta. + pub blob_toc_size: u32, + + pub entry_list: toc::TocEntryList, + /// Cipher to encrypt the RAFS blobs. + pub cipher_object: Arc, + pub cipher_ctx: Option, +} + +impl BlobContext { + /// Create a new instance of [BlobContext]. 
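Editor's note: `BlobCacheGenerator::write_blob_meta` above first rounds the uncompressed metadata up to a 4 KiB boundary and only then appends the compression context header, so the header always starts at an aligned offset. A sketch of the resulting file length; `blob_meta_file_len` is a hypothetical helper mirroring that layout.

```rust
// Layout written by write_blob_meta: [ meta data | padding to 4 KiB | header ].
// Returns None on overflow, matching the checked try_round_up_4k call above.
fn blob_meta_file_len(meta_len: u64, header_len: u64) -> Option<u64> {
    let aligned = meta_len.checked_add(4095)? & !4095;
    aligned.checked_add(header_len)
}
// blob_meta_file_len(10, 4096) == Some(8192)
```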
+ #[allow(clippy::too_many_arguments)] + pub fn new( + blob_id: String, + blob_offset: u64, + features: BlobFeatures, + compressor: compress::Algorithm, + digester: digest::Algorithm, + cipher: crypt::Algorithm, + cipher_object: Arc, + cipher_ctx: Option, + ) -> Self { + let blob_meta_info = if features.contains(BlobFeatures::CHUNK_INFO_V2) { + BlobMetaChunkArray::new_v2() + } else { + BlobMetaChunkArray::new_v1() + }; + let mut blob_ctx = Self { + blob_id, + blob_hash: Sha256::new(), + blob_compressor: compressor, + blob_digester: digester, + blob_cipher: cipher, + blob_prefetch_size: 0, + blob_meta_info_enabled: false, + blob_meta_info, + blob_meta_header: BlobCompressionContextHeader::default(), + blob_chunk_digest: Vec::new(), + + compressed_blob_size: 0, + uncompressed_blob_size: 0, + + current_compressed_offset: blob_offset, + current_uncompressed_offset: 0, + + chunk_count: 0, + chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, + chunk_source: ChunkSource::Build, + + blob_toc_digest: [0u8; 32], + blob_meta_digest: [0u8; 32], + blob_meta_size: 0, + blob_toc_size: 0, + + entry_list: toc::TocEntryList::new(), + cipher_object, + cipher_ctx, + }; + + blob_ctx + .blob_meta_header + .set_aligned(features.contains(BlobFeatures::ALIGNED)); + blob_ctx + .blob_meta_header + .set_inlined_fs_meta(features.contains(BlobFeatures::INLINED_FS_META)); + blob_ctx + .blob_meta_header + .set_chunk_info_v2(features.contains(BlobFeatures::CHUNK_INFO_V2)); + blob_ctx + .blob_meta_header + .set_ci_batch(features.contains(BlobFeatures::BATCH)); + blob_ctx + .blob_meta_header + .set_ci_zran(features.contains(BlobFeatures::ZRAN)); + blob_ctx + .blob_meta_header + .set_separate_blob(features.contains(BlobFeatures::SEPARATE)); + blob_ctx + .blob_meta_header + .set_inlined_chunk_digest(features.contains(BlobFeatures::INLINED_CHUNK_DIGEST)); + blob_ctx + .blob_meta_header + .set_has_tar_header(features.contains(BlobFeatures::HAS_TAR_HEADER)); + blob_ctx + .blob_meta_header + .set_has_toc(features.contains(BlobFeatures::HAS_TOC)); + blob_ctx + .blob_meta_header + .set_cap_tar_toc(features.contains(BlobFeatures::CAP_TAR_TOC)); + blob_ctx + .blob_meta_header + .set_tarfs(features.contains(BlobFeatures::TARFS)); + blob_ctx + .blob_meta_header + .set_encrypted(features.contains(BlobFeatures::ENCRYPTED)); + blob_ctx + .blob_meta_header + .set_is_chunkdict_generated(features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED)); + + blob_ctx + } + + /// Create a new instance of [BlobContext] from `BlobInfo` object. + pub fn from(ctx: &BuildContext, blob: &BlobInfo, chunk_source: ChunkSource) -> Result { + let mut compressed_blob_size = blob.compressed_size(); + let mut blob_meta_size = blob.blob_meta_size(); + let mut toc_size = blob.blob_toc_size(); + let mut blob_meta_digest = blob.blob_meta_digest().to_owned(); + let mut toc_digest = blob.blob_toc_digest().to_owned(); + let mut blob_id = blob.raw_blob_id().to_string(); + let mut features = blob.features(); + + // Fixes up blob info objects from inlined-meta blobs. 
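Editor's note: `BlobContext::new` above chooses between the v1 and v2 chunk-info arrays from the feature bits and then mirrors every feature into `blob_meta_header`. A minimal construction sketch with hypothetical arguments (default compressor/digester, no encryption); it assumes the no-op cipher object can be built with `Default::default()`, as `new_blob_ctx()` below does.

```rust
// Build a context for a fresh blob carrying v2 chunk-info metadata.
fn example_blob_ctx() -> BlobContext {
    let features = BlobFeatures::CAP_TAR_TOC | BlobFeatures::CHUNK_INFO_V2;
    let ctx = BlobContext::new(
        "example-blob".to_string(),    // hypothetical blob id
        0,                             // blob offset
        features,
        compress::Algorithm::default(),
        digest::Algorithm::default(),
        crypt::Algorithm::None,
        Arc::new(Default::default()),  // no-op cipher object, as in new_blob_ctx()
        None,                          // no cipher context
    );
    debug_assert!(matches!(ctx.blob_meta_info, BlobMetaChunkArray::V2(_)));
    ctx
}
```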
+ if chunk_source == ChunkSource::Dict || chunk_source == ChunkSource::Parent { + if features.contains(BlobFeatures::INLINED_FS_META) { + features &= !BlobFeatures::INLINED_FS_META; + + if !features.contains(BlobFeatures::SEPARATE) { + blob_id = blob.blob_id(); + } + + if ctx.configuration.internal.blob_accessible() { + let backend_config = ctx.configuration.get_backend_config().map_err(|e| { + anyhow!("failed to get backend storage configuration, {}", e) + })?; + let blob_mgr = BlobFactory::new_backend(backend_config, "fix-inlined-meta")?; + + if features.contains(BlobFeatures::SEPARATE) { + if let Ok(digest) = blob.get_blob_meta_id() { + let reader = blob_mgr.get_reader(&digest).map_err(|e| { + anyhow!("failed to get reader for blob {}, {}", digest, e) + })?; + let size = reader + .blob_size() + .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?; + if let Ok(v) = hex::decode(digest) { + if v.len() == 32 { + blob_meta_digest.copy_from_slice(&v[..32]); + blob_meta_size = size; + } + } + if blob.has_feature(BlobFeatures::HAS_TOC) { + if let Ok(toc) = TocEntryList::read_from_blob::( + reader.as_ref(), + None, + &TocLocation::default(), + ) { + toc_digest = toc.toc_digest().data; + toc_size = toc.toc_size(); + } + } + } + } else { + let reader = blob_mgr.get_reader(&blob_id).map_err(|e| { + anyhow!("failed to get reader for blob {}, {}", blob_id, e) + })?; + compressed_blob_size = reader + .blob_size() + .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?; + if blob.has_feature(BlobFeatures::HAS_TOC) { + if let Ok(toc) = TocEntryList::read_from_blob::( + reader.as_ref(), + None, + &TocLocation::default(), + ) { + toc_digest = toc.toc_digest().data; + toc_size = toc.toc_size(); + } + } + } + } else if features.contains(BlobFeatures::SEPARATE) { + if let Ok(digest) = blob.get_blob_meta_id() { + if let Ok(v) = hex::decode(digest) { + if v.len() == 32 { + blob_meta_digest.copy_from_slice(&v[..32]); + } + } + } + } + } else if !blob.has_feature(BlobFeatures::CAP_TAR_TOC) + && !ctx.configuration.internal.blob_accessible() + { + blob_id = blob.blob_id(); + } + } + + let (cipher, cipher_object, cipher_ctx) = blob.get_cipher_info(); + + let mut blob_ctx = Self::new( + blob_id, + 0, + features, + blob.compressor(), + blob.digester(), + cipher, + cipher_object, + cipher_ctx, + ); + blob_ctx.blob_prefetch_size = blob.prefetch_size(); + blob_ctx.chunk_count = blob.chunk_count(); + blob_ctx.uncompressed_blob_size = blob.uncompressed_size(); + blob_ctx.compressed_blob_size = compressed_blob_size; + blob_ctx.chunk_size = blob.chunk_size(); + blob_ctx.chunk_source = chunk_source; + blob_ctx.blob_meta_digest = blob_meta_digest; + blob_ctx.blob_meta_size = blob_meta_size; + blob_ctx.blob_toc_digest = toc_digest; + blob_ctx.blob_toc_size = toc_size; + + if blob.meta_ci_is_valid() { + blob_ctx + .blob_meta_header + .set_ci_compressor(blob.meta_ci_compressor()); + blob_ctx.blob_meta_header.set_ci_entries(blob.chunk_count()); + blob_ctx + .blob_meta_header + .set_ci_compressed_offset(blob.meta_ci_offset()); + blob_ctx + .blob_meta_header + .set_ci_compressed_size(blob.meta_ci_compressed_size()); + blob_ctx + .blob_meta_header + .set_ci_uncompressed_size(blob.meta_ci_uncompressed_size()); + blob_ctx.blob_meta_info_enabled = true; + } + + Ok(blob_ctx) + } + + /// Set chunk size for the blob. 
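Editor's note: the fix-up above only trusts a blob meta id that decodes to exactly 32 bytes; shorter or malformed hex strings leave `blob_meta_digest` untouched rather than raising an error. A standalone sketch of that check; `parse_meta_digest` is a hypothetical helper.

```rust
// Decode a hex blob meta id into a 32-byte digest, as done above; any other
// length is ignored instead of being treated as an error.
fn parse_meta_digest(hex_id: &str) -> Option<[u8; 32]> {
    let bytes = hex::decode(hex_id).ok()?;
    if bytes.len() == 32 {
        let mut digest = [0u8; 32];
        digest.copy_from_slice(&bytes);
        Some(digest)
    } else {
        None
    }
}
```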
+ pub fn set_chunk_size(&mut self, chunk_size: u32) { + self.chunk_size = chunk_size; + } + + // TODO: check the logic to reset prefetch size + pub fn set_blob_prefetch_size(&mut self, ctx: &BuildContext) { + if (self.uncompressed_blob_size > 0 + || (ctx.conversion_type == ConversionType::EStargzIndexToRef + && !self.blob_id.is_empty())) + && ctx.prefetch.policy != PrefetchPolicy::Blob + { + self.blob_prefetch_size = 0; + } + } + + pub fn set_meta_info_enabled(&mut self, enable: bool) { + self.blob_meta_info_enabled = enable; + } + + pub fn set_cipher_info( + &mut self, + cipher_object: Arc, + cipher_ctx: Option, + ) { + self.cipher_object = cipher_object; + self.cipher_ctx = cipher_ctx; + } + + pub fn add_chunk_meta_info( + &mut self, + chunk: &ChunkWrapper, + chunk_info: Option, + ) -> Result<()> { + if self.blob_meta_info_enabled { + assert_eq!(chunk.index() as usize, self.blob_meta_info.len()); + match &self.blob_meta_info { + BlobMetaChunkArray::V1(_) => { + self.blob_meta_info.add_v1( + chunk.compressed_offset(), + chunk.compressed_size(), + chunk.uncompressed_offset(), + chunk.uncompressed_size(), + ); + self.blob_chunk_digest.push(chunk.id().data); + } + BlobMetaChunkArray::V2(_) => { + if let Some(mut info) = chunk_info { + info.set_uncompressed_offset(chunk.uncompressed_offset()); + self.blob_meta_info.add_v2_info(info); + } else { + self.blob_meta_info.add_v2( + chunk.compressed_offset(), + chunk.compressed_size(), + chunk.uncompressed_offset(), + chunk.uncompressed_size(), + chunk.is_compressed(), + chunk.is_encrypted(), + chunk.is_batch(), + 0, + ); + } + self.blob_chunk_digest.push(chunk.id().data); + } + } + } + + Ok(()) + } + + /// Allocate a count index sequentially in a blob. + pub fn alloc_chunk_index(&mut self) -> Result { + let index = self.chunk_count; + + // Rafs v6 only supports 24 bit chunk id. + if index >= 0xff_ffff { + Err(Error::msg( + "the number of chunks in blob exceeds the u32 limit", + )) + } else { + self.chunk_count += 1; + Ok(index) + } + } + + /// Get blob id if the blob has some chunks. + pub fn blob_id(&mut self) -> Option { + if self.uncompressed_blob_size > 0 { + Some(self.blob_id.to_string()) + } else { + None + } + } + + /// Helper to write data to blob and update blob hash. + pub fn write_data(&mut self, blob_writer: &mut dyn Artifact, data: &[u8]) -> Result<()> { + blob_writer.write_all(data)?; + self.blob_hash.update(data); + Ok(()) + } + + /// Helper to write a tar header to blob and update blob hash. + pub fn write_tar_header( + &mut self, + blob_writer: &mut dyn Artifact, + name: &str, + size: u64, + ) -> Result
{ + // The `inline-bootstrap` option merges the blob and bootstrap into one + // file. We need some header to index the location of the blob and bootstrap, + // write_tar_header uses tar header that arranges the data as follows: + // data | tar_header | data | tar_header + // This is a tar-like structure, except that we put the tar header after the + // data. The advantage is that we do not need to determine the size of the data + // first, so that we can write the blob data by stream without seek to improve + // the performance of the blob dump by using fifo. + + let mut header = Header::new_gnu(); + header.set_path(Path::new(name))?; + header.set_entry_type(EntryType::Regular); + header.set_size(size); + // The checksum must be set to ensure that the tar reader implementation + // in golang can correctly parse the header. + header.set_cksum(); + + blob_writer.write_all(header.as_bytes())?; + self.blob_hash.update(header.as_bytes()); + Ok(header) + } + + /// Get offset of compressed blob, since current_compressed_offset + /// is always >= compressed_blob_size, we can safely subtract here. + pub fn compressed_offset(&self) -> u64 { + assert!(self.current_compressed_offset >= self.compressed_blob_size); + self.current_compressed_offset - self.compressed_blob_size + } +} + +/// BlobManager stores all blob related information during build. +pub struct BlobManager { + /// Some layers may not have a blob (only have metadata), so Option + /// is used here, the vector index will be as the layer index. + /// + /// We can get blob index for a layer by using: + /// `self.blobs.iter().flatten().collect()[layer_index];` + blobs: Vec, + current_blob_index: Option, + /// Chunk dictionary to hold chunks from an extra chunk dict file. + /// Used for chunk data de-duplication within the whole image. + pub(crate) global_chunk_dict: Arc, + /// Chunk dictionary to hold chunks from all layers. + /// Used for chunk data de-duplication between layers (with `--parent-bootstrap`) + /// or within layer (with `--inline-bootstrap`). + pub(crate) layered_chunk_dict: HashChunkDict, +} + +impl BlobManager { + /// Create a new instance of [BlobManager]. + pub fn new(digester: digest::Algorithm) -> Self { + Self { + blobs: Vec::new(), + current_blob_index: None, + global_chunk_dict: Arc::new(()), + layered_chunk_dict: HashChunkDict::new(digester), + } + } + + fn new_blob_ctx(ctx: &BuildContext) -> Result { + let (cipher_object, cipher_ctx) = match ctx.cipher { + crypt::Algorithm::None => (Default::default(), None), + crypt::Algorithm::Aes128Xts => { + let key = crypt::Cipher::generate_random_key(ctx.cipher)?; + let iv = crypt::Cipher::generate_random_iv()?; + let cipher_ctx = CipherContext::new(key, iv, false, ctx.cipher)?; + ( + ctx.cipher.new_cipher().ok().unwrap_or_default(), + Some(cipher_ctx), + ) + } + _ => { + return Err(anyhow!(format!( + "cipher algorithm {:?} does not support", + ctx.cipher + ))) + } + }; + let mut blob_ctx = BlobContext::new( + ctx.blob_id.clone(), + ctx.blob_offset, + ctx.blob_features, + ctx.compressor, + ctx.digester, + ctx.cipher, + Arc::new(cipher_object), + cipher_ctx, + ); + blob_ctx.set_chunk_size(ctx.chunk_size); + blob_ctx.set_meta_info_enabled( + ctx.fs_version == RafsVersion::V6 && ctx.conversion_type != ConversionType::TarToTarfs, + ); + + Ok(blob_ctx) + } + + /// Get the current blob object or create one if no current blob available. 
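Editor's note: as the comment above explains, the inline-bootstrap layout places each GNU tar header after its payload, so the payload can be streamed without knowing its size in advance. A usage sketch combining the two helpers; `append_inline_entry` is a hypothetical function.

```rust
// Append one "data | tar_header" pair to the blob, updating the blob hash.
fn append_inline_entry(
    blob: &mut BlobContext,
    writer: &mut dyn Artifact,
    name: &str,
    payload: &[u8],
) -> anyhow::Result<()> {
    blob.write_data(writer, payload)?;                          // payload first
    blob.write_tar_header(writer, name, payload.len() as u64)?; // header after it
    Ok(())
}
```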
+ pub fn get_or_create_current_blob( + &mut self, + ctx: &BuildContext, + ) -> Result<(u32, &mut BlobContext)> { + if self.current_blob_index.is_none() { + let blob_ctx = Self::new_blob_ctx(ctx)?; + self.current_blob_index = Some(self.alloc_index()?); + self.add_blob(blob_ctx); + } + // Safe to unwrap because the blob context has been added. + Ok(self.get_current_blob().unwrap()) + } + + /// Get the current blob object. + pub fn get_current_blob(&mut self) -> Option<(u32, &mut BlobContext)> { + if let Some(idx) = self.current_blob_index { + Some((idx, &mut self.blobs[idx as usize])) + } else { + None + } + } + + /// Get or cerate blob for chunkdict, this is used for chunk deduplication. + pub fn get_or_cerate_blob_for_chunkdict( + &mut self, + ctx: &BuildContext, + id: &str, + ) -> Result<(u32, &mut BlobContext)> { + if self.get_blob_idx_by_id(id).is_none() { + let blob_ctx = Self::new_blob_ctx(ctx)?; + self.current_blob_index = Some(self.alloc_index()?); + self.add_blob(blob_ctx); + } else { + self.current_blob_index = self.get_blob_idx_by_id(id); + } + let (_, blob_ctx) = self.get_current_blob().unwrap(); + if blob_ctx.blob_id.is_empty() { + blob_ctx.blob_id = id.to_string(); + } + // Safe to unwrap because the blob context has been added. + Ok(self.get_current_blob().unwrap()) + } + + /// Determine if the given blob has been created. + pub fn has_blob(&self, blob_id: &str) -> bool { + self.get_blob_idx_by_id(blob_id).is_some() + } + + /// Set the global chunk dictionary for chunk deduplication. + pub fn set_chunk_dict(&mut self, dict: Arc) { + self.global_chunk_dict = dict + } + + /// Get the global chunk dictionary for chunk deduplication. + pub fn get_chunk_dict(&self) -> Arc { + self.global_chunk_dict.clone() + } + + /// Allocate a blob index sequentially. + /// + /// This should be paired with Self::add() and keep in consistence. + pub fn alloc_index(&self) -> Result { + // Rafs v6 only supports 256 blobs. + u8::try_from(self.blobs.len()) + .map(|v| v as u32) + .with_context(|| Error::msg("too many blobs")) + } + + /// Get number of blobs managed by the manager. + pub fn len(&self) -> usize { + self.blobs.len() + } + + /// Check whether there's managed blobs. + pub fn is_empty(&self) -> bool { + self.blobs.is_empty() + } + + /// Add a blob context to manager + /// + /// This should be paired with Self::alloc_index() and keep in consistence. + pub fn add_blob(&mut self, blob_ctx: BlobContext) { + self.blobs.push(blob_ctx); + } + + /// Get all blob contexts (include the blob context that does not have a blob). + pub fn get_blobs(&self) -> Vec<&BlobContext> { + self.blobs.iter().collect() + } + + pub fn get_blob(&self, idx: usize) -> Option<&BlobContext> { + self.blobs.get(idx) + } + + pub fn take_blob(&mut self, idx: usize) -> BlobContext { + self.blobs.remove(idx) + } + + pub fn get_last_blob(&self) -> Option<&BlobContext> { + self.blobs.last() + } + + pub fn get_blob_idx_by_id(&self, id: &str) -> Option { + for (idx, blob) in self.blobs.iter().enumerate() { + if blob.blob_id.eq(id) { + return Some(idx as u32); + } + } + None + } + + pub fn get_blob_ids(&self) -> Vec { + self.blobs.iter().map(|b| b.blob_id.to_owned()).collect() + } + + /// Prepend all blobs from `blob_table` to the blob manager. 
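Editor's note: `alloc_index` above caps a build at 256 data blobs because RAFS v6 blob indexes must fit in a `u8`, while `get_or_create_current_blob` lazily creates the first blob context on demand. A short sketch of that flow; `current_blob_index` is a hypothetical function.

```rust
// Make sure there is a current blob and return its blob-table index.
fn current_blob_index(mgr: &mut BlobManager, ctx: &BuildContext) -> anyhow::Result<u32> {
    let (index, blob_ctx) = mgr.get_or_create_current_blob(ctx)?;
    debug_assert!(index < 256);                            // enforced by alloc_index()
    debug_assert_eq!(blob_ctx.chunk_size, ctx.chunk_size); // set by new_blob_ctx()
    Ok(index)
}
```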
+ pub fn extend_from_blob_table( + &mut self, + ctx: &BuildContext, + blob_table: Vec>, + ) -> Result<()> { + let mut blobs: Vec = Vec::new(); + for blob in blob_table.iter() { + let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Parent)?; + blobs.push(ctx); + } + if let Some(curr) = self.current_blob_index { + self.current_blob_index = Some(curr + blobs.len() as u32); + blobs.append(&mut self.blobs); + } else { + assert!(self.blobs.is_empty()); + } + self.blobs = blobs; + Ok(()) + } + + /// Import all blobs from the global chunk dictionary for later chunk deduplication. + /// + /// The order to import blobs from parent bootstrap and chunk dictionary is important. + /// All blobs from parent bootstrap must be imported first, otherwise we need to fix blob index + /// of chunks from parent bootstrap. + pub fn extend_from_chunk_dict(&mut self, ctx: &BuildContext) -> Result<()> { + let blobs = self.global_chunk_dict.get_blobs(); + + for blob in blobs.iter() { + if let Some(real_idx) = self.get_blob_idx_by_id(&blob.blob_id()) { + self.global_chunk_dict + .set_real_blob_idx(blob.blob_index(), real_idx); + } else { + let idx = self.alloc_index()?; + let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Dict)?; + self.add_blob(ctx); + self.global_chunk_dict + .set_real_blob_idx(blob.blob_index(), idx); + } + } + + Ok(()) + } + + /// Generate a [RafsBlobTable] from all blobs managed by the manager. + pub fn to_blob_table(&self, build_ctx: &BuildContext) -> Result { + let mut blob_table = match build_ctx.fs_version { + RafsVersion::V5 => RafsBlobTable::V5(RafsV5BlobTable::new()), + RafsVersion::V6 => RafsBlobTable::V6(RafsV6BlobTable::new()), + }; + + for ctx in &self.blobs { + let blob_id = ctx.blob_id.clone(); + let blob_prefetch_size = u32::try_from(ctx.blob_prefetch_size)?; + let chunk_count = ctx.chunk_count; + let decompressed_blob_size = ctx.uncompressed_blob_size; + let compressed_blob_size = ctx.compressed_blob_size; + let mut flags = RafsSuperFlags::empty(); + match &mut blob_table { + RafsBlobTable::V5(table) => { + let blob_features = BlobFeatures::from_bits(ctx.blob_meta_header.features()) + .ok_or_else(|| anyhow!("invalid blob features"))?; + flags |= RafsSuperFlags::from(ctx.blob_compressor); + flags |= RafsSuperFlags::from(ctx.blob_digester); + table.add( + blob_id, + 0, + blob_prefetch_size, + ctx.chunk_size, + chunk_count, + decompressed_blob_size, + compressed_blob_size, + blob_features, + flags, + build_ctx.is_chunkdict_generated, + ); + } + RafsBlobTable::V6(table) => { + flags |= RafsSuperFlags::from(ctx.blob_compressor); + flags |= RafsSuperFlags::from(ctx.blob_digester); + flags |= RafsSuperFlags::from(ctx.blob_cipher); + table.add( + blob_id, + 0, + blob_prefetch_size, + ctx.chunk_size, + chunk_count, + decompressed_blob_size, + compressed_blob_size, + flags, + ctx.blob_meta_digest, + ctx.blob_toc_digest, + ctx.blob_meta_size, + ctx.blob_toc_size, + build_ctx.is_chunkdict_generated, + ctx.blob_meta_header, + ctx.cipher_object.clone(), + ctx.cipher_ctx.clone(), + ); + } + } + } + + Ok(blob_table) + } +} + +/// BootstrapContext is used to hold in memory data of bootstrap during build. +pub struct BootstrapContext { + /// This build has a parent bootstrap. + pub layered: bool, + /// Cache node index for hardlinks, HashMap<(layer_index, real_inode, dev), Vec>. 
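Editor's note: as stated above, blobs from a parent bootstrap must be imported before blobs from the chunk dictionary, otherwise blob indexes already referenced by parent chunks would have to be rewritten. A sketch of that ordering; `import_existing_blobs` is a hypothetical function and the element type of `blob_table` is assumed to be `Arc<BlobInfo>`, which the stripped generics above do not show.

```rust
// Import order matters: parent bootstrap blobs first, chunk dictionary second.
fn import_existing_blobs(
    mgr: &mut BlobManager,
    ctx: &BuildContext,
    parent_blobs: Vec<Arc<BlobInfo>>, // assumed element type
) -> anyhow::Result<()> {
    mgr.extend_from_blob_table(ctx, parent_blobs)?;
    mgr.extend_from_chunk_dict(ctx)?;
    Ok(())
}
```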
+ pub(crate) inode_map: HashMap<(u16, Inode, u64), Vec>, + /// Current position to write in f_bootstrap + pub(crate) offset: u64, + pub(crate) writer: Box, + /// Not fully used blocks + pub(crate) v6_available_blocks: Vec>, + + next_ino: Inode, +} + +impl BootstrapContext { + /// Create a new instance of [BootstrapContext]. + pub fn new(storage: Option, layered: bool) -> Result { + let writer = if let Some(storage) = storage { + Box::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)) as Box + } else { + Box::::default() as Box + }; + + Ok(Self { + layered, + inode_map: HashMap::new(), + next_ino: 1, + offset: EROFS_BLOCK_SIZE_4096, + writer, + v6_available_blocks: vec![ + VecDeque::new(); + EROFS_BLOCK_SIZE_4096 as usize / EROFS_INODE_SLOT_SIZE + ], + }) + } + + /// Align the write position. + pub fn align_offset(&mut self, align_size: u64) { + if self.offset % align_size > 0 { + self.offset = div_round_up(self.offset, align_size) * align_size; + } + } + + /// Get the next available inode number. + pub(crate) fn get_next_ino(&self) -> Inode { + self.next_ino + } + + /// Generate next inode number. + pub(crate) fn generate_next_ino(&mut self) -> Inode { + let ino = self.next_ino; + self.next_ino += 1; + ino + } + + // Only used to allocate space for metadata(inode / inode + inline data). + // Try to find an used block with no less than `size` space left. + // If found it, return the offset where we can store data. + // If not, return 0. + pub(crate) fn allocate_available_block(&mut self, size: u64, block_size: u64) -> u64 { + if size >= block_size { + return 0; + } + + let min_idx = div_round_up(size, EROFS_INODE_SLOT_SIZE as u64) as usize; + let max_idx = div_round_up(block_size, EROFS_INODE_SLOT_SIZE as u64) as usize; + + for idx in min_idx..max_idx { + let blocks = &mut self.v6_available_blocks[idx]; + if let Some(mut offset) = blocks.pop_front() { + offset += block_size - (idx * EROFS_INODE_SLOT_SIZE) as u64; + self.append_available_block( + offset + (min_idx * EROFS_INODE_SLOT_SIZE) as u64, + block_size, + ); + return offset; + } + } + + 0 + } + + // Append the block that `offset` belongs to corresponding deque. + pub(crate) fn append_available_block(&mut self, offset: u64, block_size: u64) { + if offset % block_size != 0 { + let avail = block_size - offset % block_size; + let idx = avail as usize / EROFS_INODE_SLOT_SIZE; + self.v6_available_blocks[idx].push_back(round_down(offset, block_size)); + } + } +} + +/// BootstrapManager is used to hold the parent bootstrap reader and create new bootstrap context. +pub struct BootstrapManager { + pub(crate) f_parent_path: Option, + pub(crate) bootstrap_storage: Option, +} + +impl BootstrapManager { + /// Create a new instance of [BootstrapManager] + pub fn new(bootstrap_storage: Option, f_parent_path: Option) -> Self { + Self { + f_parent_path: f_parent_path.map(PathBuf::from), + bootstrap_storage, + } + } + + /// Create a new instance of [BootstrapContext] + pub fn create_ctx(&self) -> Result { + BootstrapContext::new(self.bootstrap_storage.clone(), self.f_parent_path.is_some()) + } +} + +pub struct BuildContext { + /// Blob id (user specified or sha256(blob)). + pub blob_id: String, + + /// When filling local blobcache file, chunks are arranged as per the + /// `decompress_offset` within chunk info. Therefore, provide a new flag + /// to image tool thus to align chunks in blob with 4k size. + pub aligned_chunk: bool, + /// Add a offset for compressed blob. + pub blob_offset: u64, + /// Blob chunk compress flag. 
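Editor's note: `BootstrapContext::align_offset` above only ever moves the write cursor forward, and only when it is not already aligned; the partially used 4 KiB blocks tracked in `v6_available_blocks` are then back-filled with small metadata records. A standalone sketch of the alignment rule; `align_up` is a hypothetical helper using the same arithmetic as `div_round_up`.

```rust
// Round `offset` up to the next multiple of `align`; aligned offsets are kept.
fn align_up(offset: u64, align: u64) -> u64 {
    if offset % align == 0 {
        offset
    } else {
        (offset / align + 1) * align
    }
}
// align_up(4097, 4096) == 8192, align_up(8192, 4096) == 8192
```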
+    pub compressor: compress::Algorithm,
+    /// Inode and chunk digest algorithm flag.
+    pub digester: digest::Algorithm,
+    /// Blob encryption algorithm flag.
+    pub cipher: crypt::Algorithm,
+    /// Save host uid/gid in each inode.
+    pub explicit_uidgid: bool,
+    /// Whiteout spec: overlayfs or oci.
+    pub whiteout_spec: WhiteoutSpec,
+    /// Chunk slice size.
+    pub chunk_size: u32,
+    /// Batch chunk data size.
+    pub batch_size: u32,
+    /// Version number of output metadata and data blob.
+    pub fs_version: RafsVersion,
+    /// Whether any directory/file has extended attributes.
+    pub has_xattr: bool,
+
+    /// Format conversion type.
+    pub conversion_type: ConversionType,
+    /// Path of source to build the image from:
+    /// - Directory: `source_path` should be a directory path
+    /// - StargzIndex: `source_path` should be a stargz index json file path
+    pub source_path: PathBuf,
+
+    /// Track file/chunk prefetch state.
+    pub prefetch: Prefetch,
+
+    /// Storage for writing the blob to a single file or a directory.
+    pub blob_storage: Option<ArtifactStorage>,
+    pub blob_zran_generator: Option<Mutex<ZranContextGenerator<File>>>,
+    pub blob_batch_generator: Option<Mutex<BatchContextGenerator>>,
+    pub blob_tar_reader: Option<BufReaderInfo<File>>,
+    pub blob_features: BlobFeatures,
+    pub blob_inline_meta: bool,
+
+    pub features: Features,
+    pub configuration: Arc<ConfigV2>,
+    /// Generate the blob cache and blob meta.
+    pub blob_cache_generator: Option<BlobCacheGenerator>,
+
+    /// Whether the chunkdict is generated.
+    pub is_chunkdict_generated: bool,
+}
+
+impl BuildContext {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        blob_id: String,
+        aligned_chunk: bool,
+        blob_offset: u64,
+        compressor: compress::Algorithm,
+        digester: digest::Algorithm,
+        explicit_uidgid: bool,
+        whiteout_spec: WhiteoutSpec,
+        conversion_type: ConversionType,
+        source_path: PathBuf,
+        prefetch: Prefetch,
+        blob_storage: Option<ArtifactStorage>,
+        blob_inline_meta: bool,
+        features: Features,
+        encrypt: bool,
+    ) -> Self {
+        // It's a flag for images built with nydus-image 2.2 or newer.
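+        // Editorial note, not part of the original patch: an illustrative summary of
+        // how the feature bits computed below combine, assuming the `BlobFeatures`
+        // flag names used in this file. For a build with `blob_inline_meta = true`
+        // and the `blob-toc` feature enabled, the result is roughly:
+        //
+        //     CAP_TAR_TOC | INLINED_FS_META | HAS_TAR_HEADER | HAS_TOC
+        //
+        // and a `ConversionType::TarToTarfs` build additionally sets `TARFS`.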
+ let mut blob_features = BlobFeatures::CAP_TAR_TOC; + if blob_inline_meta { + blob_features |= BlobFeatures::INLINED_FS_META; + blob_features |= BlobFeatures::HAS_TAR_HEADER; + }; + if features.is_enabled(Feature::BlobToc) { + blob_features |= BlobFeatures::HAS_TOC; + blob_features |= BlobFeatures::HAS_TAR_HEADER; + } + if conversion_type == ConversionType::TarToTarfs { + blob_features |= BlobFeatures::TARFS; + } + + let cipher = if encrypt { + crypt::Algorithm::Aes128Xts + } else { + crypt::Algorithm::None + }; + BuildContext { + blob_id, + aligned_chunk, + blob_offset, + compressor, + digester, + cipher, + explicit_uidgid, + whiteout_spec, + + chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, + batch_size: 0, + fs_version: RafsVersion::default(), + + conversion_type, + source_path, + + prefetch, + blob_storage, + blob_zran_generator: None, + blob_batch_generator: None, + blob_tar_reader: None, + blob_features, + blob_inline_meta, + has_xattr: false, + + features, + configuration: Arc::new(ConfigV2::default()), + blob_cache_generator: None, + is_chunkdict_generated: false, + } + } + + pub fn set_fs_version(&mut self, fs_version: RafsVersion) { + self.fs_version = fs_version; + } + + pub fn set_chunk_size(&mut self, chunk_size: u32) { + self.chunk_size = chunk_size; + } + + pub fn set_batch_size(&mut self, batch_size: u32) { + self.batch_size = batch_size; + } + + pub fn set_configuration(&mut self, config: Arc) { + self.configuration = config; + } + + pub fn set_is_chunkdict(&mut self, is_chunkdict: bool) { + self.is_chunkdict_generated = is_chunkdict; + } +} + +impl Default for BuildContext { + fn default() -> Self { + Self { + blob_id: String::new(), + aligned_chunk: false, + blob_offset: 0, + compressor: compress::Algorithm::default(), + digester: digest::Algorithm::default(), + cipher: crypt::Algorithm::None, + explicit_uidgid: true, + whiteout_spec: WhiteoutSpec::default(), + + chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, + batch_size: 0, + fs_version: RafsVersion::default(), + + conversion_type: ConversionType::default(), + source_path: PathBuf::new(), + + prefetch: Prefetch::default(), + blob_storage: None, + blob_zran_generator: None, + blob_batch_generator: None, + blob_tar_reader: None, + blob_features: BlobFeatures::empty(), + has_xattr: true, + blob_inline_meta: false, + features: Features::new(), + configuration: Arc::new(ConfigV2::default()), + blob_cache_generator: None, + is_chunkdict_generated: false, + } + } +} + +/// BuildOutput represents the output in this build. +#[derive(Default, Debug, Clone)] +pub struct BuildOutput { + /// Blob ids in the blob table of bootstrap. + pub blobs: Vec, + /// The size of output blob in this build. + pub blob_size: Option, + /// File path for the metadata blob. + pub bootstrap_path: Option, +} + +impl fmt::Display for BuildOutput { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "meta blob path: {}", + self.bootstrap_path.as_deref().unwrap_or("") + )?; + writeln!( + f, + "data blob size: 0x{:x}", + self.blob_size.unwrap_or_default() + )?; + write!(f, "data blobs: {:?}", self.blobs)?; + Ok(()) + } +} + +impl BuildOutput { + /// Create a new instance of [BuildOutput]. 
+ pub fn new( + blob_mgr: &BlobManager, + bootstrap_storage: &Option, + ) -> Result { + let blobs = blob_mgr.get_blob_ids(); + let blob_size = blob_mgr.get_last_blob().map(|b| b.compressed_blob_size); + let bootstrap_path = if let Some(ArtifactStorage::SingleFile(p)) = bootstrap_storage { + Some(p.display().to_string()) + } else { + None + }; + + Ok(Self { + blobs, + blob_size, + bootstrap_path, + }) + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::AtomicBool; + + use nydus_api::{BackendConfigV2, ConfigV2Internal, LocalFsConfig}; + + use super::*; + + #[test] + fn test_blob_context_from() { + let mut blob = BlobInfo::new( + 1, + "blob_id".to_string(), + 16, + 8, + 4, + 2, + BlobFeatures::INLINED_FS_META | BlobFeatures::SEPARATE | BlobFeatures::HAS_TOC, + ); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + assert!(blob + .set_blob_id_from_meta_path(source_path.as_path()) + .is_ok()); + blob.set_blob_meta_size(2); + blob.set_blob_toc_size(2); + blob.set_blob_meta_digest([32u8; 32]); + blob.set_blob_toc_digest([64u8; 32]); + blob.set_blob_meta_info(1, 2, 4, 8); + + let mut ctx = BuildContext::default(); + ctx.configuration.internal.set_blob_accessible(true); + let config = ConfigV2 { + version: 2, + backend: Some(BackendConfigV2 { + backend_type: "localfs".to_owned(), + localdisk: None, + localfs: Some(LocalFsConfig { + blob_file: source_path.to_str().unwrap().to_owned(), + dir: "/tmp".to_owned(), + alt_dirs: vec!["/var/nydus/cache".to_owned()], + }), + oss: None, + s3: None, + registry: None, + http_proxy: None, + }), + id: "id".to_owned(), + cache: None, + rafs: None, + overlay: None, + internal: ConfigV2Internal { + blob_accessible: Arc::new(AtomicBool::new(true)), + }, + }; + ctx.set_configuration(config.into()); + + let chunk_source = ChunkSource::Dict; + + let blob_ctx = BlobContext::from(&ctx, &blob, chunk_source); + + assert!(blob_ctx.is_ok()); + let blob_ctx = blob_ctx.unwrap(); + assert_eq!(blob_ctx.uncompressed_blob_size, 16); + assert!(blob_ctx.blob_meta_info_enabled); + } +} diff --git a/builder/src/core/feature.rs b/builder/src/core/feature.rs index e5743f66f07..f10dc722abd 100644 --- a/builder/src/core/feature.rs +++ b/builder/src/core/feature.rs @@ -1,94 +1,94 @@ -// Copyright (C) 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashSet; -use std::convert::TryFrom; - -use anyhow::{bail, Result}; - -const ERR_UNSUPPORTED_FEATURE: &str = "unsupported feature"; - -/// Feature flags to control behavior of RAFS filesystem builder. -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum Feature { - /// Append a Table Of Content footer to RAFS v6 data blob, to help locate data sections. - BlobToc, -} - -impl TryFrom<&str> for Feature { - type Error = anyhow::Error; - - fn try_from(f: &str) -> Result { - match f { - "blob-toc" => Ok(Self::BlobToc), - _ => bail!( - "{} `{}`, please try upgrading to the latest nydus-image", - ERR_UNSUPPORTED_FEATURE, - f, - ), - } - } -} - -/// A set of enabled feature flags to control behavior of RAFS filesystem builder -#[derive(Clone, Debug)] -pub struct Features(HashSet); - -impl Default for Features { - fn default() -> Self { - Self::new() - } -} - -impl Features { - /// Create a new instance of [Features]. 
- pub fn new() -> Self { - Self(HashSet::new()) - } - - /// Check whether a feature is enabled or not. - pub fn is_enabled(&self, feature: Feature) -> bool { - self.0.contains(&feature) - } -} - -impl TryFrom<&str> for Features { - type Error = anyhow::Error; - - fn try_from(features: &str) -> Result { - let mut list = Features::new(); - for feat in features.trim().split(',') { - if !feat.is_empty() { - let feature = Feature::try_from(feat.trim())?; - list.0.insert(feature); - } - } - Ok(list) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_feature() { - assert_eq!(Feature::try_from("blob-toc").unwrap(), Feature::BlobToc); - Feature::try_from("unknown-feature-bit").unwrap_err(); - } - - #[test] - fn test_features() { - let features = Features::try_from("blob-toc").unwrap(); - assert!(features.is_enabled(Feature::BlobToc)); - let features = Features::try_from("blob-toc,").unwrap(); - assert!(features.is_enabled(Feature::BlobToc)); - let features = Features::try_from("blob-toc, ").unwrap(); - assert!(features.is_enabled(Feature::BlobToc)); - let features = Features::try_from("blob-toc ").unwrap(); - assert!(features.is_enabled(Feature::BlobToc)); - let features = Features::try_from(" blob-toc ").unwrap(); - assert!(features.is_enabled(Feature::BlobToc)); - } -} +// Copyright (C) 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashSet; +use std::convert::TryFrom; + +use anyhow::{bail, Result}; + +const ERR_UNSUPPORTED_FEATURE: &str = "unsupported feature"; + +/// Feature flags to control behavior of RAFS filesystem builder. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum Feature { + /// Append a Table Of Content footer to RAFS v6 data blob, to help locate data sections. + BlobToc, +} + +impl TryFrom<&str> for Feature { + type Error = anyhow::Error; + + fn try_from(f: &str) -> Result { + match f { + "blob-toc" => Ok(Self::BlobToc), + _ => bail!( + "{} `{}`, please try upgrading to the latest nydus-image", + ERR_UNSUPPORTED_FEATURE, + f, + ), + } + } +} + +/// A set of enabled feature flags to control behavior of RAFS filesystem builder +#[derive(Clone, Debug)] +pub struct Features(HashSet); + +impl Default for Features { + fn default() -> Self { + Self::new() + } +} + +impl Features { + /// Create a new instance of [Features]. + pub fn new() -> Self { + Self(HashSet::new()) + } + + /// Check whether a feature is enabled or not. 
+ pub fn is_enabled(&self, feature: Feature) -> bool { + self.0.contains(&feature) + } +} + +impl TryFrom<&str> for Features { + type Error = anyhow::Error; + + fn try_from(features: &str) -> Result { + let mut list = Features::new(); + for feat in features.trim().split(',') { + if !feat.is_empty() { + let feature = Feature::try_from(feat.trim())?; + list.0.insert(feature); + } + } + Ok(list) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_feature() { + assert_eq!(Feature::try_from("blob-toc").unwrap(), Feature::BlobToc); + Feature::try_from("unknown-feature-bit").unwrap_err(); + } + + #[test] + fn test_features() { + let features = Features::try_from("blob-toc").unwrap(); + assert!(features.is_enabled(Feature::BlobToc)); + let features = Features::try_from("blob-toc,").unwrap(); + assert!(features.is_enabled(Feature::BlobToc)); + let features = Features::try_from("blob-toc, ").unwrap(); + assert!(features.is_enabled(Feature::BlobToc)); + let features = Features::try_from("blob-toc ").unwrap(); + assert!(features.is_enabled(Feature::BlobToc)); + let features = Features::try_from(" blob-toc ").unwrap(); + assert!(features.is_enabled(Feature::BlobToc)); + } +} diff --git a/builder/src/core/layout.rs b/builder/src/core/layout.rs index 9a3ef83ddbe..c9071ac60d0 100644 --- a/builder/src/core/layout.rs +++ b/builder/src/core/layout.rs @@ -1,62 +1,62 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use anyhow::Result; -use std::ops::Deref; - -use super::node::Node; -use crate::{Overlay, Prefetch, TreeNode}; - -#[derive(Clone)] -pub struct BlobLayout {} - -impl BlobLayout { - pub fn layout_blob_simple(prefetch: &Prefetch) -> Result<(Vec, usize)> { - let (pre, non_pre) = prefetch.get_file_nodes(); - let mut inodes: Vec = pre - .into_iter() - .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) - .collect(); - let mut non_prefetch_inodes: Vec = non_pre - .into_iter() - .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) - .collect(); - - let prefetch_entries = inodes.len(); - - inodes.append(&mut non_prefetch_inodes); - - Ok((inodes, prefetch_entries)) - } - - #[inline] - fn should_dump_node(node: &Node) -> bool { - node.overlay == Overlay::UpperAddition || node.overlay == Overlay::UpperModification - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{core::node::NodeInfo, Tree}; - use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; - - #[test] - fn test_layout_blob_simple() { - let mut inode = InodeWrapper::new(RafsVersion::V6); - inode.set_mode(0o755 | libc::S_IFREG as u32); - inode.set_size(1); - let mut node1 = Node::new(inode.clone(), NodeInfo::default(), 1); - node1.overlay = Overlay::UpperAddition; - - let tree = Tree::new(node1); - - let mut prefetch = Prefetch::default(); - prefetch.insert(&tree.node, tree.node.lock().unwrap().deref()); - - let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&prefetch).unwrap(); - assert_eq!(inodes.len(), 1); - assert_eq!(prefetch_entries, 0); - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::Result; +use std::ops::Deref; + +use super::node::Node; +use crate::{Overlay, Prefetch, TreeNode}; + +#[derive(Clone)] +pub struct BlobLayout {} + +impl BlobLayout { + pub fn layout_blob_simple(prefetch: &Prefetch) -> Result<(Vec, usize)> { + let (pre, non_pre) = prefetch.get_file_nodes(); + let mut inodes: Vec = pre + .into_iter() + .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) + .collect(); + let mut non_prefetch_inodes: Vec = non_pre + .into_iter() + .filter(|x| Self::should_dump_node(x.lock().unwrap().deref())) + .collect(); + + let prefetch_entries = inodes.len(); + + inodes.append(&mut non_prefetch_inodes); + + Ok((inodes, prefetch_entries)) + } + + #[inline] + fn should_dump_node(node: &Node) -> bool { + node.overlay == Overlay::UpperAddition || node.overlay == Overlay::UpperModification + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{core::node::NodeInfo, Tree}; + use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; + + #[test] + fn test_layout_blob_simple() { + let mut inode = InodeWrapper::new(RafsVersion::V6); + inode.set_mode(0o755 | libc::S_IFREG as u32); + inode.set_size(1); + let mut node1 = Node::new(inode.clone(), NodeInfo::default(), 1); + node1.overlay = Overlay::UpperAddition; + + let tree = Tree::new(node1); + + let mut prefetch = Prefetch::default(); + prefetch.insert(&tree.node, tree.node.lock().unwrap().deref()); + + let (inodes, prefetch_entries) = BlobLayout::layout_blob_simple(&prefetch).unwrap(); + assert_eq!(inodes.len(), 1); + assert_eq!(prefetch_entries, 0); + } +} diff --git a/builder/src/core/mod.rs b/builder/src/core/mod.rs index 311625c5fc1..826c9f57a95 100644 --- a/builder/src/core/mod.rs +++ b/builder/src/core/mod.rs @@ -1,16 +1,16 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -pub(crate) mod blob; -pub(crate) mod bootstrap; -pub(crate) mod chunk_dict; -pub(crate) mod context; -pub(crate) mod feature; -pub(crate) mod layout; -pub(crate) mod node; -pub(crate) mod overlay; -pub(crate) mod prefetch; -pub(crate) mod tree; -pub(crate) mod v5; -pub(crate) mod v6; +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +pub(crate) mod blob; +pub(crate) mod bootstrap; +pub(crate) mod chunk_dict; +pub(crate) mod context; +pub(crate) mod feature; +pub(crate) mod layout; +pub(crate) mod node; +pub(crate) mod overlay; +pub(crate) mod prefetch; +pub(crate) mod tree; +pub(crate) mod v5; +pub(crate) mod v6; diff --git a/builder/src/core/node.rs b/builder/src/core/node.rs index aa73793973d..9681779c009 100644 --- a/builder/src/core/node.rs +++ b/builder/src/core/node.rs @@ -1,1111 +1,1111 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::ffi::{OsStr, OsString}; -use std::fmt::{self, Display, Formatter, Result as FmtResult}; -use std::fs::{self, File}; -use std::io::Read; -use std::ops::Deref; -#[cfg(target_os = "linux")] -use std::os::linux::fs::MetadataExt; -#[cfg(target_os = "macos")] -use std::os::macos::fs::MetadataExt; -use std::os::unix::ffi::OsStrExt; -use std::path::{Component, Path, PathBuf}; -use std::sync::Arc; - -use anyhow::{anyhow, bail, Context, Error, Result}; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::v6::EROFS_INODE_FLAT_PLAIN; -use nydus_rafs::metadata::layout::RafsXAttrs; -use nydus_rafs::metadata::{Inode, RafsVersion}; -use nydus_storage::device::BlobFeatures; -use nydus_storage::meta::{BlobChunkInfoV2Ondisk, BlobMetaChunkInfo}; -use nydus_utils::digest::{DigestHasher, RafsDigest}; -use nydus_utils::{compress, crypt}; -use nydus_utils::{div_round_up, event_tracer, root_tracer, try_round_up_4k, ByteSize}; -use sha2::digest::Digest; - -use crate::{BlobContext, BlobManager, BuildContext, ChunkDict, ConversionType, Overlay}; - -use super::context::Artifact; - -/// Filesystem root path for Unix OSs. -const ROOT_PATH_NAME: &[u8] = &[b'/']; - -/// Source of chunk data: chunk dictionary, parent filesystem or builder. -#[derive(Clone, Hash, PartialEq, Eq)] -pub enum ChunkSource { - /// Chunk is stored in data blob owned by current image. - Build, - /// A reference to a chunk in chunk dictionary. - Dict, - /// A reference to a chunk in parent image. - Parent, -} - -impl Display for ChunkSource { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - match self { - Self::Build => write!(f, "build"), - Self::Dict => write!(f, "dict"), - Self::Parent => write!(f, "parent"), - } - } -} - -/// Chunk information for RAFS filesystem builder. -#[derive(Clone)] -pub struct NodeChunk { - pub source: ChunkSource, - pub inner: Arc, -} - -impl Display for NodeChunk { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.inner,) - } -} - -impl NodeChunk { - /// Copy all chunk information from another `ChunkWrapper` object. - pub fn copy_from(&mut self, other: &ChunkWrapper) { - let mut chunk = self.inner.deref().clone(); - chunk.copy_from(other); - self.inner = Arc::new(chunk); - } - - /// Set chunk index. - pub fn set_index(&mut self, index: u32) { - let mut chunk = self.inner.deref().clone(); - chunk.set_index(index); - self.inner = Arc::new(chunk); - } - - /// Set blob index. - pub fn set_blob_index(&mut self, index: u32) { - let mut chunk = self.inner.deref().clone(); - chunk.set_blob_index(index); - self.inner = Arc::new(chunk); - } - - /// Set chunk compressed size. - pub fn set_compressed_size(&mut self, size: u32) { - let mut chunk = self.inner.deref().clone(); - chunk.set_compressed_size(size); - self.inner = Arc::new(chunk); - } - - /// Set file offset of chunk. - pub fn set_file_offset(&mut self, offset: u64) { - let mut chunk = self.inner.deref().clone(); - chunk.set_file_offset(offset); - self.inner = Arc::new(chunk); - } -} - -/// Struct to host sharable fields of [Node]. -#[derive(Clone, Default, Debug)] -pub struct NodeInfo { - /// Whether the explicit UID/GID feature is enabled or not. - pub explicit_uidgid: bool, - - /// Device id associated with the source inode. 
- /// - /// A source directory may contain multiple partitions from different hard disk, so - /// a pair of (src_ino, src_dev) is needed to uniquely identify an inode from source directory. - pub src_dev: u64, - /// Inode number of the source inode, from fs stat(). - pub src_ino: Inode, - /// Device ID for special files, describing the device that this inode represents. - pub rdev: u64, - /// Absolute path of the source root directory. - pub source: PathBuf, - /// Absolute path of the source file/directory. - pub path: PathBuf, - /// Absolute path within the target RAFS filesystem. - pub target: PathBuf, - /// Parsed version of `target`. - pub target_vec: Vec, - /// Symlink info of symlink file - pub symlink: Option, - /// Extended attributes. - pub xattrs: RafsXAttrs, - - /// V6: whether it's forced to use an extended inode. - pub v6_force_extended_inode: bool, -} - -/// An in-memory representation of RAFS inode for image building and inspection. -#[derive(Clone)] -pub struct Node { - /// Immutable fields of a Node object. - pub info: Arc, - /// Assigned RAFS inode number. - pub index: u64, - /// Define a disk inode structure to persist to disk. - pub inode: InodeWrapper, - /// Chunks info list of regular file - pub chunks: Vec, - /// Layer index where node is located. - pub layer_idx: u16, - /// Overlay type for layered build - pub overlay: Overlay, - - /// V6: whether it's a compact inode or an extended inode. - pub v6_compact_inode: bool, - /// V6: inode data layout. - pub v6_datalayout: u16, - /// V6: offset to calculate nid. - pub v6_offset: u64, - /// V6: offset to build directory entries. - pub v6_dirents_offset: u64, - /// V6: information to build directory entries. - pub v6_dirents: Vec<(u64, OsString, u32)>, -} - -impl Display for Node { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "{} {:?}: index {} ino {} real_ino {} child_index {} child_count {} i_nlink {} i_size {} i_blocks {} i_name_size {} i_symlink_size {} has_xattr {} link {:?} i_mtime {} i_mtime_nsec {}", - self.file_type(), - self.target(), - self.index, - self.inode.ino(), - self.info.src_ino, - self.inode.child_index(), - self.inode.child_count(), - self.inode.nlink(), - self.inode.size(), - self.inode.blocks(), - self.inode.name_size(), - self.inode.symlink_size(), - self.inode.has_xattr(), - self.info.symlink, - self.inode.mtime(), - self.inode.mtime_nsec(), - ) - } -} - -impl Node { - /// Create a new instance of [Node]. - pub fn new(inode: InodeWrapper, info: NodeInfo, layer_idx: u16) -> Self { - Node { - info: Arc::new(info), - index: 0, - overlay: Overlay::UpperAddition, - inode, - chunks: Vec::new(), - layer_idx, - v6_offset: 0, - v6_dirents: Vec::<(u64, OsString, u32)>::new(), - v6_datalayout: 0, - v6_compact_inode: false, - v6_dirents_offset: 0, - } - } - - /// Dump node data into the data blob, and generate chunk information. - /// - /// # Arguments - /// - blob_writer: optional writer to write data into the data blob. - /// - data_buf: scratch buffer used to stored data read from the reader. 
- pub fn dump_node_data( - self: &mut Node, - ctx: &BuildContext, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, - chunk_data_buf: &mut [u8], - ) -> Result { - let mut reader = if self.is_reg() { - let file = File::open(self.path()) - .with_context(|| format!("failed to open node file {:?}", self.path()))?; - Some(file) - } else { - None - }; - - self.dump_node_data_with_reader(ctx, blob_mgr, blob_writer, reader.as_mut(), chunk_data_buf) - } - - /// Dump data from a reader into the data blob, and generate chunk information. - /// - /// # Arguments - /// - blob_writer: optional writer to write data into the data blob. - /// - reader: reader to provide chunk data - /// - data_buf: scratch buffer used to stored data read from the reader. - pub fn dump_node_data_with_reader( - &mut self, - ctx: &BuildContext, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, - reader: Option<&mut R>, - data_buf: &mut [u8], - ) -> Result { - if self.is_dir() { - return Ok(0); - } else if self.is_symlink() { - if let Some(symlink) = self.info.symlink.as_ref() { - if self.inode.is_v5() { - self.inode - .set_digest(RafsDigest::from_buf(symlink.as_bytes(), ctx.digester)); - } - return Ok(0); - } else { - return Err(Error::msg("inode's symblink is invalid.")); - } - } else if self.is_special() { - if self.inode.is_v5() { - self.inode - .set_digest(RafsDigest::hasher(ctx.digester).digest_finalize()); - } - return Ok(0); - } - - let mut blob_size = 0u64; - let reader = reader.ok_or_else(|| anyhow!("missing reader to read file data"))?; - let mut inode_hasher = if self.inode.is_v5() { - Some(RafsDigest::hasher(ctx.digester)) - } else { - None - }; - - // `child_count` of regular file is reused as `chunk_count`. - for i in 0..self.inode.child_count() { - let chunk_size = ctx.chunk_size; - let file_offset = i as u64 * chunk_size as u64; - let uncompressed_size = if i == self.inode.child_count() - 1 { - (self.inode.size() - chunk_size as u64 * i as u64) as u32 - } else { - chunk_size - }; - - let chunk_data = &mut data_buf[0..uncompressed_size as usize]; - let (mut chunk, mut chunk_info) = self.read_file_chunk(ctx, reader, chunk_data)?; - if let Some(h) = inode_hasher.as_mut() { - h.digest_update(chunk.id().as_ref()); - } - - // No need to perform chunk deduplication for tar-tarfs case. - if ctx.conversion_type != ConversionType::TarToTarfs { - chunk = match self.deduplicate_chunk( - ctx, - blob_mgr, - file_offset, - uncompressed_size, - chunk, - )? 
{ - None => continue, - Some(c) => c, - }; - } - - let (blob_index, blob_ctx) = blob_mgr.get_or_create_current_blob(ctx)?; - let chunk_index = blob_ctx.alloc_chunk_index()?; - chunk.set_blob_index(blob_index); - chunk.set_index(chunk_index); - chunk.set_file_offset(file_offset); - let mut dumped_size = chunk.compressed_size(); - if ctx.conversion_type == ConversionType::TarToTarfs { - chunk.set_uncompressed_offset(chunk.compressed_offset()); - chunk.set_uncompressed_size(chunk.compressed_size()); - } else { - let (info, d_size) = - self.dump_file_chunk(ctx, blob_ctx, blob_writer, chunk_data, &mut chunk)?; - if info.is_some() { - chunk_info = info; - } - if let Some(d_size) = d_size { - dumped_size = d_size; - } - } - - let chunk = Arc::new(chunk); - blob_size += dumped_size as u64; - if ctx.conversion_type != ConversionType::TarToTarfs { - blob_ctx.add_chunk_meta_info(&chunk, chunk_info)?; - blob_mgr - .layered_chunk_dict - .add_chunk(chunk.clone(), ctx.digester); - } - self.chunks.push(NodeChunk { - source: ChunkSource::Build, - inner: chunk, - }); - } - - // Finish inode digest calculation - if let Some(h) = inode_hasher { - self.inode.set_digest(h.digest_finalize()); - } - - Ok(blob_size) - } - - fn read_file_chunk( - &self, - ctx: &BuildContext, - reader: &mut R, - buf: &mut [u8], - ) -> Result<(ChunkWrapper, Option)> { - let mut chunk = self.inode.create_chunk(); - let mut chunk_info = None; - if let Some(ref zran) = ctx.blob_zran_generator { - let mut zran = zran.lock().unwrap(); - zran.start_chunk(ctx.chunk_size as u64)?; - reader - .read_exact(buf) - .with_context(|| format!("failed to read node file {:?}", self.path()))?; - let info = zran.finish_chunk()?; - chunk.set_compressed_offset(info.compressed_offset()); - chunk.set_compressed_size(info.compressed_size()); - chunk.set_compressed(true); - chunk_info = Some(info); - } else if let Some(ref tar_reader) = ctx.blob_tar_reader { - // For `tar-ref` case - let pos = tar_reader.position(); - chunk.set_compressed_offset(pos); - chunk.set_compressed_size(buf.len() as u32); - chunk.set_compressed(false); - reader - .read_exact(buf) - .with_context(|| format!("failed to read node file {:?}", self.path()))?; - } else { - reader - .read_exact(buf) - .with_context(|| format!("failed to read node file {:?}", self.path()))?; - } - - // For tar-tarfs case, no need to compute chunk id. - if ctx.conversion_type != ConversionType::TarToTarfs { - chunk.set_id(RafsDigest::from_buf(buf, ctx.digester)); - } - - if ctx.cipher != crypt::Algorithm::None { - chunk.set_encrypted(true); - } - - Ok((chunk, chunk_info)) - } - - /// Dump a chunk from u8 slice into the data blob. - /// Return `BlobChunkInfoV2Ondisk` iff the chunk is added into a batch chunk. - /// Return dumped size iff not `BlobFeatures::SEPARATE`. - /// Dumped size can be zero if chunk data is cached in Batch Generator, - /// and may contain previous chunk data cached in Batch Generator. - fn dump_file_chunk( - &self, - ctx: &BuildContext, - blob_ctx: &mut BlobContext, - blob_writer: &mut dyn Artifact, - chunk_data: &[u8], - chunk: &mut ChunkWrapper, - ) -> Result<(Option, Option)> { - let d_size = chunk_data.len() as u32; - let aligned_d_size = if ctx.aligned_chunk { - // Safe to unwrap because `chunk_size` is much less than u32::MAX. 
- try_round_up_4k(d_size).unwrap() - } else { - d_size - }; - let pre_d_offset = blob_ctx.current_uncompressed_offset; - blob_ctx.uncompressed_blob_size = pre_d_offset + aligned_d_size as u64; - blob_ctx.current_uncompressed_offset += aligned_d_size as u64; - chunk.set_uncompressed_offset(pre_d_offset); - chunk.set_uncompressed_size(d_size); - - let mut chunk_info = None; - let encrypted = blob_ctx.blob_cipher != crypt::Algorithm::None; - let mut dumped_size = None; - - if ctx.blob_batch_generator.is_some() - && self.inode.child_count() == 1 - && d_size < ctx.batch_size / 2 - { - // This chunk will be added into a batch chunk. - let mut batch = ctx.blob_batch_generator.as_ref().unwrap().lock().unwrap(); - - if batch.chunk_data_buf_len() as u32 + d_size < ctx.batch_size { - // Add into current batch chunk directly. - chunk_info = Some(batch.generate_chunk_info( - blob_ctx.current_compressed_offset, - pre_d_offset, - d_size, - encrypted, - )?); - batch.append_chunk_data_buf(chunk_data); - } else { - // Dump current batch chunk if exists, and then add into a new batch chunk. - if !batch.chunk_data_buf_is_empty() { - // Dump current batch chunk. - let (_, c_size, _) = - Self::write_chunk_data(ctx, blob_ctx, blob_writer, batch.chunk_data_buf())?; - dumped_size = Some(c_size); - batch.add_context(c_size); - batch.clear_chunk_data_buf(); - } - - // Add into a new batch chunk. - chunk_info = Some(batch.generate_chunk_info( - blob_ctx.current_compressed_offset, - pre_d_offset, - d_size, - encrypted, - )?); - batch.append_chunk_data_buf(chunk_data); - } - } else if !ctx.blob_features.contains(BlobFeatures::SEPARATE) { - // For other case which needs to write chunk data to data blobs. Which means, - // `tar-ref`, `targz-ref`, `estargz-ref`, and `estargzindex-ref`, are excluded. - - // Interrupt and dump buffered batch chunks. - // TODO: cancel the interruption. - if let Some(batch) = &ctx.blob_batch_generator { - let mut batch = batch.lock().unwrap(); - if !batch.chunk_data_buf_is_empty() { - // Dump current batch chunk. 
- let (_, c_size, _) = - Self::write_chunk_data(ctx, blob_ctx, blob_writer, batch.chunk_data_buf())?; - dumped_size = Some(c_size); - batch.add_context(c_size); - batch.clear_chunk_data_buf(); - } - } - - let (pre_c_offset, c_size, is_compressed) = - Self::write_chunk_data(ctx, blob_ctx, blob_writer, chunk_data) - .with_context(|| format!("failed to write chunk data {:?}", self.path()))?; - dumped_size = Some(dumped_size.unwrap_or(0) + c_size); - chunk.set_compressed_offset(pre_c_offset); - chunk.set_compressed_size(c_size); - chunk.set_compressed(is_compressed); - } - - if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { - blob_cache.write_blob_data(chunk_data, chunk, aligned_d_size)?; - } - event_tracer!("blob_uncompressed_size", +d_size); - - Ok((chunk_info, dumped_size)) - } - - pub fn write_chunk_data( - ctx: &BuildContext, - blob_ctx: &mut BlobContext, - blob_writer: &mut dyn Artifact, - chunk_data: &[u8], - ) -> Result<(u64, u32, bool)> { - let (compressed, is_compressed) = compress::compress(chunk_data, ctx.compressor) - .with_context(|| "failed to compress node file".to_string())?; - let encrypted = crypt::encrypt_with_context( - &compressed, - &blob_ctx.cipher_object, - &blob_ctx.cipher_ctx, - blob_ctx.blob_cipher != crypt::Algorithm::None, - )?; - let compressed_size = encrypted.len() as u32; - let pre_compressed_offset = blob_ctx.current_compressed_offset; - blob_writer - .write_all(&encrypted) - .context("failed to write blob")?; - blob_ctx.blob_hash.update(&encrypted); - blob_ctx.current_compressed_offset += compressed_size as u64; - blob_ctx.compressed_blob_size += compressed_size as u64; - - Ok((pre_compressed_offset, compressed_size, is_compressed)) - } - - fn deduplicate_chunk( - &mut self, - ctx: &BuildContext, - blob_mgr: &mut BlobManager, - file_offset: u64, - uncompressed_size: u32, - mut chunk: ChunkWrapper, - ) -> Result> { - let dict = &blob_mgr.global_chunk_dict; - let mut cached_chunk = dict.get_chunk(chunk.id(), uncompressed_size); - let from_dict = cached_chunk.is_some(); - if cached_chunk.is_none() { - cached_chunk = blob_mgr - .layered_chunk_dict - .get_chunk(chunk.id(), uncompressed_size); - } - let cached_chunk = match cached_chunk { - Some(v) => v, - None => return Ok(Some(chunk)), - }; - - // The chunks of hardlink should be always deduplicated. - if !self.is_hardlink() { - event_tracer!("dedup_uncompressed_size", +uncompressed_size); - event_tracer!("dedup_chunks", +1); - } - chunk.copy_from(cached_chunk); - chunk.set_file_offset(file_offset); - - // Only add actually referenced data blobs from chunk dictionary to the blob table. - if from_dict { - let blob_index = if let Some(blob_idx) = dict.get_real_blob_idx(chunk.blob_index()) { - blob_idx - } else { - let blob_idx = blob_mgr.alloc_index()?; - dict.set_real_blob_idx(chunk.blob_index(), blob_idx); - if let Some(blob) = dict.get_blob_by_inner_idx(chunk.blob_index()) { - let ctx = BlobContext::from(ctx, blob, ChunkSource::Dict)?; - blob_mgr.add_blob(ctx); - } - blob_idx - }; - chunk.set_blob_index(blob_index); - } - - trace!( - "\t\tfound duplicated chunk: {} compressor {}", - chunk, - ctx.compressor - ); - let source = if from_dict { - ChunkSource::Dict - } else if Some(chunk.blob_index()) != blob_mgr.get_current_blob().map(|(u, _)| u) { - ChunkSource::Parent - } else { - ChunkSource::Build - }; - self.chunks.push(NodeChunk { - source, - inner: Arc::new(chunk), - }); - - Ok(None) - } -} - -// build node object from a filesystem object. 
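// Editorial sketch, not part of the patch: how a caller is expected to use
// `Node::from_fs_object` defined below. The paths and parameter values here are
// hypothetical; the argument order follows the signature in this file.
//
//     let node = Node::from_fs_object(
//         RafsVersion::V6,
//         PathBuf::from("/path/to/rootfs"),                 // source root
//         PathBuf::from("/path/to/rootfs/etc/os-release"),  // file under the root
//         Overlay::UpperAddition,
//         RAFS_DEFAULT_CHUNK_SIZE as u32,
//         true,   // explicit_uidgid
//         false,  // v6_force_extended_inode
//     )?;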
-impl Node { - /// Create a new instance of [Node] from a filesystem object. - pub fn from_fs_object( - version: RafsVersion, - source: PathBuf, - path: PathBuf, - overlay: Overlay, - chunk_size: u32, - explicit_uidgid: bool, - v6_force_extended_inode: bool, - ) -> Result { - let target = Self::generate_target(&path, &source); - let target_vec = Self::generate_target_vec(&target); - let info = NodeInfo { - explicit_uidgid, - src_ino: 0, - src_dev: u64::MAX, - rdev: u64::MAX, - source, - target, - path, - target_vec, - symlink: None, - xattrs: RafsXAttrs::default(), - v6_force_extended_inode, - }; - let mut node = Node { - info: Arc::new(info), - index: 0, - layer_idx: 0, - overlay, - inode: InodeWrapper::new(version), - chunks: Vec::new(), - v6_datalayout: EROFS_INODE_FLAT_PLAIN, - v6_compact_inode: false, - v6_offset: 0, - v6_dirents_offset: 0, - v6_dirents: Vec::new(), - }; - - node.build_inode(chunk_size) - .context("failed to build Node from fs object")?; - if version.is_v6() { - node.v6_set_inode_compact(); - } - - Ok(node) - } - - fn build_inode_xattr(&mut self) -> Result<()> { - let file_xattrs = match xattr::list(self.path()) { - Ok(x) => x, - Err(e) => { - if e.raw_os_error() == Some(libc::EOPNOTSUPP) { - return Ok(()); - } else { - return Err(anyhow!( - "failed to list xattr of {}, {}", - self.path().display(), - e - )); - } - } - }; - - let mut info = self.info.deref().clone(); - for key in file_xattrs { - let value = xattr::get(self.path(), &key).with_context(|| { - format!("failed to get xattr {:?} of {}", key, self.path().display()) - })?; - info.xattrs.add(key, value.unwrap_or_default())?; - } - if !info.xattrs.is_empty() { - self.inode.set_has_xattr(true); - } - self.info = Arc::new(info); - - Ok(()) - } - - fn build_inode_stat(&mut self) -> Result<()> { - let meta = self - .meta() - .with_context(|| format!("failed to get metadata of {}", self.path().display()))?; - let mut info = self.info.deref().clone(); - - info.src_ino = meta.st_ino(); - info.src_dev = meta.st_dev(); - info.rdev = meta.st_rdev(); - - self.inode.set_mode(meta.st_mode()); - if info.explicit_uidgid { - self.inode.set_uid(meta.st_uid()); - self.inode.set_gid(meta.st_gid()); - } - - // Usually the root directory is created by the build tool (nydusify/buildkit/acceld) - // and the mtime of the root directory is different for each build, which makes it - // completely impossible to achieve repeatable builds, especially in a tar build scenario - // (blob + bootstrap in one tar layer), which causes the layer hash to change and wastes - // registry storage space, so the mtime of the root directory is forced to be ignored here. - let ignore_mtime = self.is_root(); - if !ignore_mtime { - self.inode.set_mtime(meta.st_mtime() as u64); - self.inode.set_mtime_nsec(meta.st_mtime_nsec() as u32); - } - self.inode.set_projid(0); - self.inode.set_rdev(meta.st_rdev() as u32); - // Ignore actual nlink value and calculate from rootfs directory instead - self.inode.set_nlink(1); - - // Different filesystem may have different algorithms to calculate size/blocks for - // directory entries, so let's ignore the value provided by source filesystem and - // calculate it later by ourself. 
- if !self.is_dir() { - self.inode.set_size(meta.st_size()); - self.v5_set_inode_blocks(); - } - self.info = Arc::new(info); - - Ok(()) - } - - fn build_inode(&mut self, chunk_size: u32) -> Result<()> { - let size = self.name().byte_size(); - if size > u16::MAX as usize { - bail!("file name length 0x{:x} is too big", size,); - } - self.inode.set_name_size(size); - - // NOTE: Always retrieve xattr before attr so that we can know the size of xattr pairs. - self.build_inode_xattr() - .with_context(|| format!("failed to get xattr for {}", self.path().display()))?; - self.build_inode_stat() - .with_context(|| format!("failed to build inode {}", self.path().display()))?; - - if self.is_reg() { - let chunk_count = self.chunk_count(chunk_size as u64).with_context(|| { - format!("failed to get chunk count for {}", self.path().display()) - })?; - self.inode.set_child_count(chunk_count); - } else if self.is_symlink() { - let target_path = fs::read_link(self.path()).with_context(|| { - format!( - "failed to read symlink target for {}", - self.path().display() - ) - })?; - let symlink: OsString = target_path.into(); - let size = symlink.byte_size(); - if size > u16::MAX as usize { - bail!("symlink content size 0x{:x} is too big", size); - } - self.inode.set_symlink_size(size); - self.set_symlink(symlink); - } - - Ok(()) - } - - fn meta(&self) -> Result { - self.path() - .symlink_metadata() - .with_context(|| format!("failed to get metadata of {}", self.path().display())) - } -} - -// Access Methods -impl Node { - pub fn is_root(&self) -> bool { - self.target() == OsStr::from_bytes(ROOT_PATH_NAME) - } - - pub fn is_dir(&self) -> bool { - self.inode.is_dir() - } - - pub fn is_symlink(&self) -> bool { - self.inode.is_symlink() - } - - pub fn is_reg(&self) -> bool { - self.inode.is_reg() - } - - pub fn is_hardlink(&self) -> bool { - self.inode.is_hardlink() - } - - pub fn is_special(&self) -> bool { - self.inode.is_special() - } - - pub fn chunk_count(&self, chunk_size: u64) -> Result { - if self.is_reg() { - let chunks = div_round_up(self.inode.size(), chunk_size); - if chunks > u32::MAX as u64 { - bail!("file size 0x{:x} is too big", self.inode.size()) - } else { - Ok(chunks as u32) - } - } else { - Ok(0) - } - } - - /// Get file type of the inode. - pub fn file_type(&self) -> &str { - let mut file_type = ""; - - if self.is_symlink() { - file_type = "symlink"; - } else if self.is_dir() { - file_type = "dir" - } else if self.is_reg() { - if self.is_hardlink() { - file_type = "hardlink"; - } else { - file_type = "file"; - } - } - - file_type - } - - /// Get filename of the inode. - pub fn name(&self) -> &OsStr { - let len = self.info.target_vec.len(); - if len != 0 { - &self.info.target_vec[len - 1] - } else if self.path() == &self.info.source { - OsStr::from_bytes(ROOT_PATH_NAME) - } else { - // Safe to unwrap because `path` is returned from `path()` which is canonicalized - self.path().file_name().unwrap() - } - } - - /// Get path of the inode - pub fn path(&self) -> &PathBuf { - &self.info.path - } - - /// Generate cached components of the target file path. - pub fn generate_target_vec(target: &Path) -> Vec { - target - .components() - .map(|comp| match comp { - Component::RootDir => OsString::from("/"), - Component::Normal(name) => name.to_os_string(), - _ => panic!("invalid file component pattern!"), - }) - .collect::>() - } - - /// Get cached components of the target file path. 
- pub fn target_vec(&self) -> &[OsString] { - &self.info.target_vec - } - - /// Generate target path by stripping the `root` prefix. - /// - /// Strip the `root` prefix if `path` starts with `root`, otherwise keep `path` as is. - /// For example: - /// root: /absolute/path/to/rootfs - /// path: /absolute/path/to/rootfs/file => /file - /// path /not_rootfs_prefix/file => /not_rootfs_prefix/file - pub fn generate_target(path: &Path, root: &Path) -> PathBuf { - if let Ok(p) = path.strip_prefix(root) { - Path::new("/").join(p) - } else { - // Compatible with path `/` - path.to_path_buf() - } - } - - /// Get the absolute path of the inode within the RAFS filesystem. - pub fn target(&self) -> &PathBuf { - &self.info.target - } - - /// Set symlink target for the node. - pub fn set_symlink(&mut self, symlink: OsString) { - let mut info = self.info.deref().clone(); - info.symlink = Some(symlink); - self.info = Arc::new(info); - } - - /// Set extended attributes for the node. - pub fn set_xattr(&mut self, xattr: RafsXAttrs) { - let mut info = self.info.deref().clone(); - info.xattrs = xattr; - self.info = Arc::new(info); - } - - /// Delete an extend attribute with id `key`. - pub fn remove_xattr(&mut self, key: &OsStr) { - let mut info = self.info.deref().clone(); - info.xattrs.remove(key); - if info.xattrs.is_empty() { - self.inode.set_has_xattr(false); - } - self.info = Arc::new(info); - } -} - -#[cfg(test)] -mod tests { - use std::io::BufReader; - - use nydus_utils::{digest, BufReaderInfo}; - use vmm_sys_util::tempfile::TempFile; - - use crate::{ArtifactWriter, BlobCacheGenerator, HashChunkDict}; - - use super::*; - - #[test] - fn test_node_chunk() { - let chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); - let mut chunk = NodeChunk { - source: ChunkSource::Build, - inner: Arc::new(chunk_wrapper1), - }; - println!("NodeChunk: {}", chunk); - matches!(chunk.inner.deref().clone(), ChunkWrapper::V5(_)); - - let chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); - chunk.copy_from(&chunk_wrapper2); - matches!(chunk.inner.deref().clone(), ChunkWrapper::V6(_)); - - chunk.set_index(0x10); - assert_eq!(chunk.inner.index(), 0x10); - chunk.set_blob_index(0x20); - assert_eq!(chunk.inner.blob_index(), 0x20); - chunk.set_compressed_size(0x30); - assert_eq!(chunk.inner.compressed_size(), 0x30); - chunk.set_file_offset(0x40); - assert_eq!(chunk.inner.file_offset(), 0x40); - } - - #[test] - fn test_node_dump_node_data() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - - let mut inode = InodeWrapper::new(RafsVersion::V5); - inode.set_child_count(2); - inode.set_size(20); - let info = NodeInfo { - explicit_uidgid: true, - src_ino: 1, - src_dev: u64::MAX, - rdev: u64::MAX, - path: source_path.clone(), - source: PathBuf::from("/"), - target: source_path.clone(), - target_vec: vec![OsString::from(source_path)], - symlink: Some(OsString::from("symlink")), - xattrs: RafsXAttrs::new(), - v6_force_extended_inode: false, - }; - let mut node = Node::new(inode, info, 1); - - let mut ctx = BuildContext::default(); - ctx.set_chunk_size(2); - ctx.conversion_type = ConversionType::TarToRef; - ctx.cipher = crypt::Algorithm::Aes128Xts; - let tmp_file1 = TempFile::new().unwrap(); - std::fs::write( - tmp_file1.as_path(), - "This is a test!\n".repeat(32).as_bytes(), - ) - .unwrap(); - let buf_reader = 
BufReader::new(tmp_file1.into_file()); - ctx.blob_tar_reader = Some(BufReaderInfo::from_buf_reader(buf_reader)); - let tmp_file2 = TempFile::new().unwrap(); - ctx.blob_cache_generator = Some( - BlobCacheGenerator::new(crate::ArtifactStorage::SingleFile(PathBuf::from( - tmp_file2.as_path(), - ))) - .unwrap(), - ); - - let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); - let mut chunk_dict = HashChunkDict::new(digest::Algorithm::Sha256); - let mut chunk_wrapper = ChunkWrapper::new(RafsVersion::V5); - chunk_wrapper.set_id(RafsDigest { - data: [ - 209, 217, 144, 116, 135, 113, 3, 121, 133, 92, 96, 25, 219, 145, 151, 219, 119, 47, - 96, 147, 90, 51, 78, 44, 193, 149, 6, 102, 13, 173, 138, 191, - ], - }); - chunk_wrapper.set_uncompressed_size(2); - chunk_dict.add_chunk(Arc::new(chunk_wrapper), digest::Algorithm::Sha256); - blob_mgr.set_chunk_dict(Arc::new(chunk_dict)); - - let tmp_file3 = TempFile::new().unwrap(); - let mut blob_writer = ArtifactWriter::new(crate::ArtifactStorage::SingleFile( - PathBuf::from(tmp_file3.as_path()), - )) - .unwrap(); - - let mut chunk_data_buf = [1u8; 32]; - - node.inode.set_mode(0o755 | libc::S_IFDIR as u32); - let data_size = - node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); - assert!(data_size.is_ok()); - assert_eq!(data_size.unwrap(), 0); - - node.inode.set_mode(0o755 | libc::S_IFLNK as u32); - let data_size = - node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); - assert!(data_size.is_ok()); - assert_eq!(data_size.unwrap(), 0); - - node.inode.set_mode(0o755 | libc::S_IFBLK as u32); - let data_size = - node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); - assert!(data_size.is_ok()); - assert_eq!(data_size.unwrap(), 0); - - node.inode.set_mode(0o755 | libc::S_IFREG as u32); - let data_size = - node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); - assert!(data_size.is_ok()); - assert_eq!(data_size.unwrap(), 18); - } - - #[test] - fn test_node() { - let inode = InodeWrapper::new(RafsVersion::V5); - let info = NodeInfo { - explicit_uidgid: true, - src_ino: 1, - src_dev: u64::MAX, - rdev: u64::MAX, - path: PathBuf::new(), - source: PathBuf::new(), - target: PathBuf::new(), - target_vec: vec![OsString::new()], - symlink: None, - xattrs: RafsXAttrs::new(), - v6_force_extended_inode: false, - }; - - let mut inode1 = inode.clone(); - inode1.set_size(1 << 60); - inode1.set_mode(0o755 | libc::S_IFREG as u32); - let node = Node::new(inode1, info.clone(), 1); - assert!(node.chunk_count(2).is_err()); - - let mut inode2 = inode.clone(); - inode2.set_mode(0o755 | libc::S_IFCHR as u32); - let node = Node::new(inode2, info.clone(), 1); - assert!(node.chunk_count(2).is_ok()); - assert_eq!(node.chunk_count(2).unwrap(), 0); - - let mut inode3 = inode.clone(); - inode3.set_mode(0o755 | libc::S_IFLNK as u32); - let node = Node::new(inode3, info.clone(), 1); - assert_eq!(node.file_type(), "symlink"); - let mut inode4 = inode.clone(); - inode4.set_mode(0o755 | libc::S_IFDIR as u32); - let node = Node::new(inode4, info.clone(), 1); - assert_eq!(node.file_type(), "dir"); - let mut inode5 = inode.clone(); - inode5.set_mode(0o755 | libc::S_IFREG as u32); - let node = Node::new(inode5, info.clone(), 1); - assert_eq!(node.file_type(), "file"); - - let mut info1 = info.clone(); - info1.target_vec = vec![OsString::from("1"), OsString::from("2")]; - let node = Node::new(inode.clone(), info1, 1); - assert_eq!(node.name(), OsString::from("2").as_os_str()); - let mut 
info2 = info.clone(); - info2.target_vec = vec![]; - info2.path = PathBuf::from("/"); - info2.source = PathBuf::from("/"); - let node = Node::new(inode.clone(), info2, 1); - assert_eq!(node.name(), OsStr::from_bytes(ROOT_PATH_NAME)); - let mut info3 = info.clone(); - info3.target_vec = vec![]; - info3.path = PathBuf::from("/1"); - info3.source = PathBuf::from("/11"); - let node = Node::new(inode.clone(), info3, 1); - assert_eq!(node.name(), OsStr::new("1")); - - let target = PathBuf::from("/root/child"); - assert_eq!( - Node::generate_target_vec(&target), - vec![ - OsString::from("/"), - OsString::from("root"), - OsString::from("child") - ] - ); - - let mut node = Node::new(inode, info, 1); - node.set_symlink(OsString::from("symlink")); - assert_eq!(node.info.deref().symlink, Some(OsString::from("symlink"))); - - let mut xatter = RafsXAttrs::new(); - assert!(xatter - .add(OsString::from("user.key"), [1u8; 16].to_vec()) - .is_ok()); - assert!(xatter - .add( - OsString::from("system.posix_acl_default.key"), - [2u8; 8].to_vec() - ) - .is_ok()); - node.set_xattr(xatter); - node.inode.set_has_xattr(true); - node.remove_xattr(OsStr::new("user.key")); - assert!(node.inode.has_xattr()); - node.remove_xattr(OsStr::new("system.posix_acl_default.key")); - assert!(!node.inode.has_xattr()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::ffi::{OsStr, OsString}; +use std::fmt::{self, Display, Formatter, Result as FmtResult}; +use std::fs::{self, File}; +use std::io::Read; +use std::ops::Deref; +#[cfg(target_os = "linux")] +use std::os::linux::fs::MetadataExt; +#[cfg(target_os = "macos")] +use std::os::macos::fs::MetadataExt; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; +use std::sync::Arc; + +use anyhow::{anyhow, bail, Context, Error, Result}; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::layout::v6::EROFS_INODE_FLAT_PLAIN; +use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_rafs::metadata::{Inode, RafsVersion}; +use nydus_storage::device::BlobFeatures; +use nydus_storage::meta::{BlobChunkInfoV2Ondisk, BlobMetaChunkInfo}; +use nydus_utils::digest::{DigestHasher, RafsDigest}; +use nydus_utils::{compress, crypt}; +use nydus_utils::{div_round_up, event_tracer, root_tracer, try_round_up_4k, ByteSize}; +use sha2::digest::Digest; + +use crate::{BlobContext, BlobManager, BuildContext, ChunkDict, ConversionType, Overlay}; + +use super::context::Artifact; + +/// Filesystem root path for Unix OSs. +const ROOT_PATH_NAME: &[u8] = &[b'/']; + +/// Source of chunk data: chunk dictionary, parent filesystem or builder. +#[derive(Clone, Hash, PartialEq, Eq)] +pub enum ChunkSource { + /// Chunk is stored in data blob owned by current image. + Build, + /// A reference to a chunk in chunk dictionary. + Dict, + /// A reference to a chunk in parent image. + Parent, +} + +impl Display for ChunkSource { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + match self { + Self::Build => write!(f, "build"), + Self::Dict => write!(f, "dict"), + Self::Parent => write!(f, "parent"), + } + } +} + +/// Chunk information for RAFS filesystem builder. 
+#[derive(Clone)] +pub struct NodeChunk { + pub source: ChunkSource, + pub inner: Arc, +} + +impl Display for NodeChunk { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.inner,) + } +} + +impl NodeChunk { + /// Copy all chunk information from another `ChunkWrapper` object. + pub fn copy_from(&mut self, other: &ChunkWrapper) { + let mut chunk = self.inner.deref().clone(); + chunk.copy_from(other); + self.inner = Arc::new(chunk); + } + + /// Set chunk index. + pub fn set_index(&mut self, index: u32) { + let mut chunk = self.inner.deref().clone(); + chunk.set_index(index); + self.inner = Arc::new(chunk); + } + + /// Set blob index. + pub fn set_blob_index(&mut self, index: u32) { + let mut chunk = self.inner.deref().clone(); + chunk.set_blob_index(index); + self.inner = Arc::new(chunk); + } + + /// Set chunk compressed size. + pub fn set_compressed_size(&mut self, size: u32) { + let mut chunk = self.inner.deref().clone(); + chunk.set_compressed_size(size); + self.inner = Arc::new(chunk); + } + + /// Set file offset of chunk. + pub fn set_file_offset(&mut self, offset: u64) { + let mut chunk = self.inner.deref().clone(); + chunk.set_file_offset(offset); + self.inner = Arc::new(chunk); + } +} + +/// Struct to host sharable fields of [Node]. +#[derive(Clone, Default, Debug)] +pub struct NodeInfo { + /// Whether the explicit UID/GID feature is enabled or not. + pub explicit_uidgid: bool, + + /// Device id associated with the source inode. + /// + /// A source directory may contain multiple partitions from different hard disk, so + /// a pair of (src_ino, src_dev) is needed to uniquely identify an inode from source directory. + pub src_dev: u64, + /// Inode number of the source inode, from fs stat(). + pub src_ino: Inode, + /// Device ID for special files, describing the device that this inode represents. + pub rdev: u64, + /// Absolute path of the source root directory. + pub source: PathBuf, + /// Absolute path of the source file/directory. + pub path: PathBuf, + /// Absolute path within the target RAFS filesystem. + pub target: PathBuf, + /// Parsed version of `target`. + pub target_vec: Vec, + /// Symlink info of symlink file + pub symlink: Option, + /// Extended attributes. + pub xattrs: RafsXAttrs, + + /// V6: whether it's forced to use an extended inode. + pub v6_force_extended_inode: bool, +} + +/// An in-memory representation of RAFS inode for image building and inspection. +#[derive(Clone)] +pub struct Node { + /// Immutable fields of a Node object. + pub info: Arc, + /// Assigned RAFS inode number. + pub index: u64, + /// Define a disk inode structure to persist to disk. + pub inode: InodeWrapper, + /// Chunks info list of regular file + pub chunks: Vec, + /// Layer index where node is located. + pub layer_idx: u16, + /// Overlay type for layered build + pub overlay: Overlay, + + /// V6: whether it's a compact inode or an extended inode. + pub v6_compact_inode: bool, + /// V6: inode data layout. + pub v6_datalayout: u16, + /// V6: offset to calculate nid. + pub v6_offset: u64, + /// V6: offset to build directory entries. + pub v6_dirents_offset: u64, + /// V6: information to build directory entries. 
+ pub v6_dirents: Vec<(u64, OsString, u32)>, +} + +impl Display for Node { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "{} {:?}: index {} ino {} real_ino {} child_index {} child_count {} i_nlink {} i_size {} i_blocks {} i_name_size {} i_symlink_size {} has_xattr {} link {:?} i_mtime {} i_mtime_nsec {}", + self.file_type(), + self.target(), + self.index, + self.inode.ino(), + self.info.src_ino, + self.inode.child_index(), + self.inode.child_count(), + self.inode.nlink(), + self.inode.size(), + self.inode.blocks(), + self.inode.name_size(), + self.inode.symlink_size(), + self.inode.has_xattr(), + self.info.symlink, + self.inode.mtime(), + self.inode.mtime_nsec(), + ) + } +} + +impl Node { + /// Create a new instance of [Node]. + pub fn new(inode: InodeWrapper, info: NodeInfo, layer_idx: u16) -> Self { + Node { + info: Arc::new(info), + index: 0, + overlay: Overlay::UpperAddition, + inode, + chunks: Vec::new(), + layer_idx, + v6_offset: 0, + v6_dirents: Vec::<(u64, OsString, u32)>::new(), + v6_datalayout: 0, + v6_compact_inode: false, + v6_dirents_offset: 0, + } + } + + /// Dump node data into the data blob, and generate chunk information. + /// + /// # Arguments + /// - blob_writer: optional writer to write data into the data blob. + /// - data_buf: scratch buffer used to stored data read from the reader. + pub fn dump_node_data( + self: &mut Node, + ctx: &BuildContext, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, + chunk_data_buf: &mut [u8], + ) -> Result { + let mut reader = if self.is_reg() { + let file = File::open(self.path()) + .with_context(|| format!("failed to open node file {:?}", self.path()))?; + Some(file) + } else { + None + }; + + self.dump_node_data_with_reader(ctx, blob_mgr, blob_writer, reader.as_mut(), chunk_data_buf) + } + + /// Dump data from a reader into the data blob, and generate chunk information. + /// + /// # Arguments + /// - blob_writer: optional writer to write data into the data blob. + /// - reader: reader to provide chunk data + /// - data_buf: scratch buffer used to stored data read from the reader. + pub fn dump_node_data_with_reader( + &mut self, + ctx: &BuildContext, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, + reader: Option<&mut R>, + data_buf: &mut [u8], + ) -> Result { + if self.is_dir() { + return Ok(0); + } else if self.is_symlink() { + if let Some(symlink) = self.info.symlink.as_ref() { + if self.inode.is_v5() { + self.inode + .set_digest(RafsDigest::from_buf(symlink.as_bytes(), ctx.digester)); + } + return Ok(0); + } else { + return Err(Error::msg("inode's symblink is invalid.")); + } + } else if self.is_special() { + if self.inode.is_v5() { + self.inode + .set_digest(RafsDigest::hasher(ctx.digester).digest_finalize()); + } + return Ok(0); + } + + let mut blob_size = 0u64; + let reader = reader.ok_or_else(|| anyhow!("missing reader to read file data"))?; + let mut inode_hasher = if self.inode.is_v5() { + Some(RafsDigest::hasher(ctx.digester)) + } else { + None + }; + + // `child_count` of regular file is reused as `chunk_count`. 
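+        // Editorial note, not part of the original patch: a worked example of the
+        // per-chunk arithmetic in the loop below. For a 10 MiB file with a 4 MiB
+        // `chunk_size`, `child_count` is 3; chunks 0 and 1 get `file_offset` 0 and
+        // 4 MiB with `uncompressed_size` = 4 MiB, while the last chunk starts at
+        // 8 MiB with `uncompressed_size` = 10 MiB - 2 * 4 MiB = 2 MiB.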
+ for i in 0..self.inode.child_count() { + let chunk_size = ctx.chunk_size; + let file_offset = i as u64 * chunk_size as u64; + let uncompressed_size = if i == self.inode.child_count() - 1 { + (self.inode.size() - chunk_size as u64 * i as u64) as u32 + } else { + chunk_size + }; + + let chunk_data = &mut data_buf[0..uncompressed_size as usize]; + let (mut chunk, mut chunk_info) = self.read_file_chunk(ctx, reader, chunk_data)?; + if let Some(h) = inode_hasher.as_mut() { + h.digest_update(chunk.id().as_ref()); + } + + // No need to perform chunk deduplication for tar-tarfs case. + if ctx.conversion_type != ConversionType::TarToTarfs { + chunk = match self.deduplicate_chunk( + ctx, + blob_mgr, + file_offset, + uncompressed_size, + chunk, + )? { + None => continue, + Some(c) => c, + }; + } + + let (blob_index, blob_ctx) = blob_mgr.get_or_create_current_blob(ctx)?; + let chunk_index = blob_ctx.alloc_chunk_index()?; + chunk.set_blob_index(blob_index); + chunk.set_index(chunk_index); + chunk.set_file_offset(file_offset); + let mut dumped_size = chunk.compressed_size(); + if ctx.conversion_type == ConversionType::TarToTarfs { + chunk.set_uncompressed_offset(chunk.compressed_offset()); + chunk.set_uncompressed_size(chunk.compressed_size()); + } else { + let (info, d_size) = + self.dump_file_chunk(ctx, blob_ctx, blob_writer, chunk_data, &mut chunk)?; + if info.is_some() { + chunk_info = info; + } + if let Some(d_size) = d_size { + dumped_size = d_size; + } + } + + let chunk = Arc::new(chunk); + blob_size += dumped_size as u64; + if ctx.conversion_type != ConversionType::TarToTarfs { + blob_ctx.add_chunk_meta_info(&chunk, chunk_info)?; + blob_mgr + .layered_chunk_dict + .add_chunk(chunk.clone(), ctx.digester); + } + self.chunks.push(NodeChunk { + source: ChunkSource::Build, + inner: chunk, + }); + } + + // Finish inode digest calculation + if let Some(h) = inode_hasher { + self.inode.set_digest(h.digest_finalize()); + } + + Ok(blob_size) + } + + fn read_file_chunk( + &self, + ctx: &BuildContext, + reader: &mut R, + buf: &mut [u8], + ) -> Result<(ChunkWrapper, Option)> { + let mut chunk = self.inode.create_chunk(); + let mut chunk_info = None; + if let Some(ref zran) = ctx.blob_zran_generator { + let mut zran = zran.lock().unwrap(); + zran.start_chunk(ctx.chunk_size as u64)?; + reader + .read_exact(buf) + .with_context(|| format!("failed to read node file {:?}", self.path()))?; + let info = zran.finish_chunk()?; + chunk.set_compressed_offset(info.compressed_offset()); + chunk.set_compressed_size(info.compressed_size()); + chunk.set_compressed(true); + chunk_info = Some(info); + } else if let Some(ref tar_reader) = ctx.blob_tar_reader { + // For `tar-ref` case + let pos = tar_reader.position(); + chunk.set_compressed_offset(pos); + chunk.set_compressed_size(buf.len() as u32); + chunk.set_compressed(false); + reader + .read_exact(buf) + .with_context(|| format!("failed to read node file {:?}", self.path()))?; + } else { + reader + .read_exact(buf) + .with_context(|| format!("failed to read node file {:?}", self.path()))?; + } + + // For tar-tarfs case, no need to compute chunk id. + if ctx.conversion_type != ConversionType::TarToTarfs { + chunk.set_id(RafsDigest::from_buf(buf, ctx.digester)); + } + + if ctx.cipher != crypt::Algorithm::None { + chunk.set_encrypted(true); + } + + Ok((chunk, chunk_info)) + } + + /// Dump a chunk from u8 slice into the data blob. + /// Return `BlobChunkInfoV2Ondisk` iff the chunk is added into a batch chunk. + /// Return dumped size iff not `BlobFeatures::SEPARATE`. 
+ /// Dumped size can be zero if chunk data is cached in Batch Generator, + /// and may contain previous chunk data cached in Batch Generator. + fn dump_file_chunk( + &self, + ctx: &BuildContext, + blob_ctx: &mut BlobContext, + blob_writer: &mut dyn Artifact, + chunk_data: &[u8], + chunk: &mut ChunkWrapper, + ) -> Result<(Option, Option)> { + let d_size = chunk_data.len() as u32; + let aligned_d_size = if ctx.aligned_chunk { + // Safe to unwrap because `chunk_size` is much less than u32::MAX. + try_round_up_4k(d_size).unwrap() + } else { + d_size + }; + let pre_d_offset = blob_ctx.current_uncompressed_offset; + blob_ctx.uncompressed_blob_size = pre_d_offset + aligned_d_size as u64; + blob_ctx.current_uncompressed_offset += aligned_d_size as u64; + chunk.set_uncompressed_offset(pre_d_offset); + chunk.set_uncompressed_size(d_size); + + let mut chunk_info = None; + let encrypted = blob_ctx.blob_cipher != crypt::Algorithm::None; + let mut dumped_size = None; + + if ctx.blob_batch_generator.is_some() + && self.inode.child_count() == 1 + && d_size < ctx.batch_size / 2 + { + // This chunk will be added into a batch chunk. + let mut batch = ctx.blob_batch_generator.as_ref().unwrap().lock().unwrap(); + + if batch.chunk_data_buf_len() as u32 + d_size < ctx.batch_size { + // Add into current batch chunk directly. + chunk_info = Some(batch.generate_chunk_info( + blob_ctx.current_compressed_offset, + pre_d_offset, + d_size, + encrypted, + )?); + batch.append_chunk_data_buf(chunk_data); + } else { + // Dump current batch chunk if exists, and then add into a new batch chunk. + if !batch.chunk_data_buf_is_empty() { + // Dump current batch chunk. + let (_, c_size, _) = + Self::write_chunk_data(ctx, blob_ctx, blob_writer, batch.chunk_data_buf())?; + dumped_size = Some(c_size); + batch.add_context(c_size); + batch.clear_chunk_data_buf(); + } + + // Add into a new batch chunk. + chunk_info = Some(batch.generate_chunk_info( + blob_ctx.current_compressed_offset, + pre_d_offset, + d_size, + encrypted, + )?); + batch.append_chunk_data_buf(chunk_data); + } + } else if !ctx.blob_features.contains(BlobFeatures::SEPARATE) { + // For other case which needs to write chunk data to data blobs. Which means, + // `tar-ref`, `targz-ref`, `estargz-ref`, and `estargzindex-ref`, are excluded. + + // Interrupt and dump buffered batch chunks. + // TODO: cancel the interruption. + if let Some(batch) = &ctx.blob_batch_generator { + let mut batch = batch.lock().unwrap(); + if !batch.chunk_data_buf_is_empty() { + // Dump current batch chunk. 
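When `aligned_chunk` is set, `dump_file_chunk` rounds every chunk's uncompressed slot up to a 4KiB boundary (via `try_round_up_4k`) before advancing `current_uncompressed_offset`. The sketch below shows the rounding and the offset bookkeeping in isolation; the `round_up_4k`/`UncompressedLayout` names are illustrative and use the usual power-of-two alignment trick:

```rust
const ALIGN_4K: u64 = 4096;

/// Round `size` up to the next 4KiB boundary; returns None on overflow.
/// Mirrors the intent of `try_round_up_4k` (power-of-two alignment).
fn round_up_4k(size: u64) -> Option<u64> {
    size.checked_add(ALIGN_4K - 1).map(|v| v & !(ALIGN_4K - 1))
}

/// Illustrative uncompressed-offset allocator: each chunk gets an aligned slot.
struct UncompressedLayout {
    current_offset: u64,
}

impl UncompressedLayout {
    /// Returns the offset assigned to a chunk of `d_size` bytes.
    fn place(&mut self, d_size: u32, aligned: bool) -> u64 {
        let slot = if aligned {
            round_up_4k(d_size as u64).expect("chunk size is far below u64::MAX")
        } else {
            d_size as u64
        };
        let offset = self.current_offset;
        self.current_offset += slot;
        offset
    }
}

fn main() {
    assert_eq!(round_up_4k(1), Some(4096));
    assert_eq!(round_up_4k(4096), Some(4096));
    assert_eq!(round_up_4k(4097), Some(8192));

    let mut layout = UncompressedLayout { current_offset: 0 };
    assert_eq!(layout.place(100, true), 0); // occupies [0, 4096)
    assert_eq!(layout.place(5000, true), 4096); // occupies [4096, 12288)
    println!("next free offset: {}", layout.current_offset);
}
```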
+ let (_, c_size, _) = + Self::write_chunk_data(ctx, blob_ctx, blob_writer, batch.chunk_data_buf())?; + dumped_size = Some(c_size); + batch.add_context(c_size); + batch.clear_chunk_data_buf(); + } + } + + let (pre_c_offset, c_size, is_compressed) = + Self::write_chunk_data(ctx, blob_ctx, blob_writer, chunk_data) + .with_context(|| format!("failed to write chunk data {:?}", self.path()))?; + dumped_size = Some(dumped_size.unwrap_or(0) + c_size); + chunk.set_compressed_offset(pre_c_offset); + chunk.set_compressed_size(c_size); + chunk.set_compressed(is_compressed); + } + + if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { + blob_cache.write_blob_data(chunk_data, chunk, aligned_d_size)?; + } + event_tracer!("blob_uncompressed_size", +d_size); + + Ok((chunk_info, dumped_size)) + } + + pub fn write_chunk_data( + ctx: &BuildContext, + blob_ctx: &mut BlobContext, + blob_writer: &mut dyn Artifact, + chunk_data: &[u8], + ) -> Result<(u64, u32, bool)> { + let (compressed, is_compressed) = compress::compress(chunk_data, ctx.compressor) + .with_context(|| "failed to compress node file".to_string())?; + let encrypted = crypt::encrypt_with_context( + &compressed, + &blob_ctx.cipher_object, + &blob_ctx.cipher_ctx, + blob_ctx.blob_cipher != crypt::Algorithm::None, + )?; + let compressed_size = encrypted.len() as u32; + let pre_compressed_offset = blob_ctx.current_compressed_offset; + blob_writer + .write_all(&encrypted) + .context("failed to write blob")?; + blob_ctx.blob_hash.update(&encrypted); + blob_ctx.current_compressed_offset += compressed_size as u64; + blob_ctx.compressed_blob_size += compressed_size as u64; + + Ok((pre_compressed_offset, compressed_size, is_compressed)) + } + + fn deduplicate_chunk( + &mut self, + ctx: &BuildContext, + blob_mgr: &mut BlobManager, + file_offset: u64, + uncompressed_size: u32, + mut chunk: ChunkWrapper, + ) -> Result> { + let dict = &blob_mgr.global_chunk_dict; + let mut cached_chunk = dict.get_chunk(chunk.id(), uncompressed_size); + let from_dict = cached_chunk.is_some(); + if cached_chunk.is_none() { + cached_chunk = blob_mgr + .layered_chunk_dict + .get_chunk(chunk.id(), uncompressed_size); + } + let cached_chunk = match cached_chunk { + Some(v) => v, + None => return Ok(Some(chunk)), + }; + + // The chunks of hardlink should be always deduplicated. + if !self.is_hardlink() { + event_tracer!("dedup_uncompressed_size", +uncompressed_size); + event_tracer!("dedup_chunks", +1); + } + chunk.copy_from(cached_chunk); + chunk.set_file_offset(file_offset); + + // Only add actually referenced data blobs from chunk dictionary to the blob table. + if from_dict { + let blob_index = if let Some(blob_idx) = dict.get_real_blob_idx(chunk.blob_index()) { + blob_idx + } else { + let blob_idx = blob_mgr.alloc_index()?; + dict.set_real_blob_idx(chunk.blob_index(), blob_idx); + if let Some(blob) = dict.get_blob_by_inner_idx(chunk.blob_index()) { + let ctx = BlobContext::from(ctx, blob, ChunkSource::Dict)?; + blob_mgr.add_blob(ctx); + } + blob_idx + }; + chunk.set_blob_index(blob_index); + } + + trace!( + "\t\tfound duplicated chunk: {} compressor {}", + chunk, + ctx.compressor + ); + let source = if from_dict { + ChunkSource::Dict + } else if Some(chunk.blob_index()) != blob_mgr.get_current_blob().map(|(u, _)| u) { + ChunkSource::Parent + } else { + ChunkSource::Build + }; + self.chunks.push(NodeChunk { + source, + inner: Arc::new(chunk), + }); + + Ok(None) + } +} + +// build node object from a filesystem object. 
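`deduplicate_chunk` above consults the global chunk dictionary first and only then the per-build layered dictionary, keyed by chunk digest plus uncompressed size; a hit means the chunk body is not dumped again and only its metadata is copied into the node. The sketch below models just that lookup order with plain `HashMap`s (the `ChunkKey`/`Hit`/`lookup` names are illustrative, not the builder's `ChunkDict` API):

```rust
use std::collections::HashMap;

/// Illustrative dedup key: (chunk digest, uncompressed size).
type ChunkKey = ([u8; 32], u32);

/// Where a deduplicated chunk was found, loosely mirroring `ChunkSource`.
#[derive(Debug, PartialEq)]
enum Hit {
    GlobalDict,
    LayeredDict,
    Miss,
}

fn lookup(
    global: &HashMap<ChunkKey, u32>,  // digest -> blob index in the dictionary
    layered: &HashMap<ChunkKey, u32>, // chunks already emitted by this build
    key: &ChunkKey,
) -> Hit {
    // The user-supplied global dictionary takes precedence ...
    if global.contains_key(key) {
        Hit::GlobalDict
    // ... then chunks produced earlier in the same build.
    } else if layered.contains_key(key) {
        Hit::LayeredDict
    } else {
        Hit::Miss
    }
}

fn main() {
    let mut global = HashMap::new();
    let mut layered = HashMap::new();
    let a: ChunkKey = ([1u8; 32], 4096);
    let b: ChunkKey = ([2u8; 32], 4096);
    let c: ChunkKey = ([3u8; 32], 512);

    global.insert(a, 0);
    layered.insert(b, 1);

    assert_eq!(lookup(&global, &layered, &a), Hit::GlobalDict);
    assert_eq!(lookup(&global, &layered, &b), Hit::LayeredDict);
    assert_eq!(lookup(&global, &layered, &c), Hit::Miss); // chunk data must be dumped
    println!("dedup order: global dict, then layered dict");
}
```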
+impl Node { + /// Create a new instance of [Node] from a filesystem object. + pub fn from_fs_object( + version: RafsVersion, + source: PathBuf, + path: PathBuf, + overlay: Overlay, + chunk_size: u32, + explicit_uidgid: bool, + v6_force_extended_inode: bool, + ) -> Result { + let target = Self::generate_target(&path, &source); + let target_vec = Self::generate_target_vec(&target); + let info = NodeInfo { + explicit_uidgid, + src_ino: 0, + src_dev: u64::MAX, + rdev: u64::MAX, + source, + target, + path, + target_vec, + symlink: None, + xattrs: RafsXAttrs::default(), + v6_force_extended_inode, + }; + let mut node = Node { + info: Arc::new(info), + index: 0, + layer_idx: 0, + overlay, + inode: InodeWrapper::new(version), + chunks: Vec::new(), + v6_datalayout: EROFS_INODE_FLAT_PLAIN, + v6_compact_inode: false, + v6_offset: 0, + v6_dirents_offset: 0, + v6_dirents: Vec::new(), + }; + + node.build_inode(chunk_size) + .context("failed to build Node from fs object")?; + if version.is_v6() { + node.v6_set_inode_compact(); + } + + Ok(node) + } + + fn build_inode_xattr(&mut self) -> Result<()> { + let file_xattrs = match xattr::list(self.path()) { + Ok(x) => x, + Err(e) => { + if e.raw_os_error() == Some(libc::EOPNOTSUPP) { + return Ok(()); + } else { + return Err(anyhow!( + "failed to list xattr of {}, {}", + self.path().display(), + e + )); + } + } + }; + + let mut info = self.info.deref().clone(); + for key in file_xattrs { + let value = xattr::get(self.path(), &key).with_context(|| { + format!("failed to get xattr {:?} of {}", key, self.path().display()) + })?; + info.xattrs.add(key, value.unwrap_or_default())?; + } + if !info.xattrs.is_empty() { + self.inode.set_has_xattr(true); + } + self.info = Arc::new(info); + + Ok(()) + } + + fn build_inode_stat(&mut self) -> Result<()> { + let meta = self + .meta() + .with_context(|| format!("failed to get metadata of {}", self.path().display()))?; + let mut info = self.info.deref().clone(); + + info.src_ino = meta.st_ino(); + info.src_dev = meta.st_dev(); + info.rdev = meta.st_rdev(); + + self.inode.set_mode(meta.st_mode()); + if info.explicit_uidgid { + self.inode.set_uid(meta.st_uid()); + self.inode.set_gid(meta.st_gid()); + } + + // Usually the root directory is created by the build tool (nydusify/buildkit/acceld) + // and the mtime of the root directory is different for each build, which makes it + // completely impossible to achieve repeatable builds, especially in a tar build scenario + // (blob + bootstrap in one tar layer), which causes the layer hash to change and wastes + // registry storage space, so the mtime of the root directory is forced to be ignored here. + let ignore_mtime = self.is_root(); + if !ignore_mtime { + self.inode.set_mtime(meta.st_mtime() as u64); + self.inode.set_mtime_nsec(meta.st_mtime_nsec() as u32); + } + self.inode.set_projid(0); + self.inode.set_rdev(meta.st_rdev() as u32); + // Ignore actual nlink value and calculate from rootfs directory instead + self.inode.set_nlink(1); + + // Different filesystem may have different algorithms to calculate size/blocks for + // directory entries, so let's ignore the value provided by source filesystem and + // calculate it later by ourself. 
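`build_inode_xattr` walks the source file's extended attributes with the `xattr` crate and tolerates filesystems that do not support xattrs at all (EOPNOTSUPP). A hedged standalone sketch of the same pattern follows; it assumes the `xattr` and `libc` crates are available, the path is illustrative, and the exact "not supported" error code can differ by platform:

```rust
use std::collections::HashMap;
use std::ffi::OsString;
use std::io;
use std::path::Path;

/// Collect all extended attributes of `path`, returning an empty map when the
/// underlying filesystem does not support xattrs at all.
fn collect_xattrs(path: &Path) -> io::Result<HashMap<OsString, Vec<u8>>> {
    let names = match xattr::list(path) {
        Ok(names) => names,
        // Treat "operation not supported" as "no xattrs", like the builder does.
        Err(e) if e.raw_os_error() == Some(libc::EOPNOTSUPP) => return Ok(HashMap::new()),
        Err(e) => return Err(e),
    };

    let mut out = HashMap::new();
    for name in names {
        // A racing writer may remove the attribute between list() and get(),
        // hence the Option; fall back to an empty value in that case.
        let value = xattr::get(path, &name)?.unwrap_or_default();
        out.insert(name, value);
    }
    Ok(out)
}

fn main() -> io::Result<()> {
    let attrs = collect_xattrs(Path::new("/tmp"))?;
    for (k, v) in &attrs {
        println!("{:?} = {} bytes", k, v.len());
    }
    Ok(())
}
```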
+ if !self.is_dir() { + self.inode.set_size(meta.st_size()); + self.v5_set_inode_blocks(); + } + self.info = Arc::new(info); + + Ok(()) + } + + fn build_inode(&mut self, chunk_size: u32) -> Result<()> { + let size = self.name().byte_size(); + if size > u16::MAX as usize { + bail!("file name length 0x{:x} is too big", size,); + } + self.inode.set_name_size(size); + + // NOTE: Always retrieve xattr before attr so that we can know the size of xattr pairs. + self.build_inode_xattr() + .with_context(|| format!("failed to get xattr for {}", self.path().display()))?; + self.build_inode_stat() + .with_context(|| format!("failed to build inode {}", self.path().display()))?; + + if self.is_reg() { + let chunk_count = self.chunk_count(chunk_size as u64).with_context(|| { + format!("failed to get chunk count for {}", self.path().display()) + })?; + self.inode.set_child_count(chunk_count); + } else if self.is_symlink() { + let target_path = fs::read_link(self.path()).with_context(|| { + format!( + "failed to read symlink target for {}", + self.path().display() + ) + })?; + let symlink: OsString = target_path.into(); + let size = symlink.byte_size(); + if size > u16::MAX as usize { + bail!("symlink content size 0x{:x} is too big", size); + } + self.inode.set_symlink_size(size); + self.set_symlink(symlink); + } + + Ok(()) + } + + fn meta(&self) -> Result { + self.path() + .symlink_metadata() + .with_context(|| format!("failed to get metadata of {}", self.path().display())) + } +} + +// Access Methods +impl Node { + pub fn is_root(&self) -> bool { + self.target() == OsStr::from_bytes(ROOT_PATH_NAME) + } + + pub fn is_dir(&self) -> bool { + self.inode.is_dir() + } + + pub fn is_symlink(&self) -> bool { + self.inode.is_symlink() + } + + pub fn is_reg(&self) -> bool { + self.inode.is_reg() + } + + pub fn is_hardlink(&self) -> bool { + self.inode.is_hardlink() + } + + pub fn is_special(&self) -> bool { + self.inode.is_special() + } + + pub fn chunk_count(&self, chunk_size: u64) -> Result { + if self.is_reg() { + let chunks = div_round_up(self.inode.size(), chunk_size); + if chunks > u32::MAX as u64 { + bail!("file size 0x{:x} is too big", self.inode.size()) + } else { + Ok(chunks as u32) + } + } else { + Ok(0) + } + } + + /// Get file type of the inode. + pub fn file_type(&self) -> &str { + let mut file_type = ""; + + if self.is_symlink() { + file_type = "symlink"; + } else if self.is_dir() { + file_type = "dir" + } else if self.is_reg() { + if self.is_hardlink() { + file_type = "hardlink"; + } else { + file_type = "file"; + } + } + + file_type + } + + /// Get filename of the inode. + pub fn name(&self) -> &OsStr { + let len = self.info.target_vec.len(); + if len != 0 { + &self.info.target_vec[len - 1] + } else if self.path() == &self.info.source { + OsStr::from_bytes(ROOT_PATH_NAME) + } else { + // Safe to unwrap because `path` is returned from `path()` which is canonicalized + self.path().file_name().unwrap() + } + } + + /// Get path of the inode + pub fn path(&self) -> &PathBuf { + &self.info.path + } + + /// Generate cached components of the target file path. + pub fn generate_target_vec(target: &Path) -> Vec { + target + .components() + .map(|comp| match comp { + Component::RootDir => OsString::from("/"), + Component::Normal(name) => name.to_os_string(), + _ => panic!("invalid file component pattern!"), + }) + .collect::>() + } + + /// Get cached components of the target file path. 
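`generate_target_vec` caches the in-image target path as a vector of components so that `name()` can return the last element cheaply. The sketch below reproduces that mapping with `std::path::Component` and shows the name lookup from the cached vector; the `target_components`/`file_name` helpers are illustrative:

```rust
use std::ffi::OsString;
use std::path::{Component, Path};

/// Split an absolute in-image path into cached components:
/// "/" becomes ["/"], "/a/b" becomes ["/", "a", "b"].
fn target_components(target: &Path) -> Vec<OsString> {
    target
        .components()
        .map(|comp| match comp {
            Component::RootDir => OsString::from("/"),
            Component::Normal(name) => name.to_os_string(),
            // The builder only sees normalized absolute targets, so other
            // component kinds ("." / ".." / prefixes) are treated as errors.
            other => panic!("unexpected path component: {:?}", other),
        })
        .collect()
}

/// The file name is simply the last cached component, "/" for the root itself.
fn file_name(components: &[OsString]) -> &OsString {
    components.last().expect("components are never empty")
}

fn main() {
    let v = target_components(Path::new("/root/child"));
    assert_eq!(
        v,
        vec![
            OsString::from("/"),
            OsString::from("root"),
            OsString::from("child")
        ]
    );
    assert_eq!(file_name(&v), &OsString::from("child"));

    let root = target_components(Path::new("/"));
    assert_eq!(file_name(&root), &OsString::from("/"));
}
```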
+ pub fn target_vec(&self) -> &[OsString] { + &self.info.target_vec + } + + /// Generate target path by stripping the `root` prefix. + /// + /// Strip the `root` prefix if `path` starts with `root`, otherwise keep `path` as is. + /// For example: + /// root: /absolute/path/to/rootfs + /// path: /absolute/path/to/rootfs/file => /file + /// path /not_rootfs_prefix/file => /not_rootfs_prefix/file + pub fn generate_target(path: &Path, root: &Path) -> PathBuf { + if let Ok(p) = path.strip_prefix(root) { + Path::new("/").join(p) + } else { + // Compatible with path `/` + path.to_path_buf() + } + } + + /// Get the absolute path of the inode within the RAFS filesystem. + pub fn target(&self) -> &PathBuf { + &self.info.target + } + + /// Set symlink target for the node. + pub fn set_symlink(&mut self, symlink: OsString) { + let mut info = self.info.deref().clone(); + info.symlink = Some(symlink); + self.info = Arc::new(info); + } + + /// Set extended attributes for the node. + pub fn set_xattr(&mut self, xattr: RafsXAttrs) { + let mut info = self.info.deref().clone(); + info.xattrs = xattr; + self.info = Arc::new(info); + } + + /// Delete an extend attribute with id `key`. + pub fn remove_xattr(&mut self, key: &OsStr) { + let mut info = self.info.deref().clone(); + info.xattrs.remove(key); + if info.xattrs.is_empty() { + self.inode.set_has_xattr(false); + } + self.info = Arc::new(info); + } +} + +#[cfg(test)] +mod tests { + use std::io::BufReader; + + use nydus_utils::{digest, BufReaderInfo}; + use vmm_sys_util::tempfile::TempFile; + + use crate::{ArtifactWriter, BlobCacheGenerator, HashChunkDict}; + + use super::*; + + #[test] + fn test_node_chunk() { + let chunk_wrapper1 = ChunkWrapper::new(RafsVersion::V5); + let mut chunk = NodeChunk { + source: ChunkSource::Build, + inner: Arc::new(chunk_wrapper1), + }; + println!("NodeChunk: {}", chunk); + matches!(chunk.inner.deref().clone(), ChunkWrapper::V5(_)); + + let chunk_wrapper2 = ChunkWrapper::new(RafsVersion::V6); + chunk.copy_from(&chunk_wrapper2); + matches!(chunk.inner.deref().clone(), ChunkWrapper::V6(_)); + + chunk.set_index(0x10); + assert_eq!(chunk.inner.index(), 0x10); + chunk.set_blob_index(0x20); + assert_eq!(chunk.inner.blob_index(), 0x20); + chunk.set_compressed_size(0x30); + assert_eq!(chunk.inner.compressed_size(), 0x30); + chunk.set_file_offset(0x40); + assert_eq!(chunk.inner.file_offset(), 0x40); + } + + #[test] + fn test_node_dump_node_data() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + + let mut inode = InodeWrapper::new(RafsVersion::V5); + inode.set_child_count(2); + inode.set_size(20); + let info = NodeInfo { + explicit_uidgid: true, + src_ino: 1, + src_dev: u64::MAX, + rdev: u64::MAX, + path: source_path.clone(), + source: PathBuf::from("/"), + target: source_path.clone(), + target_vec: vec![OsString::from(source_path)], + symlink: Some(OsString::from("symlink")), + xattrs: RafsXAttrs::new(), + v6_force_extended_inode: false, + }; + let mut node = Node::new(inode, info, 1); + + let mut ctx = BuildContext::default(); + ctx.set_chunk_size(2); + ctx.conversion_type = ConversionType::TarToRef; + ctx.cipher = crypt::Algorithm::Aes128Xts; + let tmp_file1 = TempFile::new().unwrap(); + std::fs::write( + tmp_file1.as_path(), + "This is a test!\n".repeat(32).as_bytes(), + ) + .unwrap(); + let buf_reader = 
BufReader::new(tmp_file1.into_file()); + ctx.blob_tar_reader = Some(BufReaderInfo::from_buf_reader(buf_reader)); + let tmp_file2 = TempFile::new().unwrap(); + ctx.blob_cache_generator = Some( + BlobCacheGenerator::new(crate::ArtifactStorage::SingleFile(PathBuf::from( + tmp_file2.as_path(), + ))) + .unwrap(), + ); + + let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); + let mut chunk_dict = HashChunkDict::new(digest::Algorithm::Sha256); + let mut chunk_wrapper = ChunkWrapper::new(RafsVersion::V5); + chunk_wrapper.set_id(RafsDigest { + data: [ + 209, 217, 144, 116, 135, 113, 3, 121, 133, 92, 96, 25, 219, 145, 151, 219, 119, 47, + 96, 147, 90, 51, 78, 44, 193, 149, 6, 102, 13, 173, 138, 191, + ], + }); + chunk_wrapper.set_uncompressed_size(2); + chunk_dict.add_chunk(Arc::new(chunk_wrapper), digest::Algorithm::Sha256); + blob_mgr.set_chunk_dict(Arc::new(chunk_dict)); + + let tmp_file3 = TempFile::new().unwrap(); + let mut blob_writer = ArtifactWriter::new(crate::ArtifactStorage::SingleFile( + PathBuf::from(tmp_file3.as_path()), + )) + .unwrap(); + + let mut chunk_data_buf = [1u8; 32]; + + node.inode.set_mode(0o755 | libc::S_IFDIR as u32); + let data_size = + node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); + assert!(data_size.is_ok()); + assert_eq!(data_size.unwrap(), 0); + + node.inode.set_mode(0o755 | libc::S_IFLNK as u32); + let data_size = + node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); + assert!(data_size.is_ok()); + assert_eq!(data_size.unwrap(), 0); + + node.inode.set_mode(0o755 | libc::S_IFBLK as u32); + let data_size = + node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); + assert!(data_size.is_ok()); + assert_eq!(data_size.unwrap(), 0); + + node.inode.set_mode(0o755 | libc::S_IFREG as u32); + let data_size = + node.dump_node_data(&ctx, &mut blob_mgr, &mut blob_writer, &mut chunk_data_buf); + assert!(data_size.is_ok()); + assert_eq!(data_size.unwrap(), 18); + } + + #[test] + fn test_node() { + let inode = InodeWrapper::new(RafsVersion::V5); + let info = NodeInfo { + explicit_uidgid: true, + src_ino: 1, + src_dev: u64::MAX, + rdev: u64::MAX, + path: PathBuf::new(), + source: PathBuf::new(), + target: PathBuf::new(), + target_vec: vec![OsString::new()], + symlink: None, + xattrs: RafsXAttrs::new(), + v6_force_extended_inode: false, + }; + + let mut inode1 = inode.clone(); + inode1.set_size(1 << 60); + inode1.set_mode(0o755 | libc::S_IFREG as u32); + let node = Node::new(inode1, info.clone(), 1); + assert!(node.chunk_count(2).is_err()); + + let mut inode2 = inode.clone(); + inode2.set_mode(0o755 | libc::S_IFCHR as u32); + let node = Node::new(inode2, info.clone(), 1); + assert!(node.chunk_count(2).is_ok()); + assert_eq!(node.chunk_count(2).unwrap(), 0); + + let mut inode3 = inode.clone(); + inode3.set_mode(0o755 | libc::S_IFLNK as u32); + let node = Node::new(inode3, info.clone(), 1); + assert_eq!(node.file_type(), "symlink"); + let mut inode4 = inode.clone(); + inode4.set_mode(0o755 | libc::S_IFDIR as u32); + let node = Node::new(inode4, info.clone(), 1); + assert_eq!(node.file_type(), "dir"); + let mut inode5 = inode.clone(); + inode5.set_mode(0o755 | libc::S_IFREG as u32); + let node = Node::new(inode5, info.clone(), 1); + assert_eq!(node.file_type(), "file"); + + let mut info1 = info.clone(); + info1.target_vec = vec![OsString::from("1"), OsString::from("2")]; + let node = Node::new(inode.clone(), info1, 1); + assert_eq!(node.name(), OsString::from("2").as_os_str()); + let mut 
info2 = info.clone(); + info2.target_vec = vec![]; + info2.path = PathBuf::from("/"); + info2.source = PathBuf::from("/"); + let node = Node::new(inode.clone(), info2, 1); + assert_eq!(node.name(), OsStr::from_bytes(ROOT_PATH_NAME)); + let mut info3 = info.clone(); + info3.target_vec = vec![]; + info3.path = PathBuf::from("/1"); + info3.source = PathBuf::from("/11"); + let node = Node::new(inode.clone(), info3, 1); + assert_eq!(node.name(), OsStr::new("1")); + + let target = PathBuf::from("/root/child"); + assert_eq!( + Node::generate_target_vec(&target), + vec![ + OsString::from("/"), + OsString::from("root"), + OsString::from("child") + ] + ); + + let mut node = Node::new(inode, info, 1); + node.set_symlink(OsString::from("symlink")); + assert_eq!(node.info.deref().symlink, Some(OsString::from("symlink"))); + + let mut xatter = RafsXAttrs::new(); + assert!(xatter + .add(OsString::from("user.key"), [1u8; 16].to_vec()) + .is_ok()); + assert!(xatter + .add( + OsString::from("system.posix_acl_default.key"), + [2u8; 8].to_vec() + ) + .is_ok()); + node.set_xattr(xatter); + node.inode.set_has_xattr(true); + node.remove_xattr(OsStr::new("user.key")); + assert!(node.inode.has_xattr()); + node.remove_xattr(OsStr::new("system.posix_acl_default.key")); + assert!(!node.inode.has_xattr()); + } +} diff --git a/builder/src/core/overlay.rs b/builder/src/core/overlay.rs index 7626ddd7b1b..069065efe1f 100644 --- a/builder/src/core/overlay.rs +++ b/builder/src/core/overlay.rs @@ -1,351 +1,351 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Execute file/directory whiteout rules when merging multiple RAFS filesystems -//! according to the OCI or Overlayfs specifications. - -use std::ffi::{OsStr, OsString}; -use std::fmt::{self, Display, Formatter}; -use std::os::unix::ffi::OsStrExt; -use std::str::FromStr; - -use anyhow::{anyhow, Error, Result}; - -use super::node::Node; - -/// Prefix for OCI whiteout file. -pub const OCISPEC_WHITEOUT_PREFIX: &str = ".wh."; -/// Prefix for OCI whiteout opaque. -pub const OCISPEC_WHITEOUT_OPAQUE: &str = ".wh..wh..opq"; -/// Extended attribute key for Overlayfs whiteout opaque. -pub const OVERLAYFS_WHITEOUT_OPAQUE: &str = "trusted.overlay.opaque"; - -/// RAFS filesystem overlay specifications. -/// -/// When merging multiple RAFS filesystems into one, special rules are needed to white out -/// files/directories in lower/parent filesystems. The whiteout specification defined by the -/// OCI image specification and Linux Overlayfs are widely adopted, so both of them are supported -/// by RAFS filesystem. -/// -/// # Overlayfs Whiteout -/// -/// In order to support rm and rmdir without changing the lower filesystem, an overlay filesystem -/// needs to record in the upper filesystem that files have been removed. This is done using -/// whiteouts and opaque directories (non-directories are always opaque). -/// -/// A whiteout is created as a character device with 0/0 device number. When a whiteout is found -/// in the upper level of a merged directory, any matching name in the lower level is ignored, -/// and the whiteout itself is also hidden. -/// -/// A directory is made opaque by setting the xattr “trusted.overlay.opaque” to “y”. Where the upper -/// filesystem contains an opaque directory, any directory in the lower filesystem with the same -/// name is ignored. 
-/// -/// # OCI Image Whiteout -/// - A whiteout file is an empty file with a special filename that signifies a path should be -/// deleted. -/// - A whiteout filename consists of the prefix .wh. plus the basename of the path to be deleted. -/// - As files prefixed with .wh. are special whiteout markers, it is not possible to create a -/// filesystem which has a file or directory with a name beginning with .wh.. -/// - Once a whiteout is applied, the whiteout itself MUST also be hidden. -/// - Whiteout files MUST only apply to resources in lower/parent layers. -/// - Files that are present in the same layer as a whiteout file can only be hidden by whiteout -/// files in subsequent layers. -/// - In addition to expressing that a single entry should be removed from a lower layer, layers -/// may remove all of the children using an opaque whiteout entry. -/// - An opaque whiteout entry is a file with the name .wh..wh..opq indicating that all siblings -/// are hidden in the lower layer. -#[derive(Clone, Copy, PartialEq)] -pub enum WhiteoutSpec { - /// Overlay whiteout rules according to the OCI image specification. - /// - /// https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts - Oci, - /// Overlay whiteout rules according to the Linux Overlayfs specification. - /// - /// "whiteouts and opaque directories" in https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt - Overlayfs, - /// No whiteout, keep all content from lower/parent filesystems. - None, -} - -impl Default for WhiteoutSpec { - fn default() -> Self { - Self::Oci - } -} - -impl FromStr for WhiteoutSpec { - type Err = Error; - - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "oci" => Ok(Self::Oci), - "overlayfs" => Ok(Self::Overlayfs), - "none" => Ok(Self::None), - _ => Err(anyhow!("invalid whiteout spec")), - } - } -} - -/// RAFS filesystem overlay operation types. -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum WhiteoutType { - OciOpaque, - OciRemoval, - OverlayFsOpaque, - OverlayFsRemoval, -} - -impl WhiteoutType { - pub fn is_removal(&self) -> bool { - *self == WhiteoutType::OciRemoval || *self == WhiteoutType::OverlayFsRemoval - } -} - -/// RAFS filesystem node overlay state. -#[allow(dead_code)] -#[derive(Clone, Debug, PartialEq)] -pub enum Overlay { - Lower, - UpperAddition, - UpperModification, -} - -impl Overlay { - pub fn is_lower_layer(&self) -> bool { - self == &Overlay::Lower - } -} - -impl Display for Overlay { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - Overlay::Lower => write!(f, "LOWER"), - Overlay::UpperAddition => write!(f, "ADDED"), - Overlay::UpperModification => write!(f, "MODIFIED"), - } - } -} - -impl Node { - /// Check whether the inode is a special overlayfs whiteout file. - pub fn is_overlayfs_whiteout(&self, spec: WhiteoutSpec) -> bool { - if spec != WhiteoutSpec::Overlayfs { - return false; - } - self.inode.is_chrdev() - && nydus_utils::compact::major_dev(self.info.rdev) == 0 - && nydus_utils::compact::minor_dev(self.info.rdev) == 0 - } - - /// Check whether the inode (directory) is a overlayfs whiteout opaque. - pub fn is_overlayfs_opaque(&self, spec: WhiteoutSpec) -> bool { - if spec != WhiteoutSpec::Overlayfs || !self.is_dir() { - return false; - } - - // A directory is made opaque by setting the xattr "trusted.overlay.opaque" to "y". 
- if let Some(v) = self - .info - .xattrs - .get(&OsString::from(OVERLAYFS_WHITEOUT_OPAQUE)) - { - if let Ok(v) = std::str::from_utf8(v.as_slice()) { - return v == "y"; - } - } - - false - } - - /// Get whiteout type to process the inode. - pub fn whiteout_type(&self, spec: WhiteoutSpec) -> Option { - if self.overlay == Overlay::Lower { - return None; - } - - match spec { - WhiteoutSpec::Oci => { - if let Some(name) = self.name().to_str() { - if name == OCISPEC_WHITEOUT_OPAQUE { - return Some(WhiteoutType::OciOpaque); - } else if name.starts_with(OCISPEC_WHITEOUT_PREFIX) { - return Some(WhiteoutType::OciRemoval); - } - } - } - WhiteoutSpec::Overlayfs => { - if self.is_overlayfs_whiteout(spec) { - return Some(WhiteoutType::OverlayFsRemoval); - } else if self.is_overlayfs_opaque(spec) { - return Some(WhiteoutType::OverlayFsOpaque); - } - } - WhiteoutSpec::None => { - return None; - } - } - - None - } - - /// Get original filename from a whiteout filename. - pub fn origin_name(&self, t: WhiteoutType) -> Option<&OsStr> { - if let Some(name) = self.name().to_str() { - if t == WhiteoutType::OciRemoval { - // the whiteout filename prefixes the basename of the path to be deleted with ".wh.". - return Some(OsStr::from_bytes( - name[OCISPEC_WHITEOUT_PREFIX.len()..].as_bytes(), - )); - } else if t == WhiteoutType::OverlayFsRemoval { - // the whiteout file has the same name as the file to be deleted. - return Some(name.as_ref()); - } - } - - None - } -} - -#[cfg(test)] -mod tests { - use nydus_rafs::metadata::{inode::InodeWrapper, layout::v5::RafsV5Inode}; - - use crate::core::node::NodeInfo; - - use super::*; - - #[test] - fn test_white_spec_from_str() { - let spec = WhiteoutSpec::default(); - assert!(matches!(spec, WhiteoutSpec::Oci)); - - assert!(WhiteoutSpec::from_str("oci").is_ok()); - assert!(WhiteoutSpec::from_str("overlayfs").is_ok()); - assert!(WhiteoutSpec::from_str("none").is_ok()); - assert!(WhiteoutSpec::from_str("foo").is_err()); - } - - #[test] - fn test_white_type_removal_check() { - let t1 = WhiteoutType::OciOpaque; - let t2 = WhiteoutType::OciRemoval; - let t3 = WhiteoutType::OverlayFsOpaque; - let t4 = WhiteoutType::OverlayFsRemoval; - assert!(!t1.is_removal()); - assert!(t2.is_removal()); - assert!(!t3.is_removal()); - assert!(t4.is_removal()); - } - - #[test] - fn test_overlay_low_layer_check() { - let t1 = Overlay::Lower; - let t2 = Overlay::UpperAddition; - let t3 = Overlay::UpperModification; - - assert!(t1.is_lower_layer()); - assert!(!t2.is_lower_layer()); - assert!(!t3.is_lower_layer()); - } - - #[test] - fn test_node() { - let mut inode = InodeWrapper::V5(RafsV5Inode::default()); - inode.set_mode(libc::S_IFCHR as u32); - let node = Node::new(inode, NodeInfo::default(), 0); - assert!(!node.is_overlayfs_whiteout(WhiteoutSpec::None)); - assert!(node.is_overlayfs_whiteout(WhiteoutSpec::Overlayfs)); - assert_eq!( - node.whiteout_type(WhiteoutSpec::Overlayfs).unwrap(), - WhiteoutType::OverlayFsRemoval - ); - - let mut inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info: NodeInfo = NodeInfo::default(); - assert!(info - .xattrs - .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "y".into()) - .is_ok()); - inode.set_mode(libc::S_IFDIR as u32); - let node = Node::new(inode, info, 0); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); - assert!(node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); - assert_eq!( - node.whiteout_type(WhiteoutSpec::Overlayfs).unwrap(), - WhiteoutType::OverlayFsOpaque - ); - - let mut inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info 
= NodeInfo::default(); - assert!(info - .xattrs - .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "n".into()) - .is_ok()); - inode.set_mode(libc::S_IFDIR as u32); - let node = Node::new(inode, info, 0); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); - - let mut inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info = NodeInfo::default(); - assert!(info - .xattrs - .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "y".into()) - .is_ok()); - inode.set_mode(libc::S_IFCHR as u32); - let node = Node::new(inode, info, 0); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); - - let mut inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info = NodeInfo::default(); - assert!(info - .xattrs - .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "n".into()) - .is_ok()); - inode.set_mode(libc::S_IFDIR as u32); - let node = Node::new(inode, info, 0); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); - assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); - - let inode = InodeWrapper::V5(RafsV5Inode::default()); - let info = NodeInfo::default(); - let mut node = Node::new(inode, info, 0); - - assert_eq!(node.whiteout_type(WhiteoutSpec::None), None); - assert_eq!(node.whiteout_type(WhiteoutSpec::Oci), None); - assert_eq!(node.whiteout_type(WhiteoutSpec::Overlayfs), None); - - node.overlay = Overlay::Lower; - assert_eq!(node.whiteout_type(WhiteoutSpec::Overlayfs), None); - - let inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info = NodeInfo::default(); - let name = OCISPEC_WHITEOUT_PREFIX.to_string() + "foo"; - info.target_vec.push(name.clone().into()); - let node = Node::new(inode, info, 0); - assert_eq!( - node.whiteout_type(WhiteoutSpec::Oci).unwrap(), - WhiteoutType::OciRemoval - ); - assert_eq!(node.origin_name(WhiteoutType::OciRemoval).unwrap(), "foo"); - assert_eq!(node.origin_name(WhiteoutType::OciOpaque), None); - assert_eq!( - node.origin_name(WhiteoutType::OverlayFsRemoval).unwrap(), - OsStr::new(&name) - ); - - let inode = InodeWrapper::V5(RafsV5Inode::default()); - let mut info = NodeInfo::default(); - info.target_vec.push(OCISPEC_WHITEOUT_OPAQUE.into()); - let node = Node::new(inode, info, 0); - assert_eq!( - node.whiteout_type(WhiteoutSpec::Oci).unwrap(), - WhiteoutType::OciOpaque - ); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Execute file/directory whiteout rules when merging multiple RAFS filesystems +//! according to the OCI or Overlayfs specifications. + +use std::ffi::{OsStr, OsString}; +use std::fmt::{self, Display, Formatter}; +use std::os::unix::ffi::OsStrExt; +use std::str::FromStr; + +use anyhow::{anyhow, Error, Result}; + +use super::node::Node; + +/// Prefix for OCI whiteout file. +pub const OCISPEC_WHITEOUT_PREFIX: &str = ".wh."; +/// Prefix for OCI whiteout opaque. +pub const OCISPEC_WHITEOUT_OPAQUE: &str = ".wh..wh..opq"; +/// Extended attribute key for Overlayfs whiteout opaque. +pub const OVERLAYFS_WHITEOUT_OPAQUE: &str = "trusted.overlay.opaque"; + +/// RAFS filesystem overlay specifications. +/// +/// When merging multiple RAFS filesystems into one, special rules are needed to white out +/// files/directories in lower/parent filesystems. 
The whiteout specification defined by the +/// OCI image specification and Linux Overlayfs are widely adopted, so both of them are supported +/// by RAFS filesystem. +/// +/// # Overlayfs Whiteout +/// +/// In order to support rm and rmdir without changing the lower filesystem, an overlay filesystem +/// needs to record in the upper filesystem that files have been removed. This is done using +/// whiteouts and opaque directories (non-directories are always opaque). +/// +/// A whiteout is created as a character device with 0/0 device number. When a whiteout is found +/// in the upper level of a merged directory, any matching name in the lower level is ignored, +/// and the whiteout itself is also hidden. +/// +/// A directory is made opaque by setting the xattr “trusted.overlay.opaque” to “y”. Where the upper +/// filesystem contains an opaque directory, any directory in the lower filesystem with the same +/// name is ignored. +/// +/// # OCI Image Whiteout +/// - A whiteout file is an empty file with a special filename that signifies a path should be +/// deleted. +/// - A whiteout filename consists of the prefix .wh. plus the basename of the path to be deleted. +/// - As files prefixed with .wh. are special whiteout markers, it is not possible to create a +/// filesystem which has a file or directory with a name beginning with .wh.. +/// - Once a whiteout is applied, the whiteout itself MUST also be hidden. +/// - Whiteout files MUST only apply to resources in lower/parent layers. +/// - Files that are present in the same layer as a whiteout file can only be hidden by whiteout +/// files in subsequent layers. +/// - In addition to expressing that a single entry should be removed from a lower layer, layers +/// may remove all of the children using an opaque whiteout entry. +/// - An opaque whiteout entry is a file with the name .wh..wh..opq indicating that all siblings +/// are hidden in the lower layer. +#[derive(Clone, Copy, PartialEq)] +pub enum WhiteoutSpec { + /// Overlay whiteout rules according to the OCI image specification. + /// + /// https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + Oci, + /// Overlay whiteout rules according to the Linux Overlayfs specification. + /// + /// "whiteouts and opaque directories" in https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt + Overlayfs, + /// No whiteout, keep all content from lower/parent filesystems. + None, +} + +impl Default for WhiteoutSpec { + fn default() -> Self { + Self::Oci + } +} + +impl FromStr for WhiteoutSpec { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "oci" => Ok(Self::Oci), + "overlayfs" => Ok(Self::Overlayfs), + "none" => Ok(Self::None), + _ => Err(anyhow!("invalid whiteout spec")), + } + } +} + +/// RAFS filesystem overlay operation types. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum WhiteoutType { + OciOpaque, + OciRemoval, + OverlayFsOpaque, + OverlayFsRemoval, +} + +impl WhiteoutType { + pub fn is_removal(&self) -> bool { + *self == WhiteoutType::OciRemoval || *self == WhiteoutType::OverlayFsRemoval + } +} + +/// RAFS filesystem node overlay state. 
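The OCI rules quoted above reduce to two filename checks: `.wh..wh..opq` marks an opaque directory, and any other `.wh.`-prefixed name removes the entry whose basename follows the prefix. A compact sketch of that classification, loosely mirroring `whiteout_type`/`origin_name` with illustrative names:

```rust
const WHITEOUT_PREFIX: &str = ".wh.";
const WHITEOUT_OPAQUE: &str = ".wh..wh..opq";

#[derive(Debug, PartialEq)]
enum OciWhiteout<'a> {
    /// All siblings in lower layers are hidden.
    Opaque,
    /// The named path is removed from lower layers.
    Removal(&'a str),
    /// A regular entry, no whiteout semantics.
    None,
}

fn classify(name: &str) -> OciWhiteout<'_> {
    if name == WHITEOUT_OPAQUE {
        OciWhiteout::Opaque
    } else if let Some(origin) = name.strip_prefix(WHITEOUT_PREFIX) {
        OciWhiteout::Removal(origin)
    } else {
        OciWhiteout::None
    }
}

fn main() {
    assert_eq!(classify(".wh..wh..opq"), OciWhiteout::Opaque);
    assert_eq!(classify(".wh.foo"), OciWhiteout::Removal("foo"));
    assert_eq!(classify("foo"), OciWhiteout::None);
}
```

Note that the opaque check must come first, since `.wh..wh..opq` itself also starts with the `.wh.` prefix.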
+#[allow(dead_code)] +#[derive(Clone, Debug, PartialEq)] +pub enum Overlay { + Lower, + UpperAddition, + UpperModification, +} + +impl Overlay { + pub fn is_lower_layer(&self) -> bool { + self == &Overlay::Lower + } +} + +impl Display for Overlay { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Overlay::Lower => write!(f, "LOWER"), + Overlay::UpperAddition => write!(f, "ADDED"), + Overlay::UpperModification => write!(f, "MODIFIED"), + } + } +} + +impl Node { + /// Check whether the inode is a special overlayfs whiteout file. + pub fn is_overlayfs_whiteout(&self, spec: WhiteoutSpec) -> bool { + if spec != WhiteoutSpec::Overlayfs { + return false; + } + self.inode.is_chrdev() + && nydus_utils::compact::major_dev(self.info.rdev) == 0 + && nydus_utils::compact::minor_dev(self.info.rdev) == 0 + } + + /// Check whether the inode (directory) is a overlayfs whiteout opaque. + pub fn is_overlayfs_opaque(&self, spec: WhiteoutSpec) -> bool { + if spec != WhiteoutSpec::Overlayfs || !self.is_dir() { + return false; + } + + // A directory is made opaque by setting the xattr "trusted.overlay.opaque" to "y". + if let Some(v) = self + .info + .xattrs + .get(&OsString::from(OVERLAYFS_WHITEOUT_OPAQUE)) + { + if let Ok(v) = std::str::from_utf8(v.as_slice()) { + return v == "y"; + } + } + + false + } + + /// Get whiteout type to process the inode. + pub fn whiteout_type(&self, spec: WhiteoutSpec) -> Option { + if self.overlay == Overlay::Lower { + return None; + } + + match spec { + WhiteoutSpec::Oci => { + if let Some(name) = self.name().to_str() { + if name == OCISPEC_WHITEOUT_OPAQUE { + return Some(WhiteoutType::OciOpaque); + } else if name.starts_with(OCISPEC_WHITEOUT_PREFIX) { + return Some(WhiteoutType::OciRemoval); + } + } + } + WhiteoutSpec::Overlayfs => { + if self.is_overlayfs_whiteout(spec) { + return Some(WhiteoutType::OverlayFsRemoval); + } else if self.is_overlayfs_opaque(spec) { + return Some(WhiteoutType::OverlayFsOpaque); + } + } + WhiteoutSpec::None => { + return None; + } + } + + None + } + + /// Get original filename from a whiteout filename. + pub fn origin_name(&self, t: WhiteoutType) -> Option<&OsStr> { + if let Some(name) = self.name().to_str() { + if t == WhiteoutType::OciRemoval { + // the whiteout filename prefixes the basename of the path to be deleted with ".wh.". + return Some(OsStr::from_bytes( + name[OCISPEC_WHITEOUT_PREFIX.len()..].as_bytes(), + )); + } else if t == WhiteoutType::OverlayFsRemoval { + // the whiteout file has the same name as the file to be deleted. 
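For the Overlayfs flavour, detection relies on file metadata rather than names: a whiteout is a character device whose rdev encodes major/minor 0/0, and an opaque directory carries `trusted.overlay.opaque=y`. Below is a hedged sketch of both checks against plain stat-like data; the major/minor split uses the common Linux dev_t encoding and merely stands in for `nydus_utils::compact::{major_dev, minor_dev}`:

```rust
use std::collections::HashMap;

/// Common Linux encoding of dev_t (matches glibc's major()/minor() macros).
fn major_dev(dev: u64) -> u64 {
    ((dev >> 32) & 0xffff_f000) | ((dev >> 8) & 0x0000_0fff)
}
fn minor_dev(dev: u64) -> u64 {
    ((dev >> 12) & 0xffff_ff00) | (dev & 0x0000_00ff)
}

/// Minimal stand-in for the fields `Node` consults.
struct Entry {
    is_chardev: bool,
    is_dir: bool,
    rdev: u64,
    xattrs: HashMap<String, Vec<u8>>,
}

fn is_overlayfs_whiteout(e: &Entry) -> bool {
    e.is_chardev && major_dev(e.rdev) == 0 && minor_dev(e.rdev) == 0
}

fn is_overlayfs_opaque(e: &Entry) -> bool {
    e.is_dir
        && e.xattrs
            .get("trusted.overlay.opaque")
            .map(|v| v.as_slice() == b"y")
            .unwrap_or(false)
}

fn main() {
    let whiteout = Entry { is_chardev: true, is_dir: false, rdev: 0, xattrs: HashMap::new() };
    assert!(is_overlayfs_whiteout(&whiteout));

    let mut xattrs = HashMap::new();
    xattrs.insert("trusted.overlay.opaque".to_string(), b"y".to_vec());
    let opaque = Entry { is_chardev: false, is_dir: true, rdev: 0, xattrs };
    assert!(is_overlayfs_opaque(&opaque));
    assert!(!is_overlayfs_whiteout(&opaque));
}
```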
+ return Some(name.as_ref()); + } + } + + None + } +} + +#[cfg(test)] +mod tests { + use nydus_rafs::metadata::{inode::InodeWrapper, layout::v5::RafsV5Inode}; + + use crate::core::node::NodeInfo; + + use super::*; + + #[test] + fn test_white_spec_from_str() { + let spec = WhiteoutSpec::default(); + assert!(matches!(spec, WhiteoutSpec::Oci)); + + assert!(WhiteoutSpec::from_str("oci").is_ok()); + assert!(WhiteoutSpec::from_str("overlayfs").is_ok()); + assert!(WhiteoutSpec::from_str("none").is_ok()); + assert!(WhiteoutSpec::from_str("foo").is_err()); + } + + #[test] + fn test_white_type_removal_check() { + let t1 = WhiteoutType::OciOpaque; + let t2 = WhiteoutType::OciRemoval; + let t3 = WhiteoutType::OverlayFsOpaque; + let t4 = WhiteoutType::OverlayFsRemoval; + assert!(!t1.is_removal()); + assert!(t2.is_removal()); + assert!(!t3.is_removal()); + assert!(t4.is_removal()); + } + + #[test] + fn test_overlay_low_layer_check() { + let t1 = Overlay::Lower; + let t2 = Overlay::UpperAddition; + let t3 = Overlay::UpperModification; + + assert!(t1.is_lower_layer()); + assert!(!t2.is_lower_layer()); + assert!(!t3.is_lower_layer()); + } + + #[test] + fn test_node() { + let mut inode = InodeWrapper::V5(RafsV5Inode::default()); + inode.set_mode(libc::S_IFCHR as u32); + let node = Node::new(inode, NodeInfo::default(), 0); + assert!(!node.is_overlayfs_whiteout(WhiteoutSpec::None)); + assert!(node.is_overlayfs_whiteout(WhiteoutSpec::Overlayfs)); + assert_eq!( + node.whiteout_type(WhiteoutSpec::Overlayfs).unwrap(), + WhiteoutType::OverlayFsRemoval + ); + + let mut inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info: NodeInfo = NodeInfo::default(); + assert!(info + .xattrs + .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "y".into()) + .is_ok()); + inode.set_mode(libc::S_IFDIR as u32); + let node = Node::new(inode, info, 0); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); + assert!(node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); + assert_eq!( + node.whiteout_type(WhiteoutSpec::Overlayfs).unwrap(), + WhiteoutType::OverlayFsOpaque + ); + + let mut inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info = NodeInfo::default(); + assert!(info + .xattrs + .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "n".into()) + .is_ok()); + inode.set_mode(libc::S_IFDIR as u32); + let node = Node::new(inode, info, 0); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); + + let mut inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info = NodeInfo::default(); + assert!(info + .xattrs + .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "y".into()) + .is_ok()); + inode.set_mode(libc::S_IFCHR as u32); + let node = Node::new(inode, info, 0); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); + + let mut inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info = NodeInfo::default(); + assert!(info + .xattrs + .add(OVERLAYFS_WHITEOUT_OPAQUE.into(), "n".into()) + .is_ok()); + inode.set_mode(libc::S_IFDIR as u32); + let node = Node::new(inode, info, 0); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::None)); + assert!(!node.is_overlayfs_opaque(WhiteoutSpec::Overlayfs)); + + let inode = InodeWrapper::V5(RafsV5Inode::default()); + let info = NodeInfo::default(); + let mut node = Node::new(inode, info, 0); + + assert_eq!(node.whiteout_type(WhiteoutSpec::None), None); + assert_eq!(node.whiteout_type(WhiteoutSpec::Oci), None); + 
assert_eq!(node.whiteout_type(WhiteoutSpec::Overlayfs), None); + + node.overlay = Overlay::Lower; + assert_eq!(node.whiteout_type(WhiteoutSpec::Overlayfs), None); + + let inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info = NodeInfo::default(); + let name = OCISPEC_WHITEOUT_PREFIX.to_string() + "foo"; + info.target_vec.push(name.clone().into()); + let node = Node::new(inode, info, 0); + assert_eq!( + node.whiteout_type(WhiteoutSpec::Oci).unwrap(), + WhiteoutType::OciRemoval + ); + assert_eq!(node.origin_name(WhiteoutType::OciRemoval).unwrap(), "foo"); + assert_eq!(node.origin_name(WhiteoutType::OciOpaque), None); + assert_eq!( + node.origin_name(WhiteoutType::OverlayFsRemoval).unwrap(), + OsStr::new(&name) + ); + + let inode = InodeWrapper::V5(RafsV5Inode::default()); + let mut info = NodeInfo::default(); + info.target_vec.push(OCISPEC_WHITEOUT_OPAQUE.into()); + let node = Node::new(inode, info, 0); + assert_eq!( + node.whiteout_type(WhiteoutSpec::Oci).unwrap(), + WhiteoutType::OciOpaque + ); + } +} diff --git a/builder/src/core/prefetch.rs b/builder/src/core/prefetch.rs index b5695e05686..68e3b3e264d 100644 --- a/builder/src/core/prefetch.rs +++ b/builder/src/core/prefetch.rs @@ -1,391 +1,391 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::path::PathBuf; -use std::str::FromStr; - -use anyhow::{anyhow, Context, Error, Result}; -use indexmap::IndexMap; -use nydus_rafs::metadata::layout::v5::RafsV5PrefetchTable; -use nydus_rafs::metadata::layout::v6::{calculate_nid, RafsV6PrefetchTable}; - -use super::node::Node; -use crate::core::tree::TreeNode; - -/// Filesystem data prefetch policy. -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum PrefetchPolicy { - None, - /// Prefetch will be issued from Fs layer, which leverages inode/chunkinfo to prefetch data - /// from blob no matter where it resides(OSS/Localfs). Basically, it is willing to cache the - /// data into blobcache(if exists). It's more nimble. With this policy applied, image builder - /// currently puts prefetch files' data into a continuous region within blob which behaves very - /// similar to `Blob` policy. - Fs, - /// Prefetch will be issued directly from backend/blob layer - Blob, -} - -impl Default for PrefetchPolicy { - fn default() -> Self { - Self::None - } -} - -impl FromStr for PrefetchPolicy { - type Err = Error; - fn from_str(s: &str) -> Result { - match s { - "none" => Ok(Self::None), - "fs" => Ok(Self::Fs), - "blob" => Ok(Self::Blob), - _ => Err(anyhow!("invalid prefetch policy")), - } - } -} - -/// Gather prefetch patterns from STDIN line by line. -/// -/// Input format: -/// printf "/relative/path/to/rootfs/1\n/relative/path/to/rootfs/2" -/// -/// It does not guarantee that specified path exist in local filesystem because the specified path -/// may exist in parent image/layers. -fn get_patterns() -> Result>> { - let stdin = std::io::stdin(); - let mut patterns = Vec::new(); - - loop { - let mut file = String::new(); - let size = stdin - .read_line(&mut file) - .context("failed to read prefetch pattern")?; - if size == 0 { - return generate_patterns(patterns); - } - patterns.push(file); - } -} - -fn generate_patterns(input: Vec) -> Result>> { - let mut patterns = IndexMap::new(); - - for file in &input { - let file_trimmed: PathBuf = file.trim().into(); - // Sanity check for the list format. 
- if !file_trimmed.is_absolute() { - warn!( - "Illegal file path {} specified, should be absolute path", - file - ); - continue; - } - - let mut current_path = file_trimmed.clone(); - let mut skip = patterns.contains_key(¤t_path); - while !skip && current_path.pop() { - if patterns.contains_key(¤t_path) { - skip = true; - break; - } - } - - if skip { - warn!( - "prefetch pattern {} is covered by previous pattern and thus omitted", - file - ); - } else { - debug!( - "prefetch pattern: {}, trimmed file name {:?}", - file, file_trimmed - ); - patterns.insert(file_trimmed, None); - } - } - - Ok(patterns) -} - -/// Manage filesystem data prefetch configuration and state for builder. -#[derive(Default, Clone)] -pub struct Prefetch { - pub policy: PrefetchPolicy, - - pub disabled: bool, - - // Patterns to generate prefetch inode array, which will be put into the prefetch array - // in the RAFS bootstrap. It may access directory or file inodes. - patterns: IndexMap>, - - // File list to help optimizing layout of data blobs. - // Files from this list may be put at the head of data blob for better prefetch performance, - // The index of matched prefetch pattern is stored in `usize`, - // which will help to sort the prefetch files in the final layout. - // It only stores regular files. - files_prefetch: Vec<(TreeNode, usize)>, - - // It stores all non-prefetch files that is not stored in `prefetch_files`, - // including regular files, dirs, symlinks, etc., - // with the same order of BFS traversal of file tree. - files_non_prefetch: Vec, -} - -impl Prefetch { - /// Create a new instance of [Prefetch]. - pub fn new(policy: PrefetchPolicy) -> Result { - let patterns = if policy != PrefetchPolicy::None { - get_patterns().context("failed to get prefetch patterns")? - } else { - IndexMap::new() - }; - - Ok(Self { - policy, - disabled: false, - patterns, - files_prefetch: Vec::with_capacity(10000), - files_non_prefetch: Vec::with_capacity(10000), - }) - } - - /// Insert node into the prefetch Vector if it matches prefetch rules, - /// while recording the index of matched prefetch pattern, - /// or insert it into non-prefetch Vector. - pub fn insert(&mut self, obj: &TreeNode, node: &Node) { - // Newly created root inode of this rafs has zero size - if self.policy == PrefetchPolicy::None - || self.disabled - || (node.inode.is_reg() && node.inode.size() == 0) - { - self.files_non_prefetch.push(obj.clone()); - return; - } - - let mut path = node.target().clone(); - let mut exact_match = true; - loop { - if let Some((idx, _, v)) = self.patterns.get_full_mut(&path) { - if exact_match { - *v = Some(obj.clone()); - } - if node.is_reg() { - self.files_prefetch.push((obj.clone(), idx)); - } else { - self.files_non_prefetch.push(obj.clone()); - } - return; - } - // If no exact match, try to match parent dir until root. - if !path.pop() { - self.files_non_prefetch.push(obj.clone()); - return; - } - exact_match = false; - } - } - - /// Get node Vector of files in the prefetch list and non-prefetch list. - /// The order of prefetch files is the same as the order of prefetch patterns. - /// The order of non-prefetch files is the same as the order of BFS traversal of file tree. - pub fn get_file_nodes(&self) -> (Vec, Vec) { - let mut p_files = self.files_prefetch.clone(); - p_files.sort_by_key(|k| k.1); - - let p_files = p_files.into_iter().map(|(s, _)| s).collect(); - - (p_files, self.files_non_prefetch.clone()) - } - - /// Get the number of ``valid`` prefetch rules. 
- pub fn fs_prefetch_rule_count(&self) -> u32 { - if self.policy == PrefetchPolicy::Fs { - self.patterns.values().filter(|v| v.is_some()).count() as u32 - } else { - 0 - } - } - - /// Generate filesystem layer prefetch list for RAFS v5. - pub fn get_v5_prefetch_table(&mut self) -> Option { - if self.policy == PrefetchPolicy::Fs { - let mut prefetch_table = RafsV5PrefetchTable::new(); - for i in self.patterns.values().filter_map(|v| v.clone()) { - let node = i.lock().unwrap(); - assert!(node.inode.ino() < u32::MAX as u64); - prefetch_table.add_entry(node.inode.ino() as u32); - } - Some(prefetch_table) - } else { - None - } - } - - /// Generate filesystem layer prefetch list for RAFS v6. - pub fn get_v6_prefetch_table(&mut self, meta_addr: u64) -> Option { - if self.policy == PrefetchPolicy::Fs { - let mut prefetch_table = RafsV6PrefetchTable::new(); - for i in self.patterns.values().filter_map(|v| v.clone()) { - let node = i.lock().unwrap(); - let ino = node.inode.ino(); - debug_assert!(ino > 0); - let nid = calculate_nid(node.v6_offset, meta_addr); - // 32bit nid can represent 128GB bootstrap, it is large enough, no need - // to worry about casting here - assert!(nid < u32::MAX as u64); - trace!( - "v6 prefetch table: map node index {} to offset {} nid {} path {:?} name {:?}", - ino, - node.v6_offset, - nid, - node.path(), - node.name() - ); - prefetch_table.add_entry(nid as u32); - } - Some(prefetch_table) - } else { - None - } - } - - /// Disable filesystem data prefetch. - pub fn disable(&mut self) { - self.disabled = true; - } - - /// Reset to initialization state. - pub fn clear(&mut self) { - self.disabled = false; - self.patterns.clear(); - self.files_prefetch.clear(); - self.files_non_prefetch.clear(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::core::node::NodeInfo; - use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; - use std::sync::Mutex; - - #[test] - fn test_generate_pattern() { - let input = vec![ - "/a/b".to_string(), - "/a/b/c".to_string(), - "/a/b/d".to_string(), - "/a/b/d/e".to_string(), - "/f".to_string(), - "/h/i".to_string(), - ]; - let patterns = generate_patterns(input).unwrap(); - assert_eq!(patterns.len(), 3); - assert!(patterns.contains_key(&PathBuf::from("/a/b"))); - assert!(patterns.contains_key(&PathBuf::from("/f"))); - assert!(patterns.contains_key(&PathBuf::from("/h/i"))); - assert!(!patterns.contains_key(&PathBuf::from("/"))); - assert!(!patterns.contains_key(&PathBuf::from("/a"))); - assert!(!patterns.contains_key(&PathBuf::from("/a/b/c"))); - assert!(!patterns.contains_key(&PathBuf::from("/a/b/d"))); - assert!(!patterns.contains_key(&PathBuf::from("/a/b/d/e"))); - assert!(!patterns.contains_key(&PathBuf::from("/k"))); - } - - #[test] - fn test_prefetch_policy() { - let policy = PrefetchPolicy::from_str("fs").unwrap(); - assert_eq!(policy, PrefetchPolicy::Fs); - let policy = PrefetchPolicy::from_str("blob").unwrap(); - assert_eq!(policy, PrefetchPolicy::Blob); - let policy = PrefetchPolicy::from_str("none").unwrap(); - assert_eq!(policy, PrefetchPolicy::None); - PrefetchPolicy::from_str("").unwrap_err(); - PrefetchPolicy::from_str("invalid").unwrap_err(); - } - - #[test] - fn test_prefetch() { - let input = vec![ - "/a/b".to_string(), - "/f".to_string(), - "/h/i".to_string(), - "/k".to_string(), - ]; - let patterns = generate_patterns(input).unwrap(); - let mut prefetch = Prefetch { - policy: PrefetchPolicy::Fs, - disabled: false, - patterns, - files_prefetch: Vec::with_capacity(10), - files_non_prefetch: 
Vec::with_capacity(10), - }; - let mut inode = InodeWrapper::new(RafsVersion::V6); - inode.set_mode(0o755 | libc::S_IFREG as u32); - inode.set_size(1); - - let info = NodeInfo::default(); - - let mut info1 = info.clone(); - info1.target = PathBuf::from("/f"); - let node1 = Node::new(inode.clone(), info1, 1); - let node1 = TreeNode::new(Mutex::from(node1)); - prefetch.insert(&node1, &node1.lock().unwrap()); - - let inode2 = inode.clone(); - let mut info2 = info.clone(); - info2.target = PathBuf::from("/a/b"); - let node2 = Node::new(inode2, info2, 1); - let node2 = TreeNode::new(Mutex::from(node2)); - prefetch.insert(&node2, &node2.lock().unwrap()); - - let inode3 = inode.clone(); - let mut info3 = info.clone(); - info3.target = PathBuf::from("/h/i/j"); - let node3 = Node::new(inode3, info3, 1); - let node3 = TreeNode::new(Mutex::from(node3)); - prefetch.insert(&node3, &node3.lock().unwrap()); - - let inode4 = inode.clone(); - let mut info4 = info.clone(); - info4.target = PathBuf::from("/z"); - let node4 = Node::new(inode4, info4, 1); - let node4 = TreeNode::new(Mutex::from(node4)); - prefetch.insert(&node4, &node4.lock().unwrap()); - - let inode5 = inode.clone(); - inode.set_mode(0o755 | libc::S_IFDIR as u32); - inode.set_size(0); - let mut info5 = info; - info5.target = PathBuf::from("/a/b/d"); - let node5 = Node::new(inode5, info5, 1); - let node5 = TreeNode::new(Mutex::from(node5)); - prefetch.insert(&node5, &node5.lock().unwrap()); - - // node1, node2 - assert_eq!(prefetch.fs_prefetch_rule_count(), 2); - - let (pre, non_pre) = prefetch.get_file_nodes(); - assert_eq!(pre.len(), 4); - assert_eq!(non_pre.len(), 1); - let pre_str: Vec = pre - .iter() - .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) - .collect(); - assert_eq!(pre_str, vec!["/a/b", "/a/b/d", "/f", "/h/i/j"]); - let non_pre_str: Vec = non_pre - .iter() - .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) - .collect(); - assert_eq!(non_pre_str, vec!["/z"]); - - prefetch.clear(); - assert_eq!(prefetch.fs_prefetch_rule_count(), 0); - let (pre, non_pre) = prefetch.get_file_nodes(); - assert_eq!(pre.len(), 0); - assert_eq!(non_pre.len(), 0); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::path::PathBuf; +use std::str::FromStr; + +use anyhow::{anyhow, Context, Error, Result}; +use indexmap::IndexMap; +use nydus_rafs::metadata::layout::v5::RafsV5PrefetchTable; +use nydus_rafs::metadata::layout::v6::{calculate_nid, RafsV6PrefetchTable}; + +use super::node::Node; +use crate::core::tree::TreeNode; + +/// Filesystem data prefetch policy. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum PrefetchPolicy { + None, + /// Prefetch will be issued from Fs layer, which leverages inode/chunkinfo to prefetch data + /// from blob no matter where it resides(OSS/Localfs). Basically, it is willing to cache the + /// data into blobcache(if exists). It's more nimble. With this policy applied, image builder + /// currently puts prefetch files' data into a continuous region within blob which behaves very + /// similar to `Blob` policy. 
+ Fs, + /// Prefetch will be issued directly from backend/blob layer + Blob, +} + +impl Default for PrefetchPolicy { + fn default() -> Self { + Self::None + } +} + +impl FromStr for PrefetchPolicy { + type Err = Error; + fn from_str(s: &str) -> Result { + match s { + "none" => Ok(Self::None), + "fs" => Ok(Self::Fs), + "blob" => Ok(Self::Blob), + _ => Err(anyhow!("invalid prefetch policy")), + } + } +} + +/// Gather prefetch patterns from STDIN line by line. +/// +/// Input format: +/// printf "/relative/path/to/rootfs/1\n/relative/path/to/rootfs/2" +/// +/// It does not guarantee that specified path exist in local filesystem because the specified path +/// may exist in parent image/layers. +fn get_patterns() -> Result>> { + let stdin = std::io::stdin(); + let mut patterns = Vec::new(); + + loop { + let mut file = String::new(); + let size = stdin + .read_line(&mut file) + .context("failed to read prefetch pattern")?; + if size == 0 { + return generate_patterns(patterns); + } + patterns.push(file); + } +} + +fn generate_patterns(input: Vec) -> Result>> { + let mut patterns = IndexMap::new(); + + for file in &input { + let file_trimmed: PathBuf = file.trim().into(); + // Sanity check for the list format. + if !file_trimmed.is_absolute() { + warn!( + "Illegal file path {} specified, should be absolute path", + file + ); + continue; + } + + let mut current_path = file_trimmed.clone(); + let mut skip = patterns.contains_key(¤t_path); + while !skip && current_path.pop() { + if patterns.contains_key(¤t_path) { + skip = true; + break; + } + } + + if skip { + warn!( + "prefetch pattern {} is covered by previous pattern and thus omitted", + file + ); + } else { + debug!( + "prefetch pattern: {}, trimmed file name {:?}", + file, file_trimmed + ); + patterns.insert(file_trimmed, None); + } + } + + Ok(patterns) +} + +/// Manage filesystem data prefetch configuration and state for builder. +#[derive(Default, Clone)] +pub struct Prefetch { + pub policy: PrefetchPolicy, + + pub disabled: bool, + + // Patterns to generate prefetch inode array, which will be put into the prefetch array + // in the RAFS bootstrap. It may access directory or file inodes. + patterns: IndexMap>, + + // File list to help optimizing layout of data blobs. + // Files from this list may be put at the head of data blob for better prefetch performance, + // The index of matched prefetch pattern is stored in `usize`, + // which will help to sort the prefetch files in the final layout. + // It only stores regular files. + files_prefetch: Vec<(TreeNode, usize)>, + + // It stores all non-prefetch files that is not stored in `prefetch_files`, + // including regular files, dirs, symlinks, etc., + // with the same order of BFS traversal of file tree. + files_non_prefetch: Vec, +} + +impl Prefetch { + /// Create a new instance of [Prefetch]. + pub fn new(policy: PrefetchPolicy) -> Result { + let patterns = if policy != PrefetchPolicy::None { + get_patterns().context("failed to get prefetch patterns")? + } else { + IndexMap::new() + }; + + Ok(Self { + policy, + disabled: false, + patterns, + files_prefetch: Vec::with_capacity(10000), + files_non_prefetch: Vec::with_capacity(10000), + }) + } + + /// Insert node into the prefetch Vector if it matches prefetch rules, + /// while recording the index of matched prefetch pattern, + /// or insert it into non-prefetch Vector. 
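Editor's note: the `insert` routine documented above matches a node's target path against the pattern map by walking up parent directories until a pattern hits or the root is passed. The standalone sketch below is only an illustration of that rule (it is not the builder's API; it uses a plain `BTreeMap` of pattern indices instead of the `IndexMap` of `TreeNode` slots shown above), and the returned index is what later keys the ordering of prefetch files in the blob layout:

```rust
use std::collections::BTreeMap;
use std::path::PathBuf;

// Illustrative only: walk the target path upward until a prefetch pattern matches.
fn matched_pattern(patterns: &BTreeMap<PathBuf, usize>, target: &str) -> Option<usize> {
    let mut path = PathBuf::from(target);
    loop {
        if let Some(idx) = patterns.get(&path) {
            return Some(*idx); // this index later sorts the prefetch files
        }
        if !path.pop() {
            return None; // fell past "/": not covered by any pattern
        }
    }
}

fn main() {
    let mut patterns = BTreeMap::new();
    patterns.insert(PathBuf::from("/usr/bin"), 0);
    patterns.insert(PathBuf::from("/etc"), 1);
    assert_eq!(matched_pattern(&patterns, "/usr/bin/bash"), Some(0));
    assert_eq!(matched_pattern(&patterns, "/etc"), Some(1));
    assert_eq!(matched_pattern(&patterns, "/var/log/syslog"), None);
}
```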
+ pub fn insert(&mut self, obj: &TreeNode, node: &Node) { + // Newly created root inode of this rafs has zero size + if self.policy == PrefetchPolicy::None + || self.disabled + || (node.inode.is_reg() && node.inode.size() == 0) + { + self.files_non_prefetch.push(obj.clone()); + return; + } + + let mut path = node.target().clone(); + let mut exact_match = true; + loop { + if let Some((idx, _, v)) = self.patterns.get_full_mut(&path) { + if exact_match { + *v = Some(obj.clone()); + } + if node.is_reg() { + self.files_prefetch.push((obj.clone(), idx)); + } else { + self.files_non_prefetch.push(obj.clone()); + } + return; + } + // If no exact match, try to match parent dir until root. + if !path.pop() { + self.files_non_prefetch.push(obj.clone()); + return; + } + exact_match = false; + } + } + + /// Get node Vector of files in the prefetch list and non-prefetch list. + /// The order of prefetch files is the same as the order of prefetch patterns. + /// The order of non-prefetch files is the same as the order of BFS traversal of file tree. + pub fn get_file_nodes(&self) -> (Vec, Vec) { + let mut p_files = self.files_prefetch.clone(); + p_files.sort_by_key(|k| k.1); + + let p_files = p_files.into_iter().map(|(s, _)| s).collect(); + + (p_files, self.files_non_prefetch.clone()) + } + + /// Get the number of ``valid`` prefetch rules. + pub fn fs_prefetch_rule_count(&self) -> u32 { + if self.policy == PrefetchPolicy::Fs { + self.patterns.values().filter(|v| v.is_some()).count() as u32 + } else { + 0 + } + } + + /// Generate filesystem layer prefetch list for RAFS v5. + pub fn get_v5_prefetch_table(&mut self) -> Option { + if self.policy == PrefetchPolicy::Fs { + let mut prefetch_table = RafsV5PrefetchTable::new(); + for i in self.patterns.values().filter_map(|v| v.clone()) { + let node = i.lock().unwrap(); + assert!(node.inode.ino() < u32::MAX as u64); + prefetch_table.add_entry(node.inode.ino() as u32); + } + Some(prefetch_table) + } else { + None + } + } + + /// Generate filesystem layer prefetch list for RAFS v6. + pub fn get_v6_prefetch_table(&mut self, meta_addr: u64) -> Option { + if self.policy == PrefetchPolicy::Fs { + let mut prefetch_table = RafsV6PrefetchTable::new(); + for i in self.patterns.values().filter_map(|v| v.clone()) { + let node = i.lock().unwrap(); + let ino = node.inode.ino(); + debug_assert!(ino > 0); + let nid = calculate_nid(node.v6_offset, meta_addr); + // 32bit nid can represent 128GB bootstrap, it is large enough, no need + // to worry about casting here + assert!(nid < u32::MAX as u64); + trace!( + "v6 prefetch table: map node index {} to offset {} nid {} path {:?} name {:?}", + ino, + node.v6_offset, + nid, + node.path(), + node.name() + ); + prefetch_table.add_entry(nid as u32); + } + Some(prefetch_table) + } else { + None + } + } + + /// Disable filesystem data prefetch. + pub fn disable(&mut self) { + self.disabled = true; + } + + /// Reset to initialization state. 
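Editor's note: the "32bit nid can represent 128GB bootstrap" comment in `get_v6_prefetch_table` is easier to see with the numbers written out. A rough sketch, assuming the usual EROFS layout where `calculate_nid()` yields the index of a 32-byte inode slot relative to the metadata base (the constant and helper below are illustrative, not the crate's API):

```rust
// Assumption: EROFS inodes are addressed in 32-byte slots.
const INODE_SLOT_SIZE: u64 = 32;

// Assumed to mirror calculate_nid(): slot index relative to the metadata base.
fn nid_for(offset: u64, meta_addr: u64) -> u64 {
    (offset - meta_addr) / INODE_SLOT_SIZE
}

fn main() {
    // u32::MAX slots of 32 bytes cover just shy of 128 GiB of metadata,
    // so casting nid to u32 is safe for any realistic bootstrap.
    let max_covered = (u32::MAX as u64) * INODE_SLOT_SIZE;
    assert_eq!(max_covered, 128 * (1u64 << 30) - 32);

    // An inode 4 KiB past the metadata base lands in slot 128.
    assert_eq!(nid_for(0x2000, 0x1000), 128);
}
```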
+ pub fn clear(&mut self) { + self.disabled = false; + self.patterns.clear(); + self.files_prefetch.clear(); + self.files_non_prefetch.clear(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::node::NodeInfo; + use nydus_rafs::metadata::{inode::InodeWrapper, RafsVersion}; + use std::sync::Mutex; + + #[test] + fn test_generate_pattern() { + let input = vec![ + "/a/b".to_string(), + "/a/b/c".to_string(), + "/a/b/d".to_string(), + "/a/b/d/e".to_string(), + "/f".to_string(), + "/h/i".to_string(), + ]; + let patterns = generate_patterns(input).unwrap(); + assert_eq!(patterns.len(), 3); + assert!(patterns.contains_key(&PathBuf::from("/a/b"))); + assert!(patterns.contains_key(&PathBuf::from("/f"))); + assert!(patterns.contains_key(&PathBuf::from("/h/i"))); + assert!(!patterns.contains_key(&PathBuf::from("/"))); + assert!(!patterns.contains_key(&PathBuf::from("/a"))); + assert!(!patterns.contains_key(&PathBuf::from("/a/b/c"))); + assert!(!patterns.contains_key(&PathBuf::from("/a/b/d"))); + assert!(!patterns.contains_key(&PathBuf::from("/a/b/d/e"))); + assert!(!patterns.contains_key(&PathBuf::from("/k"))); + } + + #[test] + fn test_prefetch_policy() { + let policy = PrefetchPolicy::from_str("fs").unwrap(); + assert_eq!(policy, PrefetchPolicy::Fs); + let policy = PrefetchPolicy::from_str("blob").unwrap(); + assert_eq!(policy, PrefetchPolicy::Blob); + let policy = PrefetchPolicy::from_str("none").unwrap(); + assert_eq!(policy, PrefetchPolicy::None); + PrefetchPolicy::from_str("").unwrap_err(); + PrefetchPolicy::from_str("invalid").unwrap_err(); + } + + #[test] + fn test_prefetch() { + let input = vec![ + "/a/b".to_string(), + "/f".to_string(), + "/h/i".to_string(), + "/k".to_string(), + ]; + let patterns = generate_patterns(input).unwrap(); + let mut prefetch = Prefetch { + policy: PrefetchPolicy::Fs, + disabled: false, + patterns, + files_prefetch: Vec::with_capacity(10), + files_non_prefetch: Vec::with_capacity(10), + }; + let mut inode = InodeWrapper::new(RafsVersion::V6); + inode.set_mode(0o755 | libc::S_IFREG as u32); + inode.set_size(1); + + let info = NodeInfo::default(); + + let mut info1 = info.clone(); + info1.target = PathBuf::from("/f"); + let node1 = Node::new(inode.clone(), info1, 1); + let node1 = TreeNode::new(Mutex::from(node1)); + prefetch.insert(&node1, &node1.lock().unwrap()); + + let inode2 = inode.clone(); + let mut info2 = info.clone(); + info2.target = PathBuf::from("/a/b"); + let node2 = Node::new(inode2, info2, 1); + let node2 = TreeNode::new(Mutex::from(node2)); + prefetch.insert(&node2, &node2.lock().unwrap()); + + let inode3 = inode.clone(); + let mut info3 = info.clone(); + info3.target = PathBuf::from("/h/i/j"); + let node3 = Node::new(inode3, info3, 1); + let node3 = TreeNode::new(Mutex::from(node3)); + prefetch.insert(&node3, &node3.lock().unwrap()); + + let inode4 = inode.clone(); + let mut info4 = info.clone(); + info4.target = PathBuf::from("/z"); + let node4 = Node::new(inode4, info4, 1); + let node4 = TreeNode::new(Mutex::from(node4)); + prefetch.insert(&node4, &node4.lock().unwrap()); + + let inode5 = inode.clone(); + inode.set_mode(0o755 | libc::S_IFDIR as u32); + inode.set_size(0); + let mut info5 = info; + info5.target = PathBuf::from("/a/b/d"); + let node5 = Node::new(inode5, info5, 1); + let node5 = TreeNode::new(Mutex::from(node5)); + prefetch.insert(&node5, &node5.lock().unwrap()); + + // node1, node2 + assert_eq!(prefetch.fs_prefetch_rule_count(), 2); + + let (pre, non_pre) = prefetch.get_file_nodes(); + assert_eq!(pre.len(), 4); + 
assert_eq!(non_pre.len(), 1); + let pre_str: Vec = pre + .iter() + .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) + .collect(); + assert_eq!(pre_str, vec!["/a/b", "/a/b/d", "/f", "/h/i/j"]); + let non_pre_str: Vec = non_pre + .iter() + .map(|n| n.lock().unwrap().target().to_str().unwrap().to_owned()) + .collect(); + assert_eq!(non_pre_str, vec!["/z"]); + + prefetch.clear(); + assert_eq!(prefetch.fs_prefetch_rule_count(), 0); + let (pre, non_pre) = prefetch.get_file_nodes(); + assert_eq!(pre.len(), 0); + assert_eq!(non_pre.len(), 0); + } +} diff --git a/builder/src/core/tree.rs b/builder/src/core/tree.rs index d701c2bbd9f..b2b9e3dcc45 100644 --- a/builder/src/core/tree.rs +++ b/builder/src/core/tree.rs @@ -1,503 +1,503 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! An in-memory tree structure to maintain information for filesystem metadata. -//! -//! Steps to build the first layer for a Rafs image: -//! - Build the upper tree (FileSystemTree) from the source directory. -//! - Traverse the upper tree (FileSystemTree) to dump bootstrap and data blobs. -//! -//! Steps to build the second and following on layers for a Rafs image: -//! - Build the upper tree (FileSystemTree) from the source directory. -//! - Load the lower tree (MetadataTree) from a metadata blob. -//! - Merge the final tree (OverlayTree) by applying the upper tree (FileSystemTree) to the -//! lower tree (MetadataTree). -//! - Traverse the merged tree (OverlayTree) to dump bootstrap and data blobs. - -use std::ffi::OsString; -use std::os::unix::ffi::OsStrExt; -use std::path::{Path, PathBuf}; -use std::rc::Rc; -use std::sync::{Arc, Mutex, MutexGuard}; - -use anyhow::{bail, Result}; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::{bytes_to_os_str, RafsXAttrs}; -use nydus_rafs::metadata::{Inode, RafsInodeExt, RafsSuper}; -use nydus_utils::{lazy_drop, root_tracer, timing_tracer}; - -use super::node::{ChunkSource, Node, NodeChunk, NodeInfo}; -use super::overlay::{Overlay, WhiteoutType}; -use crate::core::overlay::OVERLAYFS_WHITEOUT_OPAQUE; -use crate::{BuildContext, ChunkDict}; - -/// Type alias for tree internal node. -pub type TreeNode = Rc>; - -/// An in-memory tree structure to maintain information and topology of filesystem nodes. -#[derive(Clone)] -pub struct Tree { - /// Filesystem node. - pub node: TreeNode, - /// Cached base name. - name: Vec, - /// Children tree nodes. - pub children: Vec, -} - -impl Tree { - /// Create a new instance of `Tree` from a filesystem node. - pub fn new(node: Node) -> Self { - let name = node.name().as_bytes().to_vec(); - Tree { - node: Rc::new(Mutex::new(node)), - name, - children: Vec::new(), - } - } - - /// Load a `Tree` from a bootstrap file, and optionally caches chunk information. - pub fn from_bootstrap(rs: &RafsSuper, chunk_dict: &mut T) -> Result { - let tree_builder = MetadataTreeBuilder::new(rs); - let root_ino = rs.superblock.root_ino(); - let root_inode = rs.get_extended_inode(root_ino, true)?; - let root_node = MetadataTreeBuilder::parse_node(rs, root_inode, PathBuf::from("/"))?; - let mut tree = Tree::new(root_node); - - tree.children = timing_tracer!( - { tree_builder.load_children(root_ino, Option::::None, chunk_dict, true,) }, - "load_tree_from_bootstrap" - )?; - - Ok(tree) - } - - /// Get name of the tree node. 
- pub fn name(&self) -> &[u8] { - &self.name - } - - /// Set `Node` associated with the tree node. - pub fn set_node(&mut self, node: Node) { - self.node = Rc::new(Mutex::new(node)); - } - - /// Get mutex guard to access the associated `Node` object. - pub fn lock_node(&self) -> MutexGuard { - self.node.lock().unwrap() - } - - /// Walk all nodes in DFS mode. - pub fn walk_dfs(&self, pre: &mut F1, post: &mut F2) -> Result<()> - where - F1: FnMut(&Tree) -> Result<()>, - F2: FnMut(&Tree) -> Result<()>, - { - pre(self)?; - for child in &self.children { - child.walk_dfs(pre, post)?; - } - post(self)?; - - Ok(()) - } - - /// Walk all nodes in pre DFS mode. - pub fn walk_dfs_pre(&self, cb: &mut F) -> Result<()> - where - F: FnMut(&Tree) -> Result<()>, - { - self.walk_dfs(cb, &mut |_t| Ok(())) - } - - /// Walk all nodes in post DFS mode. - pub fn walk_dfs_post(&self, cb: &mut F) -> Result<()> - where - F: FnMut(&Tree) -> Result<()>, - { - self.walk_dfs(&mut |_t| Ok(()), cb) - } - - /// Walk the tree in BFS mode. - pub fn walk_bfs(&self, handle_self: bool, cb: &mut F) -> Result<()> - where - F: FnMut(&Tree) -> Result<()>, - { - if handle_self { - cb(self)?; - } - - let mut dirs = Vec::with_capacity(32); - for child in &self.children { - cb(child)?; - if child.lock_node().is_dir() { - dirs.push(child); - } - } - for dir in dirs { - dir.walk_bfs(false, cb)?; - } - - Ok(()) - } - - /// Insert a new child node into the tree. - pub fn insert_child(&mut self, child: Tree) { - if let Err(idx) = self - .children - .binary_search_by_key(&&child.name, |n| &n.name) - { - self.children.insert(idx, child); - } - } - - /// Get index of child node with specified `name`. - pub fn get_child_idx(&self, name: &[u8]) -> Option { - self.children.binary_search_by_key(&name, |n| &n.name).ok() - } - - /// Get the tree node corresponding to the path. - pub fn get_node(&self, path: &Path) -> Option<&Tree> { - let target_vec = Node::generate_target_vec(path); - assert!(!target_vec.is_empty()); - let mut tree = self; - for name in &target_vec[1..] { - match tree.get_child_idx(name.as_bytes()) { - Some(idx) => tree = &tree.children[idx], - None => return None, - } - } - Some(tree) - } - - /// Merge the upper layer tree into the lower layer tree, applying whiteout rules. - pub fn merge_overaly(&mut self, ctx: &BuildContext, upper: Tree) -> Result<()> { - assert_eq!(self.name, "/".as_bytes()); - assert_eq!(upper.name, "/".as_bytes()); - - // Handle the root node. - upper.lock_node().overlay = Overlay::UpperModification; - self.node = upper.node.clone(); - self.merge_children(ctx, &upper)?; - lazy_drop(upper); - - Ok(()) - } - - fn merge_children(&mut self, ctx: &BuildContext, upper: &Tree) -> Result<()> { - // Handle whiteout nodes in the first round, and handle other nodes in the second round. 
- let mut modified = Vec::with_capacity(upper.children.len()); - for u in upper.children.iter() { - let mut u_node = u.lock_node(); - match u_node.whiteout_type(ctx.whiteout_spec) { - Some(WhiteoutType::OciRemoval) => { - if let Some(origin_name) = u_node.origin_name(WhiteoutType::OciRemoval) { - if let Some(idx) = self.get_child_idx(origin_name.as_bytes()) { - self.children.remove(idx); - } - } - } - Some(WhiteoutType::OciOpaque) => { - self.children.clear(); - } - Some(WhiteoutType::OverlayFsRemoval) => { - if let Some(idx) = self.get_child_idx(&u.name) { - self.children.remove(idx); - } - } - Some(WhiteoutType::OverlayFsOpaque) => { - if let Some(idx) = self.get_child_idx(&u.name) { - self.children[idx].children.clear(); - } - u_node.remove_xattr(&OsString::from(OVERLAYFS_WHITEOUT_OPAQUE)); - modified.push(u); - } - None => modified.push(u), - } - } - - let mut dirs = Vec::new(); - for u in modified { - let mut u_node = u.lock_node(); - if let Some(idx) = self.get_child_idx(&u.name) { - u_node.overlay = Overlay::UpperModification; - self.children[idx].node = u.node.clone(); - } else { - u_node.overlay = Overlay::UpperAddition; - self.insert_child(Tree { - node: u.node.clone(), - name: u.name.clone(), - children: vec![], - }); - } - if u_node.is_dir() { - dirs.push(u); - } - } - for dir in dirs { - if let Some(idx) = self.get_child_idx(&dir.name) { - self.children[idx].merge_children(ctx, dir)?; - } else { - bail!("builder: can not find directory in merged tree"); - } - } - - Ok(()) - } -} - -pub struct MetadataTreeBuilder<'a> { - rs: &'a RafsSuper, -} - -impl<'a> MetadataTreeBuilder<'a> { - fn new(rs: &'a RafsSuper) -> Self { - Self { rs } - } - - /// Build node tree by loading bootstrap file - fn load_children>( - &self, - ino: Inode, - parent: Option

, - chunk_dict: &mut T, - validate_digest: bool, - ) -> Result> { - let inode = self.rs.get_extended_inode(ino, validate_digest)?; - if !inode.is_dir() { - return Ok(Vec::new()); - } - - let parent_path = if let Some(parent) = parent { - parent.as_ref().join(inode.name()) - } else { - PathBuf::from("/") - }; - - let blobs = self.rs.superblock.get_blob_infos(); - let child_count = inode.get_child_count(); - let mut children = Vec::with_capacity(child_count as usize); - for idx in 0..child_count { - let child = inode.get_child_by_index(idx)?; - let child_path = parent_path.join(child.name()); - let child = Self::parse_node(self.rs, child.clone(), child_path)?; - - if child.is_reg() { - for chunk in &child.chunks { - let blob_idx = chunk.inner.blob_index(); - if let Some(blob) = blobs.get(blob_idx as usize) { - chunk_dict.add_chunk(chunk.inner.clone(), blob.digester()); - } - } - } - - let child = Tree::new(child); - children.push(child); - } - children.sort_unstable_by(|a, b| a.name.cmp(&b.name)); - - for child in children.iter_mut() { - let child_node = child.lock_node(); - if child_node.is_dir() { - let child_ino = child_node.inode.ino(); - drop(child_node); - child.children = - self.load_children(child_ino, Some(&parent_path), chunk_dict, validate_digest)?; - } - } - - Ok(children) - } - - /// Convert a `RafsInode` object to an in-memory `Node` object. - pub fn parse_node(rs: &RafsSuper, inode: Arc, path: PathBuf) -> Result { - let chunks = if inode.is_reg() { - let chunk_count = inode.get_chunk_count(); - let mut chunks = Vec::with_capacity(chunk_count as usize); - for i in 0..chunk_count { - let cki = inode.get_chunk_info(i)?; - chunks.push(NodeChunk { - source: ChunkSource::Parent, - inner: Arc::new(ChunkWrapper::from_chunk_info(cki)), - }); - } - chunks - } else { - Vec::new() - }; - - let symlink = if inode.is_symlink() { - Some(inode.get_symlink()?) - } else { - None - }; - - let mut xattrs = RafsXAttrs::new(); - for name in inode.get_xattrs()? { - let name = bytes_to_os_str(&name); - let value = inode.get_xattr(name)?; - xattrs.add(name.to_os_string(), value.unwrap_or_default())?; - } - - // Nodes loaded from bootstrap will only be used as `Overlay::Lower`, so make `dev` invalid - // to avoid breaking hardlink detecting logic. 
- let src_dev = u64::MAX; - let rdev = inode.rdev() as u64; - let inode = InodeWrapper::from_inode_info(inode.clone()); - let source = PathBuf::from("/"); - let target = Node::generate_target(&path, &source); - let target_vec = Node::generate_target_vec(&target); - let info = NodeInfo { - explicit_uidgid: rs.meta.explicit_uidgid(), - src_ino: inode.ino(), - src_dev, - rdev, - path, - source, - target, - target_vec, - symlink, - xattrs, - v6_force_extended_inode: false, - }; - - Ok(Node { - info: Arc::new(info), - index: 0, - layer_idx: 0, - overlay: Overlay::Lower, - inode, - chunks, - v6_offset: 0, - v6_dirents: Vec::new(), - v6_datalayout: 0, - v6_compact_inode: false, - v6_dirents_offset: 0, - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use nydus_rafs::metadata::RafsVersion; - use nydus_storage::RAFS_DEFAULT_CHUNK_SIZE; - use vmm_sys_util::tempdir::TempDir; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_set_lock_node() { - let tmpdir = TempDir::new().unwrap(); - let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - ) - .unwrap(); - let mut tree = Tree::new(node); - assert_eq!(tree.name, tmpfile.as_path().file_name().unwrap().as_bytes()); - let node1 = tree.lock_node(); - drop(node1); - - let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - ) - .unwrap(); - tree.set_node(node); - let node2 = tree.lock_node(); - assert_eq!(node2.name(), tmpfile.as_path().file_name().unwrap()); - } - - #[test] - fn test_walk_tree() { - let tmpdir = TempDir::new().unwrap(); - let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - ) - .unwrap(); - let mut tree = Tree::new(node); - - let tmpfile2 = TempFile::new_in(tmpdir.as_path()).unwrap(); - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile2.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - ) - .unwrap(); - let tree2 = Tree::new(node); - tree.insert_child(tree2); - - let tmpfile3 = TempFile::new_in(tmpdir.as_path()).unwrap(); - let node = Node::from_fs_object( - RafsVersion::V6, - tmpdir.as_path().to_path_buf(), - tmpfile3.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - true, - false, - ) - .unwrap(); - let tree3 = Tree::new(node); - tree.insert_child(tree3); - - let mut count = 0; - tree.walk_bfs(true, &mut |_n| -> Result<()> { - count += 1; - Ok(()) - }) - .unwrap(); - assert_eq!(count, 3); - - let mut count = 0; - tree.walk_bfs(false, &mut |_n| -> Result<()> { - count += 1; - Ok(()) - }) - .unwrap(); - assert_eq!(count, 2); - - let mut count = 0; - tree.walk_bfs(true, &mut |_n| -> Result<()> { - count += 1; - bail!("test") - }) - .unwrap_err(); - assert_eq!(count, 1); - - let idx = tree - .get_child_idx(tmpfile2.as_path().file_name().unwrap().as_bytes()) - .unwrap(); - assert!(idx == 0 || idx == 1); - let idx = tree - 
.get_child_idx(tmpfile3.as_path().file_name().unwrap().as_bytes()) - .unwrap(); - assert!(idx == 0 || idx == 1); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! An in-memory tree structure to maintain information for filesystem metadata. +//! +//! Steps to build the first layer for a Rafs image: +//! - Build the upper tree (FileSystemTree) from the source directory. +//! - Traverse the upper tree (FileSystemTree) to dump bootstrap and data blobs. +//! +//! Steps to build the second and following on layers for a Rafs image: +//! - Build the upper tree (FileSystemTree) from the source directory. +//! - Load the lower tree (MetadataTree) from a metadata blob. +//! - Merge the final tree (OverlayTree) by applying the upper tree (FileSystemTree) to the +//! lower tree (MetadataTree). +//! - Traverse the merged tree (OverlayTree) to dump bootstrap and data blobs. + +use std::ffi::OsString; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::rc::Rc; +use std::sync::{Arc, Mutex, MutexGuard}; + +use anyhow::{bail, Result}; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::layout::{bytes_to_os_str, RafsXAttrs}; +use nydus_rafs::metadata::{Inode, RafsInodeExt, RafsSuper}; +use nydus_utils::{lazy_drop, root_tracer, timing_tracer}; + +use super::node::{ChunkSource, Node, NodeChunk, NodeInfo}; +use super::overlay::{Overlay, WhiteoutType}; +use crate::core::overlay::OVERLAYFS_WHITEOUT_OPAQUE; +use crate::{BuildContext, ChunkDict}; + +/// Type alias for tree internal node. +pub type TreeNode = Rc>; + +/// An in-memory tree structure to maintain information and topology of filesystem nodes. +#[derive(Clone)] +pub struct Tree { + /// Filesystem node. + pub node: TreeNode, + /// Cached base name. + name: Vec, + /// Children tree nodes. + pub children: Vec, +} + +impl Tree { + /// Create a new instance of `Tree` from a filesystem node. + pub fn new(node: Node) -> Self { + let name = node.name().as_bytes().to_vec(); + Tree { + node: Rc::new(Mutex::new(node)), + name, + children: Vec::new(), + } + } + + /// Load a `Tree` from a bootstrap file, and optionally caches chunk information. + pub fn from_bootstrap(rs: &RafsSuper, chunk_dict: &mut T) -> Result { + let tree_builder = MetadataTreeBuilder::new(rs); + let root_ino = rs.superblock.root_ino(); + let root_inode = rs.get_extended_inode(root_ino, true)?; + let root_node = MetadataTreeBuilder::parse_node(rs, root_inode, PathBuf::from("/"))?; + let mut tree = Tree::new(root_node); + + tree.children = timing_tracer!( + { tree_builder.load_children(root_ino, Option::::None, chunk_dict, true,) }, + "load_tree_from_bootstrap" + )?; + + Ok(tree) + } + + /// Get name of the tree node. + pub fn name(&self) -> &[u8] { + &self.name + } + + /// Set `Node` associated with the tree node. + pub fn set_node(&mut self, node: Node) { + self.node = Rc::new(Mutex::new(node)); + } + + /// Get mutex guard to access the associated `Node` object. + pub fn lock_node(&self) -> MutexGuard { + self.node.lock().unwrap() + } + + /// Walk all nodes in DFS mode. + pub fn walk_dfs(&self, pre: &mut F1, post: &mut F2) -> Result<()> + where + F1: FnMut(&Tree) -> Result<()>, + F2: FnMut(&Tree) -> Result<()>, + { + pre(self)?; + for child in &self.children { + child.walk_dfs(pre, post)?; + } + post(self)?; + + Ok(()) + } + + /// Walk all nodes in pre DFS mode. 
+ pub fn walk_dfs_pre(&self, cb: &mut F) -> Result<()> + where + F: FnMut(&Tree) -> Result<()>, + { + self.walk_dfs(cb, &mut |_t| Ok(())) + } + + /// Walk all nodes in post DFS mode. + pub fn walk_dfs_post(&self, cb: &mut F) -> Result<()> + where + F: FnMut(&Tree) -> Result<()>, + { + self.walk_dfs(&mut |_t| Ok(()), cb) + } + + /// Walk the tree in BFS mode. + pub fn walk_bfs(&self, handle_self: bool, cb: &mut F) -> Result<()> + where + F: FnMut(&Tree) -> Result<()>, + { + if handle_self { + cb(self)?; + } + + let mut dirs = Vec::with_capacity(32); + for child in &self.children { + cb(child)?; + if child.lock_node().is_dir() { + dirs.push(child); + } + } + for dir in dirs { + dir.walk_bfs(false, cb)?; + } + + Ok(()) + } + + /// Insert a new child node into the tree. + pub fn insert_child(&mut self, child: Tree) { + if let Err(idx) = self + .children + .binary_search_by_key(&&child.name, |n| &n.name) + { + self.children.insert(idx, child); + } + } + + /// Get index of child node with specified `name`. + pub fn get_child_idx(&self, name: &[u8]) -> Option { + self.children.binary_search_by_key(&name, |n| &n.name).ok() + } + + /// Get the tree node corresponding to the path. + pub fn get_node(&self, path: &Path) -> Option<&Tree> { + let target_vec = Node::generate_target_vec(path); + assert!(!target_vec.is_empty()); + let mut tree = self; + for name in &target_vec[1..] { + match tree.get_child_idx(name.as_bytes()) { + Some(idx) => tree = &tree.children[idx], + None => return None, + } + } + Some(tree) + } + + /// Merge the upper layer tree into the lower layer tree, applying whiteout rules. + pub fn merge_overaly(&mut self, ctx: &BuildContext, upper: Tree) -> Result<()> { + assert_eq!(self.name, "/".as_bytes()); + assert_eq!(upper.name, "/".as_bytes()); + + // Handle the root node. + upper.lock_node().overlay = Overlay::UpperModification; + self.node = upper.node.clone(); + self.merge_children(ctx, &upper)?; + lazy_drop(upper); + + Ok(()) + } + + fn merge_children(&mut self, ctx: &BuildContext, upper: &Tree) -> Result<()> { + // Handle whiteout nodes in the first round, and handle other nodes in the second round. 
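Editor's note: `merge_children()` depends on the whiteout conventions that `whiteout_type()` detects. For the OCI case these are fixed names from the OCI image spec: a file named `.wh.<name>` deletes `<name>` from the lower layer, and `.wh..wh..opq` marks the directory opaque, while the OverlayFs variants use a 0:0 character device and the overlay opaque xattr instead. A hypothetical classifier (not the builder's code) just to make the first-round handling below concrete:

```rust
// Hypothetical helper mapping OCI whiteout entries to merge actions.
#[derive(Debug, PartialEq)]
enum MergeAction<'a> {
    RemoveLower(&'a str),   // ".wh.<name>" removes <name> from the lower tree
    ClearLowerChildren,     // ".wh..wh..opq" drops all lower children of the dir
    Overlay(&'a str),       // everything else overrides or adds a lower entry
}

fn classify_oci_entry(name: &str) -> MergeAction<'_> {
    const WHITEOUT_PREFIX: &str = ".wh.";
    const OPAQUE_MARKER: &str = ".wh..wh..opq";
    if name == OPAQUE_MARKER {
        // Must be checked first, since the opaque marker also starts with ".wh.".
        MergeAction::ClearLowerChildren
    } else if let Some(orig) = name.strip_prefix(WHITEOUT_PREFIX) {
        MergeAction::RemoveLower(orig)
    } else {
        MergeAction::Overlay(name)
    }
}

fn main() {
    assert_eq!(classify_oci_entry(".wh.passwd"), MergeAction::RemoveLower("passwd"));
    assert_eq!(classify_oci_entry(".wh..wh..opq"), MergeAction::ClearLowerChildren);
    assert_eq!(classify_oci_entry("etc"), MergeAction::Overlay("etc"));
}
```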
+ let mut modified = Vec::with_capacity(upper.children.len()); + for u in upper.children.iter() { + let mut u_node = u.lock_node(); + match u_node.whiteout_type(ctx.whiteout_spec) { + Some(WhiteoutType::OciRemoval) => { + if let Some(origin_name) = u_node.origin_name(WhiteoutType::OciRemoval) { + if let Some(idx) = self.get_child_idx(origin_name.as_bytes()) { + self.children.remove(idx); + } + } + } + Some(WhiteoutType::OciOpaque) => { + self.children.clear(); + } + Some(WhiteoutType::OverlayFsRemoval) => { + if let Some(idx) = self.get_child_idx(&u.name) { + self.children.remove(idx); + } + } + Some(WhiteoutType::OverlayFsOpaque) => { + if let Some(idx) = self.get_child_idx(&u.name) { + self.children[idx].children.clear(); + } + u_node.remove_xattr(&OsString::from(OVERLAYFS_WHITEOUT_OPAQUE)); + modified.push(u); + } + None => modified.push(u), + } + } + + let mut dirs = Vec::new(); + for u in modified { + let mut u_node = u.lock_node(); + if let Some(idx) = self.get_child_idx(&u.name) { + u_node.overlay = Overlay::UpperModification; + self.children[idx].node = u.node.clone(); + } else { + u_node.overlay = Overlay::UpperAddition; + self.insert_child(Tree { + node: u.node.clone(), + name: u.name.clone(), + children: vec![], + }); + } + if u_node.is_dir() { + dirs.push(u); + } + } + for dir in dirs { + if let Some(idx) = self.get_child_idx(&dir.name) { + self.children[idx].merge_children(ctx, dir)?; + } else { + bail!("builder: can not find directory in merged tree"); + } + } + + Ok(()) + } +} + +pub struct MetadataTreeBuilder<'a> { + rs: &'a RafsSuper, +} + +impl<'a> MetadataTreeBuilder<'a> { + fn new(rs: &'a RafsSuper) -> Self { + Self { rs } + } + + /// Build node tree by loading bootstrap file + fn load_children>( + &self, + ino: Inode, + parent: Option

, + chunk_dict: &mut T, + validate_digest: bool, + ) -> Result> { + let inode = self.rs.get_extended_inode(ino, validate_digest)?; + if !inode.is_dir() { + return Ok(Vec::new()); + } + + let parent_path = if let Some(parent) = parent { + parent.as_ref().join(inode.name()) + } else { + PathBuf::from("/") + }; + + let blobs = self.rs.superblock.get_blob_infos(); + let child_count = inode.get_child_count(); + let mut children = Vec::with_capacity(child_count as usize); + for idx in 0..child_count { + let child = inode.get_child_by_index(idx)?; + let child_path = parent_path.join(child.name()); + let child = Self::parse_node(self.rs, child.clone(), child_path)?; + + if child.is_reg() { + for chunk in &child.chunks { + let blob_idx = chunk.inner.blob_index(); + if let Some(blob) = blobs.get(blob_idx as usize) { + chunk_dict.add_chunk(chunk.inner.clone(), blob.digester()); + } + } + } + + let child = Tree::new(child); + children.push(child); + } + children.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + + for child in children.iter_mut() { + let child_node = child.lock_node(); + if child_node.is_dir() { + let child_ino = child_node.inode.ino(); + drop(child_node); + child.children = + self.load_children(child_ino, Some(&parent_path), chunk_dict, validate_digest)?; + } + } + + Ok(children) + } + + /// Convert a `RafsInode` object to an in-memory `Node` object. + pub fn parse_node(rs: &RafsSuper, inode: Arc, path: PathBuf) -> Result { + let chunks = if inode.is_reg() { + let chunk_count = inode.get_chunk_count(); + let mut chunks = Vec::with_capacity(chunk_count as usize); + for i in 0..chunk_count { + let cki = inode.get_chunk_info(i)?; + chunks.push(NodeChunk { + source: ChunkSource::Parent, + inner: Arc::new(ChunkWrapper::from_chunk_info(cki)), + }); + } + chunks + } else { + Vec::new() + }; + + let symlink = if inode.is_symlink() { + Some(inode.get_symlink()?) + } else { + None + }; + + let mut xattrs = RafsXAttrs::new(); + for name in inode.get_xattrs()? { + let name = bytes_to_os_str(&name); + let value = inode.get_xattr(name)?; + xattrs.add(name.to_os_string(), value.unwrap_or_default())?; + } + + // Nodes loaded from bootstrap will only be used as `Overlay::Lower`, so make `dev` invalid + // to avoid breaking hardlink detecting logic. 
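Editor's note: the `src_dev = u64::MAX` assignment right below is purely about hardlink detection. Hardlinks are normally recognized by the (device, inode) pair observed while scanning the source tree, so giving every node parsed from a bootstrap an impossible device id keeps its inode number from colliding with a real (dev, ino) pair in the layer being built. A toy illustration of the idea, not the builder's actual bookkeeping:

```rust
use std::collections::HashMap;

fn main() {
    // Hardlink candidates keyed by (device, inode).
    let mut seen: HashMap<(u64, u64), &str> = HashMap::new();

    // A lower-layer node loaded from the bootstrap: real ino, poisoned dev.
    seen.insert((u64::MAX, 42), "/lower/a");

    // An upper-layer file that happens to reuse inode 42 on a real device.
    let upper = (64768u64, 42u64);
    assert!(!seen.contains_key(&upper)); // no false hardlink match
}
```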
+ let src_dev = u64::MAX; + let rdev = inode.rdev() as u64; + let inode = InodeWrapper::from_inode_info(inode.clone()); + let source = PathBuf::from("/"); + let target = Node::generate_target(&path, &source); + let target_vec = Node::generate_target_vec(&target); + let info = NodeInfo { + explicit_uidgid: rs.meta.explicit_uidgid(), + src_ino: inode.ino(), + src_dev, + rdev, + path, + source, + target, + target_vec, + symlink, + xattrs, + v6_force_extended_inode: false, + }; + + Ok(Node { + info: Arc::new(info), + index: 0, + layer_idx: 0, + overlay: Overlay::Lower, + inode, + chunks, + v6_offset: 0, + v6_dirents: Vec::new(), + v6_datalayout: 0, + v6_compact_inode: false, + v6_dirents_offset: 0, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nydus_rafs::metadata::RafsVersion; + use nydus_storage::RAFS_DEFAULT_CHUNK_SIZE; + use vmm_sys_util::tempdir::TempDir; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_set_lock_node() { + let tmpdir = TempDir::new().unwrap(); + let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + ) + .unwrap(); + let mut tree = Tree::new(node); + assert_eq!(tree.name, tmpfile.as_path().file_name().unwrap().as_bytes()); + let node1 = tree.lock_node(); + drop(node1); + + let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + ) + .unwrap(); + tree.set_node(node); + let node2 = tree.lock_node(); + assert_eq!(node2.name(), tmpfile.as_path().file_name().unwrap()); + } + + #[test] + fn test_walk_tree() { + let tmpdir = TempDir::new().unwrap(); + let tmpfile = TempFile::new_in(tmpdir.as_path()).unwrap(); + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + ) + .unwrap(); + let mut tree = Tree::new(node); + + let tmpfile2 = TempFile::new_in(tmpdir.as_path()).unwrap(); + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile2.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + ) + .unwrap(); + let tree2 = Tree::new(node); + tree.insert_child(tree2); + + let tmpfile3 = TempFile::new_in(tmpdir.as_path()).unwrap(); + let node = Node::from_fs_object( + RafsVersion::V6, + tmpdir.as_path().to_path_buf(), + tmpfile3.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + true, + false, + ) + .unwrap(); + let tree3 = Tree::new(node); + tree.insert_child(tree3); + + let mut count = 0; + tree.walk_bfs(true, &mut |_n| -> Result<()> { + count += 1; + Ok(()) + }) + .unwrap(); + assert_eq!(count, 3); + + let mut count = 0; + tree.walk_bfs(false, &mut |_n| -> Result<()> { + count += 1; + Ok(()) + }) + .unwrap(); + assert_eq!(count, 2); + + let mut count = 0; + tree.walk_bfs(true, &mut |_n| -> Result<()> { + count += 1; + bail!("test") + }) + .unwrap_err(); + assert_eq!(count, 1); + + let idx = tree + .get_child_idx(tmpfile2.as_path().file_name().unwrap().as_bytes()) + .unwrap(); + assert!(idx == 0 || idx == 1); + let idx = tree + 
.get_child_idx(tmpfile3.as_path().file_name().unwrap().as_bytes()) + .unwrap(); + assert!(idx == 0 || idx == 1); + } +} diff --git a/builder/src/core/v5.rs b/builder/src/core/v5.rs index 78a6b78c212..54d52d844e1 100644 --- a/builder/src/core/v5.rs +++ b/builder/src/core/v5.rs @@ -1,266 +1,266 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::convert::TryFrom; -use std::mem::size_of; - -use anyhow::{bail, Context, Result}; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::v5::{ - RafsV5BlobTable, RafsV5ChunkInfo, RafsV5InodeTable, RafsV5InodeWrapper, RafsV5SuperBlock, - RafsV5XAttrsTable, -}; -use nydus_rafs::metadata::{RafsStore, RafsVersion}; -use nydus_rafs::RafsIoWrite; -use nydus_utils::digest::{DigestHasher, RafsDigest}; -use nydus_utils::{div_round_up, root_tracer, timing_tracer, try_round_up_4k}; - -use super::node::Node; -use crate::{Bootstrap, BootstrapContext, BuildContext, Tree}; - -// Filesystem may have different algorithms to calculate `i_size` for directory entries, -// which may break "repeatable build". To support repeatable build, instead of reuse the value -// provided by the source filesystem, we use our own algorithm to calculate `i_size` for directory -// entries for stable `i_size`. -// -// Rafs v6 already has its own algorithm to calculate `i_size` for directory entries, but we don't -// have directory entries for Rafs v5. So let's generate a pseudo `i_size` for Rafs v5 directory -// inode. -const RAFS_V5_VIRTUAL_ENTRY_SIZE: u64 = 8; - -impl Node { - /// Dump RAFS v5 inode metadata to meta blob. - pub fn dump_bootstrap_v5( - &self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - ) -> Result<()> { - trace!("[{}]\t{}", self.overlay, self); - if let InodeWrapper::V5(raw_inode) = &self.inode { - // Dump inode info - let name = self.name(); - let inode = RafsV5InodeWrapper { - name, - symlink: self.info.symlink.as_deref(), - inode: raw_inode, - }; - inode - .store(f_bootstrap) - .context("failed to dump inode to bootstrap")?; - - // Dump inode xattr - if !self.info.xattrs.is_empty() { - self.info - .xattrs - .store_v5(f_bootstrap) - .context("failed to dump xattr to bootstrap")?; - ctx.has_xattr = true; - } - - // Dump chunk info - if self.is_reg() && self.inode.child_count() as usize != self.chunks.len() { - bail!("invalid chunk count {}: {}", self.chunks.len(), self); - } - for chunk in &self.chunks { - chunk - .inner - .store(f_bootstrap) - .context("failed to dump chunk info to bootstrap")?; - trace!("\t\tchunk: {} compressor {}", chunk, ctx.compressor,); - } - - Ok(()) - } else { - bail!("dump_bootstrap_v5() encounters non-v5-inode"); - } - } - - // Filesystem may have different algorithms to calculate `i_size` for directory entries, - // which may break "repeatable build". To support repeatable build, instead of reuse the value - // provided by the source filesystem, we use our own algorithm to calculate `i_size` for - // directory entries for stable `i_size`. - // - // Rafs v6 already has its own algorithm to calculate `i_size` for directory entries, but we - // don't have directory entries for Rafs v5. So let's generate a pseudo `i_size` for Rafs v5 - // directory inode. 
- pub fn v5_set_dir_size(&mut self, fs_version: RafsVersion, children: &[Tree]) { - if !self.is_dir() || !fs_version.is_v5() { - return; - } - - let mut d_size = 0u64; - for child in children.iter() { - d_size += child.lock_node().inode.name_size() as u64 + RAFS_V5_VIRTUAL_ENTRY_SIZE; - } - if d_size == 0 { - self.inode.set_size(4096); - } else { - // Safe to unwrap() because we have u32 for child count. - self.inode.set_size(try_round_up_4k(d_size).unwrap()); - } - self.v5_set_inode_blocks(); - } - - /// Calculate and set `i_blocks` for inode. - /// - /// In order to support repeatable build, we can't reuse `i_blocks` from source filesystems, - /// so let's calculate it by ourself for stable `i_block`. - /// - /// Normal filesystem includes the space occupied by Xattr into the directory size, - /// let's follow the normal behavior. - pub fn v5_set_inode_blocks(&mut self) { - // Set inode blocks for RAFS v5 inode, v6 will calculate it at runtime. - if let InodeWrapper::V5(_) = self.inode { - self.inode.set_blocks(div_round_up( - self.inode.size() + self.info.xattrs.aligned_size_v5() as u64, - 512, - )); - } - } -} - -impl Bootstrap { - /// Calculate inode digest for directory. - fn v5_digest_node(&self, ctx: &mut BuildContext, tree: &Tree) { - let mut node = tree.lock_node(); - - // We have set digest for non-directory inode in the previous dump_blob workflow. - if node.is_dir() { - let mut inode_hasher = RafsDigest::hasher(ctx.digester); - for child in tree.children.iter() { - let child = child.lock_node(); - inode_hasher.digest_update(child.inode.digest().as_ref()); - } - node.inode.set_digest(inode_hasher.digest_finalize()); - } - } - - /// Dump bootstrap and blob file, return (Vec, blob_size) - pub(crate) fn v5_dump( - &mut self, - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - blob_table: &RafsV5BlobTable, - ) -> Result<()> { - // Set inode digest, use reverse iteration order to reduce repeated digest calculations. 
- self.tree.walk_dfs_post(&mut |t| { - self.v5_digest_node(ctx, t); - Ok(()) - })?; - - // Set inode table - let super_block_size = size_of::(); - let inode_table_entries = bootstrap_ctx.get_next_ino() as u32 - 1; - let mut inode_table = RafsV5InodeTable::new(inode_table_entries as usize); - let inode_table_size = inode_table.size(); - - // Set prefetch table - let (prefetch_table_size, prefetch_table_entries) = - if let Some(prefetch_table) = ctx.prefetch.get_v5_prefetch_table() { - (prefetch_table.size(), prefetch_table.len() as u32) - } else { - (0, 0u32) - }; - - // Set blob table, use sha256 string (length 64) as blob id if not specified - let prefetch_table_offset = super_block_size + inode_table_size; - let blob_table_offset = prefetch_table_offset + prefetch_table_size; - let blob_table_size = blob_table.size(); - let extended_blob_table_offset = blob_table_offset + blob_table_size; - let extended_blob_table_size = blob_table.extended.size(); - let extended_blob_table_entries = blob_table.extended.entries(); - - // Set super block - let mut super_block = RafsV5SuperBlock::new(); - let inodes_count = bootstrap_ctx.inode_map.len() as u64; - super_block.set_inodes_count(inodes_count); - super_block.set_inode_table_offset(super_block_size as u64); - super_block.set_inode_table_entries(inode_table_entries); - super_block.set_blob_table_offset(blob_table_offset as u64); - super_block.set_blob_table_size(blob_table_size as u32); - super_block.set_extended_blob_table_offset(extended_blob_table_offset as u64); - super_block.set_extended_blob_table_entries(u32::try_from(extended_blob_table_entries)?); - super_block.set_prefetch_table_offset(prefetch_table_offset as u64); - super_block.set_prefetch_table_entries(prefetch_table_entries); - super_block.set_compressor(ctx.compressor); - super_block.set_digester(ctx.digester); - super_block.set_chunk_size(ctx.chunk_size); - if ctx.explicit_uidgid { - super_block.set_explicit_uidgid(); - } - - // Set inodes and chunks - let mut inode_offset = (super_block_size - + inode_table_size - + prefetch_table_size - + blob_table_size - + extended_blob_table_size) as u32; - - let mut has_xattr = false; - self.tree.walk_dfs_pre(&mut |t| { - let node = t.lock_node(); - inode_table.set(node.index, inode_offset)?; - // Add inode size - inode_offset += node.inode.inode_size() as u32; - if node.inode.has_xattr() { - has_xattr = true; - if !node.info.xattrs.is_empty() { - inode_offset += (size_of::() - + node.info.xattrs.aligned_size_v5()) - as u32; - } - } - // Add chunks size - if node.is_reg() { - inode_offset += node.inode.child_count() * size_of::() as u32; - } - Ok(()) - })?; - if has_xattr { - super_block.set_has_xattr(); - } - - // Dump super block - super_block - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store superblock")?; - - // Dump inode table - inode_table - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store inode table")?; - - // Dump prefetch table - if let Some(mut prefetch_table) = ctx.prefetch.get_v5_prefetch_table() { - prefetch_table - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store prefetch table")?; - } - - // Dump blob table - blob_table - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store blob table")?; - - // Dump extended blob table - blob_table - .store_extended(bootstrap_ctx.writer.as_mut()) - .context("failed to store extended blob table")?; - - // Dump inodes and chunks - timing_tracer!( - { - self.tree.walk_dfs_pre(&mut |t| { - t.lock_node() - .dump_bootstrap_v5(ctx, 
bootstrap_ctx.writer.as_mut()) - .context("failed to dump bootstrap") - }) - }, - "dump_bootstrap" - )?; - - Ok(()) - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryFrom; +use std::mem::size_of; + +use anyhow::{bail, Context, Result}; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::layout::v5::{ + RafsV5BlobTable, RafsV5ChunkInfo, RafsV5InodeTable, RafsV5InodeWrapper, RafsV5SuperBlock, + RafsV5XAttrsTable, +}; +use nydus_rafs::metadata::{RafsStore, RafsVersion}; +use nydus_rafs::RafsIoWrite; +use nydus_utils::digest::{DigestHasher, RafsDigest}; +use nydus_utils::{div_round_up, root_tracer, timing_tracer, try_round_up_4k}; + +use super::node::Node; +use crate::{Bootstrap, BootstrapContext, BuildContext, Tree}; + +// Filesystem may have different algorithms to calculate `i_size` for directory entries, +// which may break "repeatable build". To support repeatable build, instead of reuse the value +// provided by the source filesystem, we use our own algorithm to calculate `i_size` for directory +// entries for stable `i_size`. +// +// Rafs v6 already has its own algorithm to calculate `i_size` for directory entries, but we don't +// have directory entries for Rafs v5. So let's generate a pseudo `i_size` for Rafs v5 directory +// inode. +const RAFS_V5_VIRTUAL_ENTRY_SIZE: u64 = 8; + +impl Node { + /// Dump RAFS v5 inode metadata to meta blob. + pub fn dump_bootstrap_v5( + &self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + ) -> Result<()> { + trace!("[{}]\t{}", self.overlay, self); + if let InodeWrapper::V5(raw_inode) = &self.inode { + // Dump inode info + let name = self.name(); + let inode = RafsV5InodeWrapper { + name, + symlink: self.info.symlink.as_deref(), + inode: raw_inode, + }; + inode + .store(f_bootstrap) + .context("failed to dump inode to bootstrap")?; + + // Dump inode xattr + if !self.info.xattrs.is_empty() { + self.info + .xattrs + .store_v5(f_bootstrap) + .context("failed to dump xattr to bootstrap")?; + ctx.has_xattr = true; + } + + // Dump chunk info + if self.is_reg() && self.inode.child_count() as usize != self.chunks.len() { + bail!("invalid chunk count {}: {}", self.chunks.len(), self); + } + for chunk in &self.chunks { + chunk + .inner + .store(f_bootstrap) + .context("failed to dump chunk info to bootstrap")?; + trace!("\t\tchunk: {} compressor {}", chunk, ctx.compressor,); + } + + Ok(()) + } else { + bail!("dump_bootstrap_v5() encounters non-v5-inode"); + } + } + + // Filesystem may have different algorithms to calculate `i_size` for directory entries, + // which may break "repeatable build". To support repeatable build, instead of reuse the value + // provided by the source filesystem, we use our own algorithm to calculate `i_size` for + // directory entries for stable `i_size`. + // + // Rafs v6 already has its own algorithm to calculate `i_size` for directory entries, but we + // don't have directory entries for Rafs v5. So let's generate a pseudo `i_size` for Rafs v5 + // directory inode. 
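Editor's note: the pseudo `i_size` rule described above works out as follows: each child entry is charged its name length plus `RAFS_V5_VIRTUAL_ENTRY_SIZE` (8) bytes, the sum is rounded up to 4 KiB, and an empty directory falls back to 4096. A standalone worked example (not the `Node` method itself):

```rust
const RAFS_V5_VIRTUAL_ENTRY_SIZE: u64 = 8;

fn v5_dir_size(child_names: &[&str]) -> u64 {
    let d_size: u64 = child_names
        .iter()
        .map(|n| n.len() as u64 + RAFS_V5_VIRTUAL_ENTRY_SIZE)
        .sum();
    if d_size == 0 {
        4096 // an empty directory still reports one 4 KiB block
    } else {
        (d_size + 4095) / 4096 * 4096 // same effect as try_round_up_4k()
    }
}

fn main() {
    assert_eq!(v5_dir_size(&[]), 4096);
    // "bin", "etc", "usr": 3 * (3 + 8) = 33 bytes, rounded up to one block.
    assert_eq!(v5_dir_size(&["bin", "etc", "usr"]), 4096);
    // i_blocks is then derived as div_round_up(i_size + aligned xattr size, 512),
    // e.g. 4096 / 512 = 8 blocks for a directory without xattrs.
}
```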
+ pub fn v5_set_dir_size(&mut self, fs_version: RafsVersion, children: &[Tree]) { + if !self.is_dir() || !fs_version.is_v5() { + return; + } + + let mut d_size = 0u64; + for child in children.iter() { + d_size += child.lock_node().inode.name_size() as u64 + RAFS_V5_VIRTUAL_ENTRY_SIZE; + } + if d_size == 0 { + self.inode.set_size(4096); + } else { + // Safe to unwrap() because we have u32 for child count. + self.inode.set_size(try_round_up_4k(d_size).unwrap()); + } + self.v5_set_inode_blocks(); + } + + /// Calculate and set `i_blocks` for inode. + /// + /// In order to support repeatable build, we can't reuse `i_blocks` from source filesystems, + /// so let's calculate it by ourself for stable `i_block`. + /// + /// Normal filesystem includes the space occupied by Xattr into the directory size, + /// let's follow the normal behavior. + pub fn v5_set_inode_blocks(&mut self) { + // Set inode blocks for RAFS v5 inode, v6 will calculate it at runtime. + if let InodeWrapper::V5(_) = self.inode { + self.inode.set_blocks(div_round_up( + self.inode.size() + self.info.xattrs.aligned_size_v5() as u64, + 512, + )); + } + } +} + +impl Bootstrap { + /// Calculate inode digest for directory. + fn v5_digest_node(&self, ctx: &mut BuildContext, tree: &Tree) { + let mut node = tree.lock_node(); + + // We have set digest for non-directory inode in the previous dump_blob workflow. + if node.is_dir() { + let mut inode_hasher = RafsDigest::hasher(ctx.digester); + for child in tree.children.iter() { + let child = child.lock_node(); + inode_hasher.digest_update(child.inode.digest().as_ref()); + } + node.inode.set_digest(inode_hasher.digest_finalize()); + } + } + + /// Dump bootstrap and blob file, return (Vec, blob_size) + pub(crate) fn v5_dump( + &mut self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + blob_table: &RafsV5BlobTable, + ) -> Result<()> { + // Set inode digest, use reverse iteration order to reduce repeated digest calculations. 
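Editor's note: the post-order walk chosen here matters because `v5_digest_node` computes a directory's digest from its children's digests, so every child must be finalized before its parent and no digest needs to be recomputed. A minimal stand-in using toy arithmetic instead of `RafsDigest`:

```rust
struct N { digest: u64, children: Vec<N> }

fn digest_post_order(n: &mut N) {
    for c in &mut n.children {
        digest_post_order(c); // children are finalized first
    }
    if !n.children.is_empty() {
        // Stand-in for hashing the children's digests.
        n.digest = n.children.iter().map(|c| c.digest).sum();
    }
}

fn main() {
    let mut root = N {
        digest: 0,
        children: vec![
            N { digest: 7, children: vec![] },
            N { digest: 0, children: vec![N { digest: 5, children: vec![] }] },
        ],
    };
    digest_post_order(&mut root);
    assert_eq!(root.children[1].digest, 5); // inner directory settled first
    assert_eq!(root.digest, 12);            // then the root sees 7 + 5
}
```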
+ self.tree.walk_dfs_post(&mut |t| { + self.v5_digest_node(ctx, t); + Ok(()) + })?; + + // Set inode table + let super_block_size = size_of::(); + let inode_table_entries = bootstrap_ctx.get_next_ino() as u32 - 1; + let mut inode_table = RafsV5InodeTable::new(inode_table_entries as usize); + let inode_table_size = inode_table.size(); + + // Set prefetch table + let (prefetch_table_size, prefetch_table_entries) = + if let Some(prefetch_table) = ctx.prefetch.get_v5_prefetch_table() { + (prefetch_table.size(), prefetch_table.len() as u32) + } else { + (0, 0u32) + }; + + // Set blob table, use sha256 string (length 64) as blob id if not specified + let prefetch_table_offset = super_block_size + inode_table_size; + let blob_table_offset = prefetch_table_offset + prefetch_table_size; + let blob_table_size = blob_table.size(); + let extended_blob_table_offset = blob_table_offset + blob_table_size; + let extended_blob_table_size = blob_table.extended.size(); + let extended_blob_table_entries = blob_table.extended.entries(); + + // Set super block + let mut super_block = RafsV5SuperBlock::new(); + let inodes_count = bootstrap_ctx.inode_map.len() as u64; + super_block.set_inodes_count(inodes_count); + super_block.set_inode_table_offset(super_block_size as u64); + super_block.set_inode_table_entries(inode_table_entries); + super_block.set_blob_table_offset(blob_table_offset as u64); + super_block.set_blob_table_size(blob_table_size as u32); + super_block.set_extended_blob_table_offset(extended_blob_table_offset as u64); + super_block.set_extended_blob_table_entries(u32::try_from(extended_blob_table_entries)?); + super_block.set_prefetch_table_offset(prefetch_table_offset as u64); + super_block.set_prefetch_table_entries(prefetch_table_entries); + super_block.set_compressor(ctx.compressor); + super_block.set_digester(ctx.digester); + super_block.set_chunk_size(ctx.chunk_size); + if ctx.explicit_uidgid { + super_block.set_explicit_uidgid(); + } + + // Set inodes and chunks + let mut inode_offset = (super_block_size + + inode_table_size + + prefetch_table_size + + blob_table_size + + extended_blob_table_size) as u32; + + let mut has_xattr = false; + self.tree.walk_dfs_pre(&mut |t| { + let node = t.lock_node(); + inode_table.set(node.index, inode_offset)?; + // Add inode size + inode_offset += node.inode.inode_size() as u32; + if node.inode.has_xattr() { + has_xattr = true; + if !node.info.xattrs.is_empty() { + inode_offset += (size_of::() + + node.info.xattrs.aligned_size_v5()) + as u32; + } + } + // Add chunks size + if node.is_reg() { + inode_offset += node.inode.child_count() * size_of::() as u32; + } + Ok(()) + })?; + if has_xattr { + super_block.set_has_xattr(); + } + + // Dump super block + super_block + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store superblock")?; + + // Dump inode table + inode_table + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store inode table")?; + + // Dump prefetch table + if let Some(mut prefetch_table) = ctx.prefetch.get_v5_prefetch_table() { + prefetch_table + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store prefetch table")?; + } + + // Dump blob table + blob_table + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store blob table")?; + + // Dump extended blob table + blob_table + .store_extended(bootstrap_ctx.writer.as_mut()) + .context("failed to store extended blob table")?; + + // Dump inodes and chunks + timing_tracer!( + { + self.tree.walk_dfs_pre(&mut |t| { + t.lock_node() + .dump_bootstrap_v5(ctx, 
bootstrap_ctx.writer.as_mut()) + .context("failed to dump bootstrap") + }) + }, + "dump_bootstrap" + )?; + + Ok(()) + } +} diff --git a/builder/src/core/v6.rs b/builder/src/core/v6.rs index 9dd1091d1e2..bc0be4ac550 100644 --- a/builder/src/core/v6.rs +++ b/builder/src/core/v6.rs @@ -1,1059 +1,1059 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::BTreeMap; -use std::ffi::{OsStr, OsString}; -use std::io::SeekFrom; -use std::mem::size_of; -use std::os::unix::ffi::OsStrExt; -use std::sync::Arc; - -use anyhow::{bail, ensure, Context, Result}; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::layout::v6::{ - align_offset, calculate_nid, new_v6_inode, RafsV6BlobTable, RafsV6Device, RafsV6Dirent, - RafsV6InodeChunkAddr, RafsV6InodeChunkHeader, RafsV6OndiskInode, RafsV6SuperBlock, - RafsV6SuperBlockExt, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, EROFS_BLOCK_SIZE_512, - EROFS_DEVTABLE_OFFSET, EROFS_INODE_CHUNK_BASED, EROFS_INODE_FLAT_INLINE, - EROFS_INODE_FLAT_PLAIN, EROFS_INODE_SLOT_SIZE, EROFS_SUPER_BLOCK_SIZE, EROFS_SUPER_OFFSET, -}; -use nydus_rafs::metadata::RafsStore; -use nydus_rafs::RafsIoWrite; -use nydus_storage::device::BlobFeatures; -use nydus_utils::{root_tracer, round_down, round_up, timing_tracer}; - -use super::chunk_dict::DigestWithBlobIndex; -use super::node::Node; -use crate::{Bootstrap, BootstrapContext, BuildContext, ConversionType, Tree}; - -const WRITE_PADDING_DATA: [u8; 4096] = [0u8; 4096]; -const V6_BLOCK_SEG_ALIGNMENT: u64 = 0x8_0000; - -// Rafs v6 dedicated methods -impl Node { - /// Dump RAFS v6 inode metadata to meta blob. - pub fn dump_bootstrap_v6( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - orig_meta_addr: u64, - meta_addr: u64, - chunk_cache: &mut BTreeMap>, - ) -> Result<()> { - let xattr_inline_count = self.info.xattrs.count_v6(); - ensure!( - xattr_inline_count <= u16::MAX as usize, - "size of extended attributes is too big" - ); - let mut inode = new_v6_inode( - &self.inode, - self.v6_datalayout, - xattr_inline_count as u16, - self.v6_compact_inode, - ); - - let meta_offset = meta_addr - orig_meta_addr; - // update all the inodes's offset according to the new 'meta_addr'. - self.v6_offset += meta_offset; - // The EROFS_INODE_FLAT_INLINE layout is valid for directory and symlink only, - // so `dirents_offset` is useful for these two types too, otherwise `dirents_offset` - // should always be zero. - // Enforce the check to avoid overflow of `dirents_offset`. - if self.is_dir() || self.is_symlink() { - self.v6_dirents_offset += meta_offset; - } - let nid = calculate_nid(self.v6_offset, meta_addr); - self.inode.set_ino(nid); - - if self.is_dir() { - self.v6_dump_dir(ctx, f_bootstrap, meta_addr, meta_offset, &mut inode)?; - } else if self.is_reg() { - self.v6_dump_file(ctx, f_bootstrap, chunk_cache, &mut inode)?; - } else if self.is_symlink() { - self.v6_dump_symlink(ctx, f_bootstrap, &mut inode)?; - } else { - f_bootstrap - .seek(SeekFrom::Start(self.v6_offset)) - .context("failed seek for dir inode")?; - inode.store(f_bootstrap).context("failed to store inode")?; - self.v6_store_xattrs(ctx, f_bootstrap)?; - } - - Ok(()) - } - - /// Update whether compact mode can be used for this inode or not. 
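Editor's note, for context on `v6_set_inode_compact()` below: EROFS defines a 32-byte compact on-disk inode and a 64-byte extended one, and to my understanding the compact form only carries 16-bit uid/gid/nlink fields and a 32-bit size, which is exactly what these checks guard against (the builder shown also force-extends inodes for `.pyc` paths). A hedged sketch of the decision:

```rust
struct InodeAttrs { uid: u32, gid: u32, nlink: u32, size: u64, force_extended: bool }

// Assumption: compact inodes can only hold u16 uid/gid/nlink and a u32 size.
fn can_use_compact(i: &InodeAttrs) -> bool {
    !(i.force_extended
        || i.uid > u16::MAX as u32
        || i.gid > u16::MAX as u32
        || i.nlink > u16::MAX as u32
        || i.size > u32::MAX as u64)
}

fn main() {
    let small = InodeAttrs { uid: 0, gid: 0, nlink: 1, size: 4096, force_extended: false };
    let large = InodeAttrs { uid: 0, gid: 0, nlink: 1, size: 8 << 30, force_extended: false };
    assert!(can_use_compact(&small));
    assert!(!can_use_compact(&large)); // an 8 GiB file needs an extended inode
}
```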
- pub fn v6_set_inode_compact(&mut self) { - self.v6_compact_inode = !(self.info.v6_force_extended_inode - || self.inode.uid() > u16::MAX as u32 - || self.inode.gid() > u16::MAX as u32 - || self.inode.nlink() > u16::MAX as u32 - || self.inode.size() > u32::MAX as u64 - || self.path().extension() == Some(OsStr::new("pyc"))); - } - - /// Layout the normal inode (except directory inode) into the meta blob. - pub fn v6_set_offset( - &mut self, - bootstrap_ctx: &mut BootstrapContext, - v6_hardlink_offset: Option, - block_size: u64, - ) -> Result<()> { - ensure!(!self.is_dir(), "{} is a directory", self.path().display()); - if self.is_reg() { - if let Some(v6_hardlink_offset) = v6_hardlink_offset { - self.v6_offset = v6_hardlink_offset; - } else { - let size = self.v6_size_with_xattr(); - let unit = size_of::() as u64; - let total_size = round_up(size, unit) + self.inode.child_count() as u64 * unit; - // First try to allocate from fragments of dirent pages. - self.v6_offset = bootstrap_ctx.allocate_available_block(total_size, block_size); - if self.v6_offset == 0 { - self.v6_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += total_size; - } - } - self.v6_datalayout = EROFS_INODE_CHUNK_BASED; - } else if self.is_symlink() { - self.v6_set_offset_with_tail(bootstrap_ctx, self.inode.size(), block_size); - } else { - self.v6_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += self.v6_size_with_xattr(); - } - bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); - - Ok(()) - } - - /// Layout the directory inode and its dirents into meta blob. - pub fn v6_set_dir_offset( - &mut self, - bootstrap_ctx: &mut BootstrapContext, - d_size: u64, - block_size: u64, - ) -> Result<()> { - ensure!( - self.is_dir(), - "{} is not a directory", - self.path().display() - ); - - // Dir isize is the total bytes of 'dirents + names'. - self.inode.set_size(d_size); - self.v6_set_offset_with_tail(bootstrap_ctx, d_size, block_size); - bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); - - Ok(()) - } - - /// Calculate space needed to store dirents of the directory inode. - pub fn v6_dirent_size(&self, ctx: &mut BuildContext, tree: &Tree) -> Result { - ensure!(self.is_dir(), "{} is not a directory", self); - let block_size = ctx.v6_block_size(); - let mut d_size = 0; - - // Sort all children if "." and ".." are not at the head after sorting. - if !tree.children.is_empty() && tree.children[0].name() < "..".as_bytes() { - let mut children = Vec::with_capacity(tree.children.len() + 2); - children.push(".".as_bytes()); - children.push("..".as_bytes()); - for child in tree.children.iter() { - children.push(child.name()); - } - children.sort_unstable(); - - for c in children { - // Use length in byte, instead of length in character. - let len = c.len() + size_of::(); - // erofs disk format requires dirent to be aligned to block size. - if (d_size % block_size) + len as u64 > block_size { - d_size = round_up(d_size as u64, block_size); - } - d_size += len as u64; - } - } else { - // Avoid sorting again if "." and ".." are at the head after sorting due to that - // `tree.children` has already been sorted. - d_size = (".".as_bytes().len() - + size_of::() - + "..".as_bytes().len() - + size_of::()) as u64; - for child in tree.children.iter() { - let len = child.name().len() + size_of::(); - // erofs disk format requires dirent to be aligned to block size. 
- if (d_size % block_size) + len as u64 > block_size { - d_size = round_up(d_size as u64, block_size); - } - d_size += len as u64; - } - } - - Ok(d_size) - } - - fn v6_size_with_xattr(&self) -> u64 { - self.inode - .get_inode_size_with_xattr(&self.info.xattrs, self.v6_compact_inode) as u64 - } - - // Layout symlink or directory inodes into the meta blob. - // - // For DIR inode, size is the total bytes of 'dirents + names'. - // For symlink, size is the length of symlink name. - fn v6_set_offset_with_tail( - &mut self, - bootstrap_ctx: &mut BootstrapContext, - d_size: u64, - block_size: u64, - ) { - // | avail | - // +--------+-----------+----+ +-----------------------+ - // | |inode+tail | free | dirents+names | - // | | | | | | - // +--------+-----------+----+ +-----------------------+ - // - // | avail | - // +--------+-----------+----+ +-----------------------+ +---------+-------------+ - // | |inode | free | dirents+names | | tail | free | - // | | | | | | | | | - // +--------+-----------+----+ +-----------------------+ +---------+-------------+ - // - // - // | avail | - // +--------+-----------+----+ +-----------------------+ +---------+-------------+ - // | | inode + | dirents+names | | tail | free | - // | | | | | | | | - // +--------+-----------+----+ +-----------------------+ +---------+-------------+ - // - // - // | avail | - // +--------+----------------+ +--------------+--------+ +-----------------------+ - // | | inode | | inode+tail | free | | dirents+names | - // | | | | | | | | - // +--------+----------------+ +--------------+--------+ +-----------------------+ - // | inode | - // - // | avail | - // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+ - // | | inode | | inode | free | | dirents+names | | tail | free | - // | | | | | | | | | | | - // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+ - // | inode | - // - // - let inode_size = self.v6_size_with_xattr(); - let tail: u64 = d_size % block_size; - - // We use a simple inline strategy here: - // If the inode size with xattr + tail data size <= EROFS_BLOCK_SIZE, - // we choose to inline it. - // Firstly, if it's bigger than EROFS_BLOCK_SIZE, - // in most cases, we can assume that the tail data size is close to EROFS_BLOCK_SIZE, - // in this condition, even if we don't inline the tail data, there won't be much waste. - // Secondly, the `available_blocks` that we maintain in the `BootstrapCtx`, - // since it contain only single blocks with some unused space, the available space can only - // be smaller than EROFS_BLOCK_SIZE, therefore we can't use our used blocks to store the - // inode plus the tail data bigger than EROFS_BLOCK_SIZE. - let should_inline = tail != 0 && (inode_size + tail) <= block_size; - - // If should inline, we first try to allocate space for the inode together with tail data - // using used blocks. - // If no available used block exists, we try to allocate space from current block. - // If current block doesn't have enough space, we append it to `available_blocks`, - // and we allocate space from the next block. - // For the remaining data, we allocate space for it sequentially. 
- self.v6_datalayout = if should_inline { - self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size + tail, block_size); - if self.v6_offset == 0 { - let available = block_size - bootstrap_ctx.offset % block_size; - if available < inode_size + tail { - bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); - bootstrap_ctx.align_offset(block_size); - } - - self.v6_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += inode_size + tail; - } - - if d_size != tail { - bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); - bootstrap_ctx.align_offset(block_size); - } - self.v6_dirents_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += round_down(d_size, block_size); - - EROFS_INODE_FLAT_INLINE - } else { - // Otherwise, we first try to allocate space for the inode from used blocks. - // If no available used block exists, we allocate space sequentially. - // Then we allocate space for all data. - self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size, block_size); - if self.v6_offset == 0 { - self.v6_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += inode_size; - } - - bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); - bootstrap_ctx.align_offset(block_size); - self.v6_dirents_offset = bootstrap_ctx.offset; - bootstrap_ctx.offset += d_size; - bootstrap_ctx.align_offset(block_size); - - EROFS_INODE_FLAT_PLAIN - }; - - trace!( - "{:?} inode offset {} ctx offset {} d_size {} dirents_offset {} datalayout {}", - self.name(), - self.v6_offset, - bootstrap_ctx.offset, - d_size, - self.v6_dirents_offset, - self.v6_datalayout - ); - } - - fn v6_store_xattrs( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - ) -> Result<()> { - if !self.info.xattrs.is_empty() { - self.info - .xattrs - .store_v6(f_bootstrap) - .context("failed to dump xattr to bootstrap")?; - ctx.has_xattr = true; - } - Ok(()) - } - - fn v6_dump_dir( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - meta_addr: u64, - meta_offset: u64, - inode: &mut Box, - ) -> Result<()> { - // the 1st 4k block after dir inode. - let mut dirent_off = self.v6_dirents_offset; - let blk_addr = ctx - .v6_block_addr(dirent_off) - .with_context(|| format!("failed to compute blk_addr for offset 0x{:x}", dirent_off))?; - inode.set_u(blk_addr); - self.v6_dump_inode(ctx, f_bootstrap, inode) - .context("failed to dump inode for directory")?; - - // Dump dirents - let mut dir_data: Vec = Vec::new(); - let mut entry_names = Vec::new(); - let mut dirents: Vec<(RafsV6Dirent, &OsString)> = Vec::new(); - let mut nameoff: u64 = 0; - let mut used: u64 = 0; - let block_size = ctx.v6_block_size(); - - trace!( - "{:?} self.dirents.len {}", - self.target(), - self.v6_dirents.len() - ); - // fill dir blocks one by one - for (offset, name, file_type) in self.v6_dirents.iter() { - let len = name.as_bytes().len() + size_of::(); - // write to bootstrap when it will exceed EROFS_BLOCK_SIZE - if used + len as u64 > block_size { - for (entry, name) in dirents.iter_mut() { - trace!("{:?} nameoff {}", name, nameoff); - entry.set_name_offset(nameoff as u16); - dir_data.extend(entry.as_ref()); - entry_names.push(*name); - // Use length in byte, instead of length in character. - // Because some characters could occupy more than one byte. 
- nameoff += name.as_bytes().len() as u64; - } - for name in entry_names.iter() { - dir_data.extend(name.as_bytes()); - } - - f_bootstrap - .seek(SeekFrom::Start(dirent_off as u64)) - .context("failed seek file position for writing dirent")?; - f_bootstrap - .write(dir_data.as_slice()) - .context("failed to write dirent data to meta blob")?; - - // track where we're going to write. - dirent_off += round_up(used, block_size); - used = 0; - nameoff = 0; - dir_data.clear(); - entry_names.clear(); - dirents.clear(); - } - - trace!( - "name {:?} file type {} {:?}", - *name, - *file_type, - RafsV6Dirent::file_type(*file_type) - ); - let entry = RafsV6Dirent::new( - calculate_nid(*offset + meta_offset, meta_addr), - 0, - RafsV6Dirent::file_type(*file_type), - ); - dirents.push((entry, name)); - - nameoff += size_of::() as u64; - used += len as u64; - } - - trace!( - "{:?} used {} dir size {}", - self.target(), - used, - self.inode.size() - ); - // dump tail part if any - if used > 0 { - for (entry, name) in dirents.iter_mut() { - trace!("{:?} tail nameoff {}", name, nameoff); - entry.set_name_offset(nameoff as u16); - dir_data.extend(entry.as_ref()); - entry_names.push(*name); - nameoff += name.as_bytes().len() as u64; - } - for name in entry_names.iter() { - dir_data.extend(name.as_bytes()); - } - - let tail_off = match self.v6_datalayout { - EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(), - EROFS_INODE_FLAT_PLAIN => dirent_off, - _ => bail!("unsupported RAFS v6 inode layout for directory"), - }; - f_bootstrap - .seek(SeekFrom::Start(tail_off as u64)) - .context("failed seek for dir inode")?; - f_bootstrap - .write(dir_data.as_slice()) - .context("failed to store dirents")?; - } - - Ok(()) - } - - fn v6_dump_file( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - chunk_cache: &mut BTreeMap>, - inode: &mut Box, - ) -> Result<()> { - let mut is_continuous = true; - let mut prev = None; - - // write chunk indexes, chunk contents has been written to blob file. - let mut chunks: Vec = Vec::new(); - for chunk in self.chunks.iter() { - let offset = chunk.inner.uncompressed_offset(); - let blk_addr = ctx.v6_block_addr(offset).with_context(|| { - format!( - "failed to compute blk_addr for chunk with uncompressed offset 0x{:x}", - offset - ) - })?; - let blob_idx = chunk.inner.blob_index(); - let mut v6_chunk = RafsV6InodeChunkAddr::new(); - v6_chunk.set_blob_index(blob_idx); - v6_chunk.set_blob_ci_index(chunk.inner.index()); - v6_chunk.set_block_addr(blk_addr); - - chunks.extend(v6_chunk.as_ref()); - chunk_cache.insert( - DigestWithBlobIndex(*chunk.inner.id(), chunk.inner.blob_index() + 1), - chunk.inner.clone(), - ); - if let Some((prev_idx, prev_pos)) = prev { - if prev_pos + ctx.chunk_size as u64 != offset || prev_idx != blob_idx { - is_continuous = false; - } - } - prev = Some((blob_idx, offset)); - } - - // Special optimization to enable page cache sharing for EROFS. 
- let chunk_size = if is_continuous && inode.size() > ctx.chunk_size as u64 { - inode.size().next_power_of_two() - } else { - ctx.chunk_size as u64 - }; - let info = RafsV6InodeChunkHeader::new(chunk_size, ctx.v6_block_size()); - inode.set_u(info.to_u32()); - self.v6_dump_inode(ctx, f_bootstrap, inode) - .context("failed to dump inode for file")?; - - let unit = size_of::() as u64; - let offset = align_offset(self.v6_offset + self.v6_size_with_xattr(), unit); - f_bootstrap - .seek(SeekFrom::Start(offset)) - .with_context(|| format!("failed to seek to 0x{:x} for writing chunk data", offset))?; - f_bootstrap - .write(chunks.as_slice()) - .context("failed to write chunk data for file")?; - - Ok(()) - } - - fn v6_dump_symlink( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - inode: &mut Box, - ) -> Result<()> { - let blk_addr = ctx.v6_block_addr(self.v6_dirents_offset)?; - inode.set_u(blk_addr); - self.v6_dump_inode(ctx, f_bootstrap, inode) - .context("failed to dump inode for symlink")?; - - if let Some(symlink) = &self.info.symlink { - let tail_off = match self.v6_datalayout { - EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(), - EROFS_INODE_FLAT_PLAIN => self.v6_dirents_offset, - _ => bail!("unsupported RAFS v5 inode layout for symlink"), - }; - f_bootstrap - .seek(SeekFrom::Start(tail_off)) - .context("failed seek for dir inode")?; - f_bootstrap - .write(symlink.as_bytes()) - .context("filed to store symlink")?; - } - - Ok(()) - } - - fn v6_dump_inode( - &mut self, - ctx: &mut BuildContext, - f_bootstrap: &mut dyn RafsIoWrite, - inode: &mut Box, - ) -> Result<()> { - f_bootstrap - .seek(SeekFrom::Start(self.v6_offset)) - .context("failed to seek file position for writing inode")?; - inode - .store(f_bootstrap) - .context("failed to write inode to meta blob")?; - self.v6_store_xattrs(ctx, f_bootstrap) - .context("failed to write extended attributes for inode") - } -} - -impl BuildContext { - pub fn v6_block_size(&self) -> u64 { - if self.conversion_type == ConversionType::TarToTarfs { - // Tar stream is 512-byte aligned. - EROFS_BLOCK_SIZE_512 - } else { - EROFS_BLOCK_SIZE_4096 - } - } - - pub fn v6_block_addr(&self, offset: u64) -> Result { - let blk_addr = offset / self.v6_block_size(); - if blk_addr > u32::MAX as u64 { - bail!("v6 block address 0x{:x} is too big", blk_addr) - } else { - Ok(blk_addr as u32) - } - } -} - -impl Bootstrap { - pub(crate) fn v6_update_dirents(parent: &Tree, parent_offset: u64) { - let mut node = parent.lock_node(); - let node_offset = node.v6_offset; - if !node.is_dir() { - return; - } - - // dot & dotdot - // Type of libc::S_IFDIR is u16 on macos, so it need a conversion - // but compiler will report useless conversion on linux platform, - // so we add an allow annotation here. 
- #[allow(clippy::useless_conversion)] - { - node.v6_dirents - .push((node_offset, OsString::from("."), libc::S_IFDIR.into())); - node.v6_dirents - .push((parent_offset, OsString::from(".."), libc::S_IFDIR.into())); - } - - let mut dirs: Vec<&Tree> = Vec::new(); - for child in parent.children.iter() { - let child_node = child.lock_node(); - let entry = ( - child_node.v6_offset, - OsStr::from_bytes(child.name()).to_owned(), - child_node.inode.mode(), - ); - node.v6_dirents.push(entry); - if child_node.is_dir() { - dirs.push(child); - } - } - node.v6_dirents - .sort_unstable_by(|a, b| a.1.as_os_str().cmp(b.1.as_os_str())); - - for dir in dirs { - Self::v6_update_dirents(dir, node_offset); - } - } - - /// Dump bootstrap and blob file, return (Vec, blob_size) - pub(crate) fn v6_dump( - &mut self, - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - blob_table: &RafsV6BlobTable, - ) -> Result<()> { - // Rafs v6 disk layout - // - // EROFS_SUPER_OFFSET - // | - // +---+---------+------------+-------------+----------------------------------------------+ - // | | | | | | | | - // |1k |super |extended | blob table | prefetch table | inodes | chunk info table | - // | |block |superblock+ | | | | | - // | | |devslot | | | | | - // +---+---------+------------+-------------+----------------------------------------------+ - - let block_size = ctx.v6_block_size(); - let blobs = blob_table.get_all(); - let devtable_len = blobs.len() * size_of::(); - let blob_table_size = blob_table.size() as u64; - let blob_table_offset = align_offset( - (EROFS_DEVTABLE_OFFSET as u64) + devtable_len as u64, - EROFS_BLOCK_SIZE_4096, - ); - let blob_table_entries = blobs.len(); - assert!(blob_table_entries < u8::MAX as usize); - trace!( - "devtable len {} blob table offset {} blob table size {}", - devtable_len, - blob_table_offset, - blob_table_size - ); - - let fs_prefetch_rule_count = ctx.prefetch.fs_prefetch_rule_count(); - let (prefetch_table_offset, prefetch_table_size) = - // If blob_table_size equal to 0, there is no prefetch. - if fs_prefetch_rule_count > 0 && blob_table_size > 0 { - // Prefetch table is very close to blob devices table - let offset = blob_table_offset + blob_table_size; - // Each prefetched file has is nid of `u32` filled into prefetch table. - let size = fs_prefetch_rule_count * size_of::() as u32; - trace!("prefetch table locates at offset {} size {}", offset, size); - (offset, size) - } else { - (0, 0) - }; - - // Make the superblock's meta_blkaddr one block ahead of the inode table, - // to avoid using 0 as root nid. - // inode offset = meta_blkaddr * block_size + 32 * nid - // When using nid 0 as root nid, - // the root directory will not be shown by glibc's getdents/readdir. - // Because in some OS, ino == 0 represents corresponding file is deleted. 
- let root_node_offset = self.tree.lock_node().v6_offset; - let orig_meta_addr = root_node_offset - EROFS_BLOCK_SIZE_4096; - let meta_addr = if blob_table_size > 0 { - align_offset( - blob_table_offset + blob_table_size + prefetch_table_size as u64, - EROFS_BLOCK_SIZE_4096, - ) - } else { - orig_meta_addr - }; - let meta_offset = meta_addr - orig_meta_addr; - let root_nid = calculate_nid(root_node_offset + meta_offset, meta_addr); - - // Prepare extended super block - let mut ext_sb = RafsV6SuperBlockExt::new(); - ext_sb.set_compressor(ctx.compressor); - ext_sb.set_digester(ctx.digester); - ext_sb.set_cipher(ctx.cipher); - ext_sb.set_chunk_size(ctx.chunk_size); - ext_sb.set_blob_table_offset(blob_table_offset); - ext_sb.set_blob_table_size(blob_table_size as u32); - - // collect all chunks in this bootstrap. - // HashChunkDict cannot be used here, because there will be duplicate chunks between layers, - // but there is no deduplication during the actual construction. - // Each layer uses the corresponding chunk in the blob of its own layer. - // If HashChunkDict is used here, it will cause duplication. The chunks are removed, - // resulting in incomplete chunk info. - let mut chunk_cache = BTreeMap::new(); - - // Dump bootstrap - timing_tracer!( - { - self.tree.walk_bfs(true, &mut |n| { - n.lock_node().dump_bootstrap_v6( - ctx, - bootstrap_ctx.writer.as_mut(), - orig_meta_addr, - meta_addr, - &mut chunk_cache, - ) - }) - }, - "dump_bootstrap" - )?; - Self::v6_align_to_4k(bootstrap_ctx)?; - - // `Node` offset might be updated during above inodes dumping. So `get_prefetch_table` after it. - if prefetch_table_size > 0 { - let prefetch_table = ctx.prefetch.get_v6_prefetch_table(meta_addr); - if let Some(mut pt) = prefetch_table { - assert!(pt.len() * size_of::() <= prefetch_table_size as usize); - // Device slots are very close to extended super block. - ext_sb.set_prefetch_table_offset(prefetch_table_offset); - ext_sb.set_prefetch_table_size(prefetch_table_size); - bootstrap_ctx - .writer - .seek_offset(prefetch_table_offset as u64) - .context("failed seek prefetch table offset")?; - pt.store(bootstrap_ctx.writer.as_mut()).unwrap(); - } - } - - // TODO: get rid of the chunk info array. - // Dump chunk info array. - let chunk_table_offset = bootstrap_ctx - .writer - .seek_to_end() - .context("failed to seek to bootstrap's end for chunk table")?; - let mut chunk_table_size: u64 = 0; - for (_, chunk) in chunk_cache.iter() { - let chunk_size = chunk - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to dump chunk table")?; - chunk_table_size += chunk_size as u64; - } - ext_sb.set_chunk_table(chunk_table_offset, chunk_table_size); - debug!( - "chunk_table offset {} size {}", - chunk_table_offset, chunk_table_size - ); - Self::v6_align_to_4k(bootstrap_ctx)?; - - // Prepare device slots. - let mut pos = bootstrap_ctx - .writer - .seek_to_end() - .context("failed to seek to bootstrap's end for chunk table")?; - assert_eq!(pos % block_size, 0); - let mut devtable: Vec = Vec::new(); - let mut block_count = 0u32; - let mut inlined_chunk_digest = true; - for entry in blobs.iter() { - let mut devslot = RafsV6Device::new(); - // blob id is String, which is processed by sha256.finalize(). 
- if entry.blob_id().is_empty() { - bail!(" blob id is empty"); - } else if entry.blob_id().len() > 64 { - bail!(format!( - "blob id length is bigger than 64 bytes, blob id {:?}", - entry.blob_id() - )); - } else if entry.uncompressed_size() / block_size > u32::MAX as u64 { - bail!(format!( - "uncompressed blob size (0x:{:x}) is too big", - entry.uncompressed_size() - )); - } - if !entry.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST) { - inlined_chunk_digest = false; - } - let cnt = (entry.uncompressed_size() / block_size) as u32; - if block_count.checked_add(cnt).is_none() { - bail!("Too many data blocks in RAFS filesystem, block size 0x{:x}, block count 0x{:x}", block_size, block_count as u64 + cnt as u64); - } - let mapped_blkaddr = Self::v6_align_mapped_blkaddr(block_size, pos)?; - pos = (mapped_blkaddr + cnt) as u64 * block_size; - block_count += cnt; - - let id = entry.blob_id(); - let id = id.as_bytes(); - let mut blob_id = [0u8; 64]; - blob_id[..id.len()].copy_from_slice(id); - devslot.set_blob_id(&blob_id); - devslot.set_blocks(cnt); - devslot.set_mapped_blkaddr(mapped_blkaddr); - devtable.push(devslot); - } - - // Dump super block - let mut sb = RafsV6SuperBlock::new(); - if ctx.conversion_type == ConversionType::TarToTarfs { - sb.set_block_bits(EROFS_BLOCK_BITS_9); - } - sb.set_inos(bootstrap_ctx.get_next_ino() - 1); - sb.set_blocks(block_count); - sb.set_root_nid(root_nid as u16); - sb.set_meta_addr(meta_addr); - sb.set_extra_devices(blob_table_entries as u16); - bootstrap_ctx.writer.seek(SeekFrom::Start(0))?; - sb.store(bootstrap_ctx.writer.as_mut()) - .context("failed to store SB")?; - - // Dump extended super block. - if ctx.explicit_uidgid { - ext_sb.set_explicit_uidgid(); - } - if ctx.has_xattr { - ext_sb.set_has_xattr(); - } - if inlined_chunk_digest { - ext_sb.set_inlined_chunk_digest(); - } - if ctx.conversion_type == ConversionType::TarToTarfs { - ext_sb.set_tarfs_mode(); - } - bootstrap_ctx - .writer - .seek_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64) - .context("failed to seek for extended super block")?; - ext_sb - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store extended super block")?; - - // Dump device slots. 
- bootstrap_ctx - .writer - .seek_offset(EROFS_DEVTABLE_OFFSET as u64) - .context("failed to seek devtslot")?; - for slot in devtable.iter() { - slot.store(bootstrap_ctx.writer.as_mut()) - .context("failed to store device slot")?; - } - - // Dump blob table - bootstrap_ctx - .writer - .seek_offset(blob_table_offset as u64) - .context("failed seek for extended blob table offset")?; - blob_table - .store(bootstrap_ctx.writer.as_mut()) - .context("failed to store extended blob table")?; - - Ok(()) - } - - fn v6_align_to_4k(bootstrap_ctx: &mut BootstrapContext) -> Result<()> { - bootstrap_ctx - .writer - .flush() - .context("failed to flush bootstrap")?; - let pos = bootstrap_ctx - .writer - .seek_to_end() - .context("failed to seek to bootstrap's end for chunk table")?; - let padding = align_offset(pos, EROFS_BLOCK_SIZE_4096) - pos; - bootstrap_ctx - .writer - .write_all(&WRITE_PADDING_DATA[0..padding as usize]) - .context("failed to write 0 to padding of bootstrap's end for chunk table")?; - bootstrap_ctx - .writer - .flush() - .context("failed to flush bootstrap")?; - Ok(()) - } - - fn v6_align_mapped_blkaddr(block_size: u64, addr: u64) -> Result { - match addr.checked_add(V6_BLOCK_SEG_ALIGNMENT - 1) { - None => bail!("address 0x{:x} is too big", addr), - Some(v) => { - let v = (v & !(V6_BLOCK_SEG_ALIGNMENT - 1)) / block_size; - if v > u32::MAX as u64 { - bail!("address 0x{:x} is too big", addr); - } else { - Ok(v as u32) - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ArtifactStorage, BootstrapContext, Overlay}; - use nydus_rafs::metadata::layout::v6::{EROFS_INODE_CHUNK_BASED, EROFS_INODE_SLOT_SIZE}; - use nydus_rafs::metadata::{RafsVersion, RAFS_DEFAULT_CHUNK_SIZE}; - use std::fs::File; - use vmm_sys_util::{tempdir::TempDir, tempfile::TempFile}; - - #[test] - fn test_set_v6_offset() { - let pa = TempDir::new().unwrap(); - let pa_aa = TempFile::new_in(pa.as_path()).unwrap(); - let mut node = Node::from_fs_object( - RafsVersion::V6, - pa.as_path().to_path_buf(), - pa_aa.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - false, - false, - ) - .unwrap(); - - let bootstrap_path = TempFile::new().unwrap(); - let storage = ArtifactStorage::SingleFile(bootstrap_path.as_path().to_path_buf()); - let mut bootstrap_ctx = BootstrapContext::new(Some(storage), false).unwrap(); - bootstrap_ctx.offset = 0; - - // reg file. - // "1" is used only for testing purpose, in practice - // it's always aligned to 32 bytes. - node.v6_set_offset(&mut bootstrap_ctx, None, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(node.v6_offset, 0); - assert_eq!(node.v6_datalayout, EROFS_INODE_CHUNK_BASED); - assert!(node.v6_compact_inode); - assert_eq!(bootstrap_ctx.offset, 32); - - // symlink and dir are handled in the same way. 
- let mut dir_node = Node::from_fs_object( - RafsVersion::V6, - pa.as_path().to_path_buf(), - pa.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - false, - false, - ) - .unwrap(); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 4064, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!(dir_node.v6_offset, 4096); - assert_eq!(bootstrap_ctx.offset, 8192); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 4096, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN); - assert_eq!(dir_node.v6_offset, 32); - assert_eq!(dir_node.v6_dirents_offset, 8192); - assert_eq!(bootstrap_ctx.offset, 8192 + 4096); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 8160, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!(dir_node.v6_offset, 8192 + 4096); - assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 4096); - assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 8161, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN); - assert_eq!(dir_node.v6_offset, 64); - assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192); - assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 3968, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!(dir_node.v6_offset, 96); - assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192 + 8192); - assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192 + 4096); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 2048, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096); - assert_eq!( - dir_node.v6_dirents_offset, - 8192 + 4096 + 8192 + 8192 + 4096 + 4096 - ); - assert_eq!( - bootstrap_ctx.offset, - 8192 + 4096 + 8192 + 8192 + 4096 + 8192 - ); - - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 1985, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096 + 8192); - assert_eq!( - bootstrap_ctx.offset, - 8192 + 4096 + 8192 + 8192 + 4096 + 8192 + 32 + 1985 + 31 - ); - - bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); - dir_node - .v6_set_dir_offset(&mut bootstrap_ctx, 1984, EROFS_BLOCK_SIZE_4096) - .unwrap(); - assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); - assert_eq!( - dir_node.v6_offset, - 8192 + 4096 + 8192 + 8192 + 4096 + 2048 + 32 - ); - assert_eq!( - bootstrap_ctx.offset, - 8192 + 4096 + 8192 + 8192 + 4096 + 8192 + round_up(32 + 1985, 32) - ); - } - - #[test] - fn test_set_v6_inode_compact() { - let pa = TempDir::new().unwrap(); - let pa_reg = TempFile::new_in(pa.as_path()).unwrap(); - let pa_pyc = pa.as_path().join("foo.pyc"); - let _ = File::create(&pa_pyc).unwrap(); - - let reg_node = Node::from_fs_object( - RafsVersion::V6, - pa.as_path().to_path_buf(), - pa_reg.as_path().to_path_buf(), - Overlay::UpperAddition, - RAFS_DEFAULT_CHUNK_SIZE as u32, - false, - false, - ) - .unwrap(); - - assert!(reg_node.v6_compact_inode); - - let pyc_node = Node::from_fs_object( - RafsVersion::V6, - pa.as_path().to_path_buf(), - pa_pyc.as_path().to_path_buf(), - Overlay::UpperAddition, - 
RAFS_DEFAULT_CHUNK_SIZE as u32, - false, - false, - ) - .unwrap(); - - assert!(!pyc_node.v6_compact_inode); - - std::fs::remove_file(&pa_pyc).unwrap(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::BTreeMap; +use std::ffi::{OsStr, OsString}; +use std::io::SeekFrom; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; +use std::sync::Arc; + +use anyhow::{bail, ensure, Context, Result}; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::layout::v6::{ + align_offset, calculate_nid, new_v6_inode, RafsV6BlobTable, RafsV6Device, RafsV6Dirent, + RafsV6InodeChunkAddr, RafsV6InodeChunkHeader, RafsV6OndiskInode, RafsV6SuperBlock, + RafsV6SuperBlockExt, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, EROFS_BLOCK_SIZE_512, + EROFS_DEVTABLE_OFFSET, EROFS_INODE_CHUNK_BASED, EROFS_INODE_FLAT_INLINE, + EROFS_INODE_FLAT_PLAIN, EROFS_INODE_SLOT_SIZE, EROFS_SUPER_BLOCK_SIZE, EROFS_SUPER_OFFSET, +}; +use nydus_rafs::metadata::RafsStore; +use nydus_rafs::RafsIoWrite; +use nydus_storage::device::BlobFeatures; +use nydus_utils::{root_tracer, round_down, round_up, timing_tracer}; + +use super::chunk_dict::DigestWithBlobIndex; +use super::node::Node; +use crate::{Bootstrap, BootstrapContext, BuildContext, ConversionType, Tree}; + +const WRITE_PADDING_DATA: [u8; 4096] = [0u8; 4096]; +const V6_BLOCK_SEG_ALIGNMENT: u64 = 0x8_0000; + +// Rafs v6 dedicated methods +impl Node { + /// Dump RAFS v6 inode metadata to meta blob. + pub fn dump_bootstrap_v6( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + orig_meta_addr: u64, + meta_addr: u64, + chunk_cache: &mut BTreeMap>, + ) -> Result<()> { + let xattr_inline_count = self.info.xattrs.count_v6(); + ensure!( + xattr_inline_count <= u16::MAX as usize, + "size of extended attributes is too big" + ); + let mut inode = new_v6_inode( + &self.inode, + self.v6_datalayout, + xattr_inline_count as u16, + self.v6_compact_inode, + ); + + let meta_offset = meta_addr - orig_meta_addr; + // update all the inodes's offset according to the new 'meta_addr'. + self.v6_offset += meta_offset; + // The EROFS_INODE_FLAT_INLINE layout is valid for directory and symlink only, + // so `dirents_offset` is useful for these two types too, otherwise `dirents_offset` + // should always be zero. + // Enforce the check to avoid overflow of `dirents_offset`. + if self.is_dir() || self.is_symlink() { + self.v6_dirents_offset += meta_offset; + } + let nid = calculate_nid(self.v6_offset, meta_addr); + self.inode.set_ino(nid); + + if self.is_dir() { + self.v6_dump_dir(ctx, f_bootstrap, meta_addr, meta_offset, &mut inode)?; + } else if self.is_reg() { + self.v6_dump_file(ctx, f_bootstrap, chunk_cache, &mut inode)?; + } else if self.is_symlink() { + self.v6_dump_symlink(ctx, f_bootstrap, &mut inode)?; + } else { + f_bootstrap + .seek(SeekFrom::Start(self.v6_offset)) + .context("failed seek for dir inode")?; + inode.store(f_bootstrap).context("failed to store inode")?; + self.v6_store_xattrs(ctx, f_bootstrap)?; + } + + Ok(()) + } + + /// Update whether compact mode can be used for this inode or not. 
+ pub fn v6_set_inode_compact(&mut self) { + self.v6_compact_inode = !(self.info.v6_force_extended_inode + || self.inode.uid() > u16::MAX as u32 + || self.inode.gid() > u16::MAX as u32 + || self.inode.nlink() > u16::MAX as u32 + || self.inode.size() > u32::MAX as u64 + || self.path().extension() == Some(OsStr::new("pyc"))); + } + + /// Layout the normal inode (except directory inode) into the meta blob. + pub fn v6_set_offset( + &mut self, + bootstrap_ctx: &mut BootstrapContext, + v6_hardlink_offset: Option, + block_size: u64, + ) -> Result<()> { + ensure!(!self.is_dir(), "{} is a directory", self.path().display()); + if self.is_reg() { + if let Some(v6_hardlink_offset) = v6_hardlink_offset { + self.v6_offset = v6_hardlink_offset; + } else { + let size = self.v6_size_with_xattr(); + let unit = size_of::() as u64; + let total_size = round_up(size, unit) + self.inode.child_count() as u64 * unit; + // First try to allocate from fragments of dirent pages. + self.v6_offset = bootstrap_ctx.allocate_available_block(total_size, block_size); + if self.v6_offset == 0 { + self.v6_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += total_size; + } + } + self.v6_datalayout = EROFS_INODE_CHUNK_BASED; + } else if self.is_symlink() { + self.v6_set_offset_with_tail(bootstrap_ctx, self.inode.size(), block_size); + } else { + self.v6_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += self.v6_size_with_xattr(); + } + bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); + + Ok(()) + } + + /// Layout the directory inode and its dirents into meta blob. + pub fn v6_set_dir_offset( + &mut self, + bootstrap_ctx: &mut BootstrapContext, + d_size: u64, + block_size: u64, + ) -> Result<()> { + ensure!( + self.is_dir(), + "{} is not a directory", + self.path().display() + ); + + // Dir isize is the total bytes of 'dirents + names'. + self.inode.set_size(d_size); + self.v6_set_offset_with_tail(bootstrap_ctx, d_size, block_size); + bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); + + Ok(()) + } + + /// Calculate space needed to store dirents of the directory inode. + pub fn v6_dirent_size(&self, ctx: &mut BuildContext, tree: &Tree) -> Result { + ensure!(self.is_dir(), "{} is not a directory", self); + let block_size = ctx.v6_block_size(); + let mut d_size = 0; + + // Sort all children if "." and ".." are not at the head after sorting. + if !tree.children.is_empty() && tree.children[0].name() < "..".as_bytes() { + let mut children = Vec::with_capacity(tree.children.len() + 2); + children.push(".".as_bytes()); + children.push("..".as_bytes()); + for child in tree.children.iter() { + children.push(child.name()); + } + children.sort_unstable(); + + for c in children { + // Use length in byte, instead of length in character. + let len = c.len() + size_of::(); + // erofs disk format requires dirent to be aligned to block size. + if (d_size % block_size) + len as u64 > block_size { + d_size = round_up(d_size as u64, block_size); + } + d_size += len as u64; + } + } else { + // Avoid sorting again if "." and ".." are at the head after sorting due to that + // `tree.children` has already been sorted. + d_size = (".".as_bytes().len() + + size_of::() + + "..".as_bytes().len() + + size_of::()) as u64; + for child in tree.children.iter() { + let len = child.name().len() + size_of::(); + // erofs disk format requires dirent to be aligned to block size. 
+ if (d_size % block_size) + len as u64 > block_size { + d_size = round_up(d_size as u64, block_size); + } + d_size += len as u64; + } + } + + Ok(d_size) + } + + fn v6_size_with_xattr(&self) -> u64 { + self.inode + .get_inode_size_with_xattr(&self.info.xattrs, self.v6_compact_inode) as u64 + } + + // Layout symlink or directory inodes into the meta blob. + // + // For DIR inode, size is the total bytes of 'dirents + names'. + // For symlink, size is the length of symlink name. + fn v6_set_offset_with_tail( + &mut self, + bootstrap_ctx: &mut BootstrapContext, + d_size: u64, + block_size: u64, + ) { + // | avail | + // +--------+-----------+----+ +-----------------------+ + // | |inode+tail | free | dirents+names | + // | | | | | | + // +--------+-----------+----+ +-----------------------+ + // + // | avail | + // +--------+-----------+----+ +-----------------------+ +---------+-------------+ + // | |inode | free | dirents+names | | tail | free | + // | | | | | | | | | + // +--------+-----------+----+ +-----------------------+ +---------+-------------+ + // + // + // | avail | + // +--------+-----------+----+ +-----------------------+ +---------+-------------+ + // | | inode + | dirents+names | | tail | free | + // | | | | | | | | + // +--------+-----------+----+ +-----------------------+ +---------+-------------+ + // + // + // | avail | + // +--------+----------------+ +--------------+--------+ +-----------------------+ + // | | inode | | inode+tail | free | | dirents+names | + // | | | | | | | | + // +--------+----------------+ +--------------+--------+ +-----------------------+ + // | inode | + // + // | avail | + // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+ + // | | inode | | inode | free | | dirents+names | | tail | free | + // | | | | | | | | | | | + // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+ + // | inode | + // + // + let inode_size = self.v6_size_with_xattr(); + let tail: u64 = d_size % block_size; + + // We use a simple inline strategy here: + // If the inode size with xattr + tail data size <= EROFS_BLOCK_SIZE, + // we choose to inline it. + // Firstly, if it's bigger than EROFS_BLOCK_SIZE, + // in most cases, we can assume that the tail data size is close to EROFS_BLOCK_SIZE, + // in this condition, even if we don't inline the tail data, there won't be much waste. + // Secondly, the `available_blocks` that we maintain in the `BootstrapCtx`, + // since it contain only single blocks with some unused space, the available space can only + // be smaller than EROFS_BLOCK_SIZE, therefore we can't use our used blocks to store the + // inode plus the tail data bigger than EROFS_BLOCK_SIZE. + let should_inline = tail != 0 && (inode_size + tail) <= block_size; + + // If should inline, we first try to allocate space for the inode together with tail data + // using used blocks. + // If no available used block exists, we try to allocate space from current block. + // If current block doesn't have enough space, we append it to `available_blocks`, + // and we allocate space from the next block. + // For the remaining data, we allocate space for it sequentially. 
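The comments above spell out when a directory's or symlink's tail data is inlined next to its inode. As a quick illustration, here is a minimal standalone sketch of just that decision, assuming a 4 KiB block size and a 32-byte compact inode without xattrs; `chooses_inline` is a hypothetical helper, not part of this patch. The assertions mirror the d_size values exercised by `test_set_v6_offset` at the bottom of this file.

```
// Minimal sketch (not part of the patch): the inline decision described in the
// comments above. The sizes below are assumptions for illustration only.
fn chooses_inline(inode_size: u64, d_size: u64, block_size: u64) -> bool {
    let tail = d_size % block_size;
    // Inline only when there is a tail and inode + tail still fit in one block.
    tail != 0 && inode_size + tail <= block_size
}

fn main() {
    let block_size = 4096; // EROFS_BLOCK_SIZE_4096
    let inode_size = 32;   // assumed: compact inode without xattrs
    assert!(chooses_inline(inode_size, 4064, block_size));  // 32 + 4064 == 4096 -> EROFS_INODE_FLAT_INLINE
    assert!(!chooses_inline(inode_size, 4096, block_size)); // tail == 0        -> EROFS_INODE_FLAT_PLAIN
    assert!(!chooses_inline(inode_size, 8161, block_size)); // 32 + 4065 > 4096 -> EROFS_INODE_FLAT_PLAIN
}
```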
+ self.v6_datalayout = if should_inline { + self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size + tail, block_size); + if self.v6_offset == 0 { + let available = block_size - bootstrap_ctx.offset % block_size; + if available < inode_size + tail { + bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); + bootstrap_ctx.align_offset(block_size); + } + + self.v6_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += inode_size + tail; + } + + if d_size != tail { + bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); + bootstrap_ctx.align_offset(block_size); + } + self.v6_dirents_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += round_down(d_size, block_size); + + EROFS_INODE_FLAT_INLINE + } else { + // Otherwise, we first try to allocate space for the inode from used blocks. + // If no available used block exists, we allocate space sequentially. + // Then we allocate space for all data. + self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size, block_size); + if self.v6_offset == 0 { + self.v6_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += inode_size; + } + + bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size); + bootstrap_ctx.align_offset(block_size); + self.v6_dirents_offset = bootstrap_ctx.offset; + bootstrap_ctx.offset += d_size; + bootstrap_ctx.align_offset(block_size); + + EROFS_INODE_FLAT_PLAIN + }; + + trace!( + "{:?} inode offset {} ctx offset {} d_size {} dirents_offset {} datalayout {}", + self.name(), + self.v6_offset, + bootstrap_ctx.offset, + d_size, + self.v6_dirents_offset, + self.v6_datalayout + ); + } + + fn v6_store_xattrs( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + ) -> Result<()> { + if !self.info.xattrs.is_empty() { + self.info + .xattrs + .store_v6(f_bootstrap) + .context("failed to dump xattr to bootstrap")?; + ctx.has_xattr = true; + } + Ok(()) + } + + fn v6_dump_dir( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + meta_addr: u64, + meta_offset: u64, + inode: &mut Box, + ) -> Result<()> { + // the 1st 4k block after dir inode. + let mut dirent_off = self.v6_dirents_offset; + let blk_addr = ctx + .v6_block_addr(dirent_off) + .with_context(|| format!("failed to compute blk_addr for offset 0x{:x}", dirent_off))?; + inode.set_u(blk_addr); + self.v6_dump_inode(ctx, f_bootstrap, inode) + .context("failed to dump inode for directory")?; + + // Dump dirents + let mut dir_data: Vec = Vec::new(); + let mut entry_names = Vec::new(); + let mut dirents: Vec<(RafsV6Dirent, &OsString)> = Vec::new(); + let mut nameoff: u64 = 0; + let mut used: u64 = 0; + let block_size = ctx.v6_block_size(); + + trace!( + "{:?} self.dirents.len {}", + self.target(), + self.v6_dirents.len() + ); + // fill dir blocks one by one + for (offset, name, file_type) in self.v6_dirents.iter() { + let len = name.as_bytes().len() + size_of::(); + // write to bootstrap when it will exceed EROFS_BLOCK_SIZE + if used + len as u64 > block_size { + for (entry, name) in dirents.iter_mut() { + trace!("{:?} nameoff {}", name, nameoff); + entry.set_name_offset(nameoff as u16); + dir_data.extend(entry.as_ref()); + entry_names.push(*name); + // Use length in byte, instead of length in character. + // Because some characters could occupy more than one byte. 
+ nameoff += name.as_bytes().len() as u64; + } + for name in entry_names.iter() { + dir_data.extend(name.as_bytes()); + } + + f_bootstrap + .seek(SeekFrom::Start(dirent_off as u64)) + .context("failed seek file position for writing dirent")?; + f_bootstrap + .write(dir_data.as_slice()) + .context("failed to write dirent data to meta blob")?; + + // track where we're going to write. + dirent_off += round_up(used, block_size); + used = 0; + nameoff = 0; + dir_data.clear(); + entry_names.clear(); + dirents.clear(); + } + + trace!( + "name {:?} file type {} {:?}", + *name, + *file_type, + RafsV6Dirent::file_type(*file_type) + ); + let entry = RafsV6Dirent::new( + calculate_nid(*offset + meta_offset, meta_addr), + 0, + RafsV6Dirent::file_type(*file_type), + ); + dirents.push((entry, name)); + + nameoff += size_of::() as u64; + used += len as u64; + } + + trace!( + "{:?} used {} dir size {}", + self.target(), + used, + self.inode.size() + ); + // dump tail part if any + if used > 0 { + for (entry, name) in dirents.iter_mut() { + trace!("{:?} tail nameoff {}", name, nameoff); + entry.set_name_offset(nameoff as u16); + dir_data.extend(entry.as_ref()); + entry_names.push(*name); + nameoff += name.as_bytes().len() as u64; + } + for name in entry_names.iter() { + dir_data.extend(name.as_bytes()); + } + + let tail_off = match self.v6_datalayout { + EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(), + EROFS_INODE_FLAT_PLAIN => dirent_off, + _ => bail!("unsupported RAFS v6 inode layout for directory"), + }; + f_bootstrap + .seek(SeekFrom::Start(tail_off as u64)) + .context("failed seek for dir inode")?; + f_bootstrap + .write(dir_data.as_slice()) + .context("failed to store dirents")?; + } + + Ok(()) + } + + fn v6_dump_file( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + chunk_cache: &mut BTreeMap>, + inode: &mut Box, + ) -> Result<()> { + let mut is_continuous = true; + let mut prev = None; + + // write chunk indexes, chunk contents has been written to blob file. + let mut chunks: Vec = Vec::new(); + for chunk in self.chunks.iter() { + let offset = chunk.inner.uncompressed_offset(); + let blk_addr = ctx.v6_block_addr(offset).with_context(|| { + format!( + "failed to compute blk_addr for chunk with uncompressed offset 0x{:x}", + offset + ) + })?; + let blob_idx = chunk.inner.blob_index(); + let mut v6_chunk = RafsV6InodeChunkAddr::new(); + v6_chunk.set_blob_index(blob_idx); + v6_chunk.set_blob_ci_index(chunk.inner.index()); + v6_chunk.set_block_addr(blk_addr); + + chunks.extend(v6_chunk.as_ref()); + chunk_cache.insert( + DigestWithBlobIndex(*chunk.inner.id(), chunk.inner.blob_index() + 1), + chunk.inner.clone(), + ); + if let Some((prev_idx, prev_pos)) = prev { + if prev_pos + ctx.chunk_size as u64 != offset || prev_idx != blob_idx { + is_continuous = false; + } + } + prev = Some((blob_idx, offset)); + } + + // Special optimization to enable page cache sharing for EROFS. 
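The branch right below widens the per-inode chunk size when a regular file's chunks turn out to be laid out contiguously, which is what lets EROFS share page cache for that file. A minimal sketch of that rounding follows; `effective_chunk_size` is a hypothetical helper and the 1 MiB chunk size is an assumption for illustration.

```
// Minimal sketch (not part of the patch): the chunk-size widening performed in
// v6_dump_file() below. The 1 MiB chunk size is an assumption for illustration.
fn effective_chunk_size(is_continuous: bool, file_size: u64, chunk_size: u64) -> u64 {
    if is_continuous && file_size > chunk_size {
        // One power-of-two "chunk" covering the whole file.
        file_size.next_power_of_two()
    } else {
        chunk_size
    }
}

fn main() {
    let chunk_size = 0x10_0000; // assumed 1 MiB
    assert_eq!(effective_chunk_size(true, 3 * 0x10_0000, chunk_size), 0x40_0000);  // 3 MiB file -> 4 MiB
    assert_eq!(effective_chunk_size(false, 3 * 0x10_0000, chunk_size), chunk_size); // gaps -> keep chunk size
}
```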
+ let chunk_size = if is_continuous && inode.size() > ctx.chunk_size as u64 { + inode.size().next_power_of_two() + } else { + ctx.chunk_size as u64 + }; + let info = RafsV6InodeChunkHeader::new(chunk_size, ctx.v6_block_size()); + inode.set_u(info.to_u32()); + self.v6_dump_inode(ctx, f_bootstrap, inode) + .context("failed to dump inode for file")?; + + let unit = size_of::() as u64; + let offset = align_offset(self.v6_offset + self.v6_size_with_xattr(), unit); + f_bootstrap + .seek(SeekFrom::Start(offset)) + .with_context(|| format!("failed to seek to 0x{:x} for writing chunk data", offset))?; + f_bootstrap + .write(chunks.as_slice()) + .context("failed to write chunk data for file")?; + + Ok(()) + } + + fn v6_dump_symlink( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + inode: &mut Box, + ) -> Result<()> { + let blk_addr = ctx.v6_block_addr(self.v6_dirents_offset)?; + inode.set_u(blk_addr); + self.v6_dump_inode(ctx, f_bootstrap, inode) + .context("failed to dump inode for symlink")?; + + if let Some(symlink) = &self.info.symlink { + let tail_off = match self.v6_datalayout { + EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(), + EROFS_INODE_FLAT_PLAIN => self.v6_dirents_offset, + _ => bail!("unsupported RAFS v5 inode layout for symlink"), + }; + f_bootstrap + .seek(SeekFrom::Start(tail_off)) + .context("failed seek for dir inode")?; + f_bootstrap + .write(symlink.as_bytes()) + .context("filed to store symlink")?; + } + + Ok(()) + } + + fn v6_dump_inode( + &mut self, + ctx: &mut BuildContext, + f_bootstrap: &mut dyn RafsIoWrite, + inode: &mut Box, + ) -> Result<()> { + f_bootstrap + .seek(SeekFrom::Start(self.v6_offset)) + .context("failed to seek file position for writing inode")?; + inode + .store(f_bootstrap) + .context("failed to write inode to meta blob")?; + self.v6_store_xattrs(ctx, f_bootstrap) + .context("failed to write extended attributes for inode") + } +} + +impl BuildContext { + pub fn v6_block_size(&self) -> u64 { + if self.conversion_type == ConversionType::TarToTarfs { + // Tar stream is 512-byte aligned. + EROFS_BLOCK_SIZE_512 + } else { + EROFS_BLOCK_SIZE_4096 + } + } + + pub fn v6_block_addr(&self, offset: u64) -> Result { + let blk_addr = offset / self.v6_block_size(); + if blk_addr > u32::MAX as u64 { + bail!("v6 block address 0x{:x} is too big", blk_addr) + } else { + Ok(blk_addr as u32) + } + } +} + +impl Bootstrap { + pub(crate) fn v6_update_dirents(parent: &Tree, parent_offset: u64) { + let mut node = parent.lock_node(); + let node_offset = node.v6_offset; + if !node.is_dir() { + return; + } + + // dot & dotdot + // Type of libc::S_IFDIR is u16 on macos, so it need a conversion + // but compiler will report useless conversion on linux platform, + // so we add an allow annotation here. 
+ #[allow(clippy::useless_conversion)] + { + node.v6_dirents + .push((node_offset, OsString::from("."), libc::S_IFDIR.into())); + node.v6_dirents + .push((parent_offset, OsString::from(".."), libc::S_IFDIR.into())); + } + + let mut dirs: Vec<&Tree> = Vec::new(); + for child in parent.children.iter() { + let child_node = child.lock_node(); + let entry = ( + child_node.v6_offset, + OsStr::from_bytes(child.name()).to_owned(), + child_node.inode.mode(), + ); + node.v6_dirents.push(entry); + if child_node.is_dir() { + dirs.push(child); + } + } + node.v6_dirents + .sort_unstable_by(|a, b| a.1.as_os_str().cmp(b.1.as_os_str())); + + for dir in dirs { + Self::v6_update_dirents(dir, node_offset); + } + } + + /// Dump bootstrap and blob file, return (Vec, blob_size) + pub(crate) fn v6_dump( + &mut self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + blob_table: &RafsV6BlobTable, + ) -> Result<()> { + // Rafs v6 disk layout + // + // EROFS_SUPER_OFFSET + // | + // +---+---------+------------+-------------+----------------------------------------------+ + // | | | | | | | | + // |1k |super |extended | blob table | prefetch table | inodes | chunk info table | + // | |block |superblock+ | | | | | + // | | |devslot | | | | | + // +---+---------+------------+-------------+----------------------------------------------+ + + let block_size = ctx.v6_block_size(); + let blobs = blob_table.get_all(); + let devtable_len = blobs.len() * size_of::(); + let blob_table_size = blob_table.size() as u64; + let blob_table_offset = align_offset( + (EROFS_DEVTABLE_OFFSET as u64) + devtable_len as u64, + EROFS_BLOCK_SIZE_4096, + ); + let blob_table_entries = blobs.len(); + assert!(blob_table_entries < u8::MAX as usize); + trace!( + "devtable len {} blob table offset {} blob table size {}", + devtable_len, + blob_table_offset, + blob_table_size + ); + + let fs_prefetch_rule_count = ctx.prefetch.fs_prefetch_rule_count(); + let (prefetch_table_offset, prefetch_table_size) = + // If blob_table_size equal to 0, there is no prefetch. + if fs_prefetch_rule_count > 0 && blob_table_size > 0 { + // Prefetch table is very close to blob devices table + let offset = blob_table_offset + blob_table_size; + // Each prefetched file has is nid of `u32` filled into prefetch table. + let size = fs_prefetch_rule_count * size_of::() as u32; + trace!("prefetch table locates at offset {} size {}", offset, size); + (offset, size) + } else { + (0, 0) + }; + + // Make the superblock's meta_blkaddr one block ahead of the inode table, + // to avoid using 0 as root nid. + // inode offset = meta_blkaddr * block_size + 32 * nid + // When using nid 0 as root nid, + // the root directory will not be shown by glibc's getdents/readdir. + // Because in some OS, ino == 0 represents corresponding file is deleted. 
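The comment above states the relationship between an inode's byte offset and its nid: inode offset = meta_blkaddr * block_size + 32 * nid, with 32 bytes being the EROFS inode slot size. A minimal worked sketch of that arithmetic follows; `nid_from_offset` and the concrete numbers are illustrative only, while the patch itself relies on `calculate_nid` from nydus-rafs.

```
// Minimal sketch (not part of the patch): the offset <-> nid relationship stated
// in the comment above. nid_from_offset() and the numbers are illustrative only.
const EROFS_INODE_SLOT_SIZE: u64 = 32;

fn nid_from_offset(inode_offset: u64, meta_addr: u64) -> u64 {
    (inode_offset - meta_addr) / EROFS_INODE_SLOT_SIZE
}

fn main() {
    // With the first inode placed one 4 KiB block after meta_addr, as the comment
    // above arranges, the root nid is non-zero and readdir keeps showing the root.
    let meta_addr = 0;
    let root_node_offset = 4096;
    assert_eq!(nid_from_offset(root_node_offset, meta_addr), 128);
}
```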
+ let root_node_offset = self.tree.lock_node().v6_offset; + let orig_meta_addr = root_node_offset - EROFS_BLOCK_SIZE_4096; + let meta_addr = if blob_table_size > 0 { + align_offset( + blob_table_offset + blob_table_size + prefetch_table_size as u64, + EROFS_BLOCK_SIZE_4096, + ) + } else { + orig_meta_addr + }; + let meta_offset = meta_addr - orig_meta_addr; + let root_nid = calculate_nid(root_node_offset + meta_offset, meta_addr); + + // Prepare extended super block + let mut ext_sb = RafsV6SuperBlockExt::new(); + ext_sb.set_compressor(ctx.compressor); + ext_sb.set_digester(ctx.digester); + ext_sb.set_cipher(ctx.cipher); + ext_sb.set_chunk_size(ctx.chunk_size); + ext_sb.set_blob_table_offset(blob_table_offset); + ext_sb.set_blob_table_size(blob_table_size as u32); + + // collect all chunks in this bootstrap. + // HashChunkDict cannot be used here, because there will be duplicate chunks between layers, + // but there is no deduplication during the actual construction. + // Each layer uses the corresponding chunk in the blob of its own layer. + // If HashChunkDict is used here, it will cause duplication. The chunks are removed, + // resulting in incomplete chunk info. + let mut chunk_cache = BTreeMap::new(); + + // Dump bootstrap + timing_tracer!( + { + self.tree.walk_bfs(true, &mut |n| { + n.lock_node().dump_bootstrap_v6( + ctx, + bootstrap_ctx.writer.as_mut(), + orig_meta_addr, + meta_addr, + &mut chunk_cache, + ) + }) + }, + "dump_bootstrap" + )?; + Self::v6_align_to_4k(bootstrap_ctx)?; + + // `Node` offset might be updated during above inodes dumping. So `get_prefetch_table` after it. + if prefetch_table_size > 0 { + let prefetch_table = ctx.prefetch.get_v6_prefetch_table(meta_addr); + if let Some(mut pt) = prefetch_table { + assert!(pt.len() * size_of::() <= prefetch_table_size as usize); + // Device slots are very close to extended super block. + ext_sb.set_prefetch_table_offset(prefetch_table_offset); + ext_sb.set_prefetch_table_size(prefetch_table_size); + bootstrap_ctx + .writer + .seek_offset(prefetch_table_offset as u64) + .context("failed seek prefetch table offset")?; + pt.store(bootstrap_ctx.writer.as_mut()).unwrap(); + } + } + + // TODO: get rid of the chunk info array. + // Dump chunk info array. + let chunk_table_offset = bootstrap_ctx + .writer + .seek_to_end() + .context("failed to seek to bootstrap's end for chunk table")?; + let mut chunk_table_size: u64 = 0; + for (_, chunk) in chunk_cache.iter() { + let chunk_size = chunk + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to dump chunk table")?; + chunk_table_size += chunk_size as u64; + } + ext_sb.set_chunk_table(chunk_table_offset, chunk_table_size); + debug!( + "chunk_table offset {} size {}", + chunk_table_offset, chunk_table_size + ); + Self::v6_align_to_4k(bootstrap_ctx)?; + + // Prepare device slots. + let mut pos = bootstrap_ctx + .writer + .seek_to_end() + .context("failed to seek to bootstrap's end for chunk table")?; + assert_eq!(pos % block_size, 0); + let mut devtable: Vec = Vec::new(); + let mut block_count = 0u32; + let mut inlined_chunk_digest = true; + for entry in blobs.iter() { + let mut devslot = RafsV6Device::new(); + // blob id is String, which is processed by sha256.finalize(). 
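Each blob in the table above becomes one EROFS device slot, and its starting block address is rounded up to a 512 KiB segment boundary (V6_BLOCK_SEG_ALIGNMENT) by v6_align_mapped_blkaddr() further down in this file. A minimal sketch of that rounding with assumed inputs; `mapped_blkaddr` here is a hypothetical stand-in that returns Option instead of the anyhow::Result used in the patch.

```
// Minimal sketch (not part of the patch): the 512 KiB alignment applied by
// v6_align_mapped_blkaddr() further down. mapped_blkaddr() is a stand-in that
// returns Option instead of anyhow::Result.
const V6_BLOCK_SEG_ALIGNMENT: u64 = 0x8_0000; // 512 KiB

fn mapped_blkaddr(block_size: u64, byte_pos: u64) -> Option<u32> {
    let aligned = byte_pos.checked_add(V6_BLOCK_SEG_ALIGNMENT - 1)? & !(V6_BLOCK_SEG_ALIGNMENT - 1);
    u32::try_from(aligned / block_size).ok()
}

fn main() {
    let block_size = 4096; // EROFS_BLOCK_SIZE_4096
    // Assumed: the metadata and chunk tables end at byte 0x9000, so the first blob
    // is mapped at the next segment boundary: 0x8_0000 / 4096 = block 128.
    assert_eq!(mapped_blkaddr(block_size, 0x9000), Some(128));
    // A position already on a segment boundary maps to itself.
    assert_eq!(mapped_blkaddr(block_size, 0x8_0000), Some(128));
}
```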
+ if entry.blob_id().is_empty() { + bail!(" blob id is empty"); + } else if entry.blob_id().len() > 64 { + bail!(format!( + "blob id length is bigger than 64 bytes, blob id {:?}", + entry.blob_id() + )); + } else if entry.uncompressed_size() / block_size > u32::MAX as u64 { + bail!(format!( + "uncompressed blob size (0x:{:x}) is too big", + entry.uncompressed_size() + )); + } + if !entry.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST) { + inlined_chunk_digest = false; + } + let cnt = (entry.uncompressed_size() / block_size) as u32; + if block_count.checked_add(cnt).is_none() { + bail!("Too many data blocks in RAFS filesystem, block size 0x{:x}, block count 0x{:x}", block_size, block_count as u64 + cnt as u64); + } + let mapped_blkaddr = Self::v6_align_mapped_blkaddr(block_size, pos)?; + pos = (mapped_blkaddr + cnt) as u64 * block_size; + block_count += cnt; + + let id = entry.blob_id(); + let id = id.as_bytes(); + let mut blob_id = [0u8; 64]; + blob_id[..id.len()].copy_from_slice(id); + devslot.set_blob_id(&blob_id); + devslot.set_blocks(cnt); + devslot.set_mapped_blkaddr(mapped_blkaddr); + devtable.push(devslot); + } + + // Dump super block + let mut sb = RafsV6SuperBlock::new(); + if ctx.conversion_type == ConversionType::TarToTarfs { + sb.set_block_bits(EROFS_BLOCK_BITS_9); + } + sb.set_inos(bootstrap_ctx.get_next_ino() - 1); + sb.set_blocks(block_count); + sb.set_root_nid(root_nid as u16); + sb.set_meta_addr(meta_addr); + sb.set_extra_devices(blob_table_entries as u16); + bootstrap_ctx.writer.seek(SeekFrom::Start(0))?; + sb.store(bootstrap_ctx.writer.as_mut()) + .context("failed to store SB")?; + + // Dump extended super block. + if ctx.explicit_uidgid { + ext_sb.set_explicit_uidgid(); + } + if ctx.has_xattr { + ext_sb.set_has_xattr(); + } + if inlined_chunk_digest { + ext_sb.set_inlined_chunk_digest(); + } + if ctx.conversion_type == ConversionType::TarToTarfs { + ext_sb.set_tarfs_mode(); + } + bootstrap_ctx + .writer + .seek_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64) + .context("failed to seek for extended super block")?; + ext_sb + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store extended super block")?; + + // Dump device slots. 
+ bootstrap_ctx + .writer + .seek_offset(EROFS_DEVTABLE_OFFSET as u64) + .context("failed to seek devtslot")?; + for slot in devtable.iter() { + slot.store(bootstrap_ctx.writer.as_mut()) + .context("failed to store device slot")?; + } + + // Dump blob table + bootstrap_ctx + .writer + .seek_offset(blob_table_offset as u64) + .context("failed seek for extended blob table offset")?; + blob_table + .store(bootstrap_ctx.writer.as_mut()) + .context("failed to store extended blob table")?; + + Ok(()) + } + + fn v6_align_to_4k(bootstrap_ctx: &mut BootstrapContext) -> Result<()> { + bootstrap_ctx + .writer + .flush() + .context("failed to flush bootstrap")?; + let pos = bootstrap_ctx + .writer + .seek_to_end() + .context("failed to seek to bootstrap's end for chunk table")?; + let padding = align_offset(pos, EROFS_BLOCK_SIZE_4096) - pos; + bootstrap_ctx + .writer + .write_all(&WRITE_PADDING_DATA[0..padding as usize]) + .context("failed to write 0 to padding of bootstrap's end for chunk table")?; + bootstrap_ctx + .writer + .flush() + .context("failed to flush bootstrap")?; + Ok(()) + } + + fn v6_align_mapped_blkaddr(block_size: u64, addr: u64) -> Result { + match addr.checked_add(V6_BLOCK_SEG_ALIGNMENT - 1) { + None => bail!("address 0x{:x} is too big", addr), + Some(v) => { + let v = (v & !(V6_BLOCK_SEG_ALIGNMENT - 1)) / block_size; + if v > u32::MAX as u64 { + bail!("address 0x{:x} is too big", addr); + } else { + Ok(v as u32) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ArtifactStorage, BootstrapContext, Overlay}; + use nydus_rafs::metadata::layout::v6::{EROFS_INODE_CHUNK_BASED, EROFS_INODE_SLOT_SIZE}; + use nydus_rafs::metadata::{RafsVersion, RAFS_DEFAULT_CHUNK_SIZE}; + use std::fs::File; + use vmm_sys_util::{tempdir::TempDir, tempfile::TempFile}; + + #[test] + fn test_set_v6_offset() { + let pa = TempDir::new().unwrap(); + let pa_aa = TempFile::new_in(pa.as_path()).unwrap(); + let mut node = Node::from_fs_object( + RafsVersion::V6, + pa.as_path().to_path_buf(), + pa_aa.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + false, + false, + ) + .unwrap(); + + let bootstrap_path = TempFile::new().unwrap(); + let storage = ArtifactStorage::SingleFile(bootstrap_path.as_path().to_path_buf()); + let mut bootstrap_ctx = BootstrapContext::new(Some(storage), false).unwrap(); + bootstrap_ctx.offset = 0; + + // reg file. + // "1" is used only for testing purpose, in practice + // it's always aligned to 32 bytes. + node.v6_set_offset(&mut bootstrap_ctx, None, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(node.v6_offset, 0); + assert_eq!(node.v6_datalayout, EROFS_INODE_CHUNK_BASED); + assert!(node.v6_compact_inode); + assert_eq!(bootstrap_ctx.offset, 32); + + // symlink and dir are handled in the same way. 
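The directory cases that follow exercise EROFS tail packing. A hedged reading of the expected values is that the dirent tail (d_size modulo the 4 KiB block) is inlined only when it is non-empty and shares a block with the 32-byte compact inode; otherwise the layout falls back to FLAT_PLAIN. The rule below is inferred from the test expectations, not lifted from the builder:

```rust
const BLOCK_SIZE: u64 = 4096;
const COMPACT_INODE_SIZE: u64 = 32;

/// Inferred rule: inline the dirent tail iff it is non-empty and fits in the
/// same block as the compact inode; otherwise use FLAT_PLAIN whole blocks.
fn uses_flat_inline(d_size: u64) -> bool {
    let tail = d_size % BLOCK_SIZE;
    tail != 0 && COMPACT_INODE_SIZE + tail <= BLOCK_SIZE
}

fn main() {
    assert!(uses_flat_inline(4064)); // 32 + 4064 == 4096, fits exactly
    assert!(!uses_flat_inline(4096)); // no tail, pure blocks (FLAT_PLAIN)
    assert!(uses_flat_inline(8160)); // tail 4064 still fits
    assert!(!uses_flat_inline(8161)); // tail 4065 no longer fits
    assert!(uses_flat_inline(4096 + 3968));
    assert!(uses_flat_inline(1985));
}
```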
+ let mut dir_node = Node::from_fs_object( + RafsVersion::V6, + pa.as_path().to_path_buf(), + pa.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + false, + false, + ) + .unwrap(); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 4064, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!(dir_node.v6_offset, 4096); + assert_eq!(bootstrap_ctx.offset, 8192); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 4096, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN); + assert_eq!(dir_node.v6_offset, 32); + assert_eq!(dir_node.v6_dirents_offset, 8192); + assert_eq!(bootstrap_ctx.offset, 8192 + 4096); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 8160, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!(dir_node.v6_offset, 8192 + 4096); + assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 4096); + assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 8161, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN); + assert_eq!(dir_node.v6_offset, 64); + assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192); + assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 3968, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!(dir_node.v6_offset, 96); + assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192 + 8192); + assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192 + 4096); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 2048, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096); + assert_eq!( + dir_node.v6_dirents_offset, + 8192 + 4096 + 8192 + 8192 + 4096 + 4096 + ); + assert_eq!( + bootstrap_ctx.offset, + 8192 + 4096 + 8192 + 8192 + 4096 + 8192 + ); + + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 1985, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096 + 8192); + assert_eq!( + bootstrap_ctx.offset, + 8192 + 4096 + 8192 + 8192 + 4096 + 8192 + 32 + 1985 + 31 + ); + + bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64); + dir_node + .v6_set_dir_offset(&mut bootstrap_ctx, 1984, EROFS_BLOCK_SIZE_4096) + .unwrap(); + assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE); + assert_eq!( + dir_node.v6_offset, + 8192 + 4096 + 8192 + 8192 + 4096 + 2048 + 32 + ); + assert_eq!( + bootstrap_ctx.offset, + 8192 + 4096 + 8192 + 8192 + 4096 + 8192 + round_up(32 + 1985, 32) + ); + } + + #[test] + fn test_set_v6_inode_compact() { + let pa = TempDir::new().unwrap(); + let pa_reg = TempFile::new_in(pa.as_path()).unwrap(); + let pa_pyc = pa.as_path().join("foo.pyc"); + let _ = File::create(&pa_pyc).unwrap(); + + let reg_node = Node::from_fs_object( + RafsVersion::V6, + pa.as_path().to_path_buf(), + pa_reg.as_path().to_path_buf(), + Overlay::UpperAddition, + RAFS_DEFAULT_CHUNK_SIZE as u32, + false, + false, + ) + .unwrap(); + + assert!(reg_node.v6_compact_inode); + + let pyc_node = Node::from_fs_object( + RafsVersion::V6, + pa.as_path().to_path_buf(), + pa_pyc.as_path().to_path_buf(), + Overlay::UpperAddition, + 
RAFS_DEFAULT_CHUNK_SIZE as u32, + false, + false, + ) + .unwrap(); + + assert!(!pyc_node.v6_compact_inode); + + std::fs::remove_file(&pa_pyc).unwrap(); + } +} diff --git a/builder/src/directory.rs b/builder/src/directory.rs index f934f5111ac..7fbc2ae544e 100644 --- a/builder/src/directory.rs +++ b/builder/src/directory.rs @@ -1,197 +1,197 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fs; -use std::fs::DirEntry; - -use anyhow::{Context, Result}; -use nydus_utils::{event_tracer, lazy_drop, root_tracer, timing_tracer}; - -use crate::core::context::{Artifact, NoopArtifactWriter}; - -use super::core::blob::Blob; -use super::core::context::{ - ArtifactWriter, BlobManager, BootstrapContext, BootstrapManager, BuildContext, BuildOutput, -}; -use super::core::node::Node; -use super::{build_bootstrap, dump_bootstrap, finalize_blob, Builder, Overlay, Tree, TreeNode}; - -struct FilesystemTreeBuilder {} - -impl FilesystemTreeBuilder { - fn new() -> Self { - Self {} - } - - #[allow(clippy::only_used_in_recursion)] - /// Walk directory to build node tree by DFS - fn load_children( - &self, - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - parent: &TreeNode, - layer_idx: u16, - ) -> Result> { - let mut result = Vec::new(); - let parent = parent.lock().unwrap(); - if !parent.is_dir() { - return Ok(result); - } - - let children = fs::read_dir(parent.path()) - .with_context(|| format!("failed to read dir {:?}", parent.path()))?; - let children = children.collect::, std::io::Error>>()?; - - event_tracer!("load_from_directory", +children.len()); - for child in children { - let path = child.path(); - let mut child = Node::from_fs_object( - ctx.fs_version, - ctx.source_path.clone(), - path.clone(), - Overlay::UpperAddition, - ctx.chunk_size, - parent.info.explicit_uidgid, - true, - ) - .with_context(|| format!("failed to create node {:?}", path))?; - child.layer_idx = layer_idx; - - // as per OCI spec, whiteout file should not be present within final image - // or filesystem, only existed in layers. 
- if !bootstrap_ctx.layered - && child.whiteout_type(ctx.whiteout_spec).is_some() - && !child.is_overlayfs_opaque(ctx.whiteout_spec) - { - continue; - } - - let mut child = Tree::new(child); - child.children = self.load_children(ctx, bootstrap_ctx, &child.node, layer_idx)?; - child - .lock_node() - .v5_set_dir_size(ctx.fs_version, &child.children); - result.push(child); - } - - result.sort_unstable_by(|a, b| a.name().cmp(b.name())); - - Ok(result) - } -} - -#[derive(Default)] -pub struct DirectoryBuilder {} - -impl DirectoryBuilder { - pub fn new() -> Self { - Self {} - } - - /// Build node tree from a filesystem directory - fn build_tree( - &mut self, - ctx: &mut BuildContext, - bootstrap_ctx: &mut BootstrapContext, - layer_idx: u16, - ) -> Result { - let node = Node::from_fs_object( - ctx.fs_version, - ctx.source_path.clone(), - ctx.source_path.clone(), - Overlay::UpperAddition, - ctx.chunk_size, - ctx.explicit_uidgid, - true, - )?; - let mut tree = Tree::new(node); - let tree_builder = FilesystemTreeBuilder::new(); - - tree.children = timing_tracer!( - { tree_builder.load_children(ctx, bootstrap_ctx, &tree.node, layer_idx) }, - "load_from_directory" - )?; - tree.lock_node() - .v5_set_dir_size(ctx.fs_version, &tree.children); - - Ok(tree) - } -} - -impl Builder for DirectoryBuilder { - fn build( - &mut self, - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - ) -> Result { - let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; - let layer_idx = u16::from(bootstrap_ctx.layered); - let mut blob_writer: Box = if let Some(blob_stor) = ctx.blob_storage.clone() { - Box::new(ArtifactWriter::new(blob_stor)?) - } else { - Box::::default() - }; - - // Scan source directory to build upper layer tree. - let tree = timing_tracer!( - { self.build_tree(ctx, &mut bootstrap_ctx, layer_idx) }, - "build_tree" - )?; - - // Build bootstrap - let mut bootstrap = timing_tracer!( - { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, - "build_bootstrap" - )?; - - // Dump blob file - timing_tracer!( - { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, - "dump_blob" - )?; - - // Dump blob meta information - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; - } - - // Dump RAFS meta/bootstrap and finalize the data blob. - if ctx.blob_inline_meta { - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - } else { - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - } - - lazy_drop(bootstrap_ctx); - - BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) - } -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::fs; +use std::fs::DirEntry; + +use anyhow::{Context, Result}; +use nydus_utils::{event_tracer, lazy_drop, root_tracer, timing_tracer}; + +use crate::core::context::{Artifact, NoopArtifactWriter}; + +use super::core::blob::Blob; +use super::core::context::{ + ArtifactWriter, BlobManager, BootstrapContext, BootstrapManager, BuildContext, BuildOutput, +}; +use super::core::node::Node; +use super::{build_bootstrap, dump_bootstrap, finalize_blob, Builder, Overlay, Tree, TreeNode}; + +struct FilesystemTreeBuilder {} + +impl FilesystemTreeBuilder { + fn new() -> Self { + Self {} + } + + #[allow(clippy::only_used_in_recursion)] + /// Walk directory to build node tree by DFS + fn load_children( + &self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + parent: &TreeNode, + layer_idx: u16, + ) -> Result> { + let mut result = Vec::new(); + let parent = parent.lock().unwrap(); + if !parent.is_dir() { + return Ok(result); + } + + let children = fs::read_dir(parent.path()) + .with_context(|| format!("failed to read dir {:?}", parent.path()))?; + let children = children.collect::, std::io::Error>>()?; + + event_tracer!("load_from_directory", +children.len()); + for child in children { + let path = child.path(); + let mut child = Node::from_fs_object( + ctx.fs_version, + ctx.source_path.clone(), + path.clone(), + Overlay::UpperAddition, + ctx.chunk_size, + parent.info.explicit_uidgid, + true, + ) + .with_context(|| format!("failed to create node {:?}", path))?; + child.layer_idx = layer_idx; + + // as per OCI spec, whiteout file should not be present within final image + // or filesystem, only existed in layers. + if !bootstrap_ctx.layered + && child.whiteout_type(ctx.whiteout_spec).is_some() + && !child.is_overlayfs_opaque(ctx.whiteout_spec) + { + continue; + } + + let mut child = Tree::new(child); + child.children = self.load_children(ctx, bootstrap_ctx, &child.node, layer_idx)?; + child + .lock_node() + .v5_set_dir_size(ctx.fs_version, &child.children); + result.push(child); + } + + result.sort_unstable_by(|a, b| a.name().cmp(b.name())); + + Ok(result) + } +} + +#[derive(Default)] +pub struct DirectoryBuilder {} + +impl DirectoryBuilder { + pub fn new() -> Self { + Self {} + } + + /// Build node tree from a filesystem directory + fn build_tree( + &mut self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + layer_idx: u16, + ) -> Result { + let node = Node::from_fs_object( + ctx.fs_version, + ctx.source_path.clone(), + ctx.source_path.clone(), + Overlay::UpperAddition, + ctx.chunk_size, + ctx.explicit_uidgid, + true, + )?; + let mut tree = Tree::new(node); + let tree_builder = FilesystemTreeBuilder::new(); + + tree.children = timing_tracer!( + { tree_builder.load_children(ctx, bootstrap_ctx, &tree.node, layer_idx) }, + "load_from_directory" + )?; + tree.lock_node() + .v5_set_dir_size(ctx.fs_version, &tree.children); + + Ok(tree) + } +} + +impl Builder for DirectoryBuilder { + fn build( + &mut self, + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + ) -> Result { + let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; + let layer_idx = u16::from(bootstrap_ctx.layered); + let mut blob_writer: Box = if let Some(blob_stor) = ctx.blob_storage.clone() { + Box::new(ArtifactWriter::new(blob_stor)?) + } else { + Box::::default() + }; + + // Scan source directory to build upper layer tree. 
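`load_children` above walks the source directory depth first, turns every entry into a node, and sorts siblings by name so the resulting bootstrap is deterministic. A std-only sketch of that traversal, with the nydus `Node`/`Tree` types replaced by plain paths:

```rust
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Recursively list a directory, sorting siblings by file name so the
/// traversal order is deterministic, mirroring load_children's
/// `sort_unstable_by(|a, b| a.name().cmp(b.name()))`.
fn walk_sorted(dir: &Path) -> io::Result<Vec<PathBuf>> {
    let mut children: Vec<PathBuf> = fs::read_dir(dir)?
        .map(|entry| entry.map(|e| e.path()))
        .collect::<Result<_, _>>()?;
    children.sort_unstable_by(|a, b| a.file_name().cmp(&b.file_name()));

    let mut result = Vec::new();
    for child in children {
        let is_dir = child.is_dir();
        result.push(child.clone());
        if is_dir {
            result.extend(walk_sorted(&child)?);
        }
    }
    Ok(result)
}

fn main() -> io::Result<()> {
    for path in walk_sorted(Path::new("."))? {
        println!("{}", path.display());
    }
    Ok(())
}
```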
+ let tree = timing_tracer!( + { self.build_tree(ctx, &mut bootstrap_ctx, layer_idx) }, + "build_tree" + )?; + + // Build bootstrap + let mut bootstrap = timing_tracer!( + { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, + "build_bootstrap" + )?; + + // Dump blob file + timing_tracer!( + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, + "dump_blob" + )?; + + // Dump blob meta information + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; + } + + // Dump RAFS meta/bootstrap and finalize the data blob. + if ctx.blob_inline_meta { + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + } else { + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + } + + lazy_drop(bootstrap_ctx); + + BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) + } +} diff --git a/builder/src/lib.rs b/builder/src/lib.rs index 54f47e264a7..004cc56663f 100644 --- a/builder/src/lib.rs +++ b/builder/src/lib.rs @@ -1,402 +1,402 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Builder to create RAFS filesystems from directories and tarballs. - -#[macro_use] -extern crate log; - -use crate::core::context::Artifact; -use std::ffi::OsString; -use std::os::unix::ffi::OsStrExt; -use std::path::{Path, PathBuf}; - -use anyhow::{anyhow, Context, Result}; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::RafsXAttrs; -use nydus_rafs::metadata::{Inode, RafsVersion}; -use nydus_storage::meta::toc; -use nydus_utils::digest::{DigestHasher, RafsDigest}; -use nydus_utils::{compress, digest, root_tracer, timing_tracer}; -use sha2::Digest; - -use self::core::node::{Node, NodeInfo}; - -pub use self::chunkdict_generator::ChunkdictBlobInfo; -pub use self::chunkdict_generator::ChunkdictChunkInfo; -pub use self::chunkdict_generator::Generator; -pub use self::compact::BlobCompactor; -pub use self::core::bootstrap::Bootstrap; -pub use self::core::chunk_dict::{parse_chunk_dict_arg, ChunkDict, HashChunkDict}; -pub use self::core::context::{ - ArtifactStorage, ArtifactWriter, BlobCacheGenerator, BlobContext, BlobManager, - BootstrapContext, BootstrapManager, BuildContext, BuildOutput, ConversionType, -}; -pub use self::core::feature::{Feature, Features}; -pub use self::core::node::{ChunkSource, NodeChunk}; -pub use self::core::overlay::{Overlay, WhiteoutSpec}; -pub use self::core::prefetch::{Prefetch, PrefetchPolicy}; -pub use self::core::tree::{MetadataTreeBuilder, Tree, TreeNode}; -pub use self::directory::DirectoryBuilder; -pub use self::merge::Merger; -pub use self::stargz::StargzBuilder; -pub use self::tarball::TarballBuilder; - -mod chunkdict_generator; -mod compact; -mod core; -mod directory; -mod merge; -mod stargz; -mod tarball; - -/// Trait to generate a RAFS filesystem from the source. 
-pub trait Builder { - fn build( - &mut self, - build_ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - ) -> Result; -} - -fn build_bootstrap( - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - bootstrap_ctx: &mut BootstrapContext, - blob_mgr: &mut BlobManager, - mut tree: Tree, -) -> Result { - // For multi-layer build, merge the upper layer and lower layer with overlay whiteout applied. - if bootstrap_ctx.layered { - let mut parent = Bootstrap::load_parent_bootstrap(ctx, bootstrap_mgr, blob_mgr)?; - timing_tracer!({ parent.merge_overaly(ctx, tree) }, "merge_bootstrap")?; - tree = parent; - } - - let mut bootstrap = Bootstrap::new(tree)?; - timing_tracer!({ bootstrap.build(ctx, bootstrap_ctx) }, "build_bootstrap")?; - - Ok(bootstrap) -} - -fn dump_bootstrap( - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - bootstrap_ctx: &mut BootstrapContext, - bootstrap: &mut Bootstrap, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, -) -> Result<()> { - // Make sure blob id is updated according to blob hash if not specified by user. - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - if blob_ctx.blob_id.is_empty() { - // `Blob::dump()` should have set `blob_ctx.blob_id` to referenced OCI tarball for - // ref-type conversion. - assert!(!ctx.conversion_type.is_to_ref()); - if ctx.blob_inline_meta { - // Set special blob id for blob with inlined meta. - blob_ctx.blob_id = "x".repeat(64); - } else { - blob_ctx.blob_id = format!("{:x}", blob_ctx.blob_hash.clone().finalize()); - } - } - if !ctx.conversion_type.is_to_ref() { - blob_ctx.compressed_blob_size = blob_writer.pos()?; - } - } - - // Dump bootstrap file - let blob_table = blob_mgr.to_blob_table(ctx)?; - let storage = &mut bootstrap_mgr.bootstrap_storage; - bootstrap.dump(ctx, storage, bootstrap_ctx, &blob_table)?; - - // Dump RAFS meta to data blob if inline meta is enabled. - if ctx.blob_inline_meta { - assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs); - // Ensure the blob object is created in case of no chunks generated for the blob. - let (_, blob_ctx) = blob_mgr - .get_or_create_current_blob(ctx) - .map_err(|_e| anyhow!("failed to get current blob object"))?; - let bootstrap_offset = blob_writer.pos()?; - let uncompressed_bootstrap = bootstrap_ctx.writer.as_bytes()?; - let uncompressed_size = uncompressed_bootstrap.len(); - let uncompressed_digest = - RafsDigest::from_buf(&uncompressed_bootstrap, digest::Algorithm::Sha256); - - // Output uncompressed data for backward compatibility and compressed data for new format. 
- let (bootstrap_data, compressor) = if ctx.features.is_enabled(Feature::BlobToc) { - let mut compressor = compress::Algorithm::Zstd; - let (compressed_data, compressed) = - compress::compress(&uncompressed_bootstrap, compressor) - .with_context(|| "failed to compress bootstrap".to_string())?; - blob_ctx.write_data(blob_writer, &compressed_data)?; - if !compressed { - compressor = compress::Algorithm::None; - } - (compressed_data, compressor) - } else { - blob_ctx.write_data(blob_writer, &uncompressed_bootstrap)?; - (uncompressed_bootstrap, compress::Algorithm::None) - }; - - let compressed_size = bootstrap_data.len(); - blob_ctx.write_tar_header( - blob_writer, - toc::TOC_ENTRY_BOOTSTRAP, - compressed_size as u64, - )?; - - if ctx.features.is_enabled(Feature::BlobToc) { - blob_ctx.entry_list.add( - toc::TOC_ENTRY_BOOTSTRAP, - compressor, - uncompressed_digest, - bootstrap_offset, - compressed_size as u64, - uncompressed_size as u64, - )?; - } - } - - Ok(()) -} - -fn dump_toc( - ctx: &mut BuildContext, - blob_ctx: &mut BlobContext, - blob_writer: &mut dyn Artifact, -) -> Result<()> { - if ctx.features.is_enabled(Feature::BlobToc) { - assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs); - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - let data = blob_ctx.entry_list.as_bytes().to_vec(); - let toc_size = data.len() as u64; - blob_ctx.write_data(blob_writer, &data)?; - hasher.digest_update(&data); - let header = blob_ctx.write_tar_header(blob_writer, toc::TOC_ENTRY_BLOB_TOC, toc_size)?; - hasher.digest_update(header.as_bytes()); - blob_ctx.blob_toc_digest = hasher.digest_finalize().data; - blob_ctx.blob_toc_size = toc_size as u32 + header.as_bytes().len() as u32; - } - Ok(()) -} - -fn finalize_blob( - ctx: &mut BuildContext, - blob_mgr: &mut BlobManager, - blob_writer: &mut dyn Artifact, -) -> Result<()> { - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - let is_tarfs = ctx.conversion_type == ConversionType::TarToTarfs; - - if !is_tarfs { - dump_toc(ctx, blob_ctx, blob_writer)?; - } - if !ctx.conversion_type.is_to_ref() { - blob_ctx.compressed_blob_size = blob_writer.pos()?; - } - if ctx.blob_inline_meta && blob_ctx.blob_id == "x".repeat(64) { - blob_ctx.blob_id = String::new(); - } - - let hash = blob_ctx.blob_hash.clone().finalize(); - let blob_meta_id = if ctx.blob_id.is_empty() { - format!("{:x}", hash) - } else { - assert!(!ctx.conversion_type.is_to_ref() || is_tarfs); - ctx.blob_id.clone() - }; - - if ctx.conversion_type.is_to_ref() { - if blob_ctx.blob_id.is_empty() { - // Use `sha256(tarball)` as `blob_id`. A tarball without files will fall through - // this path because `Blob::dump()` hasn't generated `blob_ctx.blob_id`. - if let Some(zran) = &ctx.blob_zran_generator { - let reader = zran.lock().unwrap().reader(); - blob_ctx.compressed_blob_size = reader.get_data_size(); - if blob_ctx.blob_id.is_empty() { - let hash = reader.get_data_digest(); - blob_ctx.blob_id = format!("{:x}", hash.finalize()); - } - } else if let Some(tar_reader) = &ctx.blob_tar_reader { - blob_ctx.compressed_blob_size = tar_reader.position(); - if blob_ctx.blob_id.is_empty() { - let hash = tar_reader.get_hash_object(); - blob_ctx.blob_id = format!("{:x}", hash.finalize()); - } - } - } - // Tarfs mode only has tar stream and meta blob, there's no data blob. 
- if !ctx.blob_inline_meta && !is_tarfs { - blob_ctx.blob_meta_digest = hash.into(); - blob_ctx.blob_meta_size = blob_writer.pos()?; - } - } else if blob_ctx.blob_id.is_empty() { - // `blob_ctx.blob_id` should be RAFS blob id. - blob_ctx.blob_id = blob_meta_id.clone(); - } - - // Tarfs mode directly use the tar file as RAFS data blob, so no need to generate the data - // blob file. - if !is_tarfs { - blob_writer.finalize(Some(blob_meta_id))?; - } - - if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { - blob_cache.finalize(&blob_ctx.blob_id)?; - } - } - - Ok(()) -} - -/// Helper for TarballBuilder/StargzBuilder to build the filesystem tree. -pub struct TarBuilder { - pub explicit_uidgid: bool, - pub layer_idx: u16, - pub version: RafsVersion, - next_ino: Inode, -} - -impl TarBuilder { - /// Create a new instance of [TarBuilder]. - pub fn new(explicit_uidgid: bool, layer_idx: u16, version: RafsVersion) -> Self { - TarBuilder { - explicit_uidgid, - layer_idx, - next_ino: 0, - version, - } - } - - /// Allocate an inode number. - pub fn next_ino(&mut self) -> Inode { - self.next_ino += 1; - self.next_ino - } - - /// Insert a node into the tree, creating any missing intermediate directories. - pub fn insert_into_tree(&mut self, tree: &mut Tree, node: Node) -> Result<()> { - let target_paths = node.target_vec(); - let target_paths_len = target_paths.len(); - - if target_paths_len == 1 { - // Handle root node modification - assert_eq!(node.path(), Path::new("/")); - tree.set_node(node); - } else { - let mut tmp_tree = tree; - for idx in 1..target_paths.len() { - match tmp_tree.get_child_idx(target_paths[idx].as_bytes()) { - Some(i) => { - if idx == target_paths_len - 1 { - tmp_tree.children[i].set_node(node); - break; - } else { - tmp_tree = &mut tmp_tree.children[i]; - } - } - None => { - if idx == target_paths_len - 1 { - tmp_tree.insert_child(Tree::new(node)); - break; - } else { - let node = self.create_directory(&target_paths[..=idx])?; - tmp_tree.insert_child(Tree::new(node)); - let last_idx = tmp_tree.children.len() - 1; - tmp_tree = &mut tmp_tree.children[last_idx]; - } - } - } - } - } - - Ok(()) - } - - /// Create a new node for a directory. - pub fn create_directory(&mut self, target_paths: &[OsString]) -> Result { - let ino = self.next_ino(); - let name = &target_paths[target_paths.len() - 1]; - let mut inode = InodeWrapper::new(self.version); - inode.set_ino(ino); - inode.set_mode(0o755 | libc::S_IFDIR as u32); - inode.set_nlink(2); - inode.set_name_size(name.len()); - inode.set_rdev(u32::MAX); - - let source = PathBuf::from("/"); - let target_vec = target_paths.to_vec(); - let mut target = PathBuf::new(); - for name in target_paths.iter() { - target = target.join(name); - } - let info = NodeInfo { - explicit_uidgid: self.explicit_uidgid, - src_ino: ino, - src_dev: u64::MAX, - rdev: u64::MAX, - path: target.clone(), - source, - target, - target_vec, - symlink: None, - xattrs: RafsXAttrs::new(), - v6_force_extended_inode: false, - }; - - Ok(Node::new(inode, info, self.layer_idx)) - } - - /// Check whether the path is a eStargz special file. 
- pub fn is_stargz_special_files(&self, path: &Path) -> bool { - path == Path::new("/stargz.index.json") - || path == Path::new("/.prefetch.landmark") - || path == Path::new("/.no.prefetch.landmark") - } -} - -#[cfg(test)] -mod tests { - use vmm_sys_util::tempdir::TempDir; - - use super::*; - - #[test] - fn test_tar_builder_is_stargz_special_files() { - let builder = TarBuilder::new(true, 0, RafsVersion::V6); - - let path = Path::new("/stargz.index.json"); - assert!(builder.is_stargz_special_files(&path)); - let path = Path::new("/.prefetch.landmark"); - assert!(builder.is_stargz_special_files(&path)); - let path = Path::new("/.no.prefetch.landmark"); - assert!(builder.is_stargz_special_files(&path)); - - let path = Path::new("/no.prefetch.landmark"); - assert!(!builder.is_stargz_special_files(&path)); - let path = Path::new("/prefetch.landmark"); - assert!(!builder.is_stargz_special_files(&path)); - let path = Path::new("/tar.index.json"); - assert!(!builder.is_stargz_special_files(&path)); - } - - #[test] - fn test_tar_builder_create_directory() { - let tmp_dir = TempDir::new().unwrap(); - let target_paths = [OsString::from(tmp_dir.as_path())]; - let mut builder = TarBuilder::new(true, 0, RafsVersion::V6); - - let node = builder.create_directory(&target_paths); - assert!(node.is_ok()); - let node = node.unwrap(); - println!("Node: {}", node); - assert_eq!(node.file_type(), "dir"); - assert_eq!(node.target(), tmp_dir.as_path()); - - assert_eq!(builder.next_ino, 1); - assert_eq!(builder.next_ino(), 2); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Builder to create RAFS filesystems from directories and tarballs. + +#[macro_use] +extern crate log; + +use crate::core::context::Artifact; +use std::ffi::OsString; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_rafs::metadata::{Inode, RafsVersion}; +use nydus_storage::meta::toc; +use nydus_utils::digest::{DigestHasher, RafsDigest}; +use nydus_utils::{compress, digest, root_tracer, timing_tracer}; +use sha2::Digest; + +use self::core::node::{Node, NodeInfo}; + +pub use self::chunkdict_generator::ChunkdictBlobInfo; +pub use self::chunkdict_generator::ChunkdictChunkInfo; +pub use self::chunkdict_generator::Generator; +pub use self::compact::BlobCompactor; +pub use self::core::bootstrap::Bootstrap; +pub use self::core::chunk_dict::{parse_chunk_dict_arg, ChunkDict, HashChunkDict}; +pub use self::core::context::{ + ArtifactStorage, ArtifactWriter, BlobCacheGenerator, BlobContext, BlobManager, + BootstrapContext, BootstrapManager, BuildContext, BuildOutput, ConversionType, +}; +pub use self::core::feature::{Feature, Features}; +pub use self::core::node::{ChunkSource, NodeChunk}; +pub use self::core::overlay::{Overlay, WhiteoutSpec}; +pub use self::core::prefetch::{Prefetch, PrefetchPolicy}; +pub use self::core::tree::{MetadataTreeBuilder, Tree, TreeNode}; +pub use self::directory::DirectoryBuilder; +pub use self::merge::Merger; +pub use self::stargz::StargzBuilder; +pub use self::tarball::TarballBuilder; + +mod chunkdict_generator; +mod compact; +mod core; +mod directory; +mod merge; +mod stargz; +mod tarball; + +/// Trait to generate a RAFS filesystem from the source. 
+pub trait Builder { + fn build( + &mut self, + build_ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + ) -> Result; +} + +fn build_bootstrap( + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + bootstrap_ctx: &mut BootstrapContext, + blob_mgr: &mut BlobManager, + mut tree: Tree, +) -> Result { + // For multi-layer build, merge the upper layer and lower layer with overlay whiteout applied. + if bootstrap_ctx.layered { + let mut parent = Bootstrap::load_parent_bootstrap(ctx, bootstrap_mgr, blob_mgr)?; + timing_tracer!({ parent.merge_overaly(ctx, tree) }, "merge_bootstrap")?; + tree = parent; + } + + let mut bootstrap = Bootstrap::new(tree)?; + timing_tracer!({ bootstrap.build(ctx, bootstrap_ctx) }, "build_bootstrap")?; + + Ok(bootstrap) +} + +fn dump_bootstrap( + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + bootstrap_ctx: &mut BootstrapContext, + bootstrap: &mut Bootstrap, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, +) -> Result<()> { + // Make sure blob id is updated according to blob hash if not specified by user. + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + if blob_ctx.blob_id.is_empty() { + // `Blob::dump()` should have set `blob_ctx.blob_id` to referenced OCI tarball for + // ref-type conversion. + assert!(!ctx.conversion_type.is_to_ref()); + if ctx.blob_inline_meta { + // Set special blob id for blob with inlined meta. + blob_ctx.blob_id = "x".repeat(64); + } else { + blob_ctx.blob_id = format!("{:x}", blob_ctx.blob_hash.clone().finalize()); + } + } + if !ctx.conversion_type.is_to_ref() { + blob_ctx.compressed_blob_size = blob_writer.pos()?; + } + } + + // Dump bootstrap file + let blob_table = blob_mgr.to_blob_table(ctx)?; + let storage = &mut bootstrap_mgr.bootstrap_storage; + bootstrap.dump(ctx, storage, bootstrap_ctx, &blob_table)?; + + // Dump RAFS meta to data blob if inline meta is enabled. + if ctx.blob_inline_meta { + assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs); + // Ensure the blob object is created in case of no chunks generated for the blob. + let (_, blob_ctx) = blob_mgr + .get_or_create_current_blob(ctx) + .map_err(|_e| anyhow!("failed to get current blob object"))?; + let bootstrap_offset = blob_writer.pos()?; + let uncompressed_bootstrap = bootstrap_ctx.writer.as_bytes()?; + let uncompressed_size = uncompressed_bootstrap.len(); + let uncompressed_digest = + RafsDigest::from_buf(&uncompressed_bootstrap, digest::Algorithm::Sha256); + + // Output uncompressed data for backward compatibility and compressed data for new format. 
+ let (bootstrap_data, compressor) = if ctx.features.is_enabled(Feature::BlobToc) { + let mut compressor = compress::Algorithm::Zstd; + let (compressed_data, compressed) = + compress::compress(&uncompressed_bootstrap, compressor) + .with_context(|| "failed to compress bootstrap".to_string())?; + blob_ctx.write_data(blob_writer, &compressed_data)?; + if !compressed { + compressor = compress::Algorithm::None; + } + (compressed_data, compressor) + } else { + blob_ctx.write_data(blob_writer, &uncompressed_bootstrap)?; + (uncompressed_bootstrap, compress::Algorithm::None) + }; + + let compressed_size = bootstrap_data.len(); + blob_ctx.write_tar_header( + blob_writer, + toc::TOC_ENTRY_BOOTSTRAP, + compressed_size as u64, + )?; + + if ctx.features.is_enabled(Feature::BlobToc) { + blob_ctx.entry_list.add( + toc::TOC_ENTRY_BOOTSTRAP, + compressor, + uncompressed_digest, + bootstrap_offset, + compressed_size as u64, + uncompressed_size as u64, + )?; + } + } + + Ok(()) +} + +fn dump_toc( + ctx: &mut BuildContext, + blob_ctx: &mut BlobContext, + blob_writer: &mut dyn Artifact, +) -> Result<()> { + if ctx.features.is_enabled(Feature::BlobToc) { + assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs); + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + let data = blob_ctx.entry_list.as_bytes().to_vec(); + let toc_size = data.len() as u64; + blob_ctx.write_data(blob_writer, &data)?; + hasher.digest_update(&data); + let header = blob_ctx.write_tar_header(blob_writer, toc::TOC_ENTRY_BLOB_TOC, toc_size)?; + hasher.digest_update(header.as_bytes()); + blob_ctx.blob_toc_digest = hasher.digest_finalize().data; + blob_ctx.blob_toc_size = toc_size as u32 + header.as_bytes().len() as u32; + } + Ok(()) +} + +fn finalize_blob( + ctx: &mut BuildContext, + blob_mgr: &mut BlobManager, + blob_writer: &mut dyn Artifact, +) -> Result<()> { + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + let is_tarfs = ctx.conversion_type == ConversionType::TarToTarfs; + + if !is_tarfs { + dump_toc(ctx, blob_ctx, blob_writer)?; + } + if !ctx.conversion_type.is_to_ref() { + blob_ctx.compressed_blob_size = blob_writer.pos()?; + } + if ctx.blob_inline_meta && blob_ctx.blob_id == "x".repeat(64) { + blob_ctx.blob_id = String::new(); + } + + let hash = blob_ctx.blob_hash.clone().finalize(); + let blob_meta_id = if ctx.blob_id.is_empty() { + format!("{:x}", hash) + } else { + assert!(!ctx.conversion_type.is_to_ref() || is_tarfs); + ctx.blob_id.clone() + }; + + if ctx.conversion_type.is_to_ref() { + if blob_ctx.blob_id.is_empty() { + // Use `sha256(tarball)` as `blob_id`. A tarball without files will fall through + // this path because `Blob::dump()` hasn't generated `blob_ctx.blob_id`. + if let Some(zran) = &ctx.blob_zran_generator { + let reader = zran.lock().unwrap().reader(); + blob_ctx.compressed_blob_size = reader.get_data_size(); + if blob_ctx.blob_id.is_empty() { + let hash = reader.get_data_digest(); + blob_ctx.blob_id = format!("{:x}", hash.finalize()); + } + } else if let Some(tar_reader) = &ctx.blob_tar_reader { + blob_ctx.compressed_blob_size = tar_reader.position(); + if blob_ctx.blob_id.is_empty() { + let hash = tar_reader.get_hash_object(); + blob_ctx.blob_id = format!("{:x}", hash.finalize()); + } + } + } + // Tarfs mode only has tar stream and meta blob, there's no data blob. 
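`dump_toc` above digests the TOC payload followed by the tar header that wraps it, and records the sum of both lengths as `blob_toc_size`. A small sketch of that accounting; the 512-byte header below is a stand-in for what `write_tar_header` actually emits:

```rust
use sha2::{Digest, Sha256};

/// `toc_data` is an arbitrary payload; the 512-byte header stands in for the
/// tar block written in front of the TOC entry.
fn toc_digest_and_size(toc_data: &[u8], tar_header: &[u8]) -> (String, u32) {
    let mut hasher = Sha256::new();
    hasher.update(toc_data); // digest covers the TOC payload...
    hasher.update(tar_header); // ...and then the tar header, as in dump_toc
    let digest = hasher
        .finalize()
        .iter()
        .map(|b| format!("{:02x}", b))
        .collect::<String>();
    let size = (toc_data.len() + tar_header.len()) as u32;
    (digest, size)
}

fn main() {
    let (digest, size) = toc_digest_and_size(b"toc entries", &[0u8; 512]);
    println!("blob_toc_digest={} blob_toc_size={}", digest, size);
}
```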
+ if !ctx.blob_inline_meta && !is_tarfs { + blob_ctx.blob_meta_digest = hash.into(); + blob_ctx.blob_meta_size = blob_writer.pos()?; + } + } else if blob_ctx.blob_id.is_empty() { + // `blob_ctx.blob_id` should be RAFS blob id. + blob_ctx.blob_id = blob_meta_id.clone(); + } + + // Tarfs mode directly use the tar file as RAFS data blob, so no need to generate the data + // blob file. + if !is_tarfs { + blob_writer.finalize(Some(blob_meta_id))?; + } + + if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() { + blob_cache.finalize(&blob_ctx.blob_id)?; + } + } + + Ok(()) +} + +/// Helper for TarballBuilder/StargzBuilder to build the filesystem tree. +pub struct TarBuilder { + pub explicit_uidgid: bool, + pub layer_idx: u16, + pub version: RafsVersion, + next_ino: Inode, +} + +impl TarBuilder { + /// Create a new instance of [TarBuilder]. + pub fn new(explicit_uidgid: bool, layer_idx: u16, version: RafsVersion) -> Self { + TarBuilder { + explicit_uidgid, + layer_idx, + next_ino: 0, + version, + } + } + + /// Allocate an inode number. + pub fn next_ino(&mut self) -> Inode { + self.next_ino += 1; + self.next_ino + } + + /// Insert a node into the tree, creating any missing intermediate directories. + pub fn insert_into_tree(&mut self, tree: &mut Tree, node: Node) -> Result<()> { + let target_paths = node.target_vec(); + let target_paths_len = target_paths.len(); + + if target_paths_len == 1 { + // Handle root node modification + assert_eq!(node.path(), Path::new("/")); + tree.set_node(node); + } else { + let mut tmp_tree = tree; + for idx in 1..target_paths.len() { + match tmp_tree.get_child_idx(target_paths[idx].as_bytes()) { + Some(i) => { + if idx == target_paths_len - 1 { + tmp_tree.children[i].set_node(node); + break; + } else { + tmp_tree = &mut tmp_tree.children[i]; + } + } + None => { + if idx == target_paths_len - 1 { + tmp_tree.insert_child(Tree::new(node)); + break; + } else { + let node = self.create_directory(&target_paths[..=idx])?; + tmp_tree.insert_child(Tree::new(node)); + let last_idx = tmp_tree.children.len() - 1; + tmp_tree = &mut tmp_tree.children[last_idx]; + } + } + } + } + } + + Ok(()) + } + + /// Create a new node for a directory. + pub fn create_directory(&mut self, target_paths: &[OsString]) -> Result { + let ino = self.next_ino(); + let name = &target_paths[target_paths.len() - 1]; + let mut inode = InodeWrapper::new(self.version); + inode.set_ino(ino); + inode.set_mode(0o755 | libc::S_IFDIR as u32); + inode.set_nlink(2); + inode.set_name_size(name.len()); + inode.set_rdev(u32::MAX); + + let source = PathBuf::from("/"); + let target_vec = target_paths.to_vec(); + let mut target = PathBuf::new(); + for name in target_paths.iter() { + target = target.join(name); + } + let info = NodeInfo { + explicit_uidgid: self.explicit_uidgid, + src_ino: ino, + src_dev: u64::MAX, + rdev: u64::MAX, + path: target.clone(), + source, + target, + target_vec, + symlink: None, + xattrs: RafsXAttrs::new(), + v6_force_extended_inode: false, + }; + + Ok(Node::new(inode, info, self.layer_idx)) + } + + /// Check whether the path is a eStargz special file. 
+ pub fn is_stargz_special_files(&self, path: &Path) -> bool { + path == Path::new("/stargz.index.json") + || path == Path::new("/.prefetch.landmark") + || path == Path::new("/.no.prefetch.landmark") + } +} + +#[cfg(test)] +mod tests { + use vmm_sys_util::tempdir::TempDir; + + use super::*; + + #[test] + fn test_tar_builder_is_stargz_special_files() { + let builder = TarBuilder::new(true, 0, RafsVersion::V6); + + let path = Path::new("/stargz.index.json"); + assert!(builder.is_stargz_special_files(&path)); + let path = Path::new("/.prefetch.landmark"); + assert!(builder.is_stargz_special_files(&path)); + let path = Path::new("/.no.prefetch.landmark"); + assert!(builder.is_stargz_special_files(&path)); + + let path = Path::new("/no.prefetch.landmark"); + assert!(!builder.is_stargz_special_files(&path)); + let path = Path::new("/prefetch.landmark"); + assert!(!builder.is_stargz_special_files(&path)); + let path = Path::new("/tar.index.json"); + assert!(!builder.is_stargz_special_files(&path)); + } + + #[test] + fn test_tar_builder_create_directory() { + let tmp_dir = TempDir::new().unwrap(); + let target_paths = [OsString::from(tmp_dir.as_path())]; + let mut builder = TarBuilder::new(true, 0, RafsVersion::V6); + + let node = builder.create_directory(&target_paths); + assert!(node.is_ok()); + let node = node.unwrap(); + println!("Node: {}", node); + assert_eq!(node.file_type(), "dir"); + assert_eq!(node.target(), tmp_dir.as_path()); + + assert_eq!(builder.next_ino, 1); + assert_eq!(builder.next_ino(), 2); + } +} diff --git a/builder/src/merge.rs b/builder/src/merge.rs index 25ba20c07e9..0f11633db71 100644 --- a/builder/src/merge.rs +++ b/builder/src/merge.rs @@ -1,415 +1,415 @@ -// Copyright (C) 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::hash_map::Entry; -use std::collections::{HashMap, HashSet}; -use std::convert::TryFrom; -use std::path::PathBuf; -use std::sync::Arc; - -use anyhow::{anyhow, bail, ensure, Context, Result}; -use hex::FromHex; -use nydus_api::ConfigV2; -use nydus_rafs::metadata::{RafsSuper, RafsVersion}; -use nydus_storage::device::{BlobFeatures, BlobInfo}; -use nydus_utils::crypt; - -use super::{ - ArtifactStorage, BlobContext, BlobManager, Bootstrap, BootstrapContext, BuildContext, - BuildOutput, ChunkSource, ConversionType, Overlay, Tree, -}; - -/// Struct to generate the merged RAFS bootstrap for an image from per layer RAFS bootstraps. -/// -/// A container image contains one or more layers, a RAFS bootstrap is built for each layer. -/// Those per layer bootstraps could be mounted by overlayfs to form the container rootfs. -/// To improve performance by avoiding overlayfs, an image level bootstrap is generated by -/// merging per layer bootstrap with overlayfs rules applied. -pub struct Merger {} - -impl Merger { - fn get_string_from_list( - original_ids: &Option>, - idx: usize, - ) -> Result> { - Ok(if let Some(id) = &original_ids { - let id_string = id - .get(idx) - .ok_or_else(|| anyhow!("unmatched digest index {}", idx))?; - Some(id_string.clone()) - } else { - None - }) - } - - fn get_digest_from_list(digests: &Option>, idx: usize) -> Result> { - Ok(if let Some(digests) = &digests { - let digest = digests - .get(idx) - .ok_or_else(|| anyhow!("unmatched digest index {}", idx))?; - Some(<[u8; 32]>::from_hex(digest)?) 
- } else { - None - }) - } - - fn get_size_from_list(sizes: &Option>, idx: usize) -> Result> { - Ok(if let Some(sizes) = &sizes { - let size = sizes - .get(idx) - .ok_or_else(|| anyhow!("unmatched size index {}", idx))?; - Some(*size) - } else { - None - }) - } - - /// Overlay multiple RAFS filesystems into a merged RAFS filesystem. - /// - /// # Arguments - /// - sources: contains one or more per layer bootstraps in order of lower to higher. - /// - chunk_dict: contain the chunk dictionary used to build per layer boostrap, or None. - #[allow(clippy::too_many_arguments)] - pub fn merge( - ctx: &mut BuildContext, - parent_bootstrap_path: Option, - sources: Vec, - blob_digests: Option>, - original_blob_ids: Option>, - blob_sizes: Option>, - blob_toc_digests: Option>, - blob_toc_sizes: Option>, - target: ArtifactStorage, - chunk_dict: Option, - config_v2: Arc, - ) -> Result { - if sources.is_empty() { - bail!("source bootstrap list is empty , at least one bootstrap is required"); - } - if let Some(digests) = blob_digests.as_ref() { - ensure!( - digests.len() == sources.len(), - "number of blob digest entries {} doesn't match number of sources {}", - digests.len(), - sources.len(), - ); - } - if let Some(original_ids) = original_blob_ids.as_ref() { - ensure!( - original_ids.len() == sources.len(), - "number of original blob id entries {} doesn't match number of sources {}", - original_ids.len(), - sources.len(), - ); - } - if let Some(sizes) = blob_sizes.as_ref() { - ensure!( - sizes.len() == sources.len(), - "number of blob size entries {} doesn't match number of sources {}", - sizes.len(), - sources.len(), - ); - } - if let Some(toc_digests) = blob_toc_digests.as_ref() { - ensure!( - toc_digests.len() == sources.len(), - "number of toc digest entries {} doesn't match number of sources {}", - toc_digests.len(), - sources.len(), - ); - } - if let Some(sizes) = blob_toc_sizes.as_ref() { - ensure!( - sizes.len() == sources.len(), - "number of toc size entries {} doesn't match number of sources {}", - sizes.len(), - sources.len(), - ); - } - - let mut tree: Option = None; - let mut blob_mgr = BlobManager::new(ctx.digester); - let mut blob_idx_map = HashMap::new(); - let mut parent_layers = 0; - - // Load parent bootstrap - if let Some(parent_bootstrap_path) = &parent_bootstrap_path { - let (rs, _) = - RafsSuper::load_from_file(parent_bootstrap_path, config_v2.clone(), false) - .context(format!("load parent bootstrap {:?}", parent_bootstrap_path))?; - let blobs = rs.superblock.get_blob_infos(); - for blob in &blobs { - let blob_ctx = BlobContext::from(ctx, &blob, ChunkSource::Parent)?; - blob_idx_map.insert(blob_ctx.blob_id.clone(), blob_mgr.len()); - blob_mgr.add_blob(blob_ctx); - } - parent_layers = blobs.len(); - tree = Some(Tree::from_bootstrap(&rs, &mut ())?); - } - - // Get the blobs come from chunk dictionary. 
- let mut chunk_dict_blobs = HashSet::new(); - let mut config = None; - if let Some(chunk_dict_path) = &chunk_dict { - let (rs, _) = RafsSuper::load_from_file(chunk_dict_path, config_v2.clone(), false) - .context(format!("load chunk dict bootstrap {:?}", chunk_dict_path))?; - config = Some(rs.meta.get_config()); - for blob in rs.superblock.get_blob_infos() { - chunk_dict_blobs.insert(blob.blob_id().to_string()); - } - } - - let mut fs_version = RafsVersion::V6; - let mut chunk_size = None; - - for (layer_idx, bootstrap_path) in sources.iter().enumerate() { - let (rs, _) = RafsSuper::load_from_file(bootstrap_path, config_v2.clone(), false) - .context(format!("load bootstrap {:?}", bootstrap_path))?; - config - .get_or_insert_with(|| rs.meta.get_config()) - .check_compatibility(&rs.meta)?; - fs_version = RafsVersion::try_from(rs.meta.version) - .context("failed to get RAFS version number")?; - ctx.compressor = rs.meta.get_compressor(); - ctx.digester = rs.meta.get_digester(); - // If any RAFS filesystems are encrypted, the merged boostrap will be marked as encrypted. - match rs.meta.get_cipher() { - crypt::Algorithm::None => (), - crypt::Algorithm::Aes128Xts => ctx.cipher = crypt::Algorithm::Aes128Xts, - _ => bail!("invalid per layer bootstrap, only supports aes-128-xts"), - } - ctx.explicit_uidgid = rs.meta.explicit_uidgid(); - if config.as_ref().unwrap().is_tarfs_mode { - ctx.conversion_type = ConversionType::TarToTarfs; - ctx.blob_features |= BlobFeatures::TARFS; - } - - let mut parent_blob_added = false; - let blobs = &rs.superblock.get_blob_infos(); - for blob in blobs { - let mut blob_ctx = BlobContext::from(ctx, &blob, ChunkSource::Parent)?; - if let Some(chunk_size) = chunk_size { - ensure!( - chunk_size == blob_ctx.chunk_size, - "can not merge bootstraps with inconsistent chunk size, current bootstrap {:?} with chunk size {:x}, expected {:x}", - bootstrap_path, - blob_ctx.chunk_size, - chunk_size, - ); - } else { - chunk_size = Some(blob_ctx.chunk_size); - } - if !chunk_dict_blobs.contains(&blob.blob_id()) { - // It is assumed that the `nydus-image create` at each layer and `nydus-image merge` commands - // use the same chunk dict bootstrap. So the parent bootstrap includes multiple blobs, but - // only at most one new blob, the other blobs should be from the chunk dict image. - if parent_blob_added { - bail!("invalid per layer bootstrap, having multiple associated data blobs"); - } - parent_blob_added = true; - - if ctx.configuration.internal.blob_accessible() - || ctx.conversion_type == ConversionType::TarToTarfs - { - // `blob.blob_id()` should have been fixed when loading the bootstrap. - blob_ctx.blob_id = blob.blob_id(); - } else { - // The blob id (blob sha256 hash) in parent bootstrap is invalid for nydusd - // runtime, should change it to the hash of whole tar blob. - if let Some(original_id) = - Self::get_string_from_list(&original_blob_ids, layer_idx)? - { - blob_ctx.blob_id = original_id; - } else { - blob_ctx.blob_id = - BlobInfo::get_blob_id_from_meta_path(bootstrap_path)?; - } - } - if let Some(digest) = Self::get_digest_from_list(&blob_digests, layer_idx)? { - if blob.has_feature(BlobFeatures::SEPARATE) { - blob_ctx.blob_meta_digest = digest; - } else { - blob_ctx.blob_id = hex::encode(digest); - } - } - if let Some(size) = Self::get_size_from_list(&blob_sizes, layer_idx)? 
{ - if blob.has_feature(BlobFeatures::SEPARATE) { - blob_ctx.blob_meta_size = size; - } else { - blob_ctx.compressed_blob_size = size; - } - } - if let Some(digest) = Self::get_digest_from_list(&blob_toc_digests, layer_idx)? - { - blob_ctx.blob_toc_digest = digest; - } - if let Some(size) = Self::get_size_from_list(&blob_toc_sizes, layer_idx)? { - blob_ctx.blob_toc_size = size as u32; - } - } - - if let Entry::Vacant(e) = blob_idx_map.entry(blob.blob_id()) { - e.insert(blob_mgr.len()); - blob_mgr.add_blob(blob_ctx); - } - } - - let upper = Tree::from_bootstrap(&rs, &mut ())?; - upper.walk_bfs(true, &mut |n| { - let mut node = n.lock_node(); - for chunk in &mut node.chunks { - let origin_blob_index = chunk.inner.blob_index() as usize; - let blob_ctx = blobs[origin_blob_index].as_ref(); - if let Some(blob_index) = blob_idx_map.get(&blob_ctx.blob_id()) { - // Set the blob index of chunk to real index in blob table of final bootstrap. - chunk.set_blob_index(*blob_index as u32); - } - } - // Set node's layer index to distinguish same inode number (from bootstrap) - // between different layers. - let idx = u16::try_from(layer_idx).context(format!( - "too many layers {}, limited to {}", - layer_idx, - u16::MAX - ))?; - if parent_layers + idx as usize > u16::MAX as usize { - bail!("too many layers {}, limited to {}", layer_idx, u16::MAX); - } - node.layer_idx = idx + parent_layers as u16; - node.overlay = Overlay::UpperAddition; - Ok(()) - })?; - - if let Some(tree) = &mut tree { - tree.merge_overaly(ctx, upper)?; - } else { - tree = Some(upper); - } - } - - if ctx.conversion_type == ConversionType::TarToTarfs { - if parent_layers > 0 { - bail!("merging RAFS in TARFS mode conflicts with `--parent-bootstrap`"); - } - if !chunk_dict_blobs.is_empty() { - bail!("merging RAFS in TARFS mode conflicts with `--chunk-dict`"); - } - } - - // Safe to unwrap because there is at least one source bootstrap. 
- let tree = tree.unwrap(); - ctx.fs_version = fs_version; - if let Some(chunk_size) = chunk_size { - ctx.chunk_size = chunk_size; - } - - let mut bootstrap_ctx = BootstrapContext::new(Some(target.clone()), false)?; - let mut bootstrap = Bootstrap::new(tree)?; - bootstrap.build(ctx, &mut bootstrap_ctx)?; - let blob_table = blob_mgr.to_blob_table(ctx)?; - let mut bootstrap_storage = Some(target.clone()); - bootstrap - .dump(ctx, &mut bootstrap_storage, &mut bootstrap_ctx, &blob_table) - .context(format!("dump bootstrap to {:?}", target.display()))?; - BuildOutput::new(&blob_mgr, &bootstrap_storage) - } -} - -#[cfg(test)] -mod tests { - use nydus_utils::digest; - use vmm_sys_util::tempfile::TempFile; - - use super::*; - - #[test] - fn test_merger_get_string_from_list() { - let res = Merger::get_string_from_list(&None, 1); - assert!(res.is_ok()); - assert!(res.unwrap().is_none()); - - let original_ids = vec!["string1".to_owned(), "string2".to_owned()]; - let original_ids = Some(original_ids); - - let res = Merger::get_string_from_list(&original_ids, 0); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), Some("string1".to_owned())); - assert!(Merger::get_string_from_list(&original_ids, 2).is_err()); - } - - #[test] - fn test_merger_get_digest_from_list() { - let res = Merger::get_digest_from_list(&None, 1); - assert!(res.is_ok()); - assert!(res.unwrap().is_none()); - - let original_ids = vec!["string1".to_owned(), "12ab".repeat(16)]; - let original_ids = Some(original_ids); - - let res = Merger::get_digest_from_list(&original_ids, 1); - assert!(res.is_ok()); - assert_eq!( - res.unwrap(), - Some([ - 18u8, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, - 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171 - ]) - ); - assert!(Merger::get_digest_from_list(&original_ids, 0).is_err()); - assert!(Merger::get_digest_from_list(&original_ids, 2).is_err()); - } - - #[test] - fn test_merger_get_size_from_list() { - let res = Merger::get_size_from_list(&None, 1); - assert!(res.is_ok()); - assert!(res.unwrap().is_none()); - - let original_ids = vec![1u64, 2, 3, 4]; - let original_ids = Some(original_ids); - let res = Merger::get_size_from_list(&original_ids, 1); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), Some(2u64)); - assert!(Merger::get_size_from_list(&original_ids, 4).is_err()); - } - - #[test] - fn test_merger_merge() { - let mut ctx = BuildContext::default(); - ctx.configuration.internal.set_blob_accessible(false); - ctx.digester = digest::Algorithm::Sha256; - - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path1 = PathBuf::from(root_dir); - source_path1.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - let mut source_path2 = PathBuf::from(root_dir); - source_path2.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - - let tmp_file = TempFile::new().unwrap(); - let target = ArtifactStorage::SingleFile(tmp_file.as_path().to_path_buf()); - - let blob_toc_digests = Some(vec![ - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_owned(), - "4cf0c409788fc1c149afbf4c81276b92427ae41e46412334ca495991b8526650".to_owned(), - ]); - - let build_output = Merger::merge( - &mut ctx, - None, - vec![source_path1, source_path2], - Some(vec!["a70f".repeat(16), "9bd3".repeat(16)]), - Some(vec!["blob_id".to_owned(), "blob_id2".to_owned()]), - Some(vec![16u64, 32u64]), - blob_toc_digests, - Some(vec![64u64, 128]), - target, - None, - Arc::new(ConfigV2::new("config_v2")), - ); - 
assert!(build_output.is_ok()); - let build_output = build_output.unwrap(); - println!("BuildOutput: {}", build_output); - assert_eq!(build_output.blob_size, Some(16)); - } -} +// Copyright (C) 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; +use std::convert::TryFrom; +use std::path::PathBuf; +use std::sync::Arc; + +use anyhow::{anyhow, bail, ensure, Context, Result}; +use hex::FromHex; +use nydus_api::ConfigV2; +use nydus_rafs::metadata::{RafsSuper, RafsVersion}; +use nydus_storage::device::{BlobFeatures, BlobInfo}; +use nydus_utils::crypt; + +use super::{ + ArtifactStorage, BlobContext, BlobManager, Bootstrap, BootstrapContext, BuildContext, + BuildOutput, ChunkSource, ConversionType, Overlay, Tree, +}; + +/// Struct to generate the merged RAFS bootstrap for an image from per layer RAFS bootstraps. +/// +/// A container image contains one or more layers, a RAFS bootstrap is built for each layer. +/// Those per layer bootstraps could be mounted by overlayfs to form the container rootfs. +/// To improve performance by avoiding overlayfs, an image level bootstrap is generated by +/// merging per layer bootstrap with overlayfs rules applied. +pub struct Merger {} + +impl Merger { + fn get_string_from_list( + original_ids: &Option>, + idx: usize, + ) -> Result> { + Ok(if let Some(id) = &original_ids { + let id_string = id + .get(idx) + .ok_or_else(|| anyhow!("unmatched digest index {}", idx))?; + Some(id_string.clone()) + } else { + None + }) + } + + fn get_digest_from_list(digests: &Option>, idx: usize) -> Result> { + Ok(if let Some(digests) = &digests { + let digest = digests + .get(idx) + .ok_or_else(|| anyhow!("unmatched digest index {}", idx))?; + Some(<[u8; 32]>::from_hex(digest)?) + } else { + None + }) + } + + fn get_size_from_list(sizes: &Option>, idx: usize) -> Result> { + Ok(if let Some(sizes) = &sizes { + let size = sizes + .get(idx) + .ok_or_else(|| anyhow!("unmatched size index {}", idx))?; + Some(*size) + } else { + None + }) + } + + /// Overlay multiple RAFS filesystems into a merged RAFS filesystem. + /// + /// # Arguments + /// - sources: contains one or more per layer bootstraps in order of lower to higher. + /// - chunk_dict: contain the chunk dictionary used to build per layer boostrap, or None. 
+ #[allow(clippy::too_many_arguments)] + pub fn merge( + ctx: &mut BuildContext, + parent_bootstrap_path: Option, + sources: Vec, + blob_digests: Option>, + original_blob_ids: Option>, + blob_sizes: Option>, + blob_toc_digests: Option>, + blob_toc_sizes: Option>, + target: ArtifactStorage, + chunk_dict: Option, + config_v2: Arc, + ) -> Result { + if sources.is_empty() { + bail!("source bootstrap list is empty , at least one bootstrap is required"); + } + if let Some(digests) = blob_digests.as_ref() { + ensure!( + digests.len() == sources.len(), + "number of blob digest entries {} doesn't match number of sources {}", + digests.len(), + sources.len(), + ); + } + if let Some(original_ids) = original_blob_ids.as_ref() { + ensure!( + original_ids.len() == sources.len(), + "number of original blob id entries {} doesn't match number of sources {}", + original_ids.len(), + sources.len(), + ); + } + if let Some(sizes) = blob_sizes.as_ref() { + ensure!( + sizes.len() == sources.len(), + "number of blob size entries {} doesn't match number of sources {}", + sizes.len(), + sources.len(), + ); + } + if let Some(toc_digests) = blob_toc_digests.as_ref() { + ensure!( + toc_digests.len() == sources.len(), + "number of toc digest entries {} doesn't match number of sources {}", + toc_digests.len(), + sources.len(), + ); + } + if let Some(sizes) = blob_toc_sizes.as_ref() { + ensure!( + sizes.len() == sources.len(), + "number of toc size entries {} doesn't match number of sources {}", + sizes.len(), + sources.len(), + ); + } + + let mut tree: Option = None; + let mut blob_mgr = BlobManager::new(ctx.digester); + let mut blob_idx_map = HashMap::new(); + let mut parent_layers = 0; + + // Load parent bootstrap + if let Some(parent_bootstrap_path) = &parent_bootstrap_path { + let (rs, _) = + RafsSuper::load_from_file(parent_bootstrap_path, config_v2.clone(), false) + .context(format!("load parent bootstrap {:?}", parent_bootstrap_path))?; + let blobs = rs.superblock.get_blob_infos(); + for blob in &blobs { + let blob_ctx = BlobContext::from(ctx, &blob, ChunkSource::Parent)?; + blob_idx_map.insert(blob_ctx.blob_id.clone(), blob_mgr.len()); + blob_mgr.add_blob(blob_ctx); + } + parent_layers = blobs.len(); + tree = Some(Tree::from_bootstrap(&rs, &mut ())?); + } + + // Get the blobs come from chunk dictionary. + let mut chunk_dict_blobs = HashSet::new(); + let mut config = None; + if let Some(chunk_dict_path) = &chunk_dict { + let (rs, _) = RafsSuper::load_from_file(chunk_dict_path, config_v2.clone(), false) + .context(format!("load chunk dict bootstrap {:?}", chunk_dict_path))?; + config = Some(rs.meta.get_config()); + for blob in rs.superblock.get_blob_infos() { + chunk_dict_blobs.insert(blob.blob_id().to_string()); + } + } + + let mut fs_version = RafsVersion::V6; + let mut chunk_size = None; + + for (layer_idx, bootstrap_path) in sources.iter().enumerate() { + let (rs, _) = RafsSuper::load_from_file(bootstrap_path, config_v2.clone(), false) + .context(format!("load bootstrap {:?}", bootstrap_path))?; + config + .get_or_insert_with(|| rs.meta.get_config()) + .check_compatibility(&rs.meta)?; + fs_version = RafsVersion::try_from(rs.meta.version) + .context("failed to get RAFS version number")?; + ctx.compressor = rs.meta.get_compressor(); + ctx.digester = rs.meta.get_digester(); + // If any RAFS filesystems are encrypted, the merged boostrap will be marked as encrypted. 
+ match rs.meta.get_cipher() { + crypt::Algorithm::None => (), + crypt::Algorithm::Aes128Xts => ctx.cipher = crypt::Algorithm::Aes128Xts, + _ => bail!("invalid per layer bootstrap, only supports aes-128-xts"), + } + ctx.explicit_uidgid = rs.meta.explicit_uidgid(); + if config.as_ref().unwrap().is_tarfs_mode { + ctx.conversion_type = ConversionType::TarToTarfs; + ctx.blob_features |= BlobFeatures::TARFS; + } + + let mut parent_blob_added = false; + let blobs = &rs.superblock.get_blob_infos(); + for blob in blobs { + let mut blob_ctx = BlobContext::from(ctx, &blob, ChunkSource::Parent)?; + if let Some(chunk_size) = chunk_size { + ensure!( + chunk_size == blob_ctx.chunk_size, + "can not merge bootstraps with inconsistent chunk size, current bootstrap {:?} with chunk size {:x}, expected {:x}", + bootstrap_path, + blob_ctx.chunk_size, + chunk_size, + ); + } else { + chunk_size = Some(blob_ctx.chunk_size); + } + if !chunk_dict_blobs.contains(&blob.blob_id()) { + // It is assumed that the `nydus-image create` at each layer and `nydus-image merge` commands + // use the same chunk dict bootstrap. So the parent bootstrap includes multiple blobs, but + // only at most one new blob, the other blobs should be from the chunk dict image. + if parent_blob_added { + bail!("invalid per layer bootstrap, having multiple associated data blobs"); + } + parent_blob_added = true; + + if ctx.configuration.internal.blob_accessible() + || ctx.conversion_type == ConversionType::TarToTarfs + { + // `blob.blob_id()` should have been fixed when loading the bootstrap. + blob_ctx.blob_id = blob.blob_id(); + } else { + // The blob id (blob sha256 hash) in parent bootstrap is invalid for nydusd + // runtime, should change it to the hash of whole tar blob. + if let Some(original_id) = + Self::get_string_from_list(&original_blob_ids, layer_idx)? + { + blob_ctx.blob_id = original_id; + } else { + blob_ctx.blob_id = + BlobInfo::get_blob_id_from_meta_path(bootstrap_path)?; + } + } + if let Some(digest) = Self::get_digest_from_list(&blob_digests, layer_idx)? { + if blob.has_feature(BlobFeatures::SEPARATE) { + blob_ctx.blob_meta_digest = digest; + } else { + blob_ctx.blob_id = hex::encode(digest); + } + } + if let Some(size) = Self::get_size_from_list(&blob_sizes, layer_idx)? { + if blob.has_feature(BlobFeatures::SEPARATE) { + blob_ctx.blob_meta_size = size; + } else { + blob_ctx.compressed_blob_size = size; + } + } + if let Some(digest) = Self::get_digest_from_list(&blob_toc_digests, layer_idx)? + { + blob_ctx.blob_toc_digest = digest; + } + if let Some(size) = Self::get_size_from_list(&blob_toc_sizes, layer_idx)? { + blob_ctx.blob_toc_size = size as u32; + } + } + + if let Entry::Vacant(e) = blob_idx_map.entry(blob.blob_id()) { + e.insert(blob_mgr.len()); + blob_mgr.add_blob(blob_ctx); + } + } + + let upper = Tree::from_bootstrap(&rs, &mut ())?; + upper.walk_bfs(true, &mut |n| { + let mut node = n.lock_node(); + for chunk in &mut node.chunks { + let origin_blob_index = chunk.inner.blob_index() as usize; + let blob_ctx = blobs[origin_blob_index].as_ref(); + if let Some(blob_index) = blob_idx_map.get(&blob_ctx.blob_id()) { + // Set the blob index of chunk to real index in blob table of final bootstrap. + chunk.set_blob_index(*blob_index as u32); + } + } + // Set node's layer index to distinguish same inode number (from bootstrap) + // between different layers. 
+ let idx = u16::try_from(layer_idx).context(format!( + "too many layers {}, limited to {}", + layer_idx, + u16::MAX + ))?; + if parent_layers + idx as usize > u16::MAX as usize { + bail!("too many layers {}, limited to {}", layer_idx, u16::MAX); + } + node.layer_idx = idx + parent_layers as u16; + node.overlay = Overlay::UpperAddition; + Ok(()) + })?; + + if let Some(tree) = &mut tree { + tree.merge_overaly(ctx, upper)?; + } else { + tree = Some(upper); + } + } + + if ctx.conversion_type == ConversionType::TarToTarfs { + if parent_layers > 0 { + bail!("merging RAFS in TARFS mode conflicts with `--parent-bootstrap`"); + } + if !chunk_dict_blobs.is_empty() { + bail!("merging RAFS in TARFS mode conflicts with `--chunk-dict`"); + } + } + + // Safe to unwrap because there is at least one source bootstrap. + let tree = tree.unwrap(); + ctx.fs_version = fs_version; + if let Some(chunk_size) = chunk_size { + ctx.chunk_size = chunk_size; + } + + let mut bootstrap_ctx = BootstrapContext::new(Some(target.clone()), false)?; + let mut bootstrap = Bootstrap::new(tree)?; + bootstrap.build(ctx, &mut bootstrap_ctx)?; + let blob_table = blob_mgr.to_blob_table(ctx)?; + let mut bootstrap_storage = Some(target.clone()); + bootstrap + .dump(ctx, &mut bootstrap_storage, &mut bootstrap_ctx, &blob_table) + .context(format!("dump bootstrap to {:?}", target.display()))?; + BuildOutput::new(&blob_mgr, &bootstrap_storage) + } +} + +#[cfg(test)] +mod tests { + use nydus_utils::digest; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + + #[test] + fn test_merger_get_string_from_list() { + let res = Merger::get_string_from_list(&None, 1); + assert!(res.is_ok()); + assert!(res.unwrap().is_none()); + + let original_ids = vec!["string1".to_owned(), "string2".to_owned()]; + let original_ids = Some(original_ids); + + let res = Merger::get_string_from_list(&original_ids, 0); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), Some("string1".to_owned())); + assert!(Merger::get_string_from_list(&original_ids, 2).is_err()); + } + + #[test] + fn test_merger_get_digest_from_list() { + let res = Merger::get_digest_from_list(&None, 1); + assert!(res.is_ok()); + assert!(res.unwrap().is_none()); + + let original_ids = vec!["string1".to_owned(), "12ab".repeat(16)]; + let original_ids = Some(original_ids); + + let res = Merger::get_digest_from_list(&original_ids, 1); + assert!(res.is_ok()); + assert_eq!( + res.unwrap(), + Some([ + 18u8, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, + 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171, 18, 171 + ]) + ); + assert!(Merger::get_digest_from_list(&original_ids, 0).is_err()); + assert!(Merger::get_digest_from_list(&original_ids, 2).is_err()); + } + + #[test] + fn test_merger_get_size_from_list() { + let res = Merger::get_size_from_list(&None, 1); + assert!(res.is_ok()); + assert!(res.unwrap().is_none()); + + let original_ids = vec![1u64, 2, 3, 4]; + let original_ids = Some(original_ids); + let res = Merger::get_size_from_list(&original_ids, 1); + assert!(res.is_ok()); + assert_eq!(res.unwrap(), Some(2u64)); + assert!(Merger::get_size_from_list(&original_ids, 4).is_err()); + } + + #[test] + fn test_merger_merge() { + let mut ctx = BuildContext::default(); + ctx.configuration.internal.set_blob_accessible(false); + ctx.digester = digest::Algorithm::Sha256; + + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path1 = PathBuf::from(root_dir); + 
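// A simplified, std-only sketch of the remapping performed in the walk_bfs pass above:
// each chunk refers to a blob by index into its own layer's blob table, so once the
// per-layer tables are concatenated into the merged table (tracked by blob_idx_map,
// keyed by blob id), every chunk's blob index has to be rewritten. The Chunk struct and
// function below are illustrative stand-ins, not the crate's NodeChunk/BlobManager types.
use std::collections::HashMap;

struct Chunk {
    blob_index: u32,
}

fn remap_chunks(
    chunks: &mut [Chunk],
    layer_blob_ids: &[String],           // blob table of the layer being merged
    merged_index: &HashMap<String, u32>, // blob id -> index in the merged blob table
) {
    for chunk in chunks.iter_mut() {
        let blob_id = &layer_blob_ids[chunk.blob_index as usize];
        if let Some(new_idx) = merged_index.get(blob_id) {
            chunk.blob_index = *new_idx;
        }
    }
}

fn main() {
    let mut merged = HashMap::new();
    merged.insert("blob-a".to_string(), 3u32);
    let layer = vec!["blob-a".to_string()];
    let mut chunks = vec![Chunk { blob_index: 0 }];
    remap_chunks(&mut chunks, &layer, &merged);
    assert_eq!(chunks[0].blob_index, 3);
}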
source_path1.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + let mut source_path2 = PathBuf::from(root_dir); + source_path2.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + + let tmp_file = TempFile::new().unwrap(); + let target = ArtifactStorage::SingleFile(tmp_file.as_path().to_path_buf()); + + let blob_toc_digests = Some(vec![ + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_owned(), + "4cf0c409788fc1c149afbf4c81276b92427ae41e46412334ca495991b8526650".to_owned(), + ]); + + let build_output = Merger::merge( + &mut ctx, + None, + vec![source_path1, source_path2], + Some(vec!["a70f".repeat(16), "9bd3".repeat(16)]), + Some(vec!["blob_id".to_owned(), "blob_id2".to_owned()]), + Some(vec![16u64, 32u64]), + blob_toc_digests, + Some(vec![64u64, 128]), + target, + None, + Arc::new(ConfigV2::new("config_v2")), + ); + assert!(build_output.is_ok()); + let build_output = build_output.unwrap(); + println!("BuildOutput: {}", build_output); + assert_eq!(build_output.blob_size, Some(16)); + } +} diff --git a/builder/src/stargz.rs b/builder/src/stargz.rs index d84a2d214a8..2d846683754 100644 --- a/builder/src/stargz.rs +++ b/builder/src/stargz.rs @@ -1,1053 +1,1053 @@ -// Copyright 2020 Alibaba cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Generate a RAFS filesystem bootstrap from an stargz layer, reusing the stargz layer as data blob. - -use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{Seek, SeekFrom}; -use std::ops::Deref; -use std::os::unix::ffi::OsStrExt; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use anyhow::{anyhow, bail, Context, Error, Result}; -use base64::Engine; -use nix::NixPath; -use nydus_rafs::metadata::chunk::ChunkWrapper; -use nydus_rafs::metadata::inode::{InodeWrapper, RafsInodeFlags, RafsV6Inode}; -use nydus_rafs::metadata::layout::v5::RafsV5ChunkInfo; -use nydus_rafs::metadata::layout::RafsXAttrs; -use nydus_rafs::metadata::RafsVersion; -use nydus_storage::device::BlobChunkFlags; -use nydus_storage::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; -use nydus_utils::compact::makedev; -use nydus_utils::compress::{self, compute_compressed_gzip_size}; -use nydus_utils::digest::{self, DigestData, RafsDigest}; -use nydus_utils::{lazy_drop, root_tracer, timing_tracer, try_round_up_4k, ByteSize}; -use serde::{Deserialize, Serialize}; - -use crate::core::context::{Artifact, NoopArtifactWriter}; - -use super::core::blob::Blob; -use super::core::context::{ - ArtifactWriter, BlobManager, BootstrapManager, BuildContext, BuildOutput, -}; -use super::core::node::{ChunkSource, Node, NodeChunk, NodeInfo}; -use super::{ - build_bootstrap, dump_bootstrap, finalize_blob, Bootstrap, Builder, TarBuilder, Tree, TreeNode, -}; - -#[derive(Deserialize, Serialize, Debug, Clone, Default)] -struct TocEntry { - /// This REQUIRED property contains the name of the tar entry. - /// - /// This MUST be the complete path stored in the tar file. - pub name: PathBuf, - - /// This REQUIRED property contains the type of tar entry. - /// - /// This MUST be either of the following. - /// - dir: directory - /// - reg: regular file - /// - symlink: symbolic link - /// - hardlink: hard link - /// - char: character device - /// - block: block device - /// - fifo: fifo - /// - chunk: a chunk of regular file data As described in the above section, - /// a regular file can be divided into several chunks. TOCEntry MUST be created for each chunk. 
- /// TOCEntry of the first chunk of that file MUST be typed as reg. TOCEntry of each chunk after - /// 2nd MUST be typed as chunk. chunk TOCEntry MUST set offset, chunkOffset and chunkSize - /// properties. - #[serde(rename = "type")] - pub toc_type: String, - - /// This OPTIONAL property contains the uncompressed size of the regular file. - /// - /// Non-empty reg file MUST set this property. - #[serde(default)] - pub size: u64, - - // This OPTIONAL property contains the modification time of the tar entry. - // - // Empty means zero or unknown. Otherwise, the value is in UTC RFC3339 format. - // // ModTime3339 is the modification time of the tar entry. Empty - // // means zero or unknown. Otherwise it's in UTC RFC3339 - // // format. Use the ModTime method to access the time.Time value. - // #[serde(default, alias = "modtime")] - // mod_time_3339: String, - // #[serde(skip)] - // mod_time: Time, - /// This OPTIONAL property contains the link target. - /// - /// Symlink and hardlink MUST set this property. - #[serde(default, rename = "linkName")] - pub link_name: PathBuf, - - /// This REQUIRED property contains the permission and mode bits. - #[serde(default)] - pub mode: u32, - - /// This REQUIRED property contains the user ID of the owner of this file. - #[serde(default)] - pub uid: u32, - - /// This REQUIRED property contains the group ID of the owner of this file. - #[serde(default)] - pub gid: u32, - - /// This OPTIONAL property contains the username of the owner. - /// - /// In the serialized JSON, this field may only be present for - /// the first entry with the same Uid. - #[serde(default, rename = "userName")] - pub uname: String, - - /// This OPTIONAL property contains the groupname of the owner. - /// - /// In the serialized JSON, this field may only be present for - /// the first entry with the same Gid. - #[serde(default, rename = "groupName")] - pub gname: String, - - /// This OPTIONAL property contains the major device number of device files. - /// - /// char and block files MUST set this property. - #[serde(default, rename = "devMajor")] - pub dev_major: u64, - - /// This OPTIONAL property contains the minor device number of device files. - /// - /// char and block files MUST set this property. - #[serde(default, rename = "devMinor")] - pub dev_minor: u64, - - /// This OPTIONAL property contains the extended attribute for the tar entry. - #[serde(default)] - pub xattrs: HashMap, - - /// This OPTIONAL property contains the digest of the regular file contents. - /// - /// It has the form "sha256:abcdef01234....". - #[serde(default)] - pub digest: String, - - /// This OPTIONAL property contains the offset of the gzip header of the regular file or chunk - /// in the blob. - /// - /// TOCEntries of non-empty reg and chunk MUST set this property. - #[serde(default)] - pub offset: u64, - - /// This OPTIONAL property contains the offset of this chunk in the decompressed regular file - /// payload. TOCEntries of chunk type MUST set this property. - /// - /// ChunkOffset is non-zero if this is a chunk of a large, regular file. - /// If so, the Offset is where the gzip header of ChunkSize bytes at ChunkOffset in Name begin. - /// - /// In serialized form, a "chunkSize" JSON field of zero means that the chunk goes to the end - /// of the file. After reading from the stargz TOC, though, the ChunkSize is initialized to - /// a non-zero file for when Type is either "reg" or "chunk". 
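// A pared-down sketch of what a stargz TOC entry looks like on the wire and how the
// serde rename attributes documented above map camelCase JSON keys onto Rust fields.
// MiniTocEntry and the JSON values are made up for illustration; the real struct is the
// full TocEntry defined here, and the crate already depends on serde and serde_json.
use serde::Deserialize;

#[derive(Deserialize, Debug, Default)]
#[serde(default)]
struct MiniTocEntry {
    name: String,
    #[serde(rename = "type")]
    toc_type: String,
    size: u64,
    #[serde(rename = "chunkOffset")]
    chunk_offset: u64,
    #[serde(rename = "chunkDigest")]
    chunk_digest: String,
}

fn main() {
    let json = r#"{
        "name": "usr/bin/ls",
        "type": "reg",
        "size": 142144,
        "chunkOffset": 0,
        "chunkDigest": "sha256:deadbeef"
    }"#;
    let entry: MiniTocEntry = serde_json::from_str(json).expect("valid TOC entry JSON");
    assert_eq!(entry.toc_type, "reg");
    assert_eq!(entry.chunk_offset, 0);
}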
- #[serde(default, rename = "chunkOffset")] - pub chunk_offset: u64, - - /// This OPTIONAL property contains the decompressed size of this chunk. - /// - /// The last chunk in a reg file or reg file that isn't chunked MUST set this property to zero. - /// Other reg and chunk MUST set this property. - #[serde(default, rename = "chunkSize")] - pub chunk_size: u64, - - /// This OPTIONAL property contains a digest of this chunk. - /// - /// TOCEntries of non-empty reg and chunk MUST set this property. This MAY be used for verifying - /// the data of the chunk. - #[serde(default, rename = "chunkDigest")] - pub chunk_digest: String, - - /// This OPTIONAL property indicates the uncompressed offset of the "reg" or "chunk" entry - /// payload in a stream starts from offset field. - /// - /// `innerOffset` enables to put multiple "reg" or "chunk" payloads in one gzip stream starts - /// from offset. - #[serde(default, rename = "innerOffset")] - pub inner_offset: u64, -} - -impl TocEntry { - /// Check whether the `TocEntry` is a directory. - pub fn is_dir(&self) -> bool { - self.toc_type.as_str() == "dir" - } - - /// Check whether the `TocEntry` is a regular file. - pub fn is_reg(&self) -> bool { - self.toc_type.as_str() == "reg" - } - - /// Check whether the `TocEntry` is a symlink. - pub fn is_symlink(&self) -> bool { - self.toc_type.as_str() == "symlink" - } - - /// Check whether the `TocEntry` is a hardlink. - pub fn is_hardlink(&self) -> bool { - self.toc_type.as_str() == "hardlink" - } - - /// Check whether the `TocEntry` is a file data chunk. - pub fn is_chunk(&self) -> bool { - self.toc_type.as_str() == "chunk" - } - - /// Check whether the `TocEntry` is a block device. - pub fn is_blockdev(&self) -> bool { - self.toc_type.as_str() == "block" - } - - /// Check whether the `TocEntry` is a char device. - pub fn is_chardev(&self) -> bool { - self.toc_type.as_str() == "char" - } - - /// Check whether the `TocEntry` is a FIFO. - pub fn is_fifo(&self) -> bool { - self.toc_type.as_str() == "fifo" - } - - /// Check whether the `TocEntry` is a special entry. - pub fn is_special(&self) -> bool { - self.is_blockdev() || self.is_chardev() || self.is_fifo() - } - - pub fn is_supported(&self) -> bool { - self.is_dir() || self.is_reg() || self.is_symlink() || self.is_hardlink() || self.is_chunk() - } - - /// Check whether the `TocEntry` has associated extended attributes. - pub fn has_xattr(&self) -> bool { - !self.xattrs.is_empty() - } - - /// Get access permission and file mode of the `TocEntry`. - pub fn mode(&self) -> u32 { - let mut mode = 0; - if self.is_dir() { - mode |= libc::S_IFDIR; - } else if self.is_reg() || self.is_hardlink() { - mode |= libc::S_IFREG; - } else if self.is_symlink() { - mode |= libc::S_IFLNK; - } else if self.is_blockdev() { - mode |= libc::S_IFBLK; - } else if self.is_chardev() { - mode |= libc::S_IFCHR; - } else if self.is_fifo() { - mode |= libc::S_IFIFO; - } - - self.mode & !libc::S_IFMT as u32 | mode as u32 - } - - /// Get real device id associated with the `TocEntry`. - pub fn rdev(&self) -> u32 { - if self.is_special() { - makedev(self.dev_major, self.dev_minor) as u32 - } else { - u32::MAX - } - } - - /// Get content size of the entry. - pub fn size(&self) -> u64 { - if self.is_reg() { - self.size - } else { - 0 - } - } - - /// Get file name of the `TocEntry` from the associated path. 
- /// - /// For example: `` to `/`, `/` to `/`, `a/b` to `b`, `a/b/` to `b` - pub fn name(&self) -> Result<&OsStr> { - let name = if self.name == Path::new("/") { - OsStr::new("/") - } else { - self.name - .file_name() - .ok_or_else(|| anyhow!("stargz: invalid entry name {}", self.name.display()))? - }; - Ok(name) - } - - /// Get absolute path for the `TocEntry`. - /// - /// For example: `` to `/`, `a/b` to `/a/b`, `a/b/` to `/a/b` - pub fn path(&self) -> &Path { - &self.name - } - - /// Convert link path of hardlink entry to rootfs absolute path - /// - /// For example: `a/b` to `/a/b` - pub fn hardlink_link_path(&self) -> &Path { - assert!(self.is_hardlink()); - &self.link_name - } - - /// Get target of symlink. - pub fn symlink_link_path(&self) -> &Path { - assert!(self.is_symlink()); - &self.link_name - } - - pub fn block_id(&self) -> Result { - if self.chunk_digest.len() != 71 || !self.chunk_digest.starts_with("sha256:") { - bail!("stargz: invalid chunk digest {}", self.chunk_digest); - } - match hex::decode(&self.chunk_digest[7..]) { - Err(_e) => bail!("stargz: invalid chunk digest {}", self.chunk_digest), - Ok(v) => { - let mut data = DigestData::default(); - data.copy_from_slice(&v[..32]); - Ok(RafsDigest { data }) - } - } - } - - fn normalize(&mut self) -> Result<()> { - if self.name.is_empty() { - bail!("stargz: invalid TocEntry with empty name"); - } - self.name = PathBuf::from("/").join(&self.name); - - if !self.is_supported() && !self.is_special() { - bail!("stargz: invalid type {} for TocEntry", self.toc_type); - } - - if (self.is_symlink() || self.is_hardlink()) && self.link_name.is_empty() { - bail!("stargz: empty link target"); - } - if self.is_hardlink() { - self.link_name = PathBuf::from("/").join(&self.link_name); - } - - if (self.is_reg() || self.is_chunk()) - && (self.digest.is_empty() || self.chunk_digest.is_empty()) - { - bail!("stargz: missing digest or chunk digest"); - } - - if self.is_chunk() && self.chunk_offset == 0 { - bail!("stargz: chunk offset is zero"); - } - - Ok(()) - } -} - -#[derive(Deserialize, Debug, Clone, Default)] -struct TocIndex { - pub version: u32, - pub entries: Vec, -} - -impl TocIndex { - fn load(path: &Path, offset: u64) -> Result { - let mut index_file = File::open(path) - .with_context(|| format!("stargz: failed to open index file {:?}", path))?; - let pos = index_file - .seek(SeekFrom::Start(offset)) - .context("stargz: failed to seek to start of TOC")?; - if pos != offset { - bail!("stargz: failed to seek file position to start of TOC"); - } - let mut toc_index: TocIndex = serde_json::from_reader(index_file).with_context(|| { - format!( - "stargz: failed to deserialize stargz TOC index file {:?}", - path - ) - })?; - - if toc_index.version != 1 { - return Err(Error::msg(format!( - "stargz: unsupported index version {}", - toc_index.version - ))); - } - - for entry in toc_index.entries.iter_mut() { - entry.normalize()?; - } - - Ok(toc_index) - } -} - -/// Build RAFS filesystems from eStargz images. -pub struct StargzBuilder { - blob_size: u64, - builder: TarBuilder, - file_chunk_map: HashMap)>, - hardlink_map: HashMap, - uncompressed_offset: u64, -} - -impl StargzBuilder { - /// Create a new instance of [StargzBuilder]. 
- pub fn new(blob_size: u64, ctx: &BuildContext) -> Self { - Self { - blob_size, - builder: TarBuilder::new(ctx.explicit_uidgid, 0, ctx.fs_version), - file_chunk_map: HashMap::new(), - hardlink_map: HashMap::new(), - uncompressed_offset: 0, - } - } - - fn build_tree(&mut self, ctx: &mut BuildContext, layer_idx: u16) -> Result { - let toc_index = TocIndex::load(&ctx.source_path, 0)?; - if toc_index.version != 1 { - bail!("stargz: TOC version {} is unsupported", toc_index.version); - } else if toc_index.entries.is_empty() { - bail!("stargz: TOC array is empty"); - } - - self.builder.layer_idx = layer_idx; - let root = self.builder.create_directory(&[OsString::from("/")])?; - let mut tree = Tree::new(root); - - // Map regular file path to chunks: HashMap<, <(file_size, chunks)>> - let mut last_reg_entry: Option<&TocEntry> = None; - for entry in toc_index.entries.iter() { - let path = entry.path(); - - // TODO: support chardev/blockdev/fifo - if !entry.is_supported() { - warn!( - "stargz: unsupported {} with type {}", - path.display(), - entry.toc_type - ); - continue; - } else if self.builder.is_stargz_special_files(path) { - // skip estargz special files. - continue; - } - - // Build RAFS chunk info from eStargz regular file or chunk data record. - let uncompress_size = Self::get_content_size(ctx, entry, &mut last_reg_entry)?; - if (entry.is_reg() || entry.is_chunk()) && uncompress_size != 0 { - let block_id = entry - .block_id() - .context("stargz: failed to get chunk digest")?; - // blob_index, index and compressed_size will be fixed later - let chunk_info = ChunkWrapper::V6(RafsV5ChunkInfo { - block_id, - blob_index: 0, - flags: BlobChunkFlags::COMPRESSED, - compressed_size: 0, - uncompressed_size: uncompress_size as u32, - compressed_offset: entry.offset as u64, - uncompressed_offset: self.uncompressed_offset, - file_offset: entry.chunk_offset as u64, - index: 0, - reserved: 0, - }); - let chunk = NodeChunk { - source: ChunkSource::Build, - inner: Arc::new(chunk_info), - }; - - if let Some((size, chunks)) = self.file_chunk_map.get_mut(path) { - chunks.push(chunk); - if entry.is_reg() { - *size = entry.size; - } - } else if entry.is_reg() { - self.file_chunk_map - .insert(path.to_path_buf(), (entry.size, vec![chunk])); - } else { - bail!("stargz: file chunk lacks of corresponding head regular file entry"); - } - - let aligned_chunk_size = if ctx.aligned_chunk { - // Safe to unwrap because `chunk_size` is much less than u32::MAX. - try_round_up_4k(uncompress_size).unwrap() - } else { - uncompress_size - }; - self.uncompressed_offset += aligned_chunk_size; - } - - if !entry.is_chunk() && !self.builder.is_stargz_special_files(path) { - self.parse_entry(&mut tree, entry, path)?; - } - } - - for (size, ref mut chunks) in self.file_chunk_map.values_mut() { - Self::sort_and_validate_chunks(chunks, *size)?; - } - - Ok(tree) - } - - /// Get content size of a regular file or file chunk entry. 
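// A small sketch of the "last chunk" rule that get_content_size below implements for
// chunk entries: a chunkSize of zero means the chunk runs to the end of its file, so its
// real length is the owning file's size minus the chunk's offset. The sizes used in main
// are made up; the real function additionally checks chunk alignment against ctx.chunk_size.
fn chunk_content_size(file_size: u64, chunk_offset: u64, chunk_size: u64) -> u64 {
    if chunk_size == 0 {
        file_size - chunk_offset // tail chunk covers the remainder of the file
    } else {
        chunk_size
    }
}

fn main() {
    // A 10 MiB file split into 4 MiB chunks leaves a 2 MiB tail chunk.
    let file_size = 10 * 1024 * 1024u64;
    assert_eq!(chunk_content_size(file_size, 8 * 1024 * 1024, 0), 2 * 1024 * 1024);
    assert_eq!(chunk_content_size(file_size, 4 * 1024 * 1024, 4 * 1024 * 1024), 4 * 1024 * 1024);
}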
- fn get_content_size<'a>( - ctx: &mut BuildContext, - entry: &'a TocEntry, - last_reg_entry: &mut Option<&'a TocEntry>, - ) -> Result { - if entry.is_reg() { - // Regular file without chunk - if entry.chunk_offset == 0 && entry.chunk_size == 0 { - Ok(entry.size) - } else if entry.chunk_offset % ctx.chunk_size as u64 != 0 { - bail!( - "stargz: chunk offset (0x{:x}) is not aligned to 0x{:x}", - entry.chunk_offset, - ctx.chunk_size - ); - } else if entry.chunk_size != ctx.chunk_size as u64 { - bail!("stargz: first chunk size is not 0x{:x}", ctx.chunk_size); - } else { - *last_reg_entry = Some(entry); - Ok(entry.chunk_size) - } - } else if entry.is_chunk() { - if entry.chunk_offset % ctx.chunk_size as u64 != 0 { - bail!( - "stargz: chunk offset (0x{:x}) is not aligned to 0x{:x}", - entry.chunk_offset, - ctx.chunk_size - ); - } else if entry.chunk_size == 0 { - // Figure out content size for the last chunk entry of regular file - if let Some(reg_entry) = last_reg_entry { - let size = reg_entry.size - entry.chunk_offset; - if size > ctx.chunk_size as u64 { - bail!( - "stargz: size of last chunk 0x{:x} is bigger than chunk size 0x {:x}", - size, - ctx.chunk_size - ); - } - *last_reg_entry = None; - Ok(size) - } else { - bail!("stargz: tailer chunk lacks of corresponding head chunk"); - } - } else if entry.chunk_size != ctx.chunk_size as u64 { - bail!( - "stargz: chunk size 0x{:x} is not 0x{:x}", - entry.chunk_size, - ctx.chunk_size - ); - } else { - Ok(entry.chunk_size) - } - } else { - Ok(0) - } - } - - fn parse_entry(&mut self, tree: &mut Tree, entry: &TocEntry, path: &Path) -> Result<()> { - let name_size = entry.name()?.byte_size() as u16; - let uid = if self.builder.explicit_uidgid { - entry.uid - } else { - 0 - }; - let gid = if self.builder.explicit_uidgid { - entry.gid - } else { - 0 - }; - let mut file_size = entry.size(); - let mut flags = RafsInodeFlags::default(); - - // Parse symlink - let (symlink, symlink_size) = if entry.is_symlink() { - let symlink_link_path = entry.symlink_link_path(); - let symlink_size = symlink_link_path.as_os_str().byte_size() as u16; - file_size = symlink_size.into(); - flags |= RafsInodeFlags::SYMLINK; - (Some(symlink_link_path.as_os_str().to_owned()), symlink_size) - } else { - (None, 0) - }; - - // Handle hardlink ino - let ino = if entry.is_hardlink() { - let link_path = entry.hardlink_link_path(); - let link_path = link_path.components().as_path(); - let targets = Node::generate_target_vec(link_path); - assert!(!targets.is_empty()); - let mut tmp_tree: &Tree = tree; - for name in &targets[1..] 
{ - match tmp_tree.get_child_idx(name.as_bytes()) { - Some(idx) => tmp_tree = &tmp_tree.children[idx], - None => { - bail!( - "stargz: unknown target {} for hardlink {}", - link_path.display(), - path.display(), - ); - } - } - } - - let mut tmp_node = tmp_tree.lock_node(); - if !tmp_node.is_reg() { - bail!( - "stargz: target {} for hardlink {} is not a regular file", - link_path.display(), - path.display() - ); - } - self.hardlink_map - .insert(path.to_path_buf(), tmp_tree.node.clone()); - flags |= RafsInodeFlags::HARDLINK; - tmp_node.inode.set_has_hardlink(true); - tmp_node.inode.ino() - } else { - self.builder.next_ino() - }; - - // Parse xattrs - let mut xattrs = RafsXAttrs::new(); - if entry.has_xattr() { - for (name, value) in entry.xattrs.iter() { - flags |= RafsInodeFlags::XATTR; - let value = base64::engine::general_purpose::STANDARD - .decode(value) - .with_context(|| { - format!( - "stargz: failed to parse xattr {:?} for entry {:?}", - path, name - ) - })?; - xattrs.add(OsString::from(name), value)?; - } - } - - let mut inode = InodeWrapper::V6(RafsV6Inode { - i_ino: ino, - i_projid: 0, - i_uid: uid, - i_gid: gid, - i_mode: entry.mode(), - i_size: file_size, - i_nlink: 1, - i_blocks: 0, - i_flags: flags, - i_child_count: 0, - i_name_size: name_size, - i_symlink_size: symlink_size, - i_rdev: entry.rdev(), - // TODO: add mtime from entry.ModTime() - i_mtime: 0, - i_mtime_nsec: 0, - }); - inode.set_has_xattr(!xattrs.is_empty()); - - let source = PathBuf::from("/"); - let target = Node::generate_target(&path, &source); - let target_vec = Node::generate_target_vec(&target); - let info = NodeInfo { - explicit_uidgid: self.builder.explicit_uidgid, - src_ino: ino, - src_dev: u64::MAX, - rdev: entry.rdev() as u64, - source, - target, - path: path.to_path_buf(), - target_vec, - symlink, - xattrs, - v6_force_extended_inode: false, - }; - let node = Node::new(inode, info, self.builder.layer_idx); - - self.builder.insert_into_tree(tree, node) - } - - fn sort_and_validate_chunks(chunks: &mut [NodeChunk], size: u64) -> Result<()> { - if chunks.len() > RAFS_MAX_CHUNKS_PER_BLOB as usize { - bail!("stargz: file has two many chunks"); - } - - if chunks.len() > 1 { - chunks.sort_unstable_by_key(|v| v.inner.file_offset()); - for idx in 0..chunks.len() - 2 { - let curr = &chunks[idx].inner; - let pos = curr - .file_offset() - .checked_add(curr.uncompressed_size() as u64); - match pos { - Some(pos) => { - if pos != chunks[idx + 1].inner.file_offset() { - bail!("stargz: unexpected holes between data chunks"); - } - } - None => { - bail!( - "stargz: invalid chunk offset 0x{:x} or size 0x{:x}", - curr.file_offset(), - curr.uncompressed_size() - ) - } - } - } - } - - if !chunks.is_empty() { - let last = &chunks[chunks.len() - 1]; - if last.inner.file_offset() + last.inner.uncompressed_size() as u64 != size { - bail!("stargz: file size and sum of chunk size doesn't match"); - } - } else if size != 0 { - bail!("stargz: file size and sum of chunk size doesn't match"); - } - - Ok(()) - } - - fn fix_chunk_info(&mut self, ctx: &mut BuildContext, blob_mgr: &mut BlobManager) -> Result<()> { - /* - let mut header = BlobMetaHeaderOndisk::default(); - header.set_4k_aligned(true); - header.set_ci_separate(ctx.blob_meta_features & BLOB_META_FEATURE_SEPARATE != 0); - header.set_chunk_info_v2(ctx.blob_meta_features & BLOB_META_FEATURE_CHUNK_INFO_V2 != 0); - header.set_ci_zran(ctx.blob_meta_features & BLOB_META_FEATURE_ZRAN != 0); - blob_ctx.blob_meta_header = header; - */ - - // Ensure that the chunks in the blob meta 
are sorted by uncompressed_offset and ordered - // by chunk index so that they can be found quickly at runtime with a binary search. - let mut blob_chunks: Vec<&mut NodeChunk> = Vec::with_capacity(10240); - for (_, chunks) in self.file_chunk_map.values_mut() { - for chunk in chunks.iter_mut() { - blob_chunks.push(chunk); - } - } - blob_chunks.sort_unstable_by(|a, b| { - a.inner - .uncompressed_offset() - .cmp(&b.inner.uncompressed_offset()) - }); - if blob_chunks.is_empty() { - return Ok(()); - } - - // Compute compressed_size for chunks. - let (blob_index, blob_ctx) = blob_mgr.get_or_create_current_blob(ctx)?; - let chunk_count = blob_chunks.len(); - let mut compressed_blob_size = 0u64; - for idx in 0..chunk_count { - let curr = blob_chunks[idx].inner.compressed_offset(); - let next = if idx == chunk_count - 1 { - self.blob_size - } else { - blob_chunks[idx + 1].inner.compressed_offset() - }; - if curr >= next { - bail!("stargz: compressed offset is out of order"); - } else if next - curr > RAFS_MAX_CHUNK_SIZE { - bail!("stargz: compressed size is too big"); - } - - let mut chunk = blob_chunks[idx].inner.deref().clone(); - let uncomp_size = chunk.uncompressed_size() as usize; - let max_size = (next - curr) as usize; - let max_gzip_size = compute_compressed_gzip_size(uncomp_size, max_size); - let chunk_index = blob_ctx.alloc_chunk_index()?; - chunk.set_index(chunk_index); - chunk.set_blob_index(blob_index); - chunk.set_compressed_size(max_gzip_size as u32); - blob_ctx.add_chunk_meta_info(&chunk, None)?; - compressed_blob_size = std::cmp::max( - compressed_blob_size, - chunk.compressed_offset() + chunk.compressed_size() as u64, - ); - assert_eq!(Arc::strong_count(&blob_chunks[idx].inner), 1); - blob_chunks[idx].inner = Arc::new(chunk); - } - - blob_ctx.uncompressed_blob_size = self.uncompressed_offset; - blob_ctx.compressed_blob_size = compressed_blob_size; - - Ok(()) - } - - fn fix_nodes(&mut self, bootstrap: &mut Bootstrap) -> Result<()> { - bootstrap - .tree - .walk_bfs(true, &mut |n| { - let mut node = n.lock_node(); - let node_path = node.path(); - if let Some((size, ref mut chunks)) = self.file_chunk_map.get_mut(node_path) { - node.inode.set_size(*size); - node.inode.set_child_count(chunks.len() as u32); - node.chunks = chunks.to_vec(); - } - - Ok(()) - }) - .context("stargz: failed to update chunk info array for nodes")?; - - for (k, v) in self.hardlink_map.iter() { - match bootstrap.tree.get_node(k) { - Some(n) => { - let mut node = n.lock_node(); - let target = v.lock().unwrap(); - node.inode.set_size(target.inode.size()); - node.inode.set_child_count(target.inode.child_count()); - node.chunks = target.chunks.clone(); - node.set_xattr(target.info.xattrs.clone()); - } - None => bail!( - "stargz: failed to get target node for hardlink {}", - k.display() - ), - } - } - - Ok(()) - } -} - -impl Builder for StargzBuilder { - fn build( - &mut self, - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - ) -> Result { - if ctx.fs_version != RafsVersion::V6 { - bail!( - "stargz: unsupported filesystem version {:?}", - ctx.fs_version - ); - } else if ctx.compressor != compress::Algorithm::GZip { - bail!("stargz: invalid compression algorithm {:?}", ctx.compressor); - } else if ctx.digester != digest::Algorithm::Sha256 { - bail!("stargz: invalid digest algorithm {:?}", ctx.digester); - } - let mut blob_writer: Box = if let Some(blob_stor) = ctx.blob_storage.clone() { - Box::new(ArtifactWriter::new(blob_stor)?) 
- } else { - Box::::default() - }; - let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; - let layer_idx = u16::from(bootstrap_ctx.layered); - - // Build filesystem tree from the stargz TOC. - let tree = timing_tracer!({ self.build_tree(ctx, layer_idx) }, "build_tree")?; - - // Build bootstrap - let mut bootstrap = timing_tracer!( - { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, - "build_bootstrap" - )?; - - self.fix_chunk_info(ctx, blob_mgr)?; - self.fix_nodes(&mut bootstrap)?; - - // Dump blob file - timing_tracer!( - { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, - "dump_blob" - )?; - - // Dump blob meta information - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; - } - - // Dump RAFS meta/bootstrap and finalize the data blob. - if ctx.blob_inline_meta { - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - } else { - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - } - - lazy_drop(bootstrap_ctx); - - BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ArtifactStorage, ConversionType, Features, Prefetch, WhiteoutSpec}; - - #[test] - fn test_build_stargz_toc() { - let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); - let mut tmp_dir = tmp_dir.as_path().to_path_buf(); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let source_path = - PathBuf::from(root_dir).join("../tests/texture/stargz/estargz_sample.json"); - let prefetch = Prefetch::default(); - let mut ctx = BuildContext::new( - "".to_string(), - true, - 0, - compress::Algorithm::GZip, - digest::Algorithm::Sha256, - true, - WhiteoutSpec::Oci, - ConversionType::EStargzIndexToRef, - source_path, - prefetch, - Some(ArtifactStorage::FileDir(tmp_dir.clone())), - false, - Features::new(), - false, - ); - ctx.fs_version = RafsVersion::V6; - ctx.conversion_type = ConversionType::EStargzToRafs; - let mut bootstrap_mgr = - BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir.clone())), None); - let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); - let mut builder = StargzBuilder::new(0x1000000, &ctx); - - let builder = builder.build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr); - assert!(builder.is_ok()); - let builder = builder.unwrap(); - assert_eq!( - builder.blobs, - vec![String::from( - "bd4eff3fe6f5a352457c076d2133583e43db895b4af08d717b3fbcaeca89834e" - )] - ); - assert_eq!(builder.blob_size, Some(4128)); - tmp_dir.push("e60676aef5cc0d5caca9f4c8031f5b0c8392a0611d44c8e1bbc46dbf7fe7bfef"); - assert_eq!( - builder.bootstrap_path.unwrap(), - tmp_dir.to_str().unwrap().to_string() - ) - } - - #[test] - fn test_toc_entry() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); - - let mut entry = TocEntry { - name: source_path, - toc_type: "".to_string(), - size: 0x10, - link_name: PathBuf::from("link_name"), - mode: 0, - uid: 1, - gid: 1, - uname: "user_name".to_string(), - gname: "group_name".to_string(), - dev_major: 
255, - dev_minor: 33, - xattrs: Default::default(), - digest: Default::default(), - offset: 0, - chunk_offset: 0, - chunk_size: 0, - chunk_digest: "sha256:".to_owned(), - inner_offset: 0, - }; - entry.chunk_digest.extend(vec!['a'; 64].iter()); - - entry.toc_type = "dir".to_owned(); - assert!(entry.is_dir()); - assert!(entry.is_supported()); - assert_eq!(entry.mode(), libc::S_IFDIR as u32); - assert_eq!(entry.rdev(), u32::MAX); - - entry.toc_type = "req".to_owned(); - assert!(!entry.is_reg()); - entry.toc_type = "reg".to_owned(); - assert!(entry.is_reg()); - assert!(entry.is_supported()); - assert_eq!(entry.mode(), libc::S_IFREG as u32); - assert_eq!(entry.size(), 0x10); - - entry.toc_type = "symlink".to_owned(); - assert!(entry.is_symlink()); - assert!(entry.is_supported()); - assert_eq!(entry.mode(), libc::S_IFLNK as u32); - assert_eq!(entry.symlink_link_path(), Path::new("link_name")); - assert!(entry.normalize().is_ok()); - - entry.toc_type = "hardlink".to_owned(); - assert!(entry.is_supported()); - assert!(entry.is_hardlink()); - assert_eq!(entry.mode(), libc::S_IFREG as u32); - assert_eq!(entry.hardlink_link_path(), Path::new("link_name")); - assert!(entry.normalize().is_ok()); - - entry.toc_type = "chunk".to_owned(); - assert!(entry.is_supported()); - assert!(entry.is_chunk()); - assert_eq!(entry.mode(), 0); - assert_eq!(entry.size(), 0); - assert!(entry.normalize().is_err()); - - entry.toc_type = "block".to_owned(); - assert!(entry.is_special()); - assert!(entry.is_blockdev()); - assert_eq!(entry.mode(), libc::S_IFBLK as u32); - - entry.toc_type = "char".to_owned(); - assert!(entry.is_special()); - assert!(entry.is_chardev()); - assert_eq!(entry.mode(), libc::S_IFCHR as u32); - assert_ne!(entry.size(), 0x10); - - entry.toc_type = "fifo".to_owned(); - assert!(entry.is_fifo()); - assert!(entry.is_special()); - assert_eq!(entry.mode(), libc::S_IFIFO as u32); - assert_eq!(entry.rdev(), 65313); - - assert_eq!(entry.name().unwrap().to_str(), Some("all-entry-type.tar")); - entry.name = PathBuf::from("/"); - assert_eq!(entry.name().unwrap().to_str(), Some("/")); - assert_ne!(entry.path(), Path::new("all-entry-type.tar")); - - assert_eq!(entry.block_id().unwrap().data, [0xaa as u8; 32]); - - entry.name = PathBuf::from(""); - assert!(entry.normalize().is_err()); - } -} +// Copyright 2020 Alibaba cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Generate a RAFS filesystem bootstrap from an stargz layer, reusing the stargz layer as data blob. 
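// A minimal sketch of the file-mode reconstruction done by TocEntry::mode() further down:
// the TOC stores only permission bits, so the S_IFMT file-type bits are re-derived from
// the entry's type string and OR-ed back in. The constants come from the libc crate this
// module already uses; entry_mode and its match arms are illustrative, not the real method.
fn entry_mode(toc_type: &str, perm: u32) -> u32 {
    let ifmt = match toc_type {
        "dir" => libc::S_IFDIR,
        "reg" | "hardlink" => libc::S_IFREG,
        "symlink" => libc::S_IFLNK,
        "block" => libc::S_IFBLK,
        "char" => libc::S_IFCHR,
        "fifo" => libc::S_IFIFO,
        _ => 0,
    };
    (perm & !(libc::S_IFMT as u32)) | ifmt as u32
}

fn main() {
    assert_eq!(entry_mode("dir", 0o755), libc::S_IFDIR as u32 | 0o755);
    assert_eq!(entry_mode("reg", 0o644), libc::S_IFREG as u32 | 0o644);
}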
+ +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::fs::File; +use std::io::{Seek, SeekFrom}; +use std::ops::Deref; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use anyhow::{anyhow, bail, Context, Error, Result}; +use base64::Engine; +use nix::NixPath; +use nydus_rafs::metadata::chunk::ChunkWrapper; +use nydus_rafs::metadata::inode::{InodeWrapper, RafsInodeFlags, RafsV6Inode}; +use nydus_rafs::metadata::layout::v5::RafsV5ChunkInfo; +use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_rafs::metadata::RafsVersion; +use nydus_storage::device::BlobChunkFlags; +use nydus_storage::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; +use nydus_utils::compact::makedev; +use nydus_utils::compress::{self, compute_compressed_gzip_size}; +use nydus_utils::digest::{self, DigestData, RafsDigest}; +use nydus_utils::{lazy_drop, root_tracer, timing_tracer, try_round_up_4k, ByteSize}; +use serde::{Deserialize, Serialize}; + +use crate::core::context::{Artifact, NoopArtifactWriter}; + +use super::core::blob::Blob; +use super::core::context::{ + ArtifactWriter, BlobManager, BootstrapManager, BuildContext, BuildOutput, +}; +use super::core::node::{ChunkSource, Node, NodeChunk, NodeInfo}; +use super::{ + build_bootstrap, dump_bootstrap, finalize_blob, Bootstrap, Builder, TarBuilder, Tree, TreeNode, +}; + +#[derive(Deserialize, Serialize, Debug, Clone, Default)] +struct TocEntry { + /// This REQUIRED property contains the name of the tar entry. + /// + /// This MUST be the complete path stored in the tar file. + pub name: PathBuf, + + /// This REQUIRED property contains the type of tar entry. + /// + /// This MUST be either of the following. + /// - dir: directory + /// - reg: regular file + /// - symlink: symbolic link + /// - hardlink: hard link + /// - char: character device + /// - block: block device + /// - fifo: fifo + /// - chunk: a chunk of regular file data As described in the above section, + /// a regular file can be divided into several chunks. TOCEntry MUST be created for each chunk. + /// TOCEntry of the first chunk of that file MUST be typed as reg. TOCEntry of each chunk after + /// 2nd MUST be typed as chunk. chunk TOCEntry MUST set offset, chunkOffset and chunkSize + /// properties. + #[serde(rename = "type")] + pub toc_type: String, + + /// This OPTIONAL property contains the uncompressed size of the regular file. + /// + /// Non-empty reg file MUST set this property. + #[serde(default)] + pub size: u64, + + // This OPTIONAL property contains the modification time of the tar entry. + // + // Empty means zero or unknown. Otherwise, the value is in UTC RFC3339 format. + // // ModTime3339 is the modification time of the tar entry. Empty + // // means zero or unknown. Otherwise it's in UTC RFC3339 + // // format. Use the ModTime method to access the time.Time value. + // #[serde(default, alias = "modtime")] + // mod_time_3339: String, + // #[serde(skip)] + // mod_time: Time, + /// This OPTIONAL property contains the link target. + /// + /// Symlink and hardlink MUST set this property. + #[serde(default, rename = "linkName")] + pub link_name: PathBuf, + + /// This REQUIRED property contains the permission and mode bits. + #[serde(default)] + pub mode: u32, + + /// This REQUIRED property contains the user ID of the owner of this file. + #[serde(default)] + pub uid: u32, + + /// This REQUIRED property contains the group ID of the owner of this file. 
+ #[serde(default)] + pub gid: u32, + + /// This OPTIONAL property contains the username of the owner. + /// + /// In the serialized JSON, this field may only be present for + /// the first entry with the same Uid. + #[serde(default, rename = "userName")] + pub uname: String, + + /// This OPTIONAL property contains the groupname of the owner. + /// + /// In the serialized JSON, this field may only be present for + /// the first entry with the same Gid. + #[serde(default, rename = "groupName")] + pub gname: String, + + /// This OPTIONAL property contains the major device number of device files. + /// + /// char and block files MUST set this property. + #[serde(default, rename = "devMajor")] + pub dev_major: u64, + + /// This OPTIONAL property contains the minor device number of device files. + /// + /// char and block files MUST set this property. + #[serde(default, rename = "devMinor")] + pub dev_minor: u64, + + /// This OPTIONAL property contains the extended attribute for the tar entry. + #[serde(default)] + pub xattrs: HashMap, + + /// This OPTIONAL property contains the digest of the regular file contents. + /// + /// It has the form "sha256:abcdef01234....". + #[serde(default)] + pub digest: String, + + /// This OPTIONAL property contains the offset of the gzip header of the regular file or chunk + /// in the blob. + /// + /// TOCEntries of non-empty reg and chunk MUST set this property. + #[serde(default)] + pub offset: u64, + + /// This OPTIONAL property contains the offset of this chunk in the decompressed regular file + /// payload. TOCEntries of chunk type MUST set this property. + /// + /// ChunkOffset is non-zero if this is a chunk of a large, regular file. + /// If so, the Offset is where the gzip header of ChunkSize bytes at ChunkOffset in Name begin. + /// + /// In serialized form, a "chunkSize" JSON field of zero means that the chunk goes to the end + /// of the file. After reading from the stargz TOC, though, the ChunkSize is initialized to + /// a non-zero file for when Type is either "reg" or "chunk". + #[serde(default, rename = "chunkOffset")] + pub chunk_offset: u64, + + /// This OPTIONAL property contains the decompressed size of this chunk. + /// + /// The last chunk in a reg file or reg file that isn't chunked MUST set this property to zero. + /// Other reg and chunk MUST set this property. + #[serde(default, rename = "chunkSize")] + pub chunk_size: u64, + + /// This OPTIONAL property contains a digest of this chunk. + /// + /// TOCEntries of non-empty reg and chunk MUST set this property. This MAY be used for verifying + /// the data of the chunk. + #[serde(default, rename = "chunkDigest")] + pub chunk_digest: String, + + /// This OPTIONAL property indicates the uncompressed offset of the "reg" or "chunk" entry + /// payload in a stream starts from offset field. + /// + /// `innerOffset` enables to put multiple "reg" or "chunk" payloads in one gzip stream starts + /// from offset. + #[serde(default, rename = "innerOffset")] + pub inner_offset: u64, +} + +impl TocEntry { + /// Check whether the `TocEntry` is a directory. + pub fn is_dir(&self) -> bool { + self.toc_type.as_str() == "dir" + } + + /// Check whether the `TocEntry` is a regular file. + pub fn is_reg(&self) -> bool { + self.toc_type.as_str() == "reg" + } + + /// Check whether the `TocEntry` is a symlink. + pub fn is_symlink(&self) -> bool { + self.toc_type.as_str() == "symlink" + } + + /// Check whether the `TocEntry` is a hardlink. 
+ pub fn is_hardlink(&self) -> bool { + self.toc_type.as_str() == "hardlink" + } + + /// Check whether the `TocEntry` is a file data chunk. + pub fn is_chunk(&self) -> bool { + self.toc_type.as_str() == "chunk" + } + + /// Check whether the `TocEntry` is a block device. + pub fn is_blockdev(&self) -> bool { + self.toc_type.as_str() == "block" + } + + /// Check whether the `TocEntry` is a char device. + pub fn is_chardev(&self) -> bool { + self.toc_type.as_str() == "char" + } + + /// Check whether the `TocEntry` is a FIFO. + pub fn is_fifo(&self) -> bool { + self.toc_type.as_str() == "fifo" + } + + /// Check whether the `TocEntry` is a special entry. + pub fn is_special(&self) -> bool { + self.is_blockdev() || self.is_chardev() || self.is_fifo() + } + + pub fn is_supported(&self) -> bool { + self.is_dir() || self.is_reg() || self.is_symlink() || self.is_hardlink() || self.is_chunk() + } + + /// Check whether the `TocEntry` has associated extended attributes. + pub fn has_xattr(&self) -> bool { + !self.xattrs.is_empty() + } + + /// Get access permission and file mode of the `TocEntry`. + pub fn mode(&self) -> u32 { + let mut mode = 0; + if self.is_dir() { + mode |= libc::S_IFDIR; + } else if self.is_reg() || self.is_hardlink() { + mode |= libc::S_IFREG; + } else if self.is_symlink() { + mode |= libc::S_IFLNK; + } else if self.is_blockdev() { + mode |= libc::S_IFBLK; + } else if self.is_chardev() { + mode |= libc::S_IFCHR; + } else if self.is_fifo() { + mode |= libc::S_IFIFO; + } + + self.mode & !libc::S_IFMT as u32 | mode as u32 + } + + /// Get real device id associated with the `TocEntry`. + pub fn rdev(&self) -> u32 { + if self.is_special() { + makedev(self.dev_major, self.dev_minor) as u32 + } else { + u32::MAX + } + } + + /// Get content size of the entry. + pub fn size(&self) -> u64 { + if self.is_reg() { + self.size + } else { + 0 + } + } + + /// Get file name of the `TocEntry` from the associated path. + /// + /// For example: `` to `/`, `/` to `/`, `a/b` to `b`, `a/b/` to `b` + pub fn name(&self) -> Result<&OsStr> { + let name = if self.name == Path::new("/") { + OsStr::new("/") + } else { + self.name + .file_name() + .ok_or_else(|| anyhow!("stargz: invalid entry name {}", self.name.display()))? + }; + Ok(name) + } + + /// Get absolute path for the `TocEntry`. + /// + /// For example: `` to `/`, `a/b` to `/a/b`, `a/b/` to `/a/b` + pub fn path(&self) -> &Path { + &self.name + } + + /// Convert link path of hardlink entry to rootfs absolute path + /// + /// For example: `a/b` to `/a/b` + pub fn hardlink_link_path(&self) -> &Path { + assert!(self.is_hardlink()); + &self.link_name + } + + /// Get target of symlink. 
+ pub fn symlink_link_path(&self) -> &Path { + assert!(self.is_symlink()); + &self.link_name + } + + pub fn block_id(&self) -> Result { + if self.chunk_digest.len() != 71 || !self.chunk_digest.starts_with("sha256:") { + bail!("stargz: invalid chunk digest {}", self.chunk_digest); + } + match hex::decode(&self.chunk_digest[7..]) { + Err(_e) => bail!("stargz: invalid chunk digest {}", self.chunk_digest), + Ok(v) => { + let mut data = DigestData::default(); + data.copy_from_slice(&v[..32]); + Ok(RafsDigest { data }) + } + } + } + + fn normalize(&mut self) -> Result<()> { + if self.name.is_empty() { + bail!("stargz: invalid TocEntry with empty name"); + } + self.name = PathBuf::from("/").join(&self.name); + + if !self.is_supported() && !self.is_special() { + bail!("stargz: invalid type {} for TocEntry", self.toc_type); + } + + if (self.is_symlink() || self.is_hardlink()) && self.link_name.is_empty() { + bail!("stargz: empty link target"); + } + if self.is_hardlink() { + self.link_name = PathBuf::from("/").join(&self.link_name); + } + + if (self.is_reg() || self.is_chunk()) + && (self.digest.is_empty() || self.chunk_digest.is_empty()) + { + bail!("stargz: missing digest or chunk digest"); + } + + if self.is_chunk() && self.chunk_offset == 0 { + bail!("stargz: chunk offset is zero"); + } + + Ok(()) + } +} + +#[derive(Deserialize, Debug, Clone, Default)] +struct TocIndex { + pub version: u32, + pub entries: Vec, +} + +impl TocIndex { + fn load(path: &Path, offset: u64) -> Result { + let mut index_file = File::open(path) + .with_context(|| format!("stargz: failed to open index file {:?}", path))?; + let pos = index_file + .seek(SeekFrom::Start(offset)) + .context("stargz: failed to seek to start of TOC")?; + if pos != offset { + bail!("stargz: failed to seek file position to start of TOC"); + } + let mut toc_index: TocIndex = serde_json::from_reader(index_file).with_context(|| { + format!( + "stargz: failed to deserialize stargz TOC index file {:?}", + path + ) + })?; + + if toc_index.version != 1 { + return Err(Error::msg(format!( + "stargz: unsupported index version {}", + toc_index.version + ))); + } + + for entry in toc_index.entries.iter_mut() { + entry.normalize()?; + } + + Ok(toc_index) + } +} + +/// Build RAFS filesystems from eStargz images. +pub struct StargzBuilder { + blob_size: u64, + builder: TarBuilder, + file_chunk_map: HashMap)>, + hardlink_map: HashMap, + uncompressed_offset: u64, +} + +impl StargzBuilder { + /// Create a new instance of [StargzBuilder]. 
+ pub fn new(blob_size: u64, ctx: &BuildContext) -> Self { + Self { + blob_size, + builder: TarBuilder::new(ctx.explicit_uidgid, 0, ctx.fs_version), + file_chunk_map: HashMap::new(), + hardlink_map: HashMap::new(), + uncompressed_offset: 0, + } + } + + fn build_tree(&mut self, ctx: &mut BuildContext, layer_idx: u16) -> Result { + let toc_index = TocIndex::load(&ctx.source_path, 0)?; + if toc_index.version != 1 { + bail!("stargz: TOC version {} is unsupported", toc_index.version); + } else if toc_index.entries.is_empty() { + bail!("stargz: TOC array is empty"); + } + + self.builder.layer_idx = layer_idx; + let root = self.builder.create_directory(&[OsString::from("/")])?; + let mut tree = Tree::new(root); + + // Map regular file path to chunks: HashMap<, <(file_size, chunks)>> + let mut last_reg_entry: Option<&TocEntry> = None; + for entry in toc_index.entries.iter() { + let path = entry.path(); + + // TODO: support chardev/blockdev/fifo + if !entry.is_supported() { + warn!( + "stargz: unsupported {} with type {}", + path.display(), + entry.toc_type + ); + continue; + } else if self.builder.is_stargz_special_files(path) { + // skip estargz special files. + continue; + } + + // Build RAFS chunk info from eStargz regular file or chunk data record. + let uncompress_size = Self::get_content_size(ctx, entry, &mut last_reg_entry)?; + if (entry.is_reg() || entry.is_chunk()) && uncompress_size != 0 { + let block_id = entry + .block_id() + .context("stargz: failed to get chunk digest")?; + // blob_index, index and compressed_size will be fixed later + let chunk_info = ChunkWrapper::V6(RafsV5ChunkInfo { + block_id, + blob_index: 0, + flags: BlobChunkFlags::COMPRESSED, + compressed_size: 0, + uncompressed_size: uncompress_size as u32, + compressed_offset: entry.offset as u64, + uncompressed_offset: self.uncompressed_offset, + file_offset: entry.chunk_offset as u64, + index: 0, + reserved: 0, + }); + let chunk = NodeChunk { + source: ChunkSource::Build, + inner: Arc::new(chunk_info), + }; + + if let Some((size, chunks)) = self.file_chunk_map.get_mut(path) { + chunks.push(chunk); + if entry.is_reg() { + *size = entry.size; + } + } else if entry.is_reg() { + self.file_chunk_map + .insert(path.to_path_buf(), (entry.size, vec![chunk])); + } else { + bail!("stargz: file chunk lacks of corresponding head regular file entry"); + } + + let aligned_chunk_size = if ctx.aligned_chunk { + // Safe to unwrap because `chunk_size` is much less than u32::MAX. + try_round_up_4k(uncompress_size).unwrap() + } else { + uncompress_size + }; + self.uncompressed_offset += aligned_chunk_size; + } + + if !entry.is_chunk() && !self.builder.is_stargz_special_files(path) { + self.parse_entry(&mut tree, entry, path)?; + } + } + + for (size, ref mut chunks) in self.file_chunk_map.values_mut() { + Self::sort_and_validate_chunks(chunks, *size)?; + } + + Ok(tree) + } + + /// Get content size of a regular file or file chunk entry. 
+ fn get_content_size<'a>( + ctx: &mut BuildContext, + entry: &'a TocEntry, + last_reg_entry: &mut Option<&'a TocEntry>, + ) -> Result { + if entry.is_reg() { + // Regular file without chunk + if entry.chunk_offset == 0 && entry.chunk_size == 0 { + Ok(entry.size) + } else if entry.chunk_offset % ctx.chunk_size as u64 != 0 { + bail!( + "stargz: chunk offset (0x{:x}) is not aligned to 0x{:x}", + entry.chunk_offset, + ctx.chunk_size + ); + } else if entry.chunk_size != ctx.chunk_size as u64 { + bail!("stargz: first chunk size is not 0x{:x}", ctx.chunk_size); + } else { + *last_reg_entry = Some(entry); + Ok(entry.chunk_size) + } + } else if entry.is_chunk() { + if entry.chunk_offset % ctx.chunk_size as u64 != 0 { + bail!( + "stargz: chunk offset (0x{:x}) is not aligned to 0x{:x}", + entry.chunk_offset, + ctx.chunk_size + ); + } else if entry.chunk_size == 0 { + // Figure out content size for the last chunk entry of regular file + if let Some(reg_entry) = last_reg_entry { + let size = reg_entry.size - entry.chunk_offset; + if size > ctx.chunk_size as u64 { + bail!( + "stargz: size of last chunk 0x{:x} is bigger than chunk size 0x {:x}", + size, + ctx.chunk_size + ); + } + *last_reg_entry = None; + Ok(size) + } else { + bail!("stargz: tailer chunk lacks of corresponding head chunk"); + } + } else if entry.chunk_size != ctx.chunk_size as u64 { + bail!( + "stargz: chunk size 0x{:x} is not 0x{:x}", + entry.chunk_size, + ctx.chunk_size + ); + } else { + Ok(entry.chunk_size) + } + } else { + Ok(0) + } + } + + fn parse_entry(&mut self, tree: &mut Tree, entry: &TocEntry, path: &Path) -> Result<()> { + let name_size = entry.name()?.byte_size() as u16; + let uid = if self.builder.explicit_uidgid { + entry.uid + } else { + 0 + }; + let gid = if self.builder.explicit_uidgid { + entry.gid + } else { + 0 + }; + let mut file_size = entry.size(); + let mut flags = RafsInodeFlags::default(); + + // Parse symlink + let (symlink, symlink_size) = if entry.is_symlink() { + let symlink_link_path = entry.symlink_link_path(); + let symlink_size = symlink_link_path.as_os_str().byte_size() as u16; + file_size = symlink_size.into(); + flags |= RafsInodeFlags::SYMLINK; + (Some(symlink_link_path.as_os_str().to_owned()), symlink_size) + } else { + (None, 0) + }; + + // Handle hardlink ino + let ino = if entry.is_hardlink() { + let link_path = entry.hardlink_link_path(); + let link_path = link_path.components().as_path(); + let targets = Node::generate_target_vec(link_path); + assert!(!targets.is_empty()); + let mut tmp_tree: &Tree = tree; + for name in &targets[1..] 
{ + match tmp_tree.get_child_idx(name.as_bytes()) { + Some(idx) => tmp_tree = &tmp_tree.children[idx], + None => { + bail!( + "stargz: unknown target {} for hardlink {}", + link_path.display(), + path.display(), + ); + } + } + } + + let mut tmp_node = tmp_tree.lock_node(); + if !tmp_node.is_reg() { + bail!( + "stargz: target {} for hardlink {} is not a regular file", + link_path.display(), + path.display() + ); + } + self.hardlink_map + .insert(path.to_path_buf(), tmp_tree.node.clone()); + flags |= RafsInodeFlags::HARDLINK; + tmp_node.inode.set_has_hardlink(true); + tmp_node.inode.ino() + } else { + self.builder.next_ino() + }; + + // Parse xattrs + let mut xattrs = RafsXAttrs::new(); + if entry.has_xattr() { + for (name, value) in entry.xattrs.iter() { + flags |= RafsInodeFlags::XATTR; + let value = base64::engine::general_purpose::STANDARD + .decode(value) + .with_context(|| { + format!( + "stargz: failed to parse xattr {:?} for entry {:?}", + path, name + ) + })?; + xattrs.add(OsString::from(name), value)?; + } + } + + let mut inode = InodeWrapper::V6(RafsV6Inode { + i_ino: ino, + i_projid: 0, + i_uid: uid, + i_gid: gid, + i_mode: entry.mode(), + i_size: file_size, + i_nlink: 1, + i_blocks: 0, + i_flags: flags, + i_child_count: 0, + i_name_size: name_size, + i_symlink_size: symlink_size, + i_rdev: entry.rdev(), + // TODO: add mtime from entry.ModTime() + i_mtime: 0, + i_mtime_nsec: 0, + }); + inode.set_has_xattr(!xattrs.is_empty()); + + let source = PathBuf::from("/"); + let target = Node::generate_target(&path, &source); + let target_vec = Node::generate_target_vec(&target); + let info = NodeInfo { + explicit_uidgid: self.builder.explicit_uidgid, + src_ino: ino, + src_dev: u64::MAX, + rdev: entry.rdev() as u64, + source, + target, + path: path.to_path_buf(), + target_vec, + symlink, + xattrs, + v6_force_extended_inode: false, + }; + let node = Node::new(inode, info, self.builder.layer_idx); + + self.builder.insert_into_tree(tree, node) + } + + fn sort_and_validate_chunks(chunks: &mut [NodeChunk], size: u64) -> Result<()> { + if chunks.len() > RAFS_MAX_CHUNKS_PER_BLOB as usize { + bail!("stargz: file has two many chunks"); + } + + if chunks.len() > 1 { + chunks.sort_unstable_by_key(|v| v.inner.file_offset()); + for idx in 0..chunks.len() - 2 { + let curr = &chunks[idx].inner; + let pos = curr + .file_offset() + .checked_add(curr.uncompressed_size() as u64); + match pos { + Some(pos) => { + if pos != chunks[idx + 1].inner.file_offset() { + bail!("stargz: unexpected holes between data chunks"); + } + } + None => { + bail!( + "stargz: invalid chunk offset 0x{:x} or size 0x{:x}", + curr.file_offset(), + curr.uncompressed_size() + ) + } + } + } + } + + if !chunks.is_empty() { + let last = &chunks[chunks.len() - 1]; + if last.inner.file_offset() + last.inner.uncompressed_size() as u64 != size { + bail!("stargz: file size and sum of chunk size doesn't match"); + } + } else if size != 0 { + bail!("stargz: file size and sum of chunk size doesn't match"); + } + + Ok(()) + } + + fn fix_chunk_info(&mut self, ctx: &mut BuildContext, blob_mgr: &mut BlobManager) -> Result<()> { + /* + let mut header = BlobMetaHeaderOndisk::default(); + header.set_4k_aligned(true); + header.set_ci_separate(ctx.blob_meta_features & BLOB_META_FEATURE_SEPARATE != 0); + header.set_chunk_info_v2(ctx.blob_meta_features & BLOB_META_FEATURE_CHUNK_INFO_V2 != 0); + header.set_ci_zran(ctx.blob_meta_features & BLOB_META_FEATURE_ZRAN != 0); + blob_ctx.blob_meta_header = header; + */ + + // Ensure that the chunks in the blob meta 
are sorted by uncompressed_offset and ordered + // by chunk index so that they can be found quickly at runtime with a binary search. + let mut blob_chunks: Vec<&mut NodeChunk> = Vec::with_capacity(10240); + for (_, chunks) in self.file_chunk_map.values_mut() { + for chunk in chunks.iter_mut() { + blob_chunks.push(chunk); + } + } + blob_chunks.sort_unstable_by(|a, b| { + a.inner + .uncompressed_offset() + .cmp(&b.inner.uncompressed_offset()) + }); + if blob_chunks.is_empty() { + return Ok(()); + } + + // Compute compressed_size for chunks. + let (blob_index, blob_ctx) = blob_mgr.get_or_create_current_blob(ctx)?; + let chunk_count = blob_chunks.len(); + let mut compressed_blob_size = 0u64; + for idx in 0..chunk_count { + let curr = blob_chunks[idx].inner.compressed_offset(); + let next = if idx == chunk_count - 1 { + self.blob_size + } else { + blob_chunks[idx + 1].inner.compressed_offset() + }; + if curr >= next { + bail!("stargz: compressed offset is out of order"); + } else if next - curr > RAFS_MAX_CHUNK_SIZE { + bail!("stargz: compressed size is too big"); + } + + let mut chunk = blob_chunks[idx].inner.deref().clone(); + let uncomp_size = chunk.uncompressed_size() as usize; + let max_size = (next - curr) as usize; + let max_gzip_size = compute_compressed_gzip_size(uncomp_size, max_size); + let chunk_index = blob_ctx.alloc_chunk_index()?; + chunk.set_index(chunk_index); + chunk.set_blob_index(blob_index); + chunk.set_compressed_size(max_gzip_size as u32); + blob_ctx.add_chunk_meta_info(&chunk, None)?; + compressed_blob_size = std::cmp::max( + compressed_blob_size, + chunk.compressed_offset() + chunk.compressed_size() as u64, + ); + assert_eq!(Arc::strong_count(&blob_chunks[idx].inner), 1); + blob_chunks[idx].inner = Arc::new(chunk); + } + + blob_ctx.uncompressed_blob_size = self.uncompressed_offset; + blob_ctx.compressed_blob_size = compressed_blob_size; + + Ok(()) + } + + fn fix_nodes(&mut self, bootstrap: &mut Bootstrap) -> Result<()> { + bootstrap + .tree + .walk_bfs(true, &mut |n| { + let mut node = n.lock_node(); + let node_path = node.path(); + if let Some((size, ref mut chunks)) = self.file_chunk_map.get_mut(node_path) { + node.inode.set_size(*size); + node.inode.set_child_count(chunks.len() as u32); + node.chunks = chunks.to_vec(); + } + + Ok(()) + }) + .context("stargz: failed to update chunk info array for nodes")?; + + for (k, v) in self.hardlink_map.iter() { + match bootstrap.tree.get_node(k) { + Some(n) => { + let mut node = n.lock_node(); + let target = v.lock().unwrap(); + node.inode.set_size(target.inode.size()); + node.inode.set_child_count(target.inode.child_count()); + node.chunks = target.chunks.clone(); + node.set_xattr(target.info.xattrs.clone()); + } + None => bail!( + "stargz: failed to get target node for hardlink {}", + k.display() + ), + } + } + + Ok(()) + } +} + +impl Builder for StargzBuilder { + fn build( + &mut self, + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + ) -> Result { + if ctx.fs_version != RafsVersion::V6 { + bail!( + "stargz: unsupported filesystem version {:?}", + ctx.fs_version + ); + } else if ctx.compressor != compress::Algorithm::GZip { + bail!("stargz: invalid compression algorithm {:?}", ctx.compressor); + } else if ctx.digester != digest::Algorithm::Sha256 { + bail!("stargz: invalid digest algorithm {:?}", ctx.digester); + } + let mut blob_writer: Box = if let Some(blob_stor) = ctx.blob_storage.clone() { + Box::new(ArtifactWriter::new(blob_stor)?) 
+ } else { + Box::::default() + }; + let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; + let layer_idx = u16::from(bootstrap_ctx.layered); + + // Build filesystem tree from the stargz TOC. + let tree = timing_tracer!({ self.build_tree(ctx, layer_idx) }, "build_tree")?; + + // Build bootstrap + let mut bootstrap = timing_tracer!( + { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, + "build_bootstrap" + )?; + + self.fix_chunk_info(ctx, blob_mgr)?; + self.fix_nodes(&mut bootstrap)?; + + // Dump blob file + timing_tracer!( + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, + "dump_blob" + )?; + + // Dump blob meta information + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; + } + + // Dump RAFS meta/bootstrap and finalize the data blob. + if ctx.blob_inline_meta { + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + } else { + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + } + + lazy_drop(bootstrap_ctx); + + BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ArtifactStorage, ConversionType, Features, Prefetch, WhiteoutSpec}; + + #[test] + fn test_build_stargz_toc() { + let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); + let mut tmp_dir = tmp_dir.as_path().to_path_buf(); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let source_path = + PathBuf::from(root_dir).join("../tests/texture/stargz/estargz_sample.json"); + let prefetch = Prefetch::default(); + let mut ctx = BuildContext::new( + "".to_string(), + true, + 0, + compress::Algorithm::GZip, + digest::Algorithm::Sha256, + true, + WhiteoutSpec::Oci, + ConversionType::EStargzIndexToRef, + source_path, + prefetch, + Some(ArtifactStorage::FileDir(tmp_dir.clone())), + false, + Features::new(), + false, + ); + ctx.fs_version = RafsVersion::V6; + ctx.conversion_type = ConversionType::EStargzToRafs; + let mut bootstrap_mgr = + BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir.clone())), None); + let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); + let mut builder = StargzBuilder::new(0x1000000, &ctx); + + let builder = builder.build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr); + assert!(builder.is_ok()); + let builder = builder.unwrap(); + assert_eq!( + builder.blobs, + vec![String::from( + "bd4eff3fe6f5a352457c076d2133583e43db895b4af08d717b3fbcaeca89834e" + )] + ); + assert_eq!(builder.blob_size, Some(4128)); + tmp_dir.push("e60676aef5cc0d5caca9f4c8031f5b0c8392a0611d44c8e1bbc46dbf7fe7bfef"); + assert_eq!( + builder.bootstrap_path.unwrap(), + tmp_dir.to_str().unwrap().to_string() + ) + } + + #[test] + fn test_toc_entry() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); + + let mut entry = TocEntry { + name: source_path, + toc_type: "".to_string(), + size: 0x10, + link_name: PathBuf::from("link_name"), + mode: 0, + uid: 1, + gid: 1, + uname: "user_name".to_string(), + gname: "group_name".to_string(), + dev_major: 
255, + dev_minor: 33, + xattrs: Default::default(), + digest: Default::default(), + offset: 0, + chunk_offset: 0, + chunk_size: 0, + chunk_digest: "sha256:".to_owned(), + inner_offset: 0, + }; + entry.chunk_digest.extend(vec!['a'; 64].iter()); + + entry.toc_type = "dir".to_owned(); + assert!(entry.is_dir()); + assert!(entry.is_supported()); + assert_eq!(entry.mode(), libc::S_IFDIR as u32); + assert_eq!(entry.rdev(), u32::MAX); + + entry.toc_type = "req".to_owned(); + assert!(!entry.is_reg()); + entry.toc_type = "reg".to_owned(); + assert!(entry.is_reg()); + assert!(entry.is_supported()); + assert_eq!(entry.mode(), libc::S_IFREG as u32); + assert_eq!(entry.size(), 0x10); + + entry.toc_type = "symlink".to_owned(); + assert!(entry.is_symlink()); + assert!(entry.is_supported()); + assert_eq!(entry.mode(), libc::S_IFLNK as u32); + assert_eq!(entry.symlink_link_path(), Path::new("link_name")); + assert!(entry.normalize().is_ok()); + + entry.toc_type = "hardlink".to_owned(); + assert!(entry.is_supported()); + assert!(entry.is_hardlink()); + assert_eq!(entry.mode(), libc::S_IFREG as u32); + assert_eq!(entry.hardlink_link_path(), Path::new("link_name")); + assert!(entry.normalize().is_ok()); + + entry.toc_type = "chunk".to_owned(); + assert!(entry.is_supported()); + assert!(entry.is_chunk()); + assert_eq!(entry.mode(), 0); + assert_eq!(entry.size(), 0); + assert!(entry.normalize().is_err()); + + entry.toc_type = "block".to_owned(); + assert!(entry.is_special()); + assert!(entry.is_blockdev()); + assert_eq!(entry.mode(), libc::S_IFBLK as u32); + + entry.toc_type = "char".to_owned(); + assert!(entry.is_special()); + assert!(entry.is_chardev()); + assert_eq!(entry.mode(), libc::S_IFCHR as u32); + assert_ne!(entry.size(), 0x10); + + entry.toc_type = "fifo".to_owned(); + assert!(entry.is_fifo()); + assert!(entry.is_special()); + assert_eq!(entry.mode(), libc::S_IFIFO as u32); + assert_eq!(entry.rdev(), 65313); + + assert_eq!(entry.name().unwrap().to_str(), Some("all-entry-type.tar")); + entry.name = PathBuf::from("/"); + assert_eq!(entry.name().unwrap().to_str(), Some("/")); + assert_ne!(entry.path(), Path::new("all-entry-type.tar")); + + assert_eq!(entry.block_id().unwrap().data, [0xaa as u8; 32]); + + entry.name = PathBuf::from(""); + assert!(entry.normalize().is_err()); + } +} diff --git a/builder/src/tarball.rs b/builder/src/tarball.rs index edc996ac553..890e5b6ddd4 100644 --- a/builder/src/tarball.rs +++ b/builder/src/tarball.rs @@ -1,735 +1,735 @@ -// Copyright 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Generate RAFS filesystem from a tarball. -//! -//! It support generating RAFS filesystem from a tar/targz/stargz file with or without data blob. -//! -//! The tarball data is arrange as a sequence of tar headers with associated file data interleaved. -//! - (tar header) (tar header) (file data) (tar header) (file data) (tar header) -//! And to support read tarball data from FIFO, we could only go over the tarball stream once. -//! So the workflow is as: -//! - for each tar header from the stream -//! -- generate RAFS filesystem node from the tar header -//! -- optionally dump file data associated with the tar header into RAFS data blob -//! - arrange all generated RAFS nodes into a RAFS filesystem tree -//! 
- dump the RAFS filesystem tree into RAFS metadata blob -use std::ffi::{OsStr, OsString}; -use std::fs::{File, OpenOptions}; -use std::io::{BufReader, Read, Seek, SeekFrom}; -use std::os::unix::ffi::OsStrExt; -use std::path::{Path, PathBuf}; -use std::sync::Mutex; - -use anyhow::{anyhow, bail, Context, Result}; -use tar::{Archive, Entry, EntryType, Header}; - -use nydus_api::enosys; -use nydus_rafs::metadata::inode::{InodeWrapper, RafsInodeFlags, RafsV6Inode}; -use nydus_rafs::metadata::layout::v5::RafsV5Inode; -use nydus_rafs::metadata::layout::RafsXAttrs; -use nydus_rafs::metadata::RafsVersion; -use nydus_storage::device::BlobFeatures; -use nydus_storage::meta::ZranContextGenerator; -use nydus_storage::RAFS_MAX_CHUNKS_PER_BLOB; -use nydus_utils::compact::makedev; -use nydus_utils::compress::zlib_random::{ZranReader, ZRAN_READER_BUF_SIZE}; -use nydus_utils::compress::ZlibDecoder; -use nydus_utils::digest::RafsDigest; -use nydus_utils::{div_round_up, lazy_drop, root_tracer, timing_tracer, BufReaderInfo, ByteSize}; - -use crate::core::context::{Artifact, NoopArtifactWriter}; - -use super::core::blob::Blob; -use super::core::context::{ - ArtifactWriter, BlobManager, BootstrapManager, BuildContext, BuildOutput, ConversionType, -}; -use super::core::node::{Node, NodeInfo}; -use super::core::tree::Tree; -use super::{build_bootstrap, dump_bootstrap, finalize_blob, Builder, TarBuilder}; - -enum CompressionType { - None, - Gzip, -} - -enum TarReader { - File(File), - BufReader(BufReader), - BufReaderInfo(BufReaderInfo), - BufReaderInfoSeekable(BufReaderInfo), - TarGzFile(Box>), - TarGzBufReader(Box>>), - ZranReader(ZranReader), -} - -impl Read for TarReader { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - match self { - TarReader::File(f) => f.read(buf), - TarReader::BufReader(f) => f.read(buf), - TarReader::BufReaderInfo(b) => b.read(buf), - TarReader::BufReaderInfoSeekable(b) => b.read(buf), - TarReader::TarGzFile(f) => f.read(buf), - TarReader::TarGzBufReader(b) => b.read(buf), - TarReader::ZranReader(f) => f.read(buf), - } - } -} - -impl TarReader { - fn seekable(&self) -> bool { - matches!( - self, - TarReader::File(_) | TarReader::BufReaderInfoSeekable(_) - ) - } -} - -impl Seek for TarReader { - fn seek(&mut self, pos: SeekFrom) -> std::io::Result { - match self { - TarReader::File(f) => f.seek(pos), - TarReader::BufReaderInfoSeekable(b) => b.seek(pos), - _ => Err(enosys!("seek() not supported!")), - } - } -} - -struct TarballTreeBuilder<'a> { - ty: ConversionType, - ctx: &'a mut BuildContext, - blob_mgr: &'a mut BlobManager, - blob_writer: &'a mut dyn Artifact, - buf: Vec, - builder: TarBuilder, -} - -impl<'a> TarballTreeBuilder<'a> { - /// Create a new instance of `TarballBuilder`. - pub fn new( - ty: ConversionType, - ctx: &'a mut BuildContext, - blob_mgr: &'a mut BlobManager, - blob_writer: &'a mut dyn Artifact, - layer_idx: u16, - ) -> Self { - let builder = TarBuilder::new(ctx.explicit_uidgid, layer_idx, ctx.fs_version); - Self { - ty, - ctx, - blob_mgr, - buf: Vec::new(), - blob_writer, - builder, - } - } - - fn build_tree(&mut self) -> Result { - let file = OpenOptions::new() - .read(true) - .open(self.ctx.source_path.clone()) - .context("tarball: can not open source file for conversion")?; - let mut is_file = match file.metadata() { - Ok(md) => md.file_type().is_file(), - Err(_) => false, - }; - - let reader = match self.ty { - ConversionType::EStargzToRef - | ConversionType::TargzToRef - | ConversionType::TarToRef => match Self::detect_compression_algo(file)? 
{ - (CompressionType::Gzip, buf_reader) => { - let generator = ZranContextGenerator::from_buf_reader(buf_reader)?; - let reader = generator.reader(); - self.ctx.blob_zran_generator = Some(Mutex::new(generator)); - self.ctx.blob_features.insert(BlobFeatures::ZRAN); - TarReader::ZranReader(reader) - } - (CompressionType::None, buf_reader) => { - self.ty = ConversionType::TarToRef; - let reader = BufReaderInfo::from_buf_reader(buf_reader); - self.ctx.blob_tar_reader = Some(reader.clone()); - TarReader::BufReaderInfo(reader) - } - }, - ConversionType::EStargzToRafs - | ConversionType::TargzToRafs - | ConversionType::TarToRafs => match Self::detect_compression_algo(file)? { - (CompressionType::Gzip, buf_reader) => { - if is_file { - let mut file = buf_reader.into_inner(); - file.seek(SeekFrom::Start(0))?; - TarReader::TarGzFile(Box::new(ZlibDecoder::new(file))) - } else { - TarReader::TarGzBufReader(Box::new(ZlibDecoder::new(buf_reader))) - } - } - (CompressionType::None, buf_reader) => { - if is_file { - let mut file = buf_reader.into_inner(); - file.seek(SeekFrom::Start(0))?; - TarReader::File(file) - } else { - TarReader::BufReader(buf_reader) - } - } - }, - ConversionType::TarToTarfs => { - let mut reader = BufReaderInfo::from_buf_reader(BufReader::new(file)); - self.ctx.blob_tar_reader = Some(reader.clone()); - if !self.ctx.blob_id.is_empty() { - reader.enable_digest_calculation(false); - } else { - // Disable seek when need to calculate hash value. - is_file = false; - } - // only enable seek when hash computing is disabled. - if is_file { - TarReader::BufReaderInfoSeekable(reader) - } else { - TarReader::BufReaderInfo(reader) - } - } - _ => return Err(anyhow!("tarball: unsupported image conversion type")), - }; - - let is_seekable = reader.seekable(); - let mut tar = Archive::new(reader); - tar.set_ignore_zeros(true); - tar.set_preserve_mtime(true); - tar.set_preserve_permissions(true); - tar.set_unpack_xattrs(true); - - // Prepare scratch buffer for dumping file data. - if self.buf.len() < self.ctx.chunk_size as usize { - self.buf = vec![0u8; self.ctx.chunk_size as usize]; - } - - // Generate the root node in advance, it may be overwritten by entries from the tar stream. - let root = self.builder.create_directory(&[OsString::from("/")])?; - let mut tree = Tree::new(root); - - // Generate RAFS node for each tar entry, and optionally adding missing parents. - let entries = if is_seekable { - tar.entries_with_seek() - .context("tarball: failed to read entries from tar")? - } else { - tar.entries() - .context("tarball: failed to read entries from tar")? - }; - for entry in entries { - let mut entry = entry.context("tarball: failed to read entry from tar")?; - let path = entry - .path() - .context("tarball: failed to to get path from tar entry")?; - let path = PathBuf::from("/").join(path); - let path = path.components().as_path(); - if !self.builder.is_stargz_special_files(path) { - self.parse_entry(&mut tree, &mut entry, path)?; - } - } - - // Update directory size for RAFS V5 after generating the tree. 
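Each tar entry visited by `build_tree()` contributes one RAFS node, with the entry's path, size, and header driving the node's metadata. The round-trip below is a minimal sketch of that iteration using the same `tar` crate the builder already depends on; the in-memory archive, the `hello.txt` name, and the file contents are invented for illustration.

```rust
use std::io::Cursor;
use tar::{Archive, Builder, Header};

fn main() -> std::io::Result<()> {
    // Build a one-file tar archive in memory.
    let data = b"hello";
    let mut builder = Builder::new(Vec::new());
    let mut header = Header::new_gnu();
    header.set_size(data.len() as u64);
    header.set_mode(0o644);
    header.set_cksum();
    builder.append_data(&mut header, "hello.txt", &data[..])?;
    let tar_bytes = builder.into_inner()?;

    // Read it back the same way build_tree() walks entries.
    let mut archive = Archive::new(Cursor::new(tar_bytes));
    archive.set_ignore_zeros(true);
    for entry in archive.entries()? {
        let entry = entry?;
        println!("{} ({} bytes)", entry.path()?.display(), entry.size());
    }
    Ok(())
}
```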
- if self.ctx.fs_version.is_v5() { - Self::set_v5_dir_size(&mut tree); - } - - Ok(tree) - } - - fn parse_entry( - &mut self, - tree: &mut Tree, - entry: &mut Entry, - path: &Path, - ) -> Result<()> { - let header = entry.header(); - let entry_type = header.entry_type(); - if entry_type.is_gnu_longname() { - return Err(anyhow!("tarball: unsupported gnu_longname from tar header")); - } else if entry_type.is_gnu_longlink() { - return Err(anyhow!("tarball: unsupported gnu_longlink from tar header")); - } else if entry_type.is_pax_local_extensions() { - return Err(anyhow!( - "tarball: unsupported pax_local_extensions from tar header" - )); - } else if entry_type.is_pax_global_extensions() { - return Err(anyhow!( - "tarball: unsupported pax_global_extensions from tar header" - )); - } else if entry_type.is_contiguous() { - return Err(anyhow!( - "tarball: unsupported contiguous entry type from tar header" - )); - } else if entry_type.is_gnu_sparse() { - return Err(anyhow!( - "tarball: unsupported gnu sparse file extension from tar header" - )); - } - - let mut file_size = entry.size(); - let name = Self::get_file_name(path)?; - let mode = Self::get_mode(header)?; - let (uid, gid) = Self::get_uid_gid(self.ctx, header)?; - let mtime = header.mtime().unwrap_or_default(); - let mut flags = match self.ctx.fs_version { - RafsVersion::V5 => RafsInodeFlags::default(), - RafsVersion::V6 => RafsInodeFlags::default(), - }; - - // Parse special files - let rdev = if entry_type.is_block_special() - || entry_type.is_character_special() - || entry_type.is_fifo() - { - let major = header - .device_major() - .context("tarball: failed to get device major from tar entry")? - .ok_or_else(|| anyhow!("tarball: failed to get major device from tar entry"))?; - let minor = header - .device_minor() - .context("tarball: failed to get device major from tar entry")? - .ok_or_else(|| anyhow!("tarball: failed to get minor device from tar entry"))?; - makedev(major as u64, minor as u64) as u32 - } else { - u32::MAX - }; - - // Parse symlink - let (symlink, symlink_size) = if entry_type.is_symlink() { - let symlink_link_path = entry - .link_name() - .context("tarball: failed to get target path for tar symlink entry")? - .ok_or_else(|| anyhow!("tarball: failed to get symlink target tor tar entry"))?; - let symlink_size = symlink_link_path.as_os_str().byte_size(); - if symlink_size > u16::MAX as usize { - bail!("tarball: symlink target from tar entry is too big"); - } - file_size = symlink_size as u64; - flags |= RafsInodeFlags::SYMLINK; - ( - Some(symlink_link_path.as_os_str().to_owned()), - symlink_size as u16, - ) - } else { - (None, 0) - }; - - let mut child_count = 0; - if entry_type.is_file() { - child_count = div_round_up(file_size, self.ctx.chunk_size as u64); - if child_count > RAFS_MAX_CHUNKS_PER_BLOB as u64 { - bail!("tarball: file size 0x{:x} is too big", file_size); - } - } - - // Handle hardlink ino - let mut hardlink_target = None; - let ino = if entry_type.is_hard_link() { - let link_path = entry - .link_name() - .context("tarball: failed to get target path for tar symlink entry")? - .ok_or_else(|| anyhow!("tarball: failed to get symlink target tor tar entry"))?; - let link_path = PathBuf::from("/").join(link_path); - let link_path = link_path.components().as_path(); - let targets = Node::generate_target_vec(link_path); - assert!(!targets.is_empty()); - let mut tmp_tree: &Tree = tree; - for name in &targets[1..] 
{ - match tmp_tree.get_child_idx(name.as_bytes()) { - Some(idx) => tmp_tree = &tmp_tree.children[idx], - None => { - bail!( - "tarball: unknown target {} for hardlink {}", - link_path.display(), - path.display() - ); - } - } - } - let mut tmp_node = tmp_tree.lock_node(); - if !tmp_node.is_reg() { - bail!( - "tarball: target {} for hardlink {} is not a regular file", - link_path.display(), - path.display() - ); - } - hardlink_target = Some(tmp_tree); - flags |= RafsInodeFlags::HARDLINK; - tmp_node.inode.set_has_hardlink(true); - tmp_node.inode.ino() - } else { - self.builder.next_ino() - }; - - // Parse xattrs - let mut xattrs = RafsXAttrs::new(); - if let Some(exts) = entry.pax_extensions()? { - for p in exts { - match p { - Ok(pax) => { - let prefix = b"SCHILY.xattr."; - let key = pax.key_bytes(); - if key.starts_with(prefix) { - let x_key = OsStr::from_bytes(&key[prefix.len()..]); - xattrs.add(x_key.to_os_string(), pax.value_bytes().to_vec())?; - } - } - Err(e) => { - return Err(anyhow!( - "tarball: failed to parse PaxExtension from tar header, {}", - e - )) - } - } - } - } - - let mut inode = match self.ctx.fs_version { - RafsVersion::V5 => InodeWrapper::V5(RafsV5Inode { - i_digest: RafsDigest::default(), - i_parent: 0, - i_ino: ino, - i_projid: 0, - i_uid: uid, - i_gid: gid, - i_mode: mode, - i_size: file_size, - i_nlink: 1, - i_blocks: 0, - i_flags: flags, - i_child_index: 0, - i_child_count: child_count as u32, - i_name_size: name.len() as u16, - i_symlink_size: symlink_size, - i_rdev: rdev, - i_mtime: mtime, - i_mtime_nsec: 0, - i_reserved: [0; 8], - }), - RafsVersion::V6 => InodeWrapper::V6(RafsV6Inode { - i_ino: ino, - i_projid: 0, - i_uid: uid, - i_gid: gid, - i_mode: mode, - i_size: file_size, - i_nlink: 1, - i_blocks: 0, - i_flags: flags, - i_child_count: child_count as u32, - i_name_size: name.len() as u16, - i_symlink_size: symlink_size, - i_rdev: rdev, - i_mtime: mtime, - i_mtime_nsec: 0, - }), - }; - inode.set_has_xattr(!xattrs.is_empty()); - - let source = PathBuf::from("/"); - let target = Node::generate_target(path, &source); - let target_vec = Node::generate_target_vec(&target); - let info = NodeInfo { - explicit_uidgid: self.ctx.explicit_uidgid, - src_ino: ino, - src_dev: u64::MAX, - rdev: rdev as u64, - path: path.to_path_buf(), - source, - target, - target_vec, - symlink, - xattrs, - v6_force_extended_inode: false, - }; - let mut node = Node::new(inode, info, self.builder.layer_idx); - - // Special handling of hardlink. - // Tar hardlink header has zero file size and no file data associated, so copy value from - // the associated regular file. - if let Some(t) = hardlink_target { - let n = t.lock_node(); - if n.inode.is_v5() { - node.inode.set_digest(n.inode.digest().to_owned()); - } - node.inode.set_size(n.inode.size()); - node.inode.set_child_count(n.inode.child_count()); - node.chunks = n.chunks.clone(); - node.set_xattr(n.info.xattrs.clone()); - } else { - node.dump_node_data_with_reader( - self.ctx, - self.blob_mgr, - self.blob_writer, - Some(entry), - &mut self.buf, - )?; - } - - // Update inode.i_blocks for RAFS v5. 
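A hardlink entry carries no data of its own, so the builder resolves its target by walking the partially built tree along the link path's components and then reuses the target's inode number, size, and chunks. The sketch below models only the lookup step with a toy `Tree` type; it is not the builder's `Tree`/`Node` API and assumes an absolute, already-normalized link path.

```rust
use std::path::{Component, Path};

/// A toy stand-in for the builder's in-memory tree.
struct Tree {
    name: String,
    children: Vec<Tree>,
}

/// Walk the tree along the absolute `link_path` and return the target node, if any.
fn lookup<'a>(root: &'a Tree, link_path: &Path) -> Option<&'a Tree> {
    let mut cur = root;
    for comp in link_path.components() {
        match comp {
            Component::RootDir => continue,
            Component::Normal(name) => {
                cur = cur
                    .children
                    .iter()
                    .find(|c| Some(c.name.as_str()) == name.to_str())?;
            }
            _ => return None,
        }
    }
    Some(cur)
}

fn main() {
    let root = Tree {
        name: "/".into(),
        children: vec![Tree {
            name: "bin".into(),
            children: vec![Tree {
                name: "sh".into(),
                children: Vec::new(),
            }],
        }],
    };
    assert!(lookup(&root, Path::new("/bin/sh")).is_some());
    assert!(lookup(&root, Path::new("/bin/bash")).is_none());
}
```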
- if self.ctx.fs_version == RafsVersion::V5 && !entry_type.is_dir() { - node.v5_set_inode_blocks(); - } - - self.builder.insert_into_tree(tree, node) - } - - fn get_uid_gid(ctx: &BuildContext, header: &Header) -> Result<(u32, u32)> { - let uid = if ctx.explicit_uidgid { - header.uid().unwrap_or_default() - } else { - 0 - }; - let gid = if ctx.explicit_uidgid { - header.gid().unwrap_or_default() - } else { - 0 - }; - if uid > u32::MAX as u64 || gid > u32::MAX as u64 { - bail!( - "tarball: uid {:x} or gid {:x} from tar entry is out of range", - uid, - gid - ); - } - - Ok((uid as u32, gid as u32)) - } - - fn get_mode(header: &Header) -> Result { - let mode = header - .mode() - .context("tarball: failed to get permission/mode from tar entry")?; - let ty = match header.entry_type() { - EntryType::Regular | EntryType::Link => libc::S_IFREG, - EntryType::Directory => libc::S_IFDIR, - EntryType::Symlink => libc::S_IFLNK, - EntryType::Block => libc::S_IFBLK, - EntryType::Char => libc::S_IFCHR, - EntryType::Fifo => libc::S_IFIFO, - _ => bail!("tarball: unsupported tar entry type"), - }; - Ok((mode & !libc::S_IFMT as u32) | ty as u32) - } - - fn get_file_name(path: &Path) -> Result<&OsStr> { - let name = if path == Path::new("/") { - path.as_os_str() - } else { - path.file_name().ok_or_else(|| { - anyhow!( - "tarball: failed to get file name from tar entry with path {}", - path.display() - ) - })? - }; - if name.len() > u16::MAX as usize { - bail!( - "tarball: file name {} from tar entry is too long", - name.to_str().unwrap_or_default() - ); - } - Ok(name) - } - - fn set_v5_dir_size(tree: &mut Tree) { - for c in &mut tree.children { - Self::set_v5_dir_size(c); - } - let mut node = tree.lock_node(); - node.v5_set_dir_size(RafsVersion::V5, &tree.children); - } - - fn detect_compression_algo(file: File) -> Result<(CompressionType, BufReader)> { - // Use 64K buffer to keep consistence with zlib-random. - let mut buf_reader = BufReader::with_capacity(ZRAN_READER_BUF_SIZE, file); - let mut buf = [0u8; 3]; - buf_reader.read_exact(&mut buf)?; - if buf[0] == 0x1f && buf[1] == 0x8b && buf[2] == 0x08 { - buf_reader.seek_relative(-3).unwrap(); - Ok((CompressionType::Gzip, buf_reader)) - } else { - buf_reader.seek_relative(-3).unwrap(); - Ok((CompressionType::None, buf_reader)) - } - } -} - -/// Builder to create RAFS filesystems from tarballs. -pub struct TarballBuilder { - ty: ConversionType, -} - -impl TarballBuilder { - /// Create a new instance of [TarballBuilder] to build a RAFS filesystem from a tarball. - pub fn new(conversion_type: ConversionType) -> Self { - Self { - ty: conversion_type, - } - } -} - -impl Builder for TarballBuilder { - fn build( - &mut self, - ctx: &mut BuildContext, - bootstrap_mgr: &mut BootstrapManager, - blob_mgr: &mut BlobManager, - ) -> Result { - let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; - let layer_idx = u16::from(bootstrap_ctx.layered); - let mut blob_writer: Box = match self.ty { - ConversionType::EStargzToRafs - | ConversionType::EStargzToRef - | ConversionType::TargzToRafs - | ConversionType::TargzToRef - | ConversionType::TarToRafs - | ConversionType::TarToTarfs => { - if let Some(blob_stor) = ctx.blob_storage.clone() { - Box::new(ArtifactWriter::new(blob_stor)?) 
- } else { - Box::::default() - } - } - _ => { - return Err(anyhow!( - "tarball: unsupported image conversion type '{}'", - self.ty - )) - } - }; - - let mut tree_builder = - TarballTreeBuilder::new(self.ty, ctx, blob_mgr, blob_writer.as_mut(), layer_idx); - let tree = timing_tracer!({ tree_builder.build_tree() }, "build_tree")?; - - // Build bootstrap - let mut bootstrap = timing_tracer!( - { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, - "build_bootstrap" - )?; - - // Dump blob file - timing_tracer!( - { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, - "dump_blob" - )?; - - // Dump blob meta information - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; - } - - // Dump RAFS meta/bootstrap and finalize the data blob. - if ctx.blob_inline_meta { - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - } else { - finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; - timing_tracer!( - { - dump_bootstrap( - ctx, - bootstrap_mgr, - &mut bootstrap_ctx, - &mut bootstrap, - blob_mgr, - blob_writer.as_mut(), - ) - }, - "dump_bootstrap" - )?; - } - - lazy_drop(bootstrap_ctx); - - BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ArtifactStorage, Features, Prefetch, WhiteoutSpec}; - use nydus_utils::{compress, digest}; - - #[test] - fn test_build_tarfs() { - let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); - let tmp_dir = tmp_dir.as_path().to_path_buf(); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); - let prefetch = Prefetch::default(); - let mut ctx = BuildContext::new( - "test".to_string(), - true, - 0, - compress::Algorithm::None, - digest::Algorithm::Sha256, - true, - WhiteoutSpec::Oci, - ConversionType::TarToTarfs, - source_path, - prefetch, - Some(ArtifactStorage::FileDir(tmp_dir.clone())), - false, - Features::new(), - false, - ); - let mut bootstrap_mgr = - BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir)), None); - let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); - let mut builder = TarballBuilder::new(ConversionType::TarToTarfs); - builder - .build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr) - .unwrap(); - } - - #[test] - fn test_build_encrypted_tarfs() { - let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); - let tmp_dir = tmp_dir.as_path().to_path_buf(); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); - let prefetch = Prefetch::default(); - let mut ctx = BuildContext::new( - "test".to_string(), - true, - 0, - compress::Algorithm::None, - digest::Algorithm::Sha256, - true, - WhiteoutSpec::Oci, - ConversionType::TarToTarfs, - source_path, - prefetch, - Some(ArtifactStorage::FileDir(tmp_dir.clone())), - false, - Features::new(), - true, - ); - let mut bootstrap_mgr = - BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir)), None); - let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); - let mut builder = TarballBuilder::new(ConversionType::TarToTarfs); - builder - .build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr) 
- .unwrap(); - } -} +// Copyright 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Generate RAFS filesystem from a tarball. +//! +//! It support generating RAFS filesystem from a tar/targz/stargz file with or without data blob. +//! +//! The tarball data is arrange as a sequence of tar headers with associated file data interleaved. +//! - (tar header) (tar header) (file data) (tar header) (file data) (tar header) +//! And to support read tarball data from FIFO, we could only go over the tarball stream once. +//! So the workflow is as: +//! - for each tar header from the stream +//! -- generate RAFS filesystem node from the tar header +//! -- optionally dump file data associated with the tar header into RAFS data blob +//! - arrange all generated RAFS nodes into a RAFS filesystem tree +//! - dump the RAFS filesystem tree into RAFS metadata blob +use std::ffi::{OsStr, OsString}; +use std::fs::{File, OpenOptions}; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +use anyhow::{anyhow, bail, Context, Result}; +use tar::{Archive, Entry, EntryType, Header}; + +use nydus_api::enosys; +use nydus_rafs::metadata::inode::{InodeWrapper, RafsInodeFlags, RafsV6Inode}; +use nydus_rafs::metadata::layout::v5::RafsV5Inode; +use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_rafs::metadata::RafsVersion; +use nydus_storage::device::BlobFeatures; +use nydus_storage::meta::ZranContextGenerator; +use nydus_storage::RAFS_MAX_CHUNKS_PER_BLOB; +use nydus_utils::compact::makedev; +use nydus_utils::compress::zlib_random::{ZranReader, ZRAN_READER_BUF_SIZE}; +use nydus_utils::compress::ZlibDecoder; +use nydus_utils::digest::RafsDigest; +use nydus_utils::{div_round_up, lazy_drop, root_tracer, timing_tracer, BufReaderInfo, ByteSize}; + +use crate::core::context::{Artifact, NoopArtifactWriter}; + +use super::core::blob::Blob; +use super::core::context::{ + ArtifactWriter, BlobManager, BootstrapManager, BuildContext, BuildOutput, ConversionType, +}; +use super::core::node::{Node, NodeInfo}; +use super::core::tree::Tree; +use super::{build_bootstrap, dump_bootstrap, finalize_blob, Builder, TarBuilder}; + +enum CompressionType { + None, + Gzip, +} + +enum TarReader { + File(File), + BufReader(BufReader), + BufReaderInfo(BufReaderInfo), + BufReaderInfoSeekable(BufReaderInfo), + TarGzFile(Box>), + TarGzBufReader(Box>>), + ZranReader(ZranReader), +} + +impl Read for TarReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + match self { + TarReader::File(f) => f.read(buf), + TarReader::BufReader(f) => f.read(buf), + TarReader::BufReaderInfo(b) => b.read(buf), + TarReader::BufReaderInfoSeekable(b) => b.read(buf), + TarReader::TarGzFile(f) => f.read(buf), + TarReader::TarGzBufReader(b) => b.read(buf), + TarReader::ZranReader(f) => f.read(buf), + } + } +} + +impl TarReader { + fn seekable(&self) -> bool { + matches!( + self, + TarReader::File(_) | TarReader::BufReaderInfoSeekable(_) + ) + } +} + +impl Seek for TarReader { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + match self { + TarReader::File(f) => f.seek(pos), + TarReader::BufReaderInfoSeekable(b) => b.seek(pos), + _ => Err(enosys!("seek() not supported!")), + } + } +} + +struct TarballTreeBuilder<'a> { + ty: ConversionType, + ctx: &'a mut BuildContext, + blob_mgr: &'a mut BlobManager, + blob_writer: &'a mut dyn Artifact, + buf: Vec, + builder: TarBuilder, +} + +impl<'a> TarballTreeBuilder<'a> { + /// Create a 
new instance of `TarballBuilder`. + pub fn new( + ty: ConversionType, + ctx: &'a mut BuildContext, + blob_mgr: &'a mut BlobManager, + blob_writer: &'a mut dyn Artifact, + layer_idx: u16, + ) -> Self { + let builder = TarBuilder::new(ctx.explicit_uidgid, layer_idx, ctx.fs_version); + Self { + ty, + ctx, + blob_mgr, + buf: Vec::new(), + blob_writer, + builder, + } + } + + fn build_tree(&mut self) -> Result { + let file = OpenOptions::new() + .read(true) + .open(self.ctx.source_path.clone()) + .context("tarball: can not open source file for conversion")?; + let mut is_file = match file.metadata() { + Ok(md) => md.file_type().is_file(), + Err(_) => false, + }; + + let reader = match self.ty { + ConversionType::EStargzToRef + | ConversionType::TargzToRef + | ConversionType::TarToRef => match Self::detect_compression_algo(file)? { + (CompressionType::Gzip, buf_reader) => { + let generator = ZranContextGenerator::from_buf_reader(buf_reader)?; + let reader = generator.reader(); + self.ctx.blob_zran_generator = Some(Mutex::new(generator)); + self.ctx.blob_features.insert(BlobFeatures::ZRAN); + TarReader::ZranReader(reader) + } + (CompressionType::None, buf_reader) => { + self.ty = ConversionType::TarToRef; + let reader = BufReaderInfo::from_buf_reader(buf_reader); + self.ctx.blob_tar_reader = Some(reader.clone()); + TarReader::BufReaderInfo(reader) + } + }, + ConversionType::EStargzToRafs + | ConversionType::TargzToRafs + | ConversionType::TarToRafs => match Self::detect_compression_algo(file)? { + (CompressionType::Gzip, buf_reader) => { + if is_file { + let mut file = buf_reader.into_inner(); + file.seek(SeekFrom::Start(0))?; + TarReader::TarGzFile(Box::new(ZlibDecoder::new(file))) + } else { + TarReader::TarGzBufReader(Box::new(ZlibDecoder::new(buf_reader))) + } + } + (CompressionType::None, buf_reader) => { + if is_file { + let mut file = buf_reader.into_inner(); + file.seek(SeekFrom::Start(0))?; + TarReader::File(file) + } else { + TarReader::BufReader(buf_reader) + } + } + }, + ConversionType::TarToTarfs => { + let mut reader = BufReaderInfo::from_buf_reader(BufReader::new(file)); + self.ctx.blob_tar_reader = Some(reader.clone()); + if !self.ctx.blob_id.is_empty() { + reader.enable_digest_calculation(false); + } else { + // Disable seek when need to calculate hash value. + is_file = false; + } + // only enable seek when hash computing is disabled. + if is_file { + TarReader::BufReaderInfoSeekable(reader) + } else { + TarReader::BufReaderInfo(reader) + } + } + _ => return Err(anyhow!("tarball: unsupported image conversion type")), + }; + + let is_seekable = reader.seekable(); + let mut tar = Archive::new(reader); + tar.set_ignore_zeros(true); + tar.set_preserve_mtime(true); + tar.set_preserve_permissions(true); + tar.set_unpack_xattrs(true); + + // Prepare scratch buffer for dumping file data. + if self.buf.len() < self.ctx.chunk_size as usize { + self.buf = vec![0u8; self.ctx.chunk_size as usize]; + } + + // Generate the root node in advance, it may be overwritten by entries from the tar stream. + let root = self.builder.create_directory(&[OsString::from("/")])?; + let mut tree = Tree::new(root); + + // Generate RAFS node for each tar entry, and optionally adding missing parents. + let entries = if is_seekable { + tar.entries_with_seek() + .context("tarball: failed to read entries from tar")? + } else { + tar.entries() + .context("tarball: failed to read entries from tar")? 
+ }; + for entry in entries { + let mut entry = entry.context("tarball: failed to read entry from tar")?; + let path = entry + .path() + .context("tarball: failed to to get path from tar entry")?; + let path = PathBuf::from("/").join(path); + let path = path.components().as_path(); + if !self.builder.is_stargz_special_files(path) { + self.parse_entry(&mut tree, &mut entry, path)?; + } + } + + // Update directory size for RAFS V5 after generating the tree. + if self.ctx.fs_version.is_v5() { + Self::set_v5_dir_size(&mut tree); + } + + Ok(tree) + } + + fn parse_entry( + &mut self, + tree: &mut Tree, + entry: &mut Entry, + path: &Path, + ) -> Result<()> { + let header = entry.header(); + let entry_type = header.entry_type(); + if entry_type.is_gnu_longname() { + return Err(anyhow!("tarball: unsupported gnu_longname from tar header")); + } else if entry_type.is_gnu_longlink() { + return Err(anyhow!("tarball: unsupported gnu_longlink from tar header")); + } else if entry_type.is_pax_local_extensions() { + return Err(anyhow!( + "tarball: unsupported pax_local_extensions from tar header" + )); + } else if entry_type.is_pax_global_extensions() { + return Err(anyhow!( + "tarball: unsupported pax_global_extensions from tar header" + )); + } else if entry_type.is_contiguous() { + return Err(anyhow!( + "tarball: unsupported contiguous entry type from tar header" + )); + } else if entry_type.is_gnu_sparse() { + return Err(anyhow!( + "tarball: unsupported gnu sparse file extension from tar header" + )); + } + + let mut file_size = entry.size(); + let name = Self::get_file_name(path)?; + let mode = Self::get_mode(header)?; + let (uid, gid) = Self::get_uid_gid(self.ctx, header)?; + let mtime = header.mtime().unwrap_or_default(); + let mut flags = match self.ctx.fs_version { + RafsVersion::V5 => RafsInodeFlags::default(), + RafsVersion::V6 => RafsInodeFlags::default(), + }; + + // Parse special files + let rdev = if entry_type.is_block_special() + || entry_type.is_character_special() + || entry_type.is_fifo() + { + let major = header + .device_major() + .context("tarball: failed to get device major from tar entry")? + .ok_or_else(|| anyhow!("tarball: failed to get major device from tar entry"))?; + let minor = header + .device_minor() + .context("tarball: failed to get device major from tar entry")? + .ok_or_else(|| anyhow!("tarball: failed to get minor device from tar entry"))?; + makedev(major as u64, minor as u64) as u32 + } else { + u32::MAX + }; + + // Parse symlink + let (symlink, symlink_size) = if entry_type.is_symlink() { + let symlink_link_path = entry + .link_name() + .context("tarball: failed to get target path for tar symlink entry")? 
+ .ok_or_else(|| anyhow!("tarball: failed to get symlink target tor tar entry"))?; + let symlink_size = symlink_link_path.as_os_str().byte_size(); + if symlink_size > u16::MAX as usize { + bail!("tarball: symlink target from tar entry is too big"); + } + file_size = symlink_size as u64; + flags |= RafsInodeFlags::SYMLINK; + ( + Some(symlink_link_path.as_os_str().to_owned()), + symlink_size as u16, + ) + } else { + (None, 0) + }; + + let mut child_count = 0; + if entry_type.is_file() { + child_count = div_round_up(file_size, self.ctx.chunk_size as u64); + if child_count > RAFS_MAX_CHUNKS_PER_BLOB as u64 { + bail!("tarball: file size 0x{:x} is too big", file_size); + } + } + + // Handle hardlink ino + let mut hardlink_target = None; + let ino = if entry_type.is_hard_link() { + let link_path = entry + .link_name() + .context("tarball: failed to get target path for tar symlink entry")? + .ok_or_else(|| anyhow!("tarball: failed to get symlink target tor tar entry"))?; + let link_path = PathBuf::from("/").join(link_path); + let link_path = link_path.components().as_path(); + let targets = Node::generate_target_vec(link_path); + assert!(!targets.is_empty()); + let mut tmp_tree: &Tree = tree; + for name in &targets[1..] { + match tmp_tree.get_child_idx(name.as_bytes()) { + Some(idx) => tmp_tree = &tmp_tree.children[idx], + None => { + bail!( + "tarball: unknown target {} for hardlink {}", + link_path.display(), + path.display() + ); + } + } + } + let mut tmp_node = tmp_tree.lock_node(); + if !tmp_node.is_reg() { + bail!( + "tarball: target {} for hardlink {} is not a regular file", + link_path.display(), + path.display() + ); + } + hardlink_target = Some(tmp_tree); + flags |= RafsInodeFlags::HARDLINK; + tmp_node.inode.set_has_hardlink(true); + tmp_node.inode.ino() + } else { + self.builder.next_ino() + }; + + // Parse xattrs + let mut xattrs = RafsXAttrs::new(); + if let Some(exts) = entry.pax_extensions()? 
{ + for p in exts { + match p { + Ok(pax) => { + let prefix = b"SCHILY.xattr."; + let key = pax.key_bytes(); + if key.starts_with(prefix) { + let x_key = OsStr::from_bytes(&key[prefix.len()..]); + xattrs.add(x_key.to_os_string(), pax.value_bytes().to_vec())?; + } + } + Err(e) => { + return Err(anyhow!( + "tarball: failed to parse PaxExtension from tar header, {}", + e + )) + } + } + } + } + + let mut inode = match self.ctx.fs_version { + RafsVersion::V5 => InodeWrapper::V5(RafsV5Inode { + i_digest: RafsDigest::default(), + i_parent: 0, + i_ino: ino, + i_projid: 0, + i_uid: uid, + i_gid: gid, + i_mode: mode, + i_size: file_size, + i_nlink: 1, + i_blocks: 0, + i_flags: flags, + i_child_index: 0, + i_child_count: child_count as u32, + i_name_size: name.len() as u16, + i_symlink_size: symlink_size, + i_rdev: rdev, + i_mtime: mtime, + i_mtime_nsec: 0, + i_reserved: [0; 8], + }), + RafsVersion::V6 => InodeWrapper::V6(RafsV6Inode { + i_ino: ino, + i_projid: 0, + i_uid: uid, + i_gid: gid, + i_mode: mode, + i_size: file_size, + i_nlink: 1, + i_blocks: 0, + i_flags: flags, + i_child_count: child_count as u32, + i_name_size: name.len() as u16, + i_symlink_size: symlink_size, + i_rdev: rdev, + i_mtime: mtime, + i_mtime_nsec: 0, + }), + }; + inode.set_has_xattr(!xattrs.is_empty()); + + let source = PathBuf::from("/"); + let target = Node::generate_target(path, &source); + let target_vec = Node::generate_target_vec(&target); + let info = NodeInfo { + explicit_uidgid: self.ctx.explicit_uidgid, + src_ino: ino, + src_dev: u64::MAX, + rdev: rdev as u64, + path: path.to_path_buf(), + source, + target, + target_vec, + symlink, + xattrs, + v6_force_extended_inode: false, + }; + let mut node = Node::new(inode, info, self.builder.layer_idx); + + // Special handling of hardlink. + // Tar hardlink header has zero file size and no file data associated, so copy value from + // the associated regular file. + if let Some(t) = hardlink_target { + let n = t.lock_node(); + if n.inode.is_v5() { + node.inode.set_digest(n.inode.digest().to_owned()); + } + node.inode.set_size(n.inode.size()); + node.inode.set_child_count(n.inode.child_count()); + node.chunks = n.chunks.clone(); + node.set_xattr(n.info.xattrs.clone()); + } else { + node.dump_node_data_with_reader( + self.ctx, + self.blob_mgr, + self.blob_writer, + Some(entry), + &mut self.buf, + )?; + } + + // Update inode.i_blocks for RAFS v5. 
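Extended attributes reach the builder as PAX records whose keys start with `SCHILY.xattr.`; the prefix is stripped and the remainder becomes the xattr name stored in `RafsXAttrs`. A minimal sketch of that key handling follows; `xattr_name` is a hypothetical helper written for illustration.

```rust
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;

/// Return the xattr name when the PAX record key carries an extended attribute.
fn xattr_name(pax_key: &[u8]) -> Option<&OsStr> {
    let prefix = b"SCHILY.xattr.";
    if pax_key.starts_with(prefix) {
        Some(OsStr::from_bytes(&pax_key[prefix.len()..]))
    } else {
        None
    }
}

fn main() {
    assert_eq!(
        xattr_name(b"SCHILY.xattr.user.comment"),
        Some(OsStr::new("user.comment"))
    );
    assert_eq!(xattr_name(b"mtime"), None);
}
```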
+ if self.ctx.fs_version == RafsVersion::V5 && !entry_type.is_dir() { + node.v5_set_inode_blocks(); + } + + self.builder.insert_into_tree(tree, node) + } + + fn get_uid_gid(ctx: &BuildContext, header: &Header) -> Result<(u32, u32)> { + let uid = if ctx.explicit_uidgid { + header.uid().unwrap_or_default() + } else { + 0 + }; + let gid = if ctx.explicit_uidgid { + header.gid().unwrap_or_default() + } else { + 0 + }; + if uid > u32::MAX as u64 || gid > u32::MAX as u64 { + bail!( + "tarball: uid {:x} or gid {:x} from tar entry is out of range", + uid, + gid + ); + } + + Ok((uid as u32, gid as u32)) + } + + fn get_mode(header: &Header) -> Result { + let mode = header + .mode() + .context("tarball: failed to get permission/mode from tar entry")?; + let ty = match header.entry_type() { + EntryType::Regular | EntryType::Link => libc::S_IFREG, + EntryType::Directory => libc::S_IFDIR, + EntryType::Symlink => libc::S_IFLNK, + EntryType::Block => libc::S_IFBLK, + EntryType::Char => libc::S_IFCHR, + EntryType::Fifo => libc::S_IFIFO, + _ => bail!("tarball: unsupported tar entry type"), + }; + Ok((mode & !libc::S_IFMT as u32) | ty as u32) + } + + fn get_file_name(path: &Path) -> Result<&OsStr> { + let name = if path == Path::new("/") { + path.as_os_str() + } else { + path.file_name().ok_or_else(|| { + anyhow!( + "tarball: failed to get file name from tar entry with path {}", + path.display() + ) + })? + }; + if name.len() > u16::MAX as usize { + bail!( + "tarball: file name {} from tar entry is too long", + name.to_str().unwrap_or_default() + ); + } + Ok(name) + } + + fn set_v5_dir_size(tree: &mut Tree) { + for c in &mut tree.children { + Self::set_v5_dir_size(c); + } + let mut node = tree.lock_node(); + node.v5_set_dir_size(RafsVersion::V5, &tree.children); + } + + fn detect_compression_algo(file: File) -> Result<(CompressionType, BufReader)> { + // Use 64K buffer to keep consistence with zlib-random. + let mut buf_reader = BufReader::with_capacity(ZRAN_READER_BUF_SIZE, file); + let mut buf = [0u8; 3]; + buf_reader.read_exact(&mut buf)?; + if buf[0] == 0x1f && buf[1] == 0x8b && buf[2] == 0x08 { + buf_reader.seek_relative(-3).unwrap(); + Ok((CompressionType::Gzip, buf_reader)) + } else { + buf_reader.seek_relative(-3).unwrap(); + Ok((CompressionType::None, buf_reader)) + } + } +} + +/// Builder to create RAFS filesystems from tarballs. +pub struct TarballBuilder { + ty: ConversionType, +} + +impl TarballBuilder { + /// Create a new instance of [TarballBuilder] to build a RAFS filesystem from a tarball. + pub fn new(conversion_type: ConversionType) -> Self { + Self { + ty: conversion_type, + } + } +} + +impl Builder for TarballBuilder { + fn build( + &mut self, + ctx: &mut BuildContext, + bootstrap_mgr: &mut BootstrapManager, + blob_mgr: &mut BlobManager, + ) -> Result { + let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; + let layer_idx = u16::from(bootstrap_ctx.layered); + let mut blob_writer: Box = match self.ty { + ConversionType::EStargzToRafs + | ConversionType::EStargzToRef + | ConversionType::TargzToRafs + | ConversionType::TargzToRef + | ConversionType::TarToRafs + | ConversionType::TarToTarfs => { + if let Some(blob_stor) = ctx.blob_storage.clone() { + Box::new(ArtifactWriter::new(blob_stor)?) 
+ } else { + Box::::default() + } + } + _ => { + return Err(anyhow!( + "tarball: unsupported image conversion type '{}'", + self.ty + )) + } + }; + + let mut tree_builder = + TarballTreeBuilder::new(self.ty, ctx, blob_mgr, blob_writer.as_mut(), layer_idx); + let tree = timing_tracer!({ tree_builder.build_tree() }, "build_tree")?; + + // Build bootstrap + let mut bootstrap = timing_tracer!( + { build_bootstrap(ctx, bootstrap_mgr, &mut bootstrap_ctx, blob_mgr, tree) }, + "build_bootstrap" + )?; + + // Dump blob file + timing_tracer!( + { Blob::dump(ctx, blob_mgr, blob_writer.as_mut()) }, + "dump_blob" + )?; + + // Dump blob meta information + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + Blob::dump_meta_data(ctx, blob_ctx, blob_writer.as_mut())?; + } + + // Dump RAFS meta/bootstrap and finalize the data blob. + if ctx.blob_inline_meta { + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + } else { + finalize_blob(ctx, blob_mgr, blob_writer.as_mut())?; + timing_tracer!( + { + dump_bootstrap( + ctx, + bootstrap_mgr, + &mut bootstrap_ctx, + &mut bootstrap, + blob_mgr, + blob_writer.as_mut(), + ) + }, + "dump_bootstrap" + )?; + } + + lazy_drop(bootstrap_ctx); + + BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ArtifactStorage, Features, Prefetch, WhiteoutSpec}; + use nydus_utils::{compress, digest}; + + #[test] + fn test_build_tarfs() { + let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); + let tmp_dir = tmp_dir.as_path().to_path_buf(); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); + let prefetch = Prefetch::default(); + let mut ctx = BuildContext::new( + "test".to_string(), + true, + 0, + compress::Algorithm::None, + digest::Algorithm::Sha256, + true, + WhiteoutSpec::Oci, + ConversionType::TarToTarfs, + source_path, + prefetch, + Some(ArtifactStorage::FileDir(tmp_dir.clone())), + false, + Features::new(), + false, + ); + let mut bootstrap_mgr = + BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir)), None); + let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); + let mut builder = TarballBuilder::new(ConversionType::TarToTarfs); + builder + .build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr) + .unwrap(); + } + + #[test] + fn test_build_encrypted_tarfs() { + let tmp_dir = vmm_sys_util::tempdir::TempDir::new().unwrap(); + let tmp_dir = tmp_dir.as_path().to_path_buf(); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let source_path = PathBuf::from(root_dir).join("../tests/texture/tar/all-entry-type.tar"); + let prefetch = Prefetch::default(); + let mut ctx = BuildContext::new( + "test".to_string(), + true, + 0, + compress::Algorithm::None, + digest::Algorithm::Sha256, + true, + WhiteoutSpec::Oci, + ConversionType::TarToTarfs, + source_path, + prefetch, + Some(ArtifactStorage::FileDir(tmp_dir.clone())), + false, + Features::new(), + true, + ); + let mut bootstrap_mgr = + BootstrapManager::new(Some(ArtifactStorage::FileDir(tmp_dir)), None); + let mut blob_mgr = BlobManager::new(digest::Algorithm::Sha256); + let mut builder = TarballBuilder::new(ConversionType::TarToTarfs); + builder + .build(&mut ctx, &mut bootstrap_mgr, &mut blob_mgr) 
+ .unwrap(); + } +} diff --git a/clib/Cargo.toml b/clib/Cargo.toml index 7f076036e13..0fb40dd7241 100644 --- a/clib/Cargo.toml +++ b/clib/Cargo.toml @@ -1,28 +1,28 @@ -[package] -name = "nydus-clib" -version = "0.1.0" -description = "C wrapper library for Nydus SDK" -authors = ["The Nydus Developers"] -license = "Apache-2.0" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[lib] -name = "nydus_clib" -crate-type = ["cdylib", "staticlib"] - -[dependencies] -libc = "0.2.137" -log = "0.4.17" -fuse-backend-rs = "^0.12.0" -nydus-api = { version = "0.3", path = "../api" } -nydus-rafs = { version = "0.3.1", path = "../rafs" } -nydus-storage = { version = "0.6.3", path = "../storage" } - -[features] -baekend-s3 = ["nydus-storage/backend-s3"] -backend-oss = ["nydus-storage/backend-oss"] -backend-registry = ["nydus-storage/backend-registry"] -backend-http-proxy = ["nydus-storage/backend-http-proxy"] -backend-localdisk = ["nydus-storage/backend-localdisk"] +[package] +name = "nydus-clib" +version = "0.1.0" +description = "C wrapper library for Nydus SDK" +authors = ["The Nydus Developers"] +license = "Apache-2.0" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[lib] +name = "nydus_clib" +crate-type = ["cdylib", "staticlib"] + +[dependencies] +libc = "0.2.137" +log = "0.4.17" +fuse-backend-rs = "^0.12.0" +nydus-api = { version = "0.3", path = "../api" } +nydus-rafs = { version = "0.3.1", path = "../rafs" } +nydus-storage = { version = "0.6.3", path = "../storage" } + +[features] +baekend-s3 = ["nydus-storage/backend-s3"] +backend-oss = ["nydus-storage/backend-oss"] +backend-registry = ["nydus-storage/backend-registry"] +backend-http-proxy = ["nydus-storage/backend-http-proxy"] +backend-localdisk = ["nydus-storage/backend-localdisk"] diff --git a/clib/examples/nydus_rafs.c b/clib/examples/nydus_rafs.c index d07d373a2ee..a9aba30c930 100644 --- a/clib/examples/nydus_rafs.c +++ b/clib/examples/nydus_rafs.c @@ -1,20 +1,20 @@ -#include -#include "../nydus.h" - -int main(int argc, char **argv) -{ - char *bootstrap = "../../tests/texture/repeatable/sha256-nocompress-repeatable"; - char *config = "version = 2\nid = \"my_id\"\n[backend]\ntype = \"localfs\"\n[backend.localfs]\ndir = \"../../tests/texture/repeatable/blobs\"\n[cache]\ntype = \"dummycache\"\n[rafs]"; - NydusFsHandle fs_handle; - - fs_handle = nydus_open_rafs(bootstrap, config); - if (fs_handle == NYDUS_INVALID_FS_HANDLE) { - printf("failed to open rafs filesystem from ../../tests/texture/repeatable/sha256-nocompress-repeatable\n"); - return -1; - } - - printf("succeed to open rafs filesystem from ../../tests/texture/repeatable/sha256-nocompress-repeatable\n"); - nydus_close_rafs(fs_handle); - - return 0; -} +#include +#include "../nydus.h" + +int main(int argc, char **argv) +{ + char *bootstrap = "../../tests/texture/repeatable/sha256-nocompress-repeatable"; + char *config = "version = 2\nid = \"my_id\"\n[backend]\ntype = \"localfs\"\n[backend.localfs]\ndir = \"../../tests/texture/repeatable/blobs\"\n[cache]\ntype = \"dummycache\"\n[rafs]"; + NydusFsHandle fs_handle; + + fs_handle = nydus_open_rafs(bootstrap, config); + if (fs_handle == NYDUS_INVALID_FS_HANDLE) { + printf("failed to open rafs filesystem from ../../tests/texture/repeatable/sha256-nocompress-repeatable\n"); + return -1; + } + + printf("succeed to open rafs filesystem from ../../tests/texture/repeatable/sha256-nocompress-repeatable\n"); + nydus_close_rafs(fs_handle); + 
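    /*
     * Per nydus.h, every NydusFileHandle obtained from nydus_fopen() must be
     * released with nydus_fclose() before the nydus_close_rafs() call above;
     * this example never opens files, so closing the filesystem is all that
     * is needed here.
     */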
+ return 0; +} diff --git a/clib/include/nydus.h b/clib/include/nydus.h index 81a6c03cf50..d21ce792390 100644 --- a/clib/include/nydus.h +++ b/clib/include/nydus.h @@ -1,70 +1,70 @@ -#include -#include -#include -#include - -/** - * Magic number for Nydus file handle. - */ -#define NYDUS_FILE_HANDLE_MAGIC 17148644263605784967ull - -/** - * Value representing an invalid Nydus file handle. - */ -#define NYDUS_INVALID_FILE_HANDLE 0 - -/** - * Magic number for Nydus filesystem handle. - */ -#define NYDUS_FS_HANDLE_MAGIC 17148643159786606983ull - -/** - * Value representing an invalid Nydus filesystem handle. - */ -#define NYDUS_INVALID_FS_HANDLE 0 - -/** - * Handle representing a Nydus file object. - */ -typedef uintptr_t NydusFileHandle; - -/** - * Handle representing a Nydus filesystem object. - */ -typedef uintptr_t NydusFsHandle; - -/** - * Open the file with `path` in readonly mode. - * - * The `NydusFileHandle` returned should be freed by calling `nydus_close()`. - */ -NydusFileHandle nydus_fopen(NydusFsHandle fs_handle, const char *path); - -/** - * Close the file handle returned by `nydus_fopen()`. - */ -void nydus_fclose(NydusFileHandle handle); - -/** - * Open a RAFS filesystem and return a handle to the filesystem object. - * - * The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise - * it will cause memory leak. - */ -NydusFsHandle nydus_open_rafs(const char *bootstrap, const char *config); - -/** - * Open a RAFS filesystem with default configuration and return a handle to the filesystem object. - * - * The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise - * it will cause memory leak. - */ -NydusFsHandle nydus_open_rafs_default(const char *bootstrap, const char *dir_path); - -/** - * Close the RAFS filesystem returned by `nydus_open_rafs()` and friends. - * - * All `NydusFileHandle` objects created from the `NydusFsHandle` should be freed before calling - * `nydus_close_rafs()`, otherwise it may cause panic. - */ -void nydus_close_rafs(NydusFsHandle handle); +#include +#include +#include +#include + +/** + * Magic number for Nydus file handle. + */ +#define NYDUS_FILE_HANDLE_MAGIC 17148644263605784967ull + +/** + * Value representing an invalid Nydus file handle. + */ +#define NYDUS_INVALID_FILE_HANDLE 0 + +/** + * Magic number for Nydus filesystem handle. + */ +#define NYDUS_FS_HANDLE_MAGIC 17148643159786606983ull + +/** + * Value representing an invalid Nydus filesystem handle. + */ +#define NYDUS_INVALID_FS_HANDLE 0 + +/** + * Handle representing a Nydus file object. + */ +typedef uintptr_t NydusFileHandle; + +/** + * Handle representing a Nydus filesystem object. + */ +typedef uintptr_t NydusFsHandle; + +/** + * Open the file with `path` in readonly mode. + * + * The `NydusFileHandle` returned should be freed by calling `nydus_close()`. + */ +NydusFileHandle nydus_fopen(NydusFsHandle fs_handle, const char *path); + +/** + * Close the file handle returned by `nydus_fopen()`. + */ +void nydus_fclose(NydusFileHandle handle); + +/** + * Open a RAFS filesystem and return a handle to the filesystem object. + * + * The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise + * it will cause memory leak. + */ +NydusFsHandle nydus_open_rafs(const char *bootstrap, const char *config); + +/** + * Open a RAFS filesystem with default configuration and return a handle to the filesystem object. 
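 *
 * A minimal usage sketch based on the declarations in this header (the
 * bootstrap and directory paths below are placeholders, not shipped files):
 *
 *   NydusFsHandle fs = nydus_open_rafs_default("bootstrap", "/path/to/blobs");
 *   if (fs != NYDUS_INVALID_FS_HANDLE) {
 *       NydusFileHandle file = nydus_fopen(fs, "/some/file");
 *       if (file != NYDUS_INVALID_FILE_HANDLE)
 *           nydus_fclose(file);
 *       nydus_close_rafs(fs);
 *   }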
+ * + * The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise + * it will cause memory leak. + */ +NydusFsHandle nydus_open_rafs_default(const char *bootstrap, const char *dir_path); + +/** + * Close the RAFS filesystem returned by `nydus_open_rafs()` and friends. + * + * All `NydusFileHandle` objects created from the `NydusFsHandle` should be freed before calling + * `nydus_close_rafs()`, otherwise it may cause panic. + */ +void nydus_close_rafs(NydusFsHandle handle); diff --git a/clib/src/file.rs b/clib/src/file.rs index 1d4a093acb5..463f669c9d9 100644 --- a/clib/src/file.rs +++ b/clib/src/file.rs @@ -1,90 +1,90 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Implement file operations for RAFS filesystem in userspace. -//! -//! Provide following file operation functions to access files in a RAFS filesystem: -//! - fopen: -//! - fclose: -//! - fread: -//! - fwrite: -//! - fseek: -//! - ftell - -use std::os::raw::c_char; -use std::ptr::null_mut; - -use fuse_backend_rs::api::filesystem::{Context, FileSystem}; - -use crate::{set_errno, FileSystemState, Inode, NydusFsHandle}; - -/// Magic number for Nydus file handle. -pub const NYDUS_FILE_HANDLE_MAGIC: u64 = 0xedfc_3919_afc3_5187; -/// Value representing an invalid Nydus file handle. -pub const NYDUS_INVALID_FILE_HANDLE: usize = 0; - -/// Handle representing a Nydus file object. -pub type NydusFileHandle = usize; - -#[repr(C)] -pub(crate) struct FileState { - magic: u64, - ino: Inode, - pos: u64, - fs_handle: NydusFsHandle, -} - -/// Open the file with `path` in readonly mode. -/// -/// The `NydusFileHandle` returned should be freed by calling `nydus_close()`. -/// -/// # Safety -/// Caller needs to ensure `fs_handle` and `path` are valid, otherwise it may cause memory access -/// violation. -#[no_mangle] -pub unsafe extern "C" fn nydus_fopen( - fs_handle: NydusFsHandle, - path: *const c_char, -) -> NydusFileHandle { - if path.is_null() { - set_errno(libc::EINVAL); - return null_mut::() as NydusFileHandle; - } - let fs = match FileSystemState::try_from_handle(fs_handle) { - Err(e) => { - set_errno(e); - return null_mut::() as NydusFileHandle; - } - Ok(v) => v, - }; - - //////////////////////////////////////////////////////////// - // TODO: open file; - ////////////////////////////////////////////////////////////////////////// - - let file = Box::new(FileState { - magic: NYDUS_FILE_HANDLE_MAGIC, - ino: fs.root_ino, - pos: 0, - fs_handle, - }); - - Box::into_raw(file) as NydusFileHandle -} - -/// Close the file handle returned by `nydus_fopen()`. -/// -/// # Safety -/// Caller needs to ensure `fs_handle` is valid, otherwise it may cause memory access violation. -#[no_mangle] -pub unsafe extern "C" fn nydus_fclose(handle: NydusFileHandle) { - let mut file = Box::from_raw(handle as *mut FileState); - assert_eq!(file.magic, NYDUS_FILE_HANDLE_MAGIC); - - let ctx = Context::default(); - let fs = FileSystemState::from_handle(file.fs_handle); - fs.rafs.forget(&ctx, file.ino, 1); - - file.magic -= 0x4fdf_ae34_9d9a_03cd; -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Implement file operations for RAFS filesystem in userspace. +//! +//! Provide following file operation functions to access files in a RAFS filesystem: +//! - fopen: +//! - fclose: +//! - fread: +//! - fwrite: +//! - fseek: +//! 
- ftell + +use std::os::raw::c_char; +use std::ptr::null_mut; + +use fuse_backend_rs::api::filesystem::{Context, FileSystem}; + +use crate::{set_errno, FileSystemState, Inode, NydusFsHandle}; + +/// Magic number for Nydus file handle. +pub const NYDUS_FILE_HANDLE_MAGIC: u64 = 0xedfc_3919_afc3_5187; +/// Value representing an invalid Nydus file handle. +pub const NYDUS_INVALID_FILE_HANDLE: usize = 0; + +/// Handle representing a Nydus file object. +pub type NydusFileHandle = usize; + +#[repr(C)] +pub(crate) struct FileState { + magic: u64, + ino: Inode, + pos: u64, + fs_handle: NydusFsHandle, +} + +/// Open the file with `path` in readonly mode. +/// +/// The `NydusFileHandle` returned should be freed by calling `nydus_close()`. +/// +/// # Safety +/// Caller needs to ensure `fs_handle` and `path` are valid, otherwise it may cause memory access +/// violation. +#[no_mangle] +pub unsafe extern "C" fn nydus_fopen( + fs_handle: NydusFsHandle, + path: *const c_char, +) -> NydusFileHandle { + if path.is_null() { + set_errno(libc::EINVAL); + return null_mut::() as NydusFileHandle; + } + let fs = match FileSystemState::try_from_handle(fs_handle) { + Err(e) => { + set_errno(e); + return null_mut::() as NydusFileHandle; + } + Ok(v) => v, + }; + + //////////////////////////////////////////////////////////// + // TODO: open file; + ////////////////////////////////////////////////////////////////////////// + + let file = Box::new(FileState { + magic: NYDUS_FILE_HANDLE_MAGIC, + ino: fs.root_ino, + pos: 0, + fs_handle, + }); + + Box::into_raw(file) as NydusFileHandle +} + +/// Close the file handle returned by `nydus_fopen()`. +/// +/// # Safety +/// Caller needs to ensure `fs_handle` is valid, otherwise it may cause memory access violation. +#[no_mangle] +pub unsafe extern "C" fn nydus_fclose(handle: NydusFileHandle) { + let mut file = Box::from_raw(handle as *mut FileState); + assert_eq!(file.magic, NYDUS_FILE_HANDLE_MAGIC); + + let ctx = Context::default(); + let fs = FileSystemState::from_handle(file.fs_handle); + fs.rafs.forget(&ctx, file.ino, 1); + + file.magic -= 0x4fdf_ae34_9d9a_03cd; +} diff --git a/clib/src/fs.rs b/clib/src/fs.rs index 10589b67678..dc51ef2576e 100644 --- a/clib/src/fs.rs +++ b/clib/src/fs.rs @@ -1,251 +1,251 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Provide structures and functions to open/close/access a filesystem instance. - -use std::ffi::CStr; -use std::os::raw::c_char; -use std::path::Path; -use std::ptr::{null, null_mut}; -use std::str::FromStr; -use std::sync::Arc; - -use nydus_api::ConfigV2; -use nydus_rafs::fs::Rafs; - -use crate::{cstr_to_str, set_errno, Inode}; - -/// Magic number for Nydus filesystem handle. -pub const NYDUS_FS_HANDLE_MAGIC: u64 = 0xedfc_3818_af03_5187; -/// Value representing an invalid Nydus filesystem handle. -pub const NYDUS_INVALID_FS_HANDLE: usize = 0; - -/// Handle representing a Nydus filesystem object. -pub type NydusFsHandle = usize; - -#[repr(C)] -pub(crate) struct FileSystemState { - magic: u64, - pub(crate) root_ino: Inode, - pub(crate) rafs: Rafs, -} - -impl FileSystemState { - /// Caller needs to ensure the lifetime of returned reference. - pub(crate) unsafe fn from_handle(hdl: NydusFsHandle) -> &'static mut Self { - let fs = &mut *(hdl as *const FileSystemState as *mut FileSystemState); - assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); - fs - } - - /// Caller needs to ensure the lifetime of returned reference. 
- pub(crate) unsafe fn try_from_handle(hdl: NydusFsHandle) -> Result<&'static mut Self, i32> { - if hdl == null::() as usize { - return Err(libc::EINVAL); - } - let fs = &mut *(hdl as *const FileSystemState as *mut FileSystemState); - assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); - Ok(fs) - } -} - -fn fs_error_einval() -> NydusFsHandle { - set_errno(libc::EINVAL); - null_mut::() as NydusFsHandle -} - -fn default_localfs_rafs_config(dir: &str) -> String { - format!( - r#" - version = 2 - id = "my_id" - [backend] - type = "localfs" - [backend.localfs] - dir = "{}" - [cache] - type = "dummycache" - [rafs] - "#, - dir - ) -} - -fn do_nydus_open_rafs(bootstrap: &str, config: &str) -> NydusFsHandle { - let cfg = match ConfigV2::from_str(config) { - Ok(v) => v, - Err(e) => { - warn!("failed to parse configuration info: {}", e); - return fs_error_einval(); - } - }; - let cfg = Arc::new(cfg); - let (mut rafs, reader) = match Rafs::new(&cfg, &cfg.id, Path::new(bootstrap)) { - Err(e) => { - warn!( - "failed to open filesystem from bootstrap {}, {}", - bootstrap, e - ); - return fs_error_einval(); - } - Ok(v) => v, - }; - if let Err(e) = rafs.import(reader, None) { - warn!("failed to import RAFS filesystem, {}", e); - return fs_error_einval(); - } - - let root_ino = rafs.metadata().root_inode; - let fs = Box::new(FileSystemState { - magic: NYDUS_FS_HANDLE_MAGIC, - root_ino, - rafs, - }); - Box::into_raw(fs) as NydusFsHandle -} - -/// Open a RAFS filesystem and return a handle to the filesystem object. -/// -/// The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise -/// it will cause memory leak. -/// -/// # Safety -/// Caller needs to ensure `bootstrap` and `config` are valid, otherwise it may cause memory access -/// violation. -#[no_mangle] -pub unsafe extern "C" fn nydus_open_rafs( - bootstrap: *const c_char, - config: *const c_char, -) -> NydusFsHandle { - if bootstrap.is_null() || config.is_null() { - return fs_error_einval(); - } - let bootstrap = cstr_to_str!(bootstrap, null_mut::() as NydusFsHandle); - let config = cstr_to_str!(config, null_mut::() as NydusFsHandle); - - do_nydus_open_rafs(bootstrap, config) -} - -/// Open a RAFS filesystem with default configuration and return a handle to the filesystem object. -/// -/// The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise -/// it will cause memory leak. -/// -/// # Safety -/// Caller needs to ensure `bootstrap` and `dir_path` are valid, otherwise it may cause memory -/// access violation. -#[no_mangle] -pub unsafe extern "C" fn nydus_open_rafs_default( - bootstrap: *const c_char, - dir_path: *const c_char, -) -> NydusFsHandle { - if bootstrap.is_null() || dir_path.is_null() { - return fs_error_einval(); - } - let bootstrap = cstr_to_str!(bootstrap, null_mut::() as NydusFsHandle); - let dir_path = cstr_to_str!(dir_path, null_mut::() as NydusFsHandle); - - let p_tmp; - let mut path = Path::new(bootstrap); - if path.parent().is_none() { - p_tmp = Path::new(dir_path).join(bootstrap); - path = &p_tmp - } - let bootstrap = match path.to_str() { - Some(v) => v, - None => { - warn!("invalid bootstrap path '{}'", bootstrap); - return fs_error_einval(); - } - }; - let config = default_localfs_rafs_config(dir_path); - - do_nydus_open_rafs(bootstrap, &config) -} - -/// Close the RAFS filesystem returned by `nydus_open_rafs()` and friends. 
-/// -/// All `NydusFileHandle` objects created from the `NydusFsHandle` should be freed before calling -/// `nydus_close_rafs()`, otherwise it may cause panic. -/// -/// # Safety -/// Caller needs to ensure `handle` is valid, otherwise it may cause memory access violation. -#[no_mangle] -pub unsafe extern "C" fn nydus_close_rafs(handle: NydusFsHandle) { - let mut fs = Box::from_raw(handle as *mut FileSystemState); - assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); - fs.magic -= 0x4fdf_03cd_ae34_9d9a; - fs.rafs.destroy().unwrap(); -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::CString; - use std::io::Error; - use std::path::PathBuf; - use std::ptr::null; - - pub(crate) fn open_file_system() -> NydusFsHandle { - let ret = unsafe { nydus_open_rafs(null(), null()) }; - assert_eq!(ret, NYDUS_INVALID_FS_HANDLE); - assert_eq!( - Error::raw_os_error(&Error::last_os_error()), - Some(libc::EINVAL) - ); - - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let bootstrap = PathBuf::from(root_dir) - .join("../tests/texture/repeatable/sha256-nocompress-repeatable"); - let bootstrap = bootstrap.to_str().unwrap(); - let bootstrap = CString::new(bootstrap).unwrap(); - let blob_dir = PathBuf::from(root_dir).join("../tests/texture/repeatable/blobs"); - - let config = format!( - r#" - version = 2 - id = "my_id" - [backend] - type = "localfs" - [backend.localfs] - dir = "{}" - [cache] - type = "dummycache" - [rafs] - "#, - blob_dir.display() - ); - let config = CString::new(config).unwrap(); - let fs = unsafe { - nydus_open_rafs( - bootstrap.as_ptr() as *const c_char, - config.as_ptr() as *const c_char, - ) - }; - assert_ne!(fs, NYDUS_INVALID_FS_HANDLE); - - fs - } - - #[test] - fn test_open_rafs() { - let fs = open_file_system(); - unsafe { nydus_close_rafs(fs) }; - } - - #[test] - fn test_open_rafs_default() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let bootstrap = PathBuf::from(root_dir) - .join("../tests/texture/repeatable/sha256-nocompress-repeatable"); - let bootstrap = bootstrap.to_str().unwrap(); - let bootstrap = CString::new(bootstrap).unwrap(); - let blob_dir = PathBuf::from(root_dir).join("../tests/texture/repeatable/blobs"); - let blob_dir = blob_dir.to_str().unwrap(); - let fs = unsafe { - nydus_open_rafs_default(bootstrap.as_ptr(), blob_dir.as_ptr() as *const c_char) - }; - unsafe { nydus_close_rafs(fs) }; - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Provide structures and functions to open/close/access a filesystem instance. + +use std::ffi::CStr; +use std::os::raw::c_char; +use std::path::Path; +use std::ptr::{null, null_mut}; +use std::str::FromStr; +use std::sync::Arc; + +use nydus_api::ConfigV2; +use nydus_rafs::fs::Rafs; + +use crate::{cstr_to_str, set_errno, Inode}; + +/// Magic number for Nydus filesystem handle. +pub const NYDUS_FS_HANDLE_MAGIC: u64 = 0xedfc_3818_af03_5187; +/// Value representing an invalid Nydus filesystem handle. +pub const NYDUS_INVALID_FS_HANDLE: usize = 0; + +/// Handle representing a Nydus filesystem object. +pub type NydusFsHandle = usize; + +#[repr(C)] +pub(crate) struct FileSystemState { + magic: u64, + pub(crate) root_ino: Inode, + pub(crate) rafs: Rafs, +} + +impl FileSystemState { + /// Caller needs to ensure the lifetime of returned reference. 
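    // `NydusFsHandle` is simply the address of a `FileSystemState` leaked with
    // `Box::into_raw()` in `do_nydus_open_rafs()` below; the `magic` field is
    // re-checked on every conversion back so that stale or unrelated pointers
    // trip the assertion instead of being dereferenced silently.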
+ pub(crate) unsafe fn from_handle(hdl: NydusFsHandle) -> &'static mut Self { + let fs = &mut *(hdl as *const FileSystemState as *mut FileSystemState); + assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); + fs + } + + /// Caller needs to ensure the lifetime of returned reference. + pub(crate) unsafe fn try_from_handle(hdl: NydusFsHandle) -> Result<&'static mut Self, i32> { + if hdl == null::() as usize { + return Err(libc::EINVAL); + } + let fs = &mut *(hdl as *const FileSystemState as *mut FileSystemState); + assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); + Ok(fs) + } +} + +fn fs_error_einval() -> NydusFsHandle { + set_errno(libc::EINVAL); + null_mut::() as NydusFsHandle +} + +fn default_localfs_rafs_config(dir: &str) -> String { + format!( + r#" + version = 2 + id = "my_id" + [backend] + type = "localfs" + [backend.localfs] + dir = "{}" + [cache] + type = "dummycache" + [rafs] + "#, + dir + ) +} + +fn do_nydus_open_rafs(bootstrap: &str, config: &str) -> NydusFsHandle { + let cfg = match ConfigV2::from_str(config) { + Ok(v) => v, + Err(e) => { + warn!("failed to parse configuration info: {}", e); + return fs_error_einval(); + } + }; + let cfg = Arc::new(cfg); + let (mut rafs, reader) = match Rafs::new(&cfg, &cfg.id, Path::new(bootstrap)) { + Err(e) => { + warn!( + "failed to open filesystem from bootstrap {}, {}", + bootstrap, e + ); + return fs_error_einval(); + } + Ok(v) => v, + }; + if let Err(e) = rafs.import(reader, None) { + warn!("failed to import RAFS filesystem, {}", e); + return fs_error_einval(); + } + + let root_ino = rafs.metadata().root_inode; + let fs = Box::new(FileSystemState { + magic: NYDUS_FS_HANDLE_MAGIC, + root_ino, + rafs, + }); + Box::into_raw(fs) as NydusFsHandle +} + +/// Open a RAFS filesystem and return a handle to the filesystem object. +/// +/// The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise +/// it will cause memory leak. +/// +/// # Safety +/// Caller needs to ensure `bootstrap` and `config` are valid, otherwise it may cause memory access +/// violation. +#[no_mangle] +pub unsafe extern "C" fn nydus_open_rafs( + bootstrap: *const c_char, + config: *const c_char, +) -> NydusFsHandle { + if bootstrap.is_null() || config.is_null() { + return fs_error_einval(); + } + let bootstrap = cstr_to_str!(bootstrap, null_mut::() as NydusFsHandle); + let config = cstr_to_str!(config, null_mut::() as NydusFsHandle); + + do_nydus_open_rafs(bootstrap, config) +} + +/// Open a RAFS filesystem with default configuration and return a handle to the filesystem object. +/// +/// The returned filesystem handle should be freed by calling `nydus_close_rafs()`, otherwise +/// it will cause memory leak. +/// +/// # Safety +/// Caller needs to ensure `bootstrap` and `dir_path` are valid, otherwise it may cause memory +/// access violation. 
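// Implementation note: the generated configuration is the localfs/dummycache
// TOML from `default_localfs_rafs_config()` above with `dir_path` as the blob
// directory, and a `bootstrap` argument with no parent component is joined
// onto `dir_path` before delegating to `do_nydus_open_rafs()`.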
+#[no_mangle] +pub unsafe extern "C" fn nydus_open_rafs_default( + bootstrap: *const c_char, + dir_path: *const c_char, +) -> NydusFsHandle { + if bootstrap.is_null() || dir_path.is_null() { + return fs_error_einval(); + } + let bootstrap = cstr_to_str!(bootstrap, null_mut::() as NydusFsHandle); + let dir_path = cstr_to_str!(dir_path, null_mut::() as NydusFsHandle); + + let p_tmp; + let mut path = Path::new(bootstrap); + if path.parent().is_none() { + p_tmp = Path::new(dir_path).join(bootstrap); + path = &p_tmp + } + let bootstrap = match path.to_str() { + Some(v) => v, + None => { + warn!("invalid bootstrap path '{}'", bootstrap); + return fs_error_einval(); + } + }; + let config = default_localfs_rafs_config(dir_path); + + do_nydus_open_rafs(bootstrap, &config) +} + +/// Close the RAFS filesystem returned by `nydus_open_rafs()` and friends. +/// +/// All `NydusFileHandle` objects created from the `NydusFsHandle` should be freed before calling +/// `nydus_close_rafs()`, otherwise it may cause panic. +/// +/// # Safety +/// Caller needs to ensure `handle` is valid, otherwise it may cause memory access violation. +#[no_mangle] +pub unsafe extern "C" fn nydus_close_rafs(handle: NydusFsHandle) { + let mut fs = Box::from_raw(handle as *mut FileSystemState); + assert_eq!(fs.magic, NYDUS_FS_HANDLE_MAGIC); + fs.magic -= 0x4fdf_03cd_ae34_9d9a; + fs.rafs.destroy().unwrap(); +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + use std::io::Error; + use std::path::PathBuf; + use std::ptr::null; + + pub(crate) fn open_file_system() -> NydusFsHandle { + let ret = unsafe { nydus_open_rafs(null(), null()) }; + assert_eq!(ret, NYDUS_INVALID_FS_HANDLE); + assert_eq!( + Error::raw_os_error(&Error::last_os_error()), + Some(libc::EINVAL) + ); + + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let bootstrap = PathBuf::from(root_dir) + .join("../tests/texture/repeatable/sha256-nocompress-repeatable"); + let bootstrap = bootstrap.to_str().unwrap(); + let bootstrap = CString::new(bootstrap).unwrap(); + let blob_dir = PathBuf::from(root_dir).join("../tests/texture/repeatable/blobs"); + + let config = format!( + r#" + version = 2 + id = "my_id" + [backend] + type = "localfs" + [backend.localfs] + dir = "{}" + [cache] + type = "dummycache" + [rafs] + "#, + blob_dir.display() + ); + let config = CString::new(config).unwrap(); + let fs = unsafe { + nydus_open_rafs( + bootstrap.as_ptr() as *const c_char, + config.as_ptr() as *const c_char, + ) + }; + assert_ne!(fs, NYDUS_INVALID_FS_HANDLE); + + fs + } + + #[test] + fn test_open_rafs() { + let fs = open_file_system(); + unsafe { nydus_close_rafs(fs) }; + } + + #[test] + fn test_open_rafs_default() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let bootstrap = PathBuf::from(root_dir) + .join("../tests/texture/repeatable/sha256-nocompress-repeatable"); + let bootstrap = bootstrap.to_str().unwrap(); + let bootstrap = CString::new(bootstrap).unwrap(); + let blob_dir = PathBuf::from(root_dir).join("../tests/texture/repeatable/blobs"); + let blob_dir = blob_dir.to_str().unwrap(); + let fs = unsafe { + nydus_open_rafs_default(bootstrap.as_ptr(), blob_dir.as_ptr() as *const c_char) + }; + unsafe { nydus_close_rafs(fs) }; + } +} diff --git a/clib/src/lib.rs b/clib/src/lib.rs index 09bed819883..5c42c21e89c 100644 --- a/clib/src/lib.rs +++ b/clib/src/lib.rs @@ -1,80 +1,80 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -//! SDK C wrappers to access `nydus-rafs` and `nydus-storage` functionalities. -//! -//! # Generate Header File -//! Please use cbindgen to generate `nydus.h` header file from rust source code by: -//! ``` -//! cargo install cbindgen -//! cbindgen -l c -v -o include/nydus.h -//! ``` -//! -//! # Run C Test -//! ``` -//! gcc -o nydus -L ../../target/debug/ -lnydus_clib nydus_rafs.c -//! ``` - -#[macro_use] -extern crate log; -extern crate core; - -pub use file::*; -pub use fs::*; - -mod file; -mod fs; - -/// Type for RAFS filesystem inode number. -pub type Inode = u64; - -/// Helper to set libc::errno -#[cfg(target_os = "linux")] -fn set_errno(errno: i32) { - unsafe { *libc::__errno_location() = errno }; -} - -/// Helper to set libc::errno -#[cfg(target_os = "macos")] -fn set_errno(errno: i32) { - unsafe { *libc::__error() = errno }; -} - -/// Macro to convert C `char *` into rust `&str`. -#[macro_export] -macro_rules! cstr_to_str { - ($var: ident, $ret: expr) => {{ - let s = CStr::from_ptr($var); - match s.to_str() { - Ok(v) => v, - Err(_e) => { - set_errno(libc::EINVAL); - return $ret; - } - } - }}; -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Error; - - #[test] - fn test_set_errno() { - assert_eq!(Error::raw_os_error(&Error::last_os_error()), Some(0)); - set_errno(libc::EINVAL); - assert_eq!( - Error::raw_os_error(&Error::last_os_error()), - Some(libc::EINVAL) - ); - set_errno(libc::ENOSYS); - assert_eq!( - Error::raw_os_error(&Error::last_os_error()), - Some(libc::ENOSYS) - ); - set_errno(0); - assert_eq!(Error::raw_os_error(&Error::last_os_error()), Some(0)); - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! SDK C wrappers to access `nydus-rafs` and `nydus-storage` functionalities. +//! +//! # Generate Header File +//! Please use cbindgen to generate `nydus.h` header file from rust source code by: +//! ``` +//! cargo install cbindgen +//! cbindgen -l c -v -o include/nydus.h +//! ``` +//! +//! # Run C Test +//! ``` +//! gcc -o nydus -L ../../target/debug/ -lnydus_clib nydus_rafs.c +//! ``` + +#[macro_use] +extern crate log; +extern crate core; + +pub use file::*; +pub use fs::*; + +mod file; +mod fs; + +/// Type for RAFS filesystem inode number. +pub type Inode = u64; + +/// Helper to set libc::errno +#[cfg(target_os = "linux")] +fn set_errno(errno: i32) { + unsafe { *libc::__errno_location() = errno }; +} + +/// Helper to set libc::errno +#[cfg(target_os = "macos")] +fn set_errno(errno: i32) { + unsafe { *libc::__error() = errno }; +} + +/// Macro to convert C `char *` into rust `&str`. +#[macro_export] +macro_rules! 
cstr_to_str { + ($var: ident, $ret: expr) => {{ + let s = CStr::from_ptr($var); + match s.to_str() { + Ok(v) => v, + Err(_e) => { + set_errno(libc::EINVAL); + return $ret; + } + } + }}; +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Error; + + #[test] + fn test_set_errno() { + assert_eq!(Error::raw_os_error(&Error::last_os_error()), Some(0)); + set_errno(libc::EINVAL); + assert_eq!( + Error::raw_os_error(&Error::last_os_error()), + Some(libc::EINVAL) + ); + set_errno(libc::ENOSYS); + assert_eq!( + Error::raw_os_error(&Error::last_os_error()), + Some(libc::ENOSYS) + ); + set_errno(0); + assert_eq!(Error::raw_os_error(&Error::last_os_error()), Some(0)); + } +} diff --git a/contrib/ctr-remote/.golangci.yml b/contrib/ctr-remote/.golangci.yml index 734653d6721..2755646facd 100644 --- a/contrib/ctr-remote/.golangci.yml +++ b/contrib/ctr-remote/.golangci.yml @@ -1,21 +1,21 @@ -# https://golangci-lint.run/usage/configuration#config-file - -linters: - enable: - - staticcheck - - unconvert - - gofmt - - goimports - - revive - - ineffassign - - vet - - unused - - misspell - disable: - - errcheck - -run: - deadline: 4m - skip-dirs: - - misc - +# https://golangci-lint.run/usage/configuration#config-file + +linters: + enable: + - staticcheck + - unconvert + - gofmt + - goimports + - revive + - ineffassign + - vet + - unused + - misspell + disable: + - errcheck + +run: + deadline: 4m + skip-dirs: + - misc + diff --git a/contrib/ctr-remote/Makefile b/contrib/ctr-remote/Makefile index d00b32f6106..39b9bb0f6ff 100644 --- a/contrib/ctr-remote/Makefile +++ b/contrib/ctr-remote/Makefile @@ -1,29 +1,29 @@ -GIT_COMMIT := $(shell git rev-list -1 HEAD) -BUILD_TIME := $(shell date -u +%Y%m%d.%H%M) -PACKAGES ?= $(shell go list ./... | grep -v /vendor/) -GOARCH ?= $(shell go env GOARCH) -GOPROXY ?= https://goproxy.io - -ifdef GOPROXY -PROXY := GOPROXY=${GOPROXY} -endif - -.PHONY: all build release test clean - -all: build - -build: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -v -o bin/ctr-remote ./cmd/main.go - -release: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -v -o bin/ctr-remote ./cmd/main.go - -test: - go vet $(PACKAGES) - go test -v -cover ${PACKAGES} - -lint: - golangci-lint run - -clean: - rm -f bin/* +GIT_COMMIT := $(shell git rev-list -1 HEAD) +BUILD_TIME := $(shell date -u +%Y%m%d.%H%M) +PACKAGES ?= $(shell go list ./... | grep -v /vendor/) +GOARCH ?= $(shell go env GOARCH) +GOPROXY ?= https://goproxy.io + +ifdef GOPROXY +PROXY := GOPROXY=${GOPROXY} +endif + +.PHONY: all build release test clean + +all: build + +build: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -v -o bin/ctr-remote ./cmd/main.go + +release: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -v -o bin/ctr-remote ./cmd/main.go + +test: + go vet $(PACKAGES) + go test -v -cover ${PACKAGES} + +lint: + golangci-lint run + +clean: + rm -f bin/* diff --git a/contrib/ctr-remote/cmd/main.go b/contrib/ctr-remote/cmd/main.go index 4721e3122a5..93cba8d9174 100644 --- a/contrib/ctr-remote/cmd/main.go +++ b/contrib/ctr-remote/cmd/main.go @@ -1,67 +1,67 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package main - -import ( - "fmt" - "os" - - "github.com/containerd/containerd/cmd/ctr/app" - "github.com/containerd/containerd/pkg/seed" //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies - "github.com/dragonflyoss/nydus/contrib/ctr-remote/commands" - "github.com/urfave/cli" -) - -func init() { - // From https://github.com/containerd/containerd/blob/f7f2be732159a411eae46b78bfdb479b133a823b/cmd/ctr/main.go - //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies - seed.WithTimeAndRand() -} - -func main() { - customCommands := []cli.Command{commands.RpullCommand} - app := app.New() - app.Description = "NOTE: Enhanced for nydus-snapshotter\n" + app.Description - for i := range app.Commands { - if app.Commands[i].Name == "images" { - sc := map[string]cli.Command{} - for _, subcmd := range customCommands { - sc[subcmd.Name] = subcmd - } - - // First, replace duplicated subcommands - for j := range app.Commands[i].Subcommands { - for name, subcmd := range sc { - if name == app.Commands[i].Subcommands[j].Name { - app.Commands[i].Subcommands[j] = subcmd - delete(sc, name) - } - } - } - - // Next, append all new sub commands - for _, subcmd := range sc { - app.Commands[i].Subcommands = append(app.Commands[i].Subcommands, subcmd) - } - break - } - } - if err := app.Run(os.Args); err != nil { - fmt.Fprintf(os.Stderr, "ctr-remote: %v\n", err) - os.Exit(1) - } -} +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package main + +import ( + "fmt" + "os" + + "github.com/containerd/containerd/cmd/ctr/app" + "github.com/containerd/containerd/pkg/seed" //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies + "github.com/dragonflyoss/nydus/contrib/ctr-remote/commands" + "github.com/urfave/cli" +) + +func init() { + // From https://github.com/containerd/containerd/blob/f7f2be732159a411eae46b78bfdb479b133a823b/cmd/ctr/main.go + //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies + seed.WithTimeAndRand() +} + +func main() { + customCommands := []cli.Command{commands.RpullCommand} + app := app.New() + app.Description = "NOTE: Enhanced for nydus-snapshotter\n" + app.Description + for i := range app.Commands { + if app.Commands[i].Name == "images" { + sc := map[string]cli.Command{} + for _, subcmd := range customCommands { + sc[subcmd.Name] = subcmd + } + + // First, replace duplicated subcommands + for j := range app.Commands[i].Subcommands { + for name, subcmd := range sc { + if name == app.Commands[i].Subcommands[j].Name { + app.Commands[i].Subcommands[j] = subcmd + delete(sc, name) + } + } + } + + // Next, append all new sub commands + for _, subcmd := range sc { + app.Commands[i].Subcommands = append(app.Commands[i].Subcommands, subcmd) + } + break + } + } + if err := app.Run(os.Args); err != nil { + fmt.Fprintf(os.Stderr, "ctr-remote: %v\n", err) + os.Exit(1) + } +} diff --git a/contrib/ctr-remote/commands/rpull.go b/contrib/ctr-remote/commands/rpull.go index e9b28604b1d..5fa39957229 100644 --- a/contrib/ctr-remote/commands/rpull.go +++ b/contrib/ctr-remote/commands/rpull.go @@ -1,103 +1,103 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package commands - -import ( - "context" - "fmt" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/cmd/ctr/commands" - "github.com/containerd/containerd/cmd/ctr/commands/content" - "github.com/containerd/containerd/images" - "github.com/containerd/log" - "github.com/containerd/nydus-snapshotter/pkg/label" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/urfave/cli" -) - -const ( - remoteSnapshotterName = "nydus" -) - -// RpullCommand is a subcommand to pull an image from a registry levaraging nydus snapshotter -var RpullCommand = cli.Command{ - Name: "rpull", - Usage: "pull an image from a registry leveraging nydus-snapshotter", - ArgsUsage: "[flags] ", - Description: `Fetch and prepare an image for use in containerd leveraging nydus-snapshotter. 
-After pulling an image, it should be ready to use the same reference in a run command.`, - Flags: append(commands.RegistryFlags, commands.LabelFlag), - Action: func(context *cli.Context) error { - var ( - ref = context.Args().First() - config = &rPullConfig{} - ) - if ref == "" { - return fmt.Errorf("please provide an image reference to pull") - } - - client, ctx, cancel, err := commands.NewClient(context) - if err != nil { - return err - } - defer cancel() - - ctx, done, err := client.WithLease(ctx) - if err != nil { - return err - } - defer done(ctx) - - fc, err := content.NewFetchConfig(ctx, context) - if err != nil { - return err - } - config.FetchConfig = fc - - return pull(ctx, client, ref, config) - }, -} - -type rPullConfig struct { - *content.FetchConfig -} - -func pull(ctx context.Context, client *containerd.Client, ref string, config *rPullConfig) error { - pCtx := ctx - h := images.HandlerFunc(func(_ context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { - if desc.MediaType != images.MediaTypeDockerSchema1Manifest { - fmt.Printf("fetching %v... %v\n", desc.Digest.String()[:15], desc.MediaType) - } - return nil, nil - }) - - log.G(pCtx).WithField("image", ref).Debug("fetching") - configLabels := commands.LabelArgs(config.Labels) - if _, err := client.Pull(pCtx, ref, []containerd.RemoteOpt{ - containerd.WithPullLabels(configLabels), - containerd.WithResolver(config.Resolver), - containerd.WithImageHandler(h), - containerd.WithPullUnpack, - containerd.WithPullSnapshotter(remoteSnapshotterName), - containerd.WithImageHandlerWrapper(label.AppendLabelsHandlerWrapper(ref)), - }...); err != nil { - return err - } - - return nil -} +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package commands + +import ( + "context" + "fmt" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/cmd/ctr/commands" + "github.com/containerd/containerd/cmd/ctr/commands/content" + "github.com/containerd/containerd/images" + "github.com/containerd/log" + "github.com/containerd/nydus-snapshotter/pkg/label" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/urfave/cli" +) + +const ( + remoteSnapshotterName = "nydus" +) + +// RpullCommand is a subcommand to pull an image from a registry levaraging nydus snapshotter +var RpullCommand = cli.Command{ + Name: "rpull", + Usage: "pull an image from a registry leveraging nydus-snapshotter", + ArgsUsage: "[flags] ", + Description: `Fetch and prepare an image for use in containerd leveraging nydus-snapshotter. 
+After pulling an image, it should be ready to use the same reference in a run command.`, + Flags: append(commands.RegistryFlags, commands.LabelFlag), + Action: func(context *cli.Context) error { + var ( + ref = context.Args().First() + config = &rPullConfig{} + ) + if ref == "" { + return fmt.Errorf("please provide an image reference to pull") + } + + client, ctx, cancel, err := commands.NewClient(context) + if err != nil { + return err + } + defer cancel() + + ctx, done, err := client.WithLease(ctx) + if err != nil { + return err + } + defer done(ctx) + + fc, err := content.NewFetchConfig(ctx, context) + if err != nil { + return err + } + config.FetchConfig = fc + + return pull(ctx, client, ref, config) + }, +} + +type rPullConfig struct { + *content.FetchConfig +} + +func pull(ctx context.Context, client *containerd.Client, ref string, config *rPullConfig) error { + pCtx := ctx + h := images.HandlerFunc(func(_ context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { + if desc.MediaType != images.MediaTypeDockerSchema1Manifest { + fmt.Printf("fetching %v... %v\n", desc.Digest.String()[:15], desc.MediaType) + } + return nil, nil + }) + + log.G(pCtx).WithField("image", ref).Debug("fetching") + configLabels := commands.LabelArgs(config.Labels) + if _, err := client.Pull(pCtx, ref, []containerd.RemoteOpt{ + containerd.WithPullLabels(configLabels), + containerd.WithResolver(config.Resolver), + containerd.WithImageHandler(h), + containerd.WithPullUnpack, + containerd.WithPullSnapshotter(remoteSnapshotterName), + containerd.WithImageHandlerWrapper(label.AppendLabelsHandlerWrapper(ref)), + }...); err != nil { + return err + } + + return nil +} diff --git a/contrib/ctr-remote/go.mod b/contrib/ctr-remote/go.mod index 55917c4e660..2da7f258d7c 100644 --- a/contrib/ctr-remote/go.mod +++ b/contrib/ctr-remote/go.mod @@ -1,84 +1,84 @@ -module github.com/dragonflyoss/nydus/contrib/ctr-remote - -go 1.21 - -require ( - github.com/containerd/containerd v1.7.11 - github.com/containerd/log v0.1.0 - github.com/containerd/nydus-snapshotter v0.13.4 - github.com/opencontainers/image-spec v1.1.0-rc5 - github.com/urfave/cli v1.22.14 -) - -require ( - github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect - github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 // indirect - github.com/Microsoft/go-winio v0.6.1 // indirect - github.com/Microsoft/hcsshim v0.11.4 // indirect - github.com/cilium/ebpf v0.10.0 // indirect - github.com/containerd/cgroups v1.1.0 // indirect - github.com/containerd/cgroups/v3 v3.0.2 // indirect - github.com/containerd/console v1.0.3 // indirect - github.com/containerd/continuity v0.4.3 // indirect - github.com/containerd/fifo v1.1.0 // indirect - github.com/containerd/go-cni v1.1.9 // indirect - github.com/containerd/go-runc v1.0.0 // indirect - github.com/containerd/ttrpc v1.2.2 // indirect - github.com/containerd/typeurl/v2 v2.1.1 // indirect - github.com/containernetworking/cni v1.1.2 // indirect - github.com/containernetworking/plugins v1.2.0 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect - github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect - github.com/docker/go-units v0.5.0 // indirect - github.com/felixge/httpsnoop v1.0.3 // indirect - github.com/go-logr/logr v1.4.1 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/godbus/dbus/v5 v5.1.0 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - 
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect - github.com/google/go-cmp v0.6.0 // indirect - github.com/google/uuid v1.5.0 // indirect - github.com/hashicorp/errwrap v1.1.0 // indirect - github.com/hashicorp/go-multierror v1.1.1 // indirect - github.com/intel/goresctrl v0.3.0 // indirect - github.com/klauspost/compress v1.17.4 // indirect - github.com/moby/locker v1.0.1 // indirect - github.com/moby/sys/mountinfo v0.7.1 // indirect - github.com/moby/sys/sequential v0.5.0 // indirect - github.com/moby/sys/signal v0.7.0 // indirect - github.com/moby/sys/symlink v0.2.0 // indirect - github.com/onsi/gomega v1.27.6 // indirect - github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/runc v1.1.12 // indirect - github.com/opencontainers/runtime-spec v1.1.0 // indirect - github.com/opencontainers/selinux v1.11.0 // indirect - github.com/pelletier/go-toml v1.9.5 // indirect - github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/procfs v0.12.0 // indirect - github.com/rogpeppe/go-internal v1.12.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - github.com/stretchr/objx v0.5.1 // indirect - go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect - go.opentelemetry.io/otel v1.21.0 // indirect - go.opentelemetry.io/otel/metric v1.21.0 // indirect - go.opentelemetry.io/otel/trace v1.21.0 // indirect - go.uber.org/goleak v1.2.1 // indirect - golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sync v0.5.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.16.1 // indirect - google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect - google.golang.org/grpc v1.60.1 // indirect - google.golang.org/protobuf v1.32.0 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/apimachinery v0.28.3 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect -) +module github.com/dragonflyoss/nydus/contrib/ctr-remote + +go 1.21 + +require ( + github.com/containerd/containerd v1.7.11 + github.com/containerd/log v0.1.0 + github.com/containerd/nydus-snapshotter v0.13.4 + github.com/opencontainers/image-spec v1.1.0-rc5 + github.com/urfave/cli v1.22.14 +) + +require ( + github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect + github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 // indirect + github.com/Microsoft/go-winio v0.6.1 // indirect + github.com/Microsoft/hcsshim v0.11.4 // indirect + github.com/cilium/ebpf v0.10.0 // indirect + github.com/containerd/cgroups v1.1.0 // indirect + github.com/containerd/cgroups/v3 v3.0.2 // indirect + github.com/containerd/console v1.0.3 // indirect + github.com/containerd/continuity v0.4.3 // indirect + github.com/containerd/fifo v1.1.0 // indirect + github.com/containerd/go-cni v1.1.9 // indirect + github.com/containerd/go-runc v1.0.0 // indirect + github.com/containerd/ttrpc v1.2.2 // indirect + github.com/containerd/typeurl/v2 v2.1.1 // indirect + github.com/containernetworking/cni v1.1.2 // indirect + github.com/containernetworking/plugins v1.2.0 // indirect + github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/cpuguy83/go-md2man/v2 
v2.0.3 // indirect + github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/godbus/dbus/v5 v5.1.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/uuid v1.5.0 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/intel/goresctrl v0.3.0 // indirect + github.com/klauspost/compress v1.17.4 // indirect + github.com/moby/locker v1.0.1 // indirect + github.com/moby/sys/mountinfo v0.7.1 // indirect + github.com/moby/sys/sequential v0.5.0 // indirect + github.com/moby/sys/signal v0.7.0 // indirect + github.com/moby/sys/symlink v0.2.0 // indirect + github.com/onsi/gomega v1.27.6 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/runc v1.1.12 // indirect + github.com/opencontainers/runtime-spec v1.1.0 // indirect + github.com/opencontainers/selinux v1.11.0 // indirect + github.com/pelletier/go-toml v1.9.5 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/procfs v0.12.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + github.com/stretchr/objx v0.5.1 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect + go.opentelemetry.io/otel v1.21.0 // indirect + go.opentelemetry.io/otel/metric v1.21.0 // indirect + go.opentelemetry.io/otel/trace v1.21.0 // indirect + go.uber.org/goleak v1.2.1 // indirect + golang.org/x/mod v0.14.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/sync v0.5.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/tools v0.16.1 // indirect + google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect + google.golang.org/grpc v1.60.1 // indirect + google.golang.org/protobuf v1.32.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/apimachinery v0.28.3 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/contrib/ctr-remote/go.sum b/contrib/ctr-remote/go.sum index 04d4b061e3f..4b0e7d97c43 100644 --- a/contrib/ctr-remote/go.sum +++ b/contrib/ctr-remote/go.sum @@ -1,358 +1,358 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= -github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 h1:dIScnXFlF784X79oi7MzVT6GWqr/W1uUt0pB5CsDs9M= -github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2/go.mod h1:gCLVsLfv1egrcZu+GoJATN5ts75F2s62ih/457eWzOw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/toml v1.3.2/go.mod 
h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= -github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= -github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= -github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.10.0 h1:nk5HPMeoBXtOzbkZBWym+ZWq1GIiHUsBFXxwewXAHLQ= -github.com/cilium/ebpf v0.10.0/go.mod h1:DPiVdY/kT534dgc9ERmvP8mWA+9gvwgKfRvk4nNWnoE= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= -github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= -github.com/containerd/cgroups/v3 v3.0.2 h1:f5WFqIVSgo5IZmtTT3qVBo6TzI1ON6sycSBKkymb9L0= -github.com/containerd/cgroups/v3 v3.0.2/go.mod h1:JUgITrzdFqp42uI2ryGA+ge0ap/nxzYgkGmIcetmErE= -github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= -github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw= -github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= -github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE= -github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= -github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= -github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= -github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= -github.com/containerd/go-cni v1.1.9 h1:ORi7P1dYzCwVM6XPN4n3CbkuOx/NZ2DOqy+SHRdo9rU= -github.com/containerd/go-cni v1.1.9/go.mod h1:XYrZJ1d5W6E2VOvjffL3IZq0Dz6bsVlERHbekNK90PM= -github.com/containerd/go-runc v1.0.0 h1:oU+lLv1ULm5taqgV/CJivypVODI4SUz1znWjv3nNYS0= -github.com/containerd/go-runc v1.0.0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/nydus-snapshotter v0.13.4 h1:veTQCgpfRGdPD031dVNGlU+vK/W9vBhZNlMWR9oupiQ= -github.com/containerd/nydus-snapshotter v0.13.4/go.mod h1:y41TM10lXhskfHHvge7kf1VucM4CeWwsCmQ5Q51UJrc= -github.com/containerd/ttrpc v1.2.2 h1:9vqZr0pxwOF5koz6N0N3kJ0zDHokrcPxIR/ZR2YFtOs= -github.com/containerd/ttrpc v1.2.2/go.mod h1:sIT6l32Ph/H9cvnJsfXM5drIVzTr5A2flTf1G5tYZak= -github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= -github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= -github.com/containernetworking/cni v1.1.2 
h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= -github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= -github.com/containernetworking/plugins v1.2.0 h1:SWgg3dQG1yzUo4d9iD8cwSVh1VqI+bP7mkPDoSfP9VU= -github.com/containernetworking/plugins v1.2.0/go.mod h1:/VjX4uHecW5vVimFa1wkG4s+r/s9qIfPdqlLF4TW8c4= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= -github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= -github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= -github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= -github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.1.0 
h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= -github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= -github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= 
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= -github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= -github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= -github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/intel/goresctrl v0.3.0 h1:K2D3GOzihV7xSBedGxONSlaw/un1LZgWsc9IfqipN4c= -github.com/intel/goresctrl v0.3.0/go.mod h1:fdz3mD85cmP9sHD8JUlrNWAxvwM86CrbmVXltEKd7zk= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= -github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= -github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= -github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= -github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= -github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= -github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= -github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= -github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= -github.com/moby/sys/symlink v0.2.0 h1:tk1rOM+Ljp0nFmfOIBtlV3rTDlWOwFRhjEeAhZB0nZc= -github.com/moby/sys/symlink v0.2.0/go.mod h1:7uZVF2dqJjG/NsClqul95CqKOBRQyYSNnJ6BMgR/gFs= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= -github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= -github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= -github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= -github.com/onsi/ginkgo/v2 
v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE= -github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= -github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= -github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= -github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= -github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= -github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= -github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= -github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= -github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= -github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= -github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.1 h1:4VhoImhV/Bm0ToFkXFi8hXNXwpDRZ/ynw3amt82mzq0= -github.com/stretchr/objx v0.5.1/go.mod h1:/iHQpkQwBD6DLUmQ4pE+s1TXdob1mORJ4/UFdrifcy0= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= -github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= -go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= -go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= -go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= -go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= -go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= -go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= -go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.14.0 
h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= -golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= -golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 
-google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917 h1:nz5NESFLZbJGPFxDT/HCn+V1mZ8JGNoY4nUpmW/Y2eg= -google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917/go.mod h1:pZqR+glSb11aJ+JQcczCvgf47+duRuzNSKqE8YAQnV0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= -google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= -gopkg.in/inf.v0 
v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= -k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 h1:dIScnXFlF784X79oi7MzVT6GWqr/W1uUt0pB5CsDs9M= +github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2/go.mod h1:gCLVsLfv1egrcZu+GoJATN5ts75F2s62ih/457eWzOw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cilium/ebpf v0.10.0 h1:nk5HPMeoBXtOzbkZBWym+ZWq1GIiHUsBFXxwewXAHLQ= +github.com/cilium/ebpf v0.10.0/go.mod h1:DPiVdY/kT534dgc9ERmvP8mWA+9gvwgKfRvk4nNWnoE= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= +github.com/containerd/cgroups v1.1.0/go.mod 
h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= +github.com/containerd/cgroups/v3 v3.0.2 h1:f5WFqIVSgo5IZmtTT3qVBo6TzI1ON6sycSBKkymb9L0= +github.com/containerd/cgroups/v3 v3.0.2/go.mod h1:JUgITrzdFqp42uI2ryGA+ge0ap/nxzYgkGmIcetmErE= +github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= +github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw= +github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= +github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE= +github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= +github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= +github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= +github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= +github.com/containerd/go-cni v1.1.9 h1:ORi7P1dYzCwVM6XPN4n3CbkuOx/NZ2DOqy+SHRdo9rU= +github.com/containerd/go-cni v1.1.9/go.mod h1:XYrZJ1d5W6E2VOvjffL3IZq0Dz6bsVlERHbekNK90PM= +github.com/containerd/go-runc v1.0.0 h1:oU+lLv1ULm5taqgV/CJivypVODI4SUz1znWjv3nNYS0= +github.com/containerd/go-runc v1.0.0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/nydus-snapshotter v0.13.4 h1:veTQCgpfRGdPD031dVNGlU+vK/W9vBhZNlMWR9oupiQ= +github.com/containerd/nydus-snapshotter v0.13.4/go.mod h1:y41TM10lXhskfHHvge7kf1VucM4CeWwsCmQ5Q51UJrc= +github.com/containerd/ttrpc v1.2.2 h1:9vqZr0pxwOF5koz6N0N3kJ0zDHokrcPxIR/ZR2YFtOs= +github.com/containerd/ttrpc v1.2.2/go.mod h1:sIT6l32Ph/H9cvnJsfXM5drIVzTr5A2flTf1G5tYZak= +github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= +github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= +github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= +github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= +github.com/containernetworking/plugins v1.2.0 h1:SWgg3dQG1yzUo4d9iD8cwSVh1VqI+bP7mkPDoSfP9VU= +github.com/containernetworking/plugins v1.2.0/go.mod h1:/VjX4uHecW5vVimFa1wkG4s+r/s9qIfPdqlLF4TW8c4= +github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= +github.com/docker/go-units v0.5.0 
h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= +github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/intel/goresctrl v0.3.0 h1:K2D3GOzihV7xSBedGxONSlaw/un1LZgWsc9IfqipN4c= +github.com/intel/goresctrl v0.3.0/go.mod h1:fdz3mD85cmP9sHD8JUlrNWAxvwM86CrbmVXltEKd7zk= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= +github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= +github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= +github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= +github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= +github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= +github.com/moby/sys/symlink v0.2.0 h1:tk1rOM+Ljp0nFmfOIBtlV3rTDlWOwFRhjEeAhZB0nZc= +github.com/moby/sys/symlink v0.2.0/go.mod h1:7uZVF2dqJjG/NsClqul95CqKOBRQyYSNnJ6BMgR/gFs= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE= +github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= +github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= +github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= +github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= 
+github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= +github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= +github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= +github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.1 h1:4VhoImhV/Bm0ToFkXFi8hXNXwpDRZ/ynw3amt82mzq0= +github.com/stretchr/objx v0.5.1/go.mod h1:/iHQpkQwBD6DLUmQ4pE+s1TXdob1mORJ4/UFdrifcy0= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= +github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= +github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= +go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= +go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= +go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= +go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= +go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= +go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= +go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net 
v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= +golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917 h1:nz5NESFLZbJGPFxDT/HCn+V1mZ8JGNoY4nUpmW/Y2eg= +google.golang.org/genproto v0.0.0-20240102182953-50ed04b92917/go.mod h1:pZqR+glSb11aJ+JQcczCvgf47+duRuzNSKqE8YAQnV0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 
+google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= +google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= +k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= 
+sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/contrib/docker-nydus-graphdriver/README.md b/contrib/docker-nydus-graphdriver/README.md index aba4afd719b..f646cf103cf 100644 --- a/contrib/docker-nydus-graphdriver/README.md +++ b/contrib/docker-nydus-graphdriver/README.md @@ -1,3 +1,3 @@ -# Docker Nydus Graph Driver - -Moved to [docker-nydus-graphdriver](https://github.com/nydusaccelerator/docker-nydus-graphdriver). +# Docker Nydus Graph Driver + +Moved to [docker-nydus-graphdriver](https://github.com/nydusaccelerator/docker-nydus-graphdriver). diff --git a/contrib/kernel-patches/0001-cachefiles-optimize-on-demand-IO-path-with-buffer-IO.patch b/contrib/kernel-patches/0001-cachefiles-optimize-on-demand-IO-path-with-buffer-IO.patch index 586496f4b56..2dca98f90eb 100644 --- a/contrib/kernel-patches/0001-cachefiles-optimize-on-demand-IO-path-with-buffer-IO.patch +++ b/contrib/kernel-patches/0001-cachefiles-optimize-on-demand-IO-path-with-buffer-IO.patch @@ -1,132 +1,132 @@ -From 304939a8dca54edd9833b27f1ca48435ade2ed49 Mon Sep 17 00:00:00 2001 -From: Xin Yin -Date: Thu, 8 Sep 2022 10:52:08 +0800 -Subject: [PATCH] cachefiles: optimize on-demand IO path with buffer IO - -The cachefiles framework use dio for local cache files filling -and reading, which may affects the performance for on-demand IO -path. - -Change to use buffer IO for cache files filling, and first try -to find data in the pagecache during cache files reading. After -the pagecache for cache files is recycled, we will not suffer from -double caching issue. - -Signed-off-by: Xin Yin ---- - fs/cachefiles/io.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 72 insertions(+), 2 deletions(-) - -diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c -index 000a28f46e59..636491806ff8 100644 ---- a/fs/cachefiles/io.c -+++ b/fs/cachefiles/io.c -@@ -11,9 +11,11 @@ - #include - #include - #include -+#include - #include - #include "internal.h" - -+ - struct cachefiles_kiocb { - struct kiocb iocb; - refcount_t ki_refcnt; -@@ -67,6 +69,60 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) - cachefiles_put_kiocb(ki); - } - -+static void cachefiles_page_copy(struct cachefiles_kiocb *ki, struct iov_iter *iter) -+{ -+ struct address_space *mapping = ki->iocb.ki_filp->f_mapping; -+ struct kiocb *iocb = &ki->iocb; -+ loff_t isize = i_size_read(mapping->host); -+ loff_t end = min_t(loff_t, isize, iocb->ki_pos + iov_iter_count(iter)); -+ struct pagevec pv; -+ pgoff_t index; -+ unsigned int i; -+ bool writably_mapped; -+ int error = 0; -+ -+ while (iocb->ki_pos < end && !error) { -+ index = iocb->ki_pos >> PAGE_SHIFT; -+ pv.nr = find_get_pages_contig(mapping, index, PAGEVEC_SIZE, pv.pages); -+ -+ if (pv.nr == 0) -+ break; -+ -+ writably_mapped = mapping_writably_mapped(mapping); -+ -+ for (i = 0; i < pv.nr; i++) { -+ struct page *page = pv.pages[i]; -+ unsigned int offset = iocb->ki_pos & ~PAGE_MASK; -+ unsigned int bytes = min_t(loff_t, end - iocb->ki_pos, -+ PAGE_SIZE - offset); -+ unsigned int copied; -+ -+ if (page->index * PAGE_SIZE >= end) -+ break; -+ -+ if (!PageUptodate(page)) { -+ error = -EFAULT; -+ break; -+ } -+ -+ if (writably_mapped) -+ flush_dcache_page(page); -+ -+ copied = copy_page_to_iter(page, offset, bytes, iter); -+ -+ iocb->ki_pos += copied; -+ if (copied < bytes) { -+ error = -EFAULT; -+ break; -+ } -+ } -+ -+ for (i = 0; i < pv.nr; i++) -+ put_page(pv.pages[i]); -+ } 
-+ -+} -+ - /* - * Initiate a read from the cache. - */ -@@ -155,8 +211,19 @@ static int cachefiles_read(struct netfs_cache_resources *cres, - trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); - old_nofs = memalloc_nofs_save(); - ret = cachefiles_inject_read_error(); -- if (ret == 0) -+ if (ret == 0) { -+ // for ondemand mode try to fill iter form pagecache first -+ if (cachefiles_in_ondemand_mode(object->volume->cache)) { -+ cachefiles_page_copy(ki, iter); -+ if (!iov_iter_count(iter)) { -+ memalloc_nofs_restore(old_nofs); -+ ki->was_async = false; -+ cachefiles_read_complete(&ki->iocb, len - skipped); -+ goto in_progress; -+ } -+ } - ret = vfs_iocb_iter_read(file, &ki->iocb, iter); -+ } - memalloc_nofs_restore(old_nofs); - switch (ret) { - case -EIOCBQUEUED: -@@ -308,7 +375,10 @@ int __cachefiles_write(struct cachefiles_object *object, - refcount_set(&ki->ki_refcnt, 2); - ki->iocb.ki_filp = file; - ki->iocb.ki_pos = start_pos; -- ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; -+ if (cachefiles_in_ondemand_mode(cache)) -+ ki->iocb.ki_flags = IOCB_WRITE; -+ else -+ ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; - ki->iocb.ki_ioprio = get_current_ioprio(); - ki->object = object; - ki->start = start_pos; --- -2.11.0 - +From 304939a8dca54edd9833b27f1ca48435ade2ed49 Mon Sep 17 00:00:00 2001 +From: Xin Yin +Date: Thu, 8 Sep 2022 10:52:08 +0800 +Subject: [PATCH] cachefiles: optimize on-demand IO path with buffer IO + +The cachefiles framework use dio for local cache files filling +and reading, which may affects the performance for on-demand IO +path. + +Change to use buffer IO for cache files filling, and first try +to find data in the pagecache during cache files reading. After +the pagecache for cache files is recycled, we will not suffer from +double caching issue. 
+ +Signed-off-by: Xin Yin +--- + fs/cachefiles/io.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 72 insertions(+), 2 deletions(-) + +diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c +index 000a28f46e59..636491806ff8 100644 +--- a/fs/cachefiles/io.c ++++ b/fs/cachefiles/io.c +@@ -11,9 +11,11 @@ + #include + #include + #include ++#include + #include + #include "internal.h" + ++ + struct cachefiles_kiocb { + struct kiocb iocb; + refcount_t ki_refcnt; +@@ -67,6 +69,60 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) + cachefiles_put_kiocb(ki); + } + ++static void cachefiles_page_copy(struct cachefiles_kiocb *ki, struct iov_iter *iter) ++{ ++ struct address_space *mapping = ki->iocb.ki_filp->f_mapping; ++ struct kiocb *iocb = &ki->iocb; ++ loff_t isize = i_size_read(mapping->host); ++ loff_t end = min_t(loff_t, isize, iocb->ki_pos + iov_iter_count(iter)); ++ struct pagevec pv; ++ pgoff_t index; ++ unsigned int i; ++ bool writably_mapped; ++ int error = 0; ++ ++ while (iocb->ki_pos < end && !error) { ++ index = iocb->ki_pos >> PAGE_SHIFT; ++ pv.nr = find_get_pages_contig(mapping, index, PAGEVEC_SIZE, pv.pages); ++ ++ if (pv.nr == 0) ++ break; ++ ++ writably_mapped = mapping_writably_mapped(mapping); ++ ++ for (i = 0; i < pv.nr; i++) { ++ struct page *page = pv.pages[i]; ++ unsigned int offset = iocb->ki_pos & ~PAGE_MASK; ++ unsigned int bytes = min_t(loff_t, end - iocb->ki_pos, ++ PAGE_SIZE - offset); ++ unsigned int copied; ++ ++ if (page->index * PAGE_SIZE >= end) ++ break; ++ ++ if (!PageUptodate(page)) { ++ error = -EFAULT; ++ break; ++ } ++ ++ if (writably_mapped) ++ flush_dcache_page(page); ++ ++ copied = copy_page_to_iter(page, offset, bytes, iter); ++ ++ iocb->ki_pos += copied; ++ if (copied < bytes) { ++ error = -EFAULT; ++ break; ++ } ++ } ++ ++ for (i = 0; i < pv.nr; i++) ++ put_page(pv.pages[i]); ++ } ++ ++} ++ + /* + * Initiate a read from the cache. + */ +@@ -155,8 +211,19 @@ static int cachefiles_read(struct netfs_cache_resources *cres, + trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); + old_nofs = memalloc_nofs_save(); + ret = cachefiles_inject_read_error(); +- if (ret == 0) ++ if (ret == 0) { ++ // for ondemand mode try to fill iter form pagecache first ++ if (cachefiles_in_ondemand_mode(object->volume->cache)) { ++ cachefiles_page_copy(ki, iter); ++ if (!iov_iter_count(iter)) { ++ memalloc_nofs_restore(old_nofs); ++ ki->was_async = false; ++ cachefiles_read_complete(&ki->iocb, len - skipped); ++ goto in_progress; ++ } ++ } + ret = vfs_iocb_iter_read(file, &ki->iocb, iter); ++ } + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: +@@ -308,7 +375,10 @@ int __cachefiles_write(struct cachefiles_object *object, + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos; +- ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; ++ if (cachefiles_in_ondemand_mode(cache)) ++ ki->iocb.ki_flags = IOCB_WRITE; ++ else ++ ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->object = object; + ki->start = start_pos; +-- +2.11.0 + diff --git a/contrib/nydus-backend-proxy/Cargo.lock b/contrib/nydus-backend-proxy/Cargo.lock index eb5d4520f6d..39c40699e71 100644 --- a/contrib/nydus-backend-proxy/Cargo.lock +++ b/contrib/nydus-backend-proxy/Cargo.lock @@ -1,1663 +1,1663 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "anstream" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" - -[[package]] -name = "anstyle-parse" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" -dependencies = [ - "anstyle", - "windows-sys 0.52.0", -] - -[[package]] -name = "async-stream" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "async-trait" -version = "0.1.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "atomic" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" - -[[package]] -name = "atomic" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "autocfg" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" - -[[package]] -name = "backtrace" -version = "0.3.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "binascii" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" - -[[package]] -name = "bytemuck" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" - -[[package]] -name = "bytes" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" - -[[package]] -name = "cc" -version = "1.0.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cfg_aliases" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" - -[[package]] -name = "clap" -version = "4.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_lex" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - -[[package]] -name = "cookie" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" -dependencies = [ - "percent-encoding", - "time", - "version_check", -] - -[[package]] -name = "deranged" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "devise" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eacefd3f541c66fc61433d65e54e0e46e0a029a819a7dbbc7a7b489e8a85f8" -dependencies = [ - "devise_codegen", - "devise_core", -] - -[[package]] -name = "devise_codegen" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9c8cf4b8dd484ede80fd5c547592c46c3745a617c8af278e2b72bea86b2dfed6" -dependencies = [ - "devise_core", - "quote", -] - -[[package]] -name = "devise_core" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b50dba0afdca80b187392b24f2499a88c336d5a8493e4b4ccfb608708be56a" -dependencies = [ - "bitflags 2.5.0", - "proc-macro2", - "proc-macro2-diagnostics", - "quote", - "syn", -] - -[[package]] -name = "either" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" - -[[package]] -name = "encoding_rs" -version = "0.8.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "fastrand" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" - -[[package]] -name = "figment" -version = "0.10.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7270677e7067213e04f323b55084586195f18308cd7546cfac9f873344ccceb6" -dependencies = [ - "atomic 0.6.0", - "pear", - "serde", - "toml", - "uncased", - "version_check", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generator" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" -dependencies = [ - "cc", - "libc", - "log", - "rustversion", - "windows", -] - -[[package]] -name = "getrandom" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "hashbrown" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" - -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "http-range" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573" - -[[package]] -name = "httparse" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "hyper" -version = "0.14.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "indexmap" -version = "2.2.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" -dependencies = [ - "equivalent", - "hashbrown", - "serde", -] - -[[package]] -name = "inlinable_string" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" - -[[package]] -name = "is-terminal" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.153" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" - -[[package]] -name = "linux-raw-sys" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" - -[[package]] -name = "loom" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "serde", - "serde_json", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - -[[package]] -name = "memchr" -version = "2.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "miniz_oxide" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" -dependencies = [ - "adler", -] - -[[package]] -name = "mio" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" -dependencies = [ - "libc", - "wasi", - "windows-sys 0.48.0", -] - -[[package]] -name = "multer" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" -dependencies = [ - "bytes", - "encoding_rs", - "futures-util", - "http", - "httparse", - "log", - "memchr", - "mime", - "spin", - "tokio", - "tokio-util", - "version_check", -] - -[[package]] -name = "nix" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" -dependencies = [ - "bitflags 2.5.0", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "nydus-backend-proxy" -version = "0.2.0" -dependencies = [ - "clap", - "http-range", - "lazy_static", - "nix", - "once_cell", - "rocket", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.48.5", -] - -[[package]] -name = "pear" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" -dependencies = [ - "inlinable_string", - "pear_codegen", - "yansi", -] - -[[package]] -name = "pear_codegen" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" -dependencies = [ - "proc-macro2", - "proc-macro2-diagnostics", - "quote", - "syn", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - 
-[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "proc-macro2" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "proc-macro2-diagnostics" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "version_check", - "yansi", -] - -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "ref-cast" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "regex" -version = "1.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", -] - -[[package]] -name = "regex-automata" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.3", -] - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] -name = "rocket" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e7bb57ccb26670d73b6a47396c83139447b9e7878cab627fdfe9ea8da489150" -dependencies = [ - "async-stream", - "async-trait", - "atomic 0.5.3", - "binascii", - "bytes", - "either", - "figment", - "futures", - "indexmap", - "log", - "memchr", - "multer", - "num_cpus", - "parking_lot", - "pin-project-lite", - "rand", - "ref-cast", - "rocket_codegen", - "rocket_http", - "serde", - "state", - "tempfile", - "time", - "tokio", - "tokio-stream", - "tokio-util", - "ubyte", - "version_check", - "yansi", -] - -[[package]] -name = "rocket_codegen" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2238066abf75f21be6cd7dc1a09d5414a671f4246e384e49fe3f8a4936bd04c" -dependencies = [ - "devise", - "glob", - "indexmap", - "proc-macro2", - "quote", - "rocket_http", - "syn", - "unicode-xid", - "version_check", -] - -[[package]] -name = "rocket_http" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a1663694d059fe5f943ea5481363e48050acedd241d46deb2e27f71110389e" -dependencies = [ - "cookie", - "either", - "futures", - "http", - "hyper", - "indexmap", - "log", - "memchr", - "pear", - "percent-encoding", - "pin-project-lite", - "ref-cast", - "serde", - "smallvec", - "stable-pattern", - "state", - "time", - "tokio", - "uncased", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustix" -version = "0.38.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" -dependencies = [ - "bitflags 2.5.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustversion" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" - -[[package]] -name = "ryu" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" - -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "serde" -version = "1.0.197" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" -dependencies = [ - "serde_derive", -] - -[[package]] 
-name = "serde_derive" -version = "1.0.197" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.115" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_spanned" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" -dependencies = [ - "serde", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "socket2" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - -[[package]] -name = "stable-pattern" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4564168c00635f88eaed410d5efa8131afa8d8699a612c80c455a0ba05c21045" -dependencies = [ - "memchr", -] - -[[package]] -name = "state" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b8c4a4445d81357df8b1a650d0d0d6fbbbfe99d064aa5e02f3e4022061476d8" -dependencies = [ - "loom", -] - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "syn" -version = "2.0.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tempfile" -version = "3.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" -dependencies = [ - "cfg-if", - "fastrand", - "rustix", - "windows-sys 0.52.0", -] - -[[package]] -name = "thread_local" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" -dependencies = [ - "cfg-if", - "once_cell", -] - 
-[[package]] -name = "time" -version = "0.3.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "time-macros" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tokio" -version = "1.37.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "windows-sys 0.48.0", -] - -[[package]] -name = "tokio-macros" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-stream" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", - "tracing", -] - -[[package]] -name = "toml" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", -] - -[[package]] -name = "toml_datetime" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" -dependencies = [ - "serde", -] - -[[package]] -name = "toml_edit" -version = "0.22.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" -dependencies = [ - "indexmap", - "serde", - "serde_spanned", - "toml_datetime", - "winnow", -] - -[[package]] -name = "tower-service" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" - -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - 
-[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "ubyte" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f720def6ce1ee2fc44d40ac9ed6d3a59c361c80a75a7aa8e75bb9baed31cf2ea" -dependencies = [ - "serde", -] - -[[package]] -name = "uncased" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" -dependencies = [ - "serde", - "version_check", -] - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = 
"winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.4", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" -dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" - -[[package]] -name = 
"windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" - -[[package]] -name = "winnow" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" -dependencies = [ - "memchr", -] - -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" -dependencies = [ - "is-terminal", -] +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "async-stream" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atomic" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" + +[[package]] +name = "atomic" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "autocfg" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "binascii" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + +[[package]] +name = "clap" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "devise" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6eacefd3f541c66fc61433d65e54e0e46e0a029a819a7dbbc7a7b489e8a85f8" +dependencies = [ + "devise_codegen", + "devise_core", +] + +[[package]] +name = "devise_codegen" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9c8cf4b8dd484ede80fd5c547592c46c3745a617c8af278e2b72bea86b2dfed6" +dependencies = [ + "devise_core", + "quote", +] + +[[package]] +name = "devise_core" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b50dba0afdca80b187392b24f2499a88c336d5a8493e4b4ccfb608708be56a" +dependencies = [ + "bitflags 2.5.0", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" + +[[package]] +name = "figment" +version = "0.10.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7270677e7067213e04f323b55084586195f18308cd7546cfac9f873344ccceb6" +dependencies = [ + "atomic 0.6.0", + "pear", + "serde", + "toml", + "uncased", + "version_check", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + +[[package]] +name = "getrandom" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "http-range" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573" + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", + "serde", +] + +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "serde", + "serde_json", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http", + "httparse", + "log", + "memchr", + "mime", + "spin", + "tokio", + "tokio-util", + "version_check", +] + +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "nydus-backend-proxy" +version = "0.2.0" +dependencies = [ + "clap", + "http-range", + "lazy_static", + "nix", + "once_cell", + "rocket", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "pear" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + 
+[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "ref-cast" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "rocket" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e7bb57ccb26670d73b6a47396c83139447b9e7878cab627fdfe9ea8da489150" +dependencies = [ + "async-stream", + "async-trait", + "atomic 0.5.3", + "binascii", + "bytes", + "either", + "figment", + "futures", + "indexmap", + "log", + "memchr", + "multer", + "num_cpus", + "parking_lot", + "pin-project-lite", + "rand", + "ref-cast", + "rocket_codegen", + "rocket_http", + "serde", + "state", + "tempfile", + "time", + "tokio", + "tokio-stream", + "tokio-util", + "ubyte", + "version_check", + "yansi", +] + +[[package]] +name = "rocket_codegen" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2238066abf75f21be6cd7dc1a09d5414a671f4246e384e49fe3f8a4936bd04c" +dependencies = [ + "devise", + "glob", + "indexmap", + "proc-macro2", + "quote", + "rocket_http", + "syn", + "unicode-xid", + "version_check", +] + +[[package]] +name = "rocket_http" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37a1663694d059fe5f943ea5481363e48050acedd241d46deb2e27f71110389e" +dependencies = [ + "cookie", + "either", + "futures", + "http", + "hyper", + "indexmap", + "log", + "memchr", + "pear", + "percent-encoding", + "pin-project-lite", + "ref-cast", + "serde", + "smallvec", + "stable-pattern", + "state", + "time", + "tokio", + "uncased", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" + +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] 
+name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable-pattern" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4564168c00635f88eaed410d5efa8131afa8d8699a612c80c455a0ba05c21045" +dependencies = [ + "memchr", +] + +[[package]] +name = "state" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b8c4a4445d81357df8b1a650d0d0d6fbbbfe99d064aa5e02f3e4022061476d8" +dependencies = [ + "loom", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + 
+[[package]] +name = "time" +version = "0.3.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "toml" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "ubyte" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f720def6ce1ee2fc44d40ac9ed6d3a59c361c80a75a7aa8e75bb9baed31cf2ea" +dependencies = [ + "serde", +] + +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "serde", + "version_check", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = 
"winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + +[[package]] +name = 
"windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + +[[package]] +name = "winnow" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" +dependencies = [ + "memchr", +] + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +dependencies = [ + "is-terminal", +] diff --git a/contrib/nydus-backend-proxy/Cargo.toml b/contrib/nydus-backend-proxy/Cargo.toml index 1dca6d8b63b..05991e39f3d 100644 --- a/contrib/nydus-backend-proxy/Cargo.toml +++ b/contrib/nydus-backend-proxy/Cargo.toml @@ -1,19 +1,19 @@ -[package] -name = "nydus-backend-proxy" -version = "0.2.0" -authors = ["The Nydus Developers"] -description = "A simple HTTP server to provide a fake container registry for nydusd" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" -license = "Apache-2.0" - -[dependencies] -rocket = "0.5.0" -http-range = "0.1.5" -nix = { version = "0.28", features = ["uio"] } -clap = "4.4" -once_cell = "1.19.0" -lazy_static = "1.4" - -[workspace] +[package] +name = "nydus-backend-proxy" +version = "0.2.0" +authors = ["The Nydus Developers"] +description = "A simple HTTP server to provide a fake container registry for nydusd" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" +license = "Apache-2.0" + +[dependencies] +rocket = "0.5.0" +http-range = "0.1.5" +nix = { version = "0.28", features = ["uio"] } +clap = "4.4" +once_cell = "1.19.0" +lazy_static = "1.4" + +[workspace] diff --git a/contrib/nydus-backend-proxy/LICENSE-APACHE b/contrib/nydus-backend-proxy/LICENSE-APACHE index d6456956733..75b52484ea4 100644 --- a/contrib/nydus-backend-proxy/LICENSE-APACHE +++ b/contrib/nydus-backend-proxy/LICENSE-APACHE @@ -1,202 +1,202 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
+ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/nydus-backend-proxy/Makefile b/contrib/nydus-backend-proxy/Makefile index ce34df0e303..660fae82515 100644 --- a/contrib/nydus-backend-proxy/Makefile +++ b/contrib/nydus-backend-proxy/Makefile @@ -1,23 +1,23 @@ -all:.format build - -current_dir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -rust_arch := $(shell uname -p) - -.musl_target: - $(eval CARGO_BUILD_FLAGS += --target ${rust_arch}-unknown-linux-musl) - -.release_version: - $(eval CARGO_BUILD_FLAGS += --release) - -.format: - cargo fmt -- --check - -build: - cargo build $(CARGO_BUILD_FLAGS) - -release: .format .release_version build - -static-release: .musl_target .format .release_version build - -clean: - cargo clean +all:.format build + +current_dir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +rust_arch := $(shell uname -p) + +.musl_target: + $(eval CARGO_BUILD_FLAGS += --target ${rust_arch}-unknown-linux-musl) + +.release_version: + $(eval CARGO_BUILD_FLAGS += --release) + +.format: + cargo fmt -- --check + +build: + cargo build $(CARGO_BUILD_FLAGS) + +release: .format .release_version build + +static-release: .musl_target .format .release_version build + +clean: + cargo clean diff --git a/contrib/nydus-backend-proxy/README.md b/contrib/nydus-backend-proxy/README.md index 67080af5e16..742e947e47c 100644 --- a/contrib/nydus-backend-proxy/README.md +++ b/contrib/nydus-backend-proxy/README.md @@ -1,104 +1,104 @@ -# nydus-backend-proxy -A simple HTTP server to serve a local directory as blob backend for nydusd. - -In some scenarios such as [sealer](https://github.com/alibaba/sealer), it uses nydus to boost up cluster image distribution. There is no registry (OCI distribution) or OSS service available for blob storage, so we need a simple HTTP server to serve a local directory as blob backend for nydusd. This server exposes OCI distribution like API to handle HTTP HEAD and range GET requests from nydusd for checking and fetching blob. 
- -## Definition for response -support the following APIs: -```bash -HEAD /$namespace/$repo/blobs/sha256:xxx ### Check Blob -GET /$namespace/$repo/blobs/sha256:xxx ### Fetch Blob -``` -### Check Blob -``` -HEAD /v2//blobs/ -``` -On Success: OK -``` -200 OK -Content-Length: -Docker-Content-Digest: - -``` -### Fetch Blob -``` -GET /v2//blobs/ -Host: -``` -On Success: OK -``` -200 OK -Content-Length: -Docker-Content-Digest: -Content-Type: application/octet-stream - - -``` -On Failure: Not Found -``` -404 Not Found -``` -### Fetch Blob in Chunks -``` -GET /v2//blobs/ -Host: -Range: bytes=- -``` -On Success: OK -``` -200 OK -Content-Length: -Docker-Content-Digest: -Content-Range: bytes -/ -Content-Type: application/octet-stream - - -``` -On Failure: Not Found -``` -404 Not Found -``` - -On Failure: Range Not Satisfiable -``` -416 Range Not Satisfiable -``` - -## How to use - -### Run nydus-backend-proxy -```bash -./nydus-backend-proxy --blobsdir /path/to/nydus/blobs/dir -``` -### Nydusd config -reuse nydusd registry backend -```bash -#cat httpserver.json -{ - "device": { - "backend": { - "type": "registry", - "config": { - "scheme": "http", - "host": "xxx.xxx.xxx.xxx:8000", - "repo": "xxxx" - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "./cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 2, - "merging_size": 131072, - "bandwidth_rate":10485760 - } -} -``` +# nydus-backend-proxy +A simple HTTP server to serve a local directory as blob backend for nydusd. + +In some scenarios such as [sealer](https://github.com/alibaba/sealer), it uses nydus to boost up cluster image distribution. There is no registry (OCI distribution) or OSS service available for blob storage, so we need a simple HTTP server to serve a local directory as blob backend for nydusd. This server exposes OCI distribution like API to handle HTTP HEAD and range GET requests from nydusd for checking and fetching blob. 
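As a concrete illustration of the flow above (and of the endpoints defined in the next section), here is a minimal Go sketch of how a client such as nydusd might first probe a blob with HEAD and then fetch part of it with a ranged GET. The listen address, the namespace/repo path segments, and the digest are placeholders, not values defined by this project; the proxy's actual route shape and response headers are documented below.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder endpoint and digest; adjust to match the running proxy.
	base := "http://127.0.0.1:8000/myns/myrepo/blobs/"
	digest := "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"

	// 1. Check that the blob exists (HEAD).
	head, err := http.Head(base + digest)
	if err != nil {
		panic(err)
	}
	head.Body.Close()
	fmt.Println("HEAD status:", head.Status)
	fmt.Println("Docker-Content-Digest:", head.Header.Get("Docker-Content-Digest"))

	// 2. Fetch the first 4 KiB of the blob with a ranged GET.
	req, err := http.NewRequest(http.MethodGet, base+digest, nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Range", "bytes=0-4095")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println("GET status:", resp.Status)
	fmt.Println("Content-Range:", resp.Header.Get("Content-Range"))
	fmt.Println("bytes received:", len(data))
}
```

Note that, per the response definitions below, the proxy answers ranged requests with `200 OK` plus a `Content-Range` header rather than `206 Partial Content`, so a client should key off the header rather than the status code when checking how much data came back.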
+ +## Definition for response +support the following APIs: +```bash +HEAD /$namespace/$repo/blobs/sha256:xxx ### Check Blob +GET /$namespace/$repo/blobs/sha256:xxx ### Fetch Blob +``` +### Check Blob +``` +HEAD /v2//blobs/ +``` +On Success: OK +``` +200 OK +Content-Length: +Docker-Content-Digest: + +``` +### Fetch Blob +``` +GET /v2//blobs/ +Host: +``` +On Success: OK +``` +200 OK +Content-Length: +Docker-Content-Digest: +Content-Type: application/octet-stream + + +``` +On Failure: Not Found +``` +404 Not Found +``` +### Fetch Blob in Chunks +``` +GET /v2//blobs/ +Host: +Range: bytes=- +``` +On Success: OK +``` +200 OK +Content-Length: +Docker-Content-Digest: +Content-Range: bytes -/ +Content-Type: application/octet-stream + + +``` +On Failure: Not Found +``` +404 Not Found +``` + +On Failure: Range Not Satisfiable +``` +416 Range Not Satisfiable +``` + +## How to use + +### Run nydus-backend-proxy +```bash +./nydus-backend-proxy --blobsdir /path/to/nydus/blobs/dir +``` +### Nydusd config +reuse nydusd registry backend +```bash +#cat httpserver.json +{ + "device": { + "backend": { + "type": "registry", + "config": { + "scheme": "http", + "host": "xxx.xxx.xxx.xxx:8000", + "repo": "xxxx" + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "./cache" + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 2, + "merging_size": 131072, + "bandwidth_rate":10485760 + } +} +``` diff --git a/contrib/nydus-backend-proxy/src/main.rs b/contrib/nydus-backend-proxy/src/main.rs index 2b75468de0b..6ab3fe99ede 100644 --- a/contrib/nydus-backend-proxy/src/main.rs +++ b/contrib/nydus-backend-proxy/src/main.rs @@ -1,301 +1,301 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -use std::collections::HashMap; -use std::env; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::{fs, io}; - -use clap::*; -use http_range::HttpRange; -use lazy_static::lazy_static; -use nix::sys::uio; -use rocket::fs::{FileServer, NamedFile}; -use rocket::futures::lock::{Mutex, MutexGuard}; -use rocket::http::Status; -use rocket::request::{self, FromRequest, Outcome}; -use rocket::response::{self, stream::ReaderStream, Responder}; -use rocket::*; - -lazy_static! 
{ - static ref BLOB_BACKEND: Mutex = Mutex::new(BlobBackend { - root: PathBuf::default(), - blobs: HashMap::new() - }); -} - -async fn blob_backend_mut() -> MutexGuard<'static, BlobBackend> { - BLOB_BACKEND.lock().await -} - -async fn init_blob_backend(root: &Path) { - let mut b = BlobBackend { - root: root.to_path_buf(), - blobs: HashMap::new(), - }; - - b.populate_blobs_map(); - *BLOB_BACKEND.lock().await = b; -} - -#[derive(Debug)] -struct BlobBackend { - root: PathBuf, - blobs: HashMap>, -} - -impl BlobBackend { - fn populate_blobs_map(&mut self) { - for entry in self - .root - .read_dir() - .expect("read blobsdir failed") - .flatten() - { - let filepath = entry.path(); - if filepath.is_file() { - // Collaborating system should put files with valid name which - // can also be converted to UTF-8 - let digest = filepath.file_name().unwrap().to_string_lossy(); - if self.blobs.contains_key(digest.as_ref()) { - continue; - } - - match fs::File::open(&filepath) { - Ok(f) => { - self.blobs.insert(digest.into_owned(), Arc::new(f)); - } - Err(e) => warn!("failed to open file {}, {}", digest, e), - } - } else { - debug!("%s: Not regular file"); - } - } - } -} - -#[derive(Debug)] -struct HeaderData { - _host: String, - range: String, -} - -#[rocket::async_trait] -impl<'r> FromRequest<'r> for HeaderData { - type Error = Status; - - async fn from_request(req: &'r Request<'_>) -> request::Outcome { - let headers = req.headers(); - let _host = headers.get_one("Host").unwrap_or_default().to_string(); - let range = headers.get_one("Range").unwrap_or_default().to_string(); - - Outcome::Success(HeaderData { _host, range }) - } -} - -#[rocket::head("/<_namespace>/<_repo>/blobs/")] -async fn check( - _namespace: PathBuf, - _repo: PathBuf, - digest: String, -) -> Result, Status> { - if !digest.starts_with("sha256:") { - return Err(Status::BadRequest); - } - - // Trim "sha256:" prefix - let dis = &digest[7..]; - let backend = blob_backend_mut(); - let path = backend.await.root.join(&dis); - - NamedFile::open(path) - .await - .map_err(|_e| Status::NotFound) - .map(|nf| Some(FileStream(nf, dis.to_string()))) -} - -/* fetch blob response - * NamedFile: blob data - * String: Docker-Content-Digest - */ -struct FileStream(NamedFile, String); - -impl<'r> Responder<'r, 'static> for FileStream { - fn respond_to(self, req: &'r Request<'_>) -> response::Result<'static> { - let res = self.0.respond_to(req)?; - Response::build_from(res) - .raw_header("Docker-Content-Digest", self.1) - .raw_header("Content-Type", "application/octet-stream") - .ok() - } -} - -/* fetch blob part response(stream) - * path: path of blob - * dis: Docker-Content-Digest - * start & end: "Content-Range: bytes -/" - */ -struct RangeStream { - dis: String, - start: u64, - len: u64, - file: Arc, -} - -impl RangeStream { - fn get_rangestr(&self) -> String { - let endpos = self.start + self.len - 1; - format!("bytes {}-{}/{}", self.start, endpos, self.len) - } -} - -impl<'r> Responder<'r, 'static> for RangeStream { - fn respond_to(self, _req: &'r Request<'_>) -> response::Result<'static> { - const BUFSIZE: usize = 4096; - let mut buf = vec![0; BUFSIZE]; - let mut read = 0u64; - let startpos = self.start as i64; - let size = self.len; - let file = self.file.clone(); - - Response::build() - .streamed_body(ReaderStream! 
{ - while read < size { - match uio::pread(file.as_ref(), &mut buf, startpos + read as i64) { - Ok(mut n) => { - n = std::cmp::min(n, (size - read) as usize); - read += n as u64; - if n == 0 { - break; - } else if n < BUFSIZE { - yield io::Cursor::new(buf[0..n].to_vec()); - } else { - yield io::Cursor::new(buf.clone()); - } - } - Err(err) => { - eprintln!("ReaderStream Error: {}", err); - break; - } - } - } - }) - .raw_header("Content-Range", self.get_rangestr()) - .raw_header("Docker-Content-Digest", self.dis) - .raw_header("Content-Type", "application/octet-stream") - .ok() - } -} - -#[derive(Responder)] -enum StoredData { - AllFile(FileStream), - Range(RangeStream), -} - -#[get("/<_namespace>/<_repo>/blobs/")] -async fn fetch( - _namespace: PathBuf, - _repo: PathBuf, - digest: String, - header_data: HeaderData, -) -> Result { - if !digest.starts_with("sha256:") { - return Err(Status::BadRequest); - } - - // Trim "sha256:" prefix - let dis = &digest[7..]; - - //if no range in Request header,return fetch blob response - if header_data.range.is_empty() { - let filepath = blob_backend_mut().await.root.join(&dis); - NamedFile::open(filepath) - .await - .map_err(|_e| Status::NotFound) - .map(|nf| StoredData::AllFile(FileStream(nf, dis.to_string()))) - } else { - let mut guard = blob_backend_mut().await; - let blob_file = if let Some(f) = guard.blobs.get(dis) { - f.clone() - } else { - trace!("Blob object not found: {}", dis); - // Re-populate blobs map by `readdir()` again to scan if files are newly added. - guard.populate_blobs_map(); - trace!("re-populating to search blob {}", dis); - guard.blobs.get(dis).cloned().ok_or_else(|| { - error!("Blob {} not found finally!", dis); - Status::NotFound - })? - }; - drop(guard); - - let metadata = match blob_file.metadata() { - Ok(meta) => meta, - Err(e) => { - eprintln!("Get file metadata failed! Error: {}", e); - return Err(Status::InternalServerError); - } - }; - - let ranges = match HttpRange::parse(&header_data.range, metadata.len()) { - Ok(r) => r, - Err(e) => { - eprintln!("HttpRange parse failed! Error: {:#?}", e); - return Err(Status::RangeNotSatisfiable); - } - }; - let start_pos = ranges[0].start as u64; - let size = ranges[0].length; - - Ok(StoredData::Range(RangeStream { - dis: dis.to_string(), - len: size, - start: start_pos, - file: blob_file, - })) - } -} - -#[rocket::main] -async fn main() { - let cmd = Command::new("nydus-backend-proxy") - .author(env!("CARGO_PKG_AUTHORS")) - .version(env!("CARGO_PKG_VERSION")) - .about("A simple HTTP server to provide a fake container registry for nydusd.") - .arg( - Arg::new("blobsdir") - .short('b') - .long("blobsdir") - .required(true) - .help("path to directory hosting nydus blob files"), - ) - .help_template( - "\ -{before-help}{name} {version} -{author-with-newline}{about-with-newline} -{usage-heading} {usage} - -{all-args}{after-help} - ", - ) - .get_matches(); - // Safe to unwrap() because `blobsdir` takes a value. - let path = cmd - .get_one::("blobsdir") - .expect("required argument"); - - init_blob_backend(Path::new(path)).await; - - if let Err(e) = rocket::build() - .mount("/", rocket::routes![check, fetch]) - .mount("/", FileServer::from(&path)) - .launch() - .await - { - error!("Rocket failed to launch, {:#?}", e); - std::process::exit(-1); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +use std::collections::HashMap; +use std::env; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::{fs, io}; + +use clap::*; +use http_range::HttpRange; +use lazy_static::lazy_static; +use nix::sys::uio; +use rocket::fs::{FileServer, NamedFile}; +use rocket::futures::lock::{Mutex, MutexGuard}; +use rocket::http::Status; +use rocket::request::{self, FromRequest, Outcome}; +use rocket::response::{self, stream::ReaderStream, Responder}; +use rocket::*; + +lazy_static! { + static ref BLOB_BACKEND: Mutex = Mutex::new(BlobBackend { + root: PathBuf::default(), + blobs: HashMap::new() + }); +} + +async fn blob_backend_mut() -> MutexGuard<'static, BlobBackend> { + BLOB_BACKEND.lock().await +} + +async fn init_blob_backend(root: &Path) { + let mut b = BlobBackend { + root: root.to_path_buf(), + blobs: HashMap::new(), + }; + + b.populate_blobs_map(); + *BLOB_BACKEND.lock().await = b; +} + +#[derive(Debug)] +struct BlobBackend { + root: PathBuf, + blobs: HashMap>, +} + +impl BlobBackend { + fn populate_blobs_map(&mut self) { + for entry in self + .root + .read_dir() + .expect("read blobsdir failed") + .flatten() + { + let filepath = entry.path(); + if filepath.is_file() { + // Collaborating system should put files with valid name which + // can also be converted to UTF-8 + let digest = filepath.file_name().unwrap().to_string_lossy(); + if self.blobs.contains_key(digest.as_ref()) { + continue; + } + + match fs::File::open(&filepath) { + Ok(f) => { + self.blobs.insert(digest.into_owned(), Arc::new(f)); + } + Err(e) => warn!("failed to open file {}, {}", digest, e), + } + } else { + debug!("%s: Not regular file"); + } + } + } +} + +#[derive(Debug)] +struct HeaderData { + _host: String, + range: String, +} + +#[rocket::async_trait] +impl<'r> FromRequest<'r> for HeaderData { + type Error = Status; + + async fn from_request(req: &'r Request<'_>) -> request::Outcome { + let headers = req.headers(); + let _host = headers.get_one("Host").unwrap_or_default().to_string(); + let range = headers.get_one("Range").unwrap_or_default().to_string(); + + Outcome::Success(HeaderData { _host, range }) + } +} + +#[rocket::head("/<_namespace>/<_repo>/blobs/")] +async fn check( + _namespace: PathBuf, + _repo: PathBuf, + digest: String, +) -> Result, Status> { + if !digest.starts_with("sha256:") { + return Err(Status::BadRequest); + } + + // Trim "sha256:" prefix + let dis = &digest[7..]; + let backend = blob_backend_mut(); + let path = backend.await.root.join(&dis); + + NamedFile::open(path) + .await + .map_err(|_e| Status::NotFound) + .map(|nf| Some(FileStream(nf, dis.to_string()))) +} + +/* fetch blob response + * NamedFile: blob data + * String: Docker-Content-Digest + */ +struct FileStream(NamedFile, String); + +impl<'r> Responder<'r, 'static> for FileStream { + fn respond_to(self, req: &'r Request<'_>) -> response::Result<'static> { + let res = self.0.respond_to(req)?; + Response::build_from(res) + .raw_header("Docker-Content-Digest", self.1) + .raw_header("Content-Type", "application/octet-stream") + .ok() + } +} + +/* fetch blob part response(stream) + * path: path of blob + * dis: Docker-Content-Digest + * start & end: "Content-Range: bytes -/" + */ +struct RangeStream { + dis: String, + start: u64, + len: u64, + file: Arc, +} + +impl RangeStream { + fn get_rangestr(&self) -> String { + let endpos = self.start + self.len - 1; + format!("bytes {}-{}/{}", self.start, endpos, self.len) + } +} + +impl<'r> Responder<'r, 'static> for RangeStream { 
+ fn respond_to(self, _req: &'r Request<'_>) -> response::Result<'static> { + const BUFSIZE: usize = 4096; + let mut buf = vec![0; BUFSIZE]; + let mut read = 0u64; + let startpos = self.start as i64; + let size = self.len; + let file = self.file.clone(); + + Response::build() + .streamed_body(ReaderStream! { + while read < size { + match uio::pread(file.as_ref(), &mut buf, startpos + read as i64) { + Ok(mut n) => { + n = std::cmp::min(n, (size - read) as usize); + read += n as u64; + if n == 0 { + break; + } else if n < BUFSIZE { + yield io::Cursor::new(buf[0..n].to_vec()); + } else { + yield io::Cursor::new(buf.clone()); + } + } + Err(err) => { + eprintln!("ReaderStream Error: {}", err); + break; + } + } + } + }) + .raw_header("Content-Range", self.get_rangestr()) + .raw_header("Docker-Content-Digest", self.dis) + .raw_header("Content-Type", "application/octet-stream") + .ok() + } +} + +#[derive(Responder)] +enum StoredData { + AllFile(FileStream), + Range(RangeStream), +} + +#[get("/<_namespace>/<_repo>/blobs/")] +async fn fetch( + _namespace: PathBuf, + _repo: PathBuf, + digest: String, + header_data: HeaderData, +) -> Result { + if !digest.starts_with("sha256:") { + return Err(Status::BadRequest); + } + + // Trim "sha256:" prefix + let dis = &digest[7..]; + + //if no range in Request header,return fetch blob response + if header_data.range.is_empty() { + let filepath = blob_backend_mut().await.root.join(&dis); + NamedFile::open(filepath) + .await + .map_err(|_e| Status::NotFound) + .map(|nf| StoredData::AllFile(FileStream(nf, dis.to_string()))) + } else { + let mut guard = blob_backend_mut().await; + let blob_file = if let Some(f) = guard.blobs.get(dis) { + f.clone() + } else { + trace!("Blob object not found: {}", dis); + // Re-populate blobs map by `readdir()` again to scan if files are newly added. + guard.populate_blobs_map(); + trace!("re-populating to search blob {}", dis); + guard.blobs.get(dis).cloned().ok_or_else(|| { + error!("Blob {} not found finally!", dis); + Status::NotFound + })? + }; + drop(guard); + + let metadata = match blob_file.metadata() { + Ok(meta) => meta, + Err(e) => { + eprintln!("Get file metadata failed! Error: {}", e); + return Err(Status::InternalServerError); + } + }; + + let ranges = match HttpRange::parse(&header_data.range, metadata.len()) { + Ok(r) => r, + Err(e) => { + eprintln!("HttpRange parse failed! Error: {:#?}", e); + return Err(Status::RangeNotSatisfiable); + } + }; + let start_pos = ranges[0].start as u64; + let size = ranges[0].length; + + Ok(StoredData::Range(RangeStream { + dis: dis.to_string(), + len: size, + start: start_pos, + file: blob_file, + })) + } +} + +#[rocket::main] +async fn main() { + let cmd = Command::new("nydus-backend-proxy") + .author(env!("CARGO_PKG_AUTHORS")) + .version(env!("CARGO_PKG_VERSION")) + .about("A simple HTTP server to provide a fake container registry for nydusd.") + .arg( + Arg::new("blobsdir") + .short('b') + .long("blobsdir") + .required(true) + .help("path to directory hosting nydus blob files"), + ) + .help_template( + "\ +{before-help}{name} {version} +{author-with-newline}{about-with-newline} +{usage-heading} {usage} + +{all-args}{after-help} + ", + ) + .get_matches(); + // Safe to unwrap() because `blobsdir` takes a value. 
+ let path = cmd + .get_one::("blobsdir") + .expect("required argument"); + + init_blob_backend(Path::new(path)).await; + + if let Err(e) = rocket::build() + .mount("/", rocket::routes![check, fetch]) + .mount("/", FileServer::from(&path)) + .launch() + .await + { + error!("Rocket failed to launch, {:#?}", e); + std::process::exit(-1); + } +} diff --git a/contrib/nydus-overlayfs/.golangci.yml b/contrib/nydus-overlayfs/.golangci.yml index 734653d6721..2755646facd 100644 --- a/contrib/nydus-overlayfs/.golangci.yml +++ b/contrib/nydus-overlayfs/.golangci.yml @@ -1,21 +1,21 @@ -# https://golangci-lint.run/usage/configuration#config-file - -linters: - enable: - - staticcheck - - unconvert - - gofmt - - goimports - - revive - - ineffassign - - vet - - unused - - misspell - disable: - - errcheck - -run: - deadline: 4m - skip-dirs: - - misc - +# https://golangci-lint.run/usage/configuration#config-file + +linters: + enable: + - staticcheck + - unconvert + - gofmt + - goimports + - revive + - ineffassign + - vet + - unused + - misspell + disable: + - errcheck + +run: + deadline: 4m + skip-dirs: + - misc + diff --git a/contrib/nydus-overlayfs/Makefile b/contrib/nydus-overlayfs/Makefile index a70009d6cf8..2773115f8cd 100644 --- a/contrib/nydus-overlayfs/Makefile +++ b/contrib/nydus-overlayfs/Makefile @@ -1,29 +1,29 @@ -GIT_COMMIT := $(shell git rev-parse --verify HEAD --short=7) -BUILD_TIME := $(shell date -u +%Y%m%d.%H%M) -PACKAGES ?= $(shell go list ./... | grep -v /vendor/) -GOARCH ?= $(shell go env GOARCH) -GOPROXY ?= https://goproxy.io - -ifdef GOPROXY -PROXY := GOPROXY=${GOPROXY} -endif - -.PHONY: all build release test clean - -all: build - -build: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags="-s -w -X 'main.Version=${GIT_COMMIT}' -X 'main.BuildTime=${BUILD_TIME}'" -v -o bin/nydus-overlayfs ./cmd/main.go - -release: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -v -o bin/nydus-overlayfs ./cmd/main.go - -test: build - go vet $(PACKAGES) - go test -v -cover ${PACKAGES} - -lint: - golangci-lint run - -clean: - rm -f bin/* +GIT_COMMIT := $(shell git rev-parse --verify HEAD --short=7) +BUILD_TIME := $(shell date -u +%Y%m%d.%H%M) +PACKAGES ?= $(shell go list ./... | grep -v /vendor/) +GOARCH ?= $(shell go env GOARCH) +GOPROXY ?= https://goproxy.io + +ifdef GOPROXY +PROXY := GOPROXY=${GOPROXY} +endif + +.PHONY: all build release test clean + +all: build + +build: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags="-s -w -X 'main.Version=${GIT_COMMIT}' -X 'main.BuildTime=${BUILD_TIME}'" -v -o bin/nydus-overlayfs ./cmd/main.go + +release: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -v -o bin/nydus-overlayfs ./cmd/main.go + +test: build + go vet $(PACKAGES) + go test -v -cover ${PACKAGES} + +lint: + golangci-lint run + +clean: + rm -f bin/* diff --git a/contrib/nydus-overlayfs/cmd/main.go b/contrib/nydus-overlayfs/cmd/main.go index ab15b2133c4..4b97e42d751 100644 --- a/contrib/nydus-overlayfs/cmd/main.go +++ b/contrib/nydus-overlayfs/cmd/main.go @@ -1,143 +1,143 @@ -package main - -import ( - "fmt" - "log" - "os" - "strings" - "syscall" - - "github.com/pkg/errors" - "github.com/urfave/cli/v2" - "golang.org/x/sys/unix" -) - -const ( - // Extra mount option to pass Nydus specific information from snapshotter to runtime through containerd. 
- extraOptionKey = "extraoption=" - // Kata virtual volume infmation passed from snapshotter to runtime through containerd, superset of `extraOptionKey`. - // Please refer to `KataVirtualVolume` in https://github.com/kata-containers/kata-containers/blob/main/src/libs/kata-types/src/mount.rs - kataVolumeOptionKey = "io.katacontainers.volume=" -) - -var ( - Version = "development" - BuildTime = "unknown" -) - -/* -containerd run fuse.mount format: nydus-overlayfs overlay /tmp/ctd-volume107067851 --o lowerdir=/foo/lower2:/foo/lower1,upperdir=/foo/upper,workdir=/foo/work,extraoption={...},dev,suid] -*/ -type mountArgs struct { - fsType string - target string - options []string -} - -func parseArgs(args []string) (*mountArgs, error) { - margs := &mountArgs{ - fsType: args[0], - target: args[1], - } - if margs.fsType != "overlay" { - return nil, errors.New("fsType only support overlay") - } - if len(margs.target) == 0 { - return nil, errors.New("target can not be empty") - } - if args[2] == "-o" && len(args[3]) != 0 { - for _, opt := range strings.Split(args[3], ",") { - if strings.HasPrefix(opt, extraOptionKey) || strings.HasPrefix(opt, kataVolumeOptionKey) { - // filter extraoption - continue - } - margs.options = append(margs.options, opt) - } - } - if len(margs.options) == 0 { - return nil, errors.New("options can not be empty") - } - return margs, nil -} - -func parseOptions(options []string) (int, string) { - flagsTable := map[string]int{ - "async": unix.MS_SYNCHRONOUS, - "atime": unix.MS_NOATIME, - "bind": unix.MS_BIND, - "defaults": 0, - "dev": unix.MS_NODEV, - "diratime": unix.MS_NODIRATIME, - "dirsync": unix.MS_DIRSYNC, - "exec": unix.MS_NOEXEC, - "mand": unix.MS_MANDLOCK, - "noatime": unix.MS_NOATIME, - "nodev": unix.MS_NODEV, - "nodiratime": unix.MS_NODIRATIME, - "noexec": unix.MS_NOEXEC, - "nomand": unix.MS_MANDLOCK, - "norelatime": unix.MS_RELATIME, - "nostrictatime": unix.MS_STRICTATIME, - "nosuid": unix.MS_NOSUID, - "rbind": unix.MS_BIND | unix.MS_REC, - "relatime": unix.MS_RELATIME, - "remount": unix.MS_REMOUNT, - "ro": unix.MS_RDONLY, - "rw": unix.MS_RDONLY, - "strictatime": unix.MS_STRICTATIME, - "suid": unix.MS_NOSUID, - "sync": unix.MS_SYNCHRONOUS, - } - var ( - flags int - data []string - ) - for _, o := range options { - if f, exist := flagsTable[o]; exist { - flags |= f - } else { - data = append(data, o) - } - } - return flags, strings.Join(data, ",") -} - -func run(args cli.Args) error { - margs, err := parseArgs(args.Slice()) - if err != nil { - return errors.Wrap(err, "parseArgs err") - } - - log.Printf("domount info: %v\n", margs) - - flags, data := parseOptions(margs.options) - err = syscall.Mount(margs.fsType, margs.target, margs.fsType, uintptr(flags), data) - if err != nil { - return errors.Wrap(err, "doMount err") - } - return nil -} - -func main() { - app := &cli.App{ - Name: "NydusOverlayfs", - Usage: "Binary for containerd mount helper to do mount operation in nydus env", - Version: fmt.Sprintf("%s.%s", Version, BuildTime), - UsageText: "[Usage]: ./nydus-overlayfs overlay -o ", - Action: func(c *cli.Context) error { - return run(c.Args()) - }, - Before: func(c *cli.Context) error { - if c.NArg() != 4 { - cli.ShowAppHelpAndExit(c, 1) - } - return nil - }, - } - err := app.Run(os.Args) - if err != nil { - log.Fatal(err) - } - os.Exit(0) -} +package main + +import ( + "fmt" + "log" + "os" + "strings" + "syscall" + + "github.com/pkg/errors" + "github.com/urfave/cli/v2" + "golang.org/x/sys/unix" +) + +const ( + // Extra mount option to pass Nydus specific 
information from snapshotter to runtime through containerd. + extraOptionKey = "extraoption=" + // Kata virtual volume infmation passed from snapshotter to runtime through containerd, superset of `extraOptionKey`. + // Please refer to `KataVirtualVolume` in https://github.com/kata-containers/kata-containers/blob/main/src/libs/kata-types/src/mount.rs + kataVolumeOptionKey = "io.katacontainers.volume=" +) + +var ( + Version = "development" + BuildTime = "unknown" +) + +/* +containerd run fuse.mount format: nydus-overlayfs overlay /tmp/ctd-volume107067851 +-o lowerdir=/foo/lower2:/foo/lower1,upperdir=/foo/upper,workdir=/foo/work,extraoption={...},dev,suid] +*/ +type mountArgs struct { + fsType string + target string + options []string +} + +func parseArgs(args []string) (*mountArgs, error) { + margs := &mountArgs{ + fsType: args[0], + target: args[1], + } + if margs.fsType != "overlay" { + return nil, errors.New("fsType only support overlay") + } + if len(margs.target) == 0 { + return nil, errors.New("target can not be empty") + } + if args[2] == "-o" && len(args[3]) != 0 { + for _, opt := range strings.Split(args[3], ",") { + if strings.HasPrefix(opt, extraOptionKey) || strings.HasPrefix(opt, kataVolumeOptionKey) { + // filter extraoption + continue + } + margs.options = append(margs.options, opt) + } + } + if len(margs.options) == 0 { + return nil, errors.New("options can not be empty") + } + return margs, nil +} + +func parseOptions(options []string) (int, string) { + flagsTable := map[string]int{ + "async": unix.MS_SYNCHRONOUS, + "atime": unix.MS_NOATIME, + "bind": unix.MS_BIND, + "defaults": 0, + "dev": unix.MS_NODEV, + "diratime": unix.MS_NODIRATIME, + "dirsync": unix.MS_DIRSYNC, + "exec": unix.MS_NOEXEC, + "mand": unix.MS_MANDLOCK, + "noatime": unix.MS_NOATIME, + "nodev": unix.MS_NODEV, + "nodiratime": unix.MS_NODIRATIME, + "noexec": unix.MS_NOEXEC, + "nomand": unix.MS_MANDLOCK, + "norelatime": unix.MS_RELATIME, + "nostrictatime": unix.MS_STRICTATIME, + "nosuid": unix.MS_NOSUID, + "rbind": unix.MS_BIND | unix.MS_REC, + "relatime": unix.MS_RELATIME, + "remount": unix.MS_REMOUNT, + "ro": unix.MS_RDONLY, + "rw": unix.MS_RDONLY, + "strictatime": unix.MS_STRICTATIME, + "suid": unix.MS_NOSUID, + "sync": unix.MS_SYNCHRONOUS, + } + var ( + flags int + data []string + ) + for _, o := range options { + if f, exist := flagsTable[o]; exist { + flags |= f + } else { + data = append(data, o) + } + } + return flags, strings.Join(data, ",") +} + +func run(args cli.Args) error { + margs, err := parseArgs(args.Slice()) + if err != nil { + return errors.Wrap(err, "parseArgs err") + } + + log.Printf("domount info: %v\n", margs) + + flags, data := parseOptions(margs.options) + err = syscall.Mount(margs.fsType, margs.target, margs.fsType, uintptr(flags), data) + if err != nil { + return errors.Wrap(err, "doMount err") + } + return nil +} + +func main() { + app := &cli.App{ + Name: "NydusOverlayfs", + Usage: "Binary for containerd mount helper to do mount operation in nydus env", + Version: fmt.Sprintf("%s.%s", Version, BuildTime), + UsageText: "[Usage]: ./nydus-overlayfs overlay -o ", + Action: func(c *cli.Context) error { + return run(c.Args()) + }, + Before: func(c *cli.Context) error { + if c.NArg() != 4 { + cli.ShowAppHelpAndExit(c, 1) + } + return nil + }, + } + err := app.Run(os.Args) + if err != nil { + log.Fatal(err) + } + os.Exit(0) +} diff --git a/contrib/nydus-overlayfs/go.mod b/contrib/nydus-overlayfs/go.mod index 024c29ee14a..f33e106b192 100644 --- a/contrib/nydus-overlayfs/go.mod +++ 
b/contrib/nydus-overlayfs/go.mod @@ -1,15 +1,15 @@ -module github.com/dragonflyoss/nydus/contrib/nydus-overlayfs - -go 1.21 - -require ( - github.com/pkg/errors v0.9.1 - github.com/urfave/cli/v2 v2.27.1 - golang.org/x/sys v0.15.0 -) - -require ( - github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect -) +module github.com/dragonflyoss/nydus/contrib/nydus-overlayfs + +go 1.21 + +require ( + github.com/pkg/errors v0.9.1 + github.com/urfave/cli/v2 v2.27.1 + golang.org/x/sys v0.15.0 +) + +require ( + github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect +) diff --git a/contrib/nydus-overlayfs/go.sum b/contrib/nydus-overlayfs/go.sum index 6663fe1d913..2d154c7cd0f 100644 --- a/contrib/nydus-overlayfs/go.sum +++ b/contrib/nydus-overlayfs/go.sum @@ -1,10 +1,10 @@ -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= -github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e h1:+SOyEddqYF09QP7vr7CgJ1eti3pY9Fn3LHO1M1r/0sI= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= +github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e h1:+SOyEddqYF09QP7vr7CgJ1eti3pY9Fn3LHO1M1r/0sI= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/contrib/nydusify/.gitignore b/contrib/nydusify/.gitignore index 5f11974a951..955c1e4a496 100644 --- a/contrib/nydusify/.gitignore +++ b/contrib/nydusify/.gitignore @@ -1,6 +1,6 @@ -.vscode -tmp -cmd/nydusify -output -nydus-hook-plugin -coverage.txt +.vscode +tmp +cmd/nydusify +output +nydus-hook-plugin +coverage.txt diff --git a/contrib/nydusify/.golangci.yml b/contrib/nydusify/.golangci.yml index 734653d6721..2755646facd 100644 --- a/contrib/nydusify/.golangci.yml +++ b/contrib/nydusify/.golangci.yml @@ -1,21 +1,21 @@ -# https://golangci-lint.run/usage/configuration#config-file - -linters: - enable: - - staticcheck - - unconvert - - gofmt - - goimports - - revive - - ineffassign - - vet - - unused - - misspell - disable: - - errcheck - -run: - 
deadline: 4m - skip-dirs: - - misc - +# https://golangci-lint.run/usage/configuration#config-file + +linters: + enable: + - staticcheck + - unconvert + - gofmt + - goimports + - revive + - ineffassign + - vet + - unused + - misspell + disable: + - errcheck + +run: + deadline: 4m + skip-dirs: + - misc + diff --git a/contrib/nydusify/Makefile b/contrib/nydusify/Makefile index 39a5f06662d..694a552eb5c 100644 --- a/contrib/nydusify/Makefile +++ b/contrib/nydusify/Makefile @@ -1,41 +1,41 @@ -PACKAGES ?= $(shell go list ./... | grep -v /vendor/) -GOARCH ?= $(shell go env GOARCH) -GOPROXY ?= https://goproxy.io - -ifdef GOPROXY -PROXY := GOPROXY=${GOPROXY} -endif - -# Used to populate variables in version package. -BUILD_TIMESTAMP=$(shell date '+%Y-%m-%dT%H:%M:%S') -VERSION=$(shell git describe --match 'v[0-9]*' --dirty='.m' --always --tags) -REVISION=$(shell git rev-parse HEAD)$(shell if ! git diff --no-ext-diff --quiet --exit-code; then echo .m; fi) - -RELEASE_INFO = -X main.revision=${REVISION} -X main.gitVersion=${VERSION} -X main.buildTime=${BUILD_TIMESTAMP} - -.PHONY: all build release plugin test clean build-smoke - -all: build - -build: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '${RELEASE_INFO}' -gcflags=all="-N -l" -o ./cmd ./cmd/nydusify.go - -release: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '${RELEASE_INFO} -s -w -extldflags "-static"' -o ./cmd ./cmd/nydusify.go - -plugin: - @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -o nydus-hook-plugin ./plugin - -test: - @go vet $(PACKAGES) - @go test -covermode=atomic -coverprofile=coverage.txt -count=1 -v -timeout 20m -parallel 16 -race ${PACKAGES} - -lint: - golangci-lint run - -coverage: test - @go tool cover -func=coverage.txt - -clean: - rm -f cmd/nydusify - rm -f coverage.txt +PACKAGES ?= $(shell go list ./... | grep -v /vendor/) +GOARCH ?= $(shell go env GOARCH) +GOPROXY ?= https://goproxy.io + +ifdef GOPROXY +PROXY := GOPROXY=${GOPROXY} +endif + +# Used to populate variables in version package. +BUILD_TIMESTAMP=$(shell date '+%Y-%m-%dT%H:%M:%S') +VERSION=$(shell git describe --match 'v[0-9]*' --dirty='.m' --always --tags) +REVISION=$(shell git rev-parse HEAD)$(shell if ! git diff --no-ext-diff --quiet --exit-code; then echo .m; fi) + +RELEASE_INFO = -X main.revision=${REVISION} -X main.gitVersion=${VERSION} -X main.buildTime=${BUILD_TIMESTAMP} + +.PHONY: all build release plugin test clean build-smoke + +all: build + +build: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '${RELEASE_INFO}' -gcflags=all="-N -l" -o ./cmd ./cmd/nydusify.go + +release: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '${RELEASE_INFO} -s -w -extldflags "-static"' -o ./cmd ./cmd/nydusify.go + +plugin: + @CGO_ENABLED=0 ${PROXY} GOOS=linux GOARCH=${GOARCH} go build -ldflags '-s -w -extldflags "-static"' -o nydus-hook-plugin ./plugin + +test: + @go vet $(PACKAGES) + @go test -covermode=atomic -coverprofile=coverage.txt -count=1 -v -timeout 20m -parallel 16 -race ${PACKAGES} + +lint: + golangci-lint run + +coverage: test + @go tool cover -func=coverage.txt + +clean: + rm -f cmd/nydusify + rm -f coverage.txt diff --git a/contrib/nydusify/cmd/nydusify.go b/contrib/nydusify/cmd/nydusify.go index 590ef133e05..299f85904d3 100644 --- a/contrib/nydusify/cmd/nydusify.go +++ b/contrib/nydusify/cmd/nydusify.go @@ -1,1296 +1,1296 @@ -// Copyright 2020 Ant Group. All rights reserved. 
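The `RELEASE_INFO` variable in the Makefile above relies on Go's `-X importpath.name=value` linker flag to stamp version metadata into package-level string variables at build time; `cmd/nydusify.go` below declares `revision`, `gitVersion`, and `buildTime` for this purpose. A self-contained sketch of the mechanism (hypothetical `main.go`, not part of this repository):

```go
// main.go — build with, for example:
//   go build -ldflags "-X main.revision=$(git rev-parse HEAD) -X main.gitVersion=v0.0.1 -X main.buildTime=2024-01-01T00:00:00"
package main

import "fmt"

// Populated by the linker via -X; they stay empty in a plain `go build`.
var (
	revision   string
	gitVersion string
	buildTime  string
)

func main() {
	fmt.Printf("version=%s revision=%s buildTime=%s\n", gitVersion, revision, buildTime)
}
```

Without the `-ldflags` arguments the variables simply remain empty strings, which is why the Makefile passes `${RELEASE_INFO}` to both the `build` and `release` targets.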
-// -// SPDX-License-Identifier: Apache-2.0 - -// The Nydusify CLI tool converts an OCI container image from source registry into -// a Nydus image using `nydus-image` CLI layer by layer, then pushes Nydus image to -// target registry. -package main - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "runtime" - "strings" - - "github.com/containerd/containerd/reference/docker" - "github.com/distribution/reference" - "github.com/dustin/go-humanize" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/rule" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/chunkdict/generator" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/copier" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/packer" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/viewer" -) - -var ( - revision string - buildTime string - gitVersion string -) - -var maxCacheMaxRecords uint = 200 - -const defaultLogLevel = logrus.InfoLevel - -func isPossibleValue(excepted []string, value string) bool { - for _, v := range excepted { - if value == v { - return true - } - } - return false -} - -// This only works for OSS backend right now -func parseBackendConfig(backendConfigJSON, backendConfigFile string) (string, error) { - if backendConfigJSON != "" && backendConfigFile != "" { - return "", fmt.Errorf("--backend-config conflicts with --backend-config-file") - } - - if backendConfigFile != "" { - _backendConfigJSON, err := os.ReadFile(backendConfigFile) - if err != nil { - return "", errors.Wrap(err, "parse backend config file") - } - backendConfigJSON = string(_backendConfigJSON) - } - - return backendConfigJSON, nil -} - -func getBackendConfig(c *cli.Context, prefix string, required bool) (string, string, error) { - backendType := c.String(prefix + "backend-type") - if backendType == "" { - if required { - return "", "", errors.Errorf("backend type is empty, please specify option '--%sbackend-type'", prefix) - } - return "", "", nil - } - - possibleBackendTypes := []string{"oss", "s3"} - if !isPossibleValue(possibleBackendTypes, backendType) { - return "", "", fmt.Errorf("--%sbackend-type should be one of %v", prefix, possibleBackendTypes) - } - - backendConfig, err := parseBackendConfig( - c.String(prefix+"backend-config"), c.String(prefix+"backend-config-file"), - ) - if err != nil { - return "", "", err - } else if (backendType == "oss" || backendType == "s3") && strings.TrimSpace(backendConfig) == "" { - return "", "", errors.Errorf("backend configuration is empty, please specify option '--%sbackend-config'", prefix) - } - - return backendType, backendConfig, nil -} - -// Add suffix to source image reference as the target -// image reference, like this: -// Source: localhost:5000/nginx:latest -// Target: localhost:5000/nginx:latest-suffix -func addReferenceSuffix(source, suffix string) (string, error) { - named, err := docker.ParseDockerRef(source) - if err != nil { - return "", fmt.Errorf("invalid source image reference: %s", err) - } - if _, ok := named.(docker.Digested); ok { - return "", fmt.Errorf("unsupported digested image reference: %s", named.String()) - } - named = 
docker.TagNameOnly(named) - target := named.String() + suffix - return target, nil -} - -func getTargetReference(c *cli.Context) (string, error) { - target := c.String("target") - targetSuffix := c.String("target-suffix") - if target != "" && targetSuffix != "" { - return "", fmt.Errorf("--target conflicts with --target-suffix") - } - if target == "" && targetSuffix == "" { - return "", fmt.Errorf("--target or --target-suffix is required") - } - var err error - if targetSuffix != "" { - target, err = addReferenceSuffix(c.String("source"), targetSuffix) - if err != nil { - return "", err - } - } - return target, nil -} - -func getCacheReference(c *cli.Context, target string) (string, error) { - cache := c.String("build-cache") - cacheTag := c.String("build-cache-tag") - if cache != "" && cacheTag != "" { - return "", fmt.Errorf("--build-cache conflicts with --build-cache-tag") - } - if cacheTag != "" { - named, err := docker.ParseDockerRef(target) - if err != nil { - return "", fmt.Errorf("invalid target image reference: %s", err) - } - cache = fmt.Sprintf("%s/%s:%s", docker.Domain(named), docker.Path(named), cacheTag) - } - return cache, nil -} - -func getPrefetchPatterns(c *cli.Context) (string, error) { - prefetchedDir := c.String("prefetch-dir") - prefetchPatterns := c.Bool("prefetch-patterns") - - if len(prefetchedDir) > 0 && prefetchPatterns { - return "", fmt.Errorf("--prefetch-dir conflicts with --prefetch-patterns") - } - - var patterns string - - if prefetchPatterns { - bytes, err := io.ReadAll(os.Stdin) - if err != nil { - return "", errors.Wrap(err, "read prefetch patterns from STDIN") - } - patterns = string(bytes) - } - - if len(prefetchedDir) > 0 { - patterns = prefetchedDir - } - - if len(patterns) == 0 { - patterns = "/" - } - - return patterns, nil -} - -func main() { - logrus.SetFormatter(&logrus.TextFormatter{ - FullTimestamp: true, - }) - - version := fmt.Sprintf("\nVersion : %s\nRevision : %s\nGo version : %s\nBuild time : %s", gitVersion, revision, runtime.Version(), buildTime) - - app := &cli.App{ - Name: "Nydusify", - Usage: "Nydus utility tool to build, convert, verify and view container images", - Version: version, - } - - // global options - app.Flags = []cli.Flag{ - &cli.BoolFlag{ - Name: "debug", - Aliases: []string{"D"}, - Required: false, - Value: false, - Usage: "Enable debug log level, overwrites the 'log-level' option", - EnvVars: []string{"DEBUG_LOG_LEVEL"}}, - &cli.StringFlag{ - Name: "log-level", - Aliases: []string{"l"}, - Value: "info", - Usage: "Set log level (panic, fatal, error, warn, info, debug, trace)", - EnvVars: []string{"LOG_LEVEL"}, - }, - } - - app.Commands = []*cli.Command{ - { - Name: "convert", - Usage: "Generate a Nydus image from an OCI image", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "source", - Required: true, - Usage: "Source OCI image reference", - EnvVars: []string{"SOURCE"}, - }, - &cli.StringFlag{ - Name: "target", - Required: false, - Usage: "Target (Nydus) image reference", - EnvVars: []string{"TARGET"}, - }, - &cli.StringFlag{ - Name: "target-suffix", - Required: false, - Usage: "Generate the target image reference by adding a suffix to the source image reference, conflicts with --target", - EnvVars: []string{"TARGET_SUFFIX"}, - }, - &cli.BoolFlag{ - Name: "source-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS source registry", - EnvVars: []string{"SOURCE_INSECURE"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target 
registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - - &cli.StringFlag{ - Name: "backend-type", - Value: "", - Usage: "Type of storage backend, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "backend-config", - Value: "", - Usage: "Json configuration string for storage backend", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "backend-config-file", - Value: "", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - &cli.BoolFlag{ - Name: "backend-force-push", - Value: false, Usage: "Force to push Nydus blobs even if they already exist in storage backend", - EnvVars: []string{"BACKEND_FORCE_PUSH"}, - }, - - &cli.StringFlag{ - Name: "build-cache", - Value: "", - Usage: "Specify a cache image to accelerate nydus image conversion", - EnvVars: []string{"BUILD_CACHE"}, - }, - &cli.StringFlag{ - Name: "build-cache-tag", - Value: "", - Usage: "Use $target:$build-cache-tag as cache image, conflict with --build-cache", - EnvVars: []string{"BUILD_CACHE_TAG"}, - }, - &cli.StringFlag{ - Name: "build-cache-version", - Value: "v1", - Usage: "Version number to filter cache images", - EnvVars: []string{"BUILD_CACHE_VERSION"}, - }, - &cli.BoolFlag{ - Name: "build-cache-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS cache registry", - EnvVars: []string{"BUILD_CACHE_INSECURE"}, - }, - // The --build-cache-max-records flag represents the maximum number - // of layers in cache image. 200 (bootstrap + blob in one record) was - // chosen to make it compatible with the 127 max in graph driver of - // docker so that we can pull cache image using docker. - &cli.UintFlag{ - Name: "build-cache-max-records", - Value: maxCacheMaxRecords, - Usage: "Maximum cache records in a cache image", - EnvVars: []string{"BUILD_CACHE_MAX_RECORDS"}, - }, - &cli.StringFlag{ - Name: "chunk-dict", - Required: false, - Usage: "Specify a chunk dict expression for chunk deduplication, " + - "for examples: bootstrap:registry:localhost:5000/namespace/app:chunk_dict, bootstrap:local:/path/to/chunk_dict.boot", - EnvVars: []string{"CHUNK_DICT"}, - }, - &cli.BoolFlag{ - Name: "chunk-dict-insecure", - Required: false, - Value: false, - Usage: "Skip verifying server certs for HTTPS dict registry", - EnvVars: []string{"CHUNK_DICT_INSECURE"}, - }, - - &cli.BoolFlag{ - Name: "merge-platform", - Value: false, - Usage: "Generate an OCI image index with both OCI and Nydus manifests for the image", - EnvVars: []string{"MERGE_PLATFORM"}, - Aliases: []string{"multi-platform"}, - }, - &cli.BoolFlag{ - Name: "all-platforms", - Value: false, - Usage: "Convert images for all platforms, conflicts with --platform", - }, - &cli.StringFlag{ - Name: "platform", - Value: "linux/" + runtime.GOARCH, - Usage: "Convert images for specific platforms, for example: 'linux/amd64,linux/arm64'", - }, - &cli.BoolFlag{ - Name: "oci-ref", - Value: false, - Usage: "Convert to OCI-referenced nydus zran image", - EnvVars: []string{"OCI_REF"}, - }, - &cli.BoolFlag{ - Name: "with-referrer", - Value: false, - Usage: "Associate a reference to the source image, see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers", - EnvVars: []string{"WITH_REFERRER"}, - }, - &cli.BoolFlag{ - Name: "oci", - Value: false, - Usage: "Convert Docker media types to OCI media types", - EnvVars: []string{"OCI"}, - }, - &cli.BoolFlag{ - Name: "docker-v2-format", - Value: false, - Hidden: true, - 
}, - &cli.StringFlag{ - Name: "fs-version", - Required: false, - Value: "6", - DefaultText: "V6 nydus image format", - Usage: "Nydus image format version number, possible values: 5, 6", - EnvVars: []string{"FS_VERSION"}, - }, - &cli.BoolFlag{ - Name: "fs-align-chunk", - Value: false, - Usage: "Enable chunk data alignment(4K) for Nydus image", - EnvVars: []string{"FS_ALIGN_CHUNK"}, - }, - &cli.BoolFlag{ - Name: "backend-aligned-chunk", - Value: false, - Usage: "[Deprecated] Enable chunk data alignment(4K) for Nydus image", - EnvVars: []string{"BACKEND_ALIGNED_CHUNK"}, - }, - &cli.StringFlag{ - Name: "prefetch-dir", - Value: "", - Usage: "Specify an absolute path within the image for prefetch", - EnvVars: []string{"PREFETCH_DIR"}, - }, - &cli.BoolFlag{ - Name: "prefetch-patterns", - Value: false, - Usage: "Read prefetch list from STDIN, please input absolute paths line by line", - EnvVars: []string{"PREFETCH_PATTERNS"}, - }, - &cli.StringFlag{ - Name: "compressor", - Value: "zstd", - Usage: "Algorithm to compress image data blob, possible values: none, lz4_block, zstd", - EnvVars: []string{"COMPRESSOR"}, - }, - &cli.StringFlag{ - Name: "fs-chunk-size", - Value: "0x100000", - Usage: "size of nydus image data chunk, must be power of two and between 0x1000-0x100000, [default: 0x100000]", - EnvVars: []string{"FS_CHUNK_SIZE"}, - Aliases: []string{"chunk-size"}, - }, - &cli.StringFlag{ - Name: "batch-size", - Value: "0", - Usage: "size of batch data chunks, must be power of two, between 0x1000-0x1000000 or zero, [default: 0]", - EnvVars: []string{"BATCH_SIZE"}, - }, - &cli.StringFlag{ - Name: "work-dir", - Value: "./tmp", - Usage: "Working directory for image conversion", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - &cli.StringFlag{ - Name: "output-json", - Value: "", - Usage: "File path to save the metrics collected during conversion in JSON format, for example: './output.json'", - EnvVars: []string{"OUTPUT_JSON"}, - }, - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - - targetRef, err := getTargetReference(c) - if err != nil { - return err - } - - backendType, backendConfig, err := getBackendConfig(c, "", false) - if err != nil { - return err - } - - cacheRef, err := getCacheReference(c, targetRef) - if err != nil { - return err - } - cacheMaxRecords := c.Uint("build-cache-max-records") - if cacheMaxRecords < 1 { - return fmt.Errorf("--build-cache-max-records should be greater than 0") - } - if cacheMaxRecords > maxCacheMaxRecords { - return fmt.Errorf("--build-cache-max-records should not be greater than %d", maxCacheMaxRecords) - } - cacheVersion := c.String("build-cache-version") - - fsVersion := c.String("fs-version") - possibleFsVersions := []string{"5", "6"} - if !isPossibleValue(possibleFsVersions, fsVersion) { - return fmt.Errorf("--fs-version should be one of %v", possibleFsVersions) - } - - prefetchPatterns, err := getPrefetchPatterns(c) - if err != nil { - return err - } - - chunkDictRef := "" - chunkDict := c.String("chunk-dict") - if chunkDict != "" { - _, _, chunkDictRef, err = converter.ParseChunkDictArgs(chunkDict) - if err != nil { - return errors.Wrap(err, "parse chunk dict arguments") - } - } - - docker2OCI := false - if c.Bool("docker-v2-format") { - logrus.Warn("the option `--docker-v2-format` has been deprecated, use `--oci` instead") - docker2OCI = false - } else if c.Bool("oci") { - 
docker2OCI = true - } - - // Forcibly enable `--oci` option when `--oci-ref` be enabled. - if c.Bool("oci-ref") { - logrus.Warn("forcibly enabled `--oci` option when `--oci-ref` be enabled") - docker2OCI = true - } - - opt := converter.Opt{ - WorkDir: c.String("work-dir"), - NydusImagePath: c.String("nydus-image"), - - Source: c.String("source"), - Target: targetRef, - SourceInsecure: c.Bool("source-insecure"), - TargetInsecure: c.Bool("target-insecure"), - - BackendType: backendType, - BackendConfig: backendConfig, - BackendForcePush: c.Bool("backend-force-push"), - - CacheRef: cacheRef, - CacheInsecure: c.Bool("build-cache-insecure"), - CacheMaxRecords: cacheMaxRecords, - CacheVersion: cacheVersion, - - ChunkDictRef: chunkDictRef, - ChunkDictInsecure: c.Bool("chunk-dict-insecure"), - - PrefetchPatterns: prefetchPatterns, - MergePlatform: c.Bool("merge-platform"), - Docker2OCI: docker2OCI, - FsVersion: fsVersion, - FsAlignChunk: c.Bool("backend-aligned-chunk") || c.Bool("fs-align-chunk"), - Compressor: c.String("compressor"), - ChunkSize: c.String("chunk-size"), - BatchSize: c.String("batch-size"), - - OCIRef: c.Bool("oci-ref"), - WithReferrer: c.Bool("with-referrer"), - AllPlatforms: c.Bool("all-platforms"), - Platforms: c.String("platform"), - - OutputJSON: c.String("output-json"), - } - - return converter.Convert(context.Background(), opt) - }, - }, - { - Name: "check", - Usage: "Verify nydus image format and content", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "source", - Required: false, - Usage: "Source OCI image reference", - EnvVars: []string{"SOURCE"}, - }, - &cli.StringFlag{ - Name: "target", - Required: true, - Usage: "Target (Nydus) image reference", - EnvVars: []string{"TARGET"}, - }, - &cli.BoolFlag{ - Name: "source-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS source registry", - EnvVars: []string{"SOURCE_INSECURE"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - - &cli.StringFlag{ - Name: "backend-type", - Value: "", - Usage: "Type of storage backend, enable verification of file data in Nydus image if specified, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "backend-config", - Value: "", - Usage: "Json string for storage backend configuration", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "backend-config-file", - Value: "", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - - &cli.BoolFlag{ - Name: "multi-platform", - Value: false, - Usage: "Verify that the image contains an image index with both OCI and Nydus manifests", - EnvVars: []string{"MULTI_PLATFORM"}, - }, - &cli.StringFlag{ - Name: "platform", - Value: "linux/" + runtime.GOARCH, - Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", - }, - - &cli.StringFlag{ - Name: "work-dir", - Value: "./output", - Usage: "Working directory for image verification", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - &cli.StringFlag{ - Name: "nydusd", - Value: "nydusd", - Usage: "Path to the nydusd binary, default to search in PATH", - EnvVars: []string{"NYDUSD"}, - }, - }, - Action: func(c 
*cli.Context) error { - setupLogLevel(c) - - backendType, backendConfig, err := getBackendConfig(c, "", false) - if err != nil { - return err - } - - _, arch, err := provider.ExtractOsArch(c.String("platform")) - if err != nil { - return err - } - - checker, err := checker.New(checker.Opt{ - WorkDir: c.String("work-dir"), - Source: c.String("source"), - Target: c.String("target"), - MultiPlatform: c.Bool("multi-platform"), - SourceInsecure: c.Bool("source-insecure"), - TargetInsecure: c.Bool("target-insecure"), - NydusImagePath: c.String("nydus-image"), - NydusdPath: c.String("nydusd"), - BackendType: backendType, - BackendConfig: backendConfig, - ExpectedArch: arch, - }) - if err != nil { - return err - } - - return checker.Check(context.Background()) - }, - }, - { - Name: "chunkdict", - Usage: "Deduplicate chunk for Nydus image (experimental)", - Subcommands: []*cli.Command{ - { - Name: "generate", - Usage: "Save chunk and blob information of Multi-image into the database (experimental)", - Flags: []cli.Flag{ - &cli.StringSliceFlag{ - Name: "sources", - Required: true, - Usage: "One or more Nydus image reference(Multiple images should be split by commas)", - EnvVars: []string{"SOURCES"}, - }, - &cli.StringFlag{ - Name: "target", - Required: false, - Usage: "Target chunkdict image (Nydus) reference", - EnvVars: []string{"TARGET"}, - }, - &cli.BoolFlag{ - Name: "source-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS source registry", - EnvVars: []string{"SOURCE_INSECURE"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - - &cli.StringFlag{ - Name: "backend-type", - Value: "", - Usage: "Type of storage backend, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "backend-config", - Value: "", - Usage: "Json configuration string for storage backend", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "backend-config-file", - Value: "", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - - &cli.StringFlag{ - Name: "work-dir", - Value: "./output", - Usage: "Working directory for generating chunkdict image", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - - &cli.BoolFlag{ - Name: "all-platforms", - Value: false, - Usage: "Generate chunkdict image for all platforms, conflicts with --platform", - }, - &cli.StringFlag{ - Name: "platform", - Value: "linux/" + runtime.GOARCH, - Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", - }, - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - - backendType, backendConfig, err := getBackendConfig(c, "", false) - if err != nil { - return err - } - - _, arch, err := provider.ExtractOsArch(c.String("platform")) - if err != nil { - return err - } - - generator, err := generator.New(generator.Opt{ - Sources: c.StringSlice("sources"), - Target: c.String("target"), - SourceInsecure: c.Bool("source-insecure"), - TargetInsecure: c.Bool("target-insecure"), - - BackendType: backendType, - BackendConfig: backendConfig, - BackendForcePush: c.Bool("backend-force-push"), - - WorkDir: c.String("work-dir"), - NydusImagePath: 
c.String("nydus-image"), - ExpectedArch: arch, - AllPlatforms: c.Bool("all-platforms"), - Platforms: c.String("platform"), - }) - if err != nil { - return err - } - - return generator.Generate(context.Background()) - }, - }, - }, - }, - { - Name: "mount", - Aliases: []string{"view"}, - Usage: "Mount the nydus image as a filesystem", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "target", - Required: true, - Usage: "Target (Nydus) image reference", - EnvVars: []string{"TARGET"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - - &cli.StringFlag{ - Name: "backend-type", - Value: "", - Required: false, - Usage: "Type of storage backend, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "backend-config", - Value: "", - Usage: "Json configuration string for storage backend", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "backend-config-file", - Value: "", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - - &cli.StringFlag{ - Name: "mount-path", - Value: "./image-fs", - Usage: "Path to mount the image", - EnvVars: []string{"MOUNT_PATH"}, - }, - &cli.StringFlag{ - Name: "platform", - Value: "linux/" + runtime.GOARCH, - Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", - }, - - &cli.StringFlag{ - Name: "work-dir", - Value: "./tmp", - Usage: "Working directory for image view, will be cleaned up after viewing", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydusd", - Value: "nydusd", - Usage: "The nydusd binary path, if unset, search in PATH environment", - EnvVars: []string{"NYDUSD"}, - }, - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - - backendType, backendConfig, err := getBackendConfig(c, "", false) - if err != nil { - return err - } else if backendConfig == "" { - - backendType = "registry" - parsed, err := reference.ParseNormalizedNamed(c.String("target")) - if err != nil { - return err - } - - backendConfigStruct, err := rule.NewRegistryBackendConfig(parsed) - if err != nil { - return errors.Wrap(err, "parse registry backend configuration") - } - - backendConfigStruct.SkipVerify = c.Bool("target-insecure") - - bytes, err := json.Marshal(backendConfigStruct) - if err != nil { - return errors.Wrap(err, "marshal registry backend configuration") - } - backendConfig = string(bytes) - - } - - _, arch, err := provider.ExtractOsArch(c.String("platform")) - if err != nil { - return err - } - - fsViewer, err := viewer.New(viewer.Opt{ - WorkDir: c.String("work-dir"), - Target: c.String("target"), - TargetInsecure: c.Bool("target-insecure"), - MountPath: c.String("mount-path"), - NydusdPath: c.String("nydusd"), - BackendType: backendType, - BackendConfig: backendConfig, - ExpectedArch: arch, - }) - if err != nil { - return err - } - - return fsViewer.View(context.Background()) - }, - }, - { - Name: "build", - Aliases: []string{"pack"}, - Usage: "Build a Nydus filesystem from a source directory", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "source-dir", - Aliases: []string{"target-dir"}, // for compatibility - Required: true, - Usage: "Source directory to build Nydus filesystem from", - EnvVars: []string{"SOURCE_DIR"}, - }, - &cli.StringFlag{ - Name: "output-dir", - Aliases: []string{"o"}, - Required: false, - Usage: "Output 
directory for built artifacts", - EnvVars: []string{"OUTPUT_DIR"}, - }, - &cli.StringFlag{ - Name: "name", - Aliases: []string{"meta", "bootstrap"}, // for compatibility - Required: true, - Usage: "Image name, which will be used as suffix for the generated Nydus image bootstrap/data blobs", - EnvVars: []string{"BOOTSTRAP", "IMAGE_NAME"}, - }, - - &cli.BoolFlag{ - Name: "backend-push", - Value: false, - Usage: "Push generated Nydus filesystem to storage backend", - EnvVars: []string{"BACKEND_PUSH"}, - }, - &cli.StringFlag{ - Name: "backend-type", - Value: "oss", - DefaultText: "oss", - Usage: "Type of storage backend, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "backend-config", - Value: "", - Usage: "Json configuration string for storage backend", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "backend-config-file", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - - &cli.StringFlag{ - Name: "chunk-dict", - Usage: "Specify a chunk dict expression for chunk deduplication, for example: bootstrap=/path/to/dict.boot", - EnvVars: []string{"CHUNK_DICT"}, - }, - &cli.StringFlag{ - Name: "parent-bootstrap", - Usage: "Specify a parent metadata to reference data chunks", - EnvVars: []string{"PARENT_BOOTSTRAP"}, - }, - &cli.BoolFlag{ - Name: "compact", - Usage: "Compact parent bootstrap before building the image when needed", - EnvVars: []string{"COMPACT"}, - }, - &cli.PathFlag{ - Name: "compact-config-file", - TakesFile: true, - Usage: "Compact configuration file, default configuration is " + - "{\"min_used_ratio\": 5, \"compact_blob_size\": 10485760, \"max_compact_size\": 104857600, " + - "\"layers_to_compact\": 32}", - EnvVars: []string{"COMPACT_CONFIG_FILE"}, - }, - - &cli.StringFlag{ - Name: "fs-version", - Required: false, - Usage: "Nydus image format version number, possible values: 5, 6", - EnvVars: []string{"FS_VERSION"}, - Value: "6", - DefaultText: "V6 nydus image format", - }, - &cli.StringFlag{ - Name: "compressor", - Value: "zstd", - Usage: "Algorithm to compress image data blob, possible values: none, lz4_block, zstd", - EnvVars: []string{"COMPRESSOR"}, - }, - &cli.StringFlag{ - Name: "chunk-size", - Value: "0x100000", - Usage: "size of nydus image data chunk, must be power of two and between 0x1000-0x100000, [default: 0x100000]", - EnvVars: []string{"CHUNK_SIZE"}, - }, - - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - }, - Before: func(ctx *cli.Context) error { - sourcePath := ctx.String("source-dir") - fi, err := os.Stat(sourcePath) - if err != nil { - return errors.Wrapf(err, "failed to check source directory") - } - if !fi.IsDir() { - return errors.Errorf("source path '%s' is not a directory", sourcePath) - } - return nil - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - - var ( - p *packer.Packer - res packer.PackResult - backendConfig packer.BackendConfig - err error - ) - - // if backend-push is specified, we should make sure backend-config-file exists - if c.Bool("backend-push") || c.Bool("compact") { - _backendType, _backendConfig, err := getBackendConfig(c, "", true) - if err != nil { - return err - } - // we can verify the _backendType in the `packer.ParseBackendConfigString` function - cfg, err := packer.ParseBackendConfigString(_backendType, _backendConfig) - if err != nil { - 
return errors.Errorf("failed to parse backend-config '%s', err = %v", _backendConfig, err) - } - backendConfig = cfg - } - - if p, err = packer.New(packer.Opt{ - LogLevel: logrus.GetLevel(), - NydusImagePath: c.String("nydus-image"), - OutputDir: c.String("output-dir"), - BackendConfig: backendConfig, - }); err != nil { - return err - } - - if res, err = p.Pack(context.Background(), packer.PackRequest{ - SourceDir: c.String("source-dir"), - ImageName: c.String("name"), - PushToRemote: c.Bool("backend-push"), - FsVersion: c.String("fs-version"), - Compressor: c.String("compressor"), - ChunkSize: c.String("chunk-size"), - - ChunkDict: c.String("chunk-dict"), - Parent: c.String("parent-bootstrap"), - TryCompact: c.Bool("compact"), - CompactConfigPath: c.String("compact-config-file"), - }); err != nil { - return err - } - logrus.Infof("successfully built Nydus image (bootstrap:'%s', blob:'%s')", res.Meta, res.Blob) - return nil - }, - }, - { - Name: "copy", - Usage: "Copy an image from source to target", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "source", - Required: true, - Usage: "Source image reference", - EnvVars: []string{"SOURCE"}, - }, - &cli.StringFlag{ - Name: "target", - Required: false, - Usage: "Target image reference", - EnvVars: []string{"TARGET"}, - }, - &cli.BoolFlag{ - Name: "source-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS source registry", - EnvVars: []string{"SOURCE_INSECURE"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - - &cli.StringFlag{ - Name: "source-backend-type", - Value: "", - Usage: "Type of storage backend, possible values: 'oss', 's3'", - EnvVars: []string{"BACKEND_TYPE"}, - }, - &cli.StringFlag{ - Name: "source-backend-config", - Value: "", - Usage: "Json configuration string for storage backend", - EnvVars: []string{"BACKEND_CONFIG"}, - }, - &cli.PathFlag{ - Name: "source-backend-config-file", - Value: "", - TakesFile: true, - Usage: "Json configuration file for storage backend", - EnvVars: []string{"BACKEND_CONFIG_FILE"}, - }, - - &cli.BoolFlag{ - Name: "all-platforms", - Value: false, - Usage: "Copy images for all platforms, conflicts with --platform", - }, - &cli.StringFlag{ - Name: "platform", - Value: "linux/" + runtime.GOARCH, - Usage: "Copy images for specific platforms, for example: 'linux/amd64,linux/arm64'", - }, - - &cli.StringFlag{ - Name: "push-chunk-size", - Value: "0MB", - Usage: "Chunk size for pushing a blob layer in chunked", - }, - - &cli.StringFlag{ - Name: "work-dir", - Value: "./tmp", - Usage: "Working directory for image copy", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - - sourceBackendType, sourceBackendConfig, err := getBackendConfig(c, "source-", false) - if err != nil { - return err - } - - pushChunkSize, err := humanize.ParseBytes(c.String("push-chunk-size")) - if err != nil { - return errors.Wrap(err, "invalid --push-chunk-size option") - } - if pushChunkSize > 0 { - logrus.Infof("will copy layer with chunk size %s", c.String("push-chunk-size")) - } - - opt := copier.Opt{ - WorkDir: c.String("work-dir"), - NydusImagePath: c.String("nydus-image"), - - Source: c.String("source"), - Target: c.String("target"), - SourceInsecure: 
c.Bool("source-insecure"), - TargetInsecure: c.Bool("target-insecure"), - - SourceBackendType: sourceBackendType, - SourceBackendConfig: sourceBackendConfig, - - AllPlatforms: c.Bool("all-platforms"), - Platforms: c.String("platform"), - - PushChunkSize: int64(pushChunkSize), - } - - return copier.Copy(context.Background(), opt) - }, - }, - { - Name: "commit", - Usage: "Create and push a new nydus image from a container's changes that use a nydus image", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "work-dir", - Value: "./tmp", - Usage: "Working directory for commit workflow", - EnvVars: []string{"WORK_DIR"}, - }, - &cli.StringFlag{ - Name: "nydus-image", - Value: "nydus-image", - Usage: "Path to the nydus-image binary, default to search in PATH", - EnvVars: []string{"NYDUS_IMAGE"}, - }, - &cli.StringFlag{ - Name: "containerd-address", - Value: "/run/containerd/containerd.sock", - Usage: "Containerd address, optionally with \"unix://\" prefix [$CONTAINERD_ADDRESS] (default \"/run/containerd/containerd.sock\")", - EnvVars: []string{"CONTAINERD_ADDR"}, - }, - &cli.StringFlag{ - Name: "namespace", - Aliases: []string{"n"}, - Value: "default", - Usage: "Container namespace, default with \"default\" namespace", - EnvVars: []string{"NAMESPACE"}, - }, - &cli.StringFlag{ - Name: "container", - Required: true, - Usage: "Target container id", - EnvVars: []string{"CONTAINER"}, - }, - &cli.StringFlag{ - Name: "target", - Required: true, - Usage: "Target nydus image reference", - EnvVars: []string{"TARGET"}, - }, - &cli.BoolFlag{ - Name: "source-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS source registry", - EnvVars: []string{"SOURCE_INSECURE"}, - }, - &cli.BoolFlag{ - Name: "target-insecure", - Required: false, - Usage: "Skip verifying server certs for HTTPS target registry", - EnvVars: []string{"TARGET_INSECURE"}, - }, - &cli.IntFlag{ - Name: "maximum-times", - Required: false, - DefaultText: "400", - Value: 400, - Usage: "The maximum times allowed to be committed", - EnvVars: []string{"MAXIMUM_TIMES"}, - }, - &cli.StringSliceFlag{ - Name: "with-path", - Aliases: []string{"with-mount-path"}, - Required: false, - Usage: "The external directory (for example mountpoint) in container that need to be committed", - EnvVars: []string{"WITH_PATH"}, - }, - }, - Action: func(c *cli.Context) error { - setupLogLevel(c) - parsePaths := func(paths []string) ([]string, []string) { - withPaths := []string{} - withoutPaths := []string{} - - for _, path := range paths { - path = strings.TrimSpace(path) - if strings.HasPrefix(path, "!") { - path = strings.TrimLeft(path, "!") - path = strings.TrimRight(path, "/") - withoutPaths = append(withoutPaths, path) - } else { - withPaths = append(withPaths, path) - } - } - - return withPaths, withoutPaths - } - - withPaths, withoutPaths := parsePaths(c.StringSlice("with-path")) - opt := committer.Opt{ - WorkDir: c.String("work-dir"), - NydusImagePath: c.String("nydus-image"), - ContainerdAddress: c.String("containerd-address"), - Namespace: c.String("namespace"), - ContainerID: c.String("container"), - TargetRef: c.String("target"), - SourceInsecure: c.Bool("source-insecure"), - TargetInsecure: c.Bool("target-insecure"), - MaximumTimes: c.Int("maximum-times"), - WithPaths: withPaths, - WithoutPaths: withoutPaths, - } - cm, err := committer.NewCommitter(opt) - if err != nil { - return errors.Wrap(err, "create commiter") - } - return cm.Commit(c.Context, opt) - }, - }, - } - - if !utils.IsSupportedArch(runtime.GOARCH) { - logrus.Fatal("Nydusify 
can only work under architecture 'amd64' and 'arm64'") - } - - if err := app.Run(os.Args); err != nil { - logrus.Fatal(err) - } -} - -func setupLogLevel(c *cli.Context) { - // global `-D` has the highest priority - if c.Bool("D") { - logrus.SetLevel(logrus.DebugLevel) - return - } - - lvl := c.String("log-level") - logLevel, err := logrus.ParseLevel(lvl) - if err != nil { - logrus.Warnf("failed to parse log level(%s): %+v\ndefault log level(info) will be used", lvl, err) - logLevel = defaultLogLevel - } - - logrus.SetLevel(logLevel) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +// The Nydusify CLI tool converts an OCI container image from source registry into +// a Nydus image using `nydus-image` CLI layer by layer, then pushes Nydus image to +// target registry. +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "runtime" + "strings" + + "github.com/containerd/containerd/reference/docker" + "github.com/distribution/reference" + "github.com/dustin/go-humanize" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/rule" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/chunkdict/generator" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/copier" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/packer" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/viewer" +) + +var ( + revision string + buildTime string + gitVersion string +) + +var maxCacheMaxRecords uint = 200 + +const defaultLogLevel = logrus.InfoLevel + +func isPossibleValue(excepted []string, value string) bool { + for _, v := range excepted { + if value == v { + return true + } + } + return false +} + +// This only works for OSS backend right now +func parseBackendConfig(backendConfigJSON, backendConfigFile string) (string, error) { + if backendConfigJSON != "" && backendConfigFile != "" { + return "", fmt.Errorf("--backend-config conflicts with --backend-config-file") + } + + if backendConfigFile != "" { + _backendConfigJSON, err := os.ReadFile(backendConfigFile) + if err != nil { + return "", errors.Wrap(err, "parse backend config file") + } + backendConfigJSON = string(_backendConfigJSON) + } + + return backendConfigJSON, nil +} + +func getBackendConfig(c *cli.Context, prefix string, required bool) (string, string, error) { + backendType := c.String(prefix + "backend-type") + if backendType == "" { + if required { + return "", "", errors.Errorf("backend type is empty, please specify option '--%sbackend-type'", prefix) + } + return "", "", nil + } + + possibleBackendTypes := []string{"oss", "s3"} + if !isPossibleValue(possibleBackendTypes, backendType) { + return "", "", fmt.Errorf("--%sbackend-type should be one of %v", prefix, possibleBackendTypes) + } + + backendConfig, err := parseBackendConfig( + c.String(prefix+"backend-config"), c.String(prefix+"backend-config-file"), + ) + if err != nil { + return "", "", err + } else if (backendType == "oss" || backendType == "s3") && strings.TrimSpace(backendConfig) == "" { + return "", "", errors.Errorf("backend configuration is empty, please specify option '--%sbackend-config'", 
prefix) + } + + return backendType, backendConfig, nil +} + +// Add suffix to source image reference as the target +// image reference, like this: +// Source: localhost:5000/nginx:latest +// Target: localhost:5000/nginx:latest-suffix +func addReferenceSuffix(source, suffix string) (string, error) { + named, err := docker.ParseDockerRef(source) + if err != nil { + return "", fmt.Errorf("invalid source image reference: %s", err) + } + if _, ok := named.(docker.Digested); ok { + return "", fmt.Errorf("unsupported digested image reference: %s", named.String()) + } + named = docker.TagNameOnly(named) + target := named.String() + suffix + return target, nil +} + +func getTargetReference(c *cli.Context) (string, error) { + target := c.String("target") + targetSuffix := c.String("target-suffix") + if target != "" && targetSuffix != "" { + return "", fmt.Errorf("--target conflicts with --target-suffix") + } + if target == "" && targetSuffix == "" { + return "", fmt.Errorf("--target or --target-suffix is required") + } + var err error + if targetSuffix != "" { + target, err = addReferenceSuffix(c.String("source"), targetSuffix) + if err != nil { + return "", err + } + } + return target, nil +} + +func getCacheReference(c *cli.Context, target string) (string, error) { + cache := c.String("build-cache") + cacheTag := c.String("build-cache-tag") + if cache != "" && cacheTag != "" { + return "", fmt.Errorf("--build-cache conflicts with --build-cache-tag") + } + if cacheTag != "" { + named, err := docker.ParseDockerRef(target) + if err != nil { + return "", fmt.Errorf("invalid target image reference: %s", err) + } + cache = fmt.Sprintf("%s/%s:%s", docker.Domain(named), docker.Path(named), cacheTag) + } + return cache, nil +} + +func getPrefetchPatterns(c *cli.Context) (string, error) { + prefetchedDir := c.String("prefetch-dir") + prefetchPatterns := c.Bool("prefetch-patterns") + + if len(prefetchedDir) > 0 && prefetchPatterns { + return "", fmt.Errorf("--prefetch-dir conflicts with --prefetch-patterns") + } + + var patterns string + + if prefetchPatterns { + bytes, err := io.ReadAll(os.Stdin) + if err != nil { + return "", errors.Wrap(err, "read prefetch patterns from STDIN") + } + patterns = string(bytes) + } + + if len(prefetchedDir) > 0 { + patterns = prefetchedDir + } + + if len(patterns) == 0 { + patterns = "/" + } + + return patterns, nil +} + +func main() { + logrus.SetFormatter(&logrus.TextFormatter{ + FullTimestamp: true, + }) + + version := fmt.Sprintf("\nVersion : %s\nRevision : %s\nGo version : %s\nBuild time : %s", gitVersion, revision, runtime.Version(), buildTime) + + app := &cli.App{ + Name: "Nydusify", + Usage: "Nydus utility tool to build, convert, verify and view container images", + Version: version, + } + + // global options + app.Flags = []cli.Flag{ + &cli.BoolFlag{ + Name: "debug", + Aliases: []string{"D"}, + Required: false, + Value: false, + Usage: "Enable debug log level, overwrites the 'log-level' option", + EnvVars: []string{"DEBUG_LOG_LEVEL"}}, + &cli.StringFlag{ + Name: "log-level", + Aliases: []string{"l"}, + Value: "info", + Usage: "Set log level (panic, fatal, error, warn, info, debug, trace)", + EnvVars: []string{"LOG_LEVEL"}, + }, + } + + app.Commands = []*cli.Command{ + { + Name: "convert", + Usage: "Generate a Nydus image from an OCI image", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "source", + Required: true, + Usage: "Source OCI image reference", + EnvVars: []string{"SOURCE"}, + }, + &cli.StringFlag{ + Name: "target", + Required: false, + Usage: "Target 
(Nydus) image reference", + EnvVars: []string{"TARGET"}, + }, + &cli.StringFlag{ + Name: "target-suffix", + Required: false, + Usage: "Generate the target image reference by adding a suffix to the source image reference, conflicts with --target", + EnvVars: []string{"TARGET_SUFFIX"}, + }, + &cli.BoolFlag{ + Name: "source-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS source registry", + EnvVars: []string{"SOURCE_INSECURE"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + + &cli.StringFlag{ + Name: "backend-type", + Value: "", + Usage: "Type of storage backend, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "backend-config", + Value: "", + Usage: "Json configuration string for storage backend", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + &cli.BoolFlag{ + Name: "backend-force-push", + Value: false, Usage: "Force to push Nydus blobs even if they already exist in storage backend", + EnvVars: []string{"BACKEND_FORCE_PUSH"}, + }, + + &cli.StringFlag{ + Name: "build-cache", + Value: "", + Usage: "Specify a cache image to accelerate nydus image conversion", + EnvVars: []string{"BUILD_CACHE"}, + }, + &cli.StringFlag{ + Name: "build-cache-tag", + Value: "", + Usage: "Use $target:$build-cache-tag as cache image, conflict with --build-cache", + EnvVars: []string{"BUILD_CACHE_TAG"}, + }, + &cli.StringFlag{ + Name: "build-cache-version", + Value: "v1", + Usage: "Version number to filter cache images", + EnvVars: []string{"BUILD_CACHE_VERSION"}, + }, + &cli.BoolFlag{ + Name: "build-cache-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS cache registry", + EnvVars: []string{"BUILD_CACHE_INSECURE"}, + }, + // The --build-cache-max-records flag represents the maximum number + // of layers in cache image. 200 (bootstrap + blob in one record) was + // chosen to make it compatible with the 127 max in graph driver of + // docker so that we can pull cache image using docker. 
+ &cli.UintFlag{ + Name: "build-cache-max-records", + Value: maxCacheMaxRecords, + Usage: "Maximum cache records in a cache image", + EnvVars: []string{"BUILD_CACHE_MAX_RECORDS"}, + }, + &cli.StringFlag{ + Name: "chunk-dict", + Required: false, + Usage: "Specify a chunk dict expression for chunk deduplication, " + + "for examples: bootstrap:registry:localhost:5000/namespace/app:chunk_dict, bootstrap:local:/path/to/chunk_dict.boot", + EnvVars: []string{"CHUNK_DICT"}, + }, + &cli.BoolFlag{ + Name: "chunk-dict-insecure", + Required: false, + Value: false, + Usage: "Skip verifying server certs for HTTPS dict registry", + EnvVars: []string{"CHUNK_DICT_INSECURE"}, + }, + + &cli.BoolFlag{ + Name: "merge-platform", + Value: false, + Usage: "Generate an OCI image index with both OCI and Nydus manifests for the image", + EnvVars: []string{"MERGE_PLATFORM"}, + Aliases: []string{"multi-platform"}, + }, + &cli.BoolFlag{ + Name: "all-platforms", + Value: false, + Usage: "Convert images for all platforms, conflicts with --platform", + }, + &cli.StringFlag{ + Name: "platform", + Value: "linux/" + runtime.GOARCH, + Usage: "Convert images for specific platforms, for example: 'linux/amd64,linux/arm64'", + }, + &cli.BoolFlag{ + Name: "oci-ref", + Value: false, + Usage: "Convert to OCI-referenced nydus zran image", + EnvVars: []string{"OCI_REF"}, + }, + &cli.BoolFlag{ + Name: "with-referrer", + Value: false, + Usage: "Associate a reference to the source image, see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers", + EnvVars: []string{"WITH_REFERRER"}, + }, + &cli.BoolFlag{ + Name: "oci", + Value: false, + Usage: "Convert Docker media types to OCI media types", + EnvVars: []string{"OCI"}, + }, + &cli.BoolFlag{ + Name: "docker-v2-format", + Value: false, + Hidden: true, + }, + &cli.StringFlag{ + Name: "fs-version", + Required: false, + Value: "6", + DefaultText: "V6 nydus image format", + Usage: "Nydus image format version number, possible values: 5, 6", + EnvVars: []string{"FS_VERSION"}, + }, + &cli.BoolFlag{ + Name: "fs-align-chunk", + Value: false, + Usage: "Enable chunk data alignment(4K) for Nydus image", + EnvVars: []string{"FS_ALIGN_CHUNK"}, + }, + &cli.BoolFlag{ + Name: "backend-aligned-chunk", + Value: false, + Usage: "[Deprecated] Enable chunk data alignment(4K) for Nydus image", + EnvVars: []string{"BACKEND_ALIGNED_CHUNK"}, + }, + &cli.StringFlag{ + Name: "prefetch-dir", + Value: "", + Usage: "Specify an absolute path within the image for prefetch", + EnvVars: []string{"PREFETCH_DIR"}, + }, + &cli.BoolFlag{ + Name: "prefetch-patterns", + Value: false, + Usage: "Read prefetch list from STDIN, please input absolute paths line by line", + EnvVars: []string{"PREFETCH_PATTERNS"}, + }, + &cli.StringFlag{ + Name: "compressor", + Value: "zstd", + Usage: "Algorithm to compress image data blob, possible values: none, lz4_block, zstd", + EnvVars: []string{"COMPRESSOR"}, + }, + &cli.StringFlag{ + Name: "fs-chunk-size", + Value: "0x100000", + Usage: "size of nydus image data chunk, must be power of two and between 0x1000-0x100000, [default: 0x100000]", + EnvVars: []string{"FS_CHUNK_SIZE"}, + Aliases: []string{"chunk-size"}, + }, + &cli.StringFlag{ + Name: "batch-size", + Value: "0", + Usage: "size of batch data chunks, must be power of two, between 0x1000-0x1000000 or zero, [default: 0]", + EnvVars: []string{"BATCH_SIZE"}, + }, + &cli.StringFlag{ + Name: "work-dir", + Value: "./tmp", + Usage: "Working directory for image conversion", + EnvVars: []string{"WORK_DIR"}, + }, + 
&cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + &cli.StringFlag{ + Name: "output-json", + Value: "", + Usage: "File path to save the metrics collected during conversion in JSON format, for example: './output.json'", + EnvVars: []string{"OUTPUT_JSON"}, + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + targetRef, err := getTargetReference(c) + if err != nil { + return err + } + + backendType, backendConfig, err := getBackendConfig(c, "", false) + if err != nil { + return err + } + + cacheRef, err := getCacheReference(c, targetRef) + if err != nil { + return err + } + cacheMaxRecords := c.Uint("build-cache-max-records") + if cacheMaxRecords < 1 { + return fmt.Errorf("--build-cache-max-records should be greater than 0") + } + if cacheMaxRecords > maxCacheMaxRecords { + return fmt.Errorf("--build-cache-max-records should not be greater than %d", maxCacheMaxRecords) + } + cacheVersion := c.String("build-cache-version") + + fsVersion := c.String("fs-version") + possibleFsVersions := []string{"5", "6"} + if !isPossibleValue(possibleFsVersions, fsVersion) { + return fmt.Errorf("--fs-version should be one of %v", possibleFsVersions) + } + + prefetchPatterns, err := getPrefetchPatterns(c) + if err != nil { + return err + } + + chunkDictRef := "" + chunkDict := c.String("chunk-dict") + if chunkDict != "" { + _, _, chunkDictRef, err = converter.ParseChunkDictArgs(chunkDict) + if err != nil { + return errors.Wrap(err, "parse chunk dict arguments") + } + } + + docker2OCI := false + if c.Bool("docker-v2-format") { + logrus.Warn("the option `--docker-v2-format` has been deprecated, use `--oci` instead") + docker2OCI = false + } else if c.Bool("oci") { + docker2OCI = true + } + + // Forcibly enable `--oci` option when `--oci-ref` be enabled. 
+ if c.Bool("oci-ref") { + logrus.Warn("forcibly enabled `--oci` option when `--oci-ref` be enabled") + docker2OCI = true + } + + opt := converter.Opt{ + WorkDir: c.String("work-dir"), + NydusImagePath: c.String("nydus-image"), + + Source: c.String("source"), + Target: targetRef, + SourceInsecure: c.Bool("source-insecure"), + TargetInsecure: c.Bool("target-insecure"), + + BackendType: backendType, + BackendConfig: backendConfig, + BackendForcePush: c.Bool("backend-force-push"), + + CacheRef: cacheRef, + CacheInsecure: c.Bool("build-cache-insecure"), + CacheMaxRecords: cacheMaxRecords, + CacheVersion: cacheVersion, + + ChunkDictRef: chunkDictRef, + ChunkDictInsecure: c.Bool("chunk-dict-insecure"), + + PrefetchPatterns: prefetchPatterns, + MergePlatform: c.Bool("merge-platform"), + Docker2OCI: docker2OCI, + FsVersion: fsVersion, + FsAlignChunk: c.Bool("backend-aligned-chunk") || c.Bool("fs-align-chunk"), + Compressor: c.String("compressor"), + ChunkSize: c.String("chunk-size"), + BatchSize: c.String("batch-size"), + + OCIRef: c.Bool("oci-ref"), + WithReferrer: c.Bool("with-referrer"), + AllPlatforms: c.Bool("all-platforms"), + Platforms: c.String("platform"), + + OutputJSON: c.String("output-json"), + } + + return converter.Convert(context.Background(), opt) + }, + }, + { + Name: "check", + Usage: "Verify nydus image format and content", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "source", + Required: false, + Usage: "Source OCI image reference", + EnvVars: []string{"SOURCE"}, + }, + &cli.StringFlag{ + Name: "target", + Required: true, + Usage: "Target (Nydus) image reference", + EnvVars: []string{"TARGET"}, + }, + &cli.BoolFlag{ + Name: "source-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS source registry", + EnvVars: []string{"SOURCE_INSECURE"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + + &cli.StringFlag{ + Name: "backend-type", + Value: "", + Usage: "Type of storage backend, enable verification of file data in Nydus image if specified, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "backend-config", + Value: "", + Usage: "Json string for storage backend configuration", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + + &cli.BoolFlag{ + Name: "multi-platform", + Value: false, + Usage: "Verify that the image contains an image index with both OCI and Nydus manifests", + EnvVars: []string{"MULTI_PLATFORM"}, + }, + &cli.StringFlag{ + Name: "platform", + Value: "linux/" + runtime.GOARCH, + Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", + }, + + &cli.StringFlag{ + Name: "work-dir", + Value: "./output", + Usage: "Working directory for image verification", + EnvVars: []string{"WORK_DIR"}, + }, + &cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + &cli.StringFlag{ + Name: "nydusd", + Value: "nydusd", + Usage: "Path to the nydusd binary, default to search in PATH", + EnvVars: []string{"NYDUSD"}, + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + backendType, backendConfig, err := 
getBackendConfig(c, "", false) + if err != nil { + return err + } + + _, arch, err := provider.ExtractOsArch(c.String("platform")) + if err != nil { + return err + } + + checker, err := checker.New(checker.Opt{ + WorkDir: c.String("work-dir"), + Source: c.String("source"), + Target: c.String("target"), + MultiPlatform: c.Bool("multi-platform"), + SourceInsecure: c.Bool("source-insecure"), + TargetInsecure: c.Bool("target-insecure"), + NydusImagePath: c.String("nydus-image"), + NydusdPath: c.String("nydusd"), + BackendType: backendType, + BackendConfig: backendConfig, + ExpectedArch: arch, + }) + if err != nil { + return err + } + + return checker.Check(context.Background()) + }, + }, + { + Name: "chunkdict", + Usage: "Deduplicate chunk for Nydus image (experimental)", + Subcommands: []*cli.Command{ + { + Name: "generate", + Usage: "Save chunk and blob information of Multi-image into the database (experimental)", + Flags: []cli.Flag{ + &cli.StringSliceFlag{ + Name: "sources", + Required: true, + Usage: "One or more Nydus image reference(Multiple images should be split by commas)", + EnvVars: []string{"SOURCES"}, + }, + &cli.StringFlag{ + Name: "target", + Required: false, + Usage: "Target chunkdict image (Nydus) reference", + EnvVars: []string{"TARGET"}, + }, + &cli.BoolFlag{ + Name: "source-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS source registry", + EnvVars: []string{"SOURCE_INSECURE"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + + &cli.StringFlag{ + Name: "backend-type", + Value: "", + Usage: "Type of storage backend, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "backend-config", + Value: "", + Usage: "Json configuration string for storage backend", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + + &cli.StringFlag{ + Name: "work-dir", + Value: "./output", + Usage: "Working directory for generating chunkdict image", + EnvVars: []string{"WORK_DIR"}, + }, + &cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + + &cli.BoolFlag{ + Name: "all-platforms", + Value: false, + Usage: "Generate chunkdict image for all platforms, conflicts with --platform", + }, + &cli.StringFlag{ + Name: "platform", + Value: "linux/" + runtime.GOARCH, + Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + backendType, backendConfig, err := getBackendConfig(c, "", false) + if err != nil { + return err + } + + _, arch, err := provider.ExtractOsArch(c.String("platform")) + if err != nil { + return err + } + + generator, err := generator.New(generator.Opt{ + Sources: c.StringSlice("sources"), + Target: c.String("target"), + SourceInsecure: c.Bool("source-insecure"), + TargetInsecure: c.Bool("target-insecure"), + + BackendType: backendType, + BackendConfig: backendConfig, + BackendForcePush: c.Bool("backend-force-push"), + + WorkDir: c.String("work-dir"), + NydusImagePath: c.String("nydus-image"), + ExpectedArch: arch, + AllPlatforms: c.Bool("all-platforms"), + 
Platforms: c.String("platform"), + }) + if err != nil { + return err + } + + return generator.Generate(context.Background()) + }, + }, + }, + }, + { + Name: "mount", + Aliases: []string{"view"}, + Usage: "Mount the nydus image as a filesystem", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "target", + Required: true, + Usage: "Target (Nydus) image reference", + EnvVars: []string{"TARGET"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + + &cli.StringFlag{ + Name: "backend-type", + Value: "", + Required: false, + Usage: "Type of storage backend, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "backend-config", + Value: "", + Usage: "Json configuration string for storage backend", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + + &cli.StringFlag{ + Name: "mount-path", + Value: "./image-fs", + Usage: "Path to mount the image", + EnvVars: []string{"MOUNT_PATH"}, + }, + &cli.StringFlag{ + Name: "platform", + Value: "linux/" + runtime.GOARCH, + Usage: "Specify platform identifier to choose image manifest, possible values: 'linux/amd64' and 'linux/arm64'", + }, + + &cli.StringFlag{ + Name: "work-dir", + Value: "./tmp", + Usage: "Working directory for image view, will be cleaned up after viewing", + EnvVars: []string{"WORK_DIR"}, + }, + &cli.StringFlag{ + Name: "nydusd", + Value: "nydusd", + Usage: "The nydusd binary path, if unset, search in PATH environment", + EnvVars: []string{"NYDUSD"}, + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + backendType, backendConfig, err := getBackendConfig(c, "", false) + if err != nil { + return err + } else if backendConfig == "" { + + backendType = "registry" + parsed, err := reference.ParseNormalizedNamed(c.String("target")) + if err != nil { + return err + } + + backendConfigStruct, err := rule.NewRegistryBackendConfig(parsed) + if err != nil { + return errors.Wrap(err, "parse registry backend configuration") + } + + backendConfigStruct.SkipVerify = c.Bool("target-insecure") + + bytes, err := json.Marshal(backendConfigStruct) + if err != nil { + return errors.Wrap(err, "marshal registry backend configuration") + } + backendConfig = string(bytes) + + } + + _, arch, err := provider.ExtractOsArch(c.String("platform")) + if err != nil { + return err + } + + fsViewer, err := viewer.New(viewer.Opt{ + WorkDir: c.String("work-dir"), + Target: c.String("target"), + TargetInsecure: c.Bool("target-insecure"), + MountPath: c.String("mount-path"), + NydusdPath: c.String("nydusd"), + BackendType: backendType, + BackendConfig: backendConfig, + ExpectedArch: arch, + }) + if err != nil { + return err + } + + return fsViewer.View(context.Background()) + }, + }, + { + Name: "build", + Aliases: []string{"pack"}, + Usage: "Build a Nydus filesystem from a source directory", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "source-dir", + Aliases: []string{"target-dir"}, // for compatibility + Required: true, + Usage: "Source directory to build Nydus filesystem from", + EnvVars: []string{"SOURCE_DIR"}, + }, + &cli.StringFlag{ + Name: "output-dir", + Aliases: []string{"o"}, + Required: false, + Usage: "Output directory for built artifacts", + EnvVars: []string{"OUTPUT_DIR"}, + }, + 
&cli.StringFlag{ + Name: "name", + Aliases: []string{"meta", "bootstrap"}, // for compatibility + Required: true, + Usage: "Image name, which will be used as suffix for the generated Nydus image bootstrap/data blobs", + EnvVars: []string{"BOOTSTRAP", "IMAGE_NAME"}, + }, + + &cli.BoolFlag{ + Name: "backend-push", + Value: false, + Usage: "Push generated Nydus filesystem to storage backend", + EnvVars: []string{"BACKEND_PUSH"}, + }, + &cli.StringFlag{ + Name: "backend-type", + Value: "oss", + DefaultText: "oss", + Usage: "Type of storage backend, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "backend-config", + Value: "", + Usage: "Json configuration string for storage backend", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "backend-config-file", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + + &cli.StringFlag{ + Name: "chunk-dict", + Usage: "Specify a chunk dict expression for chunk deduplication, for example: bootstrap=/path/to/dict.boot", + EnvVars: []string{"CHUNK_DICT"}, + }, + &cli.StringFlag{ + Name: "parent-bootstrap", + Usage: "Specify a parent metadata to reference data chunks", + EnvVars: []string{"PARENT_BOOTSTRAP"}, + }, + &cli.BoolFlag{ + Name: "compact", + Usage: "Compact parent bootstrap before building the image when needed", + EnvVars: []string{"COMPACT"}, + }, + &cli.PathFlag{ + Name: "compact-config-file", + TakesFile: true, + Usage: "Compact configuration file, default configuration is " + + "{\"min_used_ratio\": 5, \"compact_blob_size\": 10485760, \"max_compact_size\": 104857600, " + + "\"layers_to_compact\": 32}", + EnvVars: []string{"COMPACT_CONFIG_FILE"}, + }, + + &cli.StringFlag{ + Name: "fs-version", + Required: false, + Usage: "Nydus image format version number, possible values: 5, 6", + EnvVars: []string{"FS_VERSION"}, + Value: "6", + DefaultText: "V6 nydus image format", + }, + &cli.StringFlag{ + Name: "compressor", + Value: "zstd", + Usage: "Algorithm to compress image data blob, possible values: none, lz4_block, zstd", + EnvVars: []string{"COMPRESSOR"}, + }, + &cli.StringFlag{ + Name: "chunk-size", + Value: "0x100000", + Usage: "size of nydus image data chunk, must be power of two and between 0x1000-0x100000, [default: 0x100000]", + EnvVars: []string{"CHUNK_SIZE"}, + }, + + &cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + }, + Before: func(ctx *cli.Context) error { + sourcePath := ctx.String("source-dir") + fi, err := os.Stat(sourcePath) + if err != nil { + return errors.Wrapf(err, "failed to check source directory") + } + if !fi.IsDir() { + return errors.Errorf("source path '%s' is not a directory", sourcePath) + } + return nil + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + var ( + p *packer.Packer + res packer.PackResult + backendConfig packer.BackendConfig + err error + ) + + // if backend-push is specified, we should make sure backend-config-file exists + if c.Bool("backend-push") || c.Bool("compact") { + _backendType, _backendConfig, err := getBackendConfig(c, "", true) + if err != nil { + return err + } + // we can verify the _backendType in the `packer.ParseBackendConfigString` function + cfg, err := packer.ParseBackendConfigString(_backendType, _backendConfig) + if err != nil { + return errors.Errorf("failed to parse backend-config '%s', err = %v", 
_backendConfig, err) + } + backendConfig = cfg + } + + if p, err = packer.New(packer.Opt{ + LogLevel: logrus.GetLevel(), + NydusImagePath: c.String("nydus-image"), + OutputDir: c.String("output-dir"), + BackendConfig: backendConfig, + }); err != nil { + return err + } + + if res, err = p.Pack(context.Background(), packer.PackRequest{ + SourceDir: c.String("source-dir"), + ImageName: c.String("name"), + PushToRemote: c.Bool("backend-push"), + FsVersion: c.String("fs-version"), + Compressor: c.String("compressor"), + ChunkSize: c.String("chunk-size"), + + ChunkDict: c.String("chunk-dict"), + Parent: c.String("parent-bootstrap"), + TryCompact: c.Bool("compact"), + CompactConfigPath: c.String("compact-config-file"), + }); err != nil { + return err + } + logrus.Infof("successfully built Nydus image (bootstrap:'%s', blob:'%s')", res.Meta, res.Blob) + return nil + }, + }, + { + Name: "copy", + Usage: "Copy an image from source to target", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "source", + Required: true, + Usage: "Source image reference", + EnvVars: []string{"SOURCE"}, + }, + &cli.StringFlag{ + Name: "target", + Required: false, + Usage: "Target image reference", + EnvVars: []string{"TARGET"}, + }, + &cli.BoolFlag{ + Name: "source-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS source registry", + EnvVars: []string{"SOURCE_INSECURE"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + + &cli.StringFlag{ + Name: "source-backend-type", + Value: "", + Usage: "Type of storage backend, possible values: 'oss', 's3'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "source-backend-config", + Value: "", + Usage: "Json configuration string for storage backend", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "source-backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, + + &cli.BoolFlag{ + Name: "all-platforms", + Value: false, + Usage: "Copy images for all platforms, conflicts with --platform", + }, + &cli.StringFlag{ + Name: "platform", + Value: "linux/" + runtime.GOARCH, + Usage: "Copy images for specific platforms, for example: 'linux/amd64,linux/arm64'", + }, + + &cli.StringFlag{ + Name: "push-chunk-size", + Value: "0MB", + Usage: "Chunk size for pushing a blob layer in chunked", + }, + + &cli.StringFlag{ + Name: "work-dir", + Value: "./tmp", + Usage: "Working directory for image copy", + EnvVars: []string{"WORK_DIR"}, + }, + &cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + + sourceBackendType, sourceBackendConfig, err := getBackendConfig(c, "source-", false) + if err != nil { + return err + } + + pushChunkSize, err := humanize.ParseBytes(c.String("push-chunk-size")) + if err != nil { + return errors.Wrap(err, "invalid --push-chunk-size option") + } + if pushChunkSize > 0 { + logrus.Infof("will copy layer with chunk size %s", c.String("push-chunk-size")) + } + + opt := copier.Opt{ + WorkDir: c.String("work-dir"), + NydusImagePath: c.String("nydus-image"), + + Source: c.String("source"), + Target: c.String("target"), + SourceInsecure: c.Bool("source-insecure"), + TargetInsecure: 
c.Bool("target-insecure"), + + SourceBackendType: sourceBackendType, + SourceBackendConfig: sourceBackendConfig, + + AllPlatforms: c.Bool("all-platforms"), + Platforms: c.String("platform"), + + PushChunkSize: int64(pushChunkSize), + } + + return copier.Copy(context.Background(), opt) + }, + }, + { + Name: "commit", + Usage: "Create and push a new nydus image from a container's changes that use a nydus image", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "work-dir", + Value: "./tmp", + Usage: "Working directory for commit workflow", + EnvVars: []string{"WORK_DIR"}, + }, + &cli.StringFlag{ + Name: "nydus-image", + Value: "nydus-image", + Usage: "Path to the nydus-image binary, default to search in PATH", + EnvVars: []string{"NYDUS_IMAGE"}, + }, + &cli.StringFlag{ + Name: "containerd-address", + Value: "/run/containerd/containerd.sock", + Usage: "Containerd address, optionally with \"unix://\" prefix [$CONTAINERD_ADDRESS] (default \"/run/containerd/containerd.sock\")", + EnvVars: []string{"CONTAINERD_ADDR"}, + }, + &cli.StringFlag{ + Name: "namespace", + Aliases: []string{"n"}, + Value: "default", + Usage: "Container namespace, default with \"default\" namespace", + EnvVars: []string{"NAMESPACE"}, + }, + &cli.StringFlag{ + Name: "container", + Required: true, + Usage: "Target container id", + EnvVars: []string{"CONTAINER"}, + }, + &cli.StringFlag{ + Name: "target", + Required: true, + Usage: "Target nydus image reference", + EnvVars: []string{"TARGET"}, + }, + &cli.BoolFlag{ + Name: "source-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS source registry", + EnvVars: []string{"SOURCE_INSECURE"}, + }, + &cli.BoolFlag{ + Name: "target-insecure", + Required: false, + Usage: "Skip verifying server certs for HTTPS target registry", + EnvVars: []string{"TARGET_INSECURE"}, + }, + &cli.IntFlag{ + Name: "maximum-times", + Required: false, + DefaultText: "400", + Value: 400, + Usage: "The maximum times allowed to be committed", + EnvVars: []string{"MAXIMUM_TIMES"}, + }, + &cli.StringSliceFlag{ + Name: "with-path", + Aliases: []string{"with-mount-path"}, + Required: false, + Usage: "The external directory (for example mountpoint) in container that need to be committed", + EnvVars: []string{"WITH_PATH"}, + }, + }, + Action: func(c *cli.Context) error { + setupLogLevel(c) + parsePaths := func(paths []string) ([]string, []string) { + withPaths := []string{} + withoutPaths := []string{} + + for _, path := range paths { + path = strings.TrimSpace(path) + if strings.HasPrefix(path, "!") { + path = strings.TrimLeft(path, "!") + path = strings.TrimRight(path, "/") + withoutPaths = append(withoutPaths, path) + } else { + withPaths = append(withPaths, path) + } + } + + return withPaths, withoutPaths + } + + withPaths, withoutPaths := parsePaths(c.StringSlice("with-path")) + opt := committer.Opt{ + WorkDir: c.String("work-dir"), + NydusImagePath: c.String("nydus-image"), + ContainerdAddress: c.String("containerd-address"), + Namespace: c.String("namespace"), + ContainerID: c.String("container"), + TargetRef: c.String("target"), + SourceInsecure: c.Bool("source-insecure"), + TargetInsecure: c.Bool("target-insecure"), + MaximumTimes: c.Int("maximum-times"), + WithPaths: withPaths, + WithoutPaths: withoutPaths, + } + cm, err := committer.NewCommitter(opt) + if err != nil { + return errors.Wrap(err, "create commiter") + } + return cm.Commit(c.Context, opt) + }, + }, + } + + if !utils.IsSupportedArch(runtime.GOARCH) { + logrus.Fatal("Nydusify can only work under architecture 'amd64' and 
'arm64'") + } + + if err := app.Run(os.Args); err != nil { + logrus.Fatal(err) + } +} + +func setupLogLevel(c *cli.Context) { + // global `-D` has the highest priority + if c.Bool("D") { + logrus.SetLevel(logrus.DebugLevel) + return + } + + lvl := c.String("log-level") + logLevel, err := logrus.ParseLevel(lvl) + if err != nil { + logrus.Warnf("failed to parse log level(%s): %+v\ndefault log level(info) will be used", lvl, err) + logLevel = defaultLogLevel + } + + logrus.SetLevel(logLevel) +} diff --git a/contrib/nydusify/cmd/nydusify_test.go b/contrib/nydusify/cmd/nydusify_test.go index ff7afe2a4fc..d6b17d0e7a5 100644 --- a/contrib/nydusify/cmd/nydusify_test.go +++ b/contrib/nydusify/cmd/nydusify_test.go @@ -1,322 +1,322 @@ -// Copyright 2023 Alibaba Cloud. All rights reserved. -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package main - -import ( - "encoding/json" - "flag" - "os" - "testing" - - "github.com/stretchr/testify/require" - "github.com/urfave/cli/v2" -) - -func TestIsPossibleValue(t *testing.T) { - value := "qwe" - list := []string{"abc", "qwe", "xyz"} - require.True(t, isPossibleValue(list, value)) - - // Failure situation - value2 := "vdf" - require.False(t, isPossibleValue(list, value2)) -} - -func TestAddReferenceSuffix(t *testing.T) { - source := "localhost:5000/nginx:latest" - suffix := "-suffix" - target, err := addReferenceSuffix(source, suffix) - require.NoError(t, err) - require.Equal(t, target, "localhost:5000/nginx:latest-suffix") - - // Failure situation - source = "localhost:5000\nginx:latest" - suffix = "-suffix" - _, err = addReferenceSuffix(source, suffix) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid source image reference") - - source = "localhost:5000/nginx:latest@sha256:757574c5a2102627de54971a0083d4ecd24eb48fdf06b234d063f19f7bbc22fb" - suffix = "-suffix" - _, err = addReferenceSuffix(source, suffix) - require.Error(t, err) - require.Contains(t, err.Error(), "unsupported digested image reference") -} - -func TestParseBackendConfig(t *testing.T) { - configJSON := ` - { - "bucket_name": "test", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "meta_prefix": "meta", - "blob_prefix": "blob" - }` - require.True(t, json.Valid([]byte(configJSON))) - - file, err := os.CreateTemp("", "nydusify-backend-config-test.json") - require.NoError(t, err) - defer os.RemoveAll(file.Name()) - - _, err = file.WriteString(configJSON) - require.NoError(t, err) - file.Sync() - - resultJSON, err := parseBackendConfig("", file.Name()) - require.NoError(t, err) - require.True(t, json.Valid([]byte(resultJSON))) - require.Equal(t, configJSON, resultJSON) - - // Failure situation - _, err = parseBackendConfig(configJSON, file.Name()) - require.Error(t, err) - - _, err = parseBackendConfig("", "non-existent.json") - require.Error(t, err) -} - -func TestGetBackendConfig(t *testing.T) { - app := &cli.App{ - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "prefixbackend-type", - Value: "", - }, - &cli.StringFlag{ - Name: "prefixbackend-config", - Value: "", - }, - &cli.StringFlag{ - Name: "prefixbackend-config-file", - Value: "", - }, - }, - } - ctx := cli.NewContext(app, nil, nil) - - backendType, backendConfig, err := getBackendConfig(ctx, "prefix", false) - require.NoError(t, err) - require.Empty(t, backendType) - require.Empty(t, backendConfig) - - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.Error(t, err) - require.Contains(t, 
err.Error(), "backend type is empty, please specify option") - require.Empty(t, backendType) - require.Empty(t, backendConfig) - - flagSet := flag.NewFlagSet("test1", flag.PanicOnError) - flagSet.String("prefixbackend-type", "errType", "") - ctx = cli.NewContext(app, flagSet, nil) - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.Error(t, err) - require.Contains(t, err.Error(), "backend-type should be one of") - require.Empty(t, backendType) - require.Empty(t, backendConfig) - - flagSet = flag.NewFlagSet("test2", flag.PanicOnError) - flagSet.String("prefixbackend-type", "oss", "") - ctx = cli.NewContext(app, flagSet, nil) - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.Error(t, err) - require.Contains(t, err.Error(), "backend configuration is empty, please specify option") - require.Empty(t, backendType) - require.Empty(t, backendConfig) - - configJSON := ` - { - "bucket_name": "test", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "meta_prefix": "meta", - "blob_prefix": "blob" - }` - require.True(t, json.Valid([]byte(configJSON))) - - flagSet = flag.NewFlagSet("test3", flag.PanicOnError) - flagSet.String("prefixbackend-type", "oss", "") - flagSet.String("prefixbackend-config", configJSON, "") - ctx = cli.NewContext(app, flagSet, nil) - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.NoError(t, err) - require.Equal(t, "oss", backendType) - require.Equal(t, configJSON, backendConfig) - - file, err := os.CreateTemp("", "nydusify-backend-config-test.json") - require.NoError(t, err) - defer os.RemoveAll(file.Name()) - - _, err = file.WriteString(configJSON) - require.NoError(t, err) - file.Sync() - - flagSet = flag.NewFlagSet("test4", flag.PanicOnError) - flagSet.String("prefixbackend-type", "oss", "") - flagSet.String("prefixbackend-config-file", file.Name(), "") - ctx = cli.NewContext(app, flagSet, nil) - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.NoError(t, err) - require.Equal(t, "oss", backendType) - require.Equal(t, configJSON, backendConfig) - - flagSet = flag.NewFlagSet("test5", flag.PanicOnError) - flagSet.String("prefixbackend-type", "oss", "") - flagSet.String("prefixbackend-config", configJSON, "") - flagSet.String("prefixbackend-config-file", file.Name(), "") - ctx = cli.NewContext(app, flagSet, nil) - backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) - require.Error(t, err) - require.Contains(t, err.Error(), "--backend-config conflicts with --backend-config-file") - require.Empty(t, backendType) - require.Empty(t, backendConfig) -} - -func TestGetTargetReference(t *testing.T) { - app := &cli.App{ - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "target", - Value: "", - }, - &cli.StringFlag{ - Name: "target-suffix", - Value: "", - }, - &cli.StringFlag{ - Name: "source", - Value: "", - }, - }, - } - ctx := cli.NewContext(app, nil, nil) - - target, err := getTargetReference(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), "--target or --target-suffix is required") - require.Empty(t, target) - - flagSet := flag.NewFlagSet("test1", flag.PanicOnError) - flagSet.String("target", "testTarget", "") - flagSet.String("target-suffix", "testSuffix", "") - ctx = cli.NewContext(app, flagSet, nil) - target, err = getTargetReference(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), "-target conflicts with --target-suffix") - require.Empty(t, target) - - 
flagSet = flag.NewFlagSet("test2", flag.PanicOnError) - flagSet.String("target-suffix", "-nydus", "") - flagSet.String("source", "localhost:5000/nginx:latest", "") - ctx = cli.NewContext(app, flagSet, nil) - target, err = getTargetReference(ctx) - require.NoError(t, err) - require.Equal(t, "localhost:5000/nginx:latest-nydus", target) - - flagSet = flag.NewFlagSet("test3", flag.PanicOnError) - flagSet.String("target-suffix", "-nydus", "") - flagSet.String("source", "localhost:5000\nginx:latest", "") - ctx = cli.NewContext(app, flagSet, nil) - target, err = getTargetReference(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid source image reference") - require.Empty(t, target) - - flagSet = flag.NewFlagSet("test4", flag.PanicOnError) - flagSet.String("target", "testTarget", "") - ctx = cli.NewContext(app, flagSet, nil) - target, err = getTargetReference(ctx) - require.NoError(t, err) - require.Equal(t, "testTarget", target) -} - -func TestGetCacheReference(t *testing.T) { - app := &cli.App{ - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "build-cache", - Value: "", - }, - &cli.StringFlag{ - Name: "build-cache-tag", - Value: "", - }, - }, - } - ctx := cli.NewContext(app, nil, nil) - - cache, err := getCacheReference(ctx, "") - require.NoError(t, err) - require.Empty(t, cache) - - flagSet := flag.NewFlagSet("test1", flag.PanicOnError) - flagSet.String("build-cache", "cache", "") - flagSet.String("build-cache-tag", "cacheTag", "") - ctx = cli.NewContext(app, flagSet, nil) - cache, err = getCacheReference(ctx, "") - require.Error(t, err) - require.Contains(t, err.Error(), "--build-cache conflicts with --build-cache-tag") - require.Empty(t, cache) - - flagSet = flag.NewFlagSet("test2", flag.PanicOnError) - flagSet.String("build-cache-tag", "cacheTag", "errTarget") - ctx = cli.NewContext(app, flagSet, nil) - cache, err = getCacheReference(ctx, "") - require.Error(t, err) - require.Contains(t, err.Error(), "invalid target image reference: invalid reference format") - require.Empty(t, cache) - - flagSet = flag.NewFlagSet("test2", flag.PanicOnError) - flagSet.String("build-cache-tag", "latest-cache", "") - ctx = cli.NewContext(app, flagSet, nil) - cache, err = getCacheReference(ctx, "localhost:5000/nginx:latest") - require.NoError(t, err) - require.Equal(t, "localhost:5000/nginx:latest-cache", cache) -} - -func TestGetPrefetchPatterns(t *testing.T) { - app := &cli.App{ - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "prefetch-dir", - Value: "", - }, - &cli.BoolFlag{ - Name: "prefetch-patterns", - Value: false, - }, - }, - } - ctx := cli.NewContext(app, nil, nil) - - patterns, err := getPrefetchPatterns(ctx) - require.NoError(t, err) - require.Equal(t, "/", patterns) - - flagSet := flag.NewFlagSet("test1", flag.PanicOnError) - flagSet.String("prefetch-dir", "/etc/passwd", "") - ctx = cli.NewContext(app, flagSet, nil) - patterns, err = getPrefetchPatterns(ctx) - require.NoError(t, err) - require.Equal(t, "/etc/passwd", patterns) - - flagSet = flag.NewFlagSet("test2", flag.PanicOnError) - flagSet.String("prefetch-dir", "/etc/passwd", "") - flagSet.Bool("prefetch-patterns", true, "") - ctx = cli.NewContext(app, flagSet, nil) - patterns, err = getPrefetchPatterns(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), "--prefetch-dir conflicts with --prefetch-patterns") - require.Empty(t, patterns) - - flagSet = flag.NewFlagSet("test3", flag.PanicOnError) - flagSet.Bool("prefetch-patterns", true, "") - ctx = cli.NewContext(app, flagSet, nil) - patterns, err = 
getPrefetchPatterns(ctx) - require.NoError(t, err) - require.Equal(t, "/", patterns) -} +// Copyright 2023 Alibaba Cloud. All rights reserved. +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "encoding/json" + "flag" + "os" + "testing" + + "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" +) + +func TestIsPossibleValue(t *testing.T) { + value := "qwe" + list := []string{"abc", "qwe", "xyz"} + require.True(t, isPossibleValue(list, value)) + + // Failure situation + value2 := "vdf" + require.False(t, isPossibleValue(list, value2)) +} + +func TestAddReferenceSuffix(t *testing.T) { + source := "localhost:5000/nginx:latest" + suffix := "-suffix" + target, err := addReferenceSuffix(source, suffix) + require.NoError(t, err) + require.Equal(t, target, "localhost:5000/nginx:latest-suffix") + + // Failure situation + source = "localhost:5000\nginx:latest" + suffix = "-suffix" + _, err = addReferenceSuffix(source, suffix) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid source image reference") + + source = "localhost:5000/nginx:latest@sha256:757574c5a2102627de54971a0083d4ecd24eb48fdf06b234d063f19f7bbc22fb" + suffix = "-suffix" + _, err = addReferenceSuffix(source, suffix) + require.Error(t, err) + require.Contains(t, err.Error(), "unsupported digested image reference") +} + +func TestParseBackendConfig(t *testing.T) { + configJSON := ` + { + "bucket_name": "test", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "meta_prefix": "meta", + "blob_prefix": "blob" + }` + require.True(t, json.Valid([]byte(configJSON))) + + file, err := os.CreateTemp("", "nydusify-backend-config-test.json") + require.NoError(t, err) + defer os.RemoveAll(file.Name()) + + _, err = file.WriteString(configJSON) + require.NoError(t, err) + file.Sync() + + resultJSON, err := parseBackendConfig("", file.Name()) + require.NoError(t, err) + require.True(t, json.Valid([]byte(resultJSON))) + require.Equal(t, configJSON, resultJSON) + + // Failure situation + _, err = parseBackendConfig(configJSON, file.Name()) + require.Error(t, err) + + _, err = parseBackendConfig("", "non-existent.json") + require.Error(t, err) +} + +func TestGetBackendConfig(t *testing.T) { + app := &cli.App{ + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "prefixbackend-type", + Value: "", + }, + &cli.StringFlag{ + Name: "prefixbackend-config", + Value: "", + }, + &cli.StringFlag{ + Name: "prefixbackend-config-file", + Value: "", + }, + }, + } + ctx := cli.NewContext(app, nil, nil) + + backendType, backendConfig, err := getBackendConfig(ctx, "prefix", false) + require.NoError(t, err) + require.Empty(t, backendType) + require.Empty(t, backendConfig) + + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.Error(t, err) + require.Contains(t, err.Error(), "backend type is empty, please specify option") + require.Empty(t, backendType) + require.Empty(t, backendConfig) + + flagSet := flag.NewFlagSet("test1", flag.PanicOnError) + flagSet.String("prefixbackend-type", "errType", "") + ctx = cli.NewContext(app, flagSet, nil) + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.Error(t, err) + require.Contains(t, err.Error(), "backend-type should be one of") + require.Empty(t, backendType) + require.Empty(t, backendConfig) + + flagSet = flag.NewFlagSet("test2", flag.PanicOnError) + flagSet.String("prefixbackend-type", "oss", "") + ctx = 
cli.NewContext(app, flagSet, nil) + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.Error(t, err) + require.Contains(t, err.Error(), "backend configuration is empty, please specify option") + require.Empty(t, backendType) + require.Empty(t, backendConfig) + + configJSON := ` + { + "bucket_name": "test", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "meta_prefix": "meta", + "blob_prefix": "blob" + }` + require.True(t, json.Valid([]byte(configJSON))) + + flagSet = flag.NewFlagSet("test3", flag.PanicOnError) + flagSet.String("prefixbackend-type", "oss", "") + flagSet.String("prefixbackend-config", configJSON, "") + ctx = cli.NewContext(app, flagSet, nil) + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.NoError(t, err) + require.Equal(t, "oss", backendType) + require.Equal(t, configJSON, backendConfig) + + file, err := os.CreateTemp("", "nydusify-backend-config-test.json") + require.NoError(t, err) + defer os.RemoveAll(file.Name()) + + _, err = file.WriteString(configJSON) + require.NoError(t, err) + file.Sync() + + flagSet = flag.NewFlagSet("test4", flag.PanicOnError) + flagSet.String("prefixbackend-type", "oss", "") + flagSet.String("prefixbackend-config-file", file.Name(), "") + ctx = cli.NewContext(app, flagSet, nil) + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.NoError(t, err) + require.Equal(t, "oss", backendType) + require.Equal(t, configJSON, backendConfig) + + flagSet = flag.NewFlagSet("test5", flag.PanicOnError) + flagSet.String("prefixbackend-type", "oss", "") + flagSet.String("prefixbackend-config", configJSON, "") + flagSet.String("prefixbackend-config-file", file.Name(), "") + ctx = cli.NewContext(app, flagSet, nil) + backendType, backendConfig, err = getBackendConfig(ctx, "prefix", true) + require.Error(t, err) + require.Contains(t, err.Error(), "--backend-config conflicts with --backend-config-file") + require.Empty(t, backendType) + require.Empty(t, backendConfig) +} + +func TestGetTargetReference(t *testing.T) { + app := &cli.App{ + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "target", + Value: "", + }, + &cli.StringFlag{ + Name: "target-suffix", + Value: "", + }, + &cli.StringFlag{ + Name: "source", + Value: "", + }, + }, + } + ctx := cli.NewContext(app, nil, nil) + + target, err := getTargetReference(ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "--target or --target-suffix is required") + require.Empty(t, target) + + flagSet := flag.NewFlagSet("test1", flag.PanicOnError) + flagSet.String("target", "testTarget", "") + flagSet.String("target-suffix", "testSuffix", "") + ctx = cli.NewContext(app, flagSet, nil) + target, err = getTargetReference(ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "-target conflicts with --target-suffix") + require.Empty(t, target) + + flagSet = flag.NewFlagSet("test2", flag.PanicOnError) + flagSet.String("target-suffix", "-nydus", "") + flagSet.String("source", "localhost:5000/nginx:latest", "") + ctx = cli.NewContext(app, flagSet, nil) + target, err = getTargetReference(ctx) + require.NoError(t, err) + require.Equal(t, "localhost:5000/nginx:latest-nydus", target) + + flagSet = flag.NewFlagSet("test3", flag.PanicOnError) + flagSet.String("target-suffix", "-nydus", "") + flagSet.String("source", "localhost:5000\nginx:latest", "") + ctx = cli.NewContext(app, flagSet, nil) + target, err = getTargetReference(ctx) + require.Error(t, err) + require.Contains(t, 
err.Error(), "invalid source image reference") + require.Empty(t, target) + + flagSet = flag.NewFlagSet("test4", flag.PanicOnError) + flagSet.String("target", "testTarget", "") + ctx = cli.NewContext(app, flagSet, nil) + target, err = getTargetReference(ctx) + require.NoError(t, err) + require.Equal(t, "testTarget", target) +} + +func TestGetCacheReference(t *testing.T) { + app := &cli.App{ + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "build-cache", + Value: "", + }, + &cli.StringFlag{ + Name: "build-cache-tag", + Value: "", + }, + }, + } + ctx := cli.NewContext(app, nil, nil) + + cache, err := getCacheReference(ctx, "") + require.NoError(t, err) + require.Empty(t, cache) + + flagSet := flag.NewFlagSet("test1", flag.PanicOnError) + flagSet.String("build-cache", "cache", "") + flagSet.String("build-cache-tag", "cacheTag", "") + ctx = cli.NewContext(app, flagSet, nil) + cache, err = getCacheReference(ctx, "") + require.Error(t, err) + require.Contains(t, err.Error(), "--build-cache conflicts with --build-cache-tag") + require.Empty(t, cache) + + flagSet = flag.NewFlagSet("test2", flag.PanicOnError) + flagSet.String("build-cache-tag", "cacheTag", "errTarget") + ctx = cli.NewContext(app, flagSet, nil) + cache, err = getCacheReference(ctx, "") + require.Error(t, err) + require.Contains(t, err.Error(), "invalid target image reference: invalid reference format") + require.Empty(t, cache) + + flagSet = flag.NewFlagSet("test2", flag.PanicOnError) + flagSet.String("build-cache-tag", "latest-cache", "") + ctx = cli.NewContext(app, flagSet, nil) + cache, err = getCacheReference(ctx, "localhost:5000/nginx:latest") + require.NoError(t, err) + require.Equal(t, "localhost:5000/nginx:latest-cache", cache) +} + +func TestGetPrefetchPatterns(t *testing.T) { + app := &cli.App{ + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "prefetch-dir", + Value: "", + }, + &cli.BoolFlag{ + Name: "prefetch-patterns", + Value: false, + }, + }, + } + ctx := cli.NewContext(app, nil, nil) + + patterns, err := getPrefetchPatterns(ctx) + require.NoError(t, err) + require.Equal(t, "/", patterns) + + flagSet := flag.NewFlagSet("test1", flag.PanicOnError) + flagSet.String("prefetch-dir", "/etc/passwd", "") + ctx = cli.NewContext(app, flagSet, nil) + patterns, err = getPrefetchPatterns(ctx) + require.NoError(t, err) + require.Equal(t, "/etc/passwd", patterns) + + flagSet = flag.NewFlagSet("test2", flag.PanicOnError) + flagSet.String("prefetch-dir", "/etc/passwd", "") + flagSet.Bool("prefetch-patterns", true, "") + ctx = cli.NewContext(app, flagSet, nil) + patterns, err = getPrefetchPatterns(ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "--prefetch-dir conflicts with --prefetch-patterns") + require.Empty(t, patterns) + + flagSet = flag.NewFlagSet("test3", flag.PanicOnError) + flagSet.Bool("prefetch-patterns", true, "") + ctx = cli.NewContext(app, flagSet, nil) + patterns, err = getPrefetchPatterns(ctx) + require.NoError(t, err) + require.Equal(t, "/", patterns) +} diff --git a/contrib/nydusify/examples/converter/main.go b/contrib/nydusify/examples/converter/main.go index d9f9bd654ed..0149f87c080 100644 --- a/contrib/nydusify/examples/converter/main.go +++ b/contrib/nydusify/examples/converter/main.go @@ -1,33 +1,33 @@ -package main - -import ( - "context" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter" -) - -func main() { - // Configurable parameters for converter - workDir := "./tmp" - nydusImagePath := "/path/to/nydus-image" - source := "localhost:5000/ubuntu:latest" - target := 
"localhost:5000/ubuntu:latest-nydus" - - opt := converter.Opt{ - Platforms: "linux/amd64", - Source: source, - Target: target, - SourceInsecure: true, - TargetInsecure: true, - - WorkDir: workDir, - PrefetchPatterns: "/", - NydusImagePath: nydusImagePath, - MergePlatform: false, - Docker2OCI: true, - } - - if err := converter.Convert(context.Background(), opt); err != nil { - panic(err) - } -} +package main + +import ( + "context" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter" +) + +func main() { + // Configurable parameters for converter + workDir := "./tmp" + nydusImagePath := "/path/to/nydus-image" + source := "localhost:5000/ubuntu:latest" + target := "localhost:5000/ubuntu:latest-nydus" + + opt := converter.Opt{ + Platforms: "linux/amd64", + Source: source, + Target: target, + SourceInsecure: true, + TargetInsecure: true, + + WorkDir: workDir, + PrefetchPatterns: "/", + NydusImagePath: nydusImagePath, + MergePlatform: false, + Docker2OCI: true, + } + + if err := converter.Convert(context.Background(), opt); err != nil { + panic(err) + } +} diff --git a/contrib/nydusify/examples/manifest/cache_manifest.json b/contrib/nydusify/examples/manifest/cache_manifest.json index 58c1ad65ba4..e4a8e2202ac 100644 --- a/contrib/nydusify/examples/manifest/cache_manifest.json +++ b/contrib/nydusify/examples/manifest/cache_manifest.json @@ -1,82 +1,82 @@ -{ - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "schemaVersion": 2, - "config": { - "mediaType": "application/vnd.oci.image.config.v1+json", - "digest": "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", - "size": 76 - }, - "layers": [ - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - "size": 83528010, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", - "digest": "sha256:fdfe86772cfb157dd364a7caf7a64fdc6f10abd047701c0a3fcd629b8ebc8766", - "size": 272154, - "annotations": { - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:bacd3af13903e13a43fe87b6944acd1ff21024132aad6e74b4452d984fb1a99a", - "containerd.io/uncompressed": "sha256:032ef23acc516fb5ffda4900db1616f85b39cffb626bc0def51915e14a6a7d8d", - "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:b413839e4ee5248697ef30fe9a84b659fa744d69bbc9b7754113adc2b2b6bc90", - "size": 40712206, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:bacd3af13903e13a43fe87b6944acd1ff21024132aad6e74b4452d984fb1a99a" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", - "digest": "sha256:2c89f5d83488a50f398ac3cbd3bdbcafd5ba9521b224a74133f6a45acd616cd0", - "size": 273077, - "annotations": { - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:3779241fda7b1caf03964626c3503e930f2f19a5ffaba6f4b4ad21fd38df3b6b", - "containerd.io/uncompressed": "sha256:06014764637029de0a5d37c5a2e52249d46f45a5edecca0ad81d98347f076d7a", - "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" - } - }, - { - "mediaType": 
"application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:b6a85be8248b0d3c2f0565ef71d549f404f8edcee1ab666c9871a8e6d9384860", - "size": 441, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:3779241fda7b1caf03964626c3503e930f2f19a5ffaba6f4b4ad21fd38df3b6b" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", - "digest": "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", - "size": 273320, - "annotations": { - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:9386795d450ce06c6819c8bc5eff8daa71d47ccb9f9fb8d49fe1ccfb5fb3edbe", - "containerd.io/uncompressed": "sha256:e3a229c2fa7d489240052abe2f9ad235e26f1aa10d70060fc8e78d478b624503", - "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", - "size": 7, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true", - "containerd.io/snapshot/nydus-source-chainid": "sha256:9386795d450ce06c6819c8bc5eff8daa71d47ccb9f9fb8d49fe1ccfb5fb3edbe" - } - } - ], - "annotations": { - "containerd.io/snapshot/nydus-cache": "v1" - } -} +{ + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "schemaVersion": 2, + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "digest": "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", + "size": 76 + }, + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + "size": 83528010, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:fdfe86772cfb157dd364a7caf7a64fdc6f10abd047701c0a3fcd629b8ebc8766", + "size": 272154, + "annotations": { + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:bacd3af13903e13a43fe87b6944acd1ff21024132aad6e74b4452d984fb1a99a", + "containerd.io/uncompressed": "sha256:032ef23acc516fb5ffda4900db1616f85b39cffb626bc0def51915e14a6a7d8d", + "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:b413839e4ee5248697ef30fe9a84b659fa744d69bbc9b7754113adc2b2b6bc90", + "size": 40712206, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:bacd3af13903e13a43fe87b6944acd1ff21024132aad6e74b4452d984fb1a99a" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:2c89f5d83488a50f398ac3cbd3bdbcafd5ba9521b224a74133f6a45acd616cd0", + "size": 273077, + "annotations": { + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:3779241fda7b1caf03964626c3503e930f2f19a5ffaba6f4b4ad21fd38df3b6b", + "containerd.io/uncompressed": "sha256:06014764637029de0a5d37c5a2e52249d46f45a5edecca0ad81d98347f076d7a", + "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" + } + }, + { + "mediaType": 
"application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:b6a85be8248b0d3c2f0565ef71d549f404f8edcee1ab666c9871a8e6d9384860", + "size": 441, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:3779241fda7b1caf03964626c3503e930f2f19a5ffaba6f4b4ad21fd38df3b6b" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", + "size": 273320, + "annotations": { + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:9386795d450ce06c6819c8bc5eff8daa71d47ccb9f9fb8d49fe1ccfb5fb3edbe", + "containerd.io/uncompressed": "sha256:e3a229c2fa7d489240052abe2f9ad235e26f1aa10d70060fc8e78d478b624503", + "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", + "size": 7, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true", + "containerd.io/snapshot/nydus-source-chainid": "sha256:9386795d450ce06c6819c8bc5eff8daa71d47ccb9f9fb8d49fe1ccfb5fb3edbe" + } + } + ], + "annotations": { + "containerd.io/snapshot/nydus-cache": "v1" + } +} diff --git a/contrib/nydusify/examples/manifest/index.json b/contrib/nydusify/examples/manifest/index.json index 4b2cad2e0c1..7cff0c151f9 100644 --- a/contrib/nydusify/examples/manifest/index.json +++ b/contrib/nydusify/examples/manifest/index.json @@ -1,26 +1,26 @@ -{ - "schemaVersion": 2, - "manifests": [ - { - "mediaType": "application/vnd.docker.distribution.manifest.v2+json", - "digest": "sha256:4e4bc990609ed865e07afc8427c30ffdddca5153fd4e82c20d8f0783a291e241", - "size": 943, - "platform": { - "architecture": "amd64", - "os": "linux" - } - }, - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "digest": "sha256:db30bb2870067ed3e0e73c7448d9f0b529169da8295b5b5155b417624d861d81", - "size": 1367, - "platform": { - "architecture": "amd64", - "os": "linux", - "os.features": [ - "nydus.remoteimage.v1" - ] - } - } - ] -} +{ + "schemaVersion": 2, + "manifests": [ + { + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "digest": "sha256:4e4bc990609ed865e07afc8427c30ffdddca5153fd4e82c20d8f0783a291e241", + "size": 943, + "platform": { + "architecture": "amd64", + "os": "linux" + } + }, + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "digest": "sha256:db30bb2870067ed3e0e73c7448d9f0b529169da8295b5b5155b417624d861d81", + "size": 1367, + "platform": { + "architecture": "amd64", + "os": "linux", + "os.features": [ + "nydus.remoteimage.v1" + ] + } + } + ] +} diff --git a/contrib/nydusify/examples/manifest/manifest.json b/contrib/nydusify/examples/manifest/manifest.json index 9a0d45ce36e..fe92a52c62a 100644 --- a/contrib/nydusify/examples/manifest/manifest.json +++ b/contrib/nydusify/examples/manifest/manifest.json @@ -1,51 +1,51 @@ -{ - "schemaVersion": 2, - "config": { - "mediaType": "application/vnd.oci.image.config.v1+json", - "digest": "sha256:563fad1f51cec2ee4c972af4bfd7275914061e2f73770585cfb04309cb5e0d6b", - "size": 523 - }, - "layers": [ - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - "size": 83528010, - "annotations": { - "containerd.io/snapshot/nydus-blob": 
"true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:b413839e4ee5248697ef30fe9a84b659fa744d69bbc9b7754113adc2b2b6bc90", - "size": 40712206, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:b6a85be8248b0d3c2f0565ef71d549f404f8edcee1ab666c9871a8e6d9384860", - "size": 441, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", - "size": 7, - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", - "digest": "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", - "size": 273320, - "annotations": { - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" - } - } - ] +{ + "schemaVersion": 2, + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "digest": "sha256:563fad1f51cec2ee4c972af4bfd7275914061e2f73770585cfb04309cb5e0d6b", + "size": 523 + }, + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + "size": 83528010, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:b413839e4ee5248697ef30fe9a84b659fa744d69bbc9b7754113adc2b2b6bc90", + "size": 40712206, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:b6a85be8248b0d3c2f0565ef71d549f404f8edcee1ab666c9871a8e6d9384860", + "size": 441, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", + "size": 7, + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", + "size": 273320, + "annotations": { + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]" + } + } + ] } \ No newline at end of file diff --git a/contrib/nydusify/go.mod b/contrib/nydusify/go.mod index d73c44c7a21..1dde070f1a7 100644 --- a/contrib/nydusify/go.mod +++ b/contrib/nydusify/go.mod @@ -1,126 +1,126 @@ -module github.com/dragonflyoss/nydus/contrib/nydusify - -go 1.21 - -require ( - github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible - github.com/aws/aws-sdk-go-v2 v1.24.1 - github.com/aws/aws-sdk-go-v2/config v1.26.6 - github.com/aws/aws-sdk-go-v2/credentials v1.16.16 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15 - github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1 - github.com/containerd/containerd v1.7.18 - github.com/containerd/continuity v0.4.3 - github.com/containerd/nydus-snapshotter v0.13.11 - github.com/distribution/reference v0.5.0 - github.com/docker/cli v26.0.0+incompatible - 
github.com/dustin/go-humanize v1.0.1 - github.com/goharbor/acceleration-service v0.2.14 - github.com/google/uuid v1.6.0 - github.com/hashicorp/go-hclog v1.6.2 - github.com/hashicorp/go-plugin v1.6.0 - github.com/moby/buildkit v0.13.0 - github.com/opencontainers/go-digest v1.0.0 - github.com/opencontainers/image-spec v1.1.0 - github.com/pkg/errors v0.9.1 - github.com/pkg/xattr v0.4.9 - github.com/prometheus/client_golang v1.19.0 - github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 - github.com/urfave/cli/v2 v2.27.1 - golang.org/x/sync v0.6.0 - golang.org/x/sys v0.18.0 - lukechampine.com/blake3 v1.2.1 -) - -require ( - github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect - github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 // indirect - github.com/Microsoft/go-winio v0.6.2 // indirect - github.com/Microsoft/hcsshim v0.11.5 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 // indirect - github.com/aws/smithy-go v1.19.0 // indirect - github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/containerd/cgroups v1.1.0 // indirect - github.com/containerd/errdefs v0.1.0 // indirect - github.com/containerd/fifo v1.1.0 // indirect - github.com/containerd/log v0.1.0 // indirect - github.com/containerd/stargz-snapshotter v0.15.1 // indirect - github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect - github.com/containerd/ttrpc v1.2.4 // indirect - github.com/containerd/typeurl/v2 v2.1.1 // indirect - github.com/containers/ocicrypt v1.1.10 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/docker/docker v25.0.5+incompatible // indirect - github.com/docker/docker-credential-helpers v0.8.0 // indirect - github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect - github.com/fatih/color v1.16.0 // indirect - github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/go-jose/go-jose/v3 v3.0.3 // indirect - github.com/go-logr/logr v1.4.1 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.4 // indirect - github.com/google/go-cmp v0.6.0 // indirect - github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect - github.com/hashicorp/yamux v0.1.1 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect - github.com/klauspost/compress v1.17.4 // indirect - github.com/klauspost/cpuid/v2 v2.2.6 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - 
github.com/mattn/go-isatty v0.0.20 // indirect - github.com/miekg/pkcs11 v1.1.1 // indirect - github.com/mitchellh/go-testing-interface v1.14.1 // indirect - github.com/moby/locker v1.0.1 // indirect - github.com/moby/sys/mountinfo v0.7.1 // indirect - github.com/moby/sys/sequential v0.5.0 // indirect - github.com/moby/sys/signal v0.7.0 // indirect - github.com/moby/sys/user v0.1.0 // indirect - github.com/oklog/run v1.1.0 // indirect - github.com/opencontainers/runtime-spec v1.1.0 // indirect - github.com/opencontainers/selinux v1.11.0 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.6.0 // indirect - github.com/prometheus/common v0.50.0 // indirect - github.com/prometheus/procfs v0.13.0 // indirect - github.com/rogpeppe/go-internal v1.12.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect - github.com/stretchr/objx v0.5.2 // indirect - github.com/vbatts/tar-split v0.11.5 // indirect - github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect - go.etcd.io/bbolt v1.3.10 // indirect - go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 // indirect - go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect - go.opentelemetry.io/otel v1.21.0 // indirect - go.opentelemetry.io/otel/metric v1.21.0 // indirect - go.opentelemetry.io/otel/trace v1.21.0 // indirect - golang.org/x/crypto v0.21.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.5.0 // indirect - google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect - google.golang.org/grpc v1.62.1 // indirect - google.golang.org/protobuf v1.33.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) - -replace github.com/containerd/containerd => github.com/nydusaccelerator/containerd v0.0.0-20240605070649-62e0d4d66f9f +module github.com/dragonflyoss/nydus/contrib/nydusify + +go 1.21 + +require ( + github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible + github.com/aws/aws-sdk-go-v2 v1.24.1 + github.com/aws/aws-sdk-go-v2/config v1.26.6 + github.com/aws/aws-sdk-go-v2/credentials v1.16.16 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15 + github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1 + github.com/containerd/containerd v1.7.18 + github.com/containerd/continuity v0.4.3 + github.com/containerd/nydus-snapshotter v0.13.11 + github.com/distribution/reference v0.5.0 + github.com/docker/cli v26.0.0+incompatible + github.com/dustin/go-humanize v1.0.1 + github.com/goharbor/acceleration-service v0.2.14 + github.com/google/uuid v1.6.0 + github.com/hashicorp/go-hclog v1.6.2 + github.com/hashicorp/go-plugin v1.6.0 + github.com/moby/buildkit v0.13.0 + github.com/opencontainers/go-digest v1.0.0 + github.com/opencontainers/image-spec v1.1.0 + github.com/pkg/errors v0.9.1 + github.com/pkg/xattr v0.4.9 + github.com/prometheus/client_golang v1.19.0 + github.com/sirupsen/logrus v1.9.3 + github.com/stretchr/testify v1.9.0 + github.com/urfave/cli/v2 v2.27.1 + golang.org/x/sync v0.6.0 + golang.org/x/sys v0.18.0 + lukechampine.com/blake3 v1.2.1 +) + +require ( + github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect + github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 
// indirect + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/Microsoft/hcsshim v0.11.5 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 // indirect + github.com/aws/smithy-go v1.19.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/containerd/cgroups v1.1.0 // indirect + github.com/containerd/errdefs v0.1.0 // indirect + github.com/containerd/fifo v1.1.0 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/containerd/stargz-snapshotter v0.15.1 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect + github.com/containerd/ttrpc v1.2.4 // indirect + github.com/containerd/typeurl/v2 v2.1.1 // indirect + github.com/containers/ocicrypt v1.1.10 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/docker v25.0.5+incompatible // indirect + github.com/docker/docker-credential-helpers v0.8.0 // indirect + github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect + github.com/fatih/color v1.16.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-jose/go-jose/v3 v3.0.3 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/hashicorp/yamux v0.1.1 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/klauspost/compress v1.17.4 // indirect + github.com/klauspost/cpuid/v2 v2.2.6 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/miekg/pkcs11 v1.1.1 // indirect + github.com/mitchellh/go-testing-interface v1.14.1 // indirect + github.com/moby/locker v1.0.1 // indirect + github.com/moby/sys/mountinfo v0.7.1 // indirect + github.com/moby/sys/sequential v0.5.0 // indirect + github.com/moby/sys/signal v0.7.0 // indirect + github.com/moby/sys/user v0.1.0 // indirect + github.com/oklog/run v1.1.0 // indirect + github.com/opencontainers/runtime-spec v1.1.0 // indirect + github.com/opencontainers/selinux v1.11.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_model v0.6.0 // indirect + github.com/prometheus/common v0.50.0 // indirect + github.com/prometheus/procfs v0.13.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 
// indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect + github.com/stretchr/objx v0.5.2 // indirect + github.com/vbatts/tar-split v0.11.5 // indirect + github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect + go.etcd.io/bbolt v1.3.10 // indirect + go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect + go.opentelemetry.io/otel v1.21.0 // indirect + go.opentelemetry.io/otel/metric v1.21.0 // indirect + go.opentelemetry.io/otel/trace v1.21.0 // indirect + golang.org/x/crypto v0.21.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/term v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.5.0 // indirect + google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect + google.golang.org/grpc v1.62.1 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) + +replace github.com/containerd/containerd => github.com/nydusaccelerator/containerd v0.0.0-20240605070649-62e0d4d66f9f diff --git a/contrib/nydusify/go.sum b/contrib/nydusify/go.sum index 02d10653667..524041d4be7 100644 --- a/contrib/nydusify/go.sum +++ b/contrib/nydusify/go.sum @@ -1,409 +1,409 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= -github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 h1:dIScnXFlF784X79oi7MzVT6GWqr/W1uUt0pB5CsDs9M= -github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2/go.mod h1:gCLVsLfv1egrcZu+GoJATN5ts75F2s62ih/457eWzOw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= -github.com/Microsoft/hcsshim v0.11.5 h1:haEcLNpj9Ka1gd3B3tAEs9CpE0c+1IhoL59w/exYU38= -github.com/Microsoft/hcsshim v0.11.5/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= -github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible h1:8psS8a+wKfiLt1iVDX79F7Y6wUM49Lcha2FMXt4UM8g= -github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= -github.com/aws/aws-sdk-go-v2 v1.24.1 h1:xAojnj+ktS95YZlDf0zxWBkbFtymPeDP+rvUQIH3uAU= -github.com/aws/aws-sdk-go-v2 v1.24.1/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4/go.mod h1:usURWEKSNNAcAZuzRn/9ZYPT8aZQkR7xcCtunK/LkJo= -github.com/aws/aws-sdk-go-v2/config v1.26.6 h1:Z/7w9bUqlRI0FFQpetVuFYEsjzE3h7fpU6HuGmfPL/o= -github.com/aws/aws-sdk-go-v2/config v1.26.6/go.mod h1:uKU6cnDmYCvJ+pxO9S4cWDb2yWWIH5hra+32hVh1MI4= -github.com/aws/aws-sdk-go-v2/credentials v1.16.16 h1:8q6Rliyv0aUFAVtzaldUEcS+T5gbadPbWdV1WcAddK8= -github.com/aws/aws-sdk-go-v2/credentials v1.16.16/go.mod 
h1:UHVZrdUsv63hPXFo1H7c5fEneoVo9UXiz36QG1GEPi0= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 h1:c5I5iH+DZcH3xOIMlz3/tCKJDaHFwYEmxvlh2fAcFo8= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11/go.mod h1:cRrYDYAMUohBJUtUnOhydaMHtiK/1NZ0Otc9lIb6O0Y= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15 h1:2MUXyGW6dVaQz6aqycpbdLIH1NMcUI6kW6vQ0RabGYg= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15/go.mod h1:aHbhbR6WEQgHAiRj41EQ2W47yOYwNtIkWTXmcAtYqj8= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 h1:vF+Zgd9s+H4vOXd5BMaPWykta2a6Ih0AKLq/X6NYKn4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10/go.mod h1:6BkRjejp/GR4411UGqkX8+wFMbFbqsUIimfK4XjOKR4= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 h1:nYPe006ktcqUji8S2mqXf9c/7NdiKriOwMvWQHgYztw= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10/go.mod h1:6UV4SZkVvmODfXKql4LCbaZUpF7HO2BX38FgBf9ZOLw= -github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3 h1:n3GDfwqF2tzEkXlv5cuy4iy7LpKDtqDMcNLfZDu9rls= -github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3/go.mod h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 h1:5oE2WzJE56/mVveuDZPJESKlg/00AaS2pY2QZcnxg4M= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10/go.mod h1:FHbKWQtRBYUz4vO5WBWjzMD2by126ny5y/1EoaWoLfI= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 h1:/b31bi3YVNlkzkBrm9LfpaKoaYZUxIAj4sHfOTmLfqw= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4/go.mod h1:2aGXHFmbInwgP9ZfpmdIfOELL79zhdNYNmReK8qDfdQ= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 h1:L0ai8WICYHozIKK+OtPzVJBugL7culcuM4E4JOpIEm8= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10/go.mod h1:byqfyxJBshFk0fF9YmK0M0ugIO8OWjzH2T3bPG4eGuA= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 h1:DBYTXwIGQSGs9w4jKm60F5dmCQ3EEruxdc0MFh+3EY4= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10/go.mod h1:wohMUQiFdzo0NtxbBg0mSRGZ4vL3n0dKjLTINdcIino= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 h1:KOxnQeWy5sXyS37fdKEvAsGHOr9fa/qvwxfJurR/BzE= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10/go.mod h1:jMx5INQFYFYB3lQD9W0D8Ohgq6Wnl7NYOJ2TQndbulI= -github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1 h1:5XNlsBsEvBZBMO6p82y+sqpWg8j5aBCe+5C2GBFgqBQ= -github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1/go.mod h1:4qXHrG1Ne3VGIMZPCB8OjH/pLFO94sKABIusjh0KWPU= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 h1:eajuO3nykDPdYicLlP3AGgOyVN3MOlFmZv7WGTuJPow= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.7/go.mod h1:+mJNDdF+qiUlNKNC3fxn74WWNN+sOiGOEImje+3ScPM= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 h1:QPMJf+Jw8E1l7zqhZmMlFw6w1NmfkfiSK8mS4zOx3BA= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7/go.mod h1:ykf3COxYI0UJmxcfcxcVuz7b6uADi1FkiUz6Eb7AgM8= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 h1:NzO4Vrau795RkUdSHKEwiR01FaGzGOH1EETJ+5QHnm0= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.7/go.mod h1:6h2YuIoxaMSCFf5fi1EgZAwdfkGMgDY+DVfa61uLe4U= -github.com/aws/smithy-go v1.19.0 h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= -github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bufbuild/protocompile v0.4.0 h1:LbFKd2XowZvQ/kajzguUp2DC9UEIQhIq77fZZlaQsNA= 
-github.com/bufbuild/protocompile v0.4.0/go.mod h1:3v93+mbWn/v3xzN+31nwkJfrEpAUwp+BagBSZWx+TP8= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= -github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= -github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= -github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= -github.com/containerd/errdefs v0.1.0 h1:m0wCRBiu1WJT/Fr+iOoQHMQS/eP5myQ8lCv4Dz5ZURM= -github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0= -github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= -github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/nydus-snapshotter v0.13.11 h1:0euz1viJ0/4sZ5P0GP28wKrd+m0YqKRQcM6GZjuSKZk= -github.com/containerd/nydus-snapshotter v0.13.11/go.mod h1:VPVKQ3jmHFIcUIV2yiQ1kImZuBFS3GXDohKs9mRABVE= -github.com/containerd/stargz-snapshotter v0.15.1 h1:fpsP4kf/Z4n2EYnU0WT8ZCE3eiKDwikDhL6VwxIlgeA= -github.com/containerd/stargz-snapshotter v0.15.1/go.mod h1:74D+J1m1RMXytLmWxegXWhtOSRHPWZKpKc2NdK3S+us= -github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= -github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= -github.com/containerd/ttrpc v1.2.4 h1:eQCQK4h9dxDmpOb9QOOMh2NHTfzroH1IkmHiKZi05Oo= -github.com/containerd/ttrpc v1.2.4/go.mod h1:ojvb8SJBSch0XkqNO0L0YX/5NxR3UnVk2LzFKBK0upc= -github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= -github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= -github.com/containers/ocicrypt v1.1.10 h1:r7UR6o8+lyhkEywetubUUgcKFjOWOaWz8cEBrCPX0ic= -github.com/containers/ocicrypt v1.1.10/go.mod h1:YfzSSr06PTHQwSTUKqDSjish9BeW1E4HUmreluQcMd8= -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= -github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= -github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/cli v26.0.0+incompatible h1:90BKrx1a1HKYpSnnBFR6AgDq/FqkHxwlUyzJVPxD30I= -github.com/docker/cli v26.0.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/docker v25.0.5+incompatible 
h1:UmQydMduGkrD5nQde1mecF/YnSbTOaPeFIeP5C4W+DE= -github.com/docker/docker v25.0.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/docker-credential-helpers v0.8.0 h1:YQFtbBQb4VrpoPxhFuzEBPQ9E16qz5SpHLS+uswaCp8= -github.com/docker/docker-credential-helpers v0.8.0/go.mod h1:UGFXcuoQ5TxPiB54nHOZ32AWRqQdECoh/Mg0AlEYb40= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= -github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= -github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= -github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k= -github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/goharbor/acceleration-service v0.2.14 h1:VfhahIoWRRWACfMb+520+9MNXIGBUk4QRJHokEUAj8M= -github.com/goharbor/acceleration-service v0.2.14/go.mod h1:IaoZkVBLwnGpaJ46je7ZD294TBeWaQwFroX/ein2PiE= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod 
h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= -github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-hclog v1.6.2 h1:NOtoftovWkDheyUM/8JW3QMiXyxJK3uHRK7wV04nD2I= -github.com/hashicorp/go-hclog v1.6.2/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= -github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= -github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hashicorp/go-plugin v1.6.0 h1:wgd4KxHJTVGGqWBq4QPB1i5BZNEx9BR8+OFmHDmTk8A= -github.com/hashicorp/go-plugin v1.6.0/go.mod h1:lBS5MtSSBZk0SHc66KACcjjlU6WzEVP/8pwz68aMkCI= -github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= -github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE= -github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ= -github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c= -github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= -github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= -github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod 
h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= -github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= -github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU= -github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= -github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= -github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= -github.com/moby/buildkit v0.13.0 h1:reVR1Y+rbNIUQ9jf0Q1YZVH5a/nhOixZsl+HJ9qQEGI= -github.com/moby/buildkit v0.13.0/go.mod h1:aNmNQKLBFYAOFuzQjR3VA27/FijlvtBD1pjNwTSN37k= -github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= -github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= -github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= -github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= -github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= -github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= -github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= -github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= -github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg= -github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU= -github.com/nydusaccelerator/containerd v0.0.0-20240605070649-62e0d4d66f9f h1:jbWfZohlnnbKXcYykpfw0VT8baJpI90sWg0hxvD596g= -github.com/nydusaccelerator/containerd v0.0.0-20240605070649-62e0d4d66f9f/go.mod h1:IYEk9/IO6wAPUz2bCMVUbsfXjzw5UNP5fLz4PsUygQ4= -github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= -github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= 
-github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= -github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= -github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= -github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= -github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= -github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= -github.com/prometheus/common v0.50.0 h1:YSZE6aa9+luNa2da6/Tik0q0A5AbR+U003TItK57CPQ= -github.com/prometheus/common v0.50.0/go.mod h1:wHFBCEVWVmHMUpg7pYcOm2QUR/ocQdYSJVQJKnHc3xQ= -github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= -github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 h1:pnnLyeX7o/5aX8qUQ69P/mLojDqwda8hFOCBTmP/6hw= -github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6/go.mod h1:39R/xuhNgVhi+K0/zst4TLrJrVmbm6LVgl4A0+ZFS5M= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= -github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 
-github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= -github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= -github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= -github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e h1:+SOyEddqYF09QP7vr7CgJ1eti3pY9Fn3LHO1M1r/0sI= -github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= -go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= -go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 h1:CCriYyAfq1Br1aIYettdHZTy8mBTIPo7We18TuO/bak= -go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= -go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= -go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 h1:aFJWCqJMNjENlcleuuOkGAPH82y0yULBScfXcIEdS24= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1/go.mod h1:sEGXWArGqc3tVa+ekntsN65DmVbVeW+7lTKTjZF3/Fo= -go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= -go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= -go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= -go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= -go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= -go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint 
v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod 
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= -google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod 
h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= -gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= -gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= -lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 h1:dIScnXFlF784X79oi7MzVT6GWqr/W1uUt0pB5CsDs9M= +github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2/go.mod h1:gCLVsLfv1egrcZu+GoJATN5ts75F2s62ih/457eWzOw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/Microsoft/hcsshim v0.11.5 h1:haEcLNpj9Ka1gd3B3tAEs9CpE0c+1IhoL59w/exYU38= +github.com/Microsoft/hcsshim v0.11.5/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= +github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible h1:8psS8a+wKfiLt1iVDX79F7Y6wUM49Lcha2FMXt4UM8g= +github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= +github.com/aws/aws-sdk-go-v2 v1.24.1 h1:xAojnj+ktS95YZlDf0zxWBkbFtymPeDP+rvUQIH3uAU= +github.com/aws/aws-sdk-go-v2 v1.24.1/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4/go.mod 
h1:usURWEKSNNAcAZuzRn/9ZYPT8aZQkR7xcCtunK/LkJo= +github.com/aws/aws-sdk-go-v2/config v1.26.6 h1:Z/7w9bUqlRI0FFQpetVuFYEsjzE3h7fpU6HuGmfPL/o= +github.com/aws/aws-sdk-go-v2/config v1.26.6/go.mod h1:uKU6cnDmYCvJ+pxO9S4cWDb2yWWIH5hra+32hVh1MI4= +github.com/aws/aws-sdk-go-v2/credentials v1.16.16 h1:8q6Rliyv0aUFAVtzaldUEcS+T5gbadPbWdV1WcAddK8= +github.com/aws/aws-sdk-go-v2/credentials v1.16.16/go.mod h1:UHVZrdUsv63hPXFo1H7c5fEneoVo9UXiz36QG1GEPi0= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 h1:c5I5iH+DZcH3xOIMlz3/tCKJDaHFwYEmxvlh2fAcFo8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11/go.mod h1:cRrYDYAMUohBJUtUnOhydaMHtiK/1NZ0Otc9lIb6O0Y= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15 h1:2MUXyGW6dVaQz6aqycpbdLIH1NMcUI6kW6vQ0RabGYg= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.15/go.mod h1:aHbhbR6WEQgHAiRj41EQ2W47yOYwNtIkWTXmcAtYqj8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 h1:vF+Zgd9s+H4vOXd5BMaPWykta2a6Ih0AKLq/X6NYKn4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10/go.mod h1:6BkRjejp/GR4411UGqkX8+wFMbFbqsUIimfK4XjOKR4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 h1:nYPe006ktcqUji8S2mqXf9c/7NdiKriOwMvWQHgYztw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10/go.mod h1:6UV4SZkVvmODfXKql4LCbaZUpF7HO2BX38FgBf9ZOLw= +github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3 h1:n3GDfwqF2tzEkXlv5cuy4iy7LpKDtqDMcNLfZDu9rls= +github.com/aws/aws-sdk-go-v2/internal/ini v1.7.3/go.mod h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 h1:5oE2WzJE56/mVveuDZPJESKlg/00AaS2pY2QZcnxg4M= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10/go.mod h1:FHbKWQtRBYUz4vO5WBWjzMD2by126ny5y/1EoaWoLfI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 h1:/b31bi3YVNlkzkBrm9LfpaKoaYZUxIAj4sHfOTmLfqw= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4/go.mod h1:2aGXHFmbInwgP9ZfpmdIfOELL79zhdNYNmReK8qDfdQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 h1:L0ai8WICYHozIKK+OtPzVJBugL7culcuM4E4JOpIEm8= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10/go.mod h1:byqfyxJBshFk0fF9YmK0M0ugIO8OWjzH2T3bPG4eGuA= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 h1:DBYTXwIGQSGs9w4jKm60F5dmCQ3EEruxdc0MFh+3EY4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10/go.mod h1:wohMUQiFdzo0NtxbBg0mSRGZ4vL3n0dKjLTINdcIino= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 h1:KOxnQeWy5sXyS37fdKEvAsGHOr9fa/qvwxfJurR/BzE= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10/go.mod h1:jMx5INQFYFYB3lQD9W0D8Ohgq6Wnl7NYOJ2TQndbulI= +github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1 h1:5XNlsBsEvBZBMO6p82y+sqpWg8j5aBCe+5C2GBFgqBQ= +github.com/aws/aws-sdk-go-v2/service/s3 v1.48.1/go.mod h1:4qXHrG1Ne3VGIMZPCB8OjH/pLFO94sKABIusjh0KWPU= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 h1:eajuO3nykDPdYicLlP3AGgOyVN3MOlFmZv7WGTuJPow= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.7/go.mod h1:+mJNDdF+qiUlNKNC3fxn74WWNN+sOiGOEImje+3ScPM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 h1:QPMJf+Jw8E1l7zqhZmMlFw6w1NmfkfiSK8mS4zOx3BA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7/go.mod h1:ykf3COxYI0UJmxcfcxcVuz7b6uADi1FkiUz6Eb7AgM8= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 h1:NzO4Vrau795RkUdSHKEwiR01FaGzGOH1EETJ+5QHnm0= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.7/go.mod h1:6h2YuIoxaMSCFf5fi1EgZAwdfkGMgDY+DVfa61uLe4U= +github.com/aws/smithy-go v1.19.0 
h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= +github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bufbuild/protocompile v0.4.0 h1:LbFKd2XowZvQ/kajzguUp2DC9UEIQhIq77fZZlaQsNA= +github.com/bufbuild/protocompile v0.4.0/go.mod h1:3v93+mbWn/v3xzN+31nwkJfrEpAUwp+BagBSZWx+TP8= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= +github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= +github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= +github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= +github.com/containerd/errdefs v0.1.0 h1:m0wCRBiu1WJT/Fr+iOoQHMQS/eP5myQ8lCv4Dz5ZURM= +github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0= +github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= +github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/nydus-snapshotter v0.13.11 h1:0euz1viJ0/4sZ5P0GP28wKrd+m0YqKRQcM6GZjuSKZk= +github.com/containerd/nydus-snapshotter v0.13.11/go.mod h1:VPVKQ3jmHFIcUIV2yiQ1kImZuBFS3GXDohKs9mRABVE= +github.com/containerd/stargz-snapshotter v0.15.1 h1:fpsP4kf/Z4n2EYnU0WT8ZCE3eiKDwikDhL6VwxIlgeA= +github.com/containerd/stargz-snapshotter v0.15.1/go.mod h1:74D+J1m1RMXytLmWxegXWhtOSRHPWZKpKc2NdK3S+us= +github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= +github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= +github.com/containerd/ttrpc v1.2.4 h1:eQCQK4h9dxDmpOb9QOOMh2NHTfzroH1IkmHiKZi05Oo= +github.com/containerd/ttrpc v1.2.4/go.mod h1:ojvb8SJBSch0XkqNO0L0YX/5NxR3UnVk2LzFKBK0upc= +github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= +github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= +github.com/containers/ocicrypt v1.1.10 h1:r7UR6o8+lyhkEywetubUUgcKFjOWOaWz8cEBrCPX0ic= +github.com/containers/ocicrypt v1.1.10/go.mod h1:YfzSSr06PTHQwSTUKqDSjish9BeW1E4HUmreluQcMd8= +github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/distribution/reference v0.5.0 
h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= +github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/cli v26.0.0+incompatible h1:90BKrx1a1HKYpSnnBFR6AgDq/FqkHxwlUyzJVPxD30I= +github.com/docker/cli v26.0.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/docker v25.0.5+incompatible h1:UmQydMduGkrD5nQde1mecF/YnSbTOaPeFIeP5C4W+DE= +github.com/docker/docker v25.0.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker-credential-helpers v0.8.0 h1:YQFtbBQb4VrpoPxhFuzEBPQ9E16qz5SpHLS+uswaCp8= +github.com/docker/docker-credential-helpers v0.8.0/go.mod h1:UGFXcuoQ5TxPiB54nHOZ32AWRqQdECoh/Mg0AlEYb40= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k= +github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/goharbor/acceleration-service v0.2.14 h1:VfhahIoWRRWACfMb+520+9MNXIGBUk4QRJHokEUAj8M= +github.com/goharbor/acceleration-service v0.2.14/go.mod h1:IaoZkVBLwnGpaJ46je7ZD294TBeWaQwFroX/ein2PiE= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-hclog v1.6.2 h1:NOtoftovWkDheyUM/8JW3QMiXyxJK3uHRK7wV04nD2I= +github.com/hashicorp/go-hclog v1.6.2/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-plugin v1.6.0 h1:wgd4KxHJTVGGqWBq4QPB1i5BZNEx9BR8+OFmHDmTk8A= +github.com/hashicorp/go-plugin v1.6.0/go.mod h1:lBS5MtSSBZk0SHc66KACcjjlU6WzEVP/8pwz68aMkCI= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE= +github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ= +github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c= +github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo= 
+github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= +github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU= +github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= +github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= +github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= +github.com/moby/buildkit v0.13.0 h1:reVR1Y+rbNIUQ9jf0Q1YZVH5a/nhOixZsl+HJ9qQEGI= +github.com/moby/buildkit v0.13.0/go.mod h1:aNmNQKLBFYAOFuzQjR3VA27/FijlvtBD1pjNwTSN37k= +github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= +github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= +github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= +github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= +github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= +github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= +github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg= +github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU= +github.com/nydusaccelerator/containerd 
v0.0.0-20240605070649-62e0d4d66f9f h1:jbWfZohlnnbKXcYykpfw0VT8baJpI90sWg0hxvD596g= +github.com/nydusaccelerator/containerd v0.0.0-20240605070649-62e0d4d66f9f/go.mod h1:IYEk9/IO6wAPUz2bCMVUbsfXjzw5UNP5fLz4PsUygQ4= +github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= +github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= +github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= +github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= +github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= +github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= +github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= +github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= +github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= +github.com/prometheus/common v0.50.0 h1:YSZE6aa9+luNa2da6/Tik0q0A5AbR+U003TItK57CPQ= +github.com/prometheus/common v0.50.0/go.mod h1:wHFBCEVWVmHMUpg7pYcOm2QUR/ocQdYSJVQJKnHc3xQ= +github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= +github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 h1:pnnLyeX7o/5aX8qUQ69P/mLojDqwda8hFOCBTmP/6hw= +github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6/go.mod h1:39R/xuhNgVhi+K0/zst4TLrJrVmbm6LVgl4A0+ZFS5M= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod 
h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= +github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= +github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= +github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e h1:+SOyEddqYF09QP7vr7CgJ1eti3pY9Fn3LHO1M1r/0sI= +github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= +go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= +go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 h1:CCriYyAfq1Br1aIYettdHZTy8mBTIPo7We18TuO/bak= +go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 h1:aFJWCqJMNjENlcleuuOkGAPH82y0yULBScfXcIEdS24= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1/go.mod h1:sEGXWArGqc3tVa+ekntsN65DmVbVeW+7lTKTjZF3/Fo= +go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= +go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= +go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= +go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= +go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= +go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto 
v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= 
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= +google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod 
h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= +gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= +gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= +lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= diff --git a/contrib/nydusify/pkg/backend/backend.go b/contrib/nydusify/pkg/backend/backend.go index 2a7dc0d32c8..9c4f4b27a8c 100644 --- a/contrib/nydusify/pkg/backend/backend.go +++ b/contrib/nydusify/pkg/backend/backend.go @@ -1,78 +1,78 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "context" - "fmt" - "io" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -// Backend transfers artifacts generated during image conversion to a backend storage such as: -// 1. registry: complying to OCI distribution specification, push blob file -// to registry and use the registry as a storage. -// 2. oss: A object storage backend, which uses its SDK to transfer blob file. -type Backend interface { - // TODO: Hopefully, we can pass `Layer` struct in, thus to be able to cook both - // file handle and file path. 
- Upload(ctx context.Context, blobID, blobPath string, blobSize int64, forcePush bool) (*ocispec.Descriptor, error) - Finalize(cancel bool) error - Check(blobID string) (bool, error) - Type() Type - Reader(blobID string) (io.ReadCloser, error) - Size(blobID string) (int64, error) -} - -// TODO: Directly forward blob data to storage backend - -type Type = int - -const ( - OssBackend Type = iota - RegistryBackend - S3backend -) - -func blobDesc(size int64, blobID string) ocispec.Descriptor { - blobDigest := digest.NewDigestFromEncoded(digest.SHA256, blobID) - desc := ocispec.Descriptor{ - Digest: blobDigest, - Size: size, - MediaType: utils.MediaTypeNydusBlob, - Annotations: map[string]string{ - // Use `utils.LayerAnnotationUncompressed` to generate - // DiffID of layer defined in OCI spec - utils.LayerAnnotationUncompressed: blobDigest.String(), - utils.LayerAnnotationNydusBlob: "true", - }, - } - - return desc -} - -// Nydusify majorly works for registry backend, which means blob is stored in -// registry as per OCI distribution specification. But nydus can also make OSS -// as rafs backend storage. Therefore, nydusify better have the ability to upload -// blob into OSS. OSS is configured via a json string input. Currently, it has -// no effect to registry backend now. -// Save byte slice here because I don't find a way to represent -// all the backend types at the same time -func NewBackend(bt string, config []byte, remote *remote.Remote) (Backend, error) { - switch bt { - case "oss": - return newOSSBackend(config) - case "registry": - return newRegistryBackend(config, remote) - case "s3": - return newS3Backend(config) - default: - return nil, fmt.Errorf("unsupported backend type %s", bt) - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "context" + "fmt" + "io" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// Backend transfers artifacts generated during image conversion to a backend storage such as: +// 1. registry: complying to OCI distribution specification, push blob file +// to registry and use the registry as a storage. +// 2. oss: A object storage backend, which uses its SDK to transfer blob file. +type Backend interface { + // TODO: Hopefully, we can pass `Layer` struct in, thus to be able to cook both + // file handle and file path. 
+ Upload(ctx context.Context, blobID, blobPath string, blobSize int64, forcePush bool) (*ocispec.Descriptor, error) + Finalize(cancel bool) error + Check(blobID string) (bool, error) + Type() Type + Reader(blobID string) (io.ReadCloser, error) + Size(blobID string) (int64, error) +} + +// TODO: Directly forward blob data to storage backend + +type Type = int + +const ( + OssBackend Type = iota + RegistryBackend + S3backend +) + +func blobDesc(size int64, blobID string) ocispec.Descriptor { + blobDigest := digest.NewDigestFromEncoded(digest.SHA256, blobID) + desc := ocispec.Descriptor{ + Digest: blobDigest, + Size: size, + MediaType: utils.MediaTypeNydusBlob, + Annotations: map[string]string{ + // Use `utils.LayerAnnotationUncompressed` to generate + // DiffID of layer defined in OCI spec + utils.LayerAnnotationUncompressed: blobDigest.String(), + utils.LayerAnnotationNydusBlob: "true", + }, + } + + return desc +} + +// Nydusify majorly works for registry backend, which means blob is stored in +// registry as per OCI distribution specification. But nydus can also make OSS +// as rafs backend storage. Therefore, nydusify better have the ability to upload +// blob into OSS. OSS is configured via a json string input. Currently, it has +// no effect to registry backend now. +// Save byte slice here because I don't find a way to represent +// all the backend types at the same time +func NewBackend(bt string, config []byte, remote *remote.Remote) (Backend, error) { + switch bt { + case "oss": + return newOSSBackend(config) + case "registry": + return newRegistryBackend(config, remote) + case "s3": + return newS3Backend(config) + default: + return nil, fmt.Errorf("unsupported backend type %s", bt) + } +} diff --git a/contrib/nydusify/pkg/backend/backend_test.go b/contrib/nydusify/pkg/backend/backend_test.go index 771e40b9f57..54b2532f8ca 100644 --- a/contrib/nydusify/pkg/backend/backend_test.go +++ b/contrib/nydusify/pkg/backend/backend_test.go @@ -1,66 +1,66 @@ -// Copyright 2023 Nydus Developers. All rights reserved. 
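// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch above). It shows how a
// caller might pick a storage backend through NewBackend and upload one blob,
// following the Backend interface and factory just shown. The import path
// mirrors the module paths in this diff; the bucket, endpoint, credentials,
// blob path and digest below are hypothetical placeholders, and a real run
// needs reachable OSS credentials.
package main

import (
	"context"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend"
)

func main() {
	// OSS configuration in the shape accepted by newOSSBackend; all values
	// here are placeholders.
	ossConfig := []byte(`{
		"bucket_name": "my-bucket",
		"endpoint": "oss-cn-hangzhou.aliyuncs.com",
		"access_key_id": "AK",
		"access_key_secret": "SK",
		"object_prefix": "nydus/"
	}`)

	// "registry" would additionally need a *remote.Remote; nil is fine for oss/s3.
	b, err := backend.NewBackend("oss", ossConfig, nil)
	if err != nil {
		log.Fatalf("create backend: %v", err)
	}

	// Upload a blob file addressed by its sha256 hex digest (placeholder values).
	desc, err := b.Upload(context.Background(),
		"205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a",
		"/tmp/blob", 123456, false)
	if err != nil {
		log.Fatalf("upload blob: %v", err)
	}
	log.Printf("uploaded %s (%d bytes)", desc.Digest, desc.Size)

	// Complete (or abort, by passing true) any pending multipart uploads.
	if err := b.Finalize(false); err != nil {
		log.Fatalf("finalize: %v", err)
	}
}
// ---------------------------------------------------------------------------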
-// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "encoding/json" - "testing" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/stretchr/testify/require" -) - -func TestBlobDesc(t *testing.T) { - desc := blobDesc(123456, "205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a") - require.Equal(t, int64(123456), desc.Size) - require.Equal(t, "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", desc.Digest.String()) - require.Equal(t, utils.MediaTypeNydusBlob, desc.MediaType) - require.Equal(t, map[string]string{ - utils.LayerAnnotationUncompressed: "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", - utils.LayerAnnotationNydusBlob: "true", - }, desc.Annotations) -} - -func TestNewBackend(t *testing.T) { - ossConfigJSON := ` - { - "bucket_name": "test", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON))) - backend, err := NewBackend("oss", []byte(ossConfigJSON), nil) - require.NoError(t, err) - require.Equal(t, OssBackend, backend.Type()) - - s3ConfigJSON := ` - { - "bucket_name": "test", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "region1" - }` - require.True(t, json.Valid([]byte(s3ConfigJSON))) - backend, err = NewBackend("s3", []byte(s3ConfigJSON), nil) - require.NoError(t, err) - require.Equal(t, S3backend, backend.Type()) - - testRegistryRemote, err := provider.DefaultRemote("test", false) - require.NoError(t, err) - backend, err = NewBackend("registry", nil, testRegistryRemote) - require.NoError(t, err) - require.Equal(t, RegistryBackend, backend.Type()) - - backend, err = NewBackend("errBackend", nil, testRegistryRemote) - require.Error(t, err) - require.Contains(t, err.Error(), "unsupported backend type") - require.Nil(t, backend) -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "encoding/json" + "testing" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/stretchr/testify/require" +) + +func TestBlobDesc(t *testing.T) { + desc := blobDesc(123456, "205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a") + require.Equal(t, int64(123456), desc.Size) + require.Equal(t, "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", desc.Digest.String()) + require.Equal(t, utils.MediaTypeNydusBlob, desc.MediaType) + require.Equal(t, map[string]string{ + utils.LayerAnnotationUncompressed: "sha256:205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a", + utils.LayerAnnotationNydusBlob: "true", + }, desc.Annotations) +} + +func TestNewBackend(t *testing.T) { + ossConfigJSON := ` + { + "bucket_name": "test", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON))) + backend, err := NewBackend("oss", []byte(ossConfigJSON), nil) + require.NoError(t, err) + require.Equal(t, OssBackend, backend.Type()) + + s3ConfigJSON := ` + { + "bucket_name": "test", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "region1" + }` + require.True(t, json.Valid([]byte(s3ConfigJSON))) + backend, err = NewBackend("s3", []byte(s3ConfigJSON), nil) + require.NoError(t, err) + require.Equal(t, S3backend, backend.Type()) + + testRegistryRemote, err := provider.DefaultRemote("test", false) + require.NoError(t, err) + backend, err = NewBackend("registry", nil, testRegistryRemote) + require.NoError(t, err) + require.Equal(t, RegistryBackend, backend.Type()) + + backend, err = NewBackend("errBackend", nil, testRegistryRemote) + require.Error(t, err) + require.Contains(t, err.Error(), "unsupported backend type") + require.Nil(t, backend) +} diff --git a/contrib/nydusify/pkg/backend/oss.go b/contrib/nydusify/pkg/backend/oss.go index 1c02d9099d0..ca82746bbe0 100644 --- a/contrib/nydusify/pkg/backend/oss.go +++ b/contrib/nydusify/pkg/backend/oss.go @@ -1,284 +1,284 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "context" - "encoding/json" - "fmt" - "hash/crc64" - "io" - "net/http" - "os" - "strconv" - "sync" - "time" - - "github.com/aliyun/aliyun-oss-go-sdk/oss" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sync/errgroup" -) - -const ( - // For multipart uploads, OSS has a maximum number of 10000 chunks, - // so we can only upload blob size of about 10000 * multipartChunkSize. - multipartChunkSize = 200 * 1024 * 1024 /// 200MB -) - -type multipartStatus struct { - imur *oss.InitiateMultipartUploadResult - parts []oss.UploadPart - blobObjectKey string - crc64Chan chan uint64 - crc64ErrChan chan error -} - -type OSSBackend struct { - // OSS storage does not support directory. Therefore add a prefix to each object - // to make it a path-like object. 
- objectPrefix string - bucket *oss.Bucket - ms []multipartStatus - msMutex sync.Mutex -} - -func newOSSBackend(rawConfig []byte) (*OSSBackend, error) { - var configMap map[string]string - if err := json.Unmarshal(rawConfig, &configMap); err != nil { - return nil, errors.Wrap(err, "Parse OSS storage backend configuration") - } - - endpoint := configMap["endpoint"] - bucketName := configMap["bucket_name"] - - // Below items are not mandatory - accessKeyID := configMap["access_key_id"] - accessKeySecret := configMap["access_key_secret"] - objectPrefix := configMap["object_prefix"] - - if endpoint == "" || bucketName == "" { - return nil, fmt.Errorf("invalid OSS configuration: missing 'endpoint' or 'bucket'") - } - - client, err := oss.New(endpoint, accessKeyID, accessKeySecret) - if err != nil { - return nil, errors.Wrap(err, "Create client") - } - - bucket, err := client.Bucket(bucketName) - if err != nil { - return nil, errors.Wrap(err, "Create bucket") - } - - return &OSSBackend{ - objectPrefix: objectPrefix, - bucket: bucket, - }, nil -} - -func calcCrc64ECMA(path string) (uint64, error) { - buf := make([]byte, 4*1024) - table := crc64.MakeTable(crc64.ECMA) - - f, err := os.Open(path) - if err != nil { - return 0, errors.Wrapf(err, "calc md5sum") - } - defer f.Close() - - n, err := f.Read(buf) - if err != nil && err != io.EOF { - return 0, err - } - blobCrc64 := crc64.Checksum(buf[:n], table) - - for { - n, err := f.Read(buf) - blobCrc64 = crc64.Update(blobCrc64, table, buf[:n]) - if err == io.EOF || n == 0 { - break - } - } - - return blobCrc64, nil -} - -// Upload blob as image layer to oss backend and verify -// integrity by calculate CRC64. -func (b *OSSBackend) Upload(_ context.Context, blobID, blobPath string, size int64, forcePush bool) (*ocispec.Descriptor, error) { - blobObjectKey := b.objectPrefix + blobID - - desc := blobDesc(size, blobID) - desc.URLs = append(desc.URLs, b.remoteID(blobID)) - - if !forcePush { - if exist, err := b.bucket.IsObjectExist(blobObjectKey); err != nil { - return nil, errors.Wrap(err, "check object existence") - } else if exist { - logrus.Infof("skip upload because blob exists: %s", blobID) - return &desc, nil - } - } - - start := time.Now() - crc64Chan := make(chan uint64, 1) - crc64ErrChan := make(chan error, 1) - go func() { - defer func() { - close(crc64Chan) - close(crc64ErrChan) - }() - crc64Val, e := calcCrc64ECMA(blobPath) - crc64Chan <- crc64Val - crc64ErrChan <- e - }() - - logrus.Debugf("upload %s using multipart method", blobObjectKey) - chunks, err := oss.SplitFileByPartSize(blobPath, multipartChunkSize) - if err != nil { - return nil, errors.Wrap(err, "split file by part size") - } - - imur, err := b.bucket.InitiateMultipartUpload(blobObjectKey) - if err != nil { - return nil, errors.Wrap(err, "initiate multipart upload") - } - - eg := new(errgroup.Group) - partsChan := make(chan oss.UploadPart, len(chunks)) - for _, chunk := range chunks { - ck := chunk - eg.Go(func() error { - p, err := b.bucket.UploadPartFromFile(imur, blobPath, ck.Offset, ck.Size, ck.Number) - if err != nil { - return errors.Wrap(err, "upload part from file") - } - partsChan <- p - return nil - }) - } - - if err := eg.Wait(); err != nil { - close(partsChan) - if err := b.bucket.AbortMultipartUpload(imur); err != nil { - return nil, errors.Wrap(err, "abort multipart upload") - } - return nil, errors.Wrap(err, "upload parts") - } - close(partsChan) - - var parts []oss.UploadPart - for p := range partsChan { - parts = append(parts, p) - } - - ms := multipartStatus{ - 
imur: &imur, - parts: parts, - blobObjectKey: blobObjectKey, - crc64Chan: crc64Chan, - crc64ErrChan: crc64ErrChan, - } - b.msMutex.Lock() - defer b.msMutex.Unlock() - b.ms = append(b.ms, ms) - - logrus.Debugf("uploaded blob %s to oss backend, costs %s", blobObjectKey, time.Since(start)) - - return &desc, nil -} - -func (b *OSSBackend) Finalize(cancel bool) error { - b.msMutex.Lock() - defer b.msMutex.Unlock() - - for _, ms := range b.ms { - if cancel { - // If there is any failure during conversion process, it will - // cause the uploaded blob to be left on oss, and these blobs - // are hard to be GC-ed, so we need always to use the multipart - // upload, and should call the `AbortMultipartUpload` method to - // prevent blob residue as much as possible once any error happens - // during conversion process. - if err := b.bucket.AbortMultipartUpload(*ms.imur); err != nil { - logrus.WithError(err).Warn("abort multipart upload") - } else { - logrus.Warnf("blob upload has been aborted: %s", ms.blobObjectKey) - } - continue - } - - _, err := b.bucket.CompleteMultipartUpload(*ms.imur, ms.parts) - if err != nil { - return errors.Wrap(err, "complete multipart upload") - } - - props, err := b.bucket.GetObjectDetailedMeta(ms.blobObjectKey) - if err != nil { - return errors.Wrapf(err, "get object meta") - } - - // Try to validate blob object integrity if any crc64 value is returned. - if value, ok := props[http.CanonicalHeaderKey("x-oss-hash-crc64ecma")]; ok { - if len(value) == 1 { - uploadedCrc, err := strconv.ParseUint(value[0], 10, 64) - if err != nil { - return errors.Wrapf(err, "parse uploaded crc64") - } - - err = <-ms.crc64ErrChan - if err != nil { - return errors.Wrapf(err, "calculate crc64") - } - - if crc64Val := <-ms.crc64Chan; uploadedCrc != crc64Val { - return errors.Errorf("crc64 mismatch, uploaded=%d, expected=%d", uploadedCrc, crc64Val) - } - - } else { - logrus.Warnf("too many values, skip crc64 integrity check.") - } - } else { - logrus.Warnf("no crc64 in header, skip crc64 integrity check.") - } - } - - return nil -} - -func (b *OSSBackend) Check(blobID string) (bool, error) { - blobID = b.objectPrefix + blobID - return b.bucket.IsObjectExist(blobID) -} - -func (b *OSSBackend) Type() Type { - return OssBackend -} - -func (b *OSSBackend) Reader(blobID string) (io.ReadCloser, error) { - blobID = b.objectPrefix + blobID - rc, err := b.bucket.GetObject(blobID) - return rc, err -} - -func (b *OSSBackend) Size(blobID string) (int64, error) { - blobID = b.objectPrefix + blobID - headers, err := b.bucket.GetObjectMeta(blobID) - if err != nil { - return 0, errors.Wrap(err, "get object size") - } - sizeStr := headers.Get("Content-Length") - size, err := strconv.ParseInt(sizeStr, 10, 0) - if err != nil { - return 0, errors.Wrap(err, "parse content-length header") - } - return size, nil -} - -func (b *OSSBackend) remoteID(blobID string) string { - return fmt.Sprintf("oss://%s/%s%s", b.bucket.BucketName, b.objectPrefix, blobID) -} +// Copyright 2020 Ant Group. All rights reserved. 
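// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch above). OSSBackend.Upload
// splits the blob into ~200MB parts and uploads them concurrently with an
// errgroup, collecting results over a buffered channel. The snippet below
// isolates that concurrency pattern with a stubbed uploadPart; the `part` type
// and uploadPart function are hypothetical stand-ins for oss.UploadPart and
// bucket.UploadPartFromFile.
package main

import (
	"fmt"

	"golang.org/x/sync/errgroup"
)

// part is a stand-in for the SDK's UploadPart result.
type part struct{ number int }

// uploadPart is a hypothetical stand-in for bucket.UploadPartFromFile.
func uploadPart(n int) (part, error) { return part{number: n}, nil }

func main() {
	chunkNumbers := []int{1, 2, 3, 4}

	eg := new(errgroup.Group)
	// Buffer the channel with the part count so goroutines never block on send.
	partsChan := make(chan part, len(chunkNumbers))
	for _, n := range chunkNumbers {
		n := n // capture the loop variable per iteration (pre-Go 1.22 semantics)
		eg.Go(func() error {
			p, err := uploadPart(n)
			if err != nil {
				return err
			}
			partsChan <- p
			return nil
		})
	}

	if err := eg.Wait(); err != nil {
		// In the real backend this is where AbortMultipartUpload is invoked.
		fmt.Println("upload failed:", err)
		return
	}
	close(partsChan)

	var parts []part
	for p := range partsChan {
		parts = append(parts, p)
	}
	fmt.Println("uploaded parts:", len(parts))
}
// ---------------------------------------------------------------------------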
+// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "context" + "encoding/json" + "fmt" + "hash/crc64" + "io" + "net/http" + "os" + "strconv" + "sync" + "time" + + "github.com/aliyun/aliyun-oss-go-sdk/oss" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" +) + +const ( + // For multipart uploads, OSS has a maximum number of 10000 chunks, + // so we can only upload blob size of about 10000 * multipartChunkSize. + multipartChunkSize = 200 * 1024 * 1024 /// 200MB +) + +type multipartStatus struct { + imur *oss.InitiateMultipartUploadResult + parts []oss.UploadPart + blobObjectKey string + crc64Chan chan uint64 + crc64ErrChan chan error +} + +type OSSBackend struct { + // OSS storage does not support directory. Therefore add a prefix to each object + // to make it a path-like object. + objectPrefix string + bucket *oss.Bucket + ms []multipartStatus + msMutex sync.Mutex +} + +func newOSSBackend(rawConfig []byte) (*OSSBackend, error) { + var configMap map[string]string + if err := json.Unmarshal(rawConfig, &configMap); err != nil { + return nil, errors.Wrap(err, "Parse OSS storage backend configuration") + } + + endpoint := configMap["endpoint"] + bucketName := configMap["bucket_name"] + + // Below items are not mandatory + accessKeyID := configMap["access_key_id"] + accessKeySecret := configMap["access_key_secret"] + objectPrefix := configMap["object_prefix"] + + if endpoint == "" || bucketName == "" { + return nil, fmt.Errorf("invalid OSS configuration: missing 'endpoint' or 'bucket'") + } + + client, err := oss.New(endpoint, accessKeyID, accessKeySecret) + if err != nil { + return nil, errors.Wrap(err, "Create client") + } + + bucket, err := client.Bucket(bucketName) + if err != nil { + return nil, errors.Wrap(err, "Create bucket") + } + + return &OSSBackend{ + objectPrefix: objectPrefix, + bucket: bucket, + }, nil +} + +func calcCrc64ECMA(path string) (uint64, error) { + buf := make([]byte, 4*1024) + table := crc64.MakeTable(crc64.ECMA) + + f, err := os.Open(path) + if err != nil { + return 0, errors.Wrapf(err, "calc md5sum") + } + defer f.Close() + + n, err := f.Read(buf) + if err != nil && err != io.EOF { + return 0, err + } + blobCrc64 := crc64.Checksum(buf[:n], table) + + for { + n, err := f.Read(buf) + blobCrc64 = crc64.Update(blobCrc64, table, buf[:n]) + if err == io.EOF || n == 0 { + break + } + } + + return blobCrc64, nil +} + +// Upload blob as image layer to oss backend and verify +// integrity by calculate CRC64. 
+func (b *OSSBackend) Upload(_ context.Context, blobID, blobPath string, size int64, forcePush bool) (*ocispec.Descriptor, error) { + blobObjectKey := b.objectPrefix + blobID + + desc := blobDesc(size, blobID) + desc.URLs = append(desc.URLs, b.remoteID(blobID)) + + if !forcePush { + if exist, err := b.bucket.IsObjectExist(blobObjectKey); err != nil { + return nil, errors.Wrap(err, "check object existence") + } else if exist { + logrus.Infof("skip upload because blob exists: %s", blobID) + return &desc, nil + } + } + + start := time.Now() + crc64Chan := make(chan uint64, 1) + crc64ErrChan := make(chan error, 1) + go func() { + defer func() { + close(crc64Chan) + close(crc64ErrChan) + }() + crc64Val, e := calcCrc64ECMA(blobPath) + crc64Chan <- crc64Val + crc64ErrChan <- e + }() + + logrus.Debugf("upload %s using multipart method", blobObjectKey) + chunks, err := oss.SplitFileByPartSize(blobPath, multipartChunkSize) + if err != nil { + return nil, errors.Wrap(err, "split file by part size") + } + + imur, err := b.bucket.InitiateMultipartUpload(blobObjectKey) + if err != nil { + return nil, errors.Wrap(err, "initiate multipart upload") + } + + eg := new(errgroup.Group) + partsChan := make(chan oss.UploadPart, len(chunks)) + for _, chunk := range chunks { + ck := chunk + eg.Go(func() error { + p, err := b.bucket.UploadPartFromFile(imur, blobPath, ck.Offset, ck.Size, ck.Number) + if err != nil { + return errors.Wrap(err, "upload part from file") + } + partsChan <- p + return nil + }) + } + + if err := eg.Wait(); err != nil { + close(partsChan) + if err := b.bucket.AbortMultipartUpload(imur); err != nil { + return nil, errors.Wrap(err, "abort multipart upload") + } + return nil, errors.Wrap(err, "upload parts") + } + close(partsChan) + + var parts []oss.UploadPart + for p := range partsChan { + parts = append(parts, p) + } + + ms := multipartStatus{ + imur: &imur, + parts: parts, + blobObjectKey: blobObjectKey, + crc64Chan: crc64Chan, + crc64ErrChan: crc64ErrChan, + } + b.msMutex.Lock() + defer b.msMutex.Unlock() + b.ms = append(b.ms, ms) + + logrus.Debugf("uploaded blob %s to oss backend, costs %s", blobObjectKey, time.Since(start)) + + return &desc, nil +} + +func (b *OSSBackend) Finalize(cancel bool) error { + b.msMutex.Lock() + defer b.msMutex.Unlock() + + for _, ms := range b.ms { + if cancel { + // If there is any failure during conversion process, it will + // cause the uploaded blob to be left on oss, and these blobs + // are hard to be GC-ed, so we need always to use the multipart + // upload, and should call the `AbortMultipartUpload` method to + // prevent blob residue as much as possible once any error happens + // during conversion process. + if err := b.bucket.AbortMultipartUpload(*ms.imur); err != nil { + logrus.WithError(err).Warn("abort multipart upload") + } else { + logrus.Warnf("blob upload has been aborted: %s", ms.blobObjectKey) + } + continue + } + + _, err := b.bucket.CompleteMultipartUpload(*ms.imur, ms.parts) + if err != nil { + return errors.Wrap(err, "complete multipart upload") + } + + props, err := b.bucket.GetObjectDetailedMeta(ms.blobObjectKey) + if err != nil { + return errors.Wrapf(err, "get object meta") + } + + // Try to validate blob object integrity if any crc64 value is returned. 
+ if value, ok := props[http.CanonicalHeaderKey("x-oss-hash-crc64ecma")]; ok { + if len(value) == 1 { + uploadedCrc, err := strconv.ParseUint(value[0], 10, 64) + if err != nil { + return errors.Wrapf(err, "parse uploaded crc64") + } + + err = <-ms.crc64ErrChan + if err != nil { + return errors.Wrapf(err, "calculate crc64") + } + + if crc64Val := <-ms.crc64Chan; uploadedCrc != crc64Val { + return errors.Errorf("crc64 mismatch, uploaded=%d, expected=%d", uploadedCrc, crc64Val) + } + + } else { + logrus.Warnf("too many values, skip crc64 integrity check.") + } + } else { + logrus.Warnf("no crc64 in header, skip crc64 integrity check.") + } + } + + return nil +} + +func (b *OSSBackend) Check(blobID string) (bool, error) { + blobID = b.objectPrefix + blobID + return b.bucket.IsObjectExist(blobID) +} + +func (b *OSSBackend) Type() Type { + return OssBackend +} + +func (b *OSSBackend) Reader(blobID string) (io.ReadCloser, error) { + blobID = b.objectPrefix + blobID + rc, err := b.bucket.GetObject(blobID) + return rc, err +} + +func (b *OSSBackend) Size(blobID string) (int64, error) { + blobID = b.objectPrefix + blobID + headers, err := b.bucket.GetObjectMeta(blobID) + if err != nil { + return 0, errors.Wrap(err, "get object size") + } + sizeStr := headers.Get("Content-Length") + size, err := strconv.ParseInt(sizeStr, 10, 0) + if err != nil { + return 0, errors.Wrap(err, "parse content-length header") + } + return size, nil +} + +func (b *OSSBackend) remoteID(blobID string) string { + return fmt.Sprintf("oss://%s/%s%s", b.bucket.BucketName, b.objectPrefix, blobID) +} diff --git a/contrib/nydusify/pkg/backend/oss_test.go b/contrib/nydusify/pkg/backend/oss_test.go index 72c567e7f3d..8db19073807 100644 --- a/contrib/nydusify/pkg/backend/oss_test.go +++ b/contrib/nydusify/pkg/backend/oss_test.go @@ -1,137 +1,137 @@ -// Copyright 2023 Nydus Developers. All rights reserved. 
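// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch above). Finalize compares
// the locally computed CRC64-ECMA checksum against the x-oss-hash-crc64ecma
// header returned by OSS. The helper below is an alternative way to compute the
// same checksum by streaming the file through hash/crc64 from the standard
// library; the file path in main is a hypothetical placeholder.
package main

import (
	"fmt"
	"hash/crc64"
	"io"
	"os"
)

// fileCrc64ECMA streams a file through a CRC64-ECMA hash and returns the sum,
// which should match what calcCrc64ECMA above produces for the same file.
func fileCrc64ECMA(path string) (uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	h := crc64.New(crc64.MakeTable(crc64.ECMA))
	if _, err := io.Copy(h, f); err != nil {
		return 0, err
	}
	return h.Sum64(), nil
}

func main() {
	sum, err := fileCrc64ECMA("/tmp/blob")
	if err != nil {
		fmt.Println("checksum failed:", err)
		return
	}
	fmt.Printf("crc64-ecma: %d\n", sum)
}
// ---------------------------------------------------------------------------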
-// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "encoding/json" - "hash/crc64" - "os" - "testing" - - "github.com/stretchr/testify/require" -) - -func tempOSSBackend() *OSSBackend { - ossConfigJSON := ` - { - "bucket_name": "test", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - backend, _ := newOSSBackend([]byte(ossConfigJSON)) - return backend -} - -func TestCalcCrc64ECMA(t *testing.T) { - blobCrc64, err := calcCrc64ECMA("nil") - require.Error(t, err) - require.Contains(t, err.Error(), "calc md5sum") - require.Zero(t, blobCrc64) - - file, err := os.CreateTemp("", "temp") - require.NoError(t, err) - defer os.RemoveAll(file.Name()) - - _, err = file.WriteString("123") - require.NoError(t, err) - file.Sync() - - blobCrc64, err = calcCrc64ECMA(file.Name()) - require.NoError(t, err) - require.Equal(t, crc64.Checksum([]byte("123"), crc64.MakeTable(crc64.ECMA)), blobCrc64) -} - -func TestOSSRemoteID(t *testing.T) { - ossBackend := tempOSSBackend() - id := ossBackend.remoteID("111") - require.Equal(t, "oss://test/blob111", id) -} - -func TestNewOSSBackend(t *testing.T) { - ossConfigJSON1 := ` - { - "bucket_name": "test", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON1))) - backend, err := newOSSBackend([]byte(ossConfigJSON1)) - require.NoError(t, err) - require.Equal(t, "test", backend.bucket.BucketName) - require.Equal(t, "blob", backend.objectPrefix) - - ossConfigJSON2 := ` - { - "bucket_name": "test", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON2))) - backend, err = newOSSBackend([]byte(ossConfigJSON2)) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid OSS configuration: missing 'endpoint' or 'bucket'") - require.Nil(t, backend) - - ossConfigJSON3 := ` - { - "bucket_name": "test", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON3))) - backend, err = newOSSBackend([]byte(ossConfigJSON3)) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid OSS configuration: missing 'endpoint' or 'bucket'") - require.Nil(t, backend) - - ossConfigJSON4 := ` - { - "bucket_name": "t", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON4))) - backend, err = newOSSBackend([]byte(ossConfigJSON4)) - require.Error(t, err) - require.Contains(t, err.Error(), "Create bucket") - require.Contains(t, err.Error(), "len is between [3-63],now is") - require.Nil(t, backend) - - ossConfigJSON5 := ` - { - "bucket_name": "AAA", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob" - }` - require.True(t, json.Valid([]byte(ossConfigJSON5))) - backend, err = newOSSBackend([]byte(ossConfigJSON5)) - require.Error(t, err) - require.Contains(t, err.Error(), "Create bucket") - require.Contains(t, err.Error(), "can only include lowercase letters, numbers, and -") - require.Nil(t, backend) - - ossConfigJSON6 := ` - { - "bucket_name": "AAA", - "endpoint": "region.oss.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - }` - backend, err = 
newOSSBackend([]byte(ossConfigJSON6)) - require.Error(t, err) - require.Contains(t, err.Error(), "Parse OSS storage backend configuration") - require.Nil(t, backend) -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "encoding/json" + "hash/crc64" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func tempOSSBackend() *OSSBackend { + ossConfigJSON := ` + { + "bucket_name": "test", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + backend, _ := newOSSBackend([]byte(ossConfigJSON)) + return backend +} + +func TestCalcCrc64ECMA(t *testing.T) { + blobCrc64, err := calcCrc64ECMA("nil") + require.Error(t, err) + require.Contains(t, err.Error(), "calc md5sum") + require.Zero(t, blobCrc64) + + file, err := os.CreateTemp("", "temp") + require.NoError(t, err) + defer os.RemoveAll(file.Name()) + + _, err = file.WriteString("123") + require.NoError(t, err) + file.Sync() + + blobCrc64, err = calcCrc64ECMA(file.Name()) + require.NoError(t, err) + require.Equal(t, crc64.Checksum([]byte("123"), crc64.MakeTable(crc64.ECMA)), blobCrc64) +} + +func TestOSSRemoteID(t *testing.T) { + ossBackend := tempOSSBackend() + id := ossBackend.remoteID("111") + require.Equal(t, "oss://test/blob111", id) +} + +func TestNewOSSBackend(t *testing.T) { + ossConfigJSON1 := ` + { + "bucket_name": "test", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON1))) + backend, err := newOSSBackend([]byte(ossConfigJSON1)) + require.NoError(t, err) + require.Equal(t, "test", backend.bucket.BucketName) + require.Equal(t, "blob", backend.objectPrefix) + + ossConfigJSON2 := ` + { + "bucket_name": "test", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON2))) + backend, err = newOSSBackend([]byte(ossConfigJSON2)) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid OSS configuration: missing 'endpoint' or 'bucket'") + require.Nil(t, backend) + + ossConfigJSON3 := ` + { + "bucket_name": "test", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON3))) + backend, err = newOSSBackend([]byte(ossConfigJSON3)) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid OSS configuration: missing 'endpoint' or 'bucket'") + require.Nil(t, backend) + + ossConfigJSON4 := ` + { + "bucket_name": "t", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON4))) + backend, err = newOSSBackend([]byte(ossConfigJSON4)) + require.Error(t, err) + require.Contains(t, err.Error(), "Create bucket") + require.Contains(t, err.Error(), "len is between [3-63],now is") + require.Nil(t, backend) + + ossConfigJSON5 := ` + { + "bucket_name": "AAA", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob" + }` + require.True(t, json.Valid([]byte(ossConfigJSON5))) + backend, err = newOSSBackend([]byte(ossConfigJSON5)) + require.Error(t, err) + require.Contains(t, err.Error(), "Create bucket") + require.Contains(t, err.Error(), "can only include lowercase letters, numbers, and -") + require.Nil(t, 
backend) + + ossConfigJSON6 := ` + { + "bucket_name": "AAA", + "endpoint": "region.oss.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + }` + backend, err = newOSSBackend([]byte(ossConfigJSON6)) + require.Error(t, err) + require.Contains(t, err.Error(), "Parse OSS storage backend configuration") + require.Nil(t, backend) +} diff --git a/contrib/nydusify/pkg/backend/registry.go b/contrib/nydusify/pkg/backend/registry.go index 7853fe5a47c..22773e5e4d4 100644 --- a/contrib/nydusify/pkg/backend/registry.go +++ b/contrib/nydusify/pkg/backend/registry.go @@ -1,60 +1,60 @@ -package backend - -import ( - "context" - "io" - "os" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" -) - -type Registry struct { - remote *remote.Remote -} - -func (r *Registry) Upload( - ctx context.Context, blobID, blobPath string, size int64, _ bool, -) (*ocispec.Descriptor, error) { - // The `forcePush` option is useless for registry backend, because - // the blob existed in registry can't be pushed again. - - desc := blobDesc(size, blobID) - - blobFile, err := os.Open(blobPath) - if err != nil { - return nil, errors.Wrap(err, "Open blob file") - } - defer blobFile.Close() - - if err := r.remote.Push(ctx, desc, true, blobFile); err != nil { - return nil, errors.Wrap(err, "Push blob layer") - } - - return &desc, nil -} - -func (r *Registry) Finalize(_ bool) error { - return nil -} - -func (r *Registry) Check(_ string) (bool, error) { - return true, nil -} - -func (r *Registry) Type() Type { - return RegistryBackend -} - -func (r *Registry) Reader(_ string) (io.ReadCloser, error) { - panic("not implemented") -} - -func (r *Registry) Size(_ string) (int64, error) { - panic("not implemented") -} - -func newRegistryBackend(_ []byte, remote *remote.Remote) (Backend, error) { - return &Registry{remote: remote}, nil -} +package backend + +import ( + "context" + "io" + "os" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +type Registry struct { + remote *remote.Remote +} + +func (r *Registry) Upload( + ctx context.Context, blobID, blobPath string, size int64, _ bool, +) (*ocispec.Descriptor, error) { + // The `forcePush` option is useless for registry backend, because + // the blob existed in registry can't be pushed again. 
+ + desc := blobDesc(size, blobID) + + blobFile, err := os.Open(blobPath) + if err != nil { + return nil, errors.Wrap(err, "Open blob file") + } + defer blobFile.Close() + + if err := r.remote.Push(ctx, desc, true, blobFile); err != nil { + return nil, errors.Wrap(err, "Push blob layer") + } + + return &desc, nil +} + +func (r *Registry) Finalize(_ bool) error { + return nil +} + +func (r *Registry) Check(_ string) (bool, error) { + return true, nil +} + +func (r *Registry) Type() Type { + return RegistryBackend +} + +func (r *Registry) Reader(_ string) (io.ReadCloser, error) { + panic("not implemented") +} + +func (r *Registry) Size(_ string) (int64, error) { + panic("not implemented") +} + +func newRegistryBackend(_ []byte, remote *remote.Remote) (Backend, error) { + return &Registry{remote: remote}, nil +} diff --git a/contrib/nydusify/pkg/backend/s3.go b/contrib/nydusify/pkg/backend/s3.go index e91af7750e1..7068662fd7c 100644 --- a/contrib/nydusify/pkg/backend/s3.go +++ b/contrib/nydusify/pkg/backend/s3.go @@ -1,188 +1,188 @@ -// Copyright 2022 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/url" - "os" - "path" - "time" - - "github.com/aws/aws-sdk-go-v2/aws" - awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" - awscfg "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/credentials" - "github.com/aws/aws-sdk-go-v2/feature/s3/manager" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -type S3Backend struct { - // objectPrefix is the path prefix of the uploaded object. - // For example, if the blobID which should be uploaded is "abc", - // and the objectPrefix is "path/to/my-registry/", then the object key will be - // "path/to/my-registry/abc". 
- objectPrefix string - bucketName string - endpointWithScheme string - client *s3.Client -} - -type S3Config struct { - AccessKeyID string `json:"access_key_id,omitempty"` - AccessKeySecret string `json:"access_key_secret,omitempty"` - Endpoint string `json:"endpoint,omitempty"` - Scheme string `json:"scheme,omitempty"` - BucketName string `json:"bucket_name,omitempty"` - Region string `json:"region,omitempty"` - ObjectPrefix string `json:"object_prefix,omitempty"` -} - -func newS3Backend(rawConfig []byte) (*S3Backend, error) { - cfg := &S3Config{} - if err := json.Unmarshal(rawConfig, cfg); err != nil { - return nil, errors.Wrap(err, "parse S3 storage backend configuration") - } - if cfg.Endpoint == "" { - cfg.Endpoint = "s3.amazonaws.com" - } - if cfg.Scheme == "" { - cfg.Scheme = "https" - } - endpointWithScheme := fmt.Sprintf("%s://%s", cfg.Scheme, cfg.Endpoint) - - if cfg.BucketName == "" || cfg.Region == "" { - return nil, fmt.Errorf("invalid S3 configuration: missing 'bucket_name' or 'region'") - } - - s3AWSConfig, err := awscfg.LoadDefaultConfig(context.TODO()) - if err != nil { - return nil, errors.Wrap(err, "load default AWS config") - } - - client := s3.NewFromConfig(s3AWSConfig, func(o *s3.Options) { - o.BaseEndpoint = &endpointWithScheme - o.Region = cfg.Region - o.UsePathStyle = true - if len(cfg.AccessKeySecret) > 0 && len(cfg.AccessKeyID) > 0 { - o.Credentials = credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.AccessKeySecret, "") - } - o.UsePathStyle = true - }) - - return &S3Backend{ - objectPrefix: cfg.ObjectPrefix, - bucketName: cfg.BucketName, - endpointWithScheme: endpointWithScheme, - client: client, - }, nil -} - -func (b *S3Backend) Upload(ctx context.Context, blobID, blobPath string, size int64, forcePush bool) (*ocispec.Descriptor, error) { - blobObjectKey := b.blobObjectKey(blobID) - - desc := blobDesc(size, blobID) - desc.URLs = append(desc.URLs, b.remoteID(blobObjectKey)) - - if !forcePush { - if exist, err := b.existObject(ctx, blobObjectKey); err != nil { - return nil, errors.Wrap(err, "check object existence") - } else if exist { - logrus.Infof("skip upload because blob exists: %s", blobID) - return &desc, nil - } - } - - start := time.Now() - - blobFile, err := os.Open(blobPath) - if err != nil { - return nil, errors.Wrap(err, "open blob file") - } - defer blobFile.Close() - - uploader := manager.NewUploader(b.client, func(u *manager.Uploader) { - u.PartSize = multipartChunkSize - }) - _, err = uploader.Upload(ctx, &s3.PutObjectInput{ - Bucket: aws.String(b.bucketName), - Key: aws.String(blobObjectKey), - Body: blobFile, - ChecksumAlgorithm: types.ChecksumAlgorithmCrc32, - }) - if err != nil { - return nil, errors.Wrap(err, "upload blob to s3 backend") - } - - logrus.Debugf("uploaded blob %s to s3 backend, costs %s", blobObjectKey, time.Since(start)) - - return &desc, nil -} - -func (b *S3Backend) Finalize(_ bool) error { - return nil -} - -func (b *S3Backend) Check(blobID string) (bool, error) { - return b.existObject(context.TODO(), b.blobObjectKey(blobID)) -} - -func (b *S3Backend) Type() Type { - return S3backend -} - -func (b *S3Backend) existObject(ctx context.Context, objectKey string) (bool, error) { - _, err := b.client.HeadObject(ctx, &s3.HeadObjectInput{ - Bucket: &b.bucketName, - Key: &objectKey, - }) - if err != nil { - var responseError *awshttp.ResponseError - if errors.As(err, &responseError) && responseError.ResponseError.HTTPStatusCode() == http.StatusNotFound { - return false, nil - } - return false, err - } - return true, 
nil -} - -func (b *S3Backend) blobObjectKey(blobID string) string { - return b.objectPrefix + blobID -} - -func (b *S3Backend) Reader(blobID string) (io.ReadCloser, error) { - objectKey := b.blobObjectKey(blobID) - output, err := b.client.GetObject(context.TODO(), &s3.GetObjectInput{ - Bucket: &b.bucketName, - Key: &objectKey, - }) - return output.Body, err -} - -func (b *S3Backend) Size(blobID string) (int64, error) { - objectKey := b.blobObjectKey(blobID) - output, err := b.client.GetObjectAttributes(context.TODO(), &s3.GetObjectAttributesInput{ - Bucket: &b.bucketName, - Key: &objectKey, - }) - if err != nil { - return 0, errors.Wrap(err, "get object size") - } - return *output.ObjectSize, nil -} - -func (b *S3Backend) remoteID(blobObjectKey string) string { - remoteURL, _ := url.Parse(b.endpointWithScheme) - remoteURL.Path = path.Join(remoteURL.Path, b.bucketName, blobObjectKey) - return remoteURL.String() -} +// Copyright 2022 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" + awscfg "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type S3Backend struct { + // objectPrefix is the path prefix of the uploaded object. + // For example, if the blobID which should be uploaded is "abc", + // and the objectPrefix is "path/to/my-registry/", then the object key will be + // "path/to/my-registry/abc". 
+ objectPrefix string + bucketName string + endpointWithScheme string + client *s3.Client +} + +type S3Config struct { + AccessKeyID string `json:"access_key_id,omitempty"` + AccessKeySecret string `json:"access_key_secret,omitempty"` + Endpoint string `json:"endpoint,omitempty"` + Scheme string `json:"scheme,omitempty"` + BucketName string `json:"bucket_name,omitempty"` + Region string `json:"region,omitempty"` + ObjectPrefix string `json:"object_prefix,omitempty"` +} + +func newS3Backend(rawConfig []byte) (*S3Backend, error) { + cfg := &S3Config{} + if err := json.Unmarshal(rawConfig, cfg); err != nil { + return nil, errors.Wrap(err, "parse S3 storage backend configuration") + } + if cfg.Endpoint == "" { + cfg.Endpoint = "s3.amazonaws.com" + } + if cfg.Scheme == "" { + cfg.Scheme = "https" + } + endpointWithScheme := fmt.Sprintf("%s://%s", cfg.Scheme, cfg.Endpoint) + + if cfg.BucketName == "" || cfg.Region == "" { + return nil, fmt.Errorf("invalid S3 configuration: missing 'bucket_name' or 'region'") + } + + s3AWSConfig, err := awscfg.LoadDefaultConfig(context.TODO()) + if err != nil { + return nil, errors.Wrap(err, "load default AWS config") + } + + client := s3.NewFromConfig(s3AWSConfig, func(o *s3.Options) { + o.BaseEndpoint = &endpointWithScheme + o.Region = cfg.Region + o.UsePathStyle = true + if len(cfg.AccessKeySecret) > 0 && len(cfg.AccessKeyID) > 0 { + o.Credentials = credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.AccessKeySecret, "") + } + o.UsePathStyle = true + }) + + return &S3Backend{ + objectPrefix: cfg.ObjectPrefix, + bucketName: cfg.BucketName, + endpointWithScheme: endpointWithScheme, + client: client, + }, nil +} + +func (b *S3Backend) Upload(ctx context.Context, blobID, blobPath string, size int64, forcePush bool) (*ocispec.Descriptor, error) { + blobObjectKey := b.blobObjectKey(blobID) + + desc := blobDesc(size, blobID) + desc.URLs = append(desc.URLs, b.remoteID(blobObjectKey)) + + if !forcePush { + if exist, err := b.existObject(ctx, blobObjectKey); err != nil { + return nil, errors.Wrap(err, "check object existence") + } else if exist { + logrus.Infof("skip upload because blob exists: %s", blobID) + return &desc, nil + } + } + + start := time.Now() + + blobFile, err := os.Open(blobPath) + if err != nil { + return nil, errors.Wrap(err, "open blob file") + } + defer blobFile.Close() + + uploader := manager.NewUploader(b.client, func(u *manager.Uploader) { + u.PartSize = multipartChunkSize + }) + _, err = uploader.Upload(ctx, &s3.PutObjectInput{ + Bucket: aws.String(b.bucketName), + Key: aws.String(blobObjectKey), + Body: blobFile, + ChecksumAlgorithm: types.ChecksumAlgorithmCrc32, + }) + if err != nil { + return nil, errors.Wrap(err, "upload blob to s3 backend") + } + + logrus.Debugf("uploaded blob %s to s3 backend, costs %s", blobObjectKey, time.Since(start)) + + return &desc, nil +} + +func (b *S3Backend) Finalize(_ bool) error { + return nil +} + +func (b *S3Backend) Check(blobID string) (bool, error) { + return b.existObject(context.TODO(), b.blobObjectKey(blobID)) +} + +func (b *S3Backend) Type() Type { + return S3backend +} + +func (b *S3Backend) existObject(ctx context.Context, objectKey string) (bool, error) { + _, err := b.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: &b.bucketName, + Key: &objectKey, + }) + if err != nil { + var responseError *awshttp.ResponseError + if errors.As(err, &responseError) && responseError.ResponseError.HTTPStatusCode() == http.StatusNotFound { + return false, nil + } + return false, err + } + return true, 
nil +} + +func (b *S3Backend) blobObjectKey(blobID string) string { + return b.objectPrefix + blobID +} + +func (b *S3Backend) Reader(blobID string) (io.ReadCloser, error) { + objectKey := b.blobObjectKey(blobID) + output, err := b.client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: &b.bucketName, + Key: &objectKey, + }) + return output.Body, err +} + +func (b *S3Backend) Size(blobID string) (int64, error) { + objectKey := b.blobObjectKey(blobID) + output, err := b.client.GetObjectAttributes(context.TODO(), &s3.GetObjectAttributesInput{ + Bucket: &b.bucketName, + Key: &objectKey, + }) + if err != nil { + return 0, errors.Wrap(err, "get object size") + } + return *output.ObjectSize, nil +} + +func (b *S3Backend) remoteID(blobObjectKey string) string { + remoteURL, _ := url.Parse(b.endpointWithScheme) + remoteURL.Path = path.Join(remoteURL.Path, b.bucketName, blobObjectKey) + return remoteURL.String() +} diff --git a/contrib/nydusify/pkg/backend/s3_test.go b/contrib/nydusify/pkg/backend/s3_test.go index b90a418932e..9fc76b2fbfa 100644 --- a/contrib/nydusify/pkg/backend/s3_test.go +++ b/contrib/nydusify/pkg/backend/s3_test.go @@ -1,119 +1,119 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package backend - -import ( - "context" - "encoding/json" - "testing" - - "github.com/aws/aws-sdk-go-v2/credentials" - "github.com/stretchr/testify/require" -) - -func tempS3Backend() *S3Backend { - s3ConfigJSON := ` - { - "bucket_name": "test", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "region1" - }` - backend, _ := newS3Backend([]byte(s3ConfigJSON)) - return backend -} - -func TestS3RemoteID(t *testing.T) { - s3Backend := tempS3Backend() - id := s3Backend.remoteID("111") - require.Equal(t, "https://s3.amazonaws.com/test/111", id) -} - -func TestBlobObjectKey(t *testing.T) { - s3Backend := tempS3Backend() - blobObjectKey := s3Backend.blobObjectKey("111") - require.Equal(t, "blob111", blobObjectKey) -} - -func TestNewS3Backend(t *testing.T) { - s3ConfigJSON1 := ` - { - "bucket_name": "test", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "region1" - }` - require.True(t, json.Valid([]byte(s3ConfigJSON1))) - backend, err := newS3Backend([]byte(s3ConfigJSON1)) - require.NoError(t, err) - require.Equal(t, "blob", backend.objectPrefix) - require.Equal(t, "test", backend.bucketName) - require.Equal(t, "https://s3.amazonaws.com", backend.endpointWithScheme) - require.Equal(t, "https://s3.amazonaws.com", *backend.client.Options().BaseEndpoint) - testCredentials, err := backend.client.Options().Credentials.Retrieve(context.Background()) - require.NoError(t, err) - realCredentials, err := credentials.NewStaticCredentialsProvider("testAK", "testSK", "").Retrieve(context.Background()) - require.NoError(t, err) - require.Equal(t, testCredentials, realCredentials) - - s3ConfigJSON2 := ` - { - "bucket_name": "test", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "region1", - }` - backend, err = newS3Backend([]byte(s3ConfigJSON2)) - require.Error(t, err) - require.Contains(t, err.Error(), "parse S3 storage backend configuration") - require.Nil(t, backend) - - s3ConfigJSON3 := ` - { - "bucket_name": "test", - "endpoint": "", - 
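// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch above), referring back to
// the S3 backend shown before this test file. It builds an aws-sdk-go-v2 client
// the same way newS3Backend does (custom BaseEndpoint, path-style addressing,
// static credentials) and performs the HeadObject existence check used by
// Check/existObject. Endpoint, region, bucket, key and credentials are
// hypothetical placeholders.
package main

import (
	"context"
	"log"

	"github.com/aws/aws-sdk-go-v2/aws"
	awscfg "github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

func main() {
	ctx := context.Background()
	endpoint := "https://s3.amazonaws.com" // scheme + endpoint, as in newS3Backend

	cfg, err := awscfg.LoadDefaultConfig(ctx)
	if err != nil {
		log.Fatalf("load AWS config: %v", err)
	}

	client := s3.NewFromConfig(cfg, func(o *s3.Options) {
		o.BaseEndpoint = &endpoint
		o.Region = "us-east-1"
		o.UsePathStyle = true // often required for S3-compatible object stores
		o.Credentials = credentials.NewStaticCredentialsProvider("AK", "SK", "")
	})

	// Existence check analogous to S3Backend.existObject / Check.
	_, err = client.HeadObject(ctx, &s3.HeadObjectInput{
		Bucket: aws.String("my-bucket"),
		Key:    aws.String("nydus/205eed24cbec29ad9cb4593a73168ef1803402370a82f7d51ce25646fc2f943a"),
	})
	if err != nil {
		log.Printf("object not reachable (it may simply not exist): %v", err)
		return
	}
	log.Println("object exists")
}
// ---------------------------------------------------------------------------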
"access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "", - "region": "region1" - }` - require.True(t, json.Valid([]byte(s3ConfigJSON3))) - backend, err = newS3Backend([]byte(s3ConfigJSON3)) - require.NoError(t, err) - require.Equal(t, "blob", backend.objectPrefix) - require.Equal(t, "test", backend.bucketName) - require.Equal(t, "https://s3.amazonaws.com", backend.endpointWithScheme) - testCredentials, err = backend.client.Options().Credentials.Retrieve(context.Background()) - require.NoError(t, err) - realCredentials, err = credentials.NewStaticCredentialsProvider("testAK", "testSK", "").Retrieve(context.Background()) - require.NoError(t, err) - require.Equal(t, testCredentials, realCredentials) - - s3ConfigJSON4 := ` - { - "bucket_name": "", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "" - }` - require.True(t, json.Valid([]byte(s3ConfigJSON4))) - backend, err = newS3Backend([]byte(s3ConfigJSON4)) - require.Error(t, err) - require.Contains(t, err.Error(), "invalid S3 configuration: missing 'bucket_name' or 'region'") - require.Nil(t, backend) -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package backend + +import ( + "context" + "encoding/json" + "testing" + + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/stretchr/testify/require" +) + +func tempS3Backend() *S3Backend { + s3ConfigJSON := ` + { + "bucket_name": "test", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "region1" + }` + backend, _ := newS3Backend([]byte(s3ConfigJSON)) + return backend +} + +func TestS3RemoteID(t *testing.T) { + s3Backend := tempS3Backend() + id := s3Backend.remoteID("111") + require.Equal(t, "https://s3.amazonaws.com/test/111", id) +} + +func TestBlobObjectKey(t *testing.T) { + s3Backend := tempS3Backend() + blobObjectKey := s3Backend.blobObjectKey("111") + require.Equal(t, "blob111", blobObjectKey) +} + +func TestNewS3Backend(t *testing.T) { + s3ConfigJSON1 := ` + { + "bucket_name": "test", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "region1" + }` + require.True(t, json.Valid([]byte(s3ConfigJSON1))) + backend, err := newS3Backend([]byte(s3ConfigJSON1)) + require.NoError(t, err) + require.Equal(t, "blob", backend.objectPrefix) + require.Equal(t, "test", backend.bucketName) + require.Equal(t, "https://s3.amazonaws.com", backend.endpointWithScheme) + require.Equal(t, "https://s3.amazonaws.com", *backend.client.Options().BaseEndpoint) + testCredentials, err := backend.client.Options().Credentials.Retrieve(context.Background()) + require.NoError(t, err) + realCredentials, err := credentials.NewStaticCredentialsProvider("testAK", "testSK", "").Retrieve(context.Background()) + require.NoError(t, err) + require.Equal(t, testCredentials, realCredentials) + + s3ConfigJSON2 := ` + { + "bucket_name": "test", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "region1", + }` + backend, err = newS3Backend([]byte(s3ConfigJSON2)) + require.Error(t, err) + require.Contains(t, err.Error(), "parse S3 storage backend configuration") + require.Nil(t, backend) + + s3ConfigJSON3 := ` + 
{ + "bucket_name": "test", + "endpoint": "", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "", + "region": "region1" + }` + require.True(t, json.Valid([]byte(s3ConfigJSON3))) + backend, err = newS3Backend([]byte(s3ConfigJSON3)) + require.NoError(t, err) + require.Equal(t, "blob", backend.objectPrefix) + require.Equal(t, "test", backend.bucketName) + require.Equal(t, "https://s3.amazonaws.com", backend.endpointWithScheme) + testCredentials, err = backend.client.Options().Credentials.Retrieve(context.Background()) + require.NoError(t, err) + realCredentials, err = credentials.NewStaticCredentialsProvider("testAK", "testSK", "").Retrieve(context.Background()) + require.NoError(t, err) + require.Equal(t, testCredentials, realCredentials) + + s3ConfigJSON4 := ` + { + "bucket_name": "", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "" + }` + require.True(t, json.Valid([]byte(s3ConfigJSON4))) + backend, err = newS3Backend([]byte(s3ConfigJSON4)) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid S3 configuration: missing 'bucket_name' or 'region'") + require.Nil(t, backend) +} diff --git a/contrib/nydusify/pkg/build/builder.go b/contrib/nydusify/pkg/build/builder.go index 177c0b9a209..ecad85ce285 100644 --- a/contrib/nydusify/pkg/build/builder.go +++ b/contrib/nydusify/pkg/build/builder.go @@ -1,172 +1,172 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package build - -import ( - "io" - "os" - "os/exec" - "strings" - - "github.com/sirupsen/logrus" -) - -type BuilderOption struct { - ParentBootstrapPath string - ChunkDict string - BootstrapPath string - RootfsPath string - BackendType string - BackendConfig string - WhiteoutSpec string - OutputJSONPath string - PrefetchPatterns string - // A regular file or fifo into which commands nydus-image to dump contents. - BlobPath string - AlignedChunk bool - Compressor string - ChunkSize string - FsVersion string -} - -type CompactOption struct { - ChunkDict string - BootstrapPath string - OutputBootstrapPath string - BackendType string - BackendConfigPath string - OutputJSONPath string - CompactConfigPath string -} - -type GenerateOption struct { - BootstrapPaths []string - DatabasePath string - ChunkdictBootstrapPath string - OutputPath string -} - -type Builder struct { - binaryPath string - stdout io.Writer - stderr io.Writer -} - -func NewBuilder(binaryPath string) *Builder { - return &Builder{ - binaryPath: binaryPath, - stdout: os.Stdout, - stderr: os.Stderr, - } -} - -func (builder *Builder) run(args []string, prefetchPatterns string) error { - logrus.Debugf("\tCommand: %s %s", builder.binaryPath, strings.Join(args[:], " ")) - - cmd := exec.Command(builder.binaryPath, args...) 
- cmd.Stdout = builder.stdout - cmd.Stderr = builder.stderr - cmd.Stdin = strings.NewReader(prefetchPatterns) - - if err := cmd.Run(); err != nil { - logrus.WithError(err).Errorf("fail to run %v %+v", builder.binaryPath, args) - return err - } - - return nil -} - -func (builder *Builder) Compact(option CompactOption) error { - args := []string{ - "compact", - "--bootstrap", option.BootstrapPath, - "--config", option.CompactConfigPath, - "--backend-type", option.BackendType, - "--backend-config-file", option.BackendConfigPath, - "--log-level", "info", - "--output-json", option.OutputJSONPath, - } - if option.OutputBootstrapPath != "" { - args = append(args, "--output-bootstrap", option.OutputBootstrapPath) - } - if option.ChunkDict != "" { - args = append(args, "--chunk-dict", option.ChunkDict) - } - return builder.run(args, "") -} - -// Run exec nydus-image CLI to build layer -func (builder *Builder) Run(option BuilderOption) error { - var args []string - if option.ParentBootstrapPath == "" { - args = []string{ - "create", - } - } else { - args = []string{ - "create", - "--parent-bootstrap", - option.ParentBootstrapPath, - } - } - if option.AlignedChunk { - args = append(args, "--aligned-chunk") - } - if option.ChunkDict != "" { - args = append(args, "--chunk-dict", option.ChunkDict) - } - - args = append( - args, - "--bootstrap", - option.BootstrapPath, - "--log-level", - "warn", - "--whiteout-spec", - option.WhiteoutSpec, - "--output-json", - option.OutputJSONPath, - "--blob", - option.BlobPath, - "--fs-version", - option.FsVersion, - ) - - if option.Compressor != "" { - args = append(args, "--compressor", option.Compressor) - } - - if len(option.PrefetchPatterns) > 0 { - args = append(args, "--prefetch-policy", "fs") - } - - if option.ChunkSize != "" { - args = append(args, "--chunk-size", option.ChunkSize) - } - - args = append(args, option.RootfsPath) - - return builder.run(args, option.PrefetchPatterns) -} - -// Generate calls `nydus-image chunkdict generate` to get chunkdict -func (builder *Builder) Generate(option GenerateOption) error { - logrus.Infof("Invoking 'nydus-image chunkdict generate' command") - args := []string{ - "chunkdict", - "generate", - "--log-level", - "warn", - "--bootstrap", - option.ChunkdictBootstrapPath, - "--database", - option.DatabasePath, - "--output-json", - option.OutputPath, - } - args = append(args, option.BootstrapPaths...) - - return builder.run(args, "") -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package build + +import ( + "io" + "os" + "os/exec" + "strings" + + "github.com/sirupsen/logrus" +) + +type BuilderOption struct { + ParentBootstrapPath string + ChunkDict string + BootstrapPath string + RootfsPath string + BackendType string + BackendConfig string + WhiteoutSpec string + OutputJSONPath string + PrefetchPatterns string + // A regular file or fifo into which commands nydus-image to dump contents. 
+ BlobPath string + AlignedChunk bool + Compressor string + ChunkSize string + FsVersion string +} + +type CompactOption struct { + ChunkDict string + BootstrapPath string + OutputBootstrapPath string + BackendType string + BackendConfigPath string + OutputJSONPath string + CompactConfigPath string +} + +type GenerateOption struct { + BootstrapPaths []string + DatabasePath string + ChunkdictBootstrapPath string + OutputPath string +} + +type Builder struct { + binaryPath string + stdout io.Writer + stderr io.Writer +} + +func NewBuilder(binaryPath string) *Builder { + return &Builder{ + binaryPath: binaryPath, + stdout: os.Stdout, + stderr: os.Stderr, + } +} + +func (builder *Builder) run(args []string, prefetchPatterns string) error { + logrus.Debugf("\tCommand: %s %s", builder.binaryPath, strings.Join(args[:], " ")) + + cmd := exec.Command(builder.binaryPath, args...) + cmd.Stdout = builder.stdout + cmd.Stderr = builder.stderr + cmd.Stdin = strings.NewReader(prefetchPatterns) + + if err := cmd.Run(); err != nil { + logrus.WithError(err).Errorf("fail to run %v %+v", builder.binaryPath, args) + return err + } + + return nil +} + +func (builder *Builder) Compact(option CompactOption) error { + args := []string{ + "compact", + "--bootstrap", option.BootstrapPath, + "--config", option.CompactConfigPath, + "--backend-type", option.BackendType, + "--backend-config-file", option.BackendConfigPath, + "--log-level", "info", + "--output-json", option.OutputJSONPath, + } + if option.OutputBootstrapPath != "" { + args = append(args, "--output-bootstrap", option.OutputBootstrapPath) + } + if option.ChunkDict != "" { + args = append(args, "--chunk-dict", option.ChunkDict) + } + return builder.run(args, "") +} + +// Run exec nydus-image CLI to build layer +func (builder *Builder) Run(option BuilderOption) error { + var args []string + if option.ParentBootstrapPath == "" { + args = []string{ + "create", + } + } else { + args = []string{ + "create", + "--parent-bootstrap", + option.ParentBootstrapPath, + } + } + if option.AlignedChunk { + args = append(args, "--aligned-chunk") + } + if option.ChunkDict != "" { + args = append(args, "--chunk-dict", option.ChunkDict) + } + + args = append( + args, + "--bootstrap", + option.BootstrapPath, + "--log-level", + "warn", + "--whiteout-spec", + option.WhiteoutSpec, + "--output-json", + option.OutputJSONPath, + "--blob", + option.BlobPath, + "--fs-version", + option.FsVersion, + ) + + if option.Compressor != "" { + args = append(args, "--compressor", option.Compressor) + } + + if len(option.PrefetchPatterns) > 0 { + args = append(args, "--prefetch-policy", "fs") + } + + if option.ChunkSize != "" { + args = append(args, "--chunk-size", option.ChunkSize) + } + + args = append(args, option.RootfsPath) + + return builder.run(args, option.PrefetchPatterns) +} + +// Generate calls `nydus-image chunkdict generate` to get chunkdict +func (builder *Builder) Generate(option GenerateOption) error { + logrus.Infof("Invoking 'nydus-image chunkdict generate' command") + args := []string{ + "chunkdict", + "generate", + "--log-level", + "warn", + "--bootstrap", + option.ChunkdictBootstrapPath, + "--database", + option.DatabasePath, + "--output-json", + option.OutputPath, + } + args = append(args, option.BootstrapPaths...) 
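
Aside (not part of the patch): `Run`, `Compact`, and `Generate` are thin argument assemblers around the `nydus-image` CLI, and `Run` is what the per-layer build workflow calls. A hedged sketch of direct usage follows, assuming the module's `pkg/build` import path; every path and value below is made up for illustration.

```
package main

import (
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build"
)

func main() {
	builder := build.NewBuilder("/usr/local/bin/nydus-image") // hypothetical binary path
	if err := builder.Run(build.BuilderOption{
		BootstrapPath:  "/tmp/nydusify/bootstrap",
		RootfsPath:     "/tmp/nydusify/layer-rootfs",
		WhiteoutSpec:   "oci", // placeholder whiteout spec
		OutputJSONPath: "/tmp/nydusify/bootstrap-output.json",
		BlobPath:       "/tmp/nydusify/blob",
		FsVersion:      "6",
		AlignedChunk:   true,
	}); err != nil {
		log.Fatalf("nydus-image create failed: %v", err)
	}
}
```
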
+ + return builder.run(args, "") +} diff --git a/contrib/nydusify/pkg/build/workflow.go b/contrib/nydusify/pkg/build/workflow.go index 35dc855cde9..d30b49ae419 100644 --- a/contrib/nydusify/pkg/build/workflow.go +++ b/contrib/nydusify/pkg/build/workflow.go @@ -1,169 +1,169 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package build - -import ( - "encoding/json" - "fmt" - "os" - "path/filepath" - - "github.com/google/uuid" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -type WorkflowOption struct { - ChunkDict string - TargetDir string - NydusImagePath string - PrefetchPatterns string - FsVersion string - Compressor string - ChunkSize string -} - -type Workflow struct { - WorkflowOption - BuilderVersion string - bootstrapPath string - blobsDir string - backendConfig string - parentBootstrapPath string - builder *Builder - lastBlobID string -} - -type debugJSON struct { - Version string - Blobs []string -} - -// Dump output json file of every layer to $workdir/bootstraps directory -// for debug or perf analysis purpose -func (workflow *Workflow) buildOutputJSONPath() string { - return workflow.bootstrapPath + "-output.json" -} - -// Get latest built blob from blobs directory -func (workflow *Workflow) getLatestBlobPath() (string, error) { - var data debugJSON - jsonBytes, err := os.ReadFile(workflow.buildOutputJSONPath()) - if err != nil { - return "", err - } - if err := json.Unmarshal(jsonBytes, &data); err != nil { - return "", err - } - blobIDs := data.Blobs - - // Record builder version of current build environment for easy - // debugging and troubleshooting afterwards. - workflow.BuilderVersion = data.Version - - if len(blobIDs) == 0 { - return "", nil - } - - latestBlobID := blobIDs[len(blobIDs)-1] - if latestBlobID != workflow.lastBlobID { - workflow.lastBlobID = latestBlobID - blobPath := filepath.Join(workflow.blobsDir, latestBlobID) - return blobPath, nil - } - - return "", nil -} - -// NewWorkflow prepare bootstrap and blobs path for layered build workflow -func NewWorkflow(option WorkflowOption) (*Workflow, error) { - blobsDir := filepath.Join(option.TargetDir, "blobs") - if err := os.RemoveAll(blobsDir); err != nil { - return nil, errors.Wrap(err, "Remove blob directory") - } - if err := os.MkdirAll(blobsDir, 0755); err != nil { - return nil, errors.Wrap(err, "Create blob directory") - } - - backendConfig := fmt.Sprintf(`{"dir": "%s"}`, blobsDir) - builder := NewBuilder(option.NydusImagePath) - - return &Workflow{ - WorkflowOption: option, - blobsDir: blobsDir, - backendConfig: backendConfig, - builder: builder, - }, nil -} - -// Build nydus bootstrap and blob, returned blobPath's basename is sha256 hex string -func (workflow *Workflow) Build( - layerDir, whiteoutSpec, parentBootstrapPath, bootstrapPath string, alignedChunk bool, -) (string, error) { - workflow.bootstrapPath = bootstrapPath - - if parentBootstrapPath != "" { - workflow.parentBootstrapPath = parentBootstrapPath - } - - blobPath := filepath.Join(workflow.blobsDir, uuid.NewString()) - - if err := workflow.builder.Run(BuilderOption{ - ParentBootstrapPath: workflow.parentBootstrapPath, - BootstrapPath: workflow.bootstrapPath, - RootfsPath: layerDir, - PrefetchPatterns: workflow.PrefetchPatterns, - WhiteoutSpec: whiteoutSpec, - OutputJSONPath: workflow.buildOutputJSONPath(), - BlobPath: blobPath, - AlignedChunk: alignedChunk, - ChunkDict: workflow.ChunkDict, - FsVersion: workflow.FsVersion, - Compressor: workflow.Compressor, - ChunkSize: 
workflow.ChunkSize, - }); err != nil { - return "", errors.Wrapf(err, "build layer %s", layerDir) - } - - workflow.parentBootstrapPath = workflow.bootstrapPath - - digestedBlobPath, err := workflow.getLatestBlobPath() - if err != nil { - return "", errors.Wrap(err, "get latest blob") - } - - logrus.Debugf("original: %s. digested: %s", blobPath, digestedBlobPath) - - // Ignore the empty blob file generated by this build. - blobInfo, err := os.Stat(blobPath) - if err != nil { - if os.IsNotExist(err) { - return "", nil - } - return "", err - } - if blobInfo.Size() == 0 { - return "", nil - } - - // Rename the newly generated blob to its sha256 digest. - // Because the flow will use the basename as the blob object to be pushed to registry. - // When `digestedBlobPath` is void, this layer's bootstrap can be pushed meanwhile not for blob - if digestedBlobPath != "" { - err = os.Rename(blobPath, digestedBlobPath) - // It's possible that two blobs that are built with the same digest. - // It's not fatal during image creation since rafs can access exactly - // what it wants since the two are the same, though registry only have - // one blob corresponding to two layers. - if err != nil && err != os.ErrExist { - return "", err - } else if err == os.ErrExist { - logrus.Warnf("Same blob %s are generated", digestedBlobPath) - return "", nil - } - } - - return digestedBlobPath, nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package build + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/google/uuid" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type WorkflowOption struct { + ChunkDict string + TargetDir string + NydusImagePath string + PrefetchPatterns string + FsVersion string + Compressor string + ChunkSize string +} + +type Workflow struct { + WorkflowOption + BuilderVersion string + bootstrapPath string + blobsDir string + backendConfig string + parentBootstrapPath string + builder *Builder + lastBlobID string +} + +type debugJSON struct { + Version string + Blobs []string +} + +// Dump output json file of every layer to $workdir/bootstraps directory +// for debug or perf analysis purpose +func (workflow *Workflow) buildOutputJSONPath() string { + return workflow.bootstrapPath + "-output.json" +} + +// Get latest built blob from blobs directory +func (workflow *Workflow) getLatestBlobPath() (string, error) { + var data debugJSON + jsonBytes, err := os.ReadFile(workflow.buildOutputJSONPath()) + if err != nil { + return "", err + } + if err := json.Unmarshal(jsonBytes, &data); err != nil { + return "", err + } + blobIDs := data.Blobs + + // Record builder version of current build environment for easy + // debugging and troubleshooting afterwards. 
+ workflow.BuilderVersion = data.Version + + if len(blobIDs) == 0 { + return "", nil + } + + latestBlobID := blobIDs[len(blobIDs)-1] + if latestBlobID != workflow.lastBlobID { + workflow.lastBlobID = latestBlobID + blobPath := filepath.Join(workflow.blobsDir, latestBlobID) + return blobPath, nil + } + + return "", nil +} + +// NewWorkflow prepare bootstrap and blobs path for layered build workflow +func NewWorkflow(option WorkflowOption) (*Workflow, error) { + blobsDir := filepath.Join(option.TargetDir, "blobs") + if err := os.RemoveAll(blobsDir); err != nil { + return nil, errors.Wrap(err, "Remove blob directory") + } + if err := os.MkdirAll(blobsDir, 0755); err != nil { + return nil, errors.Wrap(err, "Create blob directory") + } + + backendConfig := fmt.Sprintf(`{"dir": "%s"}`, blobsDir) + builder := NewBuilder(option.NydusImagePath) + + return &Workflow{ + WorkflowOption: option, + blobsDir: blobsDir, + backendConfig: backendConfig, + builder: builder, + }, nil +} + +// Build nydus bootstrap and blob, returned blobPath's basename is sha256 hex string +func (workflow *Workflow) Build( + layerDir, whiteoutSpec, parentBootstrapPath, bootstrapPath string, alignedChunk bool, +) (string, error) { + workflow.bootstrapPath = bootstrapPath + + if parentBootstrapPath != "" { + workflow.parentBootstrapPath = parentBootstrapPath + } + + blobPath := filepath.Join(workflow.blobsDir, uuid.NewString()) + + if err := workflow.builder.Run(BuilderOption{ + ParentBootstrapPath: workflow.parentBootstrapPath, + BootstrapPath: workflow.bootstrapPath, + RootfsPath: layerDir, + PrefetchPatterns: workflow.PrefetchPatterns, + WhiteoutSpec: whiteoutSpec, + OutputJSONPath: workflow.buildOutputJSONPath(), + BlobPath: blobPath, + AlignedChunk: alignedChunk, + ChunkDict: workflow.ChunkDict, + FsVersion: workflow.FsVersion, + Compressor: workflow.Compressor, + ChunkSize: workflow.ChunkSize, + }); err != nil { + return "", errors.Wrapf(err, "build layer %s", layerDir) + } + + workflow.parentBootstrapPath = workflow.bootstrapPath + + digestedBlobPath, err := workflow.getLatestBlobPath() + if err != nil { + return "", errors.Wrap(err, "get latest blob") + } + + logrus.Debugf("original: %s. digested: %s", blobPath, digestedBlobPath) + + // Ignore the empty blob file generated by this build. + blobInfo, err := os.Stat(blobPath) + if err != nil { + if os.IsNotExist(err) { + return "", nil + } + return "", err + } + if blobInfo.Size() == 0 { + return "", nil + } + + // Rename the newly generated blob to its sha256 digest. + // Because the flow will use the basename as the blob object to be pushed to registry. + // When `digestedBlobPath` is void, this layer's bootstrap can be pushed meanwhile not for blob + if digestedBlobPath != "" { + err = os.Rename(blobPath, digestedBlobPath) + // It's possible that two blobs that are built with the same digest. + // It's not fatal during image creation since rafs can access exactly + // what it wants since the two are the same, though registry only have + // one blob corresponding to two layers. + if err != nil && err != os.ErrExist { + return "", err + } else if err == os.ErrExist { + logrus.Warnf("Same blob %s are generated", digestedBlobPath) + return "", nil + } + } + + return digestedBlobPath, nil +} diff --git a/contrib/nydusify/pkg/cache/cache.go b/contrib/nydusify/pkg/cache/cache.go index 445442b5b87..98b007f6d78 100644 --- a/contrib/nydusify/pkg/cache/cache.go +++ b/contrib/nydusify/pkg/cache/cache.go @@ -1,567 +1,567 @@ -// Copyright 2020 Ant Group. All rights reserved. 
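
Aside (not part of the patch): `Workflow` layers the per-layer build on top of `Builder`; each `Build` call reuses the previous bootstrap as the parent, reads the `-output.json` report to learn the digest of the freshly built blob, and renames the blob file accordingly. A hedged sketch of how a converter loop might drive it, again assuming the module's `pkg/build` import path; the work directory and layer paths are hypothetical.

```
package main

import (
	"fmt"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build"
)

func main() {
	wf, err := build.NewWorkflow(build.WorkflowOption{
		TargetDir:      "/tmp/nydusify-work", // hypothetical work directory
		NydusImagePath: "/usr/local/bin/nydus-image",
		FsVersion:      "6",
	})
	if err != nil {
		log.Fatal(err)
	}

	// One unpacked rootfs directory per source layer, lowest layer first.
	layers := []string{"/tmp/layers/0", "/tmp/layers/1"}
	for i, dir := range layers {
		bootstrap := fmt.Sprintf("/tmp/nydusify-work/bootstrap-%d", i)
		// Build returns the blob path named after its sha256 digest, or ""
		// when the layer produced no blob (for example, a metadata-only layer).
		blobPath, err := wf.Build(dir, "oci", "", bootstrap, true)
		if err != nil {
			log.Fatal(err)
		}
		log.Printf("layer %d: bootstrap=%s blob=%s", i, bootstrap, blobPath)
	}
}
```
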
-// -// SPDX-License-Identifier: Apache-2.0 - -package cache - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "strconv" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/sirupsen/logrus" - - "github.com/containerd/containerd/images" - digest "github.com/opencontainers/go-digest" - "github.com/opencontainers/image-spec/specs-go" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" -) - -// Opt configures Nydus cache -type Opt struct { - // Maximum records(bootstrap layer + blob layer) in cache image. - MaxRecords uint - // Version of cache image, we need to discard cache layers when - // the required version (specified by `--build-cache-version`) - // is unmatched with the cache image version, for example nydus - // bootstrap format has a minor upgrade. - Version string - // Bootstrap's RAFS version of cache image, we need to discard cache - // layers when the required version (specified by `--fs-version`) is - // unmatched with the fs version recorded in cache image, for example - // we can't use rafs v5 cache layers for rafs v6 image. - FsVersion string - // Make cache image manifest compatible with the docker v2 media - // type defined in github.com/containerd/containerd/images. - DockerV2Format bool - // The blob layer record will not be written to cache image if - // the backend be specified, because the blob layer will be uploaded - // to backend. - Backend backend.Backend -} - -// Cache creates an image to store cache records in its image manifest, -// every record presents the relationship like: -// -// source_layer_chainid -> (nydus_blob_layer_digest, nydus_bootstrap_layer_digest) -// If the converter hits cache record during build source layer, we can -// skip the layer building, see cache image example: examples/manifest/cache_manifest.json. -// -// Here is the build cache workflow: -// 1. Import cache records from registry; -// 2. Check cache record using source layer ChainID before layer build, -// skip layer build if the cache hit; -// 3. 
Export new cache records to registry; -type Cache struct { - opt Opt - // Remote is responsible for pulling & pushing cache image - remote *remote.Remote - // Records referenced - referenceRecords map[digest.Digest]*Record - // Records pulled from registry - pulledRecords map[digest.Digest]*Record - // Records to be push to registry - pushedRecords []*Record -} - -// New creates Nydus cache instance, -func New(remote *remote.Remote, opt Opt) (*Cache, error) { - cache := &Cache{ - opt: opt, - remote: remote, - // source_layer_chain_id -> cache_record - pulledRecords: make(map[digest.Digest]*Record), - referenceRecords: make(map[digest.Digest]*Record), - pushedRecords: []*Record{}, - } - - return cache, nil -} - -func (cacheRecord *Record) GetReferenceBlobs() []string { - listStr := cacheRecord.NydusBootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] - if listStr == "" { - return []string{} - } - var blobs []string - if err := json.Unmarshal([]byte(listStr), &blobs); err != nil { - return []string{} - } - return blobs -} - -func (cache *Cache) GetReference(d digest.Digest) *Record { - r, ok := cache.referenceRecords[d] - if !ok { - return nil - } - return r -} - -func (cache *Cache) SetReference(layer *ocispec.Descriptor) { - record := cache.layerToRecord(layer) - cache.referenceRecords[layer.Digest] = record -} - -func (cache *Cache) recordToLayer(record *Record) (*ocispec.Descriptor, *ocispec.Descriptor) { - // Handle referenced nydus data blob - if record.SourceChainID == "" { - if record.NydusBlobDesc != nil { - if cache.opt.Backend.Type() == backend.RegistryBackend { - return nil, &ocispec.Descriptor{ - MediaType: utils.MediaTypeNydusBlob, - Digest: record.NydusBlobDesc.Digest, - Size: record.NydusBlobDesc.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - }, - } - } - } - return nil, nil - } - - bootstrapCacheMediaType := ocispec.MediaTypeImageLayerGzip - if cache.opt.DockerV2Format { - bootstrapCacheMediaType = images.MediaTypeDockerSchema2LayerGzip - } - bootstrapCacheDesc := &ocispec.Descriptor{ - MediaType: bootstrapCacheMediaType, - Digest: record.NydusBootstrapDesc.Digest, - Size: record.NydusBootstrapDesc.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBootstrap: "true", - utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, - utils.LayerAnnotationNydusSourceChainID: record.SourceChainID.String(), - // Use the annotation to record bootstrap layer DiffID. - utils.LayerAnnotationUncompressed: record.NydusBootstrapDiffID.String(), - }, - } - if referenceBlobsStr, ok := record.NydusBootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs]; ok { - bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] = referenceBlobsStr - } - - var blobCacheDesc *ocispec.Descriptor - if record.NydusBlobDesc != nil { - // Record blob layer to cache image if the blob be pushed - // to registry instead of storage backend. 
- if cache.opt.Backend.Type() == backend.RegistryBackend { - blobCacheDesc = &ocispec.Descriptor{ - MediaType: utils.MediaTypeNydusBlob, - Digest: record.NydusBlobDesc.Digest, - Size: record.NydusBlobDesc.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - utils.LayerAnnotationNydusSourceChainID: record.SourceChainID.String(), - }, - } - } else { - bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusBlobDigest] = record.NydusBlobDesc.Digest.String() - bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusBlobSize] = strconv.FormatInt(record.NydusBlobDesc.Size, 10) - } - } - - return bootstrapCacheDesc, blobCacheDesc -} - -func (cache *Cache) exportRecordsToLayers() []ocispec.Descriptor { - var ( - layers []ocispec.Descriptor - referenceLayers []ocispec.Descriptor - ) - - for _, record := range cache.pushedRecords { - referenceBlobIDs := record.GetReferenceBlobs() - for _, blobID := range referenceBlobIDs { - // for oss backend, GetReference always return nil - // for registry backend, GetReference should not return nil - referenceRecord := cache.GetReference(digest.NewDigestFromEncoded(digest.SHA256, blobID)) - if referenceRecord != nil { - _, blobDesc := cache.recordToLayer(referenceRecord) - referenceLayers = append(referenceLayers, *blobDesc) - } - } - bootstrapCacheDesc, blobCacheDesc := cache.recordToLayer(record) - layers = append(layers, *bootstrapCacheDesc) - if blobCacheDesc != nil { - layers = append(layers, *blobCacheDesc) - } - } - - return append(referenceLayers, layers...) -} - -func (cache *Cache) layerToRecord(layer *ocispec.Descriptor) *Record { - sourceChainIDStr, ok := layer.Annotations[utils.LayerAnnotationNydusSourceChainID] - if !ok { - if layer.Annotations[utils.LayerAnnotationNydusBlob] == "true" { - // for reference blob layers - return &Record{ - NydusBlobDesc: &ocispec.Descriptor{ - MediaType: layer.MediaType, - Digest: layer.Digest, - Size: layer.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - }, - }, - } - } - return nil - } - sourceChainID := digest.Digest(sourceChainIDStr) - if sourceChainID.Validate() != nil { - return nil - } - if layer.Annotations == nil { - return nil - } - - // Handle bootstrap cache layer - if layer.Annotations[utils.LayerAnnotationNydusBootstrap] == "true" { - uncompressedDigestStr := layer.Annotations[utils.LayerAnnotationUncompressed] - if uncompressedDigestStr == "" { - return nil - } - bootstrapDiffID := digest.Digest(uncompressedDigestStr) - if bootstrapDiffID.Validate() != nil { - return nil - } - bootstrapDesc := ocispec.Descriptor{ - MediaType: layer.MediaType, - Digest: layer.Digest, - Size: layer.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBootstrap: "true", - utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, - utils.LayerAnnotationUncompressed: uncompressedDigestStr, - }, - } - referenceBlobsStr := layer.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] - if referenceBlobsStr != "" { - bootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] = referenceBlobsStr - } - var nydusBlobDesc *ocispec.Descriptor - if layer.Annotations[utils.LayerAnnotationNydusBlobDigest] != "" && - layer.Annotations[utils.LayerAnnotationNydusBlobSize] != "" { - blobDigest := digest.Digest(layer.Annotations[utils.LayerAnnotationNydusBlobDigest]) - if blobDigest.Validate() != nil { - return nil - } - blobSize, err := strconv.ParseInt(layer.Annotations[utils.LayerAnnotationNydusBlobSize], 10, 64) - if err != nil { - return nil 
- } - nydusBlobDesc = &ocispec.Descriptor{ - MediaType: utils.MediaTypeNydusBlob, - Digest: blobDigest, - Size: blobSize, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - }, - } - } - return &Record{ - SourceChainID: sourceChainID, - NydusBootstrapDesc: &bootstrapDesc, - NydusBlobDesc: nydusBlobDesc, - NydusBootstrapDiffID: bootstrapDiffID, - } - } - - // Handle blob cache layer - if layer.Annotations[utils.LayerAnnotationNydusBlob] == "true" { - nydusBlobDesc := &ocispec.Descriptor{ - MediaType: layer.MediaType, - Digest: layer.Digest, - Size: layer.Size, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - }, - } - return &Record{ - SourceChainID: sourceChainID, - NydusBlobDesc: nydusBlobDesc, - } - } - - return nil -} - -func mergeRecord(old, new *Record) *Record { - if old == nil { - old = &Record{ - SourceChainID: new.SourceChainID, - } - } - - if new.NydusBootstrapDesc != nil { - old.NydusBootstrapDesc = new.NydusBootstrapDesc - old.NydusBootstrapDiffID = new.NydusBootstrapDiffID - } - - if new.NydusBlobDesc != nil { - old.NydusBlobDesc = new.NydusBlobDesc - } - - return old -} - -func (cache *Cache) importRecordsFromLayers(layers []ocispec.Descriptor) { - pulledRecords := make(map[digest.Digest]*Record) - referenceRecords := make(map[digest.Digest]*Record) - pushedRecords := []*Record{} - - for _, layer := range layers { - record := cache.layerToRecord(&layer) - if record != nil { - if record.SourceChainID == "" { - referenceRecords[record.NydusBlobDesc.Digest] = record - logrus.Infof("Found reference blob layer %s", record.NydusBlobDesc.Digest) - } else { - // Merge bootstrap and related blob layer to record - newRecord := mergeRecord( - pulledRecords[record.SourceChainID], - record, - ) - pulledRecords[record.SourceChainID] = newRecord - pushedRecords = append(pushedRecords, newRecord) - } - } else { - logrus.Warnf("Strange! Build cache layer can't produce a valid record. %s", layer.Digest) - } - } - - cache.pulledRecords = pulledRecords - cache.pushedRecords = pushedRecords - cache.referenceRecords = referenceRecords -} - -// Export pushes cache manifest index to remote registry -func (cache *Cache) Export(ctx context.Context) error { - if len(cache.pushedRecords) == 0 { - return nil - } - - layers := cache.exportRecordsToLayers() - - // Ensure layers from manifest match with image config, - // this will keep compatibility when using docker pull - // for the image that only included bootstrap layers. - diffIDs := []digest.Digest{} - for _, layer := range layers { - var diffID digest.Digest - if layer.MediaType == utils.MediaTypeNydusBlob { - diffID = layer.Digest - } else { - diffID = digest.Digest(layer.Annotations[utils.LayerAnnotationUncompressed]) - } - if diffID.Validate() == nil { - diffIDs = append(diffIDs, diffID) - } else { - logrus.Warn("Drop the entire diff id list due to an invalid diff id") - diffIDs = []digest.Digest{} - // It is possible that some existing cache images don't have diff ids, - // but we can't break the cache export, so just break the loop. - break - } - } - - // Prepare empty image config, just for registry API compatibility, - // manifest requires a valid config field. - configMediaType := ocispec.MediaTypeImageConfig - if cache.opt.DockerV2Format { - configMediaType = images.MediaTypeDockerSchema2Config - } - config := ocispec.Image{ - Config: ocispec.ImageConfig{}, - RootFS: ocispec.RootFS{ - Type: "layers", - // Layers from manifest must be match image config. 
- DiffIDs: diffIDs, - }, - } - configDesc, configBytes, err := utils.MarshalToDesc(config, configMediaType) - if err != nil { - return errors.Wrap(err, "Marshal cache config") - } - if err := cache.remote.Push(ctx, *configDesc, false, bytes.NewReader(configBytes)); err != nil { - return errors.Wrap(err, "Push cache config") - } - - // Push cache manifest to remote registry - mediaType := ocispec.MediaTypeImageManifest - if cache.opt.DockerV2Format { - mediaType = images.MediaTypeDockerSchema2Manifest - } - - manifest := Manifest{ - MediaType: mediaType, - Manifest: ocispec.Manifest{ - Versioned: specs.Versioned{ - SchemaVersion: 2, - }, - // Just for registry API compatibility, registry required a - // valid config field. - Config: *configDesc, - Layers: layers, - Annotations: map[string]string{ - utils.ManifestNydusCache: cache.opt.Version, - utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, - }, - }, - } - - manifestDesc, manifestBytes, err := utils.MarshalToDesc(manifest, manifest.MediaType) - if err != nil { - return errors.Wrap(err, "Push cache manifest") - } - - if err := cache.remote.Push(ctx, *manifestDesc, false, bytes.NewReader(manifestBytes)); err != nil { - return errors.Wrap(err, "Push cache manifest") - } - - return nil -} - -// Import pulls cache manifest index from remote registry -func (cache *Cache) Import(ctx context.Context) error { - manifestDesc, err := cache.remote.Resolve(ctx) - if err != nil { - return errors.Wrap(err, "Resolve cache image") - } - - // Fetch cache manifest from remote registry - manifestReader, err := cache.remote.Pull(ctx, *manifestDesc, true) - if err != nil { - return errors.Wrap(err, "Pull cache image") - } - defer manifestReader.Close() - - manifestBytes, err := io.ReadAll(manifestReader) - if err != nil { - return errors.Wrap(err, "Read cache manifest") - } - - var manifest Manifest - if err := json.Unmarshal(manifestBytes, &manifest); err != nil { - return errors.Wrap(err, "Unmarshal cache manifest") - } - - // Discard the cache if mismatched version - if manifest.Annotations[utils.ManifestNydusCache] != cache.opt.Version { - return fmt.Errorf( - "unmatched cache image version %s, required to be %s", - manifest.Annotations[utils.ManifestNydusCache], cache.opt.Version, - ) - } - - // Discard the cache if mismatched RAFS FsVersion - // If utils.LayerAnnotationNydusFsVersion == "" and cache.opt.FsVersion == "5", - // it should be old cache image. 
- if manifest.Annotations[utils.LayerAnnotationNydusFsVersion] != cache.opt.FsVersion && - !(manifest.Annotations[utils.LayerAnnotationNydusFsVersion] == "" && cache.opt.FsVersion == "5") { - return fmt.Errorf( - "unmatched fs version %s, required to be %s", - manifest.Annotations[utils.LayerAnnotationNydusFsVersion], cache.opt.FsVersion, - ) - } - - cache.importRecordsFromLayers(manifest.Layers) - - return nil -} - -// Check checks bootstrap & blob layer exists in registry or storage backend -func (cache *Cache) Check(ctx context.Context, layerChainID digest.Digest) (*Record, io.ReadCloser, io.ReadCloser, error) { - record, ok := cache.pulledRecords[layerChainID] - if !ok { - return nil, nil, nil, nil - } - - // Check bootstrap layer on cache - bootstrapReader, err := cache.remote.Pull(ctx, *record.NydusBootstrapDesc, true) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "Check bootstrap layer") - } - defer func() { - if err != nil { - bootstrapReader.Close() - } - }() - - var exist bool - var blobReader io.ReadCloser - - // Check blob layer on cache - if record.NydusBlobDesc != nil { - if cache.opt.Backend.Type() == backend.RegistryBackend { - blobReader, err = cache.remote.Pull(ctx, *record.NydusBlobDesc, true) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "Check blob layer") - } - } else { - exist, err = cache.opt.Backend.Check(record.NydusBlobDesc.Digest.Hex()) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "Check blob on backend") - } else if !exist { - err = errors.New("Not found blob on backend") - return nil, nil, nil, err - } - } - } - - return record, bootstrapReader, blobReader, nil -} - -// Record puts new bootstrap & blob layer to cache record, it's a limited queue. -func (cache *Cache) Record(records []*Record) { - moveFront := map[digest.Digest]bool{} - for _, record := range records { - moveFront[record.SourceChainID] = true - } - - pushedRecords := records - for _, record := range cache.pushedRecords { - if !moveFront[record.SourceChainID] { - pushedRecords = append(pushedRecords, record) - if len(pushedRecords) >= int(cache.opt.MaxRecords) { - break - } - } - } - - if len(pushedRecords) > int(cache.opt.MaxRecords) { - cache.pushedRecords = pushedRecords[:int(cache.opt.MaxRecords)] - } else { - cache.pushedRecords = pushedRecords - } -} - -// PullBootstrap pulls bootstrap layer from registry, and unpack to a specified path, -// we can use it to prepare parent bootstrap for building. -func (cache *Cache) PullBootstrap(ctx context.Context, bootstrapDesc *ocispec.Descriptor, target string) error { - reader, err := cache.remote.Pull(ctx, *bootstrapDesc, true) - if err != nil { - return errors.Wrap(err, "Pull cached bootstrap layer") - } - defer reader.Close() - - if err := utils.UnpackFile(reader, utils.BootstrapFileNameInLayer, target); err != nil { - return errors.Wrap(err, "Unpack cached bootstrap layer") - } - - return nil -} - -// Push pushes cache image to registry -func (cache *Cache) Push(ctx context.Context, desc ocispec.Descriptor, reader io.Reader) error { - return cache.remote.Push(ctx, desc, true, reader) -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package cache + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "strconv" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/sirupsen/logrus" + + "github.com/containerd/containerd/images" + digest "github.com/opencontainers/go-digest" + "github.com/opencontainers/image-spec/specs-go" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +// Opt configures Nydus cache +type Opt struct { + // Maximum records(bootstrap layer + blob layer) in cache image. + MaxRecords uint + // Version of cache image, we need to discard cache layers when + // the required version (specified by `--build-cache-version`) + // is unmatched with the cache image version, for example nydus + // bootstrap format has a minor upgrade. + Version string + // Bootstrap's RAFS version of cache image, we need to discard cache + // layers when the required version (specified by `--fs-version`) is + // unmatched with the fs version recorded in cache image, for example + // we can't use rafs v5 cache layers for rafs v6 image. + FsVersion string + // Make cache image manifest compatible with the docker v2 media + // type defined in github.com/containerd/containerd/images. + DockerV2Format bool + // The blob layer record will not be written to cache image if + // the backend be specified, because the blob layer will be uploaded + // to backend. + Backend backend.Backend +} + +// Cache creates an image to store cache records in its image manifest, +// every record presents the relationship like: +// +// source_layer_chainid -> (nydus_blob_layer_digest, nydus_bootstrap_layer_digest) +// If the converter hits cache record during build source layer, we can +// skip the layer building, see cache image example: examples/manifest/cache_manifest.json. +// +// Here is the build cache workflow: +// 1. Import cache records from registry; +// 2. Check cache record using source layer ChainID before layer build, +// skip layer build if the cache hit; +// 3. 
Export new cache records to registry; +type Cache struct { + opt Opt + // Remote is responsible for pulling & pushing cache image + remote *remote.Remote + // Records referenced + referenceRecords map[digest.Digest]*Record + // Records pulled from registry + pulledRecords map[digest.Digest]*Record + // Records to be push to registry + pushedRecords []*Record +} + +// New creates Nydus cache instance, +func New(remote *remote.Remote, opt Opt) (*Cache, error) { + cache := &Cache{ + opt: opt, + remote: remote, + // source_layer_chain_id -> cache_record + pulledRecords: make(map[digest.Digest]*Record), + referenceRecords: make(map[digest.Digest]*Record), + pushedRecords: []*Record{}, + } + + return cache, nil +} + +func (cacheRecord *Record) GetReferenceBlobs() []string { + listStr := cacheRecord.NydusBootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] + if listStr == "" { + return []string{} + } + var blobs []string + if err := json.Unmarshal([]byte(listStr), &blobs); err != nil { + return []string{} + } + return blobs +} + +func (cache *Cache) GetReference(d digest.Digest) *Record { + r, ok := cache.referenceRecords[d] + if !ok { + return nil + } + return r +} + +func (cache *Cache) SetReference(layer *ocispec.Descriptor) { + record := cache.layerToRecord(layer) + cache.referenceRecords[layer.Digest] = record +} + +func (cache *Cache) recordToLayer(record *Record) (*ocispec.Descriptor, *ocispec.Descriptor) { + // Handle referenced nydus data blob + if record.SourceChainID == "" { + if record.NydusBlobDesc != nil { + if cache.opt.Backend.Type() == backend.RegistryBackend { + return nil, &ocispec.Descriptor{ + MediaType: utils.MediaTypeNydusBlob, + Digest: record.NydusBlobDesc.Digest, + Size: record.NydusBlobDesc.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + }, + } + } + } + return nil, nil + } + + bootstrapCacheMediaType := ocispec.MediaTypeImageLayerGzip + if cache.opt.DockerV2Format { + bootstrapCacheMediaType = images.MediaTypeDockerSchema2LayerGzip + } + bootstrapCacheDesc := &ocispec.Descriptor{ + MediaType: bootstrapCacheMediaType, + Digest: record.NydusBootstrapDesc.Digest, + Size: record.NydusBootstrapDesc.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBootstrap: "true", + utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, + utils.LayerAnnotationNydusSourceChainID: record.SourceChainID.String(), + // Use the annotation to record bootstrap layer DiffID. + utils.LayerAnnotationUncompressed: record.NydusBootstrapDiffID.String(), + }, + } + if referenceBlobsStr, ok := record.NydusBootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs]; ok { + bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] = referenceBlobsStr + } + + var blobCacheDesc *ocispec.Descriptor + if record.NydusBlobDesc != nil { + // Record blob layer to cache image if the blob be pushed + // to registry instead of storage backend. 
+ if cache.opt.Backend.Type() == backend.RegistryBackend { + blobCacheDesc = &ocispec.Descriptor{ + MediaType: utils.MediaTypeNydusBlob, + Digest: record.NydusBlobDesc.Digest, + Size: record.NydusBlobDesc.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + utils.LayerAnnotationNydusSourceChainID: record.SourceChainID.String(), + }, + } + } else { + bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusBlobDigest] = record.NydusBlobDesc.Digest.String() + bootstrapCacheDesc.Annotations[utils.LayerAnnotationNydusBlobSize] = strconv.FormatInt(record.NydusBlobDesc.Size, 10) + } + } + + return bootstrapCacheDesc, blobCacheDesc +} + +func (cache *Cache) exportRecordsToLayers() []ocispec.Descriptor { + var ( + layers []ocispec.Descriptor + referenceLayers []ocispec.Descriptor + ) + + for _, record := range cache.pushedRecords { + referenceBlobIDs := record.GetReferenceBlobs() + for _, blobID := range referenceBlobIDs { + // for oss backend, GetReference always return nil + // for registry backend, GetReference should not return nil + referenceRecord := cache.GetReference(digest.NewDigestFromEncoded(digest.SHA256, blobID)) + if referenceRecord != nil { + _, blobDesc := cache.recordToLayer(referenceRecord) + referenceLayers = append(referenceLayers, *blobDesc) + } + } + bootstrapCacheDesc, blobCacheDesc := cache.recordToLayer(record) + layers = append(layers, *bootstrapCacheDesc) + if blobCacheDesc != nil { + layers = append(layers, *blobCacheDesc) + } + } + + return append(referenceLayers, layers...) +} + +func (cache *Cache) layerToRecord(layer *ocispec.Descriptor) *Record { + sourceChainIDStr, ok := layer.Annotations[utils.LayerAnnotationNydusSourceChainID] + if !ok { + if layer.Annotations[utils.LayerAnnotationNydusBlob] == "true" { + // for reference blob layers + return &Record{ + NydusBlobDesc: &ocispec.Descriptor{ + MediaType: layer.MediaType, + Digest: layer.Digest, + Size: layer.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + }, + }, + } + } + return nil + } + sourceChainID := digest.Digest(sourceChainIDStr) + if sourceChainID.Validate() != nil { + return nil + } + if layer.Annotations == nil { + return nil + } + + // Handle bootstrap cache layer + if layer.Annotations[utils.LayerAnnotationNydusBootstrap] == "true" { + uncompressedDigestStr := layer.Annotations[utils.LayerAnnotationUncompressed] + if uncompressedDigestStr == "" { + return nil + } + bootstrapDiffID := digest.Digest(uncompressedDigestStr) + if bootstrapDiffID.Validate() != nil { + return nil + } + bootstrapDesc := ocispec.Descriptor{ + MediaType: layer.MediaType, + Digest: layer.Digest, + Size: layer.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBootstrap: "true", + utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, + utils.LayerAnnotationUncompressed: uncompressedDigestStr, + }, + } + referenceBlobsStr := layer.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] + if referenceBlobsStr != "" { + bootstrapDesc.Annotations[utils.LayerAnnotationNydusReferenceBlobIDs] = referenceBlobsStr + } + var nydusBlobDesc *ocispec.Descriptor + if layer.Annotations[utils.LayerAnnotationNydusBlobDigest] != "" && + layer.Annotations[utils.LayerAnnotationNydusBlobSize] != "" { + blobDigest := digest.Digest(layer.Annotations[utils.LayerAnnotationNydusBlobDigest]) + if blobDigest.Validate() != nil { + return nil + } + blobSize, err := strconv.ParseInt(layer.Annotations[utils.LayerAnnotationNydusBlobSize], 10, 64) + if err != nil { + return nil 
+ } + nydusBlobDesc = &ocispec.Descriptor{ + MediaType: utils.MediaTypeNydusBlob, + Digest: blobDigest, + Size: blobSize, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + }, + } + } + return &Record{ + SourceChainID: sourceChainID, + NydusBootstrapDesc: &bootstrapDesc, + NydusBlobDesc: nydusBlobDesc, + NydusBootstrapDiffID: bootstrapDiffID, + } + } + + // Handle blob cache layer + if layer.Annotations[utils.LayerAnnotationNydusBlob] == "true" { + nydusBlobDesc := &ocispec.Descriptor{ + MediaType: layer.MediaType, + Digest: layer.Digest, + Size: layer.Size, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + }, + } + return &Record{ + SourceChainID: sourceChainID, + NydusBlobDesc: nydusBlobDesc, + } + } + + return nil +} + +func mergeRecord(old, new *Record) *Record { + if old == nil { + old = &Record{ + SourceChainID: new.SourceChainID, + } + } + + if new.NydusBootstrapDesc != nil { + old.NydusBootstrapDesc = new.NydusBootstrapDesc + old.NydusBootstrapDiffID = new.NydusBootstrapDiffID + } + + if new.NydusBlobDesc != nil { + old.NydusBlobDesc = new.NydusBlobDesc + } + + return old +} + +func (cache *Cache) importRecordsFromLayers(layers []ocispec.Descriptor) { + pulledRecords := make(map[digest.Digest]*Record) + referenceRecords := make(map[digest.Digest]*Record) + pushedRecords := []*Record{} + + for _, layer := range layers { + record := cache.layerToRecord(&layer) + if record != nil { + if record.SourceChainID == "" { + referenceRecords[record.NydusBlobDesc.Digest] = record + logrus.Infof("Found reference blob layer %s", record.NydusBlobDesc.Digest) + } else { + // Merge bootstrap and related blob layer to record + newRecord := mergeRecord( + pulledRecords[record.SourceChainID], + record, + ) + pulledRecords[record.SourceChainID] = newRecord + pushedRecords = append(pushedRecords, newRecord) + } + } else { + logrus.Warnf("Strange! Build cache layer can't produce a valid record. %s", layer.Digest) + } + } + + cache.pulledRecords = pulledRecords + cache.pushedRecords = pushedRecords + cache.referenceRecords = referenceRecords +} + +// Export pushes cache manifest index to remote registry +func (cache *Cache) Export(ctx context.Context) error { + if len(cache.pushedRecords) == 0 { + return nil + } + + layers := cache.exportRecordsToLayers() + + // Ensure layers from manifest match with image config, + // this will keep compatibility when using docker pull + // for the image that only included bootstrap layers. + diffIDs := []digest.Digest{} + for _, layer := range layers { + var diffID digest.Digest + if layer.MediaType == utils.MediaTypeNydusBlob { + diffID = layer.Digest + } else { + diffID = digest.Digest(layer.Annotations[utils.LayerAnnotationUncompressed]) + } + if diffID.Validate() == nil { + diffIDs = append(diffIDs, diffID) + } else { + logrus.Warn("Drop the entire diff id list due to an invalid diff id") + diffIDs = []digest.Digest{} + // It is possible that some existing cache images don't have diff ids, + // but we can't break the cache export, so just break the loop. + break + } + } + + // Prepare empty image config, just for registry API compatibility, + // manifest requires a valid config field. + configMediaType := ocispec.MediaTypeImageConfig + if cache.opt.DockerV2Format { + configMediaType = images.MediaTypeDockerSchema2Config + } + config := ocispec.Image{ + Config: ocispec.ImageConfig{}, + RootFS: ocispec.RootFS{ + Type: "layers", + // Layers from manifest must be match image config. 
+ DiffIDs: diffIDs, + }, + } + configDesc, configBytes, err := utils.MarshalToDesc(config, configMediaType) + if err != nil { + return errors.Wrap(err, "Marshal cache config") + } + if err := cache.remote.Push(ctx, *configDesc, false, bytes.NewReader(configBytes)); err != nil { + return errors.Wrap(err, "Push cache config") + } + + // Push cache manifest to remote registry + mediaType := ocispec.MediaTypeImageManifest + if cache.opt.DockerV2Format { + mediaType = images.MediaTypeDockerSchema2Manifest + } + + manifest := Manifest{ + MediaType: mediaType, + Manifest: ocispec.Manifest{ + Versioned: specs.Versioned{ + SchemaVersion: 2, + }, + // Just for registry API compatibility, registry required a + // valid config field. + Config: *configDesc, + Layers: layers, + Annotations: map[string]string{ + utils.ManifestNydusCache: cache.opt.Version, + utils.LayerAnnotationNydusFsVersion: cache.opt.FsVersion, + }, + }, + } + + manifestDesc, manifestBytes, err := utils.MarshalToDesc(manifest, manifest.MediaType) + if err != nil { + return errors.Wrap(err, "Push cache manifest") + } + + if err := cache.remote.Push(ctx, *manifestDesc, false, bytes.NewReader(manifestBytes)); err != nil { + return errors.Wrap(err, "Push cache manifest") + } + + return nil +} + +// Import pulls cache manifest index from remote registry +func (cache *Cache) Import(ctx context.Context) error { + manifestDesc, err := cache.remote.Resolve(ctx) + if err != nil { + return errors.Wrap(err, "Resolve cache image") + } + + // Fetch cache manifest from remote registry + manifestReader, err := cache.remote.Pull(ctx, *manifestDesc, true) + if err != nil { + return errors.Wrap(err, "Pull cache image") + } + defer manifestReader.Close() + + manifestBytes, err := io.ReadAll(manifestReader) + if err != nil { + return errors.Wrap(err, "Read cache manifest") + } + + var manifest Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + return errors.Wrap(err, "Unmarshal cache manifest") + } + + // Discard the cache if mismatched version + if manifest.Annotations[utils.ManifestNydusCache] != cache.opt.Version { + return fmt.Errorf( + "unmatched cache image version %s, required to be %s", + manifest.Annotations[utils.ManifestNydusCache], cache.opt.Version, + ) + } + + // Discard the cache if mismatched RAFS FsVersion + // If utils.LayerAnnotationNydusFsVersion == "" and cache.opt.FsVersion == "5", + // it should be old cache image. 
+ if manifest.Annotations[utils.LayerAnnotationNydusFsVersion] != cache.opt.FsVersion && + !(manifest.Annotations[utils.LayerAnnotationNydusFsVersion] == "" && cache.opt.FsVersion == "5") { + return fmt.Errorf( + "unmatched fs version %s, required to be %s", + manifest.Annotations[utils.LayerAnnotationNydusFsVersion], cache.opt.FsVersion, + ) + } + + cache.importRecordsFromLayers(manifest.Layers) + + return nil +} + +// Check checks bootstrap & blob layer exists in registry or storage backend +func (cache *Cache) Check(ctx context.Context, layerChainID digest.Digest) (*Record, io.ReadCloser, io.ReadCloser, error) { + record, ok := cache.pulledRecords[layerChainID] + if !ok { + return nil, nil, nil, nil + } + + // Check bootstrap layer on cache + bootstrapReader, err := cache.remote.Pull(ctx, *record.NydusBootstrapDesc, true) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "Check bootstrap layer") + } + defer func() { + if err != nil { + bootstrapReader.Close() + } + }() + + var exist bool + var blobReader io.ReadCloser + + // Check blob layer on cache + if record.NydusBlobDesc != nil { + if cache.opt.Backend.Type() == backend.RegistryBackend { + blobReader, err = cache.remote.Pull(ctx, *record.NydusBlobDesc, true) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "Check blob layer") + } + } else { + exist, err = cache.opt.Backend.Check(record.NydusBlobDesc.Digest.Hex()) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "Check blob on backend") + } else if !exist { + err = errors.New("Not found blob on backend") + return nil, nil, nil, err + } + } + } + + return record, bootstrapReader, blobReader, nil +} + +// Record puts new bootstrap & blob layer to cache record, it's a limited queue. +func (cache *Cache) Record(records []*Record) { + moveFront := map[digest.Digest]bool{} + for _, record := range records { + moveFront[record.SourceChainID] = true + } + + pushedRecords := records + for _, record := range cache.pushedRecords { + if !moveFront[record.SourceChainID] { + pushedRecords = append(pushedRecords, record) + if len(pushedRecords) >= int(cache.opt.MaxRecords) { + break + } + } + } + + if len(pushedRecords) > int(cache.opt.MaxRecords) { + cache.pushedRecords = pushedRecords[:int(cache.opt.MaxRecords)] + } else { + cache.pushedRecords = pushedRecords + } +} + +// PullBootstrap pulls bootstrap layer from registry, and unpack to a specified path, +// we can use it to prepare parent bootstrap for building. +func (cache *Cache) PullBootstrap(ctx context.Context, bootstrapDesc *ocispec.Descriptor, target string) error { + reader, err := cache.remote.Pull(ctx, *bootstrapDesc, true) + if err != nil { + return errors.Wrap(err, "Pull cached bootstrap layer") + } + defer reader.Close() + + if err := utils.UnpackFile(reader, utils.BootstrapFileNameInLayer, target); err != nil { + return errors.Wrap(err, "Unpack cached bootstrap layer") + } + + return nil +} + +// Push pushes cache image to registry +func (cache *Cache) Push(ctx context.Context, desc ocispec.Descriptor, reader io.Reader) error { + return cache.remote.Push(ctx, desc, true, reader) +} diff --git a/contrib/nydusify/pkg/cache/cache_test.go b/contrib/nydusify/pkg/cache/cache_test.go index 1f97ff6ed5a..a1ee616601c 100644 --- a/contrib/nydusify/pkg/cache/cache_test.go +++ b/contrib/nydusify/pkg/cache/cache_test.go @@ -1,137 +1,137 @@ -// Copyright 2020 Ant Group. All rights reserved. 
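
Aside (not part of the patch): the doc comment on `Cache` describes the intended flow — import the cache manifest, check each source layer's ChainID before building, then record and export the updated layers. A hedged sketch of the caller side under that flow, assuming `provider.DefaultRemote` returns the `*remote.Remote` that `cache.New` expects; the image reference, version string, and ChainID below are placeholders.

```
package main

import (
	"context"
	"log"

	"github.com/opencontainers/go-digest"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend"
	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/cache"
	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
)

func main() {
	ctx := context.Background()

	// Hypothetical cache image reference; insecure registry access disabled.
	cacheRemote, err := provider.DefaultRemote("registry.example.com/app:nydus-cache", false)
	if err != nil {
		log.Fatal(err)
	}

	buildCache, err := cache.New(cacheRemote, cache.Opt{
		MaxRecords: 200,
		Version:    "v1", // placeholder build-cache version string
		FsVersion:  "6",
		Backend:    &backend.Registry{}, // the real converter wires its storage backend here
	})
	if err != nil {
		log.Fatal(err)
	}

	// 1. Import existing records; an error just means the cache cannot be reused.
	if err := buildCache.Import(ctx); err != nil {
		log.Printf("cache not usable: %v", err)
	}

	// 2. Before building a layer, look it up by the source layer ChainID.
	chainID := digest.FromString("example-chain-id") // placeholder ChainID
	record, bootstrapReader, blobReader, err := buildCache.Check(ctx, chainID)
	if err == nil && record != nil {
		defer bootstrapReader.Close()
		if blobReader != nil {
			defer blobReader.Close()
		}
		// Reuse the cached bootstrap/blob instead of rebuilding the layer.
	}

	// 3. After building, record the new layers and export the cache manifest.
	//    The slice would be filled with real build results; with no records,
	//    Export is a no-op.
	buildCache.Record([]*cache.Record{})
	if err := buildCache.Export(ctx); err != nil {
		log.Fatal(err)
	}
}
```
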
-// -// SPDX-License-Identifier: Apache-2.0 - -package cache - -import ( - "fmt" - "strconv" - "testing" - - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/stretchr/testify/assert" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -func makeRecord(id int64, hashBlob bool) *Record { - var blobDesc *ocispec.Descriptor - idStr := strconv.FormatInt(id, 10) - if hashBlob { - blobDesc = &ocispec.Descriptor{ - MediaType: utils.MediaTypeNydusBlob, - Digest: digest.FromString("blob-" + idStr), - Size: id, - } - } - return &Record{ - SourceChainID: digest.FromString("chain-" + idStr), - NydusBootstrapDesc: &ocispec.Descriptor{ - MediaType: ocispec.MediaTypeImageLayerGzip, - Digest: digest.FromString("bootstrap-" + idStr), - Size: id, - }, - NydusBootstrapDiffID: digest.FromString("bootstrap-uncompressed-" + idStr), - NydusBlobDesc: blobDesc, - } -} - -func makeBootstrapLayer(id int64, hasBlob bool) ocispec.Descriptor { - idStr := strconv.FormatInt(id, 10) - desc := ocispec.Descriptor{ - MediaType: ocispec.MediaTypeImageLayerGzip, - Digest: digest.FromString("bootstrap-" + idStr), - Size: id, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBootstrap: "true", - utils.LayerAnnotationNydusFsVersion: "6", - utils.LayerAnnotationNydusSourceChainID: digest.FromString("chain-" + idStr).String(), - utils.LayerAnnotationUncompressed: digest.FromString("bootstrap-uncompressed-" + idStr).String(), - }, - } - if hasBlob { - desc.Annotations[utils.LayerAnnotationNydusBlobDigest] = digest.FromString("blob-" + idStr).String() - desc.Annotations[utils.LayerAnnotationNydusBlobSize] = fmt.Sprintf("%d", id) - } - return desc -} - -func makeBlobLayer(id int64) ocispec.Descriptor { - idStr := strconv.FormatInt(id, 10) - return ocispec.Descriptor{ - MediaType: utils.MediaTypeNydusBlob, - Digest: digest.FromString("blob-" + idStr), - Size: id, - Annotations: map[string]string{ - utils.LayerAnnotationNydusBlob: "true", - utils.LayerAnnotationNydusSourceChainID: digest.FromString("chain-" + idStr).String(), - }, - } -} - -func testWithBackend(t *testing.T, _backend backend.Backend) { - cache, err := New(nil, Opt{ - MaxRecords: 3, - DockerV2Format: false, - Backend: _backend, - FsVersion: "6", - }) - assert.Nil(t, err) - - exported := []*Record{ - makeRecord(1, true), - makeRecord(2, true), - makeRecord(3, false), - } - cache.Record(exported) - cache.Record(exported) - layers := cache.exportRecordsToLayers() - - if _backend.Type() == backend.RegistryBackend { - assert.Equal(t, layers, []ocispec.Descriptor{ - makeBootstrapLayer(1, false), - makeBlobLayer(1), - makeBootstrapLayer(2, false), - makeBlobLayer(2), - makeBootstrapLayer(3, false), - }) - } else { - assert.Equal(t, layers, []ocispec.Descriptor{ - makeBootstrapLayer(1, true), - makeBootstrapLayer(2, true), - makeBootstrapLayer(3, false), - }) - } - - cache.importRecordsFromLayers(layers) - cache.Record([]*Record{ - makeRecord(4, true), - makeRecord(5, true), - }) - layers = cache.exportRecordsToLayers() - - if _backend.Type() == backend.RegistryBackend { - assert.Equal(t, layers, []ocispec.Descriptor{ - makeBootstrapLayer(4, false), - makeBlobLayer(4), - makeBootstrapLayer(5, false), - makeBlobLayer(5), - makeBootstrapLayer(1, false), - makeBlobLayer(1), - }) - } else { - assert.Equal(t, layers, []ocispec.Descriptor{ - makeBootstrapLayer(4, true), - makeBootstrapLayer(5, true), - makeBootstrapLayer(1, true), - 
}) - } -} - -func TestCache(t *testing.T) { - testWithBackend(t, &backend.Registry{}) - testWithBackend(t, &backend.OSSBackend{}) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package cache + +import ( + "fmt" + "strconv" + "testing" + + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +func makeRecord(id int64, hashBlob bool) *Record { + var blobDesc *ocispec.Descriptor + idStr := strconv.FormatInt(id, 10) + if hashBlob { + blobDesc = &ocispec.Descriptor{ + MediaType: utils.MediaTypeNydusBlob, + Digest: digest.FromString("blob-" + idStr), + Size: id, + } + } + return &Record{ + SourceChainID: digest.FromString("chain-" + idStr), + NydusBootstrapDesc: &ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageLayerGzip, + Digest: digest.FromString("bootstrap-" + idStr), + Size: id, + }, + NydusBootstrapDiffID: digest.FromString("bootstrap-uncompressed-" + idStr), + NydusBlobDesc: blobDesc, + } +} + +func makeBootstrapLayer(id int64, hasBlob bool) ocispec.Descriptor { + idStr := strconv.FormatInt(id, 10) + desc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageLayerGzip, + Digest: digest.FromString("bootstrap-" + idStr), + Size: id, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBootstrap: "true", + utils.LayerAnnotationNydusFsVersion: "6", + utils.LayerAnnotationNydusSourceChainID: digest.FromString("chain-" + idStr).String(), + utils.LayerAnnotationUncompressed: digest.FromString("bootstrap-uncompressed-" + idStr).String(), + }, + } + if hasBlob { + desc.Annotations[utils.LayerAnnotationNydusBlobDigest] = digest.FromString("blob-" + idStr).String() + desc.Annotations[utils.LayerAnnotationNydusBlobSize] = fmt.Sprintf("%d", id) + } + return desc +} + +func makeBlobLayer(id int64) ocispec.Descriptor { + idStr := strconv.FormatInt(id, 10) + return ocispec.Descriptor{ + MediaType: utils.MediaTypeNydusBlob, + Digest: digest.FromString("blob-" + idStr), + Size: id, + Annotations: map[string]string{ + utils.LayerAnnotationNydusBlob: "true", + utils.LayerAnnotationNydusSourceChainID: digest.FromString("chain-" + idStr).String(), + }, + } +} + +func testWithBackend(t *testing.T, _backend backend.Backend) { + cache, err := New(nil, Opt{ + MaxRecords: 3, + DockerV2Format: false, + Backend: _backend, + FsVersion: "6", + }) + assert.Nil(t, err) + + exported := []*Record{ + makeRecord(1, true), + makeRecord(2, true), + makeRecord(3, false), + } + cache.Record(exported) + cache.Record(exported) + layers := cache.exportRecordsToLayers() + + if _backend.Type() == backend.RegistryBackend { + assert.Equal(t, layers, []ocispec.Descriptor{ + makeBootstrapLayer(1, false), + makeBlobLayer(1), + makeBootstrapLayer(2, false), + makeBlobLayer(2), + makeBootstrapLayer(3, false), + }) + } else { + assert.Equal(t, layers, []ocispec.Descriptor{ + makeBootstrapLayer(1, true), + makeBootstrapLayer(2, true), + makeBootstrapLayer(3, false), + }) + } + + cache.importRecordsFromLayers(layers) + cache.Record([]*Record{ + makeRecord(4, true), + makeRecord(5, true), + }) + layers = cache.exportRecordsToLayers() + + if _backend.Type() == backend.RegistryBackend { + assert.Equal(t, layers, []ocispec.Descriptor{ + makeBootstrapLayer(4, false), + makeBlobLayer(4), + makeBootstrapLayer(5, false), + makeBlobLayer(5), + makeBootstrapLayer(1, 
false), + makeBlobLayer(1), + }) + } else { + assert.Equal(t, layers, []ocispec.Descriptor{ + makeBootstrapLayer(4, true), + makeBootstrapLayer(5, true), + makeBootstrapLayer(1, true), + }) + } +} + +func TestCache(t *testing.T) { + testWithBackend(t, &backend.Registry{}) + testWithBackend(t, &backend.OSSBackend{}) +} diff --git a/contrib/nydusify/pkg/cache/spec.go b/contrib/nydusify/pkg/cache/spec.go index e96906624ef..24876b8d407 100644 --- a/contrib/nydusify/pkg/cache/spec.go +++ b/contrib/nydusify/pkg/cache/spec.go @@ -1,22 +1,22 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package cache - -import ( - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -type Manifest struct { - MediaType string `json:"mediaType,omitempty"` - ocispec.Manifest -} - -type Record struct { - SourceChainID digest.Digest - NydusBlobDesc *ocispec.Descriptor - NydusBootstrapDesc *ocispec.Descriptor - NydusBootstrapDiffID digest.Digest -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package cache + +import ( + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +type Manifest struct { + MediaType string `json:"mediaType,omitempty"` + ocispec.Manifest +} + +type Record struct { + SourceChainID digest.Digest + NydusBlobDesc *ocispec.Descriptor + NydusBootstrapDesc *ocispec.Descriptor + NydusBootstrapDiffID digest.Digest +} diff --git a/contrib/nydusify/pkg/checker/checker.go b/contrib/nydusify/pkg/checker/checker.go index f8d81a40c11..8162800b295 100644 --- a/contrib/nydusify/pkg/checker/checker.go +++ b/contrib/nydusify/pkg/checker/checker.go @@ -1,194 +1,194 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package checker - -import ( - "context" - "os" - "path/filepath" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/rule" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -// Opt defines Checker options. -// Note: target is the Nydus image reference. -type Opt struct { - WorkDir string - Source string - Target string - SourceInsecure bool - TargetInsecure bool - MultiPlatform bool - NydusImagePath string - NydusdPath string - BackendType string - BackendConfig string - ExpectedArch string -} - -// Checker validates Nydus image manifest, bootstrap and mounts filesystem -// by Nydusd to compare file metadata and data with OCI image. -type Checker struct { - Opt - sourceParser *parser.Parser - targetParser *parser.Parser -} - -// New creates Checker instance, target is the Nydus image reference. 
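
Aside (not part of the patch): checker.go ties the validation rules together; `New` builds the source and target parsers from `Opt`, and `Check` (shown further down in this hunk) runs the manifest, bootstrap, and filesystem rules against a work directory. A hedged usage sketch, with every reference, path, and value below chosen purely for illustration.

```
package main

import (
	"context"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker"
)

func main() {
	c, err := checker.New(checker.Opt{
		WorkDir:        "/tmp/nydusify-check",
		Source:         "registry.example.com/app:latest",       // OCI source image
		Target:         "registry.example.com/app:latest-nydus", // Nydus image to verify
		NydusImagePath: "/usr/local/bin/nydus-image",
		NydusdPath:     "/usr/local/bin/nydusd",
		BackendType:    "registry",
		ExpectedArch:   "amd64",
	})
	if err != nil {
		log.Fatal(err)
	}
	if err := c.Check(context.Background()); err != nil {
		log.Fatalf("nydus image verification failed: %v", err)
	}
}
```
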
-func New(opt Opt) (*Checker, error) { - // TODO: support source and target resolver - targetRemote, err := provider.DefaultRemote(opt.Target, opt.TargetInsecure) - if err != nil { - return nil, errors.Wrap(err, "Init target image parser") - } - targetParser, err := parser.New(targetRemote, opt.ExpectedArch) - if err != nil { - return nil, errors.Wrap(err, "failed to create parser") - } - - var sourceParser *parser.Parser - if opt.Source != "" { - sourceRemote, err := provider.DefaultRemote(opt.Source, opt.SourceInsecure) - if err != nil { - return nil, errors.Wrap(err, "Init source image parser") - } - sourceParser, err = parser.New(sourceRemote, opt.ExpectedArch) - if sourceParser == nil { - return nil, errors.Wrap(err, "failed to create parser") - } - } - - checker := &Checker{ - Opt: opt, - sourceParser: sourceParser, - targetParser: targetParser, - } - - return checker, nil -} - -// Check checks Nydus image, and outputs image information to work -// directory, the check workflow is composed of various rules. -func (checker *Checker) Check(ctx context.Context) error { - if err := checker.check(ctx); err != nil { - if utils.RetryWithHTTP(err) { - if checker.sourceParser != nil { - checker.sourceParser.Remote.MaybeWithHTTP(err) - } - checker.targetParser.Remote.MaybeWithHTTP(err) - return checker.check(ctx) - } - return err - } - return nil -} - -// Check checks Nydus image, and outputs image information to work -// directory, the check workflow is composed of various rules. -func (checker *Checker) check(ctx context.Context) error { - targetParsed, err := checker.targetParser.Parse(ctx) - if err != nil { - return errors.Wrap(err, "parse Nydus image") - } - - var sourceParsed *parser.Parsed - if checker.sourceParser != nil { - sourceParsed, err = checker.sourceParser.Parse(ctx) - if err != nil { - return errors.Wrap(err, "parse source image") - } - } else { - sourceParsed = targetParsed - } - - if err := os.RemoveAll(checker.WorkDir); err != nil { - return errors.Wrap(err, "clean up work directory") - } - - if err := os.MkdirAll(filepath.Join(checker.WorkDir, "fs"), 0755); err != nil { - return errors.Wrap(err, "create work directory") - } - - if err := checker.Output(ctx, sourceParsed, targetParsed, checker.WorkDir); err != nil { - return errors.Wrap(err, "output image information") - } - - mode := "direct" - digestValidate := false - if targetParsed.NydusImage != nil { - nydusManifest := parser.FindNydusBootstrapDesc(&targetParsed.NydusImage.Manifest) - if nydusManifest != nil { - v := utils.GetNydusFsVersionOrDefault(nydusManifest.Annotations, utils.V5) - if v == utils.V5 { - // Digest validate is not currently supported for v6, - // but v5 supports it. In order to make the check more sufficient, - // this validate needs to be turned on for v5. 
- digestValidate = true - } - } - } - - var sourceRemote *remote.Remote - if checker.sourceParser != nil { - sourceRemote = checker.sourceParser.Remote - } - - rules := []rule.Rule{ - &rule.ManifestRule{ - SourceParsed: sourceParsed, - TargetParsed: targetParsed, - MultiPlatform: checker.MultiPlatform, - BackendType: checker.BackendType, - ExpectedArch: checker.ExpectedArch, - }, - &rule.BootstrapRule{ - Parsed: targetParsed, - NydusImagePath: checker.NydusImagePath, - BackendType: checker.BackendType, - BootstrapPath: filepath.Join(checker.WorkDir, "nydus_bootstrap"), - DebugOutputPath: filepath.Join(checker.WorkDir, "nydus_bootstrap_debug.json"), - }, - &rule.FilesystemRule{ - Source: checker.Source, - SourceMountPath: filepath.Join(checker.WorkDir, "fs/source_mounted"), - SourceParsed: sourceParsed, - SourcePath: filepath.Join(checker.WorkDir, "fs/source"), - SourceRemote: sourceRemote, - Target: checker.Target, - TargetInsecure: checker.TargetInsecure, - PlainHTTP: checker.targetParser.Remote.IsWithHTTP(), - NydusdConfig: tool.NydusdConfig{ - NydusdPath: checker.NydusdPath, - BackendType: checker.BackendType, - BackendConfig: checker.BackendConfig, - BootstrapPath: filepath.Join(checker.WorkDir, "nydus_bootstrap"), - ConfigPath: filepath.Join(checker.WorkDir, "fs/nydusd_config.json"), - BlobCacheDir: filepath.Join(checker.WorkDir, "fs/nydus_blobs"), - MountPath: filepath.Join(checker.WorkDir, "fs/nydus_mounted"), - APISockPath: filepath.Join(checker.WorkDir, "fs/nydus_api.sock"), - Mode: mode, - DigestValidate: digestValidate, - }, - }, - } - - for _, rule := range rules { - if err := rule.Validate(); err != nil { - return errors.Wrapf(err, "validate rule %s", rule.Name()) - } - } - - logrus.Infof("Verified Nydus image %s", checker.targetParser.Remote.Ref) - - return nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package checker + +import ( + "context" + "os" + "path/filepath" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/rule" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +// Opt defines Checker options. +// Note: target is the Nydus image reference. +type Opt struct { + WorkDir string + Source string + Target string + SourceInsecure bool + TargetInsecure bool + MultiPlatform bool + NydusImagePath string + NydusdPath string + BackendType string + BackendConfig string + ExpectedArch string +} + +// Checker validates Nydus image manifest, bootstrap and mounts filesystem +// by Nydusd to compare file metadata and data with OCI image. +type Checker struct { + Opt + sourceParser *parser.Parser + targetParser *parser.Parser +} + +// New creates Checker instance, target is the Nydus image reference. 
+func New(opt Opt) (*Checker, error) { + // TODO: support source and target resolver + targetRemote, err := provider.DefaultRemote(opt.Target, opt.TargetInsecure) + if err != nil { + return nil, errors.Wrap(err, "Init target image parser") + } + targetParser, err := parser.New(targetRemote, opt.ExpectedArch) + if err != nil { + return nil, errors.Wrap(err, "failed to create parser") + } + + var sourceParser *parser.Parser + if opt.Source != "" { + sourceRemote, err := provider.DefaultRemote(opt.Source, opt.SourceInsecure) + if err != nil { + return nil, errors.Wrap(err, "Init source image parser") + } + sourceParser, err = parser.New(sourceRemote, opt.ExpectedArch) + if sourceParser == nil { + return nil, errors.Wrap(err, "failed to create parser") + } + } + + checker := &Checker{ + Opt: opt, + sourceParser: sourceParser, + targetParser: targetParser, + } + + return checker, nil +} + +// Check checks Nydus image, and outputs image information to work +// directory, the check workflow is composed of various rules. +func (checker *Checker) Check(ctx context.Context) error { + if err := checker.check(ctx); err != nil { + if utils.RetryWithHTTP(err) { + if checker.sourceParser != nil { + checker.sourceParser.Remote.MaybeWithHTTP(err) + } + checker.targetParser.Remote.MaybeWithHTTP(err) + return checker.check(ctx) + } + return err + } + return nil +} + +// Check checks Nydus image, and outputs image information to work +// directory, the check workflow is composed of various rules. +func (checker *Checker) check(ctx context.Context) error { + targetParsed, err := checker.targetParser.Parse(ctx) + if err != nil { + return errors.Wrap(err, "parse Nydus image") + } + + var sourceParsed *parser.Parsed + if checker.sourceParser != nil { + sourceParsed, err = checker.sourceParser.Parse(ctx) + if err != nil { + return errors.Wrap(err, "parse source image") + } + } else { + sourceParsed = targetParsed + } + + if err := os.RemoveAll(checker.WorkDir); err != nil { + return errors.Wrap(err, "clean up work directory") + } + + if err := os.MkdirAll(filepath.Join(checker.WorkDir, "fs"), 0755); err != nil { + return errors.Wrap(err, "create work directory") + } + + if err := checker.Output(ctx, sourceParsed, targetParsed, checker.WorkDir); err != nil { + return errors.Wrap(err, "output image information") + } + + mode := "direct" + digestValidate := false + if targetParsed.NydusImage != nil { + nydusManifest := parser.FindNydusBootstrapDesc(&targetParsed.NydusImage.Manifest) + if nydusManifest != nil { + v := utils.GetNydusFsVersionOrDefault(nydusManifest.Annotations, utils.V5) + if v == utils.V5 { + // Digest validate is not currently supported for v6, + // but v5 supports it. In order to make the check more sufficient, + // this validate needs to be turned on for v5. 
+ digestValidate = true + } + } + } + + var sourceRemote *remote.Remote + if checker.sourceParser != nil { + sourceRemote = checker.sourceParser.Remote + } + + rules := []rule.Rule{ + &rule.ManifestRule{ + SourceParsed: sourceParsed, + TargetParsed: targetParsed, + MultiPlatform: checker.MultiPlatform, + BackendType: checker.BackendType, + ExpectedArch: checker.ExpectedArch, + }, + &rule.BootstrapRule{ + Parsed: targetParsed, + NydusImagePath: checker.NydusImagePath, + BackendType: checker.BackendType, + BootstrapPath: filepath.Join(checker.WorkDir, "nydus_bootstrap"), + DebugOutputPath: filepath.Join(checker.WorkDir, "nydus_bootstrap_debug.json"), + }, + &rule.FilesystemRule{ + Source: checker.Source, + SourceMountPath: filepath.Join(checker.WorkDir, "fs/source_mounted"), + SourceParsed: sourceParsed, + SourcePath: filepath.Join(checker.WorkDir, "fs/source"), + SourceRemote: sourceRemote, + Target: checker.Target, + TargetInsecure: checker.TargetInsecure, + PlainHTTP: checker.targetParser.Remote.IsWithHTTP(), + NydusdConfig: tool.NydusdConfig{ + NydusdPath: checker.NydusdPath, + BackendType: checker.BackendType, + BackendConfig: checker.BackendConfig, + BootstrapPath: filepath.Join(checker.WorkDir, "nydus_bootstrap"), + ConfigPath: filepath.Join(checker.WorkDir, "fs/nydusd_config.json"), + BlobCacheDir: filepath.Join(checker.WorkDir, "fs/nydus_blobs"), + MountPath: filepath.Join(checker.WorkDir, "fs/nydus_mounted"), + APISockPath: filepath.Join(checker.WorkDir, "fs/nydus_api.sock"), + Mode: mode, + DigestValidate: digestValidate, + }, + }, + } + + for _, rule := range rules { + if err := rule.Validate(); err != nil { + return errors.Wrapf(err, "validate rule %s", rule.Name()) + } + } + + logrus.Infof("Verified Nydus image %s", checker.targetParser.Remote.Ref) + + return nil +} diff --git a/contrib/nydusify/pkg/checker/output.go b/contrib/nydusify/pkg/checker/output.go index a9678a008c4..ebf517bd625 100644 --- a/contrib/nydusify/pkg/checker/output.go +++ b/contrib/nydusify/pkg/checker/output.go @@ -1,96 +1,96 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package checker - -import ( - "context" - "encoding/json" - "os" - "path/filepath" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -func prettyDump(obj interface{}, name string) error { - bytes, err := json.MarshalIndent(obj, "", " ") - if err != nil { - return err - } - return os.WriteFile(name, bytes, 0644) -} - -// Output outputs OCI and Nydus image manifest, index, config to JSON file. -// Prefer to use source image to output OCI image information. 
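With the checker above in place, a hypothetical caller drives it roughly as follows. The image references, tool paths and work directory are assumptions; leaving `BackendType` and `BackendConfig` empty lets the filesystem rule derive a registry backend config from local docker credentials, as the code above shows.

```go
// Illustrative sketch of running the Checker end to end from a caller.
package main

import (
	"context"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker"
)

func main() {
	c, err := checker.New(checker.Opt{
		WorkDir:        "./output",               // assumed scratch dir, removed and recreated by check()
		Source:         "example.com/app:latest", // hypothetical OCI image
		Target:         "example.com/app:nydus",  // hypothetical Nydus image
		NydusImagePath: "nydus-image",            // assumed to be on PATH
		NydusdPath:     "nydusd",                 // assumed to be on PATH
		ExpectedArch:   "amd64",
	})
	if err != nil {
		log.Fatal(err)
	}

	// Runs the manifest, bootstrap and filesystem rules, falling back to
	// plain HTTP when the registry requires it (see Check above).
	if err := c.Check(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```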
-func (checker *Checker) Output( - ctx context.Context, sourceParsed, targetParsed *parser.Parsed, outputPath string, -) error { - logrus.Infof("Dumping OCI and Nydus manifests to %s", outputPath) - - if sourceParsed.Index != nil { - if err := prettyDump( - sourceParsed.Index, - filepath.Join(outputPath, "oci_index.json"), - ); err != nil { - return errors.Wrap(err, "output oci index file") - } - } - - if targetParsed.Index != nil { - if err := prettyDump( - targetParsed.Index, - filepath.Join(outputPath, "nydus_index.json"), - ); err != nil { - return errors.Wrap(err, "output nydus index file") - } - } - - if sourceParsed.OCIImage != nil { - if err := prettyDump( - sourceParsed.OCIImage.Manifest, - filepath.Join(outputPath, "oci_manifest.json"), - ); err != nil { - return errors.Wrap(err, "output OCI manifest file") - } - if err := prettyDump( - sourceParsed.OCIImage.Config, - filepath.Join(outputPath, "oci_config.json"), - ); err != nil { - return errors.Wrap(err, "output OCI config file") - } - } - - if targetParsed.NydusImage != nil { - if err := prettyDump( - targetParsed.NydusImage.Manifest, - filepath.Join(outputPath, "nydus_manifest.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus manifest file") - } - if err := prettyDump( - targetParsed.NydusImage.Config, - filepath.Join(outputPath, "nydus_config.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus config file") - } - - target := filepath.Join(outputPath, "nydus_bootstrap") - logrus.Infof("Pulling Nydus bootstrap to %s", target) - bootstrapReader, err := checker.targetParser.PullNydusBootstrap(ctx, targetParsed.NydusImage) - if err != nil { - return errors.Wrap(err, "pull Nydus bootstrap layer") - } - defer bootstrapReader.Close() - - if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, target); err != nil { - return errors.Wrap(err, "unpack Nydus bootstrap layer") - } - } - - return nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package checker + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +func prettyDump(obj interface{}, name string) error { + bytes, err := json.MarshalIndent(obj, "", " ") + if err != nil { + return err + } + return os.WriteFile(name, bytes, 0644) +} + +// Output outputs OCI and Nydus image manifest, index, config to JSON file. +// Prefer to use source image to output OCI image information. 
+func (checker *Checker) Output( + ctx context.Context, sourceParsed, targetParsed *parser.Parsed, outputPath string, +) error { + logrus.Infof("Dumping OCI and Nydus manifests to %s", outputPath) + + if sourceParsed.Index != nil { + if err := prettyDump( + sourceParsed.Index, + filepath.Join(outputPath, "oci_index.json"), + ); err != nil { + return errors.Wrap(err, "output oci index file") + } + } + + if targetParsed.Index != nil { + if err := prettyDump( + targetParsed.Index, + filepath.Join(outputPath, "nydus_index.json"), + ); err != nil { + return errors.Wrap(err, "output nydus index file") + } + } + + if sourceParsed.OCIImage != nil { + if err := prettyDump( + sourceParsed.OCIImage.Manifest, + filepath.Join(outputPath, "oci_manifest.json"), + ); err != nil { + return errors.Wrap(err, "output OCI manifest file") + } + if err := prettyDump( + sourceParsed.OCIImage.Config, + filepath.Join(outputPath, "oci_config.json"), + ); err != nil { + return errors.Wrap(err, "output OCI config file") + } + } + + if targetParsed.NydusImage != nil { + if err := prettyDump( + targetParsed.NydusImage.Manifest, + filepath.Join(outputPath, "nydus_manifest.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus manifest file") + } + if err := prettyDump( + targetParsed.NydusImage.Config, + filepath.Join(outputPath, "nydus_config.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus config file") + } + + target := filepath.Join(outputPath, "nydus_bootstrap") + logrus.Infof("Pulling Nydus bootstrap to %s", target) + bootstrapReader, err := checker.targetParser.PullNydusBootstrap(ctx, targetParsed.NydusImage) + if err != nil { + return errors.Wrap(err, "pull Nydus bootstrap layer") + } + defer bootstrapReader.Close() + + if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, target); err != nil { + return errors.Wrap(err, "unpack Nydus bootstrap layer") + } + } + + return nil +} diff --git a/contrib/nydusify/pkg/checker/rule/bootstrap.go b/contrib/nydusify/pkg/checker/rule/bootstrap.go index 5adbeab543c..46b960b9636 100644 --- a/contrib/nydusify/pkg/checker/rule/bootstrap.go +++ b/contrib/nydusify/pkg/checker/rule/bootstrap.go @@ -1,96 +1,96 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package rule - -import ( - "encoding/json" - "fmt" - "os" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" -) - -// BootstrapRule validates bootstrap in Nydus image -type BootstrapRule struct { - Parsed *parser.Parsed - BootstrapPath string - NydusImagePath string - DebugOutputPath string - BackendType string -} - -type bootstrapDebug struct { - Blobs []string `json:"blobs"` -} - -func (rule *BootstrapRule) Name() string { - return "Bootstrap" -} - -func (rule *BootstrapRule) Validate() error { - logrus.Infof("Checking Nydus bootstrap") - - // Get blob list in the blob table of bootstrap by calling - // `nydus-image check` command - builder := tool.NewBuilder(rule.NydusImagePath) - if err := builder.Check(tool.BuilderOption{ - BootstrapPath: rule.BootstrapPath, - DebugOutputPath: rule.DebugOutputPath, - }); err != nil { - return errors.Wrap(err, "invalid nydus bootstrap format") - } - - // For registry garbage collection, nydus puts the blobs to - // the layers in manifest, so here only need to check blob - // list consistency for registry backend. 
- if rule.BackendType != "registry" { - return nil - } - - // Parse blob list from blob layers in Nydus manifest - blobListInLayer := map[string]bool{} - layers := rule.Parsed.NydusImage.Manifest.Layers - for i, layer := range layers { - if i != len(layers)-1 { - blobListInLayer[layer.Digest.Hex()] = true - } - } - - // Parse blob list from blob table of bootstrap - var bootstrap bootstrapDebug - bootstrapBytes, err := os.ReadFile(rule.DebugOutputPath) - if err != nil { - return errors.Wrap(err, "read bootstrap debug json") - } - if err := json.Unmarshal(bootstrapBytes, &bootstrap); err != nil { - return errors.Wrap(err, "unmarshal bootstrap output JSON") - } - blobListInBootstrap := map[string]bool{} - lostInLayer := false - for _, blobID := range bootstrap.Blobs { - blobListInBootstrap[blobID] = true - if !blobListInLayer[blobID] { - lostInLayer = true - } - } - - if !lostInLayer { - return nil - } - - // The blobs recorded in blob table of bootstrap should all appear - // in the layers. - return fmt.Errorf( - "nydus blobs in the blob table of bootstrap(%d) should all appear in the layers of manifest(%d), %v != %v", - len(blobListInBootstrap), - len(blobListInLayer), - blobListInBootstrap, - blobListInLayer, - ) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package rule + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" +) + +// BootstrapRule validates bootstrap in Nydus image +type BootstrapRule struct { + Parsed *parser.Parsed + BootstrapPath string + NydusImagePath string + DebugOutputPath string + BackendType string +} + +type bootstrapDebug struct { + Blobs []string `json:"blobs"` +} + +func (rule *BootstrapRule) Name() string { + return "Bootstrap" +} + +func (rule *BootstrapRule) Validate() error { + logrus.Infof("Checking Nydus bootstrap") + + // Get blob list in the blob table of bootstrap by calling + // `nydus-image check` command + builder := tool.NewBuilder(rule.NydusImagePath) + if err := builder.Check(tool.BuilderOption{ + BootstrapPath: rule.BootstrapPath, + DebugOutputPath: rule.DebugOutputPath, + }); err != nil { + return errors.Wrap(err, "invalid nydus bootstrap format") + } + + // For registry garbage collection, nydus puts the blobs to + // the layers in manifest, so here only need to check blob + // list consistency for registry backend. + if rule.BackendType != "registry" { + return nil + } + + // Parse blob list from blob layers in Nydus manifest + blobListInLayer := map[string]bool{} + layers := rule.Parsed.NydusImage.Manifest.Layers + for i, layer := range layers { + if i != len(layers)-1 { + blobListInLayer[layer.Digest.Hex()] = true + } + } + + // Parse blob list from blob table of bootstrap + var bootstrap bootstrapDebug + bootstrapBytes, err := os.ReadFile(rule.DebugOutputPath) + if err != nil { + return errors.Wrap(err, "read bootstrap debug json") + } + if err := json.Unmarshal(bootstrapBytes, &bootstrap); err != nil { + return errors.Wrap(err, "unmarshal bootstrap output JSON") + } + blobListInBootstrap := map[string]bool{} + lostInLayer := false + for _, blobID := range bootstrap.Blobs { + blobListInBootstrap[blobID] = true + if !blobListInLayer[blobID] { + lostInLayer = true + } + } + + if !lostInLayer { + return nil + } + + // The blobs recorded in blob table of bootstrap should all appear + // in the layers. 
+ return fmt.Errorf( + "nydus blobs in the blob table of bootstrap(%d) should all appear in the layers of manifest(%d), %v != %v", + len(blobListInBootstrap), + len(blobListInLayer), + blobListInBootstrap, + blobListInLayer, + ) +} diff --git a/contrib/nydusify/pkg/checker/rule/filesystem.go b/contrib/nydusify/pkg/checker/rule/filesystem.go index 1bb881b7ce0..fa33cb3a6c8 100644 --- a/contrib/nydusify/pkg/checker/rule/filesystem.go +++ b/contrib/nydusify/pkg/checker/rule/filesystem.go @@ -1,372 +1,372 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package rule - -import ( - "context" - "encoding/base64" - "encoding/hex" - "encoding/json" - "fmt" - "os" - "path/filepath" - "reflect" - "syscall" - - "github.com/distribution/reference" - dockerconfig "github.com/docker/cli/cli/config" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/pkg/errors" - "github.com/pkg/xattr" - "github.com/sirupsen/logrus" -) - -// WorkerCount specifies source layer pull concurrency -var WorkerCount uint = 8 - -// FilesystemRule compares file metadata and data in the two mountpoints: -// Mounted by Nydusd for Nydus image, -// Mounted by Overlayfs for OCI image. -type FilesystemRule struct { - NydusdConfig tool.NydusdConfig - Source string - SourceMountPath string - SourceParsed *parser.Parsed - SourcePath string - SourceRemote *remote.Remote - Target string - TargetInsecure bool - PlainHTTP bool -} - -// Node records file metadata and file data hash. -type Node struct { - Path string - Size int64 - Mode os.FileMode - Rdev uint64 - Symlink string - UID uint32 - GID uint32 - Xattrs map[string][]byte - Hash []byte -} - -type RegistryBackendConfig struct { - Scheme string `json:"scheme"` - Host string `json:"host"` - Repo string `json:"repo"` - Auth string `json:"auth,omitempty"` - SkipVerify bool `json:"skip_verify,omitempty"` -} - -func (node *Node) String() string { - return fmt.Sprintf( - "Path: %s, Size: %d, Mode: %d, Rdev: %d, Symink: %s, UID: %d, GID: %d, "+ - "Xattrs: %v, Hash: %s", node.Path, node.Size, node.Mode, node.Rdev, node.Symlink, - node.UID, node.GID, node.Xattrs, hex.EncodeToString(node.Hash), - ) -} - -func (rule *FilesystemRule) Name() string { - return "Filesystem" -} - -func getXattrs(path string) (map[string][]byte, error) { - xattrs := make(map[string][]byte) - - names, err := xattr.LList(path) - if err != nil { - return nil, err - } - - for _, name := range names { - data, err := xattr.LGet(path, name) - if err != nil { - return nil, err - } - xattrs[name] = data - } - - return xattrs, nil -} - -func (rule *FilesystemRule) walk(rootfs string) (map[string]Node, error) { - nodes := map[string]Node{} - - if err := filepath.Walk(rootfs, func(path string, info os.FileInfo, err error) error { - if err != nil { - return errors.Wrapf(err, "Failed to stat file %s", path) - } - - rootfsPath, err := filepath.Rel(rootfs, path) - if err != nil { - return err - } - rootfsPath = filepath.Join("/", rootfsPath) - - var size int64 - if !info.IsDir() { - // Ignore directory size check - size = info.Size() - } - - mode := info.Mode() - var symlink string - if mode&os.ModeSymlink == os.ModeSymlink { - if symlink, err = os.Readlink(path); err != nil { - return errors.Wrapf(err, "read link %s", path) - } - } else { - symlink = rootfsPath - } - - var 
stat syscall.Stat_t - if err := syscall.Lstat(path, &stat); err != nil { - return errors.Wrapf(err, "lstat %s", path) - } - - xattrs, err := getXattrs(path) - if err != nil { - logrus.Warnf("Failed to get xattr: %s", err) - } - - // Calculate file data hash if the `backend-type` option be specified, - // this will cause that nydusd read data from backend, it's network load - var hash []byte - if rule.NydusdConfig.BackendType != "" && info.Mode().IsRegular() { - hash, err = utils.HashFile(path) - if err != nil { - return err - } - } - - node := Node{ - Path: rootfsPath, - Size: size, - Mode: mode, - Rdev: stat.Rdev, - Symlink: symlink, - UID: stat.Uid, - GID: stat.Gid, - Xattrs: xattrs, - Hash: hash, - } - nodes[rootfsPath] = node - - return nil - }); err != nil { - return nil, err - } - - return nodes, nil -} - -func (rule *FilesystemRule) pullSourceImage() (*tool.Image, error) { - layers := rule.SourceParsed.OCIImage.Manifest.Layers - worker := utils.NewWorkerPool(WorkerCount, uint(len(layers))) - - for idx := range layers { - worker.Put(func(idx int) func() error { - return func() error { - layer := layers[idx] - reader, err := rule.SourceRemote.Pull(context.Background(), layer, true) - if err != nil { - return errors.Wrap(err, "pull source image layers from the remote registry") - } - - if err = utils.UnpackTargz(context.Background(), filepath.Join(rule.SourcePath, fmt.Sprintf("layer-%d", idx)), reader, true); err != nil { - return errors.Wrap(err, "unpack source image layers") - } - - return nil - } - }(idx)) - } - - if err := <-worker.Waiter(); err != nil { - return nil, errors.Wrap(err, "pull source image layers in wait") - } - - return &tool.Image{ - Layers: layers, - Source: rule.Source, - SourcePath: rule.SourcePath, - Rootfs: rule.SourceMountPath, - }, nil -} - -func (rule *FilesystemRule) mountSourceImage() (*tool.Image, error) { - logrus.Infof("Mounting source image to %s", rule.SourceMountPath) - - image, err := rule.pullSourceImage() - if err != nil { - return nil, errors.Wrap(err, "pull source image") - } - - if err := image.Umount(); err != nil { - return nil, errors.Wrap(err, "umount previous rootfs") - } - - if err := image.Mount(); err != nil { - return nil, errors.Wrap(err, "mount source image") - } - - return image, nil -} - -func NewRegistryBackendConfig(parsed reference.Named) (RegistryBackendConfig, error) { - - backendConfig := RegistryBackendConfig{ - Scheme: "https", - Host: reference.Domain(parsed), - Repo: reference.Path(parsed), - } - - config := dockerconfig.LoadDefaultConfigFile(os.Stderr) - authConfig, err := config.GetAuthConfig(backendConfig.Host) - if err != nil { - return backendConfig, errors.Wrap(err, "get docker registry auth config") - } - var auth string - if authConfig.Username != "" && authConfig.Password != "" { - auth = base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", authConfig.Username, authConfig.Password))) - } - backendConfig.Auth = auth - - return backendConfig, nil -} - -func (rule *FilesystemRule) mountNydusImage() (*tool.Nydusd, error) { - logrus.Infof("Mounting Nydus image to %s", rule.NydusdConfig.MountPath) - - if err := os.MkdirAll(rule.NydusdConfig.BlobCacheDir, 0755); err != nil { - return nil, errors.Wrap(err, "create blob cache directory for Nydusd") - } - - if err := os.MkdirAll(rule.NydusdConfig.MountPath, 0755); err != nil { - return nil, errors.Wrap(err, "create mountpoint directory of Nydus image") - } - - parsed, err := reference.ParseNormalizedNamed(rule.Target) - if err != nil { - return nil, err - } - - if 
rule.NydusdConfig.BackendType == "" { - rule.NydusdConfig.BackendType = "registry" - - if rule.NydusdConfig.BackendConfig == "" { - backendConfig, err := NewRegistryBackendConfig(parsed) - if err != nil { - return nil, errors.Wrap(err, "failed to parse backend configuration") - } - - if rule.TargetInsecure { - backendConfig.SkipVerify = true - } - - if rule.PlainHTTP { - backendConfig.Scheme = "http" - } - - bytes, err := json.Marshal(backendConfig) - if err != nil { - return nil, errors.Wrap(err, "parse registry backend config") - } - rule.NydusdConfig.BackendConfig = string(bytes) - } - } - - nydusd, err := tool.NewNydusd(rule.NydusdConfig) - if err != nil { - return nil, errors.Wrap(err, "create Nydusd daemon") - } - - if err := nydusd.Mount(); err != nil { - return nil, errors.Wrap(err, "mount Nydus image") - } - - return nydusd, nil -} - -func (rule *FilesystemRule) verify() error { - logrus.Infof("Verifying filesystem for source and Nydus image") - - sourceNodes := map[string]Node{} - - // Concurrently walk the rootfs directory of source and Nydus image - walkErr := make(chan error) - go func() { - var err error - sourceNodes, err = rule.walk(rule.SourceMountPath) - walkErr <- err - }() - - nydusNodes, err := rule.walk(rule.NydusdConfig.MountPath) - if err != nil { - return errors.Wrap(err, "walk rootfs of Nydus image") - } - - if err := <-walkErr; err != nil { - return errors.Wrap(err, "walk rootfs of source image") - } - - for path, sourceNode := range sourceNodes { - nydusNode, exist := nydusNodes[path] - if !exist { - return fmt.Errorf("File not found in Nydus image: %s", path) - } - delete(nydusNodes, path) - - if path != "/" && !reflect.DeepEqual(sourceNode, nydusNode) { - return fmt.Errorf("File not match in Nydus image: %s <=> %s", sourceNode.String(), nydusNode.String()) - } - } - - for path := range nydusNodes { - return fmt.Errorf("File not found in source image: %s", path) - } - - return nil -} - -func (rule *FilesystemRule) Validate() error { - // Skip filesystem validation if no source image be specified - if rule.Source == "" { - return nil - } - - // Cleanup temporary directories - defer func() { - if err := os.RemoveAll(rule.SourcePath); err != nil { - logrus.WithError(err).Warnf("cleanup source image directory %s", rule.SourcePath) - } - if err := os.RemoveAll(rule.NydusdConfig.MountPath); err != nil { - logrus.WithError(err).Warnf("cleanup nydus image directory %s", rule.NydusdConfig.MountPath) - } - if err := os.RemoveAll(rule.NydusdConfig.BlobCacheDir); err != nil { - logrus.WithError(err).Warnf("cleanup nydus blob cache directory %s", rule.NydusdConfig.BlobCacheDir) - } - }() - - image, err := rule.mountSourceImage() - if err != nil { - return err - } - defer image.Umount() - - nydusd, err := rule.mountNydusImage() - if err != nil { - return err - } - defer nydusd.Umount(false) - - return rule.verify() -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package rule + +import ( + "context" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "path/filepath" + "reflect" + "syscall" + + "github.com/distribution/reference" + dockerconfig "github.com/docker/cli/cli/config" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/pkg/errors" + "github.com/pkg/xattr" + "github.com/sirupsen/logrus" +) + +// WorkerCount specifies source layer pull concurrency +var WorkerCount uint = 8 + +// FilesystemRule compares file metadata and data in the two mountpoints: +// Mounted by Nydusd for Nydus image, +// Mounted by Overlayfs for OCI image. +type FilesystemRule struct { + NydusdConfig tool.NydusdConfig + Source string + SourceMountPath string + SourceParsed *parser.Parsed + SourcePath string + SourceRemote *remote.Remote + Target string + TargetInsecure bool + PlainHTTP bool +} + +// Node records file metadata and file data hash. +type Node struct { + Path string + Size int64 + Mode os.FileMode + Rdev uint64 + Symlink string + UID uint32 + GID uint32 + Xattrs map[string][]byte + Hash []byte +} + +type RegistryBackendConfig struct { + Scheme string `json:"scheme"` + Host string `json:"host"` + Repo string `json:"repo"` + Auth string `json:"auth,omitempty"` + SkipVerify bool `json:"skip_verify,omitempty"` +} + +func (node *Node) String() string { + return fmt.Sprintf( + "Path: %s, Size: %d, Mode: %d, Rdev: %d, Symink: %s, UID: %d, GID: %d, "+ + "Xattrs: %v, Hash: %s", node.Path, node.Size, node.Mode, node.Rdev, node.Symlink, + node.UID, node.GID, node.Xattrs, hex.EncodeToString(node.Hash), + ) +} + +func (rule *FilesystemRule) Name() string { + return "Filesystem" +} + +func getXattrs(path string) (map[string][]byte, error) { + xattrs := make(map[string][]byte) + + names, err := xattr.LList(path) + if err != nil { + return nil, err + } + + for _, name := range names { + data, err := xattr.LGet(path, name) + if err != nil { + return nil, err + } + xattrs[name] = data + } + + return xattrs, nil +} + +func (rule *FilesystemRule) walk(rootfs string) (map[string]Node, error) { + nodes := map[string]Node{} + + if err := filepath.Walk(rootfs, func(path string, info os.FileInfo, err error) error { + if err != nil { + return errors.Wrapf(err, "Failed to stat file %s", path) + } + + rootfsPath, err := filepath.Rel(rootfs, path) + if err != nil { + return err + } + rootfsPath = filepath.Join("/", rootfsPath) + + var size int64 + if !info.IsDir() { + // Ignore directory size check + size = info.Size() + } + + mode := info.Mode() + var symlink string + if mode&os.ModeSymlink == os.ModeSymlink { + if symlink, err = os.Readlink(path); err != nil { + return errors.Wrapf(err, "read link %s", path) + } + } else { + symlink = rootfsPath + } + + var stat syscall.Stat_t + if err := syscall.Lstat(path, &stat); err != nil { + return errors.Wrapf(err, "lstat %s", path) + } + + xattrs, err := getXattrs(path) + if err != nil { + logrus.Warnf("Failed to get xattr: %s", err) + } + + // Calculate file data hash if the `backend-type` option be specified, + // this will cause that nydusd read data from backend, it's network load + var hash []byte + if rule.NydusdConfig.BackendType != "" && info.Mode().IsRegular() { + hash, err = utils.HashFile(path) + if err != nil { + return err + } + } + + node := Node{ + 
Path: rootfsPath, + Size: size, + Mode: mode, + Rdev: stat.Rdev, + Symlink: symlink, + UID: stat.Uid, + GID: stat.Gid, + Xattrs: xattrs, + Hash: hash, + } + nodes[rootfsPath] = node + + return nil + }); err != nil { + return nil, err + } + + return nodes, nil +} + +func (rule *FilesystemRule) pullSourceImage() (*tool.Image, error) { + layers := rule.SourceParsed.OCIImage.Manifest.Layers + worker := utils.NewWorkerPool(WorkerCount, uint(len(layers))) + + for idx := range layers { + worker.Put(func(idx int) func() error { + return func() error { + layer := layers[idx] + reader, err := rule.SourceRemote.Pull(context.Background(), layer, true) + if err != nil { + return errors.Wrap(err, "pull source image layers from the remote registry") + } + + if err = utils.UnpackTargz(context.Background(), filepath.Join(rule.SourcePath, fmt.Sprintf("layer-%d", idx)), reader, true); err != nil { + return errors.Wrap(err, "unpack source image layers") + } + + return nil + } + }(idx)) + } + + if err := <-worker.Waiter(); err != nil { + return nil, errors.Wrap(err, "pull source image layers in wait") + } + + return &tool.Image{ + Layers: layers, + Source: rule.Source, + SourcePath: rule.SourcePath, + Rootfs: rule.SourceMountPath, + }, nil +} + +func (rule *FilesystemRule) mountSourceImage() (*tool.Image, error) { + logrus.Infof("Mounting source image to %s", rule.SourceMountPath) + + image, err := rule.pullSourceImage() + if err != nil { + return nil, errors.Wrap(err, "pull source image") + } + + if err := image.Umount(); err != nil { + return nil, errors.Wrap(err, "umount previous rootfs") + } + + if err := image.Mount(); err != nil { + return nil, errors.Wrap(err, "mount source image") + } + + return image, nil +} + +func NewRegistryBackendConfig(parsed reference.Named) (RegistryBackendConfig, error) { + + backendConfig := RegistryBackendConfig{ + Scheme: "https", + Host: reference.Domain(parsed), + Repo: reference.Path(parsed), + } + + config := dockerconfig.LoadDefaultConfigFile(os.Stderr) + authConfig, err := config.GetAuthConfig(backendConfig.Host) + if err != nil { + return backendConfig, errors.Wrap(err, "get docker registry auth config") + } + var auth string + if authConfig.Username != "" && authConfig.Password != "" { + auth = base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", authConfig.Username, authConfig.Password))) + } + backendConfig.Auth = auth + + return backendConfig, nil +} + +func (rule *FilesystemRule) mountNydusImage() (*tool.Nydusd, error) { + logrus.Infof("Mounting Nydus image to %s", rule.NydusdConfig.MountPath) + + if err := os.MkdirAll(rule.NydusdConfig.BlobCacheDir, 0755); err != nil { + return nil, errors.Wrap(err, "create blob cache directory for Nydusd") + } + + if err := os.MkdirAll(rule.NydusdConfig.MountPath, 0755); err != nil { + return nil, errors.Wrap(err, "create mountpoint directory of Nydus image") + } + + parsed, err := reference.ParseNormalizedNamed(rule.Target) + if err != nil { + return nil, err + } + + if rule.NydusdConfig.BackendType == "" { + rule.NydusdConfig.BackendType = "registry" + + if rule.NydusdConfig.BackendConfig == "" { + backendConfig, err := NewRegistryBackendConfig(parsed) + if err != nil { + return nil, errors.Wrap(err, "failed to parse backend configuration") + } + + if rule.TargetInsecure { + backendConfig.SkipVerify = true + } + + if rule.PlainHTTP { + backendConfig.Scheme = "http" + } + + bytes, err := json.Marshal(backendConfig) + if err != nil { + return nil, errors.Wrap(err, "parse registry backend config") + } + 
rule.NydusdConfig.BackendConfig = string(bytes) + } + } + + nydusd, err := tool.NewNydusd(rule.NydusdConfig) + if err != nil { + return nil, errors.Wrap(err, "create Nydusd daemon") + } + + if err := nydusd.Mount(); err != nil { + return nil, errors.Wrap(err, "mount Nydus image") + } + + return nydusd, nil +} + +func (rule *FilesystemRule) verify() error { + logrus.Infof("Verifying filesystem for source and Nydus image") + + sourceNodes := map[string]Node{} + + // Concurrently walk the rootfs directory of source and Nydus image + walkErr := make(chan error) + go func() { + var err error + sourceNodes, err = rule.walk(rule.SourceMountPath) + walkErr <- err + }() + + nydusNodes, err := rule.walk(rule.NydusdConfig.MountPath) + if err != nil { + return errors.Wrap(err, "walk rootfs of Nydus image") + } + + if err := <-walkErr; err != nil { + return errors.Wrap(err, "walk rootfs of source image") + } + + for path, sourceNode := range sourceNodes { + nydusNode, exist := nydusNodes[path] + if !exist { + return fmt.Errorf("File not found in Nydus image: %s", path) + } + delete(nydusNodes, path) + + if path != "/" && !reflect.DeepEqual(sourceNode, nydusNode) { + return fmt.Errorf("File not match in Nydus image: %s <=> %s", sourceNode.String(), nydusNode.String()) + } + } + + for path := range nydusNodes { + return fmt.Errorf("File not found in source image: %s", path) + } + + return nil +} + +func (rule *FilesystemRule) Validate() error { + // Skip filesystem validation if no source image be specified + if rule.Source == "" { + return nil + } + + // Cleanup temporary directories + defer func() { + if err := os.RemoveAll(rule.SourcePath); err != nil { + logrus.WithError(err).Warnf("cleanup source image directory %s", rule.SourcePath) + } + if err := os.RemoveAll(rule.NydusdConfig.MountPath); err != nil { + logrus.WithError(err).Warnf("cleanup nydus image directory %s", rule.NydusdConfig.MountPath) + } + if err := os.RemoveAll(rule.NydusdConfig.BlobCacheDir); err != nil { + logrus.WithError(err).Warnf("cleanup nydus blob cache directory %s", rule.NydusdConfig.BlobCacheDir) + } + }() + + image, err := rule.mountSourceImage() + if err != nil { + return err + } + defer image.Umount() + + nydusd, err := rule.mountNydusImage() + if err != nil { + return err + } + defer nydusd.Umount(false) + + return rule.verify() +} diff --git a/contrib/nydusify/pkg/checker/rule/manifest.go b/contrib/nydusify/pkg/checker/rule/manifest.go index cb3c96f2763..cd2e4be7dfd 100644 --- a/contrib/nydusify/pkg/checker/rule/manifest.go +++ b/contrib/nydusify/pkg/checker/rule/manifest.go @@ -1,111 +1,111 @@ -// Copyright 2020 Ant Group. All rights reserved. 
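For reference, the backend configuration assembled by `NewRegistryBackendConfig` above serializes to a small JSON document that nydusd consumes. The standalone sketch below copies the struct from the filesystem rule and marshals it with a hypothetical host, repository and credentials.

```go
// Sketch of the registry backend config JSON handed to nydusd.
package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
)

// Mirrors RegistryBackendConfig from the filesystem rule above.
type RegistryBackendConfig struct {
	Scheme     string `json:"scheme"`
	Host       string `json:"host"`
	Repo       string `json:"repo"`
	Auth       string `json:"auth,omitempty"`
	SkipVerify bool   `json:"skip_verify,omitempty"`
}

func main() {
	cfg := RegistryBackendConfig{
		Scheme: "https",
		Host:   "registry.example.com", // hypothetical registry
		Repo:   "library/app",          // hypothetical repository
		// Built from docker credentials as base64("user:pass").
		Auth: base64.StdEncoding.EncodeToString([]byte("user:pass")),
	}
	out, _ := json.Marshal(cfg)
	fmt.Println(string(out))
	// {"scheme":"https","host":"registry.example.com","repo":"library/app","auth":"dXNlcjpwYXNz"}
}
```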
-// -// SPDX-License-Identifier: Apache-2.0 - -package rule - -import ( - "encoding/json" - "reflect" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -// ManifestRule validates manifest format of Nydus image -type ManifestRule struct { - SourceParsed *parser.Parsed - TargetParsed *parser.Parsed - MultiPlatform bool - BackendType string - ExpectedArch string -} - -func (rule *ManifestRule) Name() string { - return "Manifest" -} - -func (rule *ManifestRule) Validate() error { - logrus.Infof("Checking Nydus manifest") - - // Ensure the target image represents a manifest list, - // and it should consist of OCI and Nydus manifest - if rule.MultiPlatform { - if rule.TargetParsed.Index == nil { - return errors.New("not found image manifest list") - } - foundNydusDesc := false - foundOCIDesc := false - for _, desc := range rule.TargetParsed.Index.Manifests { - if desc.Platform == nil { - continue - } - if desc.Platform.Architecture == rule.ExpectedArch && desc.Platform.OS == "linux" { - if utils.IsNydusPlatform(desc.Platform) { - foundNydusDesc = true - } else { - foundOCIDesc = true - } - } - } - if !foundNydusDesc { - return errors.Errorf("not found nydus image of specified platform linux/%s", rule.ExpectedArch) - } - if !foundOCIDesc { - return errors.Errorf("not found OCI image of specified platform linux/%s", rule.ExpectedArch) - } - } - - // Check manifest of Nydus - if rule.TargetParsed.NydusImage == nil { - return errors.New("invalid nydus image manifest") - } - - layers := rule.TargetParsed.NydusImage.Manifest.Layers - for i, layer := range layers { - if i == len(layers)-1 { - if layer.Annotations[utils.LayerAnnotationNydusBootstrap] != "true" { - return errors.New("invalid bootstrap layer in nydus image manifest") - } - } else { - if layer.MediaType != utils.MediaTypeNydusBlob || - layer.Annotations[utils.LayerAnnotationNydusBlob] != "true" { - return errors.New("invalid blob layer in nydus image manifest") - } - } - } - - // Check Nydus image config with OCI image - if rule.SourceParsed.OCIImage != nil { - - //nolint:staticcheck - // ignore static check SA1019 here. We have to assign deprecated field. - // - // Skip ArgsEscaped's Check - // - // This field is present only for legacy compatibility with Docker and - // should not be used by new image builders. Nydusify (1.6 and above) - // ignores it, which is an expected behavior. - // Also ignore it in check. - // - // Addition: [ArgsEscaped in spec](https://github.com/opencontainers/image-spec/pull/892) - rule.TargetParsed.NydusImage.Config.Config.ArgsEscaped = rule.SourceParsed.OCIImage.Config.Config.ArgsEscaped - - ociConfig, err := json.Marshal(rule.SourceParsed.OCIImage.Config.Config) - if err != nil { - return errors.New("marshal oci image config") - } - nydusConfig, err := json.Marshal(rule.TargetParsed.NydusImage.Config.Config) - if err != nil { - return errors.New("marshal nydus image config") - } - if !reflect.DeepEqual(ociConfig, nydusConfig) { - return errors.New("nydus image config should be equal with oci image config") - } - } - - return nil -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package rule + +import ( + "encoding/json" + "reflect" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +// ManifestRule validates manifest format of Nydus image +type ManifestRule struct { + SourceParsed *parser.Parsed + TargetParsed *parser.Parsed + MultiPlatform bool + BackendType string + ExpectedArch string +} + +func (rule *ManifestRule) Name() string { + return "Manifest" +} + +func (rule *ManifestRule) Validate() error { + logrus.Infof("Checking Nydus manifest") + + // Ensure the target image represents a manifest list, + // and it should consist of OCI and Nydus manifest + if rule.MultiPlatform { + if rule.TargetParsed.Index == nil { + return errors.New("not found image manifest list") + } + foundNydusDesc := false + foundOCIDesc := false + for _, desc := range rule.TargetParsed.Index.Manifests { + if desc.Platform == nil { + continue + } + if desc.Platform.Architecture == rule.ExpectedArch && desc.Platform.OS == "linux" { + if utils.IsNydusPlatform(desc.Platform) { + foundNydusDesc = true + } else { + foundOCIDesc = true + } + } + } + if !foundNydusDesc { + return errors.Errorf("not found nydus image of specified platform linux/%s", rule.ExpectedArch) + } + if !foundOCIDesc { + return errors.Errorf("not found OCI image of specified platform linux/%s", rule.ExpectedArch) + } + } + + // Check manifest of Nydus + if rule.TargetParsed.NydusImage == nil { + return errors.New("invalid nydus image manifest") + } + + layers := rule.TargetParsed.NydusImage.Manifest.Layers + for i, layer := range layers { + if i == len(layers)-1 { + if layer.Annotations[utils.LayerAnnotationNydusBootstrap] != "true" { + return errors.New("invalid bootstrap layer in nydus image manifest") + } + } else { + if layer.MediaType != utils.MediaTypeNydusBlob || + layer.Annotations[utils.LayerAnnotationNydusBlob] != "true" { + return errors.New("invalid blob layer in nydus image manifest") + } + } + } + + // Check Nydus image config with OCI image + if rule.SourceParsed.OCIImage != nil { + + //nolint:staticcheck + // ignore static check SA1019 here. We have to assign deprecated field. + // + // Skip ArgsEscaped's Check + // + // This field is present only for legacy compatibility with Docker and + // should not be used by new image builders. Nydusify (1.6 and above) + // ignores it, which is an expected behavior. + // Also ignore it in check. + // + // Addition: [ArgsEscaped in spec](https://github.com/opencontainers/image-spec/pull/892) + rule.TargetParsed.NydusImage.Config.Config.ArgsEscaped = rule.SourceParsed.OCIImage.Config.Config.ArgsEscaped + + ociConfig, err := json.Marshal(rule.SourceParsed.OCIImage.Config.Config) + if err != nil { + return errors.New("marshal oci image config") + } + nydusConfig, err := json.Marshal(rule.TargetParsed.NydusImage.Config.Config) + if err != nil { + return errors.New("marshal nydus image config") + } + if !reflect.DeepEqual(ociConfig, nydusConfig) { + return errors.New("nydus image config should be equal with oci image config") + } + } + + return nil +} diff --git a/contrib/nydusify/pkg/checker/rule/manifest_test.go b/contrib/nydusify/pkg/checker/rule/manifest_test.go index a6f3e3c3a3a..4c336b74dc3 100644 --- a/contrib/nydusify/pkg/checker/rule/manifest_test.go +++ b/contrib/nydusify/pkg/checker/rule/manifest_test.go @@ -1,170 +1,170 @@ -// Copyright 2023 Nydus Developers. 
All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package rule - -import ( - "testing" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/stretchr/testify/require" - - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -func TestManifestName(t *testing.T) { - rule := ManifestRule{} - require.Equal(t, "Manifest", rule.Name()) -} - -func TestManifestRuleValidate_IgnoreDeprecatedField(t *testing.T) { - source := &parser.Parsed{ - OCIImage: &parser.Image{ - Config: ocispec.Image{ - Config: ocispec.ImageConfig{ - ArgsEscaped: true, // deprecated field - }, - }, - }, - } - target := &parser.Parsed{ - NydusImage: &parser.Image{ - Config: ocispec.Image{ - Config: ocispec.ImageConfig{ - ArgsEscaped: false, - }, - }, - }, - } - - rule := ManifestRule{ - SourceParsed: source, - TargetParsed: target, - } - - require.Nil(t, rule.Validate()) -} - -func TestManifestRuleValidate_MultiPlatform(t *testing.T) { - source := &parser.Parsed{ - OCIImage: &parser.Image{}, - } - target := &parser.Parsed{ - NydusImage: &parser.Image{}, - } - - rule := ManifestRule{ - MultiPlatform: true, - ExpectedArch: "amd64", - SourceParsed: source, - TargetParsed: target, - } - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "not found image manifest list") - - rule.TargetParsed.Index = &ocispec.Index{} - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "not found nydus image of specified platform linux") - - rule.TargetParsed.Index = &ocispec.Index{ - Manifests: []ocispec.Descriptor{ - { - MediaType: utils.MediaTypeNydusBlob, - Platform: &ocispec.Platform{ - Architecture: "amd64", - OS: "linux", - OSFeatures: []string{utils.ManifestOSFeatureNydus}, - }, - }, - }, - } - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "not found OCI image of specified platform linux") - - rule.TargetParsed.Index.Manifests = append(rule.TargetParsed.Index.Manifests, ocispec.Descriptor{ - MediaType: "application/vnd.oci.image.manifest.v1+json", - Platform: &ocispec.Platform{ - Architecture: "amd64", - OS: "linux", - }, - }) - require.NoError(t, rule.Validate()) -} - -func TestManifestRuleValidate_TargetLayer(t *testing.T) { - rule := ManifestRule{ - SourceParsed: &parser.Parsed{}, - TargetParsed: &parser.Parsed{}, - } - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "invalid nydus image manifest") - - rule.TargetParsed = &parser.Parsed{ - NydusImage: &parser.Image{ - Manifest: ocispec.Manifest{ - MediaType: "application/vnd.docker.distribution.manifest.v2+json", - Config: ocispec.Descriptor{ - MediaType: "application/vnd.oci.image.config.v1+json", - Digest: "sha256:563fad1f51cec2ee4c972af4bfd7275914061e2f73770585cfb04309cb5e0d6b", - Size: 523, - }, - Layers: []ocispec.Descriptor{ - { - MediaType: "application / vnd.oci.image.layer.v1.tar", - Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - Size: 83528010, - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-blob": "true", - }, - }, - { - MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", - Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - Size: 83528010, - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-blob": "true", - }, - }, - }, - }, - }, - } - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "invalid blob 
layer in nydus image manifest") - - rule.TargetParsed.NydusImage.Manifest.Layers = []ocispec.Descriptor{ - { - MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", - Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - Size: 83528010, - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-blob": "true", - }, - }, - } - require.Error(t, rule.Validate()) - require.Contains(t, rule.Validate().Error(), "invalid bootstrap layer in nydus image manifest") - - rule.TargetParsed.NydusImage.Manifest.Layers = []ocispec.Descriptor{ - { - MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", - Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", - Size: 83528010, - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-blob": "true", - }, - }, - { - MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", - Digest: "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", - Size: 273320, - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]", - }, - }, - } - require.NoError(t, rule.Validate()) -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package rule + +import ( + "testing" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/stretchr/testify/require" + + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +func TestManifestName(t *testing.T) { + rule := ManifestRule{} + require.Equal(t, "Manifest", rule.Name()) +} + +func TestManifestRuleValidate_IgnoreDeprecatedField(t *testing.T) { + source := &parser.Parsed{ + OCIImage: &parser.Image{ + Config: ocispec.Image{ + Config: ocispec.ImageConfig{ + ArgsEscaped: true, // deprecated field + }, + }, + }, + } + target := &parser.Parsed{ + NydusImage: &parser.Image{ + Config: ocispec.Image{ + Config: ocispec.ImageConfig{ + ArgsEscaped: false, + }, + }, + }, + } + + rule := ManifestRule{ + SourceParsed: source, + TargetParsed: target, + } + + require.Nil(t, rule.Validate()) +} + +func TestManifestRuleValidate_MultiPlatform(t *testing.T) { + source := &parser.Parsed{ + OCIImage: &parser.Image{}, + } + target := &parser.Parsed{ + NydusImage: &parser.Image{}, + } + + rule := ManifestRule{ + MultiPlatform: true, + ExpectedArch: "amd64", + SourceParsed: source, + TargetParsed: target, + } + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "not found image manifest list") + + rule.TargetParsed.Index = &ocispec.Index{} + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "not found nydus image of specified platform linux") + + rule.TargetParsed.Index = &ocispec.Index{ + Manifests: []ocispec.Descriptor{ + { + MediaType: utils.MediaTypeNydusBlob, + Platform: &ocispec.Platform{ + Architecture: "amd64", + OS: "linux", + OSFeatures: []string{utils.ManifestOSFeatureNydus}, + }, + }, + }, + } + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "not found OCI image of specified platform linux") + + rule.TargetParsed.Index.Manifests = append(rule.TargetParsed.Index.Manifests, ocispec.Descriptor{ + MediaType: "application/vnd.oci.image.manifest.v1+json", + Platform: &ocispec.Platform{ + Architecture: "amd64", + OS: "linux", + }, + }) + require.NoError(t, 
rule.Validate()) +} + +func TestManifestRuleValidate_TargetLayer(t *testing.T) { + rule := ManifestRule{ + SourceParsed: &parser.Parsed{}, + TargetParsed: &parser.Parsed{}, + } + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "invalid nydus image manifest") + + rule.TargetParsed = &parser.Parsed{ + NydusImage: &parser.Image{ + Manifest: ocispec.Manifest{ + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + Config: ocispec.Descriptor{ + MediaType: "application/vnd.oci.image.config.v1+json", + Digest: "sha256:563fad1f51cec2ee4c972af4bfd7275914061e2f73770585cfb04309cb5e0d6b", + Size: 523, + }, + Layers: []ocispec.Descriptor{ + { + MediaType: "application / vnd.oci.image.layer.v1.tar", + Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + Size: 83528010, + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-blob": "true", + }, + }, + { + MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", + Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + Size: 83528010, + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-blob": "true", + }, + }, + }, + }, + }, + } + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "invalid blob layer in nydus image manifest") + + rule.TargetParsed.NydusImage.Manifest.Layers = []ocispec.Descriptor{ + { + MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", + Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + Size: 83528010, + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-blob": "true", + }, + }, + } + require.Error(t, rule.Validate()) + require.Contains(t, rule.Validate().Error(), "invalid bootstrap layer in nydus image manifest") + + rule.TargetParsed.NydusImage.Manifest.Layers = []ocispec.Descriptor{ + { + MediaType: "application/vnd.oci.image.layer.nydus.blob.v1", + Digest: "sha256:09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059", + Size: 83528010, + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-blob": "true", + }, + }, + { + MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", + Digest: "sha256:aec98c9e3dce739877b8f5fe1cddd339de1db2b36c20995d76f6265056dbdb08", + Size: 273320, + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-reference-blob-ids": "[\"09845cce1d983b158d4865fc37c23bbfb892d4775c786e8114d3cf868975c059\"]", + }, + }, + } + require.NoError(t, rule.Validate()) +} diff --git a/contrib/nydusify/pkg/checker/rule/rule.go b/contrib/nydusify/pkg/checker/rule/rule.go index 1a49822e232..11dfca6deab 100644 --- a/contrib/nydusify/pkg/checker/rule/rule.go +++ b/contrib/nydusify/pkg/checker/rule/rule.go @@ -1,10 +1,10 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package rule - -type Rule interface { - Validate() error - Name() string -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package rule + +type Rule interface { + Validate() error + Name() string +} diff --git a/contrib/nydusify/pkg/checker/tool/builder.go b/contrib/nydusify/pkg/checker/tool/builder.go index 50dca87d1cc..5ebb8362fdc 100644 --- a/contrib/nydusify/pkg/checker/tool/builder.go +++ b/contrib/nydusify/pkg/checker/tool/builder.go @@ -1,50 +1,50 @@ -// Copyright 2020 Ant Group. All rights reserved. 
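The `Rule` interface above is all that ties the manifest, bootstrap and filesystem checks into the checker loop, so a custom check only needs the two methods. The rule below is a purely hypothetical sketch with illustrative validation logic.

```go
// In-package sketch (package rule) of a custom Rule implementation.
package rule

import "github.com/pkg/errors"

// sizeRule is a hypothetical rule that fails when a layer count limit is exceeded.
type sizeRule struct {
	layerCount int
	maxLayers  int
}

// Compile-time check that sizeRule satisfies the Rule interface above.
var _ Rule = (*sizeRule)(nil)

func (r *sizeRule) Name() string { return "LayerCount" }

func (r *sizeRule) Validate() error {
	if r.layerCount > r.maxLayers {
		return errors.Errorf("image has %d layers, exceeds limit %d", r.layerCount, r.maxLayers)
	}
	return nil
}
```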
-// -// SPDX-License-Identifier: Apache-2.0 - -package tool - -import ( - "io" - "os" - "os/exec" -) - -type BuilderOption struct { - BootstrapPath string - DebugOutputPath string -} - -type Builder struct { - binaryPath string - stdout io.Writer - stderr io.Writer -} - -func NewBuilder(binaryPath string) *Builder { - return &Builder{ - binaryPath: binaryPath, - stdout: os.Stdout, - stderr: os.Stderr, - } -} - -// Check calls `nydus-image check` to parse Nydus bootstrap -// and output debug information to specified JSON file. -func (builder *Builder) Check(option BuilderOption) error { - args := []string{ - "check", - "--log-level", - "warn", - "--output-json", - option.DebugOutputPath, - "--bootstrap", - option.BootstrapPath, - } - - cmd := exec.Command(builder.binaryPath, args...) - cmd.Stdout = builder.stdout - cmd.Stderr = builder.stderr - - return cmd.Run() -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package tool + +import ( + "io" + "os" + "os/exec" +) + +type BuilderOption struct { + BootstrapPath string + DebugOutputPath string +} + +type Builder struct { + binaryPath string + stdout io.Writer + stderr io.Writer +} + +func NewBuilder(binaryPath string) *Builder { + return &Builder{ + binaryPath: binaryPath, + stdout: os.Stdout, + stderr: os.Stderr, + } +} + +// Check calls `nydus-image check` to parse Nydus bootstrap +// and output debug information to specified JSON file. +func (builder *Builder) Check(option BuilderOption) error { + args := []string{ + "check", + "--log-level", + "warn", + "--output-json", + option.DebugOutputPath, + "--bootstrap", + option.BootstrapPath, + } + + cmd := exec.Command(builder.binaryPath, args...) + cmd.Stdout = builder.stdout + cmd.Stderr = builder.stderr + + return cmd.Run() +} diff --git a/contrib/nydusify/pkg/checker/tool/image.go b/contrib/nydusify/pkg/checker/tool/image.go index af9d7099cf6..eaa7baf81cf 100644 --- a/contrib/nydusify/pkg/checker/tool/image.go +++ b/contrib/nydusify/pkg/checker/tool/image.go @@ -1,95 +1,95 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package tool - -import ( - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/containerd/containerd/mount" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" -) - -func mkMounts(dirs []string) []mount.Mount { - var options []string - - if len(dirs) == 0 { - return nil - } - - if len(dirs) == 1 { - return []mount.Mount{ - { - Source: dirs[0], - Type: "bind", - Options: []string{ - "ro", - "rbind", - }, - }, - } - } - - options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(dirs, ":"))) - return []mount.Mount{ - { - Type: "overlay", - Source: "overlay", - Options: options, - }, - } -} - -type Image struct { - Layers []ocispec.Descriptor - Source string - SourcePath string - Rootfs string -} - -// Mount mounts rootfs of OCI image. 
-func (image *Image) Mount() error { - if err := os.MkdirAll(image.Rootfs, 0750); err != nil { - return errors.Wrap(err, "create rootfs dir") - } - - var dirs []string - count := len(image.Layers) - for idx := range image.Layers { - layerName := fmt.Sprintf("layer-%d", count-idx-1) - layerDir := filepath.Join(image.SourcePath, layerName) - dirs = append(dirs, strings.ReplaceAll(layerDir, ":", "\\:")) - } - - mounts := mkMounts(dirs) - if err := mount.All(mounts, image.Rootfs); err != nil { - return errors.Wrap(err, "mount source layer") - } - - return nil -} - -// Umount umounts rootfs mountpoint of OCI image. -func (image *Image) Umount() error { - if _, err := os.Stat(image.Rootfs); err != nil { - if os.IsNotExist(err) { - return nil - } - return errors.Wrap(err, "stat rootfs") - } - - if err := mount.Unmount(image.Rootfs, 0); err != nil { - return errors.Wrap(err, "umount rootfs") - } - - if err := os.RemoveAll(image.Rootfs); err != nil { - return errors.Wrap(err, "remove rootfs") - } - - return nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package tool + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/containerd/containerd/mount" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +func mkMounts(dirs []string) []mount.Mount { + var options []string + + if len(dirs) == 0 { + return nil + } + + if len(dirs) == 1 { + return []mount.Mount{ + { + Source: dirs[0], + Type: "bind", + Options: []string{ + "ro", + "rbind", + }, + }, + } + } + + options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(dirs, ":"))) + return []mount.Mount{ + { + Type: "overlay", + Source: "overlay", + Options: options, + }, + } +} + +type Image struct { + Layers []ocispec.Descriptor + Source string + SourcePath string + Rootfs string +} + +// Mount mounts rootfs of OCI image. +func (image *Image) Mount() error { + if err := os.MkdirAll(image.Rootfs, 0750); err != nil { + return errors.Wrap(err, "create rootfs dir") + } + + var dirs []string + count := len(image.Layers) + for idx := range image.Layers { + layerName := fmt.Sprintf("layer-%d", count-idx-1) + layerDir := filepath.Join(image.SourcePath, layerName) + dirs = append(dirs, strings.ReplaceAll(layerDir, ":", "\\:")) + } + + mounts := mkMounts(dirs) + if err := mount.All(mounts, image.Rootfs); err != nil { + return errors.Wrap(err, "mount source layer") + } + + return nil +} + +// Umount umounts rootfs mountpoint of OCI image. +func (image *Image) Umount() error { + if _, err := os.Stat(image.Rootfs); err != nil { + if os.IsNotExist(err) { + return nil + } + return errors.Wrap(err, "stat rootfs") + } + + if err := mount.Unmount(image.Rootfs, 0); err != nil { + return errors.Wrap(err, "umount rootfs") + } + + if err := os.RemoveAll(image.Rootfs); err != nil { + return errors.Wrap(err, "remove rootfs") + } + + return nil +} diff --git a/contrib/nydusify/pkg/checker/tool/inspector.go b/contrib/nydusify/pkg/checker/tool/inspector.go index 35cde635526..27537cdc21b 100644 --- a/contrib/nydusify/pkg/checker/tool/inspector.go +++ b/contrib/nydusify/pkg/checker/tool/inspector.go @@ -1,76 +1,76 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package tool - -import ( - "encoding/json" - "fmt" - "os/exec" - - "github.com/pkg/errors" -) - -const ( - GetBlobs = iota -) - -type InspectOption struct { - Operation int - Bootstrap string -} - -type BlobInfo struct { - BlobID string `json:"blob_id"` - CompressedSize uint64 `json:"compressed_size"` - DecompressedSize uint64 `json:"decompressed_size"` - ReadaheadOffset uint32 `json:"readahead_offset"` - ReadaheadSize uint32 `json:"readahead_size"` -} - -func (info *BlobInfo) String() string { - jsonBytes, _ := json.Marshal(info) - return string(jsonBytes) -} - -type BlobInfoList []BlobInfo - -func (infos BlobInfoList) String() string { - jsonBytes, _ := json.Marshal(&infos) - return string(jsonBytes) -} - -type Inspector struct { - binaryPath string -} - -func NewInspector(binaryPath string) *Inspector { - return &Inspector{binaryPath: binaryPath} -} - -func (p *Inspector) Inspect(option InspectOption) (interface{}, error) { - var ( - args []string - ) - args = []string{ - "inspect", - option.Bootstrap, - "--request", - } - switch option.Operation { - case GetBlobs: - args = append(args, "blobs") - cmd := exec.Command(p.binaryPath, args...) - msg, err := cmd.CombinedOutput() - if err != nil { - return nil, errors.Wrap(err, string(msg)) - } - var blobs BlobInfoList - if err = json.Unmarshal(msg, &blobs); err != nil { - return nil, err - } - return blobs, nil - } - return nil, fmt.Errorf("not support method %d", option.Operation) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package tool + +import ( + "encoding/json" + "fmt" + "os/exec" + + "github.com/pkg/errors" +) + +const ( + GetBlobs = iota +) + +type InspectOption struct { + Operation int + Bootstrap string +} + +type BlobInfo struct { + BlobID string `json:"blob_id"` + CompressedSize uint64 `json:"compressed_size"` + DecompressedSize uint64 `json:"decompressed_size"` + ReadaheadOffset uint32 `json:"readahead_offset"` + ReadaheadSize uint32 `json:"readahead_size"` +} + +func (info *BlobInfo) String() string { + jsonBytes, _ := json.Marshal(info) + return string(jsonBytes) +} + +type BlobInfoList []BlobInfo + +func (infos BlobInfoList) String() string { + jsonBytes, _ := json.Marshal(&infos) + return string(jsonBytes) +} + +type Inspector struct { + binaryPath string +} + +func NewInspector(binaryPath string) *Inspector { + return &Inspector{binaryPath: binaryPath} +} + +func (p *Inspector) Inspect(option InspectOption) (interface{}, error) { + var ( + args []string + ) + args = []string{ + "inspect", + option.Bootstrap, + "--request", + } + switch option.Operation { + case GetBlobs: + args = append(args, "blobs") + cmd := exec.Command(p.binaryPath, args...) + msg, err := cmd.CombinedOutput() + if err != nil { + return nil, errors.Wrap(err, string(msg)) + } + var blobs BlobInfoList + if err = json.Unmarshal(msg, &blobs); err != nil { + return nil, err + } + return blobs, nil + } + return nil, fmt.Errorf("not support method %d", option.Operation) +} diff --git a/contrib/nydusify/pkg/checker/tool/nydusd.go b/contrib/nydusify/pkg/checker/tool/nydusd.go index ed8c341ae90..d989dd92af4 100644 --- a/contrib/nydusify/pkg/checker/tool/nydusd.go +++ b/contrib/nydusify/pkg/checker/tool/nydusd.go @@ -1,226 +1,226 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package tool - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net" - "net/http" - "os" - "os/exec" - "text/template" - "time" - - "github.com/pkg/errors" -) - -type NydusdConfig struct { - EnablePrefetch bool - NydusdPath string - BootstrapPath string - ConfigPath string - BackendType string - BackendConfig string - BlobCacheDir string - APISockPath string - MountPath string - Mode string - DigestValidate bool -} - -// Nydusd runs nydusd binary. -type Nydusd struct { - NydusdConfig -} - -type daemonInfo struct { - State string `json:"state"` -} - -var configTpl = ` -{ - "device": { - "backend": { - "type": "{{.BackendType}}", - "config": {{.BackendConfig}} - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "{{.BlobCacheDir}}" - } - } - }, - "mode": "{{.Mode}}", - "iostats_files": false, - "fs_prefetch": { - "enable": {{.EnablePrefetch}}, - "threads_count": 10, - "merging_size": 131072 - }, - "digest_validate": {{.DigestValidate}}, - "enable_xattr": true -} -` - -func makeConfig(conf NydusdConfig) error { - tpl := template.Must(template.New("").Parse(configTpl)) - - var ret bytes.Buffer - if conf.BackendType == "" { - conf.BackendType = "localfs" - conf.BackendConfig = `{"dir": "/fake"}` - conf.EnablePrefetch = false - } else { - if conf.BackendConfig == "" { - return errors.Errorf("empty backend configuration string") - } - conf.EnablePrefetch = true - } - if err := tpl.Execute(&ret, conf); err != nil { - return errors.New("failed to prepare configuration file for Nydusd") - } - - if err := os.WriteFile(conf.ConfigPath, ret.Bytes(), 0644); err != nil { - return errors.New("write config file for Nydusd") - } - - return nil -} - -// Wait until Nydusd ready by checking daemon state RUNNING -func checkReady(ctx context.Context, sock string) (<-chan bool, error) { - ready := make(chan bool) - - transport := &http.Transport{ - MaxIdleConns: 10, - IdleConnTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { - dialer := &net.Dialer{ - Timeout: 5 * time.Second, - KeepAlive: 5 * time.Second, - } - return dialer.DialContext(ctx, "unix", sock) - }, - } - - client := &http.Client{ - Timeout: 30 * time.Second, - Transport: transport, - } - - go func() { - for { - select { - case <-ctx.Done(): - return - default: - } - - resp, err := client.Get(fmt.Sprintf("http://unix%s", "/api/v1/daemon")) - if err != nil { - continue - } - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - continue - } - - var info daemonInfo - if err = json.Unmarshal(body, &info); err != nil { - continue - } - - if info.State == "RUNNING" { - ready <- true - break - } - } - }() - - return ready, nil -} - -func NewNydusd(conf NydusdConfig) (*Nydusd, error) { - if err := makeConfig(conf); err != nil { - return nil, errors.Wrapf(err, "failed to create configuration file for Nydusd") - } - return &Nydusd{ - NydusdConfig: conf, - }, nil -} - -func (nydusd *Nydusd) Mount() error { - // Umount is called to clean up mountpoint in nydusd's mount path - // Flag is used as a hint to prevent redundant error message - nydusd.Umount(true) - - args := []string{ - // For backward compatibility, do not use "fuse" subcommand in checker. 
- // "fuse", - "--config", - nydusd.ConfigPath, - "--mountpoint", - nydusd.MountPath, - "--bootstrap", - nydusd.BootstrapPath, - "--apisock", - nydusd.APISockPath, - "--log-level", - "error", - } - - cmd := exec.Command(nydusd.NydusdPath, args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - runErr := make(chan error) - go func() { - runErr <- cmd.Run() - }() - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - ready, err := checkReady(ctx, nydusd.APISockPath) - if err != nil { - return errors.New("check Nydusd state") - } - - select { - case err := <-runErr: - if err != nil { - return errors.Wrap(err, "run Nydusd binary") - } - case <-ready: - return nil - case <-time.After(30 * time.Second): - return errors.New("timeout to wait Nydusd ready") - } - - return nil -} - -func (nydusd *Nydusd) Umount(silent bool) error { - if _, err := os.Stat(nydusd.MountPath); err == nil { - cmd := exec.Command("umount", nydusd.MountPath) - - if !silent { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - } - if err := cmd.Run(); err != nil { - return err - } - } - return nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package tool + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "text/template" + "time" + + "github.com/pkg/errors" +) + +type NydusdConfig struct { + EnablePrefetch bool + NydusdPath string + BootstrapPath string + ConfigPath string + BackendType string + BackendConfig string + BlobCacheDir string + APISockPath string + MountPath string + Mode string + DigestValidate bool +} + +// Nydusd runs nydusd binary. +type Nydusd struct { + NydusdConfig +} + +type daemonInfo struct { + State string `json:"state"` +} + +var configTpl = ` +{ + "device": { + "backend": { + "type": "{{.BackendType}}", + "config": {{.BackendConfig}} + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "{{.BlobCacheDir}}" + } + } + }, + "mode": "{{.Mode}}", + "iostats_files": false, + "fs_prefetch": { + "enable": {{.EnablePrefetch}}, + "threads_count": 10, + "merging_size": 131072 + }, + "digest_validate": {{.DigestValidate}}, + "enable_xattr": true +} +` + +func makeConfig(conf NydusdConfig) error { + tpl := template.Must(template.New("").Parse(configTpl)) + + var ret bytes.Buffer + if conf.BackendType == "" { + conf.BackendType = "localfs" + conf.BackendConfig = `{"dir": "/fake"}` + conf.EnablePrefetch = false + } else { + if conf.BackendConfig == "" { + return errors.Errorf("empty backend configuration string") + } + conf.EnablePrefetch = true + } + if err := tpl.Execute(&ret, conf); err != nil { + return errors.New("failed to prepare configuration file for Nydusd") + } + + if err := os.WriteFile(conf.ConfigPath, ret.Bytes(), 0644); err != nil { + return errors.New("write config file for Nydusd") + } + + return nil +} + +// Wait until Nydusd ready by checking daemon state RUNNING +func checkReady(ctx context.Context, sock string) (<-chan bool, error) { + ready := make(chan bool) + + transport := &http.Transport{ + MaxIdleConns: 10, + IdleConnTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + dialer := &net.Dialer{ + Timeout: 5 * time.Second, + KeepAlive: 5 * time.Second, + } + return dialer.DialContext(ctx, "unix", sock) + }, + } + + client := &http.Client{ + Timeout: 30 * time.Second, + Transport: transport, + } + + go func() { + for { + select { + case 
<-ctx.Done(): + return + default: + } + + resp, err := client.Get(fmt.Sprintf("http://unix%s", "/api/v1/daemon")) + if err != nil { + continue + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + continue + } + + var info daemonInfo + if err = json.Unmarshal(body, &info); err != nil { + continue + } + + if info.State == "RUNNING" { + ready <- true + break + } + } + }() + + return ready, nil +} + +func NewNydusd(conf NydusdConfig) (*Nydusd, error) { + if err := makeConfig(conf); err != nil { + return nil, errors.Wrapf(err, "failed to create configuration file for Nydusd") + } + return &Nydusd{ + NydusdConfig: conf, + }, nil +} + +func (nydusd *Nydusd) Mount() error { + // Umount is called to clean up mountpoint in nydusd's mount path + // Flag is used as a hint to prevent redundant error message + nydusd.Umount(true) + + args := []string{ + // For backward compatibility, do not use "fuse" subcommand in checker. + // "fuse", + "--config", + nydusd.ConfigPath, + "--mountpoint", + nydusd.MountPath, + "--bootstrap", + nydusd.BootstrapPath, + "--apisock", + nydusd.APISockPath, + "--log-level", + "error", + } + + cmd := exec.Command(nydusd.NydusdPath, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + runErr := make(chan error) + go func() { + runErr <- cmd.Run() + }() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ready, err := checkReady(ctx, nydusd.APISockPath) + if err != nil { + return errors.New("check Nydusd state") + } + + select { + case err := <-runErr: + if err != nil { + return errors.Wrap(err, "run Nydusd binary") + } + case <-ready: + return nil + case <-time.After(30 * time.Second): + return errors.New("timeout to wait Nydusd ready") + } + + return nil +} + +func (nydusd *Nydusd) Umount(silent bool) error { + if _, err := os.Stat(nydusd.MountPath); err == nil { + cmd := exec.Command("umount", nydusd.MountPath) + + if !silent { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + if err := cmd.Run(); err != nil { + return err + } + } + return nil +} diff --git a/contrib/nydusify/pkg/chunkdict/generator/generator.go b/contrib/nydusify/pkg/chunkdict/generator/generator.go index 13c91a9ca58..c03d65a91d9 100644 --- a/contrib/nydusify/pkg/chunkdict/generator/generator.go +++ b/contrib/nydusify/pkg/chunkdict/generator/generator.go @@ -1,527 +1,527 @@ -package generator - -import ( - "compress/gzip" - "context" - "encoding/json" - "io" - "io/fs" - "os" - "path/filepath" - "strings" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/containerd/containerd/namespaces" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - originprovider "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/goharbor/acceleration-service/pkg/remote" - - "github.com/containerd/nydus-snapshotter/pkg/converter" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/dustin/go-humanize" - "github.com/goharbor/acceleration-service/pkg/platformutil" - serverutils "github.com/goharbor/acceleration-service/pkg/utils" - "github.com/opencontainers/go-digest" - "golang.org/x/sync/errgroup" - "golang.org/x/sync/semaphore" - - "github.com/containerd/containerd/content" - containerdErrdefs "github.com/containerd/containerd/errdefs" - 
"github.com/goharbor/acceleration-service/pkg/errdefs" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -// Opt defines Chunkdict generate options. -// Note: sources is one or more Nydus image references. -type Opt struct { - Sources []string - Target string - SourceInsecure bool - TargetInsecure bool - - BackendType string - BackendConfig string - BackendForcePush bool - - WorkDir string - NydusImagePath string - ExpectedArch string - - AllPlatforms bool - Platforms string -} - -// Generator generates chunkdict by deduplicating multiple nydus images -// invoking "nydus-image chunkdict save" to save image information into database. -type Generator struct { - Opt - sourcesParser []*parser.Parser -} - -type output struct { - Blobs []string -} - -// New creates Generator instance. -func New(opt Opt) (*Generator, error) { - // TODO: support sources image resolver - var sourcesParser []*parser.Parser - for _, source := range opt.Sources { - sourcesRemote, err := originprovider.DefaultRemote(source, opt.SourceInsecure) - if err != nil { - return nil, errors.Wrap(err, "Init source image parser") - } - sourceParser, err := parser.New(sourcesRemote, opt.ExpectedArch) - sourcesParser = append(sourcesParser, sourceParser) - if err != nil { - return nil, errors.Wrap(err, "Failed to create parser") - } - } - - generator := &Generator{ - Opt: opt, - sourcesParser: sourcesParser, - } - - return generator, nil -} - -// Generate saves multiple Nydus bootstraps into the database one by one. -func (generator *Generator) Generate(ctx context.Context) error { - var bootstrapPaths []string - bootstrapPaths, err := generator.pull(ctx) - - if err != nil { - if utils.RetryWithHTTP(err) { - for index := range generator.Sources { - generator.sourcesParser[index].Remote.MaybeWithHTTP(err) - } - } - bootstrapPaths, err = generator.pull(ctx) - if err != nil { - return err - } - } - - chunkdictBootstrapPath, outputPath, err := generator.generate(ctx, bootstrapPaths) - if err != nil { - return err - } - - if err := generator.push(ctx, chunkdictBootstrapPath, outputPath); err != nil { - return err - } - - // return os.RemoveAll(generator.WorkDir) - return nil -} - -// Pull the bootstrap of nydus image -func (generator *Generator) pull(ctx context.Context) ([]string, error) { - var bootstrapPaths []string - for index := range generator.Sources { - sourceParsed, err := generator.sourcesParser[index].Parse(ctx) - if err != nil { - return nil, errors.Wrap(err, "parse Nydus image") - } - - // Create a directory to store the image bootstrap - nydusImageName := strings.Replace(generator.Sources[index], "/", ":", -1) - bootstrapDirPath := filepath.Join(generator.WorkDir, nydusImageName) - if err := os.MkdirAll(bootstrapDirPath, fs.ModePerm); err != nil { - return nil, errors.Wrap(err, "creat work directory") - } - if err := generator.Output(ctx, sourceParsed, bootstrapDirPath, index); err != nil { - return nil, errors.Wrap(err, "output image information") - } - bootstrapPath := filepath.Join(bootstrapDirPath, "nydus_bootstrap") - bootstrapPaths = append(bootstrapPaths, bootstrapPath) - } - return bootstrapPaths, nil -} - -func (generator *Generator) generate(_ context.Context, bootstrapSlice []string) (string, string, error) { - // Invoke "nydus-image chunkdict generate" command - currentDir, _ := os.Getwd() - builder := build.NewBuilder(generator.NydusImagePath) - - chunkdictBootstrapPath := filepath.Join(generator.WorkDir, "chunkdict_bootstrap") - databaseType := "sqlite" - var databasePath string - if 
strings.HasPrefix(generator.WorkDir, "/") { - databasePath = databaseType + "://" + filepath.Join(generator.WorkDir, "database.db") - } else { - databasePath = databaseType + "://" + filepath.Join(currentDir, generator.WorkDir, "database.db") - } - outputPath := filepath.Join(generator.WorkDir, "nydus_bootstrap_output.json") - - if err := builder.Generate(build.GenerateOption{ - BootstrapPaths: bootstrapSlice, - ChunkdictBootstrapPath: chunkdictBootstrapPath, - DatabasePath: databasePath, - OutputPath: outputPath, - }); err != nil { - return "", "", errors.Wrap(err, "invalid nydus bootstrap format") - } - - logrus.Infof("Successfully generate image chunk dictionary") - return chunkdictBootstrapPath, outputPath, nil -} - -func hosts(generator *Generator) remote.HostFunc { - maps := make(map[string]bool) - for _, source := range generator.Sources { - maps[source] = generator.SourceInsecure - } - - maps[generator.Target] = generator.TargetInsecure - return func(ref string) (remote.CredentialFunc, bool, error) { - return remote.NewDockerConfigCredFunc(), maps[ref], nil - } -} - -func (generator *Generator) push(ctx context.Context, chunkdictBootstrapPath string, outputPath string) error { - // Basic configuration - ctx = namespaces.WithNamespace(ctx, "nydusify") - platformMC, err := platformutil.ParsePlatforms(generator.AllPlatforms, generator.Platforms) - if err != nil { - return err - } - - pvd, err := provider.New(generator.WorkDir, hosts(generator), 200, "v1", platformMC, 0) - if err != nil { - return err - } - - var bkd backend.Backend - if generator.BackendType != "" { - bkd, err = backend.NewBackend(generator.BackendType, []byte(generator.BackendConfig), nil) - if err != nil { - return errors.Wrapf(err, "new backend") - } - } - - // Pull source image - for index := range generator.Sources { - if err := pvd.Pull(ctx, generator.Sources[index]); err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - if err := pvd.Pull(ctx, generator.Sources[index]); err != nil { - return errors.Wrap(err, "try to pull image") - } - } else { - return errors.Wrap(err, "pull source image") - } - } - } - - logrus.Infof("pulled source image %s", generator.Sources[0]) - sourceImage, err := pvd.Image(ctx, generator.Sources[0]) - if err != nil { - return errors.Wrap(err, "find image from store") - } - sourceDescs, err := serverutils.GetManifests(ctx, pvd.ContentStore(), *sourceImage, platformMC) - if err != nil { - return errors.Wrap(err, "get image manifests") - } - - targetDescs := make([]ocispec.Descriptor, len(sourceDescs)) - - sem := semaphore.NewWeighted(1) - eg := errgroup.Group{} - for idx := range sourceDescs { - func(idx int) { - eg.Go(func() error { - sem.Acquire(context.Background(), 1) - defer sem.Release(1) - sourceDesc := sourceDescs[idx] - targetDesc := &sourceDesc - - // Get the blob from backend - descs, _targetDesc, err := pushBlobFromBackend(ctx, pvd, bkd, sourceDesc, *generator, chunkdictBootstrapPath, outputPath) - if err != nil { - return errors.Wrap(err, "get resolver") - } - if _targetDesc != nil { - targetDesc = _targetDesc - store := newStore(pvd.ContentStore(), descs) - pvd.SetContentStore(store) - } - - targetDescs[idx] = *targetDesc - - if err := pvd.Push(ctx, *targetDesc, generator.Target); err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - if err := pvd.Push(ctx, *targetDesc, generator.Target); err != nil { - return errors.Wrap(err, "try to push image manifest") - } - } else { - return errors.Wrap(err, "push target image manifest") - } - } 
- return nil - }) - }(idx) - } - if err := eg.Wait(); err != nil { - return errors.Wrap(err, "push image manifests") - } - return nil -} - -func pushBlobFromBackend( - ctx context.Context, pvd *provider.Provider, bkd backend.Backend, src ocispec.Descriptor, generator Generator, bootstrapPath string, outputPath string, -) ([]ocispec.Descriptor, *ocispec.Descriptor, error) { - manifest := ocispec.Manifest{} - if _, err := serverutils.ReadJSON(ctx, pvd.ContentStore(), &manifest, src); err != nil { - return nil, nil, errors.Wrap(err, "read manifest from store") - } - fsversion := src.Annotations["containerd.io/snapshot/nydus-fs-version"] - // Read the Nydusify output JSON to get the list of blobs - var out output - bytes, err := os.ReadFile(outputPath) - if err != nil { - return nil, nil, errors.Wrap(err, "read output file") - } - if err := json.Unmarshal(bytes, &out); err != nil { - return nil, nil, errors.Wrap(err, "unmarshal output json") - } - - blobIDs := []string{} - blobIDMap := map[string]bool{} - for _, blobID := range out.Blobs { - if blobIDMap[blobID] { - continue - } - blobIDs = append(blobIDs, blobID) - blobIDMap[blobID] = true - } - blobDescs := make([]ocispec.Descriptor, len(blobIDs)) - - eg, ctx := errgroup.WithContext(ctx) - sem := semaphore.NewWeighted(int64(provider.LayerConcurrentLimit)) - for idx := range blobIDs { - func(idx int) { - eg.Go(func() error { - sem.Acquire(context.Background(), 1) - defer sem.Release(1) - - blobID := blobIDs[idx] - blobDigest := digest.Digest("sha256:" + blobID) - - var blobSize int64 - var rc io.ReadCloser - - if bkd != nil { - rc, err = bkd.Reader(blobID) - if err != nil { - return errors.Wrap(err, "get blob reader") - } - blobSize, err = bkd.Size(blobID) - if err != nil { - return errors.Wrap(err, "get blob size") - } - } else { - imageDesc, err := generator.sourcesParser[0].Remote.Resolve(ctx) - if err != nil { - if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { - logrus.Warningln("try to enable \"--source-insecure\" / \"--target-insecure\" option") - } - return errors.Wrap(err, "resolve image") - } - rc, err = generator.sourcesParser[0].Remote.Pull(ctx, *imageDesc, true) - if err != nil { - return errors.Wrap(err, "get blob reader") - } - blobInfo, err := pvd.ContentStore().Info(ctx, blobDigest) - if err != nil { - return errors.Wrap(err, "get info from content store") - } - blobSize = blobInfo.Size - } - defer rc.Close() - - blobSizeStr := humanize.Bytes(uint64(blobSize)) - logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushing blob from backend") - - blobDescs[idx] = ocispec.Descriptor{ - Digest: blobDigest, - Size: blobSize, - MediaType: converter.MediaTypeNydusBlob, - Annotations: map[string]string{ - converter.LayerAnnotationNydusBlob: "true", - }, - } - writer, err := getPushWriter(ctx, pvd, blobDescs[idx], generator.Opt) - if err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - writer, err = getPushWriter(ctx, pvd, blobDescs[idx], generator.Opt) - } - if err != nil { - return errors.Wrap(err, "get push writer") - } - } - if writer != nil { - defer writer.Close() - return content.Copy(ctx, writer, rc, blobSize, blobDigest) - } - - logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushed blob from backend") - - return nil - - }) - }(idx) - } - - if err := eg.Wait(); err != nil { - return nil, nil, errors.Wrap(err, "push blobs") - } - - // Update manifest blob layers - manifest.Layers = nil - manifest.Layers = 
append(blobDescs, manifest.Layers...) - - // Update bootstrap - cw, err := content.OpenWriter(ctx, pvd.ContentStore(), content.WithRef("merge-bootstrap")) - if err != nil { - return nil, nil, errors.Wrap(err, "open content store writer") - } - defer cw.Close() - - bootstrapPathTar := "image/image.boot" - rc, err := utils.PackTargz(bootstrapPath, bootstrapPathTar, false) - if err != nil { - return nil, nil, errors.Wrap(err, "get bootstrap reader") - } - defer rc.Close() - - gw := gzip.NewWriter(cw) - uncompressedDgst := digest.SHA256.Digester() - compressed := io.MultiWriter(gw, uncompressedDgst.Hash()) - - buffer := make([]byte, 32*1024) - if _, err := io.CopyBuffer(compressed, rc, buffer); err != nil { - return nil, nil, errors.Wrapf(err, "copy bootstrap targz into content store") - } - if err := gw.Close(); err != nil { - return nil, nil, errors.Wrap(err, "close gzip writer") - } - - compressedDgst := cw.Digest() - if err := cw.Commit(ctx, 0, compressedDgst, content.WithLabels(map[string]string{ - "containerd.io/uncompressed": uncompressedDgst.Digest().String(), - })); err != nil { - if !containerdErrdefs.IsAlreadyExists(err) { - return nil, nil, errors.Wrap(err, "commit to content store") - } - } - if err := cw.Close(); err != nil { - return nil, nil, errors.Wrap(err, "close content store writer") - } - - bootstrapInfo, err := pvd.ContentStore().Info(ctx, compressedDgst) - if err != nil { - return nil, nil, errors.Wrap(err, "get info from content store") - } - bootstrapSize := bootstrapInfo.Size - - bootstrapDesc := ocispec.Descriptor{ - Digest: compressedDgst, - Size: bootstrapSize, - MediaType: "application/vnd.docker.image.rootfs.diff.tar.gzip", - Annotations: map[string]string{ - "containerd.io/snapshot/nydus-bootstrap": "true", - "containerd.io/snapshot/nydus-fs-version": fsversion, - }, - } - manifest.Layers = append(manifest.Layers, bootstrapDesc) - - // Update image config - blobDigests := []digest.Digest{} - for idx := range blobDescs { - blobDigests = append(blobDigests, blobDescs[idx].Digest) - } - - config := ocispec.Image{} - if _, err := serverutils.ReadJSON(ctx, pvd.ContentStore(), &config, manifest.Config); err != nil { - return nil, nil, errors.Wrap(err, "read config json") - } - config.RootFS.DiffIDs = nil - config.RootFS.DiffIDs = append(blobDigests, config.RootFS.DiffIDs...) 
- config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, digest.Digest(uncompressedDgst.Digest().String())) - configDesc, err := serverutils.WriteJSON(ctx, pvd.ContentStore(), config, manifest.Config, generator.Target, nil) - if err != nil { - return nil, nil, errors.Wrap(err, "write config json") - } - manifest.Config = *configDesc - target, err := serverutils.WriteJSON(ctx, pvd.ContentStore(), &manifest, src, generator.Target, nil) - if err != nil { - return nil, nil, errors.Wrap(err, "write manifest json") - } - - return blobDescs, target, nil -} - -func getPushWriter(ctx context.Context, pvd *provider.Provider, desc ocispec.Descriptor, opt Opt) (content.Writer, error) { - resolver, err := pvd.Resolver(opt.Target) - if err != nil { - return nil, errors.Wrap(err, "get resolver") - } - - ref := opt.Target - if !strings.Contains(ref, "@") { - ref = ref + "@" + desc.Digest.String() - } - pusher, err := resolver.Pusher(ctx, ref) - if err != nil { - return nil, errors.Wrap(err, "create pusher") - } - writer, err := pusher.Push(ctx, desc) - if err != nil { - if containerdErrdefs.IsAlreadyExists(err) { - return nil, nil - } - return nil, err - } - - return writer, nil -} - -type store struct { - content.Store - remotes []ocispec.Descriptor -} - -func newStore(base content.Store, remotes []ocispec.Descriptor) *store { - return &store{ - Store: base, - remotes: remotes, - } -} - -func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) { - info, err := s.Store.Info(ctx, dgst) - if err != nil { - if !containerdErrdefs.IsNotFound(err) { - return content.Info{}, err - } - for _, desc := range s.remotes { - if desc.Digest == dgst { - return content.Info{ - Digest: desc.Digest, - Size: desc.Size, - }, nil - } - } - return content.Info{}, err - } - return info, nil -} +package generator + +import ( + "compress/gzip" + "context" + "encoding/json" + "io" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/containerd/containerd/namespaces" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + originprovider "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/goharbor/acceleration-service/pkg/remote" + + "github.com/containerd/nydus-snapshotter/pkg/converter" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/dustin/go-humanize" + "github.com/goharbor/acceleration-service/pkg/platformutil" + serverutils "github.com/goharbor/acceleration-service/pkg/utils" + "github.com/opencontainers/go-digest" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" + + "github.com/containerd/containerd/content" + containerdErrdefs "github.com/containerd/containerd/errdefs" + "github.com/goharbor/acceleration-service/pkg/errdefs" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// Opt defines Chunkdict generate options. +// Note: sources is one or more Nydus image references. 
+type Opt struct { + Sources []string + Target string + SourceInsecure bool + TargetInsecure bool + + BackendType string + BackendConfig string + BackendForcePush bool + + WorkDir string + NydusImagePath string + ExpectedArch string + + AllPlatforms bool + Platforms string +} + +// Generator generates chunkdict by deduplicating multiple nydus images +// invoking "nydus-image chunkdict save" to save image information into database. +type Generator struct { + Opt + sourcesParser []*parser.Parser +} + +type output struct { + Blobs []string +} + +// New creates Generator instance. +func New(opt Opt) (*Generator, error) { + // TODO: support sources image resolver + var sourcesParser []*parser.Parser + for _, source := range opt.Sources { + sourcesRemote, err := originprovider.DefaultRemote(source, opt.SourceInsecure) + if err != nil { + return nil, errors.Wrap(err, "Init source image parser") + } + sourceParser, err := parser.New(sourcesRemote, opt.ExpectedArch) + sourcesParser = append(sourcesParser, sourceParser) + if err != nil { + return nil, errors.Wrap(err, "Failed to create parser") + } + } + + generator := &Generator{ + Opt: opt, + sourcesParser: sourcesParser, + } + + return generator, nil +} + +// Generate saves multiple Nydus bootstraps into the database one by one. +func (generator *Generator) Generate(ctx context.Context) error { + var bootstrapPaths []string + bootstrapPaths, err := generator.pull(ctx) + + if err != nil { + if utils.RetryWithHTTP(err) { + for index := range generator.Sources { + generator.sourcesParser[index].Remote.MaybeWithHTTP(err) + } + } + bootstrapPaths, err = generator.pull(ctx) + if err != nil { + return err + } + } + + chunkdictBootstrapPath, outputPath, err := generator.generate(ctx, bootstrapPaths) + if err != nil { + return err + } + + if err := generator.push(ctx, chunkdictBootstrapPath, outputPath); err != nil { + return err + } + + // return os.RemoveAll(generator.WorkDir) + return nil +} + +// Pull the bootstrap of nydus image +func (generator *Generator) pull(ctx context.Context) ([]string, error) { + var bootstrapPaths []string + for index := range generator.Sources { + sourceParsed, err := generator.sourcesParser[index].Parse(ctx) + if err != nil { + return nil, errors.Wrap(err, "parse Nydus image") + } + + // Create a directory to store the image bootstrap + nydusImageName := strings.Replace(generator.Sources[index], "/", ":", -1) + bootstrapDirPath := filepath.Join(generator.WorkDir, nydusImageName) + if err := os.MkdirAll(bootstrapDirPath, fs.ModePerm); err != nil { + return nil, errors.Wrap(err, "creat work directory") + } + if err := generator.Output(ctx, sourceParsed, bootstrapDirPath, index); err != nil { + return nil, errors.Wrap(err, "output image information") + } + bootstrapPath := filepath.Join(bootstrapDirPath, "nydus_bootstrap") + bootstrapPaths = append(bootstrapPaths, bootstrapPath) + } + return bootstrapPaths, nil +} + +func (generator *Generator) generate(_ context.Context, bootstrapSlice []string) (string, string, error) { + // Invoke "nydus-image chunkdict generate" command + currentDir, _ := os.Getwd() + builder := build.NewBuilder(generator.NydusImagePath) + + chunkdictBootstrapPath := filepath.Join(generator.WorkDir, "chunkdict_bootstrap") + databaseType := "sqlite" + var databasePath string + if strings.HasPrefix(generator.WorkDir, "/") { + databasePath = databaseType + "://" + filepath.Join(generator.WorkDir, "database.db") + } else { + databasePath = databaseType + "://" + filepath.Join(currentDir, generator.WorkDir, 
"database.db") + } + outputPath := filepath.Join(generator.WorkDir, "nydus_bootstrap_output.json") + + if err := builder.Generate(build.GenerateOption{ + BootstrapPaths: bootstrapSlice, + ChunkdictBootstrapPath: chunkdictBootstrapPath, + DatabasePath: databasePath, + OutputPath: outputPath, + }); err != nil { + return "", "", errors.Wrap(err, "invalid nydus bootstrap format") + } + + logrus.Infof("Successfully generate image chunk dictionary") + return chunkdictBootstrapPath, outputPath, nil +} + +func hosts(generator *Generator) remote.HostFunc { + maps := make(map[string]bool) + for _, source := range generator.Sources { + maps[source] = generator.SourceInsecure + } + + maps[generator.Target] = generator.TargetInsecure + return func(ref string) (remote.CredentialFunc, bool, error) { + return remote.NewDockerConfigCredFunc(), maps[ref], nil + } +} + +func (generator *Generator) push(ctx context.Context, chunkdictBootstrapPath string, outputPath string) error { + // Basic configuration + ctx = namespaces.WithNamespace(ctx, "nydusify") + platformMC, err := platformutil.ParsePlatforms(generator.AllPlatforms, generator.Platforms) + if err != nil { + return err + } + + pvd, err := provider.New(generator.WorkDir, hosts(generator), 200, "v1", platformMC, 0) + if err != nil { + return err + } + + var bkd backend.Backend + if generator.BackendType != "" { + bkd, err = backend.NewBackend(generator.BackendType, []byte(generator.BackendConfig), nil) + if err != nil { + return errors.Wrapf(err, "new backend") + } + } + + // Pull source image + for index := range generator.Sources { + if err := pvd.Pull(ctx, generator.Sources[index]); err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + if err := pvd.Pull(ctx, generator.Sources[index]); err != nil { + return errors.Wrap(err, "try to pull image") + } + } else { + return errors.Wrap(err, "pull source image") + } + } + } + + logrus.Infof("pulled source image %s", generator.Sources[0]) + sourceImage, err := pvd.Image(ctx, generator.Sources[0]) + if err != nil { + return errors.Wrap(err, "find image from store") + } + sourceDescs, err := serverutils.GetManifests(ctx, pvd.ContentStore(), *sourceImage, platformMC) + if err != nil { + return errors.Wrap(err, "get image manifests") + } + + targetDescs := make([]ocispec.Descriptor, len(sourceDescs)) + + sem := semaphore.NewWeighted(1) + eg := errgroup.Group{} + for idx := range sourceDescs { + func(idx int) { + eg.Go(func() error { + sem.Acquire(context.Background(), 1) + defer sem.Release(1) + sourceDesc := sourceDescs[idx] + targetDesc := &sourceDesc + + // Get the blob from backend + descs, _targetDesc, err := pushBlobFromBackend(ctx, pvd, bkd, sourceDesc, *generator, chunkdictBootstrapPath, outputPath) + if err != nil { + return errors.Wrap(err, "get resolver") + } + if _targetDesc != nil { + targetDesc = _targetDesc + store := newStore(pvd.ContentStore(), descs) + pvd.SetContentStore(store) + } + + targetDescs[idx] = *targetDesc + + if err := pvd.Push(ctx, *targetDesc, generator.Target); err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + if err := pvd.Push(ctx, *targetDesc, generator.Target); err != nil { + return errors.Wrap(err, "try to push image manifest") + } + } else { + return errors.Wrap(err, "push target image manifest") + } + } + return nil + }) + }(idx) + } + if err := eg.Wait(); err != nil { + return errors.Wrap(err, "push image manifests") + } + return nil +} + +func pushBlobFromBackend( + ctx context.Context, pvd *provider.Provider, bkd 
backend.Backend, src ocispec.Descriptor, generator Generator, bootstrapPath string, outputPath string, +) ([]ocispec.Descriptor, *ocispec.Descriptor, error) { + manifest := ocispec.Manifest{} + if _, err := serverutils.ReadJSON(ctx, pvd.ContentStore(), &manifest, src); err != nil { + return nil, nil, errors.Wrap(err, "read manifest from store") + } + fsversion := src.Annotations["containerd.io/snapshot/nydus-fs-version"] + // Read the Nydusify output JSON to get the list of blobs + var out output + bytes, err := os.ReadFile(outputPath) + if err != nil { + return nil, nil, errors.Wrap(err, "read output file") + } + if err := json.Unmarshal(bytes, &out); err != nil { + return nil, nil, errors.Wrap(err, "unmarshal output json") + } + + blobIDs := []string{} + blobIDMap := map[string]bool{} + for _, blobID := range out.Blobs { + if blobIDMap[blobID] { + continue + } + blobIDs = append(blobIDs, blobID) + blobIDMap[blobID] = true + } + blobDescs := make([]ocispec.Descriptor, len(blobIDs)) + + eg, ctx := errgroup.WithContext(ctx) + sem := semaphore.NewWeighted(int64(provider.LayerConcurrentLimit)) + for idx := range blobIDs { + func(idx int) { + eg.Go(func() error { + sem.Acquire(context.Background(), 1) + defer sem.Release(1) + + blobID := blobIDs[idx] + blobDigest := digest.Digest("sha256:" + blobID) + + var blobSize int64 + var rc io.ReadCloser + + if bkd != nil { + rc, err = bkd.Reader(blobID) + if err != nil { + return errors.Wrap(err, "get blob reader") + } + blobSize, err = bkd.Size(blobID) + if err != nil { + return errors.Wrap(err, "get blob size") + } + } else { + imageDesc, err := generator.sourcesParser[0].Remote.Resolve(ctx) + if err != nil { + if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { + logrus.Warningln("try to enable \"--source-insecure\" / \"--target-insecure\" option") + } + return errors.Wrap(err, "resolve image") + } + rc, err = generator.sourcesParser[0].Remote.Pull(ctx, *imageDesc, true) + if err != nil { + return errors.Wrap(err, "get blob reader") + } + blobInfo, err := pvd.ContentStore().Info(ctx, blobDigest) + if err != nil { + return errors.Wrap(err, "get info from content store") + } + blobSize = blobInfo.Size + } + defer rc.Close() + + blobSizeStr := humanize.Bytes(uint64(blobSize)) + logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushing blob from backend") + + blobDescs[idx] = ocispec.Descriptor{ + Digest: blobDigest, + Size: blobSize, + MediaType: converter.MediaTypeNydusBlob, + Annotations: map[string]string{ + converter.LayerAnnotationNydusBlob: "true", + }, + } + writer, err := getPushWriter(ctx, pvd, blobDescs[idx], generator.Opt) + if err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + writer, err = getPushWriter(ctx, pvd, blobDescs[idx], generator.Opt) + } + if err != nil { + return errors.Wrap(err, "get push writer") + } + } + if writer != nil { + defer writer.Close() + return content.Copy(ctx, writer, rc, blobSize, blobDigest) + } + + logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushed blob from backend") + + return nil + + }) + }(idx) + } + + if err := eg.Wait(); err != nil { + return nil, nil, errors.Wrap(err, "push blobs") + } + + // Update manifest blob layers + manifest.Layers = nil + manifest.Layers = append(blobDescs, manifest.Layers...) 
+ + // Update bootstrap + cw, err := content.OpenWriter(ctx, pvd.ContentStore(), content.WithRef("merge-bootstrap")) + if err != nil { + return nil, nil, errors.Wrap(err, "open content store writer") + } + defer cw.Close() + + bootstrapPathTar := "image/image.boot" + rc, err := utils.PackTargz(bootstrapPath, bootstrapPathTar, false) + if err != nil { + return nil, nil, errors.Wrap(err, "get bootstrap reader") + } + defer rc.Close() + + gw := gzip.NewWriter(cw) + uncompressedDgst := digest.SHA256.Digester() + compressed := io.MultiWriter(gw, uncompressedDgst.Hash()) + + buffer := make([]byte, 32*1024) + if _, err := io.CopyBuffer(compressed, rc, buffer); err != nil { + return nil, nil, errors.Wrapf(err, "copy bootstrap targz into content store") + } + if err := gw.Close(); err != nil { + return nil, nil, errors.Wrap(err, "close gzip writer") + } + + compressedDgst := cw.Digest() + if err := cw.Commit(ctx, 0, compressedDgst, content.WithLabels(map[string]string{ + "containerd.io/uncompressed": uncompressedDgst.Digest().String(), + })); err != nil { + if !containerdErrdefs.IsAlreadyExists(err) { + return nil, nil, errors.Wrap(err, "commit to content store") + } + } + if err := cw.Close(); err != nil { + return nil, nil, errors.Wrap(err, "close content store writer") + } + + bootstrapInfo, err := pvd.ContentStore().Info(ctx, compressedDgst) + if err != nil { + return nil, nil, errors.Wrap(err, "get info from content store") + } + bootstrapSize := bootstrapInfo.Size + + bootstrapDesc := ocispec.Descriptor{ + Digest: compressedDgst, + Size: bootstrapSize, + MediaType: "application/vnd.docker.image.rootfs.diff.tar.gzip", + Annotations: map[string]string{ + "containerd.io/snapshot/nydus-bootstrap": "true", + "containerd.io/snapshot/nydus-fs-version": fsversion, + }, + } + manifest.Layers = append(manifest.Layers, bootstrapDesc) + + // Update image config + blobDigests := []digest.Digest{} + for idx := range blobDescs { + blobDigests = append(blobDigests, blobDescs[idx].Digest) + } + + config := ocispec.Image{} + if _, err := serverutils.ReadJSON(ctx, pvd.ContentStore(), &config, manifest.Config); err != nil { + return nil, nil, errors.Wrap(err, "read config json") + } + config.RootFS.DiffIDs = nil + config.RootFS.DiffIDs = append(blobDigests, config.RootFS.DiffIDs...) 
+ config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, digest.Digest(uncompressedDgst.Digest().String())) + configDesc, err := serverutils.WriteJSON(ctx, pvd.ContentStore(), config, manifest.Config, generator.Target, nil) + if err != nil { + return nil, nil, errors.Wrap(err, "write config json") + } + manifest.Config = *configDesc + target, err := serverutils.WriteJSON(ctx, pvd.ContentStore(), &manifest, src, generator.Target, nil) + if err != nil { + return nil, nil, errors.Wrap(err, "write manifest json") + } + + return blobDescs, target, nil +} + +func getPushWriter(ctx context.Context, pvd *provider.Provider, desc ocispec.Descriptor, opt Opt) (content.Writer, error) { + resolver, err := pvd.Resolver(opt.Target) + if err != nil { + return nil, errors.Wrap(err, "get resolver") + } + + ref := opt.Target + if !strings.Contains(ref, "@") { + ref = ref + "@" + desc.Digest.String() + } + pusher, err := resolver.Pusher(ctx, ref) + if err != nil { + return nil, errors.Wrap(err, "create pusher") + } + writer, err := pusher.Push(ctx, desc) + if err != nil { + if containerdErrdefs.IsAlreadyExists(err) { + return nil, nil + } + return nil, err + } + + return writer, nil +} + +type store struct { + content.Store + remotes []ocispec.Descriptor +} + +func newStore(base content.Store, remotes []ocispec.Descriptor) *store { + return &store{ + Store: base, + remotes: remotes, + } +} + +func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) { + info, err := s.Store.Info(ctx, dgst) + if err != nil { + if !containerdErrdefs.IsNotFound(err) { + return content.Info{}, err + } + for _, desc := range s.remotes { + if desc.Digest == dgst { + return content.Info{ + Digest: desc.Digest, + Size: desc.Size, + }, nil + } + } + return content.Info{}, err + } + return info, nil +} diff --git a/contrib/nydusify/pkg/chunkdict/generator/output.go b/contrib/nydusify/pkg/chunkdict/generator/output.go index 1a1364c61c7..318e4008a3f 100644 --- a/contrib/nydusify/pkg/chunkdict/generator/output.go +++ b/contrib/nydusify/pkg/chunkdict/generator/output.go @@ -1,66 +1,66 @@ -package generator - -import ( - "context" - "encoding/json" - "fmt" - "os" - "path/filepath" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -func prettyDump(obj interface{}, name string) error { - bytes, err := json.MarshalIndent(obj, "", " ") - if err != nil { - return err - } - return os.WriteFile(name, bytes, 0644) -} - -// Output outputs Nydus image nydus_bootstrap file and manifest, config to JSON file. 
-func (generator *Generator) Output( - ctx context.Context, sourceParsed *parser.Parsed, outputPath string, index int, -) error { - if sourceParsed.Index != nil { - if err := prettyDump( - sourceParsed.Index, - filepath.Join(outputPath, "nydus_index.json"), - ); err != nil { - return errors.Wrap(err, "output nydus index file") - } - } - if sourceParsed.NydusImage != nil { - if err := prettyDump( - sourceParsed.NydusImage.Manifest, - filepath.Join(outputPath, "nydus_manifest.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus manifest file") - } - if err := prettyDump( - sourceParsed.NydusImage.Config, - filepath.Join(outputPath, "nydus_config.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus config file") - } - source := filepath.Join(outputPath, "nydus_bootstrap") - logrus.Infof("Pulling Nydus bootstrap to %s", source) - bootstrapReader, err := generator.sourcesParser[index].PullNydusBootstrap(ctx, sourceParsed.NydusImage) - if err != nil { - return errors.Wrap(err, "pull Nydus bootstrap layer") - } - defer bootstrapReader.Close() - - if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, source); err != nil { - return errors.Wrap(err, "unpack Nydus bootstrap layer") - } - } else { - err := fmt.Errorf("the %s is not a Nydus image", generator.sourcesParser[index].Remote.Ref) - return err - } - return nil -} +package generator + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +func prettyDump(obj interface{}, name string) error { + bytes, err := json.MarshalIndent(obj, "", " ") + if err != nil { + return err + } + return os.WriteFile(name, bytes, 0644) +} + +// Output outputs Nydus image nydus_bootstrap file and manifest, config to JSON file. 
+func (generator *Generator) Output( + ctx context.Context, sourceParsed *parser.Parsed, outputPath string, index int, +) error { + if sourceParsed.Index != nil { + if err := prettyDump( + sourceParsed.Index, + filepath.Join(outputPath, "nydus_index.json"), + ); err != nil { + return errors.Wrap(err, "output nydus index file") + } + } + if sourceParsed.NydusImage != nil { + if err := prettyDump( + sourceParsed.NydusImage.Manifest, + filepath.Join(outputPath, "nydus_manifest.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus manifest file") + } + if err := prettyDump( + sourceParsed.NydusImage.Config, + filepath.Join(outputPath, "nydus_config.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus config file") + } + source := filepath.Join(outputPath, "nydus_bootstrap") + logrus.Infof("Pulling Nydus bootstrap to %s", source) + bootstrapReader, err := generator.sourcesParser[index].PullNydusBootstrap(ctx, sourceParsed.NydusImage) + if err != nil { + return errors.Wrap(err, "pull Nydus bootstrap layer") + } + defer bootstrapReader.Close() + + if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, source); err != nil { + return errors.Wrap(err, "unpack Nydus bootstrap layer") + } + } else { + err := fmt.Errorf("the %s is not a Nydus image", generator.sourcesParser[index].Remote.Ref) + return err + } + return nil +} diff --git a/contrib/nydusify/pkg/committer/commiter.go b/contrib/nydusify/pkg/committer/commiter.go index 8dd8eef6ac7..74130ddc758 100644 --- a/contrib/nydusify/pkg/committer/commiter.go +++ b/contrib/nydusify/pkg/committer/commiter.go @@ -1,705 +1,705 @@ -// Copyright 2024 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package committer - -import ( - "bytes" - "compress/gzip" - "context" - "encoding/json" - "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "runtime" - "strings" - "sync" - "time" - - "github.com/containerd/containerd/content/local" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/reference/docker" - "github.com/containerd/nydus-snapshotter/pkg/converter" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff" - parserPkg "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/dustin/go-humanize" - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sync/errgroup" -) - -type Opt struct { - WorkDir string - ContainerdAddress string - NydusImagePath string - Namespace string - - ContainerID string - SourceInsecure bool - TargetRef string - TargetInsecure bool - MaximumTimes int - FsVersion string - Compressor string - - WithPaths []string - WithoutPaths []string -} - -type Committer struct { - workDir string - builder string - manager *Manager -} - -func NewCommitter(opt Opt) (*Committer, error) { - if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { - return nil, errors.Wrap(err, "prepare work dir") - } - - workDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-commiter-") - if err != nil { - return nil, errors.Wrap(err, "create temp dir") - } - - cm, err := NewManager(opt.ContainerdAddress) - if err != nil { - return nil, errors.Wrap(err, "new container manager") - } - return &Committer{ - workDir: workDir, - builder: opt.NydusImagePath, - manager: cm, - }, nil -} - -func 
(cm *Committer) Commit(ctx context.Context, opt Opt) error { - ctx = namespaces.WithNamespace(ctx, opt.Namespace) - targetRef, err := ValidateRef(opt.TargetRef) - if err != nil { - return errors.Wrap(err, "parse target image name") - } - - inspect, err := cm.manager.Inspect(ctx, opt.ContainerID) - if err != nil { - return errors.Wrap(err, "inspect container") - } - - logrus.Infof("pulling base bootstrap") - start := time.Now() - image, committedLayers, err := cm.pullBootstrap(ctx, inspect.Image, "bootstrap-base", opt.SourceInsecure) - if err != nil { - return errors.Wrap(err, "pull base bootstrap") - } - logrus.Infof("pulled base bootstrap, elapsed: %s", time.Since(start)) - - if committedLayers >= opt.MaximumTimes { - return fmt.Errorf("reached maximum committed times %d", opt.MaximumTimes) - } - if opt.FsVersion, opt.Compressor, err = cm.obtainBootStrapInfo(ctx, "bootstrap-base"); err != nil { - return errors.Wrap(err, "obtain bootstrap FsVersion and Compressor") - } - - mountList := NewMountList() - - var upperBlob *Blob - mountBlobs := make([]Blob, len(opt.WithPaths)) - commit := func() error { - eg := errgroup.Group{} - eg.Go(func() error { - var upperBlobDigest *digest.Digest - if err := withRetry(func() error { - upperBlobDigest, err = cm.commitUpperByDiff(ctx, mountList.Add, opt.WithPaths, opt.WithoutPaths, inspect.LowerDirs, inspect.UpperDir, "blob-upper", opt.FsVersion, opt.Compressor) - return err - }, 3); err != nil { - return errors.Wrap(err, "commit upper") - } - logrus.Infof("pushing blob for upper") - start := time.Now() - upperBlobDesc, err := cm.pushBlob(ctx, "blob-upper", *upperBlobDigest, opt.TargetRef, opt.TargetInsecure) - if err != nil { - return errors.Wrap(err, "push upper blob") - } - upperBlob = &Blob{ - Name: "blob-upper", - Desc: *upperBlobDesc, - } - logrus.Infof("pushed blob for upper, elapsed: %s", time.Since(start)) - return nil - }) - - if len(opt.WithPaths) > 0 { - for idx := range opt.WithPaths { - func(idx int) { - eg.Go(func() error { - withPath := opt.WithPaths[idx] - name := fmt.Sprintf("blob-mount-%d", idx) - var mountBlobDigest *digest.Digest - if err := withRetry(func() error { - mountBlobDigest, err = cm.commitMountByNSEnter(ctx, inspect.Pid, withPath, name, opt.FsVersion, opt.Compressor) - return err - }, 3); err != nil { - return errors.Wrap(err, "commit mount") - } - logrus.Infof("pushing blob for mount") - start := time.Now() - mountBlobDesc, err := cm.pushBlob(ctx, name, *mountBlobDigest, opt.TargetRef, opt.TargetInsecure) - if err != nil { - return errors.Wrap(err, "push mount blob") - } - mountBlobs[idx] = Blob{ - Name: name, - Desc: *mountBlobDesc, - } - logrus.Infof("pushed blob for mount, elapsed: %s", time.Since(start)) - return nil - }) - }(idx) - } - } - - if err := eg.Wait(); err != nil { - return err - } - - appendedEg := errgroup.Group{} - appendedMutex := sync.Mutex{} - if len(mountList.paths) > 0 { - logrus.Infof("need commit appened mount path: %s", strings.Join(mountList.paths, ", ")) - } - for idx := range mountList.paths { - func(idx int) { - appendedEg.Go(func() error { - mountPath := mountList.paths[idx] - name := fmt.Sprintf("blob-appended-mount-%d", idx) - var mountBlobDigest *digest.Digest - if err := withRetry(func() error { - mountBlobDigest, err = cm.commitMountByNSEnter(ctx, inspect.Pid, mountPath, name, opt.FsVersion, opt.Compressor) - return err - }, 3); err != nil { - return errors.Wrap(err, "commit appended mount") - } - logrus.Infof("pushing blob for appended mount") - start := time.Now() - mountBlobDesc, err 
:= cm.pushBlob(ctx, name, *mountBlobDigest, opt.TargetRef, opt.TargetInsecure) - if err != nil { - return errors.Wrap(err, "push appended mount blob") - } - appendedMutex.Lock() - mountBlobs = append(mountBlobs, Blob{ - Name: name, - Desc: *mountBlobDesc, - }) - appendedMutex.Unlock() - logrus.Infof("pushed blob for appended mount, elapsed: %s", time.Since(start)) - return nil - }) - }(idx) - } - - return appendedEg.Wait() - } - - if err := cm.pause(ctx, opt.ContainerID, commit); err != nil { - return errors.Wrap(err, "pause container to commit") - } - - logrus.Infof("merging base and upper bootstraps") - _, bootstrapDiffID, err := cm.mergeBootstrap(ctx, *upperBlob, mountBlobs, "bootstrap-base", "bootstrap-merged.tar") - if err != nil { - return errors.Wrap(err, "merge bootstrap") - } - - logrus.Infof("pushing committed image to %s", targetRef) - if err := cm.pushManifest(ctx, *image, *bootstrapDiffID, targetRef, "bootstrap-merged.tar", opt.FsVersion, upperBlob, mountBlobs, opt.TargetInsecure); err != nil { - return errors.Wrap(err, "push manifest") - } - - return nil -} - -func (cm *Committer) pullBootstrap(ctx context.Context, ref, bootstrapName string, insecure bool) (*parserPkg.Image, int, error) { - remoter, err := provider.DefaultRemote(ref, insecure) - if err != nil { - return nil, 0, errors.Wrap(err, "create remote") - } - - parser, err := parserPkg.New(remoter, runtime.GOARCH) - if err != nil { - return nil, 0, errors.Wrap(err, "create parser") - } - - var parsed *parserPkg.Parsed - parsed, err = parser.Parse(ctx) - if err != nil { - if utils.RetryWithHTTP(err) { - remoter.MaybeWithHTTP(err) - parsed, err = parser.Parse(ctx) - if err != nil { - return nil, 0, errors.Wrap(err, "parse nydus image") - } - } else { - return nil, 0, errors.Wrap(err, "parse nydus image") - } - } - if parsed.NydusImage == nil { - return nil, 0, fmt.Errorf("not a nydus image: %s", ref) - } - - bootstrapDesc := parserPkg.FindNydusBootstrapDesc(&parsed.NydusImage.Manifest) - if bootstrapDesc == nil { - return nil, 0, fmt.Errorf("not found nydus bootstrap layer") - } - committedLayers := 0 - _commitBlobs := bootstrapDesc.Annotations[utils.LayerAnnotationNydusCommitBlobs] - if _commitBlobs != "" { - committedLayers = len(strings.Split(_commitBlobs, ",")) - logrus.Infof("detected the committed layers: %d", committedLayers) - } - - target := filepath.Join(cm.workDir, bootstrapName) - reader, err := parser.PullNydusBootstrap(ctx, parsed.NydusImage) - if err != nil { - return nil, 0, errors.Wrap(err, "pull bootstrap layer") - } - defer reader.Close() - - if err := utils.UnpackFile(reader, utils.BootstrapFileNameInLayer, target); err != nil { - return nil, 0, errors.Wrap(err, "unpack bootstrap layer") - } - - return parsed.NydusImage, committedLayers, nil -} - -func (cm *Committer) commitUpperByDiff(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, lowerDirs, upperDir, blobName, fsversion, compressor string) (*digest.Digest, error) { - logrus.Infof("committing upper") - start := time.Now() - - blobPath := filepath.Join(cm.workDir, blobName) - blob, err := os.Create(blobPath) - if err != nil { - return nil, errors.Wrap(err, "create upper blob file") - } - defer blob.Close() - - digester := digest.SHA256.Digester() - counter := Counter{} - tarWc, err := converter.Pack(ctx, io.MultiWriter(blob, digester.Hash(), &counter), converter.PackOption{ - WorkDir: cm.workDir, - FsVersion: fsversion, - Compressor: compressor, - BuilderPath: cm.builder, - }) - if err != nil { - return 
nil, errors.Wrap(err, "initialize pack to blob") - } - - if err := diff.Diff(ctx, appendMount, withPaths, withoutPaths, tarWc, lowerDirs, upperDir); err != nil { - return nil, errors.Wrap(err, "make diff") - } - - if err := tarWc.Close(); err != nil { - return nil, errors.Wrap(err, "pack to blob") - } - - blobDigest := digester.Digest() - logrus.Infof("committed upper, size: %s, elapsed: %s", humanize.Bytes(uint64(counter.Size())), time.Since(start)) - - return &blobDigest, nil -} - -func (cm *Committer) pushBlob(ctx context.Context, blobName string, blobDigest digest.Digest, targetRef string, insecure bool) (*ocispec.Descriptor, error) { - blobRa, err := local.OpenReader(filepath.Join(cm.workDir, blobName)) - if err != nil { - return nil, errors.Wrap(err, "open reader for upper blob") - } - - blobDesc := ocispec.Descriptor{ - Digest: blobDigest, - Size: blobRa.Size(), - MediaType: utils.MediaTypeNydusBlob, - Annotations: map[string]string{ - utils.LayerAnnotationUncompressed: blobDigest.String(), - utils.LayerAnnotationNydusBlob: "true", - }, - } - - remoter, err := provider.DefaultRemote(targetRef, insecure) - if err != nil { - return nil, errors.Wrap(err, "create remote") - } - - if err := remoter.Push(ctx, blobDesc, true, io.NewSectionReader(blobRa, 0, blobRa.Size())); err != nil { - if utils.RetryWithHTTP(err) { - remoter.MaybeWithHTTP(err) - if err := remoter.Push(ctx, blobDesc, true, io.NewSectionReader(blobRa, 0, blobRa.Size())); err != nil { - return nil, errors.Wrap(err, "push blob") - } - } else { - return nil, errors.Wrap(err, "push blob") - } - } - return &blobDesc, nil -} - -func (cm *Committer) pause(ctx context.Context, containerID string, handle func() error) error { - logrus.Infof("pausing container: %s", containerID) - if err := cm.manager.Pause(ctx, containerID); err != nil { - return errors.Wrap(err, "pause container") - } - - if err := handle(); err != nil { - logrus.Infof("unpausing container: %s", containerID) - if err := cm.manager.UnPause(ctx, containerID); err != nil { - logrus.Errorf("unpause container: %s", containerID) - } - return err - } - - logrus.Infof("unpausing container: %s", containerID) - return cm.manager.UnPause(ctx, containerID) -} - -func (cm *Committer) pushManifest( - ctx context.Context, nydusImage parserPkg.Image, bootstrapDiffID digest.Digest, targetRef, bootstrapName, fsversion string, upperBlob *Blob, mountBlobs []Blob, insecure bool, -) error { - lowerBlobLayers := []ocispec.Descriptor{} - for idx := range nydusImage.Manifest.Layers { - layer := nydusImage.Manifest.Layers[idx] - if layer.MediaType == utils.MediaTypeNydusBlob { - lowerBlobLayers = append(lowerBlobLayers, layer) - } - } - - // Push image config - config := nydusImage.Config - - config.RootFS.DiffIDs = []digest.Digest{} - for idx := range lowerBlobLayers { - config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, lowerBlobLayers[idx].Digest) - } - for idx := range mountBlobs { - mountBlob := mountBlobs[idx] - config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, mountBlob.Desc.Digest) - } - config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, upperBlob.Desc.Digest) - config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, bootstrapDiffID) - - configBytes, configDesc, err := cm.makeDesc(config, nydusImage.Manifest.Config) - if err != nil { - return errors.Wrap(err, "make config desc") - } - - remoter, err := provider.DefaultRemote(targetRef, insecure) - if err != nil { - return errors.Wrap(err, "create remote") - } - - if err := remoter.Push(ctx, *configDesc, true, 
bytes.NewReader(configBytes)); err != nil { - if utils.RetryWithHTTP(err) { - remoter.MaybeWithHTTP(err) - if err := remoter.Push(ctx, *configDesc, true, bytes.NewReader(configBytes)); err != nil { - return errors.Wrap(err, "push image config") - } - } else { - return errors.Wrap(err, "push image config") - } - } - - // Push bootstrap layer - bootstrapTarPath := filepath.Join(cm.workDir, bootstrapName) - bootstrapTar, err := os.Open(bootstrapTarPath) - if err != nil { - return errors.Wrap(err, "open bootstrap tar file") - } - - bootstrapTarGzPath := filepath.Join(cm.workDir, bootstrapName+".gz") - bootstrapTarGz, err := os.Create(bootstrapTarGzPath) - if err != nil { - return errors.Wrap(err, "create bootstrap tar.gz file") - } - defer bootstrapTarGz.Close() - - digester := digest.SHA256.Digester() - gzWriter := gzip.NewWriter(io.MultiWriter(bootstrapTarGz, digester.Hash())) - if _, err := io.Copy(gzWriter, bootstrapTar); err != nil { - return errors.Wrap(err, "compress bootstrap tar to tar.gz") - } - if err := gzWriter.Close(); err != nil { - return errors.Wrap(err, "close gzip writer") - } - - ra, err := local.OpenReader(bootstrapTarGzPath) - if err != nil { - return errors.Wrap(err, "open reader for upper blob") - } - defer ra.Close() - - commitBlobs := []string{} - for idx := range mountBlobs { - mountBlob := mountBlobs[idx] - commitBlobs = append(commitBlobs, mountBlob.Desc.Digest.String()) - } - commitBlobs = append(commitBlobs, upperBlob.Desc.Digest.String()) - - bootstrapDesc := ocispec.Descriptor{ - Digest: digester.Digest(), - Size: ra.Size(), - MediaType: ocispec.MediaTypeImageLayerGzip, - Annotations: map[string]string{ - converter.LayerAnnotationFSVersion: fsversion, - converter.LayerAnnotationNydusBootstrap: "true", - utils.LayerAnnotationNydusCommitBlobs: strings.Join(commitBlobs, ","), - }, - } - - bootstrapRc, err := os.Open(bootstrapTarGzPath) - if err != nil { - return errors.Wrapf(err, "open bootstrap %s", bootstrapTarGzPath) - } - defer bootstrapRc.Close() - if err := remoter.Push(ctx, bootstrapDesc, true, bootstrapRc); err != nil { - return errors.Wrap(err, "push bootstrap layer") - } - - // Push image manifest - layers := lowerBlobLayers - for idx := range mountBlobs { - mountBlob := mountBlobs[idx] - layers = append(layers, mountBlob.Desc) - } - layers = append(layers, upperBlob.Desc) - layers = append(layers, bootstrapDesc) - - nydusImage.Manifest.Config = *configDesc - nydusImage.Manifest.Layers = layers - - manifestBytes, manifestDesc, err := cm.makeDesc(nydusImage.Manifest, nydusImage.Desc) - if err != nil { - return errors.Wrap(err, "make config desc") - } - if err := remoter.Push(ctx, *manifestDesc, false, bytes.NewReader(manifestBytes)); err != nil { - return errors.Wrap(err, "push image manifest") - } - - return nil -} - -func (cm *Committer) makeDesc(x interface{}, oldDesc ocispec.Descriptor) ([]byte, *ocispec.Descriptor, error) { - data, err := json.MarshalIndent(x, "", " ") - if err != nil { - return nil, nil, errors.Wrap(err, "json marshal") - } - dgst := digest.SHA256.FromBytes(data) - - newDesc := oldDesc - newDesc.Size = int64(len(data)) - newDesc.Digest = dgst - - return data, &newDesc, nil -} - -func (cm *Committer) commitMountByNSEnter(ctx context.Context, containerPid int, sourceDir, name, fsversion, compressor string) (*digest.Digest, error) { - logrus.Infof("committing mount: %s", sourceDir) - start := time.Now() - - blobPath := filepath.Join(cm.workDir, name) - blob, err := os.Create(blobPath) - if err != nil { - return nil, errors.Wrap(err, 
"create mount blob file") - } - defer blob.Close() - - digester := digest.SHA256.Digester() - counter := Counter{} - tarWc, err := converter.Pack(ctx, io.MultiWriter(blob, &counter, digester.Hash()), converter.PackOption{ - WorkDir: cm.workDir, - FsVersion: fsversion, - Compressor: compressor, - BuilderPath: cm.builder, - }) - if err != nil { - return nil, errors.Wrap(err, "initialize pack to blob") - } - - if err := copyFromContainer(ctx, containerPid, sourceDir, tarWc); err != nil { - return nil, errors.Wrapf(err, "copy %s from pid %d", sourceDir, containerPid) - } - - if err := tarWc.Close(); err != nil { - return nil, errors.Wrap(err, "pack to blob") - } - - mountBlobDigest := digester.Digest() - - logrus.Infof("committed mount: %s, size: %s, elapsed %s", sourceDir, humanize.Bytes(uint64(counter.Size())), time.Since(start)) - - return &mountBlobDigest, nil -} - -func (cm *Committer) mergeBootstrap( - ctx context.Context, upperBlob Blob, mountBlobs []Blob, baseBootstrapName, mergedBootstrapName string, -) ([]digest.Digest, *digest.Digest, error) { - baseBootstrap := filepath.Join(cm.workDir, baseBootstrapName) - upperBlobRa, err := local.OpenReader(filepath.Join(cm.workDir, upperBlob.Name)) - if err != nil { - return nil, nil, errors.Wrap(err, "open reader for upper blob") - } - - mergedBootstrap := filepath.Join(cm.workDir, mergedBootstrapName) - bootstrap, err := os.Create(mergedBootstrap) - if err != nil { - return nil, nil, errors.Wrap(err, "create upper blob file") - } - defer bootstrap.Close() - - digester := digest.SHA256.Digester() - writer := io.MultiWriter(bootstrap, digester.Hash()) - - layers := []converter.Layer{} - layers = append(layers, converter.Layer{ - Digest: upperBlob.Desc.Digest, - ReaderAt: upperBlobRa, - }) - for idx := range mountBlobs { - mountBlob := mountBlobs[idx] - mountBlobRa, err := local.OpenReader(filepath.Join(cm.workDir, mountBlob.Name)) - if err != nil { - return nil, nil, errors.Wrap(err, "open reader for mount blob") - } - layers = append(layers, converter.Layer{ - Digest: mountBlob.Desc.Digest, - ReaderAt: mountBlobRa, - }) - } - - blobDigests, err := converter.Merge(ctx, layers, writer, converter.MergeOption{ - WorkDir: cm.workDir, - ParentBootstrapPath: baseBootstrap, - WithTar: true, - BuilderPath: cm.builder, - }) - if err != nil { - return nil, nil, errors.Wrap(err, "merge bootstraps") - } - bootstrapDiffID := digester.Digest() - - return blobDigests, &bootstrapDiffID, nil -} - -func copyFromContainer(ctx context.Context, containerPid int, source string, target io.Writer) error { - config := &Config{ - Mount: true, - Target: containerPid, - } - - stderr, err := config.ExecuteContext(ctx, target, "tar", "--xattrs", "--ignore-failed-read", "--absolute-names", "-cf", "-", source) - if err != nil { - return errors.Wrap(err, fmt.Sprintf("execute tar: %s", strings.TrimSpace(stderr))) - } - if stderr != "" { - logrus.Warnf("from container: %s", stderr) - } - - return nil -} - -type MountList struct { - mutex sync.Mutex - paths []string -} - -func NewMountList() *MountList { - return &MountList{ - paths: make([]string, 0), - } -} - -func (ml *MountList) Add(path string) { - ml.mutex.Lock() - defer ml.mutex.Unlock() - - ml.paths = append(ml.paths, path) -} - -type Blob struct { - Name string - BootstrapName string - Desc ocispec.Descriptor -} - -func withRetry(handle func() error, total int) error { - for { - total-- - err := handle() - if err == nil { - return nil - } - - if total > 0 { - logrus.WithError(err).Warnf("retry (remain %d times)", total) 
- continue - } - - return err - } -} - -// ValidateRef validate the target image reference. -func ValidateRef(ref string) (string, error) { - named, err := docker.ParseDockerRef(ref) - if err != nil { - return "", errors.Wrapf(err, "invalid image reference: %s", ref) - } - if _, ok := named.(docker.Digested); ok { - return "", fmt.Errorf("unsupported digested image reference: %s", ref) - } - named = docker.TagNameOnly(named) - return named.String(), nil -} - -type outputJSON struct { - FsVersion string `json:"fs_version"` - Compressor string `json:"compressor"` -} - -func (cm *Committer) obtainBootStrapInfo(ctx context.Context, BootstrapName string) (string, string, error) { - targetBootstrapPath := filepath.Join(cm.workDir, BootstrapName) - outputJSONPath := filepath.Join(cm.workDir, "output.json") - defer os.Remove(outputJSONPath) - - args := []string{ - "check", - "--log-level", - "warn", - "--bootstrap", - targetBootstrapPath, - "--output-json", - outputJSONPath, - } - - logrus.Debugf("\tCommand: %s", args) - cmd := exec.CommandContext(ctx, cm.builder, args...) - - if err := cmd.Run(); err != nil { - return "", "", errors.Wrap(err, "run merge command") - } - - outputBytes, err := os.ReadFile(outputJSONPath) - if err != nil { - return "", "", errors.Wrapf(err, "read file %s", outputJSONPath) - } - var output outputJSON - err = json.Unmarshal(outputBytes, &output) - if err != nil { - return "", "", errors.Wrapf(err, "unmarshal output json file %s", outputJSONPath) - } - return output.FsVersion, strings.ToLower(output.Compressor), nil -} +// Copyright 2024 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package committer + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + "time" + + "github.com/containerd/containerd/content/local" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/reference/docker" + "github.com/containerd/nydus-snapshotter/pkg/converter" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff" + parserPkg "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/dustin/go-humanize" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" +) + +type Opt struct { + WorkDir string + ContainerdAddress string + NydusImagePath string + Namespace string + + ContainerID string + SourceInsecure bool + TargetRef string + TargetInsecure bool + MaximumTimes int + FsVersion string + Compressor string + + WithPaths []string + WithoutPaths []string +} + +type Committer struct { + workDir string + builder string + manager *Manager +} + +func NewCommitter(opt Opt) (*Committer, error) { + if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { + return nil, errors.Wrap(err, "prepare work dir") + } + + workDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-commiter-") + if err != nil { + return nil, errors.Wrap(err, "create temp dir") + } + + cm, err := NewManager(opt.ContainerdAddress) + if err != nil { + return nil, errors.Wrap(err, "new container manager") + } + return &Committer{ + workDir: workDir, + builder: opt.NydusImagePath, + manager: cm, + }, nil +} + +func (cm *Committer) Commit(ctx context.Context, opt Opt) error { + ctx = 
namespaces.WithNamespace(ctx, opt.Namespace) + targetRef, err := ValidateRef(opt.TargetRef) + if err != nil { + return errors.Wrap(err, "parse target image name") + } + + inspect, err := cm.manager.Inspect(ctx, opt.ContainerID) + if err != nil { + return errors.Wrap(err, "inspect container") + } + + logrus.Infof("pulling base bootstrap") + start := time.Now() + image, committedLayers, err := cm.pullBootstrap(ctx, inspect.Image, "bootstrap-base", opt.SourceInsecure) + if err != nil { + return errors.Wrap(err, "pull base bootstrap") + } + logrus.Infof("pulled base bootstrap, elapsed: %s", time.Since(start)) + + if committedLayers >= opt.MaximumTimes { + return fmt.Errorf("reached maximum committed times %d", opt.MaximumTimes) + } + if opt.FsVersion, opt.Compressor, err = cm.obtainBootStrapInfo(ctx, "bootstrap-base"); err != nil { + return errors.Wrap(err, "obtain bootstrap FsVersion and Compressor") + } + + mountList := NewMountList() + + var upperBlob *Blob + mountBlobs := make([]Blob, len(opt.WithPaths)) + commit := func() error { + eg := errgroup.Group{} + eg.Go(func() error { + var upperBlobDigest *digest.Digest + if err := withRetry(func() error { + upperBlobDigest, err = cm.commitUpperByDiff(ctx, mountList.Add, opt.WithPaths, opt.WithoutPaths, inspect.LowerDirs, inspect.UpperDir, "blob-upper", opt.FsVersion, opt.Compressor) + return err + }, 3); err != nil { + return errors.Wrap(err, "commit upper") + } + logrus.Infof("pushing blob for upper") + start := time.Now() + upperBlobDesc, err := cm.pushBlob(ctx, "blob-upper", *upperBlobDigest, opt.TargetRef, opt.TargetInsecure) + if err != nil { + return errors.Wrap(err, "push upper blob") + } + upperBlob = &Blob{ + Name: "blob-upper", + Desc: *upperBlobDesc, + } + logrus.Infof("pushed blob for upper, elapsed: %s", time.Since(start)) + return nil + }) + + if len(opt.WithPaths) > 0 { + for idx := range opt.WithPaths { + func(idx int) { + eg.Go(func() error { + withPath := opt.WithPaths[idx] + name := fmt.Sprintf("blob-mount-%d", idx) + var mountBlobDigest *digest.Digest + if err := withRetry(func() error { + mountBlobDigest, err = cm.commitMountByNSEnter(ctx, inspect.Pid, withPath, name, opt.FsVersion, opt.Compressor) + return err + }, 3); err != nil { + return errors.Wrap(err, "commit mount") + } + logrus.Infof("pushing blob for mount") + start := time.Now() + mountBlobDesc, err := cm.pushBlob(ctx, name, *mountBlobDigest, opt.TargetRef, opt.TargetInsecure) + if err != nil { + return errors.Wrap(err, "push mount blob") + } + mountBlobs[idx] = Blob{ + Name: name, + Desc: *mountBlobDesc, + } + logrus.Infof("pushed blob for mount, elapsed: %s", time.Since(start)) + return nil + }) + }(idx) + } + } + + if err := eg.Wait(); err != nil { + return err + } + + appendedEg := errgroup.Group{} + appendedMutex := sync.Mutex{} + if len(mountList.paths) > 0 { + logrus.Infof("need commit appened mount path: %s", strings.Join(mountList.paths, ", ")) + } + for idx := range mountList.paths { + func(idx int) { + appendedEg.Go(func() error { + mountPath := mountList.paths[idx] + name := fmt.Sprintf("blob-appended-mount-%d", idx) + var mountBlobDigest *digest.Digest + if err := withRetry(func() error { + mountBlobDigest, err = cm.commitMountByNSEnter(ctx, inspect.Pid, mountPath, name, opt.FsVersion, opt.Compressor) + return err + }, 3); err != nil { + return errors.Wrap(err, "commit appended mount") + } + logrus.Infof("pushing blob for appended mount") + start := time.Now() + mountBlobDesc, err := cm.pushBlob(ctx, name, *mountBlobDigest, opt.TargetRef, 
opt.TargetInsecure) + if err != nil { + return errors.Wrap(err, "push appended mount blob") + } + appendedMutex.Lock() + mountBlobs = append(mountBlobs, Blob{ + Name: name, + Desc: *mountBlobDesc, + }) + appendedMutex.Unlock() + logrus.Infof("pushed blob for appended mount, elapsed: %s", time.Since(start)) + return nil + }) + }(idx) + } + + return appendedEg.Wait() + } + + if err := cm.pause(ctx, opt.ContainerID, commit); err != nil { + return errors.Wrap(err, "pause container to commit") + } + + logrus.Infof("merging base and upper bootstraps") + _, bootstrapDiffID, err := cm.mergeBootstrap(ctx, *upperBlob, mountBlobs, "bootstrap-base", "bootstrap-merged.tar") + if err != nil { + return errors.Wrap(err, "merge bootstrap") + } + + logrus.Infof("pushing committed image to %s", targetRef) + if err := cm.pushManifest(ctx, *image, *bootstrapDiffID, targetRef, "bootstrap-merged.tar", opt.FsVersion, upperBlob, mountBlobs, opt.TargetInsecure); err != nil { + return errors.Wrap(err, "push manifest") + } + + return nil +} + +func (cm *Committer) pullBootstrap(ctx context.Context, ref, bootstrapName string, insecure bool) (*parserPkg.Image, int, error) { + remoter, err := provider.DefaultRemote(ref, insecure) + if err != nil { + return nil, 0, errors.Wrap(err, "create remote") + } + + parser, err := parserPkg.New(remoter, runtime.GOARCH) + if err != nil { + return nil, 0, errors.Wrap(err, "create parser") + } + + var parsed *parserPkg.Parsed + parsed, err = parser.Parse(ctx) + if err != nil { + if utils.RetryWithHTTP(err) { + remoter.MaybeWithHTTP(err) + parsed, err = parser.Parse(ctx) + if err != nil { + return nil, 0, errors.Wrap(err, "parse nydus image") + } + } else { + return nil, 0, errors.Wrap(err, "parse nydus image") + } + } + if parsed.NydusImage == nil { + return nil, 0, fmt.Errorf("not a nydus image: %s", ref) + } + + bootstrapDesc := parserPkg.FindNydusBootstrapDesc(&parsed.NydusImage.Manifest) + if bootstrapDesc == nil { + return nil, 0, fmt.Errorf("not found nydus bootstrap layer") + } + committedLayers := 0 + _commitBlobs := bootstrapDesc.Annotations[utils.LayerAnnotationNydusCommitBlobs] + if _commitBlobs != "" { + committedLayers = len(strings.Split(_commitBlobs, ",")) + logrus.Infof("detected the committed layers: %d", committedLayers) + } + + target := filepath.Join(cm.workDir, bootstrapName) + reader, err := parser.PullNydusBootstrap(ctx, parsed.NydusImage) + if err != nil { + return nil, 0, errors.Wrap(err, "pull bootstrap layer") + } + defer reader.Close() + + if err := utils.UnpackFile(reader, utils.BootstrapFileNameInLayer, target); err != nil { + return nil, 0, errors.Wrap(err, "unpack bootstrap layer") + } + + return parsed.NydusImage, committedLayers, nil +} + +func (cm *Committer) commitUpperByDiff(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, lowerDirs, upperDir, blobName, fsversion, compressor string) (*digest.Digest, error) { + logrus.Infof("committing upper") + start := time.Now() + + blobPath := filepath.Join(cm.workDir, blobName) + blob, err := os.Create(blobPath) + if err != nil { + return nil, errors.Wrap(err, "create upper blob file") + } + defer blob.Close() + + digester := digest.SHA256.Digester() + counter := Counter{} + tarWc, err := converter.Pack(ctx, io.MultiWriter(blob, digester.Hash(), &counter), converter.PackOption{ + WorkDir: cm.workDir, + FsVersion: fsversion, + Compressor: compressor, + BuilderPath: cm.builder, + }) + if err != nil { + return nil, errors.Wrap(err, "initialize pack to blob") + } + + 
if err := diff.Diff(ctx, appendMount, withPaths, withoutPaths, tarWc, lowerDirs, upperDir); err != nil { + return nil, errors.Wrap(err, "make diff") + } + + if err := tarWc.Close(); err != nil { + return nil, errors.Wrap(err, "pack to blob") + } + + blobDigest := digester.Digest() + logrus.Infof("committed upper, size: %s, elapsed: %s", humanize.Bytes(uint64(counter.Size())), time.Since(start)) + + return &blobDigest, nil +} + +func (cm *Committer) pushBlob(ctx context.Context, blobName string, blobDigest digest.Digest, targetRef string, insecure bool) (*ocispec.Descriptor, error) { + blobRa, err := local.OpenReader(filepath.Join(cm.workDir, blobName)) + if err != nil { + return nil, errors.Wrap(err, "open reader for upper blob") + } + + blobDesc := ocispec.Descriptor{ + Digest: blobDigest, + Size: blobRa.Size(), + MediaType: utils.MediaTypeNydusBlob, + Annotations: map[string]string{ + utils.LayerAnnotationUncompressed: blobDigest.String(), + utils.LayerAnnotationNydusBlob: "true", + }, + } + + remoter, err := provider.DefaultRemote(targetRef, insecure) + if err != nil { + return nil, errors.Wrap(err, "create remote") + } + + if err := remoter.Push(ctx, blobDesc, true, io.NewSectionReader(blobRa, 0, blobRa.Size())); err != nil { + if utils.RetryWithHTTP(err) { + remoter.MaybeWithHTTP(err) + if err := remoter.Push(ctx, blobDesc, true, io.NewSectionReader(blobRa, 0, blobRa.Size())); err != nil { + return nil, errors.Wrap(err, "push blob") + } + } else { + return nil, errors.Wrap(err, "push blob") + } + } + return &blobDesc, nil +} + +func (cm *Committer) pause(ctx context.Context, containerID string, handle func() error) error { + logrus.Infof("pausing container: %s", containerID) + if err := cm.manager.Pause(ctx, containerID); err != nil { + return errors.Wrap(err, "pause container") + } + + if err := handle(); err != nil { + logrus.Infof("unpausing container: %s", containerID) + if err := cm.manager.UnPause(ctx, containerID); err != nil { + logrus.Errorf("unpause container: %s", containerID) + } + return err + } + + logrus.Infof("unpausing container: %s", containerID) + return cm.manager.UnPause(ctx, containerID) +} + +func (cm *Committer) pushManifest( + ctx context.Context, nydusImage parserPkg.Image, bootstrapDiffID digest.Digest, targetRef, bootstrapName, fsversion string, upperBlob *Blob, mountBlobs []Blob, insecure bool, +) error { + lowerBlobLayers := []ocispec.Descriptor{} + for idx := range nydusImage.Manifest.Layers { + layer := nydusImage.Manifest.Layers[idx] + if layer.MediaType == utils.MediaTypeNydusBlob { + lowerBlobLayers = append(lowerBlobLayers, layer) + } + } + + // Push image config + config := nydusImage.Config + + config.RootFS.DiffIDs = []digest.Digest{} + for idx := range lowerBlobLayers { + config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, lowerBlobLayers[idx].Digest) + } + for idx := range mountBlobs { + mountBlob := mountBlobs[idx] + config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, mountBlob.Desc.Digest) + } + config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, upperBlob.Desc.Digest) + config.RootFS.DiffIDs = append(config.RootFS.DiffIDs, bootstrapDiffID) + + configBytes, configDesc, err := cm.makeDesc(config, nydusImage.Manifest.Config) + if err != nil { + return errors.Wrap(err, "make config desc") + } + + remoter, err := provider.DefaultRemote(targetRef, insecure) + if err != nil { + return errors.Wrap(err, "create remote") + } + + if err := remoter.Push(ctx, *configDesc, true, bytes.NewReader(configBytes)); err != nil { + if 
utils.RetryWithHTTP(err) { + remoter.MaybeWithHTTP(err) + if err := remoter.Push(ctx, *configDesc, true, bytes.NewReader(configBytes)); err != nil { + return errors.Wrap(err, "push image config") + } + } else { + return errors.Wrap(err, "push image config") + } + } + + // Push bootstrap layer + bootstrapTarPath := filepath.Join(cm.workDir, bootstrapName) + bootstrapTar, err := os.Open(bootstrapTarPath) + if err != nil { + return errors.Wrap(err, "open bootstrap tar file") + } + + bootstrapTarGzPath := filepath.Join(cm.workDir, bootstrapName+".gz") + bootstrapTarGz, err := os.Create(bootstrapTarGzPath) + if err != nil { + return errors.Wrap(err, "create bootstrap tar.gz file") + } + defer bootstrapTarGz.Close() + + digester := digest.SHA256.Digester() + gzWriter := gzip.NewWriter(io.MultiWriter(bootstrapTarGz, digester.Hash())) + if _, err := io.Copy(gzWriter, bootstrapTar); err != nil { + return errors.Wrap(err, "compress bootstrap tar to tar.gz") + } + if err := gzWriter.Close(); err != nil { + return errors.Wrap(err, "close gzip writer") + } + + ra, err := local.OpenReader(bootstrapTarGzPath) + if err != nil { + return errors.Wrap(err, "open reader for upper blob") + } + defer ra.Close() + + commitBlobs := []string{} + for idx := range mountBlobs { + mountBlob := mountBlobs[idx] + commitBlobs = append(commitBlobs, mountBlob.Desc.Digest.String()) + } + commitBlobs = append(commitBlobs, upperBlob.Desc.Digest.String()) + + bootstrapDesc := ocispec.Descriptor{ + Digest: digester.Digest(), + Size: ra.Size(), + MediaType: ocispec.MediaTypeImageLayerGzip, + Annotations: map[string]string{ + converter.LayerAnnotationFSVersion: fsversion, + converter.LayerAnnotationNydusBootstrap: "true", + utils.LayerAnnotationNydusCommitBlobs: strings.Join(commitBlobs, ","), + }, + } + + bootstrapRc, err := os.Open(bootstrapTarGzPath) + if err != nil { + return errors.Wrapf(err, "open bootstrap %s", bootstrapTarGzPath) + } + defer bootstrapRc.Close() + if err := remoter.Push(ctx, bootstrapDesc, true, bootstrapRc); err != nil { + return errors.Wrap(err, "push bootstrap layer") + } + + // Push image manifest + layers := lowerBlobLayers + for idx := range mountBlobs { + mountBlob := mountBlobs[idx] + layers = append(layers, mountBlob.Desc) + } + layers = append(layers, upperBlob.Desc) + layers = append(layers, bootstrapDesc) + + nydusImage.Manifest.Config = *configDesc + nydusImage.Manifest.Layers = layers + + manifestBytes, manifestDesc, err := cm.makeDesc(nydusImage.Manifest, nydusImage.Desc) + if err != nil { + return errors.Wrap(err, "make config desc") + } + if err := remoter.Push(ctx, *manifestDesc, false, bytes.NewReader(manifestBytes)); err != nil { + return errors.Wrap(err, "push image manifest") + } + + return nil +} + +func (cm *Committer) makeDesc(x interface{}, oldDesc ocispec.Descriptor) ([]byte, *ocispec.Descriptor, error) { + data, err := json.MarshalIndent(x, "", " ") + if err != nil { + return nil, nil, errors.Wrap(err, "json marshal") + } + dgst := digest.SHA256.FromBytes(data) + + newDesc := oldDesc + newDesc.Size = int64(len(data)) + newDesc.Digest = dgst + + return data, &newDesc, nil +} + +func (cm *Committer) commitMountByNSEnter(ctx context.Context, containerPid int, sourceDir, name, fsversion, compressor string) (*digest.Digest, error) { + logrus.Infof("committing mount: %s", sourceDir) + start := time.Now() + + blobPath := filepath.Join(cm.workDir, name) + blob, err := os.Create(blobPath) + if err != nil { + return nil, errors.Wrap(err, "create mount blob file") + } + defer blob.Close() 
+ + digester := digest.SHA256.Digester() + counter := Counter{} + tarWc, err := converter.Pack(ctx, io.MultiWriter(blob, &counter, digester.Hash()), converter.PackOption{ + WorkDir: cm.workDir, + FsVersion: fsversion, + Compressor: compressor, + BuilderPath: cm.builder, + }) + if err != nil { + return nil, errors.Wrap(err, "initialize pack to blob") + } + + if err := copyFromContainer(ctx, containerPid, sourceDir, tarWc); err != nil { + return nil, errors.Wrapf(err, "copy %s from pid %d", sourceDir, containerPid) + } + + if err := tarWc.Close(); err != nil { + return nil, errors.Wrap(err, "pack to blob") + } + + mountBlobDigest := digester.Digest() + + logrus.Infof("committed mount: %s, size: %s, elapsed %s", sourceDir, humanize.Bytes(uint64(counter.Size())), time.Since(start)) + + return &mountBlobDigest, nil +} + +func (cm *Committer) mergeBootstrap( + ctx context.Context, upperBlob Blob, mountBlobs []Blob, baseBootstrapName, mergedBootstrapName string, +) ([]digest.Digest, *digest.Digest, error) { + baseBootstrap := filepath.Join(cm.workDir, baseBootstrapName) + upperBlobRa, err := local.OpenReader(filepath.Join(cm.workDir, upperBlob.Name)) + if err != nil { + return nil, nil, errors.Wrap(err, "open reader for upper blob") + } + + mergedBootstrap := filepath.Join(cm.workDir, mergedBootstrapName) + bootstrap, err := os.Create(mergedBootstrap) + if err != nil { + return nil, nil, errors.Wrap(err, "create upper blob file") + } + defer bootstrap.Close() + + digester := digest.SHA256.Digester() + writer := io.MultiWriter(bootstrap, digester.Hash()) + + layers := []converter.Layer{} + layers = append(layers, converter.Layer{ + Digest: upperBlob.Desc.Digest, + ReaderAt: upperBlobRa, + }) + for idx := range mountBlobs { + mountBlob := mountBlobs[idx] + mountBlobRa, err := local.OpenReader(filepath.Join(cm.workDir, mountBlob.Name)) + if err != nil { + return nil, nil, errors.Wrap(err, "open reader for mount blob") + } + layers = append(layers, converter.Layer{ + Digest: mountBlob.Desc.Digest, + ReaderAt: mountBlobRa, + }) + } + + blobDigests, err := converter.Merge(ctx, layers, writer, converter.MergeOption{ + WorkDir: cm.workDir, + ParentBootstrapPath: baseBootstrap, + WithTar: true, + BuilderPath: cm.builder, + }) + if err != nil { + return nil, nil, errors.Wrap(err, "merge bootstraps") + } + bootstrapDiffID := digester.Digest() + + return blobDigests, &bootstrapDiffID, nil +} + +func copyFromContainer(ctx context.Context, containerPid int, source string, target io.Writer) error { + config := &Config{ + Mount: true, + Target: containerPid, + } + + stderr, err := config.ExecuteContext(ctx, target, "tar", "--xattrs", "--ignore-failed-read", "--absolute-names", "-cf", "-", source) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("execute tar: %s", strings.TrimSpace(stderr))) + } + if stderr != "" { + logrus.Warnf("from container: %s", stderr) + } + + return nil +} + +type MountList struct { + mutex sync.Mutex + paths []string +} + +func NewMountList() *MountList { + return &MountList{ + paths: make([]string, 0), + } +} + +func (ml *MountList) Add(path string) { + ml.mutex.Lock() + defer ml.mutex.Unlock() + + ml.paths = append(ml.paths, path) +} + +type Blob struct { + Name string + BootstrapName string + Desc ocispec.Descriptor +} + +func withRetry(handle func() error, total int) error { + for { + total-- + err := handle() + if err == nil { + return nil + } + + if total > 0 { + logrus.WithError(err).Warnf("retry (remain %d times)", total) + continue + } + + return err + } +} + +// 
ValidateRef validate the target image reference. +func ValidateRef(ref string) (string, error) { + named, err := docker.ParseDockerRef(ref) + if err != nil { + return "", errors.Wrapf(err, "invalid image reference: %s", ref) + } + if _, ok := named.(docker.Digested); ok { + return "", fmt.Errorf("unsupported digested image reference: %s", ref) + } + named = docker.TagNameOnly(named) + return named.String(), nil +} + +type outputJSON struct { + FsVersion string `json:"fs_version"` + Compressor string `json:"compressor"` +} + +func (cm *Committer) obtainBootStrapInfo(ctx context.Context, BootstrapName string) (string, string, error) { + targetBootstrapPath := filepath.Join(cm.workDir, BootstrapName) + outputJSONPath := filepath.Join(cm.workDir, "output.json") + defer os.Remove(outputJSONPath) + + args := []string{ + "check", + "--log-level", + "warn", + "--bootstrap", + targetBootstrapPath, + "--output-json", + outputJSONPath, + } + + logrus.Debugf("\tCommand: %s", args) + cmd := exec.CommandContext(ctx, cm.builder, args...) + + if err := cmd.Run(); err != nil { + return "", "", errors.Wrap(err, "run merge command") + } + + outputBytes, err := os.ReadFile(outputJSONPath) + if err != nil { + return "", "", errors.Wrapf(err, "read file %s", outputJSONPath) + } + var output outputJSON + err = json.Unmarshal(outputBytes, &output) + if err != nil { + return "", "", errors.Wrapf(err, "unmarshal output json file %s", outputJSONPath) + } + return output.FsVersion, strings.ToLower(output.Compressor), nil +} diff --git a/contrib/nydusify/pkg/committer/diff/archive/tar.go b/contrib/nydusify/pkg/committer/diff/archive/tar.go index 7ba7568e7a0..f69e61aa919 100644 --- a/contrib/nydusify/pkg/committer/diff/archive/tar.go +++ b/contrib/nydusify/pkg/committer/diff/archive/tar.go @@ -1,317 +1,317 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package archive - -import ( - "archive/tar" - "context" - - "fmt" - "io" - "os" - "path/filepath" - "strings" - "sync" - "time" - - "github.com/containerd/continuity/fs" -) - -var bufPool = &sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32*1024) - return &buffer - }, -} - -const ( - // whiteoutPrefix prefix means file is a whiteout. If this is followed by a - // filename this means that file has been removed from the base layer. - // See https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts - whiteoutPrefix = ".wh." - - paxSchilyXattr = "SCHILY.xattr." -) - -// ChangeWriter provides tar stream from filesystem change information. -// The provided tar stream is styled as an OCI layer. Change information -// (add/modify/delete/unmodified) for each file needs to be passed to this -// writer through HandleChange method. -// -// This should be used combining with continuity's diff computing functionality -// (e.g. `fs.Change` of github.com/containerd/continuity/fs). 
-// -// See also https://github.com/opencontainers/image-spec/blob/main/layer.md for details -// about OCI layers -type ChangeWriter struct { - tw *tar.Writer - source string - modTimeUpperBound *time.Time - whiteoutT time.Time - inodeSrc map[uint64]string - inodeRefs map[uint64][]string - addedDirs map[string]struct{} -} - -// ChangeWriterOpt can be specified in NewChangeWriter. -type ChangeWriterOpt func(cw *ChangeWriter) - -// NewChangeWriter returns ChangeWriter that writes tar stream of the source directory -// to the provided writer. Change information (add/modify/delete/unmodified) for each -// file needs to be passed through HandleChange method. -func NewChangeWriter(w io.Writer, source string, opts ...ChangeWriterOpt) *ChangeWriter { - cw := &ChangeWriter{ - tw: tar.NewWriter(w), - source: source, - whiteoutT: time.Now(), // can be overridden with WithWhiteoutTime(time.Time) ChangeWriterOpt . - inodeSrc: map[uint64]string{}, - inodeRefs: map[uint64][]string{}, - addedDirs: map[string]struct{}{}, - } - for _, o := range opts { - o(cw) - } - return cw -} - -// HandleChange receives filesystem change information and reflect that information to -// the result tar stream. This function implements `fs.ChangeFunc` of continuity -// (github.com/containerd/continuity/fs) and should be used with that package. -func (cw *ChangeWriter) HandleChange(k fs.ChangeKind, p string, f os.FileInfo, err error) error { - if err != nil { - return err - } - if k == fs.ChangeKindDelete { - whiteOutDir := filepath.Dir(p) - whiteOutBase := filepath.Base(p) - whiteOut := filepath.Join(whiteOutDir, whiteoutPrefix+whiteOutBase) - hdr := &tar.Header{ - Typeflag: tar.TypeReg, - Name: whiteOut[1:], - Size: 0, - ModTime: cw.whiteoutT, - AccessTime: cw.whiteoutT, - ChangeTime: cw.whiteoutT, - } - if err := cw.includeParents(hdr); err != nil { - return err - } - if err := cw.tw.WriteHeader(hdr); err != nil { - return fmt.Errorf("failed to write whiteout header: %w", err) - } - } else { - var ( - link string - err error - source = filepath.Join(cw.source, p) - ) - - switch { - case f.Mode()&os.ModeSocket != 0: - return nil // ignore sockets - case f.Mode()&os.ModeSymlink != 0: - if link, err = os.Readlink(source); err != nil { - return err - } - } - - hdr, err := tar.FileInfoHeader(f, link) - if err != nil { - return err - } - - hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode))) - - // truncate timestamp for compatibility. without PAX stdlib rounds timestamps instead - hdr.Format = tar.FormatPAX - if cw.modTimeUpperBound != nil && hdr.ModTime.After(*cw.modTimeUpperBound) { - hdr.ModTime = *cw.modTimeUpperBound - } - hdr.ModTime = hdr.ModTime.Truncate(time.Second) - hdr.AccessTime = time.Time{} - hdr.ChangeTime = time.Time{} - - name := p - if strings.HasPrefix(name, string(filepath.Separator)) { - name, err = filepath.Rel(string(filepath.Separator), name) - if err != nil { - return fmt.Errorf("failed to make path relative: %w", err) - } - } - // Canonicalize to POSIX-style paths using forward slashes. Directory - // entries must end with a slash. 
- name = filepath.ToSlash(name) - if f.IsDir() && !strings.HasSuffix(name, "/") { - name += "/" - } - hdr.Name = name - - if err := setHeaderForSpecialDevice(hdr, name, f); err != nil { - return fmt.Errorf("failed to set device headers: %w", err) - } - - // additionalLinks stores file names which must be linked to - // this file when this file is added - var additionalLinks []string - inode, isHardlink := fs.GetLinkInfo(f) - if isHardlink { - // If the inode has a source, always link to it - if source, ok := cw.inodeSrc[inode]; ok { - hdr.Typeflag = tar.TypeLink - hdr.Linkname = source - hdr.Size = 0 - } else { - if k == fs.ChangeKindUnmodified { - cw.inodeRefs[inode] = append(cw.inodeRefs[inode], name) - return nil - } - cw.inodeSrc[inode] = name - additionalLinks = cw.inodeRefs[inode] - delete(cw.inodeRefs, inode) - } - } else if k == fs.ChangeKindUnmodified { - // Nothing to write to diff - return nil - } - - if capability, err := getxattr(source, "security.capability"); err != nil { - return fmt.Errorf("failed to get capabilities xattr: %w", err) - } else if len(capability) > 0 { - if hdr.PAXRecords == nil { - hdr.PAXRecords = map[string]string{} - } - hdr.PAXRecords[paxSchilyXattr+"security.capability"] = string(capability) - } - - if err := cw.includeParents(hdr); err != nil { - return err - } - if err := cw.tw.WriteHeader(hdr); err != nil { - return fmt.Errorf("failed to write file header: %w", err) - } - - if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 { - file, err := open(source) - if err != nil { - return fmt.Errorf("failed to open path: %v: %w", source, err) - } - defer file.Close() - - // HACK (imeoer): display file path in error message. - n, err := copyBuffered(context.TODO(), cw.tw, file) - if err != nil { - return fmt.Errorf("failed to copy file %s: %w", p, err) - } - if n != hdr.Size { - return fmt.Errorf("short write copying file: %s", p) - } - } - - if additionalLinks != nil { - source = hdr.Name - for _, extra := range additionalLinks { - hdr.Name = extra - hdr.Typeflag = tar.TypeLink - hdr.Linkname = source - hdr.Size = 0 - - if err := cw.includeParents(hdr); err != nil { - return err - } - if err := cw.tw.WriteHeader(hdr); err != nil { - return fmt.Errorf("failed to write file header: %w", err) - } - } - } - } - return nil -} - -// Close closes this writer. 
-func (cw *ChangeWriter) Close() error { - if err := cw.tw.Close(); err != nil { - return fmt.Errorf("failed to close tar writer: %w", err) - } - return nil -} - -func (cw *ChangeWriter) includeParents(hdr *tar.Header) error { - if cw.addedDirs == nil { - return nil - } - name := strings.TrimRight(hdr.Name, "/") - fname := filepath.Join(cw.source, name) - parent := filepath.Dir(name) - pname := filepath.Join(cw.source, parent) - - // Do not include root directory as parent - if fname != cw.source && pname != cw.source { - _, ok := cw.addedDirs[parent] - if !ok { - cw.addedDirs[parent] = struct{}{} - fi, err := os.Stat(pname) - if err != nil { - return err - } - if err := cw.HandleChange(fs.ChangeKindModify, parent, fi, nil); err != nil { - return err - } - } - } - if hdr.Typeflag == tar.TypeDir { - cw.addedDirs[name] = struct{}{} - } - return nil -} - -func copyBuffered(ctx context.Context, dst io.Writer, src io.Reader) (written int64, err error) { - buf := bufPool.Get().(*[]byte) - defer bufPool.Put(buf) - - for { - select { - case <-ctx.Done(): - err = ctx.Err() - return - default: - } - - nr, er := src.Read(*buf) - if nr > 0 { - nw, ew := dst.Write((*buf)[0:nr]) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew - break - } - if nr != nw { - err = io.ErrShortWrite - break - } - } - if er != nil { - if er != io.EOF { - err = er - } - break - } - } - return written, err - -} +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package archive + +import ( + "archive/tar" + "context" + + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/containerd/continuity/fs" +) + +var bufPool = &sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32*1024) + return &buffer + }, +} + +const ( + // whiteoutPrefix prefix means file is a whiteout. If this is followed by a + // filename this means that file has been removed from the base layer. + // See https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts + whiteoutPrefix = ".wh." + + paxSchilyXattr = "SCHILY.xattr." +) + +// ChangeWriter provides tar stream from filesystem change information. +// The provided tar stream is styled as an OCI layer. Change information +// (add/modify/delete/unmodified) for each file needs to be passed to this +// writer through HandleChange method. +// +// This should be used combining with continuity's diff computing functionality +// (e.g. `fs.Change` of github.com/containerd/continuity/fs). +// +// See also https://github.com/opencontainers/image-spec/blob/main/layer.md for details +// about OCI layers +type ChangeWriter struct { + tw *tar.Writer + source string + modTimeUpperBound *time.Time + whiteoutT time.Time + inodeSrc map[uint64]string + inodeRefs map[uint64][]string + addedDirs map[string]struct{} +} + +// ChangeWriterOpt can be specified in NewChangeWriter. 
+type ChangeWriterOpt func(cw *ChangeWriter) + +// NewChangeWriter returns ChangeWriter that writes tar stream of the source directory +// to the provided writer. Change information (add/modify/delete/unmodified) for each +// file needs to be passed through HandleChange method. +func NewChangeWriter(w io.Writer, source string, opts ...ChangeWriterOpt) *ChangeWriter { + cw := &ChangeWriter{ + tw: tar.NewWriter(w), + source: source, + whiteoutT: time.Now(), // can be overridden with WithWhiteoutTime(time.Time) ChangeWriterOpt . + inodeSrc: map[uint64]string{}, + inodeRefs: map[uint64][]string{}, + addedDirs: map[string]struct{}{}, + } + for _, o := range opts { + o(cw) + } + return cw +} + +// HandleChange receives filesystem change information and reflect that information to +// the result tar stream. This function implements `fs.ChangeFunc` of continuity +// (github.com/containerd/continuity/fs) and should be used with that package. +func (cw *ChangeWriter) HandleChange(k fs.ChangeKind, p string, f os.FileInfo, err error) error { + if err != nil { + return err + } + if k == fs.ChangeKindDelete { + whiteOutDir := filepath.Dir(p) + whiteOutBase := filepath.Base(p) + whiteOut := filepath.Join(whiteOutDir, whiteoutPrefix+whiteOutBase) + hdr := &tar.Header{ + Typeflag: tar.TypeReg, + Name: whiteOut[1:], + Size: 0, + ModTime: cw.whiteoutT, + AccessTime: cw.whiteoutT, + ChangeTime: cw.whiteoutT, + } + if err := cw.includeParents(hdr); err != nil { + return err + } + if err := cw.tw.WriteHeader(hdr); err != nil { + return fmt.Errorf("failed to write whiteout header: %w", err) + } + } else { + var ( + link string + err error + source = filepath.Join(cw.source, p) + ) + + switch { + case f.Mode()&os.ModeSocket != 0: + return nil // ignore sockets + case f.Mode()&os.ModeSymlink != 0: + if link, err = os.Readlink(source); err != nil { + return err + } + } + + hdr, err := tar.FileInfoHeader(f, link) + if err != nil { + return err + } + + hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode))) + + // truncate timestamp for compatibility. without PAX stdlib rounds timestamps instead + hdr.Format = tar.FormatPAX + if cw.modTimeUpperBound != nil && hdr.ModTime.After(*cw.modTimeUpperBound) { + hdr.ModTime = *cw.modTimeUpperBound + } + hdr.ModTime = hdr.ModTime.Truncate(time.Second) + hdr.AccessTime = time.Time{} + hdr.ChangeTime = time.Time{} + + name := p + if strings.HasPrefix(name, string(filepath.Separator)) { + name, err = filepath.Rel(string(filepath.Separator), name) + if err != nil { + return fmt.Errorf("failed to make path relative: %w", err) + } + } + // Canonicalize to POSIX-style paths using forward slashes. Directory + // entries must end with a slash. 
+ name = filepath.ToSlash(name) + if f.IsDir() && !strings.HasSuffix(name, "/") { + name += "/" + } + hdr.Name = name + + if err := setHeaderForSpecialDevice(hdr, name, f); err != nil { + return fmt.Errorf("failed to set device headers: %w", err) + } + + // additionalLinks stores file names which must be linked to + // this file when this file is added + var additionalLinks []string + inode, isHardlink := fs.GetLinkInfo(f) + if isHardlink { + // If the inode has a source, always link to it + if source, ok := cw.inodeSrc[inode]; ok { + hdr.Typeflag = tar.TypeLink + hdr.Linkname = source + hdr.Size = 0 + } else { + if k == fs.ChangeKindUnmodified { + cw.inodeRefs[inode] = append(cw.inodeRefs[inode], name) + return nil + } + cw.inodeSrc[inode] = name + additionalLinks = cw.inodeRefs[inode] + delete(cw.inodeRefs, inode) + } + } else if k == fs.ChangeKindUnmodified { + // Nothing to write to diff + return nil + } + + if capability, err := getxattr(source, "security.capability"); err != nil { + return fmt.Errorf("failed to get capabilities xattr: %w", err) + } else if len(capability) > 0 { + if hdr.PAXRecords == nil { + hdr.PAXRecords = map[string]string{} + } + hdr.PAXRecords[paxSchilyXattr+"security.capability"] = string(capability) + } + + if err := cw.includeParents(hdr); err != nil { + return err + } + if err := cw.tw.WriteHeader(hdr); err != nil { + return fmt.Errorf("failed to write file header: %w", err) + } + + if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 { + file, err := open(source) + if err != nil { + return fmt.Errorf("failed to open path: %v: %w", source, err) + } + defer file.Close() + + // HACK (imeoer): display file path in error message. + n, err := copyBuffered(context.TODO(), cw.tw, file) + if err != nil { + return fmt.Errorf("failed to copy file %s: %w", p, err) + } + if n != hdr.Size { + return fmt.Errorf("short write copying file: %s", p) + } + } + + if additionalLinks != nil { + source = hdr.Name + for _, extra := range additionalLinks { + hdr.Name = extra + hdr.Typeflag = tar.TypeLink + hdr.Linkname = source + hdr.Size = 0 + + if err := cw.includeParents(hdr); err != nil { + return err + } + if err := cw.tw.WriteHeader(hdr); err != nil { + return fmt.Errorf("failed to write file header: %w", err) + } + } + } + } + return nil +} + +// Close closes this writer. 
+func (cw *ChangeWriter) Close() error { + if err := cw.tw.Close(); err != nil { + return fmt.Errorf("failed to close tar writer: %w", err) + } + return nil +} + +func (cw *ChangeWriter) includeParents(hdr *tar.Header) error { + if cw.addedDirs == nil { + return nil + } + name := strings.TrimRight(hdr.Name, "/") + fname := filepath.Join(cw.source, name) + parent := filepath.Dir(name) + pname := filepath.Join(cw.source, parent) + + // Do not include root directory as parent + if fname != cw.source && pname != cw.source { + _, ok := cw.addedDirs[parent] + if !ok { + cw.addedDirs[parent] = struct{}{} + fi, err := os.Stat(pname) + if err != nil { + return err + } + if err := cw.HandleChange(fs.ChangeKindModify, parent, fi, nil); err != nil { + return err + } + } + } + if hdr.Typeflag == tar.TypeDir { + cw.addedDirs[name] = struct{}{} + } + return nil +} + +func copyBuffered(ctx context.Context, dst io.Writer, src io.Reader) (written int64, err error) { + buf := bufPool.Get().(*[]byte) + defer bufPool.Put(buf) + + for { + select { + case <-ctx.Done(): + err = ctx.Err() + return + default: + } + + nr, er := src.Read(*buf) + if nr > 0 { + nw, ew := dst.Write((*buf)[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er != nil { + if er != io.EOF { + err = er + } + break + } + } + return written, err + +} diff --git a/contrib/nydusify/pkg/committer/diff/archive/tar_unix.go b/contrib/nydusify/pkg/committer/diff/archive/tar_unix.go index fbe47cbcabe..20672a02058 100644 --- a/contrib/nydusify/pkg/committer/diff/archive/tar_unix.go +++ b/contrib/nydusify/pkg/committer/diff/archive/tar_unix.go @@ -1,80 +1,80 @@ -//go:build !windows - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package archive - -import ( - "archive/tar" - "errors" - - "os" - "runtime" - - "syscall" - - "github.com/containerd/continuity/sysx" - "golang.org/x/sys/unix" -) - -func chmodTarEntry(perm os.FileMode) os.FileMode { - return perm -} - -func setHeaderForSpecialDevice(hdr *tar.Header, _ string, fi os.FileInfo) error { - // Devmajor and Devminor are only needed for special devices. - - // In FreeBSD, RDev for regular files is -1 (unless overridden by FS): - // https://cgit.freebsd.org/src/tree/sys/kern/vfs_default.c?h=stable/13#n1531 - // (NODEV is -1: https://cgit.freebsd.org/src/tree/sys/sys/param.h?h=stable/13#n241). - - // ZFS in particular does not override the default: - // https://cgit.freebsd.org/src/tree/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c?h=stable/13#n2027 - - // Since `Stat_t.Rdev` is uint64, the cast turns -1 into (2^64 - 1). - // Such large values cannot be encoded in a tar header. 
- if runtime.GOOS == "freebsd" && hdr.Typeflag != tar.TypeBlock && hdr.Typeflag != tar.TypeChar { - return nil - } - s, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return errors.New("unsupported stat type") - } - - rdev := uint64(s.Rdev) //nolint:nolintlint,unconvert // rdev is int32 on darwin/bsd, int64 on linux/solaris - - // Currently go does not fill in the major/minors - if s.Mode&syscall.S_IFBLK != 0 || - s.Mode&syscall.S_IFCHR != 0 { - hdr.Devmajor = int64(unix.Major(rdev)) - hdr.Devminor = int64(unix.Minor(rdev)) - } - - return nil -} - -func open(p string) (*os.File, error) { - return os.Open(p) -} - -func getxattr(path, attr string) ([]byte, error) { - b, err := sysx.LGetxattr(path, attr) - if err == unix.ENOTSUP || err == sysx.ENODATA { - return nil, nil - } - return b, err -} +//go:build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package archive + +import ( + "archive/tar" + "errors" + + "os" + "runtime" + + "syscall" + + "github.com/containerd/continuity/sysx" + "golang.org/x/sys/unix" +) + +func chmodTarEntry(perm os.FileMode) os.FileMode { + return perm +} + +func setHeaderForSpecialDevice(hdr *tar.Header, _ string, fi os.FileInfo) error { + // Devmajor and Devminor are only needed for special devices. + + // In FreeBSD, RDev for regular files is -1 (unless overridden by FS): + // https://cgit.freebsd.org/src/tree/sys/kern/vfs_default.c?h=stable/13#n1531 + // (NODEV is -1: https://cgit.freebsd.org/src/tree/sys/sys/param.h?h=stable/13#n241). + + // ZFS in particular does not override the default: + // https://cgit.freebsd.org/src/tree/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c?h=stable/13#n2027 + + // Since `Stat_t.Rdev` is uint64, the cast turns -1 into (2^64 - 1). + // Such large values cannot be encoded in a tar header. + if runtime.GOOS == "freebsd" && hdr.Typeflag != tar.TypeBlock && hdr.Typeflag != tar.TypeChar { + return nil + } + s, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return errors.New("unsupported stat type") + } + + rdev := uint64(s.Rdev) //nolint:nolintlint,unconvert // rdev is int32 on darwin/bsd, int64 on linux/solaris + + // Currently go does not fill in the major/minors + if s.Mode&syscall.S_IFBLK != 0 || + s.Mode&syscall.S_IFCHR != 0 { + hdr.Devmajor = int64(unix.Major(rdev)) + hdr.Devminor = int64(unix.Minor(rdev)) + } + + return nil +} + +func open(p string) (*os.File, error) { + return os.Open(p) +} + +func getxattr(path, attr string) ([]byte, error) { + b, err := sysx.LGetxattr(path, attr) + if err == unix.ENOTSUP || err == sysx.ENODATA { + return nil, nil + } + return b, err +} diff --git a/contrib/nydusify/pkg/committer/diff/diff.go b/contrib/nydusify/pkg/committer/diff/diff.go index 94632f9c880..8ad2ae5e3bd 100644 --- a/contrib/nydusify/pkg/committer/diff/diff.go +++ b/contrib/nydusify/pkg/committer/diff/diff.go @@ -1,114 +1,114 @@ -// Ported from buildkit project, copyright The buildkit Authors. 
-// https://github.com/moby/buildkit - -package diff - -import ( - "context" - "fmt" - "io" - "os" - "strings" - - "github.com/containerd/containerd/mount" - "github.com/moby/buildkit/util/overlay" - "github.com/pkg/errors" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff/archive" -) - -func overlaySupportIndex() bool { - if _, err := os.Stat("/sys/module/overlay/parameters/index"); err == nil { - return true - } - return false -} - -// Ported from github.com/moby/buildkit/util/overlay/overlay_linux.go -// Modified overlayfs temp mount handle. -// -// WriteUpperdir writes a layer tar archive into the specified writer, based on -// the diff information stored in the upperdir. -func writeUpperdir(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, w io.Writer, upperdir string, lower []mount.Mount) error { - emptyLower, err := os.MkdirTemp("", "buildkit") // empty directory used for the lower of diff view - if err != nil { - return errors.Wrapf(err, "failed to create temp dir") - } - defer os.Remove(emptyLower) - - options := []string{ - fmt.Sprintf("lowerdir=%s", strings.Join([]string{upperdir, emptyLower}, ":")), - } - if overlaySupportIndex() { - options = append(options, "index=off") - } - upperView := []mount.Mount{ - { - Type: "overlay", - Source: "overlay", - Options: options, - }, - } - - return mount.WithTempMount(ctx, lower, func(lowerRoot string) error { - return mount.WithTempMount(ctx, upperView, func(upperViewRoot string) error { - cw := archive.NewChangeWriter(&cancellableWriter{ctx, w}, upperViewRoot) - if err := Changes(ctx, appendMount, withPaths, withoutPaths, cw.HandleChange, upperdir, upperViewRoot, lowerRoot); err != nil { - if err2 := cw.Close(); err2 != nil { - return errors.Wrapf(err, "failed to record upperdir changes (close error: %v)", err2) - } - return errors.Wrapf(err, "failed to record upperdir changes") - } - return cw.Close() - }) - }) -} - -func Diff(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, writer io.Writer, lowerDirs, upperDir string) error { - emptyLower, err := os.MkdirTemp("", "nydus-cli-diff") - if err != nil { - return errors.Wrapf(err, "create temp dir") - } - defer os.Remove(emptyLower) - - lowerDirs += fmt.Sprintf(":%s", emptyLower) - - options := []string{ - fmt.Sprintf("lowerdir=%s", lowerDirs), - } - if overlaySupportIndex() { - options = append(options, "index=off") - } - lower := []mount.Mount{ - { - Type: "overlay", - Source: "overlay", - Options: options, - }, - } - - options = []string{ - fmt.Sprintf("lowerdir=%s:%s", upperDir, lowerDirs), - } - if overlaySupportIndex() { - options = append(options, "index=off") - } - upper := []mount.Mount{ - { - Type: "overlay", - Source: "overlay", - Options: options, - }, - } - - upperDir, err = overlay.GetUpperdir(lower, upper) - if err != nil { - return errors.Wrap(err, "get upper dir") - } - - if err = writeUpperdir(ctx, appendMount, withPaths, withoutPaths, &cancellableWriter{ctx, writer}, upperDir, lower); err != nil { - return errors.Wrap(err, "write diff") - } - - return nil -} +// Ported from buildkit project, copyright The buildkit Authors. 
+// https://github.com/moby/buildkit + +package diff + +import ( + "context" + "fmt" + "io" + "os" + "strings" + + "github.com/containerd/containerd/mount" + "github.com/moby/buildkit/util/overlay" + "github.com/pkg/errors" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff/archive" +) + +func overlaySupportIndex() bool { + if _, err := os.Stat("/sys/module/overlay/parameters/index"); err == nil { + return true + } + return false +} + +// Ported from github.com/moby/buildkit/util/overlay/overlay_linux.go +// Modified overlayfs temp mount handle. +// +// WriteUpperdir writes a layer tar archive into the specified writer, based on +// the diff information stored in the upperdir. +func writeUpperdir(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, w io.Writer, upperdir string, lower []mount.Mount) error { + emptyLower, err := os.MkdirTemp("", "buildkit") // empty directory used for the lower of diff view + if err != nil { + return errors.Wrapf(err, "failed to create temp dir") + } + defer os.Remove(emptyLower) + + options := []string{ + fmt.Sprintf("lowerdir=%s", strings.Join([]string{upperdir, emptyLower}, ":")), + } + if overlaySupportIndex() { + options = append(options, "index=off") + } + upperView := []mount.Mount{ + { + Type: "overlay", + Source: "overlay", + Options: options, + }, + } + + return mount.WithTempMount(ctx, lower, func(lowerRoot string) error { + return mount.WithTempMount(ctx, upperView, func(upperViewRoot string) error { + cw := archive.NewChangeWriter(&cancellableWriter{ctx, w}, upperViewRoot) + if err := Changes(ctx, appendMount, withPaths, withoutPaths, cw.HandleChange, upperdir, upperViewRoot, lowerRoot); err != nil { + if err2 := cw.Close(); err2 != nil { + return errors.Wrapf(err, "failed to record upperdir changes (close error: %v)", err2) + } + return errors.Wrapf(err, "failed to record upperdir changes") + } + return cw.Close() + }) + }) +} + +func Diff(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, writer io.Writer, lowerDirs, upperDir string) error { + emptyLower, err := os.MkdirTemp("", "nydus-cli-diff") + if err != nil { + return errors.Wrapf(err, "create temp dir") + } + defer os.Remove(emptyLower) + + lowerDirs += fmt.Sprintf(":%s", emptyLower) + + options := []string{ + fmt.Sprintf("lowerdir=%s", lowerDirs), + } + if overlaySupportIndex() { + options = append(options, "index=off") + } + lower := []mount.Mount{ + { + Type: "overlay", + Source: "overlay", + Options: options, + }, + } + + options = []string{ + fmt.Sprintf("lowerdir=%s:%s", upperDir, lowerDirs), + } + if overlaySupportIndex() { + options = append(options, "index=off") + } + upper := []mount.Mount{ + { + Type: "overlay", + Source: "overlay", + Options: options, + }, + } + + upperDir, err = overlay.GetUpperdir(lower, upper) + if err != nil { + return errors.Wrap(err, "get upper dir") + } + + if err = writeUpperdir(ctx, appendMount, withPaths, withoutPaths, &cancellableWriter{ctx, writer}, upperDir, lower); err != nil { + return errors.Wrap(err, "write diff") + } + + return nil +} diff --git a/contrib/nydusify/pkg/committer/diff/overlay_linux.go b/contrib/nydusify/pkg/committer/diff/overlay_linux.go index 2833a607265..4d8f70c8ea3 100644 --- a/contrib/nydusify/pkg/committer/diff/overlay_linux.go +++ b/contrib/nydusify/pkg/committer/diff/overlay_linux.go @@ -1,459 +1,459 @@ -// Ported from buildkit project, copyright The buildkit Authors. 
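The `Diff` entry point above builds two throwaway overlay views (the merged lower chain, and the upperdir stacked over an empty directory to obtain a whiteout-free view) and streams the resulting changes as a tar. An editor's sketch of calling it directly follows; every path is hypothetical, `appendMount` is the callback for paths the differ cannot express (e.g. `redirect_dir` users), and creating the overlay mounts requires root.

```go
package main

import (
	"context"
	"os"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff"
)

func main() {
	out, err := os.Create("/tmp/diff.tar") // hypothetical output path
	if err != nil {
		panic(err)
	}
	defer out.Close()

	appendMount := func(path string) {
		// Paths the overlay differ cannot handle (redirect_dir users) are
		// reported here so the caller can commit them as extra mounts.
	}

	err = diff.Diff(context.Background(), appendMount,
		nil,                       // withPaths: emitted as deletions, re-added from mounts at commit time
		[]string{"/proc", "/sys"}, // withoutPaths: paths excluded from the diff
		out,
		"/snapshots/2/fs:/snapshots/1/fs", // lowerDirs, colon-separated, topmost first
		"/snapshots/3/fs",                 // upperDir (the writable layer)
	)
	if err != nil {
		panic(err)
	}
}
```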
-// https://github.com/moby/buildkit - -package diff - -import ( - "bytes" - "context" - "io" - "os" - "path/filepath" - "strings" - "sync" - "syscall" - - "github.com/containerd/containerd/mount" - "github.com/containerd/continuity/devices" - "github.com/containerd/continuity/fs" - "github.com/containerd/continuity/sysx" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -// GetUpperdir parses the passed mounts and identifies the directory -// that contains diff between upper and lower. -func GetUpperdir(lower, upper []mount.Mount) (string, error) { - var upperdir string - if len(lower) == 0 && len(upper) == 1 { // upper is the bottommost snapshot - // Get layer directories of upper snapshot - upperM := upper[0] - if upperM.Type != "bind" { - return "", errors.Errorf("bottommost upper must be bind mount but %q", upperM.Type) - } - upperdir = upperM.Source - } else if len(lower) == 1 && len(upper) == 1 { - // Get layer directories of lower snapshot - var lowerlayers []string - lowerM := lower[0] - switch lowerM.Type { - case "bind": - // lower snapshot is a bind mount of one layer - lowerlayers = []string{lowerM.Source} - case "overlay": - // lower snapshot is an overlay mount of multiple layers - var err error - lowerlayers, err = GetOverlayLayers(lowerM) - if err != nil { - return "", err - } - default: - return "", errors.Errorf("cannot get layer information from mount option (type = %q)", lowerM.Type) - } - - // Get layer directories of upper snapshot - upperM := upper[0] - if upperM.Type != "overlay" { - return "", errors.Errorf("upper snapshot isn't overlay mounted (type = %q)", upperM.Type) - } - upperlayers, err := GetOverlayLayers(upperM) - if err != nil { - return "", err - } - - // Check if the diff directory can be determined - if len(upperlayers) != len(lowerlayers)+1 { - return "", errors.Errorf("cannot determine diff of more than one upper directories") - } - for i := 0; i < len(lowerlayers); i++ { - if upperlayers[i] != lowerlayers[i] { - return "", errors.Errorf("layer %d must be common between upper and lower snapshots", i) - } - } - upperdir = upperlayers[len(upperlayers)-1] // get the topmost layer that indicates diff - } else { - return "", errors.Errorf("multiple mount configurations are not supported") - } - if upperdir == "" { - return "", errors.Errorf("cannot determine upperdir from mount option") - } - return upperdir, nil -} - -// GetOverlayLayers returns all layer directories of an overlayfs mount. -func GetOverlayLayers(m mount.Mount) ([]string, error) { - var u string - var uFound bool - var l []string // l[0] = bottommost - for _, o := range m.Options { - if strings.HasPrefix(o, "upperdir=") { - u, uFound = strings.TrimPrefix(o, "upperdir="), true - } else if strings.HasPrefix(o, "lowerdir=") { - l = strings.Split(strings.TrimPrefix(o, "lowerdir="), ":") - for i, j := 0, len(l)-1; i < j; i, j = i+1, j-1 { - l[i], l[j] = l[j], l[i] // make l[0] = bottommost - } - } else if strings.HasPrefix(o, "workdir=") || o == "index=off" || o == "userxattr" || strings.HasPrefix(o, "redirect_dir=") { - // these options are possible to specfied by the snapshotter but not indicate dir locations. - continue - } else { - // encountering an unknown option. return error and fallback to walking differ - // to avoid unexpected diff. 
- return nil, errors.Errorf("unknown option %q specified by snapshotter", o) - } - } - if uFound { - return append(l, u), nil - } - return l, nil -} - -type cancellableWriter struct { - ctx context.Context - w io.Writer -} - -func (w *cancellableWriter) Write(p []byte) (int, error) { - if err := w.ctx.Err(); err != nil { - return 0, err - } - return w.w.Write(p) -} - -// Changes is continuty's `fs.Change`-like method but leverages overlayfs's -// "upperdir" for computing the diff. "upperdirView" is overlayfs mounted view of -// the upperdir that doesn't contain whiteouts. This is used for computing -// changes under opaque directories. -func Changes(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, changeFn fs.ChangeFunc, upperdir, upperdirView, base string) error { - err := filepath.Walk(upperdir, func(path string, f os.FileInfo, err error) error { - if err != nil { - return err - } - if ctx.Err() != nil { - return ctx.Err() - } - - // Rebase path - path, err = filepath.Rel(upperdir, path) - if err != nil { - return err - } - path = filepath.Join(string(os.PathSeparator), path) - - // Skip root - if path == string(os.PathSeparator) { - return nil - } - - // Skip filtered path - for _, filtered := range withoutPaths { - if path == filtered || strings.HasPrefix(path, filtered+"/") { - return nil - } - } - - // Check redirect - if redirect, err := checkRedirect(upperdir, path, f); err != nil { - return err - } else if redirect { - // Return error when redirect_dir is enabled which can result to a wrong diff. - // TODO: support redirect_dir - logrus.Warnf( - "[need append] redirect_dir is used but it's not supported in overlayfs differ: %s", - filepath.Join(upperdir, path), - ) - appendMount(path) - return nil - } - - // Check if this is a deleted entry - isDelete, skip, err := checkDelete(upperdir, path, base, f) - if err != nil { - return err - } else if skip { - return nil - } - - var kind fs.ChangeKind - var skipRecord bool - if isDelete { - // This is a deleted entry. - kind = fs.ChangeKindDelete - // Leave f set to the FileInfo for the whiteout device in case the caller wants it, e.g. - // the merge code uses it to hardlink in the whiteout device to merged snapshots - } else if baseF, err := os.Lstat(filepath.Join(base, path)); err == nil { - // File exists in the base layer. Thus this is modified. - kind = fs.ChangeKindModify - // Avoid including directory that hasn't been modified. If /foo/bar/baz is modified, - // then /foo will apper here even if it's not been modified because it's the parent of bar. - if same, err := sameDirent(baseF, f, filepath.Join(base, path), filepath.Join(upperdirView, path)); same { - skipRecord = true // Both are the same, don't record the change - } else if err != nil { - return err - } - } else if os.IsNotExist(err) || errors.Is(err, unix.ENOTDIR) { - // File doesn't exist in the base layer. Thus this is added. - kind = fs.ChangeKindAdd - } else if err != nil { - return errors.Wrap(err, "failed to stat base file during overlay diff") - } - - if !skipRecord { - if err := changeFn(kind, path, f, nil); err != nil { - return err - } - } - - if f != nil { - if isOpaque, err := checkOpaque(upperdir, path, base, f); err != nil { - return err - } else if isOpaque { - // This is an opaque directory. Start a new walking differ to get adds/deletes of - // this directory. We use "upperdirView" directory which doesn't contain whiteouts. 
- if err := fs.Changes(ctx, filepath.Join(base, path), filepath.Join(upperdirView, path), - func(k fs.ChangeKind, p string, f os.FileInfo, err error) error { - return changeFn(k, filepath.Join(path, p), f, err) // rebase path to be based on the opaque dir - }, - ); err != nil { - return err - } - return filepath.SkipDir // We completed this directory. Do not walk files under this directory anymore. - } - } - return nil - }) - if err != nil { - return err - } - // Remove lower files, these files will be re-added on committing mount process. - for _, withPath := range withPaths { - if err := changeFn(fs.ChangeKindDelete, withPath, nil, nil); err != nil { - return errors.Wrapf(err, "handle deleted with path: %s", withPath) - } - } - return err -} - -// checkDelete checks if the specified file is a whiteout -func checkDelete(_ string, path string, base string, f os.FileInfo) (delete, skip bool, _ error) { - if f.Mode()&os.ModeCharDevice != 0 { - if _, ok := f.Sys().(*syscall.Stat_t); ok { - maj, min, err := devices.DeviceInfo(f) - if err != nil { - return false, false, errors.Wrapf(err, "failed to get device info") - } - if maj == 0 && min == 0 { - // This file is a whiteout (char 0/0) that indicates this is deleted from the base - if _, err := os.Lstat(filepath.Join(base, path)); err != nil { - if !os.IsNotExist(err) { - return false, false, errors.Wrapf(err, "failed to lstat") - } - // This file doesn't exist even in the base dir. - // We don't need whiteout. Just skip this file. - return false, true, nil - } - return true, false, nil - } - } - } - return false, false, nil -} - -// checkDelete checks if the specified file is an opaque directory -func checkOpaque(upperdir string, path string, base string, f os.FileInfo) (isOpaque bool, _ error) { - if f.IsDir() { - for _, oKey := range []string{"trusted.overlay.opaque", "user.overlay.opaque"} { - opaque, err := sysx.LGetxattr(filepath.Join(upperdir, path), oKey) - if err != nil && err != unix.ENODATA { - return false, errors.Wrapf(err, "failed to retrieve %s attr", oKey) - } else if len(opaque) == 1 && opaque[0] == 'y' { - // This is an opaque whiteout directory. - if _, err := os.Lstat(filepath.Join(base, path)); err != nil { - if !os.IsNotExist(err) { - return false, errors.Wrapf(err, "failed to lstat") - } - // This file doesn't exist even in the base dir. We don't need to treat this as an opaque. - return false, nil - } - return true, nil - } - } - } - return false, nil -} - -// checkRedirect checks if the specified path enables redirect_dir. -func checkRedirect(upperdir string, path string, f os.FileInfo) (bool, error) { - if f.IsDir() { - rKey := "trusted.overlay.redirect" - redirect, err := sysx.LGetxattr(filepath.Join(upperdir, path), rKey) - if err != nil && err != unix.ENODATA { - return false, errors.Wrapf(err, "failed to retrieve %s attr", rKey) - } - return len(redirect) > 0, nil - } - return false, nil -} - -// sameDirent performs continity-compatible comparison of files and directories. -// https://github.com/containerd/continuity/blob/v0.1.0/fs/path.go#L91-L133 -// This will only do a slow content comparison of two files if they have all the -// same metadata and both have truncated nanosecond mtime timestamps. In practice, -// this can only happen if both the base file in the lowerdirs has a truncated -// timestamp (i.e. was unpacked from a tar) and the user did something like -// "mv foo tmp && mv tmp foo" that results in the file being copied up to the -// upperdir without making any changes to it. 
This is much rarer than similar -// cases in the double-walking differ, where the slow content comparison will -// be used whenever a file with a truncated timestamp is in the lowerdir at -// all and left unmodified. -func sameDirent(f1, f2 os.FileInfo, f1fullPath, f2fullPath string) (bool, error) { - if os.SameFile(f1, f2) { - return true, nil - } - - equalStat, err := compareSysStat(f1.Sys(), f2.Sys()) - if err != nil || !equalStat { - return equalStat, err - } - - if eq, err := compareCapabilities(f1fullPath, f2fullPath); err != nil || !eq { - return eq, err - } - - if !f1.IsDir() { - if f1.Size() != f2.Size() { - return false, nil - } - t1 := f1.ModTime() - t2 := f2.ModTime() - - if t1.Unix() != t2.Unix() { - return false, nil - } - - // If the timestamp may have been truncated in both of the - // files, check content of file to determine difference - if t1.Nanosecond() == 0 && t2.Nanosecond() == 0 { - if (f1.Mode() & os.ModeSymlink) == os.ModeSymlink { - return compareSymlinkTarget(f1fullPath, f2fullPath) - } - if f1.Size() == 0 { - return true, nil - } - return compareFileContent(f1fullPath, f2fullPath) - } else if t1.Nanosecond() != t2.Nanosecond() { - return false, nil - } - } - - return true, nil -} - -// Ported from continuity project -// https://github.com/containerd/continuity/blob/v0.1.0/fs/diff_unix.go#L43-L54 -// Copyright The containerd Authors. -func compareSysStat(s1, s2 interface{}) (bool, error) { - ls1, ok := s1.(*syscall.Stat_t) - if !ok { - return false, nil - } - ls2, ok := s2.(*syscall.Stat_t) - if !ok { - return false, nil - } - - return ls1.Mode == ls2.Mode && ls1.Uid == ls2.Uid && ls1.Gid == ls2.Gid && ls1.Rdev == ls2.Rdev, nil -} - -// Ported from continuity project -// https://github.com/containerd/continuity/blob/v0.1.0/fs/diff_unix.go#L56-L66 -// Copyright The containerd Authors. -func compareCapabilities(p1, p2 string) (bool, error) { - c1, err := sysx.LGetxattr(p1, "security.capability") - if err != nil && err != sysx.ENODATA { - return false, errors.Wrapf(err, "failed to get xattr for %s", p1) - } - c2, err := sysx.LGetxattr(p2, "security.capability") - if err != nil && err != sysx.ENODATA { - return false, errors.Wrapf(err, "failed to get xattr for %s", p2) - } - return bytes.Equal(c1, c2), nil -} - -// Ported from continuity project -// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L135 -// Copyright The containerd Authors. -func compareSymlinkTarget(p1, p2 string) (bool, error) { - t1, err := os.Readlink(p1) - if err != nil { - return false, err - } - t2, err := os.Readlink(p2) - if err != nil { - return false, err - } - return t1 == t2, nil -} - -var bufPool = sync.Pool{ - New: func() interface{} { - b := make([]byte, 32*1024) - return &b - }, -} - -// Ported from continuity project -// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L151 -// Copyright The containerd Authors. 
-func compareFileContent(p1, p2 string) (bool, error) { - f1, err := os.Open(p1) - if err != nil { - return false, err - } - defer f1.Close() - if stat, err := f1.Stat(); err != nil { - return false, err - } else if !stat.Mode().IsRegular() { - return false, errors.Errorf("%s is not a regular file", p1) - } - - f2, err := os.Open(p2) - if err != nil { - return false, err - } - defer f2.Close() - if stat, err := f2.Stat(); err != nil { - return false, err - } else if !stat.Mode().IsRegular() { - return false, errors.Errorf("%s is not a regular file", p2) - } - - b1 := bufPool.Get().(*[]byte) - defer bufPool.Put(b1) - b2 := bufPool.Get().(*[]byte) - defer bufPool.Put(b2) - for { - n1, err1 := io.ReadFull(f1, *b1) - if err1 == io.ErrUnexpectedEOF { - // it's expected to get EOF when file size isn't a multiple of chunk size, consolidate these error types - err1 = io.EOF - } - if err1 != nil && err1 != io.EOF { - return false, err1 - } - n2, err2 := io.ReadFull(f2, *b2) - if err2 == io.ErrUnexpectedEOF { - err2 = io.EOF - } - if err2 != nil && err2 != io.EOF { - return false, err2 - } - if n1 != n2 || !bytes.Equal((*b1)[:n1], (*b2)[:n2]) { - return false, nil - } - if err1 == io.EOF && err2 == io.EOF { - return true, nil - } - } -} +// Ported from buildkit project, copyright The buildkit Authors. +// https://github.com/moby/buildkit + +package diff + +import ( + "bytes" + "context" + "io" + "os" + "path/filepath" + "strings" + "sync" + "syscall" + + "github.com/containerd/containerd/mount" + "github.com/containerd/continuity/devices" + "github.com/containerd/continuity/fs" + "github.com/containerd/continuity/sysx" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// GetUpperdir parses the passed mounts and identifies the directory +// that contains diff between upper and lower. 
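To make the accepted shapes concrete, here is an editor's sketch of the lower/upper mount pair `GetUpperdir` expects from an overlay snapshotter and what it returns; the snapshot paths are hypothetical.

```go
package main

import (
	"fmt"

	"github.com/containerd/containerd/mount"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff"
)

func main() {
	// Lower snapshot: an overlay of two read-only layers.
	lower := []mount.Mount{{
		Type:    "overlay",
		Source:  "overlay",
		Options: []string{"lowerdir=/snapshots/1/fs:/snapshots/0/fs"},
	}}
	// Upper snapshot: the same chain plus one writable layer on top.
	upper := []mount.Mount{{
		Type:   "overlay",
		Source: "overlay",
		Options: []string{
			"workdir=/snapshots/2/work",
			"upperdir=/snapshots/2/fs",
			"lowerdir=/snapshots/1/fs:/snapshots/0/fs",
		},
	}}
	dir, err := diff.GetUpperdir(lower, upper)
	fmt.Println(dir, err) // "/snapshots/2/fs" <nil>
}
```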
+func GetUpperdir(lower, upper []mount.Mount) (string, error) { + var upperdir string + if len(lower) == 0 && len(upper) == 1 { // upper is the bottommost snapshot + // Get layer directories of upper snapshot + upperM := upper[0] + if upperM.Type != "bind" { + return "", errors.Errorf("bottommost upper must be bind mount but %q", upperM.Type) + } + upperdir = upperM.Source + } else if len(lower) == 1 && len(upper) == 1 { + // Get layer directories of lower snapshot + var lowerlayers []string + lowerM := lower[0] + switch lowerM.Type { + case "bind": + // lower snapshot is a bind mount of one layer + lowerlayers = []string{lowerM.Source} + case "overlay": + // lower snapshot is an overlay mount of multiple layers + var err error + lowerlayers, err = GetOverlayLayers(lowerM) + if err != nil { + return "", err + } + default: + return "", errors.Errorf("cannot get layer information from mount option (type = %q)", lowerM.Type) + } + + // Get layer directories of upper snapshot + upperM := upper[0] + if upperM.Type != "overlay" { + return "", errors.Errorf("upper snapshot isn't overlay mounted (type = %q)", upperM.Type) + } + upperlayers, err := GetOverlayLayers(upperM) + if err != nil { + return "", err + } + + // Check if the diff directory can be determined + if len(upperlayers) != len(lowerlayers)+1 { + return "", errors.Errorf("cannot determine diff of more than one upper directories") + } + for i := 0; i < len(lowerlayers); i++ { + if upperlayers[i] != lowerlayers[i] { + return "", errors.Errorf("layer %d must be common between upper and lower snapshots", i) + } + } + upperdir = upperlayers[len(upperlayers)-1] // get the topmost layer that indicates diff + } else { + return "", errors.Errorf("multiple mount configurations are not supported") + } + if upperdir == "" { + return "", errors.Errorf("cannot determine upperdir from mount option") + } + return upperdir, nil +} + +// GetOverlayLayers returns all layer directories of an overlayfs mount. +func GetOverlayLayers(m mount.Mount) ([]string, error) { + var u string + var uFound bool + var l []string // l[0] = bottommost + for _, o := range m.Options { + if strings.HasPrefix(o, "upperdir=") { + u, uFound = strings.TrimPrefix(o, "upperdir="), true + } else if strings.HasPrefix(o, "lowerdir=") { + l = strings.Split(strings.TrimPrefix(o, "lowerdir="), ":") + for i, j := 0, len(l)-1; i < j; i, j = i+1, j-1 { + l[i], l[j] = l[j], l[i] // make l[0] = bottommost + } + } else if strings.HasPrefix(o, "workdir=") || o == "index=off" || o == "userxattr" || strings.HasPrefix(o, "redirect_dir=") { + // these options are possible to specfied by the snapshotter but not indicate dir locations. + continue + } else { + // encountering an unknown option. return error and fallback to walking differ + // to avoid unexpected diff. + return nil, errors.Errorf("unknown option %q specified by snapshotter", o) + } + } + if uFound { + return append(l, u), nil + } + return l, nil +} + +type cancellableWriter struct { + ctx context.Context + w io.Writer +} + +func (w *cancellableWriter) Write(p []byte) (int, error) { + if err := w.ctx.Err(); err != nil { + return 0, err + } + return w.w.Write(p) +} + +// Changes is continuty's `fs.Change`-like method but leverages overlayfs's +// "upperdir" for computing the diff. "upperdirView" is overlayfs mounted view of +// the upperdir that doesn't contain whiteouts. This is used for computing +// changes under opaque directories. 
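In the upperdir, overlayfs records a deletion as a 0/0 character device (whiteout) and a replaced directory via the `trusted.overlay.opaque`/`user.overlay.opaque` xattr; `Changes` below translates those markers into delete/add/modify events. An editor's sketch of invoking it directly, assuming the three directories (raw upperdir, a whiteout-free mounted view of it, and the merged lower view) have already been prepared the way `writeUpperdir` does; the paths are hypothetical.

```go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/containerd/continuity/fs"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer/diff"
)

func main() {
	printChange := func(k fs.ChangeKind, p string, _ os.FileInfo, _ error) error {
		fmt.Println(k, p) // one line per changed path
		return nil
	}
	err := diff.Changes(context.Background(),
		func(string) {}, // appendMount: receives paths the differ cannot express
		nil, nil,        // withPaths / withoutPaths filters
		printChange,
		"/snapshots/3/fs",   // upperdir (raw, still contains whiteouts)
		"/mnt/upper-view",   // upperdirView (overlay-mounted, whiteout-free)
		"/mnt/lower-merged", // base (merged view of all lower layers)
	)
	if err != nil {
		panic(err)
	}
}
```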
+func Changes(ctx context.Context, appendMount func(path string), withPaths []string, withoutPaths []string, changeFn fs.ChangeFunc, upperdir, upperdirView, base string) error { + err := filepath.Walk(upperdir, func(path string, f os.FileInfo, err error) error { + if err != nil { + return err + } + if ctx.Err() != nil { + return ctx.Err() + } + + // Rebase path + path, err = filepath.Rel(upperdir, path) + if err != nil { + return err + } + path = filepath.Join(string(os.PathSeparator), path) + + // Skip root + if path == string(os.PathSeparator) { + return nil + } + + // Skip filtered path + for _, filtered := range withoutPaths { + if path == filtered || strings.HasPrefix(path, filtered+"/") { + return nil + } + } + + // Check redirect + if redirect, err := checkRedirect(upperdir, path, f); err != nil { + return err + } else if redirect { + // Return error when redirect_dir is enabled which can result to a wrong diff. + // TODO: support redirect_dir + logrus.Warnf( + "[need append] redirect_dir is used but it's not supported in overlayfs differ: %s", + filepath.Join(upperdir, path), + ) + appendMount(path) + return nil + } + + // Check if this is a deleted entry + isDelete, skip, err := checkDelete(upperdir, path, base, f) + if err != nil { + return err + } else if skip { + return nil + } + + var kind fs.ChangeKind + var skipRecord bool + if isDelete { + // This is a deleted entry. + kind = fs.ChangeKindDelete + // Leave f set to the FileInfo for the whiteout device in case the caller wants it, e.g. + // the merge code uses it to hardlink in the whiteout device to merged snapshots + } else if baseF, err := os.Lstat(filepath.Join(base, path)); err == nil { + // File exists in the base layer. Thus this is modified. + kind = fs.ChangeKindModify + // Avoid including directory that hasn't been modified. If /foo/bar/baz is modified, + // then /foo will apper here even if it's not been modified because it's the parent of bar. + if same, err := sameDirent(baseF, f, filepath.Join(base, path), filepath.Join(upperdirView, path)); same { + skipRecord = true // Both are the same, don't record the change + } else if err != nil { + return err + } + } else if os.IsNotExist(err) || errors.Is(err, unix.ENOTDIR) { + // File doesn't exist in the base layer. Thus this is added. + kind = fs.ChangeKindAdd + } else if err != nil { + return errors.Wrap(err, "failed to stat base file during overlay diff") + } + + if !skipRecord { + if err := changeFn(kind, path, f, nil); err != nil { + return err + } + } + + if f != nil { + if isOpaque, err := checkOpaque(upperdir, path, base, f); err != nil { + return err + } else if isOpaque { + // This is an opaque directory. Start a new walking differ to get adds/deletes of + // this directory. We use "upperdirView" directory which doesn't contain whiteouts. + if err := fs.Changes(ctx, filepath.Join(base, path), filepath.Join(upperdirView, path), + func(k fs.ChangeKind, p string, f os.FileInfo, err error) error { + return changeFn(k, filepath.Join(path, p), f, err) // rebase path to be based on the opaque dir + }, + ); err != nil { + return err + } + return filepath.SkipDir // We completed this directory. Do not walk files under this directory anymore. + } + } + return nil + }) + if err != nil { + return err + } + // Remove lower files, these files will be re-added on committing mount process. 
+ for _, withPath := range withPaths { + if err := changeFn(fs.ChangeKindDelete, withPath, nil, nil); err != nil { + return errors.Wrapf(err, "handle deleted with path: %s", withPath) + } + } + return err +} + +// checkDelete checks if the specified file is a whiteout +func checkDelete(_ string, path string, base string, f os.FileInfo) (delete, skip bool, _ error) { + if f.Mode()&os.ModeCharDevice != 0 { + if _, ok := f.Sys().(*syscall.Stat_t); ok { + maj, min, err := devices.DeviceInfo(f) + if err != nil { + return false, false, errors.Wrapf(err, "failed to get device info") + } + if maj == 0 && min == 0 { + // This file is a whiteout (char 0/0) that indicates this is deleted from the base + if _, err := os.Lstat(filepath.Join(base, path)); err != nil { + if !os.IsNotExist(err) { + return false, false, errors.Wrapf(err, "failed to lstat") + } + // This file doesn't exist even in the base dir. + // We don't need whiteout. Just skip this file. + return false, true, nil + } + return true, false, nil + } + } + } + return false, false, nil +} + +// checkDelete checks if the specified file is an opaque directory +func checkOpaque(upperdir string, path string, base string, f os.FileInfo) (isOpaque bool, _ error) { + if f.IsDir() { + for _, oKey := range []string{"trusted.overlay.opaque", "user.overlay.opaque"} { + opaque, err := sysx.LGetxattr(filepath.Join(upperdir, path), oKey) + if err != nil && err != unix.ENODATA { + return false, errors.Wrapf(err, "failed to retrieve %s attr", oKey) + } else if len(opaque) == 1 && opaque[0] == 'y' { + // This is an opaque whiteout directory. + if _, err := os.Lstat(filepath.Join(base, path)); err != nil { + if !os.IsNotExist(err) { + return false, errors.Wrapf(err, "failed to lstat") + } + // This file doesn't exist even in the base dir. We don't need to treat this as an opaque. + return false, nil + } + return true, nil + } + } + } + return false, nil +} + +// checkRedirect checks if the specified path enables redirect_dir. +func checkRedirect(upperdir string, path string, f os.FileInfo) (bool, error) { + if f.IsDir() { + rKey := "trusted.overlay.redirect" + redirect, err := sysx.LGetxattr(filepath.Join(upperdir, path), rKey) + if err != nil && err != unix.ENODATA { + return false, errors.Wrapf(err, "failed to retrieve %s attr", rKey) + } + return len(redirect) > 0, nil + } + return false, nil +} + +// sameDirent performs continity-compatible comparison of files and directories. +// https://github.com/containerd/continuity/blob/v0.1.0/fs/path.go#L91-L133 +// This will only do a slow content comparison of two files if they have all the +// same metadata and both have truncated nanosecond mtime timestamps. In practice, +// this can only happen if both the base file in the lowerdirs has a truncated +// timestamp (i.e. was unpacked from a tar) and the user did something like +// "mv foo tmp && mv tmp foo" that results in the file being copied up to the +// upperdir without making any changes to it. This is much rarer than similar +// cases in the double-walking differ, where the slow content comparison will +// be used whenever a file with a truncated timestamp is in the lowerdir at +// all and left unmodified. 
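The truncated-timestamp condition above is easy to picture: mtimes restored by a tar unpack typically carry only whole seconds, so their nanosecond part is zero, and only in that ambiguous case does the comparison fall back to reading file contents. A small editor's illustration (timestamps hypothetical):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	fromTar := time.Unix(1700000000, 0)         // truncated mtime, as left by a tar unpack
	touched := time.Unix(1700000000, 123456789) // ordinary kernel-set mtime

	fmt.Println(fromTar.Nanosecond() == 0) // true  -> content comparison may be needed
	fmt.Println(touched.Nanosecond() == 0) // false -> nanoseconds already disambiguate
}
```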
+func sameDirent(f1, f2 os.FileInfo, f1fullPath, f2fullPath string) (bool, error) { + if os.SameFile(f1, f2) { + return true, nil + } + + equalStat, err := compareSysStat(f1.Sys(), f2.Sys()) + if err != nil || !equalStat { + return equalStat, err + } + + if eq, err := compareCapabilities(f1fullPath, f2fullPath); err != nil || !eq { + return eq, err + } + + if !f1.IsDir() { + if f1.Size() != f2.Size() { + return false, nil + } + t1 := f1.ModTime() + t2 := f2.ModTime() + + if t1.Unix() != t2.Unix() { + return false, nil + } + + // If the timestamp may have been truncated in both of the + // files, check content of file to determine difference + if t1.Nanosecond() == 0 && t2.Nanosecond() == 0 { + if (f1.Mode() & os.ModeSymlink) == os.ModeSymlink { + return compareSymlinkTarget(f1fullPath, f2fullPath) + } + if f1.Size() == 0 { + return true, nil + } + return compareFileContent(f1fullPath, f2fullPath) + } else if t1.Nanosecond() != t2.Nanosecond() { + return false, nil + } + } + + return true, nil +} + +// Ported from continuity project +// https://github.com/containerd/continuity/blob/v0.1.0/fs/diff_unix.go#L43-L54 +// Copyright The containerd Authors. +func compareSysStat(s1, s2 interface{}) (bool, error) { + ls1, ok := s1.(*syscall.Stat_t) + if !ok { + return false, nil + } + ls2, ok := s2.(*syscall.Stat_t) + if !ok { + return false, nil + } + + return ls1.Mode == ls2.Mode && ls1.Uid == ls2.Uid && ls1.Gid == ls2.Gid && ls1.Rdev == ls2.Rdev, nil +} + +// Ported from continuity project +// https://github.com/containerd/continuity/blob/v0.1.0/fs/diff_unix.go#L56-L66 +// Copyright The containerd Authors. +func compareCapabilities(p1, p2 string) (bool, error) { + c1, err := sysx.LGetxattr(p1, "security.capability") + if err != nil && err != sysx.ENODATA { + return false, errors.Wrapf(err, "failed to get xattr for %s", p1) + } + c2, err := sysx.LGetxattr(p2, "security.capability") + if err != nil && err != sysx.ENODATA { + return false, errors.Wrapf(err, "failed to get xattr for %s", p2) + } + return bytes.Equal(c1, c2), nil +} + +// Ported from continuity project +// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L135 +// Copyright The containerd Authors. +func compareSymlinkTarget(p1, p2 string) (bool, error) { + t1, err := os.Readlink(p1) + if err != nil { + return false, err + } + t2, err := os.Readlink(p2) + if err != nil { + return false, err + } + return t1 == t2, nil +} + +var bufPool = sync.Pool{ + New: func() interface{} { + b := make([]byte, 32*1024) + return &b + }, +} + +// Ported from continuity project +// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L151 +// Copyright The containerd Authors. 
+func compareFileContent(p1, p2 string) (bool, error) { + f1, err := os.Open(p1) + if err != nil { + return false, err + } + defer f1.Close() + if stat, err := f1.Stat(); err != nil { + return false, err + } else if !stat.Mode().IsRegular() { + return false, errors.Errorf("%s is not a regular file", p1) + } + + f2, err := os.Open(p2) + if err != nil { + return false, err + } + defer f2.Close() + if stat, err := f2.Stat(); err != nil { + return false, err + } else if !stat.Mode().IsRegular() { + return false, errors.Errorf("%s is not a regular file", p2) + } + + b1 := bufPool.Get().(*[]byte) + defer bufPool.Put(b1) + b2 := bufPool.Get().(*[]byte) + defer bufPool.Put(b2) + for { + n1, err1 := io.ReadFull(f1, *b1) + if err1 == io.ErrUnexpectedEOF { + // it's expected to get EOF when file size isn't a multiple of chunk size, consolidate these error types + err1 = io.EOF + } + if err1 != nil && err1 != io.EOF { + return false, err1 + } + n2, err2 := io.ReadFull(f2, *b2) + if err2 == io.ErrUnexpectedEOF { + err2 = io.EOF + } + if err2 != nil && err2 != io.EOF { + return false, err2 + } + if n1 != n2 || !bytes.Equal((*b1)[:n1], (*b2)[:n2]) { + return false, nil + } + if err1 == io.EOF && err2 == io.EOF { + return true, nil + } + } +} diff --git a/contrib/nydusify/pkg/committer/manager.go b/contrib/nydusify/pkg/committer/manager.go index 6816171ca4f..70d8b9e04fa 100644 --- a/contrib/nydusify/pkg/committer/manager.go +++ b/contrib/nydusify/pkg/committer/manager.go @@ -1,130 +1,130 @@ -// Copyright 2024 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package committer - -import ( - "context" - "encoding/json" - "strings" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/oci" - - "github.com/pkg/errors" -) - -type InspectResult struct { - LowerDirs string - UpperDir string - Image string - Mounts []Mount - Pid int -} - -type Mount struct { - Destination string - Source string -} - -type Manager struct { - address string -} - -func NewManager(addr string) (*Manager, error) { - return &Manager{ - address: addr, - }, nil -} - -func (m *Manager) Pause(ctx context.Context, containerID string) error { - client, err := containerd.New(m.address) - if err != nil { - return errors.Wrapf(err, "create client") - } - container, err := client.LoadContainer(ctx, containerID) - if err != nil { - return errors.Wrapf(err, "load container") - } - task, err := container.Task(ctx, nil) - if err != nil { - return errors.Wrapf(err, "obtain container task") - } - - return task.Pause(ctx) -} - -func (m *Manager) UnPause(ctx context.Context, containerID string) error { - client, err := containerd.New(m.address) - if err != nil { - return errors.Wrapf(err, "create client") - } - container, err := client.LoadContainer(ctx, containerID) - if err != nil { - return errors.Wrapf(err, "load container") - } - task, err := container.Task(ctx, nil) - if err != nil { - return errors.Wrapf(err, "obtain container task") - } - - return task.Resume(ctx) -} - -func (m *Manager) Inspect(ctx context.Context, containerID string) (*InspectResult, error) { - client, err := containerd.New(m.address) - if err != nil { - return nil, errors.Wrapf(err, "create client") - } - container, err := client.LoadContainer(ctx, containerID) - if err != nil { - return nil, errors.Wrapf(err, "load container") - } - _image, err := container.Image(ctx) - if err != nil { - return nil, errors.Wrapf(err, "obtain container image") - } - image := _image.Name() - - task, err := container.Task(ctx, nil) - if 
err != nil { - return nil, errors.Wrapf(err, "obtain container task") - } - pid := int(task.Pid()) - - containerInfo, err := container.Info(ctx, containerd.WithoutRefreshedMetadata) - if err != nil { - return nil, errors.Wrapf(err, "obtain container info") - } - spec := oci.Spec{} - if err := json.Unmarshal(containerInfo.Spec.GetValue(), &spec); err != nil { - return nil, errors.Wrapf(err, "unmarshal json") - } - mounts := []Mount{} - for _, mount := range spec.Mounts { - mounts = append(mounts, Mount{ - Destination: mount.Destination, - Source: mount.Source, - }) - } - - snapshot := client.SnapshotService("nydus") - lowerDirs := "" - upperDir := "" - mount, err := snapshot.Mounts(ctx, containerInfo.SnapshotKey) - if err != nil { - return nil, errors.Wrapf(err, "get snapshot mount") - } - // snapshot Mount Options[0] "workdir=$workdir", Options[1] "upperdir=$upperdir", Options[2] "lowerdir=$lowerdir". - lowerDirs = strings.TrimPrefix(mount[0].Options[2], "lowerdir=") - upperDir = strings.TrimPrefix(mount[0].Options[1], "upperdir=") - - return &InspectResult{ - LowerDirs: lowerDirs, - UpperDir: upperDir, - Image: image, - Mounts: mounts, - Pid: pid, - }, nil -} +// Copyright 2024 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package committer + +import ( + "context" + "encoding/json" + "strings" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/oci" + + "github.com/pkg/errors" +) + +type InspectResult struct { + LowerDirs string + UpperDir string + Image string + Mounts []Mount + Pid int +} + +type Mount struct { + Destination string + Source string +} + +type Manager struct { + address string +} + +func NewManager(addr string) (*Manager, error) { + return &Manager{ + address: addr, + }, nil +} + +func (m *Manager) Pause(ctx context.Context, containerID string) error { + client, err := containerd.New(m.address) + if err != nil { + return errors.Wrapf(err, "create client") + } + container, err := client.LoadContainer(ctx, containerID) + if err != nil { + return errors.Wrapf(err, "load container") + } + task, err := container.Task(ctx, nil) + if err != nil { + return errors.Wrapf(err, "obtain container task") + } + + return task.Pause(ctx) +} + +func (m *Manager) UnPause(ctx context.Context, containerID string) error { + client, err := containerd.New(m.address) + if err != nil { + return errors.Wrapf(err, "create client") + } + container, err := client.LoadContainer(ctx, containerID) + if err != nil { + return errors.Wrapf(err, "load container") + } + task, err := container.Task(ctx, nil) + if err != nil { + return errors.Wrapf(err, "obtain container task") + } + + return task.Resume(ctx) +} + +func (m *Manager) Inspect(ctx context.Context, containerID string) (*InspectResult, error) { + client, err := containerd.New(m.address) + if err != nil { + return nil, errors.Wrapf(err, "create client") + } + container, err := client.LoadContainer(ctx, containerID) + if err != nil { + return nil, errors.Wrapf(err, "load container") + } + _image, err := container.Image(ctx) + if err != nil { + return nil, errors.Wrapf(err, "obtain container image") + } + image := _image.Name() + + task, err := container.Task(ctx, nil) + if err != nil { + return nil, errors.Wrapf(err, "obtain container task") + } + pid := int(task.Pid()) + + containerInfo, err := container.Info(ctx, containerd.WithoutRefreshedMetadata) + if err != nil { + return nil, errors.Wrapf(err, "obtain container info") + } + spec := oci.Spec{} + if err := 
json.Unmarshal(containerInfo.Spec.GetValue(), &spec); err != nil { + return nil, errors.Wrapf(err, "unmarshal json") + } + mounts := []Mount{} + for _, mount := range spec.Mounts { + mounts = append(mounts, Mount{ + Destination: mount.Destination, + Source: mount.Source, + }) + } + + snapshot := client.SnapshotService("nydus") + lowerDirs := "" + upperDir := "" + mount, err := snapshot.Mounts(ctx, containerInfo.SnapshotKey) + if err != nil { + return nil, errors.Wrapf(err, "get snapshot mount") + } + // snapshot Mount Options[0] "workdir=$workdir", Options[1] "upperdir=$upperdir", Options[2] "lowerdir=$lowerdir". + lowerDirs = strings.TrimPrefix(mount[0].Options[2], "lowerdir=") + upperDir = strings.TrimPrefix(mount[0].Options[1], "upperdir=") + + return &InspectResult{ + LowerDirs: lowerDirs, + UpperDir: upperDir, + Image: image, + Mounts: mounts, + Pid: pid, + }, nil +} diff --git a/contrib/nydusify/pkg/committer/nsenter.go b/contrib/nydusify/pkg/committer/nsenter.go index d30bf57380b..955f00ea0a1 100644 --- a/contrib/nydusify/pkg/committer/nsenter.go +++ b/contrib/nydusify/pkg/committer/nsenter.go @@ -1,186 +1,186 @@ -// Ported from go-nsenter project, copyright The go-nsenter Authors. -// https://github.com/Devatoria/go-nsenter - -package committer - -import ( - "bytes" - "context" - "fmt" - "io" - "os/exec" - "strconv" - "time" -) - -// Config is the nsenter configuration used to generate -// nsenter command -type Config struct { - Cgroup bool // Enter cgroup namespace - CgroupFile string // Cgroup namespace location, default to /proc/PID/ns/cgroup - FollowContext bool // Set SELinux security context - GID int // GID to use to execute given program - IPC bool // Enter IPC namespace - IPCFile string // IPC namespace location, default to /proc/PID/ns/ipc - Mount bool // Enter mount namespace - MountFile string // Mount namespace location, default to /proc/PID/ns/mnt - Net bool // Enter network namespace - NetFile string // Network namespace location, default to /proc/PID/ns/net - NoFork bool // Do not fork before executing the specified program - PID bool // Enter PID namespace - PIDFile string // PID namespace location, default to /proc/PID/ns/pid - PreserveCredentials bool // Preserve current UID/GID when entering namespaces - RootDirectory string // Set the root directory, default to target process root directory - Target int // Target PID (required) - UID int // UID to use to execute given program - User bool // Enter user namespace - UserFile string // User namespace location, default to /proc/PID/ns/user - UTS bool // Enter UTS namespace - UTSFile string // UTS namespace location, default to /proc/PID/ns/uts - WorkingDirectory string // Set the working directory, default to target process working directory -} - -// Execute executes the given command with a default background context -func (c *Config) Execute(writer io.Writer, program string, args ...string) (string, error) { - return c.ExecuteContext(context.Background(), writer, program, args...) 
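The `Manager` added in manager.go wraps the containerd client for the three operations the committer needs: pause, inspect, resume. An editor's sketch of that call sequence follows; the socket path, namespace and container ID are hypothetical, and `Inspect` only works for containers backed by the nydus snapshotter since it queries the `nydus` snapshot service.

```go
package main

import (
	"context"
	"fmt"

	"github.com/containerd/containerd/namespaces"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer"
)

func main() {
	// containerd API calls need a namespace; "default" is assumed here
	// (Kubernetes-managed containers usually live in "k8s.io").
	ctx := namespaces.WithNamespace(context.Background(), "default")

	mgr, err := committer.NewManager("/run/containerd/containerd.sock")
	if err != nil {
		panic(err)
	}
	id := "example-container" // hypothetical container ID

	if err := mgr.Pause(ctx, id); err != nil {
		panic(err)
	}
	defer mgr.UnPause(ctx, id)

	info, err := mgr.Inspect(ctx, id)
	if err != nil {
		panic(err)
	}
	// UpperDir is the container's writable layer; LowerDirs is the
	// colon-separated read-only chain parsed from the snapshot mount options.
	fmt.Println(info.Image, info.Pid)
	fmt.Println(info.UpperDir)
	fmt.Println(info.LowerDirs)
}
```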
-} - -// ExecuteContext the given program using the given nsenter configuration and given context -// and return stdout/stderr or an error if command has failed -func (c *Config) ExecuteContext(ctx context.Context, writer io.Writer, program string, args ...string) (string, error) { - cmd, err := c.buildCommand(ctx) - if err != nil { - return "", fmt.Errorf("Error while building command: %v", err) - } - - // Prepare command - var srderr bytes.Buffer - rc, err := cmd.StdoutPipe() - if err != nil { - return "", fmt.Errorf("Open stdout pipe: %v", err) - } - defer rc.Close() - - cmd.Stderr = &srderr - cmd.Args = append(cmd.Args, program) - cmd.Args = append(cmd.Args, args...) - - if err := cmd.Start(); err != nil { - return srderr.String(), err - } - - // HACK: we can't wait rc.Close happen automatically when process - // exits, so must check process state and call rc.Close() by manually. - go func() { - for { - time.Sleep(time.Second * 1) - if cmd.ProcessState != nil && cmd.ProcessState.Exited() { - rc.Close() - break - } - } - }() - - if _, err := io.Copy(writer, rc); err != nil { - return srderr.String(), err - } - - return srderr.String(), cmd.Wait() -} - -func (c *Config) buildCommand(ctx context.Context) (*exec.Cmd, error) { - if c.Target == 0 { - return nil, fmt.Errorf("Target must be specified") - } - - var args []string - args = append(args, "--target", strconv.Itoa(c.Target)) - - if c.Cgroup { - if c.CgroupFile != "" { - args = append(args, fmt.Sprintf("--cgroup=%s", c.CgroupFile)) - } else { - args = append(args, "--cgroup") - } - } - - if c.FollowContext { - args = append(args, "--follow-context") - } - - if c.GID != 0 { - args = append(args, "--setgid", strconv.Itoa(c.GID)) - } - - if c.IPC { - if c.IPCFile != "" { - args = append(args, fmt.Sprintf("--ip=%s", c.IPCFile)) - } else { - args = append(args, "--ipc") - } - } - - if c.Mount { - if c.MountFile != "" { - args = append(args, fmt.Sprintf("--mount=%s", c.MountFile)) - } else { - args = append(args, "--mount") - } - } - - if c.Net { - if c.NetFile != "" { - args = append(args, fmt.Sprintf("--net=%s", c.NetFile)) - } else { - args = append(args, "--net") - } - } - - if c.NoFork { - args = append(args, "--no-fork") - } - - if c.PID { - if c.PIDFile != "" { - args = append(args, fmt.Sprintf("--pid=%s", c.PIDFile)) - } else { - args = append(args, "--pid") - } - } - - if c.PreserveCredentials { - args = append(args, "--preserve-credentials") - } - - if c.RootDirectory != "" { - args = append(args, "--root", c.RootDirectory) - } - - if c.UID != 0 { - args = append(args, "--setuid", strconv.Itoa(c.UID)) - } - - if c.User { - if c.UserFile != "" { - args = append(args, fmt.Sprintf("--user=%s", c.UserFile)) - } else { - args = append(args, "--user") - } - } - - if c.UTS { - if c.UTSFile != "" { - args = append(args, fmt.Sprintf("--uts=%s", c.UTSFile)) - } else { - args = append(args, "--uts") - } - } - - if c.WorkingDirectory != "" { - args = append(args, "--wd", c.WorkingDirectory) - } - - cmd := exec.CommandContext(ctx, "nsenter", args...) - - return cmd, nil -} +// Ported from go-nsenter project, copyright The go-nsenter Authors. 
+// https://github.com/Devatoria/go-nsenter + +package committer + +import ( + "bytes" + "context" + "fmt" + "io" + "os/exec" + "strconv" + "time" +) + +// Config is the nsenter configuration used to generate +// nsenter command +type Config struct { + Cgroup bool // Enter cgroup namespace + CgroupFile string // Cgroup namespace location, default to /proc/PID/ns/cgroup + FollowContext bool // Set SELinux security context + GID int // GID to use to execute given program + IPC bool // Enter IPC namespace + IPCFile string // IPC namespace location, default to /proc/PID/ns/ipc + Mount bool // Enter mount namespace + MountFile string // Mount namespace location, default to /proc/PID/ns/mnt + Net bool // Enter network namespace + NetFile string // Network namespace location, default to /proc/PID/ns/net + NoFork bool // Do not fork before executing the specified program + PID bool // Enter PID namespace + PIDFile string // PID namespace location, default to /proc/PID/ns/pid + PreserveCredentials bool // Preserve current UID/GID when entering namespaces + RootDirectory string // Set the root directory, default to target process root directory + Target int // Target PID (required) + UID int // UID to use to execute given program + User bool // Enter user namespace + UserFile string // User namespace location, default to /proc/PID/ns/user + UTS bool // Enter UTS namespace + UTSFile string // UTS namespace location, default to /proc/PID/ns/uts + WorkingDirectory string // Set the working directory, default to target process working directory +} + +// Execute executes the given command with a default background context +func (c *Config) Execute(writer io.Writer, program string, args ...string) (string, error) { + return c.ExecuteContext(context.Background(), writer, program, args...) +} + +// ExecuteContext the given program using the given nsenter configuration and given context +// and return stdout/stderr or an error if command has failed +func (c *Config) ExecuteContext(ctx context.Context, writer io.Writer, program string, args ...string) (string, error) { + cmd, err := c.buildCommand(ctx) + if err != nil { + return "", fmt.Errorf("Error while building command: %v", err) + } + + // Prepare command + var srderr bytes.Buffer + rc, err := cmd.StdoutPipe() + if err != nil { + return "", fmt.Errorf("Open stdout pipe: %v", err) + } + defer rc.Close() + + cmd.Stderr = &srderr + cmd.Args = append(cmd.Args, program) + cmd.Args = append(cmd.Args, args...) + + if err := cmd.Start(); err != nil { + return srderr.String(), err + } + + // HACK: we can't wait rc.Close happen automatically when process + // exits, so must check process state and call rc.Close() by manually. 
+ go func() { + for { + time.Sleep(time.Second * 1) + if cmd.ProcessState != nil && cmd.ProcessState.Exited() { + rc.Close() + break + } + } + }() + + if _, err := io.Copy(writer, rc); err != nil { + return srderr.String(), err + } + + return srderr.String(), cmd.Wait() +} + +func (c *Config) buildCommand(ctx context.Context) (*exec.Cmd, error) { + if c.Target == 0 { + return nil, fmt.Errorf("Target must be specified") + } + + var args []string + args = append(args, "--target", strconv.Itoa(c.Target)) + + if c.Cgroup { + if c.CgroupFile != "" { + args = append(args, fmt.Sprintf("--cgroup=%s", c.CgroupFile)) + } else { + args = append(args, "--cgroup") + } + } + + if c.FollowContext { + args = append(args, "--follow-context") + } + + if c.GID != 0 { + args = append(args, "--setgid", strconv.Itoa(c.GID)) + } + + if c.IPC { + if c.IPCFile != "" { + args = append(args, fmt.Sprintf("--ip=%s", c.IPCFile)) + } else { + args = append(args, "--ipc") + } + } + + if c.Mount { + if c.MountFile != "" { + args = append(args, fmt.Sprintf("--mount=%s", c.MountFile)) + } else { + args = append(args, "--mount") + } + } + + if c.Net { + if c.NetFile != "" { + args = append(args, fmt.Sprintf("--net=%s", c.NetFile)) + } else { + args = append(args, "--net") + } + } + + if c.NoFork { + args = append(args, "--no-fork") + } + + if c.PID { + if c.PIDFile != "" { + args = append(args, fmt.Sprintf("--pid=%s", c.PIDFile)) + } else { + args = append(args, "--pid") + } + } + + if c.PreserveCredentials { + args = append(args, "--preserve-credentials") + } + + if c.RootDirectory != "" { + args = append(args, "--root", c.RootDirectory) + } + + if c.UID != 0 { + args = append(args, "--setuid", strconv.Itoa(c.UID)) + } + + if c.User { + if c.UserFile != "" { + args = append(args, fmt.Sprintf("--user=%s", c.UserFile)) + } else { + args = append(args, "--user") + } + } + + if c.UTS { + if c.UTSFile != "" { + args = append(args, fmt.Sprintf("--uts=%s", c.UTSFile)) + } else { + args = append(args, "--uts") + } + } + + if c.WorkingDirectory != "" { + args = append(args, "--wd", c.WorkingDirectory) + } + + cmd := exec.CommandContext(ctx, "nsenter", args...) 
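`buildCommand` below maps each populated `Config` field onto the corresponding util-linux `nsenter` flag. An editor's sketch of using `Config` to run a command inside a running process's namespaces (hypothetical PID; requires the `nsenter` binary and sufficient privileges):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/committer"
)

func main() {
	cfg := committer.Config{
		Mount:  true,  // enter the target's mount namespace
		PID:    true,  // and its PID namespace
		Target: 12345, // hypothetical PID of the container's init process
	}

	var stdout bytes.Buffer
	stderr, err := cfg.Execute(&stdout, "ls", "-l", "/")
	if err != nil {
		fmt.Println("nsenter failed:", err, stderr)
		return
	}
	fmt.Print(stdout.String())
}
```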
+ + return cmd, nil +} diff --git a/contrib/nydusify/pkg/committer/util.go b/contrib/nydusify/pkg/committer/util.go index c4c6d9c0ce1..7e717651826 100644 --- a/contrib/nydusify/pkg/committer/util.go +++ b/contrib/nydusify/pkg/committer/util.go @@ -1,18 +1,18 @@ -package committer - -import ( - "sync/atomic" -) - -type Counter struct { - n int64 -} - -func (c *Counter) Write(p []byte) (n int, err error) { - atomic.AddInt64(&c.n, int64(len(p))) - return len(p), nil -} - -func (c *Counter) Size() (n int64) { - return c.n -} +package committer + +import ( + "sync/atomic" +) + +type Counter struct { + n int64 +} + +func (c *Counter) Write(p []byte) (n int, err error) { + atomic.AddInt64(&c.n, int64(len(p))) + return len(p), nil +} + +func (c *Counter) Size() (n int64) { + return c.n +} diff --git a/contrib/nydusify/pkg/compactor/compactor.go b/contrib/nydusify/pkg/compactor/compactor.go index b833ad7ed0a..8a7d8edea46 100644 --- a/contrib/nydusify/pkg/compactor/compactor.go +++ b/contrib/nydusify/pkg/compactor/compactor.go @@ -1,107 +1,107 @@ -package compactor - -import ( - "encoding/json" - "os" - "path/filepath" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" - "github.com/pkg/errors" -) - -var defaultCompactConfig = &CompactConfig{ - MinUsedRatio: 5, - CompactBlobSize: 10485760, - MaxCompactSize: 104857600, - LayersToCompact: 32, -} - -type CompactConfig struct { - MinUsedRatio int `json:"min_used_ratio"` - CompactBlobSize int `json:"compact_blob_size"` - MaxCompactSize int `json:"max_compact_size"` - LayersToCompact int `json:"layers_to_compact"` - BlobsDir string `json:"blobs_dir,omitempty"` -} - -func (cfg *CompactConfig) Dumps(filePath string) error { - file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) - if err != nil { - return errors.Wrap(err, "failed to open file") - } - defer file.Close() - if err = json.NewEncoder(file).Encode(cfg); err != nil { - return errors.Wrap(err, "failed to encode json") - } - return nil -} - -func loadCompactConfig(filePath string) (CompactConfig, error) { - file, err := os.Open(filePath) - if err != nil { - return CompactConfig{}, errors.Wrap(err, "failed to load compact configuration file") - } - defer file.Close() - var cfg CompactConfig - if err = json.NewDecoder(file).Decode(&cfg); err != nil { - return CompactConfig{}, errors.Wrap(err, "failed to decode compact configuration file") - } - return cfg, nil -} - -type Compactor struct { - builder *build.Builder - workdir string - cfg CompactConfig -} - -func NewCompactor(nydusImagePath, workdir, configPath string) (*Compactor, error) { - var ( - cfg CompactConfig - err error - ) - if configPath != "" { - cfg, err = loadCompactConfig(configPath) - if err != nil { - return nil, errors.Wrap(err, "compact config err") - } - } else { - cfg = *defaultCompactConfig - } - cfg.BlobsDir = workdir - return &Compactor{ - builder: build.NewBuilder(nydusImagePath), - workdir: workdir, - cfg: cfg, - }, nil -} - -func (compactor *Compactor) Compact(bootstrapPath, chunkDict, backendType, backendConfigFile string) (string, error) { - targetBootstrap := bootstrapPath + ".compact" - if err := os.Remove(targetBootstrap); err != nil && !os.IsNotExist(err) { - return "", errors.Wrap(err, "failed to delete old bootstrap file") - } - // prepare config file - configFilePath := filepath.Join(compactor.workdir, "compact.json") - if err := compactor.cfg.Dumps(configFilePath); err != nil { - return "", errors.Wrap(err, "compact err") - } - outputJSONPath := filepath.Join(compactor.workdir, 
"compact-result.json") - if err := os.Remove(outputJSONPath); err != nil && !os.IsNotExist(err) { - return "", errors.Wrap(err, "failed to delete old output-json file") - } - err := compactor.builder.Compact(build.CompactOption{ - ChunkDict: chunkDict, - BootstrapPath: bootstrapPath, - OutputBootstrapPath: targetBootstrap, - BackendType: backendType, - BackendConfigPath: backendConfigFile, - OutputJSONPath: outputJSONPath, - CompactConfigPath: configFilePath, - }) - if err != nil { - return "", errors.Wrap(err, "failed to run compact command") - } - - return targetBootstrap, nil -} +package compactor + +import ( + "encoding/json" + "os" + "path/filepath" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" + "github.com/pkg/errors" +) + +var defaultCompactConfig = &CompactConfig{ + MinUsedRatio: 5, + CompactBlobSize: 10485760, + MaxCompactSize: 104857600, + LayersToCompact: 32, +} + +type CompactConfig struct { + MinUsedRatio int `json:"min_used_ratio"` + CompactBlobSize int `json:"compact_blob_size"` + MaxCompactSize int `json:"max_compact_size"` + LayersToCompact int `json:"layers_to_compact"` + BlobsDir string `json:"blobs_dir,omitempty"` +} + +func (cfg *CompactConfig) Dumps(filePath string) error { + file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return errors.Wrap(err, "failed to open file") + } + defer file.Close() + if err = json.NewEncoder(file).Encode(cfg); err != nil { + return errors.Wrap(err, "failed to encode json") + } + return nil +} + +func loadCompactConfig(filePath string) (CompactConfig, error) { + file, err := os.Open(filePath) + if err != nil { + return CompactConfig{}, errors.Wrap(err, "failed to load compact configuration file") + } + defer file.Close() + var cfg CompactConfig + if err = json.NewDecoder(file).Decode(&cfg); err != nil { + return CompactConfig{}, errors.Wrap(err, "failed to decode compact configuration file") + } + return cfg, nil +} + +type Compactor struct { + builder *build.Builder + workdir string + cfg CompactConfig +} + +func NewCompactor(nydusImagePath, workdir, configPath string) (*Compactor, error) { + var ( + cfg CompactConfig + err error + ) + if configPath != "" { + cfg, err = loadCompactConfig(configPath) + if err != nil { + return nil, errors.Wrap(err, "compact config err") + } + } else { + cfg = *defaultCompactConfig + } + cfg.BlobsDir = workdir + return &Compactor{ + builder: build.NewBuilder(nydusImagePath), + workdir: workdir, + cfg: cfg, + }, nil +} + +func (compactor *Compactor) Compact(bootstrapPath, chunkDict, backendType, backendConfigFile string) (string, error) { + targetBootstrap := bootstrapPath + ".compact" + if err := os.Remove(targetBootstrap); err != nil && !os.IsNotExist(err) { + return "", errors.Wrap(err, "failed to delete old bootstrap file") + } + // prepare config file + configFilePath := filepath.Join(compactor.workdir, "compact.json") + if err := compactor.cfg.Dumps(configFilePath); err != nil { + return "", errors.Wrap(err, "compact err") + } + outputJSONPath := filepath.Join(compactor.workdir, "compact-result.json") + if err := os.Remove(outputJSONPath); err != nil && !os.IsNotExist(err) { + return "", errors.Wrap(err, "failed to delete old output-json file") + } + err := compactor.builder.Compact(build.CompactOption{ + ChunkDict: chunkDict, + BootstrapPath: bootstrapPath, + OutputBootstrapPath: targetBootstrap, + BackendType: backendType, + BackendConfigPath: backendConfigFile, + OutputJSONPath: outputJSONPath, + CompactConfigPath: configFilePath, + 
}) + if err != nil { + return "", errors.Wrap(err, "failed to run compact command") + } + + return targetBootstrap, nil +} diff --git a/contrib/nydusify/pkg/converter/chunk_dict.go b/contrib/nydusify/pkg/converter/chunk_dict.go index ec255d06d0a..9539fa18f83 100644 --- a/contrib/nydusify/pkg/converter/chunk_dict.go +++ b/contrib/nydusify/pkg/converter/chunk_dict.go @@ -1,61 +1,61 @@ -// Copyright 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package converter - -import ( - "fmt" - "strings" -) - -var ( - chunkDictFormats = []string{"bootstrap"} - chunkDictSources = []string{"registry", "local"} -) - -func isValidChunkDictFormat(s string) bool { - for i := range chunkDictFormats { - if chunkDictFormats[i] == s { - return true - } - } - return false -} - -func isValidChunkDictSource(source string) bool { - for i := range chunkDictSources { - if chunkDictSources[i] == source { - return true - } - } - return false -} - -// ParseChunkDictArgs parses chunk dict args like: -// - bootstrap:registry:$repo:$tag -// - bootstrap:local:$path -func ParseChunkDictArgs(args string) (format string, source string, ref string, err error) { - names := strings.Split(args, ":") - if len(names) < 3 { - err = fmt.Errorf("invalid args") - return - } - format = names[0] - if !isValidChunkDictFormat(format) { - err = fmt.Errorf("invalid chunk dict format %s, should be %v", format, chunkDictFormats) - return - } - source = names[1] - if !isValidChunkDictSource(source) { - err = fmt.Errorf("invalid chunk dict source %s, should be %v", source, chunkDictSources) - return - } - ref = strings.Join(names[2:], ":") - return -} - -type ChunkDictOpt struct { - Args string - Insecure bool -} +// Copyright 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package converter + +import ( + "fmt" + "strings" +) + +var ( + chunkDictFormats = []string{"bootstrap"} + chunkDictSources = []string{"registry", "local"} +) + +func isValidChunkDictFormat(s string) bool { + for i := range chunkDictFormats { + if chunkDictFormats[i] == s { + return true + } + } + return false +} + +func isValidChunkDictSource(source string) bool { + for i := range chunkDictSources { + if chunkDictSources[i] == source { + return true + } + } + return false +} + +// ParseChunkDictArgs parses chunk dict args like: +// - bootstrap:registry:$repo:$tag +// - bootstrap:local:$path +func ParseChunkDictArgs(args string) (format string, source string, ref string, err error) { + names := strings.Split(args, ":") + if len(names) < 3 { + err = fmt.Errorf("invalid args") + return + } + format = names[0] + if !isValidChunkDictFormat(format) { + err = fmt.Errorf("invalid chunk dict format %s, should be %v", format, chunkDictFormats) + return + } + source = names[1] + if !isValidChunkDictSource(source) { + err = fmt.Errorf("invalid chunk dict source %s, should be %v", source, chunkDictSources) + return + } + ref = strings.Join(names[2:], ":") + return +} + +type ChunkDictOpt struct { + Args string + Insecure bool +} diff --git a/contrib/nydusify/pkg/converter/config.go b/contrib/nydusify/pkg/converter/config.go index f4920c819c9..505caabb265 100644 --- a/contrib/nydusify/pkg/converter/config.go +++ b/contrib/nydusify/pkg/converter/config.go @@ -1,39 +1,39 @@ -// Copyright 2022 Nydus Developers. All rights reserved. 
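For a quick illustration of ParseChunkDictArgs in chunk_dict.go above: it splits "format:source:ref" and keeps any further ':' characters inside the reference. A small standalone sketch of that parsing rule (hypothetical parseChunkDict name, not an import of the package):

```
package main

import (
	"fmt"
	"strings"
)

// parseChunkDict mirrors the split performed by ParseChunkDictArgs above:
// the input is "format:source:ref", and the ref itself may contain ':'.
func parseChunkDict(args string) (format, source, ref string, err error) {
	names := strings.Split(args, ":")
	if len(names) < 3 {
		return "", "", "", fmt.Errorf("invalid args")
	}
	// Everything after the second ':' belongs to the reference.
	return names[0], names[1], strings.Join(names[2:], ":"), nil
}

func main() {
	f, s, r, _ := parseChunkDict("bootstrap:registry:example.com/repo:tag")
	// Prints: bootstrap registry example.com/repo:tag
	fmt.Println(f, s, r)
}
```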
-// -// SPDX-License-Identifier: Apache-2.0 - -package converter - -import ( - "strconv" -) - -func getConfig(opt Opt) map[string]string { - cfg := map[string]string{} - - cfg["work_dir"] = opt.WorkDir - cfg["builder"] = opt.NydusImagePath - - cfg["backend_type"] = opt.BackendType - cfg["backend_config"] = opt.BackendConfig - cfg["backend_force_push"] = strconv.FormatBool(opt.BackendForcePush) - - cfg["chunk_dict_ref"] = opt.ChunkDictRef - cfg["docker2oci"] = strconv.FormatBool(opt.Docker2OCI) - cfg["merge_manifest"] = strconv.FormatBool(opt.MergePlatform) - cfg["oci_ref"] = strconv.FormatBool(opt.OCIRef) - cfg["with_referrer"] = strconv.FormatBool(opt.WithReferrer) - - cfg["prefetch_patterns"] = opt.PrefetchPatterns - cfg["compressor"] = opt.Compressor - cfg["fs_version"] = opt.FsVersion - cfg["fs_align_chunk"] = strconv.FormatBool(opt.FsAlignChunk) - cfg["fs_chunk_size"] = opt.ChunkSize - cfg["batch_size"] = opt.BatchSize - - cfg["cache_ref"] = opt.CacheRef - cfg["cache_version"] = opt.CacheVersion - cfg["cache_max_records"] = strconv.FormatUint(uint64(opt.CacheMaxRecords), 10) - - return cfg -} +// Copyright 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package converter + +import ( + "strconv" +) + +func getConfig(opt Opt) map[string]string { + cfg := map[string]string{} + + cfg["work_dir"] = opt.WorkDir + cfg["builder"] = opt.NydusImagePath + + cfg["backend_type"] = opt.BackendType + cfg["backend_config"] = opt.BackendConfig + cfg["backend_force_push"] = strconv.FormatBool(opt.BackendForcePush) + + cfg["chunk_dict_ref"] = opt.ChunkDictRef + cfg["docker2oci"] = strconv.FormatBool(opt.Docker2OCI) + cfg["merge_manifest"] = strconv.FormatBool(opt.MergePlatform) + cfg["oci_ref"] = strconv.FormatBool(opt.OCIRef) + cfg["with_referrer"] = strconv.FormatBool(opt.WithReferrer) + + cfg["prefetch_patterns"] = opt.PrefetchPatterns + cfg["compressor"] = opt.Compressor + cfg["fs_version"] = opt.FsVersion + cfg["fs_align_chunk"] = strconv.FormatBool(opt.FsAlignChunk) + cfg["fs_chunk_size"] = opt.ChunkSize + cfg["batch_size"] = opt.BatchSize + + cfg["cache_ref"] = opt.CacheRef + cfg["cache_version"] = opt.CacheVersion + cfg["cache_max_records"] = strconv.FormatUint(uint64(opt.CacheMaxRecords), 10) + + return cfg +} diff --git a/contrib/nydusify/pkg/converter/converter.go b/contrib/nydusify/pkg/converter/converter.go index 25763fd5019..0f604277edb 100644 --- a/contrib/nydusify/pkg/converter/converter.go +++ b/contrib/nydusify/pkg/converter/converter.go @@ -1,100 +1,100 @@ -// Copyright 2022 Nydus Developers. All rights reserved. 
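A small sketch of what getConfig in config.go above produces: every converter option is flattened into a map[string]string for the "nydus" driver, so booleans and integers are serialized with strconv. Only the keys below come from the code above; the values are made up for illustration:

```
package main

import (
	"fmt"
	"strconv"
)

func main() {
	// A few representative conversions of the kind getConfig performs.
	cfg := map[string]string{
		"docker2oci":        strconv.FormatBool(true),
		"cache_max_records": strconv.FormatUint(uint64(200), 10),
		"fs_version":        "6",
	}
	// Prints: map[cache_max_records:200 docker2oci:true fs_version:6]
	fmt.Println(cfg)
}
```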
-// -// SPDX-License-Identifier: Apache-2.0 - -package converter - -import ( - "context" - "os" - - "github.com/containerd/containerd/namespaces" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" - "github.com/goharbor/acceleration-service/pkg/converter" - "github.com/goharbor/acceleration-service/pkg/platformutil" - "github.com/pkg/errors" -) - -type Opt struct { - WorkDir string - ContainerdAddress string - NydusImagePath string - - Source string - Target string - ChunkDictRef string - - SourceInsecure bool - TargetInsecure bool - ChunkDictInsecure bool - - CacheRef string - CacheInsecure bool - CacheVersion string - CacheMaxRecords uint - - BackendType string - BackendConfig string - BackendForcePush bool - - MergePlatform bool - Docker2OCI bool - FsVersion string - FsAlignChunk bool - Compressor string - ChunkSize string - BatchSize string - PrefetchPatterns string - OCIRef bool - WithReferrer bool - - AllPlatforms bool - Platforms string - - OutputJSON string -} - -func Convert(ctx context.Context, opt Opt) error { - ctx = namespaces.WithNamespace(ctx, "nydusify") - platformMC, err := platformutil.ParsePlatforms(opt.AllPlatforms, opt.Platforms) - if err != nil { - return err - } - - if _, err := os.Stat(opt.WorkDir); err != nil { - if errors.Is(err, os.ErrNotExist) { - if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { - return errors.Wrap(err, "prepare work directory") - } - // We should only clean up when the work directory not exists - // before, otherwise it may delete user data by mistake. - defer os.RemoveAll(opt.WorkDir) - } else { - return errors.Wrap(err, "stat work directory") - } - } - tmpDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-") - if err != nil { - return errors.Wrap(err, "create temp directory") - } - pvd, err := provider.New(tmpDir, hosts(opt), opt.CacheMaxRecords, opt.CacheVersion, platformMC, 0) - if err != nil { - return err - } - defer os.RemoveAll(tmpDir) - - cvt, err := converter.New( - converter.WithProvider(pvd), - converter.WithDriver("nydus", getConfig(opt)), - converter.WithPlatform(platformMC), - ) - if err != nil { - return err - } - - metric, err := cvt.Convert(ctx, opt.Source, opt.Target, opt.CacheRef) - if opt.OutputJSON != "" { - dumpMetric(metric, opt.OutputJSON) - } - return err -} +// Copyright 2022 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package converter + +import ( + "context" + "os" + + "github.com/containerd/containerd/namespaces" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" + "github.com/goharbor/acceleration-service/pkg/converter" + "github.com/goharbor/acceleration-service/pkg/platformutil" + "github.com/pkg/errors" +) + +type Opt struct { + WorkDir string + ContainerdAddress string + NydusImagePath string + + Source string + Target string + ChunkDictRef string + + SourceInsecure bool + TargetInsecure bool + ChunkDictInsecure bool + + CacheRef string + CacheInsecure bool + CacheVersion string + CacheMaxRecords uint + + BackendType string + BackendConfig string + BackendForcePush bool + + MergePlatform bool + Docker2OCI bool + FsVersion string + FsAlignChunk bool + Compressor string + ChunkSize string + BatchSize string + PrefetchPatterns string + OCIRef bool + WithReferrer bool + + AllPlatforms bool + Platforms string + + OutputJSON string +} + +func Convert(ctx context.Context, opt Opt) error { + ctx = namespaces.WithNamespace(ctx, "nydusify") + platformMC, err := platformutil.ParsePlatforms(opt.AllPlatforms, opt.Platforms) + if err != nil { + return err + } + + if _, err := os.Stat(opt.WorkDir); err != nil { + if errors.Is(err, os.ErrNotExist) { + if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { + return errors.Wrap(err, "prepare work directory") + } + // We should only clean up when the work directory not exists + // before, otherwise it may delete user data by mistake. + defer os.RemoveAll(opt.WorkDir) + } else { + return errors.Wrap(err, "stat work directory") + } + } + tmpDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-") + if err != nil { + return errors.Wrap(err, "create temp directory") + } + pvd, err := provider.New(tmpDir, hosts(opt), opt.CacheMaxRecords, opt.CacheVersion, platformMC, 0) + if err != nil { + return err + } + defer os.RemoveAll(tmpDir) + + cvt, err := converter.New( + converter.WithProvider(pvd), + converter.WithDriver("nydus", getConfig(opt)), + converter.WithPlatform(platformMC), + ) + if err != nil { + return err + } + + metric, err := cvt.Convert(ctx, opt.Source, opt.Target, opt.CacheRef) + if opt.OutputJSON != "" { + dumpMetric(metric, opt.OutputJSON) + } + return err +} diff --git a/contrib/nydusify/pkg/converter/hosts.go b/contrib/nydusify/pkg/converter/hosts.go index ff2a947de8e..7f7125c6045 100644 --- a/contrib/nydusify/pkg/converter/hosts.go +++ b/contrib/nydusify/pkg/converter/hosts.go @@ -1,21 +1,21 @@ -// Copyright 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package converter - -import ( - "github.com/goharbor/acceleration-service/pkg/remote" -) - -func hosts(opt Opt) remote.HostFunc { - maps := map[string]bool{ - opt.Source: opt.SourceInsecure, - opt.Target: opt.TargetInsecure, - opt.ChunkDictRef: opt.ChunkDictInsecure, - opt.CacheRef: opt.CacheInsecure, - } - return func(ref string) (remote.CredentialFunc, bool, error) { - return remote.NewDockerConfigCredFunc(), maps[ref], nil - } -} +// Copyright 2022 Nydus Developers. All rights reserved. 
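To illustrate the shape of hosts() in hosts.go above: it captures a per-reference "insecure registry" flag alongside the credential lookup. A minimal sketch of that shape, with a hypothetical credential type standing in for remote.CredentialFunc and empty credentials instead of the Docker config lookup:

```
package main

import "fmt"

// credential is a hypothetical stand-in for remote.CredentialFunc.
type credential func(host string) (string, string, error)

// hostFunc mimics hosts() above: it closes over a map from image
// reference to an "allow insecure registry" flag.
func hostFunc(insecureRefs map[string]bool) func(ref string) (credential, bool, error) {
	return func(ref string) (credential, bool, error) {
		noCreds := func(string) (string, string, error) { return "", "", nil }
		return noCreds, insecureRefs[ref], nil
	}
}

func main() {
	f := hostFunc(map[string]bool{"localhost:5000/app:latest": true})
	_, insecure, _ := f("localhost:5000/app:latest")
	fmt.Println(insecure) // true
}
```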
+// +// SPDX-License-Identifier: Apache-2.0 + +package converter + +import ( + "github.com/goharbor/acceleration-service/pkg/remote" +) + +func hosts(opt Opt) remote.HostFunc { + maps := map[string]bool{ + opt.Source: opt.SourceInsecure, + opt.Target: opt.TargetInsecure, + opt.ChunkDictRef: opt.ChunkDictInsecure, + opt.CacheRef: opt.CacheInsecure, + } + return func(ref string) (remote.CredentialFunc, bool, error) { + return remote.NewDockerConfigCredFunc(), maps[ref], nil + } +} diff --git a/contrib/nydusify/pkg/converter/metric.go b/contrib/nydusify/pkg/converter/metric.go index 4efa77de176..6b7dac4365b 100644 --- a/contrib/nydusify/pkg/converter/metric.go +++ b/contrib/nydusify/pkg/converter/metric.go @@ -1,27 +1,27 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package converter - -import ( - "encoding/json" - "os" - - "github.com/goharbor/acceleration-service/pkg/converter" - "github.com/pkg/errors" -) - -func dumpMetric(metric *converter.Metric, path string) error { - file, err := os.Create(path) - if err != nil { - return errors.Wrap(err, "Create file for metric") - } - defer file.Close() - - encoder := json.NewEncoder(file) - if err := encoder.Encode(metric); err != nil { - return errors.Wrap(err, "Encode JSON from metric") - } - return nil -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package converter + +import ( + "encoding/json" + "os" + + "github.com/goharbor/acceleration-service/pkg/converter" + "github.com/pkg/errors" +) + +func dumpMetric(metric *converter.Metric, path string) error { + file, err := os.Create(path) + if err != nil { + return errors.Wrap(err, "Create file for metric") + } + defer file.Close() + + encoder := json.NewEncoder(file) + if err := encoder.Encode(metric); err != nil { + return errors.Wrap(err, "Encode JSON from metric") + } + return nil +} diff --git a/contrib/nydusify/pkg/converter/provider/ported.go b/contrib/nydusify/pkg/converter/provider/ported.go index 652cf5ff838..2a328c67f9d 100644 --- a/contrib/nydusify/pkg/converter/provider/ported.go +++ b/contrib/nydusify/pkg/converter/provider/ported.go @@ -1,179 +1,179 @@ -// Copyright 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package provider - -import ( - "context" - "fmt" - "strings" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/content" - "github.com/containerd/containerd/images" - "github.com/containerd/containerd/platforms" - "github.com/containerd/containerd/remotes" - "github.com/containerd/containerd/remotes/docker" - - // nolint:staticcheck - "github.com/containerd/containerd/remotes/docker/schema1" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "golang.org/x/sync/semaphore" -) - -// Ported from containerd project, copyright The containerd Authors. 
-// github.com/containerd/containerd/blob/main/pull.go -func fetch(ctx context.Context, store content.Store, rCtx *containerd.RemoteContext, ref string, limit int) (images.Image, error) { - name, desc, err := rCtx.Resolver.Resolve(ctx, ref) - if err != nil { - return images.Image{}, fmt.Errorf("failed to resolve reference %q: %w", ref, err) - } - - fetcher, err := rCtx.Resolver.Fetcher(ctx, name) - if err != nil { - return images.Image{}, fmt.Errorf("failed to get fetcher for %q: %w", name, err) - } - - var ( - handler images.Handler - - isConvertible bool - converterFunc func(context.Context, ocispec.Descriptor) (ocispec.Descriptor, error) - limiter *semaphore.Weighted - ) - - // nolint:staticcheck - if desc.MediaType == images.MediaTypeDockerSchema1Manifest && rCtx.ConvertSchema1 { - schema1Converter := schema1.NewConverter(store, fetcher) - - handler = images.Handlers(append(rCtx.BaseHandlers, schema1Converter)...) - - isConvertible = true - - converterFunc = func(ctx context.Context, _ ocispec.Descriptor) (ocispec.Descriptor, error) { - return schema1Converter.Convert(ctx) - } - } else { - // Get all the children for a descriptor - childrenHandler := images.ChildrenHandler(store) - // Set any children labels for that content - childrenHandler = images.SetChildrenMappedLabels(store, childrenHandler, rCtx.ChildLabelMap) - if rCtx.AllMetadata { - // Filter manifests by platforms but allow to handle manifest - // and configuration for not-target platforms - childrenHandler = remotes.FilterManifestByPlatformHandler(childrenHandler, rCtx.PlatformMatcher) - } else { - // Filter children by platforms if specified. - childrenHandler = images.FilterPlatforms(childrenHandler, rCtx.PlatformMatcher) - } - // Sort and limit manifests if a finite number is needed - if limit > 0 { - childrenHandler = images.LimitManifests(childrenHandler, rCtx.PlatformMatcher, limit) - } - - // set isConvertible to true if there is application/octet-stream media type - convertibleHandler := images.HandlerFunc( - func(_ context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { - if desc.MediaType == docker.LegacyConfigMediaType { - isConvertible = true - } - - return []ocispec.Descriptor{}, nil - }, - ) - - appendDistSrcLabelHandler, err := docker.AppendDistributionSourceLabel(store, ref) - if err != nil { - return images.Image{}, err - } - - handlers := append(rCtx.BaseHandlers, - remotes.FetchHandler(store, fetcher), - convertibleHandler, - childrenHandler, - appendDistSrcLabelHandler, - ) - - handler = images.Handlers(handlers...) - - converterFunc = func(ctx context.Context, desc ocispec.Descriptor) (ocispec.Descriptor, error) { - return docker.ConvertManifest(ctx, store, desc) - } - } - - if rCtx.HandlerWrapper != nil { - handler = rCtx.HandlerWrapper(handler) - } - - if rCtx.MaxConcurrentDownloads > 0 { - limiter = semaphore.NewWeighted(int64(rCtx.MaxConcurrentDownloads)) - } - - if err := images.Dispatch(ctx, handler, limiter, desc); err != nil { - return images.Image{}, err - } - - if isConvertible { - if desc, err = converterFunc(ctx, desc); err != nil { - return images.Image{}, err - } - } - - return images.Image{ - Name: name, - Target: desc, - Labels: rCtx.Labels, - }, nil -} - -// Ported from containerd project, copyright The containerd Authors. 
-// github.com/containerd/containerd/blob/main/client.go -func push(ctx context.Context, store content.Store, pushCtx *containerd.RemoteContext, desc ocispec.Descriptor, ref string) error { - if pushCtx.PlatformMatcher == nil { - if len(pushCtx.Platforms) > 0 { - var ps []ocispec.Platform - for _, platform := range pushCtx.Platforms { - p, err := platforms.Parse(platform) - if err != nil { - return fmt.Errorf("invalid platform %s: %w", platform, err) - } - ps = append(ps, p) - } - pushCtx.PlatformMatcher = platforms.Any(ps...) - } else { - pushCtx.PlatformMatcher = platforms.All - } - } - - // Annotate ref with digest to push only push tag for single digest - if !strings.Contains(ref, "@") { - ref = ref + "@" + desc.Digest.String() - } - - pusher, err := pushCtx.Resolver.Pusher(ctx, ref) - if err != nil { - return err - } - - var wrapper func(images.Handler) images.Handler - - if len(pushCtx.BaseHandlers) > 0 { - wrapper = func(h images.Handler) images.Handler { - h = images.Handlers(append(pushCtx.BaseHandlers, h)...) - if pushCtx.HandlerWrapper != nil { - h = pushCtx.HandlerWrapper(h) - } - return h - } - } else if pushCtx.HandlerWrapper != nil { - wrapper = pushCtx.HandlerWrapper - } - - var limiter *semaphore.Weighted - if pushCtx.MaxConcurrentUploadedLayers > 0 { - limiter = semaphore.NewWeighted(int64(pushCtx.MaxConcurrentUploadedLayers)) - } - - return remotes.PushContent(ctx, pusher, desc, store, limiter, pushCtx.PlatformMatcher, wrapper) -} +// Copyright 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package provider + +import ( + "context" + "fmt" + "strings" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/images" + "github.com/containerd/containerd/platforms" + "github.com/containerd/containerd/remotes" + "github.com/containerd/containerd/remotes/docker" + + // nolint:staticcheck + "github.com/containerd/containerd/remotes/docker/schema1" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "golang.org/x/sync/semaphore" +) + +// Ported from containerd project, copyright The containerd Authors. +// github.com/containerd/containerd/blob/main/pull.go +func fetch(ctx context.Context, store content.Store, rCtx *containerd.RemoteContext, ref string, limit int) (images.Image, error) { + name, desc, err := rCtx.Resolver.Resolve(ctx, ref) + if err != nil { + return images.Image{}, fmt.Errorf("failed to resolve reference %q: %w", ref, err) + } + + fetcher, err := rCtx.Resolver.Fetcher(ctx, name) + if err != nil { + return images.Image{}, fmt.Errorf("failed to get fetcher for %q: %w", name, err) + } + + var ( + handler images.Handler + + isConvertible bool + converterFunc func(context.Context, ocispec.Descriptor) (ocispec.Descriptor, error) + limiter *semaphore.Weighted + ) + + // nolint:staticcheck + if desc.MediaType == images.MediaTypeDockerSchema1Manifest && rCtx.ConvertSchema1 { + schema1Converter := schema1.NewConverter(store, fetcher) + + handler = images.Handlers(append(rCtx.BaseHandlers, schema1Converter)...) 
+ + isConvertible = true + + converterFunc = func(ctx context.Context, _ ocispec.Descriptor) (ocispec.Descriptor, error) { + return schema1Converter.Convert(ctx) + } + } else { + // Get all the children for a descriptor + childrenHandler := images.ChildrenHandler(store) + // Set any children labels for that content + childrenHandler = images.SetChildrenMappedLabels(store, childrenHandler, rCtx.ChildLabelMap) + if rCtx.AllMetadata { + // Filter manifests by platforms but allow to handle manifest + // and configuration for not-target platforms + childrenHandler = remotes.FilterManifestByPlatformHandler(childrenHandler, rCtx.PlatformMatcher) + } else { + // Filter children by platforms if specified. + childrenHandler = images.FilterPlatforms(childrenHandler, rCtx.PlatformMatcher) + } + // Sort and limit manifests if a finite number is needed + if limit > 0 { + childrenHandler = images.LimitManifests(childrenHandler, rCtx.PlatformMatcher, limit) + } + + // set isConvertible to true if there is application/octet-stream media type + convertibleHandler := images.HandlerFunc( + func(_ context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { + if desc.MediaType == docker.LegacyConfigMediaType { + isConvertible = true + } + + return []ocispec.Descriptor{}, nil + }, + ) + + appendDistSrcLabelHandler, err := docker.AppendDistributionSourceLabel(store, ref) + if err != nil { + return images.Image{}, err + } + + handlers := append(rCtx.BaseHandlers, + remotes.FetchHandler(store, fetcher), + convertibleHandler, + childrenHandler, + appendDistSrcLabelHandler, + ) + + handler = images.Handlers(handlers...) + + converterFunc = func(ctx context.Context, desc ocispec.Descriptor) (ocispec.Descriptor, error) { + return docker.ConvertManifest(ctx, store, desc) + } + } + + if rCtx.HandlerWrapper != nil { + handler = rCtx.HandlerWrapper(handler) + } + + if rCtx.MaxConcurrentDownloads > 0 { + limiter = semaphore.NewWeighted(int64(rCtx.MaxConcurrentDownloads)) + } + + if err := images.Dispatch(ctx, handler, limiter, desc); err != nil { + return images.Image{}, err + } + + if isConvertible { + if desc, err = converterFunc(ctx, desc); err != nil { + return images.Image{}, err + } + } + + return images.Image{ + Name: name, + Target: desc, + Labels: rCtx.Labels, + }, nil +} + +// Ported from containerd project, copyright The containerd Authors. +// github.com/containerd/containerd/blob/main/client.go +func push(ctx context.Context, store content.Store, pushCtx *containerd.RemoteContext, desc ocispec.Descriptor, ref string) error { + if pushCtx.PlatformMatcher == nil { + if len(pushCtx.Platforms) > 0 { + var ps []ocispec.Platform + for _, platform := range pushCtx.Platforms { + p, err := platforms.Parse(platform) + if err != nil { + return fmt.Errorf("invalid platform %s: %w", platform, err) + } + ps = append(ps, p) + } + pushCtx.PlatformMatcher = platforms.Any(ps...) + } else { + pushCtx.PlatformMatcher = platforms.All + } + } + + // Annotate ref with digest to push only push tag for single digest + if !strings.Contains(ref, "@") { + ref = ref + "@" + desc.Digest.String() + } + + pusher, err := pushCtx.Resolver.Pusher(ctx, ref) + if err != nil { + return err + } + + var wrapper func(images.Handler) images.Handler + + if len(pushCtx.BaseHandlers) > 0 { + wrapper = func(h images.Handler) images.Handler { + h = images.Handlers(append(pushCtx.BaseHandlers, h)...) 
+ if pushCtx.HandlerWrapper != nil { + h = pushCtx.HandlerWrapper(h) + } + return h + } + } else if pushCtx.HandlerWrapper != nil { + wrapper = pushCtx.HandlerWrapper + } + + var limiter *semaphore.Weighted + if pushCtx.MaxConcurrentUploadedLayers > 0 { + limiter = semaphore.NewWeighted(int64(pushCtx.MaxConcurrentUploadedLayers)) + } + + return remotes.PushContent(ctx, pusher, desc, store, limiter, pushCtx.PlatformMatcher, wrapper) +} diff --git a/contrib/nydusify/pkg/converter/provider/provider.go b/contrib/nydusify/pkg/converter/provider/provider.go index 08a7988dba1..b8b1dd5b29a 100644 --- a/contrib/nydusify/pkg/converter/provider/provider.go +++ b/contrib/nydusify/pkg/converter/provider/provider.go @@ -1,177 +1,177 @@ -// Copyright 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package provider - -import ( - "context" - "crypto/tls" - "net" - "net/http" - "os" - "path/filepath" - "sync" - "time" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/content" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/platforms" - "github.com/containerd/containerd/remotes" - "github.com/containerd/containerd/remotes/docker" - "github.com/goharbor/acceleration-service/pkg/cache" - accelcontent "github.com/goharbor/acceleration-service/pkg/content" - "github.com/goharbor/acceleration-service/pkg/remote" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -var LayerConcurrentLimit = 5 - -type Provider struct { - mutex sync.Mutex - usePlainHTTP bool - images map[string]*ocispec.Descriptor - store content.Store - hosts remote.HostFunc - platformMC platforms.MatchComparer - cacheSize int - cacheVersion string - chunkSize int64 -} - -func New(root string, hosts remote.HostFunc, cacheSize uint, cacheVersion string, platformMC platforms.MatchComparer, chunkSize int64) (*Provider, error) { - contentDir := filepath.Join(root, "content") - if err := os.MkdirAll(contentDir, 0755); err != nil { - return nil, err - } - store, err := accelcontent.NewContent(hosts, contentDir, root, "0MB") - if err != nil { - return nil, err - } - - return &Provider{ - images: make(map[string]*ocispec.Descriptor), - store: store, - hosts: hosts, - cacheSize: int(cacheSize), - platformMC: platformMC, - cacheVersion: cacheVersion, - chunkSize: chunkSize, - }, nil -} - -func newDefaultClient(skipTLSVerify bool) *http.Client { - return &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - DualStack: true, - }).DialContext, - MaxIdleConns: 10, - IdleConnTimeout: 30 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - DisableKeepAlives: true, - TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: skipTLSVerify, - }, - }, - } -} - -func newResolver(insecure, plainHTTP bool, credFunc remote.CredentialFunc, chunkSize int64) remotes.Resolver { - registryHosts := docker.ConfigureDefaultRegistries( - docker.WithAuthorizer( - docker.NewDockerAuthorizer( - docker.WithAuthClient(newDefaultClient(insecure)), - docker.WithAuthCreds(credFunc), - ), - ), - docker.WithClient(newDefaultClient(insecure)), - docker.WithPlainHTTP(func(_ string) (bool, error) { - return plainHTTP, nil - }), - docker.WithChunkSize(chunkSize), - ) - - return docker.NewResolver(docker.ResolverOptions{ - Hosts: 
registryHosts, - }) -} - -func (pvd *Provider) UsePlainHTTP() { - pvd.usePlainHTTP = true -} - -func (pvd *Provider) Resolver(ref string) (remotes.Resolver, error) { - credFunc, insecure, err := pvd.hosts(ref) - if err != nil { - return nil, err - } - return newResolver(insecure, pvd.usePlainHTTP, credFunc, pvd.chunkSize), nil -} - -func (pvd *Provider) Pull(ctx context.Context, ref string) error { - resolver, err := pvd.Resolver(ref) - if err != nil { - return err - } - rc := &containerd.RemoteContext{ - Resolver: resolver, - PlatformMatcher: pvd.platformMC, - MaxConcurrentDownloads: LayerConcurrentLimit, - } - - img, err := fetch(ctx, pvd.store, rc, ref, 0) - if err != nil { - return err - } - - pvd.mutex.Lock() - defer pvd.mutex.Unlock() - pvd.images[ref] = &img.Target - - return nil -} - -func (pvd *Provider) Push(ctx context.Context, desc ocispec.Descriptor, ref string) error { - resolver, err := pvd.Resolver(ref) - if err != nil { - return err - } - rc := &containerd.RemoteContext{ - Resolver: resolver, - PlatformMatcher: pvd.platformMC, - MaxConcurrentUploadedLayers: LayerConcurrentLimit, - } - - return push(ctx, pvd.store, rc, desc, ref) -} - -func (pvd *Provider) Image(_ context.Context, ref string) (*ocispec.Descriptor, error) { - pvd.mutex.Lock() - defer pvd.mutex.Unlock() - if desc, ok := pvd.images[ref]; ok { - return desc, nil - } - return nil, errdefs.ErrNotFound -} - -func (pvd *Provider) ContentStore() content.Store { - return pvd.store -} - -func (pvd *Provider) SetContentStore(store content.Store) { - pvd.store = store -} - -func (pvd *Provider) NewRemoteCache(ctx context.Context, ref string) (context.Context, *cache.RemoteCache) { - if ref != "" { - return cache.New(ctx, ref, "", pvd.cacheSize, pvd) - } - return ctx, nil -} +// Copyright 2022 Nydus Developers. All rights reserved. 
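A trimmed-down sketch of the point made by newDefaultClient in provider.go above: the "insecure" option ultimately becomes TLSClientConfig.InsecureSkipVerify on the HTTP transport. Timeouts and the other transport settings are omitted here:

```
package main

import (
	"crypto/tls"
	"fmt"
	"net/http"
	"time"
)

// insecureClient shows only the insecure -> InsecureSkipVerify mapping;
// the real client above configures many more transport options.
func insecureClient(skipTLSVerify bool) *http.Client {
	return &http.Client{
		Timeout: 30 * time.Second,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: skipTLSVerify},
		},
	}
}

func main() {
	c := insecureClient(true)
	tr := c.Transport.(*http.Transport)
	fmt.Println(tr.TLSClientConfig.InsecureSkipVerify) // true
}
```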
+// +// SPDX-License-Identifier: Apache-2.0 + +package provider + +import ( + "context" + "crypto/tls" + "net" + "net/http" + "os" + "path/filepath" + "sync" + "time" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/platforms" + "github.com/containerd/containerd/remotes" + "github.com/containerd/containerd/remotes/docker" + "github.com/goharbor/acceleration-service/pkg/cache" + accelcontent "github.com/goharbor/acceleration-service/pkg/content" + "github.com/goharbor/acceleration-service/pkg/remote" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +var LayerConcurrentLimit = 5 + +type Provider struct { + mutex sync.Mutex + usePlainHTTP bool + images map[string]*ocispec.Descriptor + store content.Store + hosts remote.HostFunc + platformMC platforms.MatchComparer + cacheSize int + cacheVersion string + chunkSize int64 +} + +func New(root string, hosts remote.HostFunc, cacheSize uint, cacheVersion string, platformMC platforms.MatchComparer, chunkSize int64) (*Provider, error) { + contentDir := filepath.Join(root, "content") + if err := os.MkdirAll(contentDir, 0755); err != nil { + return nil, err + } + store, err := accelcontent.NewContent(hosts, contentDir, root, "0MB") + if err != nil { + return nil, err + } + + return &Provider{ + images: make(map[string]*ocispec.Descriptor), + store: store, + hosts: hosts, + cacheSize: int(cacheSize), + platformMC: platformMC, + cacheVersion: cacheVersion, + chunkSize: chunkSize, + }, nil +} + +func newDefaultClient(skipTLSVerify bool) *http.Client { + return &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + DualStack: true, + }).DialContext, + MaxIdleConns: 10, + IdleConnTimeout: 30 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 5 * time.Second, + DisableKeepAlives: true, + TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: skipTLSVerify, + }, + }, + } +} + +func newResolver(insecure, plainHTTP bool, credFunc remote.CredentialFunc, chunkSize int64) remotes.Resolver { + registryHosts := docker.ConfigureDefaultRegistries( + docker.WithAuthorizer( + docker.NewDockerAuthorizer( + docker.WithAuthClient(newDefaultClient(insecure)), + docker.WithAuthCreds(credFunc), + ), + ), + docker.WithClient(newDefaultClient(insecure)), + docker.WithPlainHTTP(func(_ string) (bool, error) { + return plainHTTP, nil + }), + docker.WithChunkSize(chunkSize), + ) + + return docker.NewResolver(docker.ResolverOptions{ + Hosts: registryHosts, + }) +} + +func (pvd *Provider) UsePlainHTTP() { + pvd.usePlainHTTP = true +} + +func (pvd *Provider) Resolver(ref string) (remotes.Resolver, error) { + credFunc, insecure, err := pvd.hosts(ref) + if err != nil { + return nil, err + } + return newResolver(insecure, pvd.usePlainHTTP, credFunc, pvd.chunkSize), nil +} + +func (pvd *Provider) Pull(ctx context.Context, ref string) error { + resolver, err := pvd.Resolver(ref) + if err != nil { + return err + } + rc := &containerd.RemoteContext{ + Resolver: resolver, + PlatformMatcher: pvd.platformMC, + MaxConcurrentDownloads: LayerConcurrentLimit, + } + + img, err := fetch(ctx, pvd.store, rc, ref, 0) + if err != nil { + return err + } + + pvd.mutex.Lock() + defer pvd.mutex.Unlock() + pvd.images[ref] = &img.Target + + return nil +} 
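A minimal sketch of the pattern used by Provider.Pull above: the resolved target descriptor is recorded in a mutex-guarded ref-to-descriptor map, which Provider.Image later reads. The descriptor is simplified to a digest string here, and the imageCache name is hypothetical:

```
package main

import (
	"fmt"
	"sync"
)

// imageCache sketches the mutex-guarded map kept by Provider above.
type imageCache struct {
	mu     sync.Mutex
	images map[string]string
}

func (c *imageCache) put(ref, digest string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.images[ref] = digest
}

func (c *imageCache) get(ref string) (string, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	d, ok := c.images[ref]
	return d, ok
}

func main() {
	c := &imageCache{images: map[string]string{}}
	c.put("example.com/app:v1", "sha256:abc")
	d, ok := c.get("example.com/app:v1")
	fmt.Println(d, ok) // sha256:abc true
}
```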
+ +func (pvd *Provider) Push(ctx context.Context, desc ocispec.Descriptor, ref string) error { + resolver, err := pvd.Resolver(ref) + if err != nil { + return err + } + rc := &containerd.RemoteContext{ + Resolver: resolver, + PlatformMatcher: pvd.platformMC, + MaxConcurrentUploadedLayers: LayerConcurrentLimit, + } + + return push(ctx, pvd.store, rc, desc, ref) +} + +func (pvd *Provider) Image(_ context.Context, ref string) (*ocispec.Descriptor, error) { + pvd.mutex.Lock() + defer pvd.mutex.Unlock() + if desc, ok := pvd.images[ref]; ok { + return desc, nil + } + return nil, errdefs.ErrNotFound +} + +func (pvd *Provider) ContentStore() content.Store { + return pvd.store +} + +func (pvd *Provider) SetContentStore(store content.Store) { + pvd.store = store +} + +func (pvd *Provider) NewRemoteCache(ctx context.Context, ref string) (context.Context, *cache.RemoteCache) { + if ref != "" { + return cache.New(ctx, ref, "", pvd.cacheSize, pvd) + } + return ctx, nil +} diff --git a/contrib/nydusify/pkg/copier/copier.go b/contrib/nydusify/pkg/copier/copier.go index fa3d059035c..869e6d9efb0 100644 --- a/contrib/nydusify/pkg/copier/copier.go +++ b/contrib/nydusify/pkg/copier/copier.go @@ -1,395 +1,395 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package copier - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "strings" - - "github.com/containerd/containerd/content" - containerdErrdefs "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/images" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/platforms" - "github.com/containerd/containerd/reference/docker" - "github.com/containerd/nydus-snapshotter/pkg/converter" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - nydusifyUtils "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - "github.com/dustin/go-humanize" - "github.com/goharbor/acceleration-service/pkg/errdefs" - "github.com/goharbor/acceleration-service/pkg/platformutil" - "github.com/goharbor/acceleration-service/pkg/remote" - "github.com/goharbor/acceleration-service/pkg/utils" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sync/errgroup" - "golang.org/x/sync/semaphore" - - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -type Opt struct { - WorkDir string - NydusImagePath string - - Source string - Target string - - SourceInsecure bool - TargetInsecure bool - - SourceBackendType string - SourceBackendConfig string - - TargetBackendType string - TargetBackendConfig string - - AllPlatforms bool - Platforms string - - PushChunkSize int64 -} - -type output struct { - Blobs []string -} - -func hosts(opt Opt) remote.HostFunc { - maps := map[string]bool{ - opt.Source: opt.SourceInsecure, - opt.Target: opt.TargetInsecure, - } - return func(ref string) (remote.CredentialFunc, bool, error) { - return remote.NewDockerConfigCredFunc(), maps[ref], nil - } -} - -func getPushWriter(ctx context.Context, pvd *provider.Provider, desc ocispec.Descriptor, opt Opt) (content.Writer, error) { - resolver, err := pvd.Resolver(opt.Target) - if err != nil { - return nil, errors.Wrap(err, "get resolver") - } - ref := opt.Target - if !strings.Contains(ref, "@") { - 
ref = ref + "@" + desc.Digest.String() - } - pusher, err := resolver.Pusher(ctx, ref) - if err != nil { - return nil, errors.Wrap(err, "create pusher") - } - writer, err := pusher.Push(ctx, desc) - if err != nil { - if containerdErrdefs.IsAlreadyExists(err) { - return nil, nil - } - return nil, err - } - return writer, nil -} - -func pushBlobFromBackend( - ctx context.Context, pvd *provider.Provider, backend backend.Backend, src ocispec.Descriptor, opt Opt, -) ([]ocispec.Descriptor, *ocispec.Descriptor, error) { - if src.MediaType != ocispec.MediaTypeImageManifest && src.MediaType != images.MediaTypeDockerSchema2Manifest { - return nil, nil, fmt.Errorf("unsupported media type %s", src.MediaType) - } - manifest := ocispec.Manifest{} - if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &manifest, src); err != nil { - return nil, nil, errors.Wrap(err, "read manifest from store") - } - bootstrapDesc := parser.FindNydusBootstrapDesc(&manifest) - if bootstrapDesc == nil { - return nil, nil, nil - } - ra, err := pvd.ContentStore().ReaderAt(ctx, *bootstrapDesc) - if err != nil { - return nil, nil, errors.Wrap(err, "prepare reading bootstrap") - } - bootstrapPath := filepath.Join(opt.WorkDir, "bootstrap.tgz") - if err := nydusifyUtils.UnpackFile(io.NewSectionReader(ra, 0, ra.Size()), nydusifyUtils.BootstrapFileNameInLayer, bootstrapPath); err != nil { - return nil, nil, errors.Wrap(err, "unpack bootstrap layer") - } - outputPath := filepath.Join(opt.WorkDir, "output.json") - builder := tool.NewBuilder(opt.NydusImagePath) - if err := builder.Check(tool.BuilderOption{ - BootstrapPath: bootstrapPath, - DebugOutputPath: outputPath, - }); err != nil { - return nil, nil, errors.Wrap(err, "check bootstrap") - } - var out output - bytes, err := os.ReadFile(outputPath) - if err != nil { - return nil, nil, errors.Wrap(err, "read output file") - } - if err := json.Unmarshal(bytes, &out); err != nil { - return nil, nil, errors.Wrap(err, "unmarshal output json") - } - - // Deduplicate the blobs for avoiding uploading repeatedly. 
- blobIDs := []string{} - blobIDMap := map[string]bool{} - for _, blobID := range out.Blobs { - if blobIDMap[blobID] { - continue - } - blobIDs = append(blobIDs, blobID) - blobIDMap[blobID] = true - } - - sem := semaphore.NewWeighted(int64(provider.LayerConcurrentLimit)) - eg, ctx := errgroup.WithContext(ctx) - blobDescs := make([]ocispec.Descriptor, len(blobIDs)) - for idx := range blobIDs { - func(idx int) { - eg.Go(func() error { - sem.Acquire(context.Background(), 1) - defer sem.Release(1) - - blobID := blobIDs[idx] - blobDigest := digest.Digest("sha256:" + blobID) - blobSize, err := backend.Size(blobID) - if err != nil { - return errors.Wrap(err, "get blob size") - } - blobSizeStr := humanize.Bytes(uint64(blobSize)) - - logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushing blob from backend") - rc, err := backend.Reader(blobID) - if err != nil { - return errors.Wrap(err, "get blob reader") - } - defer rc.Close() - blobDescs[idx] = ocispec.Descriptor{ - Digest: blobDigest, - Size: blobSize, - MediaType: converter.MediaTypeNydusBlob, - Annotations: map[string]string{ - converter.LayerAnnotationNydusBlob: "true", - }, - } - writer, err := getPushWriter(ctx, pvd, blobDescs[idx], opt) - if err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - writer, err = getPushWriter(ctx, pvd, blobDescs[idx], opt) - } - if err != nil { - return errors.Wrap(err, "get push writer") - } - } - if writer != nil { - defer writer.Close() - return content.Copy(ctx, writer, rc, blobSize, blobDigest) - } - - logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushed blob from backend") - - return nil - }) - }(idx) - } - - if err := eg.Wait(); err != nil { - return nil, nil, errors.Wrap(err, "push blobs") - } - - // Update manifest layers - for idx := range manifest.Layers { - if manifest.Layers[idx].Annotations != nil { - // The annotation key is deprecated, but it still exists in some - // old nydus images, let's clean it up. - delete(manifest.Layers[idx].Annotations, "containerd.io/snapshot/nydus-blob-ids") - } - } - manifest.Layers = append(blobDescs, manifest.Layers...) - - // Update image config - blobDigests := []digest.Digest{} - for idx := range blobDescs { - blobDigests = append(blobDigests, blobDescs[idx].Digest) - } - config := ocispec.Image{} - if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &config, manifest.Config); err != nil { - return nil, nil, errors.Wrap(err, "read config json") - } - config.RootFS.DiffIDs = append(blobDigests, config.RootFS.DiffIDs...) - configDesc, err := utils.WriteJSON(ctx, pvd.ContentStore(), config, manifest.Config, opt.Target, nil) - if err != nil { - return nil, nil, errors.Wrap(err, "write config json") - } - manifest.Config = *configDesc - - target, err := utils.WriteJSON(ctx, pvd.ContentStore(), &manifest, src, opt.Target, nil) - if err != nil { - return nil, nil, errors.Wrap(err, "write manifest json") - } - - return blobDescs, target, nil -} - -func getPlatform(platform *ocispec.Platform) string { - if platform == nil { - return platforms.DefaultString() - } - return platforms.Format(*platform) -} - -func Copy(ctx context.Context, opt Opt) error { - // Containerd image fetch requires a namespace context. 
- ctx = namespaces.WithNamespace(ctx, "nydusify") - - platformMC, err := platformutil.ParsePlatforms(opt.AllPlatforms, opt.Platforms) - if err != nil { - return err - } - - var bkd backend.Backend - if opt.SourceBackendType != "" { - bkd, err = backend.NewBackend(opt.SourceBackendType, []byte(opt.SourceBackendConfig), nil) - if err != nil { - return errors.Wrapf(err, "new backend") - } - } - - if _, err := os.Stat(opt.WorkDir); err != nil { - if errors.Is(err, os.ErrNotExist) { - if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { - return errors.Wrap(err, "prepare work directory") - } - // We should only clean up when the work directory not exists - // before, otherwise it may delete user data by mistake. - defer os.RemoveAll(opt.WorkDir) - } else { - return errors.Wrap(err, "stat work directory") - } - } - tmpDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-") - if err != nil { - return errors.Wrap(err, "create temp directory") - } - pvd, err := provider.New(tmpDir, hosts(opt), 200, "v1", platformMC, opt.PushChunkSize) - if err != nil { - return err - } - defer os.RemoveAll(tmpDir) - - sourceNamed, err := docker.ParseDockerRef(opt.Source) - if err != nil { - return errors.Wrap(err, "parse source reference") - } - targetNamed, err := docker.ParseDockerRef(opt.Target) - if err != nil { - return errors.Wrap(err, "parse target reference") - } - source := sourceNamed.String() - target := targetNamed.String() - - logrus.Infof("pulling source image %s", source) - if err := pvd.Pull(ctx, source); err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - if err := pvd.Pull(ctx, source); err != nil { - return errors.Wrap(err, "try to pull image") - } - } else { - return errors.Wrap(err, "pull source image") - } - } - logrus.Infof("pulled source image %s", source) - - sourceImage, err := pvd.Image(ctx, source) - if err != nil { - return errors.Wrap(err, "find image from store") - } - - sourceDescs, err := utils.GetManifests(ctx, pvd.ContentStore(), *sourceImage, platformMC) - if err != nil { - return errors.Wrap(err, "get image manifests") - } - targetDescs := make([]ocispec.Descriptor, len(sourceDescs)) - - sem := semaphore.NewWeighted(1) - eg := errgroup.Group{} - for idx := range sourceDescs { - func(idx int) { - eg.Go(func() error { - sem.Acquire(context.Background(), 1) - defer sem.Release(1) - - sourceDesc := sourceDescs[idx] - targetDesc := &sourceDesc - if bkd != nil { - descs, _targetDesc, err := pushBlobFromBackend(ctx, pvd, bkd, sourceDesc, opt) - if err != nil { - return errors.Wrap(err, "get resolver") - } - if _targetDesc == nil { - logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Warnf("%s is not a nydus image", source) - } else { - targetDesc = _targetDesc - store := newStore(pvd.ContentStore(), descs) - pvd.SetContentStore(store) - } - } - targetDescs[idx] = *targetDesc - - logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Infof("pushing target manifest %s", targetDesc.Digest) - if err := pvd.Push(ctx, *targetDesc, target); err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - if err := pvd.Push(ctx, *targetDesc, target); err != nil { - return errors.Wrap(err, "try to push image manifest") - } - } else { - return errors.Wrap(err, "push target image manifest") - } - } - logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Infof("pushed target manifest %s", targetDesc.Digest) - - return nil - }) - }(idx) - } - if err := eg.Wait(); err != nil { - return errors.Wrap(err, "push image manifests") - } - - if 
len(targetDescs) > 1 && (sourceImage.MediaType == ocispec.MediaTypeImageIndex || - sourceImage.MediaType == images.MediaTypeDockerSchema2ManifestList) { - targetIndex := ocispec.Index{} - if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &targetIndex, *sourceImage); err != nil { - return errors.Wrap(err, "read source manifest list") - } - targetIndex.Manifests = targetDescs - - targetImage, err := utils.WriteJSON(ctx, pvd.ContentStore(), targetIndex, *sourceImage, target, nil) - if err != nil { - return errors.Wrap(err, "write target manifest list") - } - if err := pvd.Push(ctx, *targetImage, target); err != nil { - if errdefs.NeedsRetryWithHTTP(err) { - pvd.UsePlainHTTP() - if err := pvd.Push(ctx, *targetImage, target); err != nil { - return errors.Wrap(err, "try to push image") - } - } else { - return errors.Wrap(err, "push target image") - } - } - logrus.Infof("pushed image %s", target) - } - - return nil -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package copier + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/containerd/containerd/content" + containerdErrdefs "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/images" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/platforms" + "github.com/containerd/containerd/reference/docker" + "github.com/containerd/nydus-snapshotter/pkg/converter" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/converter/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + nydusifyUtils "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + "github.com/dustin/go-humanize" + "github.com/goharbor/acceleration-service/pkg/errdefs" + "github.com/goharbor/acceleration-service/pkg/platformutil" + "github.com/goharbor/acceleration-service/pkg/remote" + "github.com/goharbor/acceleration-service/pkg/utils" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" + + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +type Opt struct { + WorkDir string + NydusImagePath string + + Source string + Target string + + SourceInsecure bool + TargetInsecure bool + + SourceBackendType string + SourceBackendConfig string + + TargetBackendType string + TargetBackendConfig string + + AllPlatforms bool + Platforms string + + PushChunkSize int64 +} + +type output struct { + Blobs []string +} + +func hosts(opt Opt) remote.HostFunc { + maps := map[string]bool{ + opt.Source: opt.SourceInsecure, + opt.Target: opt.TargetInsecure, + } + return func(ref string) (remote.CredentialFunc, bool, error) { + return remote.NewDockerConfigCredFunc(), maps[ref], nil + } +} + +func getPushWriter(ctx context.Context, pvd *provider.Provider, desc ocispec.Descriptor, opt Opt) (content.Writer, error) { + resolver, err := pvd.Resolver(opt.Target) + if err != nil { + return nil, errors.Wrap(err, "get resolver") + } + ref := opt.Target + if !strings.Contains(ref, "@") { + ref = ref + "@" + desc.Digest.String() + } + pusher, err := resolver.Pusher(ctx, ref) + if err != nil { + return nil, errors.Wrap(err, "create pusher") + } + writer, err := pusher.Push(ctx, desc) + if err != nil { + if containerdErrdefs.IsAlreadyExists(err) { + return nil, nil + } 
+ return nil, err + } + return writer, nil +} + +func pushBlobFromBackend( + ctx context.Context, pvd *provider.Provider, backend backend.Backend, src ocispec.Descriptor, opt Opt, +) ([]ocispec.Descriptor, *ocispec.Descriptor, error) { + if src.MediaType != ocispec.MediaTypeImageManifest && src.MediaType != images.MediaTypeDockerSchema2Manifest { + return nil, nil, fmt.Errorf("unsupported media type %s", src.MediaType) + } + manifest := ocispec.Manifest{} + if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &manifest, src); err != nil { + return nil, nil, errors.Wrap(err, "read manifest from store") + } + bootstrapDesc := parser.FindNydusBootstrapDesc(&manifest) + if bootstrapDesc == nil { + return nil, nil, nil + } + ra, err := pvd.ContentStore().ReaderAt(ctx, *bootstrapDesc) + if err != nil { + return nil, nil, errors.Wrap(err, "prepare reading bootstrap") + } + bootstrapPath := filepath.Join(opt.WorkDir, "bootstrap.tgz") + if err := nydusifyUtils.UnpackFile(io.NewSectionReader(ra, 0, ra.Size()), nydusifyUtils.BootstrapFileNameInLayer, bootstrapPath); err != nil { + return nil, nil, errors.Wrap(err, "unpack bootstrap layer") + } + outputPath := filepath.Join(opt.WorkDir, "output.json") + builder := tool.NewBuilder(opt.NydusImagePath) + if err := builder.Check(tool.BuilderOption{ + BootstrapPath: bootstrapPath, + DebugOutputPath: outputPath, + }); err != nil { + return nil, nil, errors.Wrap(err, "check bootstrap") + } + var out output + bytes, err := os.ReadFile(outputPath) + if err != nil { + return nil, nil, errors.Wrap(err, "read output file") + } + if err := json.Unmarshal(bytes, &out); err != nil { + return nil, nil, errors.Wrap(err, "unmarshal output json") + } + + // Deduplicate the blobs for avoiding uploading repeatedly. + blobIDs := []string{} + blobIDMap := map[string]bool{} + for _, blobID := range out.Blobs { + if blobIDMap[blobID] { + continue + } + blobIDs = append(blobIDs, blobID) + blobIDMap[blobID] = true + } + + sem := semaphore.NewWeighted(int64(provider.LayerConcurrentLimit)) + eg, ctx := errgroup.WithContext(ctx) + blobDescs := make([]ocispec.Descriptor, len(blobIDs)) + for idx := range blobIDs { + func(idx int) { + eg.Go(func() error { + sem.Acquire(context.Background(), 1) + defer sem.Release(1) + + blobID := blobIDs[idx] + blobDigest := digest.Digest("sha256:" + blobID) + blobSize, err := backend.Size(blobID) + if err != nil { + return errors.Wrap(err, "get blob size") + } + blobSizeStr := humanize.Bytes(uint64(blobSize)) + + logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushing blob from backend") + rc, err := backend.Reader(blobID) + if err != nil { + return errors.Wrap(err, "get blob reader") + } + defer rc.Close() + blobDescs[idx] = ocispec.Descriptor{ + Digest: blobDigest, + Size: blobSize, + MediaType: converter.MediaTypeNydusBlob, + Annotations: map[string]string{ + converter.LayerAnnotationNydusBlob: "true", + }, + } + writer, err := getPushWriter(ctx, pvd, blobDescs[idx], opt) + if err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + writer, err = getPushWriter(ctx, pvd, blobDescs[idx], opt) + } + if err != nil { + return errors.Wrap(err, "get push writer") + } + } + if writer != nil { + defer writer.Close() + return content.Copy(ctx, writer, rc, blobSize, blobDigest) + } + + logrus.WithField("digest", blobDigest).WithField("size", blobSizeStr).Infof("pushed blob from backend") + + return nil + }) + }(idx) + } + + if err := eg.Wait(); err != nil { + return nil, nil, errors.Wrap(err, "push blobs") + } 
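A dependency-free sketch of the bounded-concurrency idea used just above, where blobs are pushed in parallel but the parallelism is capped (the code above uses a weighted semaphore inside an errgroup; a buffered channel plus sync.WaitGroup is used here to keep the example self-contained):

```
package main

import (
	"fmt"
	"sync"
)

// boundedPush runs one goroutine per blob but allows at most `limit`
// of them to do work at the same time.
func boundedPush(blobIDs []string, limit int) {
	sem := make(chan struct{}, limit)
	var wg sync.WaitGroup
	for _, id := range blobIDs {
		wg.Add(1)
		go func(id string) {
			defer wg.Done()
			sem <- struct{}{}        // acquire a slot
			defer func() { <-sem }() // release it
			fmt.Println("pushing blob", id)
		}(id)
	}
	wg.Wait()
}

func main() {
	boundedPush([]string{"a", "b", "c", "d"}, 2)
}
```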
+ + // Update manifest layers + for idx := range manifest.Layers { + if manifest.Layers[idx].Annotations != nil { + // The annotation key is deprecated, but it still exists in some + // old nydus images, let's clean it up. + delete(manifest.Layers[idx].Annotations, "containerd.io/snapshot/nydus-blob-ids") + } + } + manifest.Layers = append(blobDescs, manifest.Layers...) + + // Update image config + blobDigests := []digest.Digest{} + for idx := range blobDescs { + blobDigests = append(blobDigests, blobDescs[idx].Digest) + } + config := ocispec.Image{} + if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &config, manifest.Config); err != nil { + return nil, nil, errors.Wrap(err, "read config json") + } + config.RootFS.DiffIDs = append(blobDigests, config.RootFS.DiffIDs...) + configDesc, err := utils.WriteJSON(ctx, pvd.ContentStore(), config, manifest.Config, opt.Target, nil) + if err != nil { + return nil, nil, errors.Wrap(err, "write config json") + } + manifest.Config = *configDesc + + target, err := utils.WriteJSON(ctx, pvd.ContentStore(), &manifest, src, opt.Target, nil) + if err != nil { + return nil, nil, errors.Wrap(err, "write manifest json") + } + + return blobDescs, target, nil +} + +func getPlatform(platform *ocispec.Platform) string { + if platform == nil { + return platforms.DefaultString() + } + return platforms.Format(*platform) +} + +func Copy(ctx context.Context, opt Opt) error { + // Containerd image fetch requires a namespace context. + ctx = namespaces.WithNamespace(ctx, "nydusify") + + platformMC, err := platformutil.ParsePlatforms(opt.AllPlatforms, opt.Platforms) + if err != nil { + return err + } + + var bkd backend.Backend + if opt.SourceBackendType != "" { + bkd, err = backend.NewBackend(opt.SourceBackendType, []byte(opt.SourceBackendConfig), nil) + if err != nil { + return errors.Wrapf(err, "new backend") + } + } + + if _, err := os.Stat(opt.WorkDir); err != nil { + if errors.Is(err, os.ErrNotExist) { + if err := os.MkdirAll(opt.WorkDir, 0755); err != nil { + return errors.Wrap(err, "prepare work directory") + } + // We should only clean up when the work directory not exists + // before, otherwise it may delete user data by mistake. 
+ defer os.RemoveAll(opt.WorkDir) + } else { + return errors.Wrap(err, "stat work directory") + } + } + tmpDir, err := os.MkdirTemp(opt.WorkDir, "nydusify-") + if err != nil { + return errors.Wrap(err, "create temp directory") + } + pvd, err := provider.New(tmpDir, hosts(opt), 200, "v1", platformMC, opt.PushChunkSize) + if err != nil { + return err + } + defer os.RemoveAll(tmpDir) + + sourceNamed, err := docker.ParseDockerRef(opt.Source) + if err != nil { + return errors.Wrap(err, "parse source reference") + } + targetNamed, err := docker.ParseDockerRef(opt.Target) + if err != nil { + return errors.Wrap(err, "parse target reference") + } + source := sourceNamed.String() + target := targetNamed.String() + + logrus.Infof("pulling source image %s", source) + if err := pvd.Pull(ctx, source); err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + if err := pvd.Pull(ctx, source); err != nil { + return errors.Wrap(err, "try to pull image") + } + } else { + return errors.Wrap(err, "pull source image") + } + } + logrus.Infof("pulled source image %s", source) + + sourceImage, err := pvd.Image(ctx, source) + if err != nil { + return errors.Wrap(err, "find image from store") + } + + sourceDescs, err := utils.GetManifests(ctx, pvd.ContentStore(), *sourceImage, platformMC) + if err != nil { + return errors.Wrap(err, "get image manifests") + } + targetDescs := make([]ocispec.Descriptor, len(sourceDescs)) + + sem := semaphore.NewWeighted(1) + eg := errgroup.Group{} + for idx := range sourceDescs { + func(idx int) { + eg.Go(func() error { + sem.Acquire(context.Background(), 1) + defer sem.Release(1) + + sourceDesc := sourceDescs[idx] + targetDesc := &sourceDesc + if bkd != nil { + descs, _targetDesc, err := pushBlobFromBackend(ctx, pvd, bkd, sourceDesc, opt) + if err != nil { + return errors.Wrap(err, "get resolver") + } + if _targetDesc == nil { + logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Warnf("%s is not a nydus image", source) + } else { + targetDesc = _targetDesc + store := newStore(pvd.ContentStore(), descs) + pvd.SetContentStore(store) + } + } + targetDescs[idx] = *targetDesc + + logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Infof("pushing target manifest %s", targetDesc.Digest) + if err := pvd.Push(ctx, *targetDesc, target); err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + if err := pvd.Push(ctx, *targetDesc, target); err != nil { + return errors.Wrap(err, "try to push image manifest") + } + } else { + return errors.Wrap(err, "push target image manifest") + } + } + logrus.WithField("platform", getPlatform(sourceDesc.Platform)).Infof("pushed target manifest %s", targetDesc.Digest) + + return nil + }) + }(idx) + } + if err := eg.Wait(); err != nil { + return errors.Wrap(err, "push image manifests") + } + + if len(targetDescs) > 1 && (sourceImage.MediaType == ocispec.MediaTypeImageIndex || + sourceImage.MediaType == images.MediaTypeDockerSchema2ManifestList) { + targetIndex := ocispec.Index{} + if _, err := utils.ReadJSON(ctx, pvd.ContentStore(), &targetIndex, *sourceImage); err != nil { + return errors.Wrap(err, "read source manifest list") + } + targetIndex.Manifests = targetDescs + + targetImage, err := utils.WriteJSON(ctx, pvd.ContentStore(), targetIndex, *sourceImage, target, nil) + if err != nil { + return errors.Wrap(err, "write target manifest list") + } + if err := pvd.Push(ctx, *targetImage, target); err != nil { + if errdefs.NeedsRetryWithHTTP(err) { + pvd.UsePlainHTTP() + if err := pvd.Push(ctx, 
*targetImage, target); err != nil { + return errors.Wrap(err, "try to push image") + } + } else { + return errors.Wrap(err, "push target image") + } + } + logrus.Infof("pushed image %s", target) + } + + return nil +} diff --git a/contrib/nydusify/pkg/copier/store.go b/contrib/nydusify/pkg/copier/store.go index 790f3b7a28b..8e00ac197dc 100644 --- a/contrib/nydusify/pkg/copier/store.go +++ b/contrib/nydusify/pkg/copier/store.go @@ -1,45 +1,45 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package copier - -import ( - "context" - - "github.com/containerd/containerd/content" - "github.com/containerd/containerd/errdefs" - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -type store struct { - content.Store - remotes []ocispec.Descriptor -} - -func newStore(base content.Store, remotes []ocispec.Descriptor) *store { - return &store{ - Store: base, - remotes: remotes, - } -} - -func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) { - info, err := s.Store.Info(ctx, dgst) - if err != nil { - if !errdefs.IsNotFound(err) { - return content.Info{}, err - } - for _, desc := range s.remotes { - if desc.Digest == dgst { - return content.Info{ - Digest: desc.Digest, - Size: desc.Size, - }, nil - } - } - return content.Info{}, err - } - return info, nil -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package copier + +import ( + "context" + + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/errdefs" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +type store struct { + content.Store + remotes []ocispec.Descriptor +} + +func newStore(base content.Store, remotes []ocispec.Descriptor) *store { + return &store{ + Store: base, + remotes: remotes, + } +} + +func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) { + info, err := s.Store.Info(ctx, dgst) + if err != nil { + if !errdefs.IsNotFound(err) { + return content.Info{}, err + } + for _, desc := range s.remotes { + if desc.Digest == dgst { + return content.Info{ + Digest: desc.Digest, + Size: desc.Size, + }, nil + } + } + return content.Info{}, err + } + return info, nil +} diff --git a/contrib/nydusify/pkg/hook/hook.go b/contrib/nydusify/pkg/hook/hook.go index e7fc4737a56..99307fe5f48 100644 --- a/contrib/nydusify/pkg/hook/hook.go +++ b/contrib/nydusify/pkg/hook/hook.go @@ -1,158 +1,158 @@ -// Copyright 2022 Nydus Developers. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package hook - -import ( - "net/rpc" - "os" - "os/exec" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/go-plugin" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -var hookPluginPath = "./nydus-hook-plugin" - -func init() { - envPath := os.Getenv("NYDUS_HOOK_PLUGIN_PATH") - if envPath != "" { - hookPluginPath = envPath - } -} - -type Blob struct { - ID string `json:"id"` - Size int64 `json:"size"` -} - -type Info struct { - BootstrapPath string `json:"bootstrap_path"` - SourceRef string `json:"source_ref"` - TargetRef string `json:"target_ref"` - Blobs []Blob `json:"blobs"` -} - -type Hook interface { - BeforePushManifest(info *Info) error - AfterPushManifest(info *Info) error -} - -type RPC struct{ client *rpc.Client } - -func (h *RPC) BeforePushManifest(info *Info) error { - var resp error - err := h.client.Call("Plugin.BeforePushManifest", info, &resp) - if err != nil { - return err - } - return resp -} - -func (h *RPC) AfterPushManifest(info *Info) error { - var resp error - err := h.client.Call("Plugin.AfterPushManifest", info, &resp) - if err != nil { - return err - } - return resp -} - -type RPCServer struct { - Impl Hook -} - -func (s *RPCServer) BeforePushManifest(info Info, resp *error) error { - *resp = s.Impl.BeforePushManifest(&info) - return *resp -} - -func (s *RPCServer) AfterPushManifest(info Info, resp *error) error { - *resp = s.Impl.AfterPushManifest(&info) - return *resp -} - -type Plugin struct { - Impl Hook -} - -func (p *Plugin) Server(*plugin.MuxBroker) (interface{}, error) { - return &RPCServer{Impl: p.Impl}, nil -} - -func (Plugin) Client(_ *plugin.MuxBroker, c *rpc.Client) (interface{}, error) { - return &RPC{client: c}, nil -} - -var Caller Hook - -var handshakeConfig = plugin.HandshakeConfig{ - ProtocolVersion: 1, - MagicCookieKey: "NYDUS_HOOK_PLUGIN", - MagicCookieValue: "nydus-hook-plugin", -} - -func NewPlugin(pluginImpl Hook) { - plugin.Serve(&plugin.ServeConfig{ - HandshakeConfig: handshakeConfig, - Plugins: map[string]plugin.Plugin{ - "hook": &Plugin{Impl: pluginImpl}, - }, - }) -} - -var client *plugin.Client - -func Init() { - if Caller != nil { - return - } - - if _, err := os.Stat(hookPluginPath); err != nil { - if errors.Is(err, os.ErrNotExist) { - return - } - logrus.Errorln(errors.Wrapf(err, "try load hook plugin %s", hookPluginPath)) - return - } - - var pluginMap = map[string]plugin.Plugin{ - "hook": &Plugin{}, - } - - client = plugin.NewClient(&plugin.ClientConfig{ - HandshakeConfig: handshakeConfig, - Plugins: pluginMap, - Cmd: exec.Command(hookPluginPath), - Logger: hclog.New(&hclog.LoggerOptions{ - Output: hclog.DefaultOutput, - Level: hclog.Error, - Name: "plugin", - }), - }) - - rpcClient, err := client.Client() - if err != nil { - logrus.WithError(err).Error("Failed to create rpc client") - return - } - - raw, err := rpcClient.Dispense("hook") - if err != nil { - logrus.WithError(err).Error("Failed to dispense hook") - return - } - - logrus.Infof("[HOOK] Loaded hook plugin %s", hookPluginPath) - - Caller = raw.(Hook) -} - -func Close() { - if client != nil { - defer client.Kill() - } -} +// Copyright 2022 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package hook + +import ( + "net/rpc" + "os" + "os/exec" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-plugin" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var hookPluginPath = "./nydus-hook-plugin" + +func init() { + envPath := os.Getenv("NYDUS_HOOK_PLUGIN_PATH") + if envPath != "" { + hookPluginPath = envPath + } +} + +type Blob struct { + ID string `json:"id"` + Size int64 `json:"size"` +} + +type Info struct { + BootstrapPath string `json:"bootstrap_path"` + SourceRef string `json:"source_ref"` + TargetRef string `json:"target_ref"` + Blobs []Blob `json:"blobs"` +} + +type Hook interface { + BeforePushManifest(info *Info) error + AfterPushManifest(info *Info) error +} + +type RPC struct{ client *rpc.Client } + +func (h *RPC) BeforePushManifest(info *Info) error { + var resp error + err := h.client.Call("Plugin.BeforePushManifest", info, &resp) + if err != nil { + return err + } + return resp +} + +func (h *RPC) AfterPushManifest(info *Info) error { + var resp error + err := h.client.Call("Plugin.AfterPushManifest", info, &resp) + if err != nil { + return err + } + return resp +} + +type RPCServer struct { + Impl Hook +} + +func (s *RPCServer) BeforePushManifest(info Info, resp *error) error { + *resp = s.Impl.BeforePushManifest(&info) + return *resp +} + +func (s *RPCServer) AfterPushManifest(info Info, resp *error) error { + *resp = s.Impl.AfterPushManifest(&info) + return *resp +} + +type Plugin struct { + Impl Hook +} + +func (p *Plugin) Server(*plugin.MuxBroker) (interface{}, error) { + return &RPCServer{Impl: p.Impl}, nil +} + +func (Plugin) Client(_ *plugin.MuxBroker, c *rpc.Client) (interface{}, error) { + return &RPC{client: c}, nil +} + +var Caller Hook + +var handshakeConfig = plugin.HandshakeConfig{ + ProtocolVersion: 1, + MagicCookieKey: "NYDUS_HOOK_PLUGIN", + MagicCookieValue: "nydus-hook-plugin", +} + +func NewPlugin(pluginImpl Hook) { + plugin.Serve(&plugin.ServeConfig{ + HandshakeConfig: handshakeConfig, + Plugins: map[string]plugin.Plugin{ + "hook": &Plugin{Impl: pluginImpl}, + }, + }) +} + +var client *plugin.Client + +func Init() { + if Caller != nil { + return + } + + if _, err := os.Stat(hookPluginPath); err != nil { + if errors.Is(err, os.ErrNotExist) { + return + } + logrus.Errorln(errors.Wrapf(err, "try load hook plugin %s", hookPluginPath)) + return + } + + var pluginMap = map[string]plugin.Plugin{ + "hook": &Plugin{}, + } + + client = plugin.NewClient(&plugin.ClientConfig{ + HandshakeConfig: handshakeConfig, + Plugins: pluginMap, + Cmd: exec.Command(hookPluginPath), + Logger: hclog.New(&hclog.LoggerOptions{ + Output: hclog.DefaultOutput, + Level: hclog.Error, + Name: "plugin", + }), + }) + + rpcClient, err := client.Client() + if err != nil { + logrus.WithError(err).Error("Failed to create rpc client") + return + } + + raw, err := rpcClient.Dispense("hook") + if err != nil { + logrus.WithError(err).Error("Failed to dispense hook") + return + } + + logrus.Infof("[HOOK] Loaded hook plugin %s", hookPluginPath) + + Caller = raw.(Hook) +} + +func Close() { + if client != nil { + defer client.Kill() + } +} diff --git a/contrib/nydusify/pkg/metrics/fileexporter/fileexporter.go b/contrib/nydusify/pkg/metrics/fileexporter/fileexporter.go index 1f661971cc9..1c490023a15 100644 --- a/contrib/nydusify/pkg/metrics/fileexporter/fileexporter.go +++ b/contrib/nydusify/pkg/metrics/fileexporter/fileexporter.go @@ -1,23 +1,23 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package fileexporter - -import ( - "github.com/prometheus/client_golang/prometheus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/metrics" -) - -type FileExporter struct{ name string } - -func New(name string) *FileExporter { - return &FileExporter{ - name: name, - } -} - -func (exp *FileExporter) Export() { - prometheus.WriteToTextfile(exp.name, metrics.Registry) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package fileexporter + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/metrics" +) + +type FileExporter struct{ name string } + +func New(name string) *FileExporter { + return &FileExporter{ + name: name, + } +} + +func (exp *FileExporter) Export() { + prometheus.WriteToTextfile(exp.name, metrics.Registry) +} diff --git a/contrib/nydusify/pkg/metrics/metrics.go b/contrib/nydusify/pkg/metrics/metrics.go index 7125ebafd51..8ddcab9c65e 100644 --- a/contrib/nydusify/pkg/metrics/metrics.go +++ b/contrib/nydusify/pkg/metrics/metrics.go @@ -1,108 +1,108 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package metrics - -import ( - "strconv" - "sync" - "time" - - "github.com/prometheus/client_golang/prometheus" -) - -type Exporter interface { - Export() -} - -const ( - convertDurationKey = "convert_duration_key" - convertSuccessCountKey = "convert_success_count_key" - convertFailureCountKey = "convert_failure_count_key" - storeCacheDurationKey = "store_cache_duration" - namespace = "nydusify" - subsystem = "convert" -) - -var ( - convertDuration = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: convertDurationKey, - Help: "The total duration of converting an OCI image. Broken down by source references/repo and layers count.", - }, - []string{"source_reference", "layers_count"}, - ) - - convertSuccessCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: convertSuccessCountKey, - Help: "The total converting success times. Broken down by source references.", - }, - []string{"source_reference"}, - ) - - convertFailureCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: convertFailureCountKey, - Help: "The total converting failure times. Broken down by source references.", - }, - []string{"source_reference", "reason"}, - ) - - storeCacheDuration = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: storeCacheDurationKey, - Help: "The duration of pushing cache to registry. Broken down by source references.", - }, - []string{"source_reference"}, - ) -) - -var register sync.Once -var Registry *prometheus.Registry -var exporter Exporter - -func sinceInSeconds(start time.Time) float64 { - return time.Since(start).Seconds() -} - -// Register registers metrics. This is always called only once. -func Register(exp Exporter) { - register.Do(func() { - Registry = prometheus.NewRegistry() - Registry.MustRegister(convertDuration, convertSuccessCount, convertFailureCount, storeCacheDuration) - exporter = exp - }) -} - -func Export() { - // In case no exporter was ever registered. 
- if exporter != nil { - exporter.Export() - } -} - -func ConversionDuration(ref string, layers int, start time.Time) { - convertDuration.WithLabelValues(ref, strconv.Itoa(layers)).Add(sinceInSeconds(start)) -} - -func ConversionSuccessCount(ref string) { - convertSuccessCount.WithLabelValues(ref).Inc() -} - -func ConversionFailureCount(ref string, reason string) { - convertFailureCount.WithLabelValues(ref, reason).Inc() -} - -func StoreCacheDuration(ref string, start time.Time) { - storeCacheDuration.WithLabelValues(ref).Add(sinceInSeconds(start)) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package metrics + +import ( + "strconv" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +type Exporter interface { + Export() +} + +const ( + convertDurationKey = "convert_duration_key" + convertSuccessCountKey = "convert_success_count_key" + convertFailureCountKey = "convert_failure_count_key" + storeCacheDurationKey = "store_cache_duration" + namespace = "nydusify" + subsystem = "convert" +) + +var ( + convertDuration = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: convertDurationKey, + Help: "The total duration of converting an OCI image. Broken down by source references/repo and layers count.", + }, + []string{"source_reference", "layers_count"}, + ) + + convertSuccessCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: convertSuccessCountKey, + Help: "The total converting success times. Broken down by source references.", + }, + []string{"source_reference"}, + ) + + convertFailureCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: convertFailureCountKey, + Help: "The total converting failure times. Broken down by source references.", + }, + []string{"source_reference", "reason"}, + ) + + storeCacheDuration = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: storeCacheDurationKey, + Help: "The duration of pushing cache to registry. Broken down by source references.", + }, + []string{"source_reference"}, + ) +) + +var register sync.Once +var Registry *prometheus.Registry +var exporter Exporter + +func sinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} + +// Register registers metrics. This is always called only once. +func Register(exp Exporter) { + register.Do(func() { + Registry = prometheus.NewRegistry() + Registry.MustRegister(convertDuration, convertSuccessCount, convertFailureCount, storeCacheDuration) + exporter = exp + }) +} + +func Export() { + // In case no exporter was ever registered. 
+ if exporter != nil { + exporter.Export() + } +} + +func ConversionDuration(ref string, layers int, start time.Time) { + convertDuration.WithLabelValues(ref, strconv.Itoa(layers)).Add(sinceInSeconds(start)) +} + +func ConversionSuccessCount(ref string) { + convertSuccessCount.WithLabelValues(ref).Inc() +} + +func ConversionFailureCount(ref string, reason string) { + convertFailureCount.WithLabelValues(ref, reason).Inc() +} + +func StoreCacheDuration(ref string, start time.Time) { + storeCacheDuration.WithLabelValues(ref).Add(sinceInSeconds(start)) +} diff --git a/contrib/nydusify/pkg/packer/artifact.go b/contrib/nydusify/pkg/packer/artifact.go index 306620d4a8c..6944018eb93 100644 --- a/contrib/nydusify/pkg/packer/artifact.go +++ b/contrib/nydusify/pkg/packer/artifact.go @@ -1,53 +1,53 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package packer - -import ( - "os" - "path/filepath" - "strings" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -type Artifact struct { - OutputDir string -} - -func NewArtifact(outputDir string) (Artifact, error) { - res := Artifact{OutputDir: outputDir} - if err := res.ensureOutputDir(); err != nil { - return Artifact{}, err - } - return res, nil -} - -func (a Artifact) bootstrapPath(imageName string) string { - if filepath.Ext(imageName) != "" { - return filepath.Join(a.OutputDir, imageName) - } - return filepath.Join(a.OutputDir, imageName+".meta") -} - -func (a Artifact) blobFilePath(imageName string, isDigest bool) string { - if isDigest { - return filepath.Join(a.OutputDir, imageName) - } else if suffix := filepath.Ext(imageName); suffix != "" { - return filepath.Join(a.OutputDir, strings.TrimSuffix(imageName, suffix)+".blob") - } - return filepath.Join(a.OutputDir, imageName+".blob") -} - -func (a Artifact) outputJSONPath() string { - return filepath.Join(a.OutputDir, "output.json") -} - -// ensureOutputDir use user defined outputDir or defaultOutputDir, and make sure dir exists -func (a *Artifact) ensureOutputDir() error { - if utils.IsEmptyString(a.OutputDir) { - a.OutputDir = defaultOutputDir - } - return os.MkdirAll(a.OutputDir, 0755) -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package packer + +import ( + "os" + "path/filepath" + "strings" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +type Artifact struct { + OutputDir string +} + +func NewArtifact(outputDir string) (Artifact, error) { + res := Artifact{OutputDir: outputDir} + if err := res.ensureOutputDir(); err != nil { + return Artifact{}, err + } + return res, nil +} + +func (a Artifact) bootstrapPath(imageName string) string { + if filepath.Ext(imageName) != "" { + return filepath.Join(a.OutputDir, imageName) + } + return filepath.Join(a.OutputDir, imageName+".meta") +} + +func (a Artifact) blobFilePath(imageName string, isDigest bool) string { + if isDigest { + return filepath.Join(a.OutputDir, imageName) + } else if suffix := filepath.Ext(imageName); suffix != "" { + return filepath.Join(a.OutputDir, strings.TrimSuffix(imageName, suffix)+".blob") + } + return filepath.Join(a.OutputDir, imageName+".blob") +} + +func (a Artifact) outputJSONPath() string { + return filepath.Join(a.OutputDir, "output.json") +} + +// ensureOutputDir use user defined outputDir or defaultOutputDir, and make sure dir exists +func (a *Artifact) ensureOutputDir() error { + if utils.IsEmptyString(a.OutputDir) { + a.OutputDir = defaultOutputDir + } + return os.MkdirAll(a.OutputDir, 0755) +} diff --git a/contrib/nydusify/pkg/packer/artifact_test.go b/contrib/nydusify/pkg/packer/artifact_test.go index d39440911b8..ef5a3304b92 100644 --- a/contrib/nydusify/pkg/packer/artifact_test.go +++ b/contrib/nydusify/pkg/packer/artifact_test.go @@ -1,35 +1,35 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package packer - -import ( - "os" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestArtifactPath(t *testing.T) { - artifact, err := NewArtifact("") - defer os.RemoveAll("./.nydus-build-output") - require.NoError(t, err) - require.Equal(t, ".nydus-build-output/test.meta", artifact.bootstrapPath("test.meta")) - require.Equal(t, ".nydus-build-output/test.m", artifact.bootstrapPath("test.m")) - require.Equal(t, ".nydus-build-output/test.meta", artifact.bootstrapPath("test")) - require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test.meta", false)) - require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test.m", false)) - require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test", false)) - require.Equal(t, ".nydus-build-output/test", artifact.blobFilePath("test", true)) - - artifact, err = NewArtifact("/tmp") - require.NoError(t, err) - require.Equal(t, "/tmp/test.meta", artifact.bootstrapPath("test.meta")) - require.Equal(t, "/tmp/test.m", artifact.bootstrapPath("test.m")) - require.Equal(t, "/tmp/test.meta", artifact.bootstrapPath("test")) - require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test.meta", false)) - require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test.m", false)) - require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test", false)) - require.Equal(t, "/tmp/test", artifact.blobFilePath("test", true)) -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package packer + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestArtifactPath(t *testing.T) { + artifact, err := NewArtifact("") + defer os.RemoveAll("./.nydus-build-output") + require.NoError(t, err) + require.Equal(t, ".nydus-build-output/test.meta", artifact.bootstrapPath("test.meta")) + require.Equal(t, ".nydus-build-output/test.m", artifact.bootstrapPath("test.m")) + require.Equal(t, ".nydus-build-output/test.meta", artifact.bootstrapPath("test")) + require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test.meta", false)) + require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test.m", false)) + require.Equal(t, ".nydus-build-output/test.blob", artifact.blobFilePath("test", false)) + require.Equal(t, ".nydus-build-output/test", artifact.blobFilePath("test", true)) + + artifact, err = NewArtifact("/tmp") + require.NoError(t, err) + require.Equal(t, "/tmp/test.meta", artifact.bootstrapPath("test.meta")) + require.Equal(t, "/tmp/test.m", artifact.bootstrapPath("test.m")) + require.Equal(t, "/tmp/test.meta", artifact.bootstrapPath("test")) + require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test.meta", false)) + require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test.m", false)) + require.Equal(t, "/tmp/test.blob", artifact.blobFilePath("test", false)) + require.Equal(t, "/tmp/test", artifact.blobFilePath("test", true)) +} diff --git a/contrib/nydusify/pkg/packer/backend.go b/contrib/nydusify/pkg/packer/backend.go index 05f26f3c040..6c3a8957a46 100644 --- a/contrib/nydusify/pkg/packer/backend.go +++ b/contrib/nydusify/pkg/packer/backend.go @@ -1,101 +1,101 @@ -// Copyright 2022 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -// The Nydusify CLI tool converts an OCI container image from source registry into -// a Nydus image using `nydus-image` CLI layer by layer, then pushes Nydus image to -// target registry. 
- -package packer - -import ( - "encoding/json" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" -) - -type BackendConfig interface { - rawMetaBackendCfg() []byte - rawBlobBackendCfg() []byte - backendType() string -} - -type OssBackendConfig struct { - Endpoint string `json:"endpoint"` - AccessKeyID string `json:"access_key_id"` - AccessKeySecret string `json:"access_key_secret"` - BucketName string `json:"bucket_name"` - MetaPrefix string `json:"meta_prefix"` - BlobPrefix string `json:"blob_prefix"` -} - -func (cfg *OssBackendConfig) rawMetaBackendCfg() []byte { - configMap := map[string]string{ - "endpoint": cfg.Endpoint, - "access_key_id": cfg.AccessKeyID, - "access_key_secret": cfg.AccessKeySecret, - "bucket_name": cfg.BucketName, - "object_prefix": cfg.MetaPrefix, - } - b, _ := json.Marshal(configMap) - return b -} - -func (cfg *OssBackendConfig) rawBlobBackendCfg() []byte { - configMap := map[string]string{ - "endpoint": cfg.Endpoint, - "access_key_id": cfg.AccessKeyID, - "access_key_secret": cfg.AccessKeySecret, - "bucket_name": cfg.BucketName, - "object_prefix": cfg.BlobPrefix, - } - b, _ := json.Marshal(configMap) - return b -} - -func (cfg *OssBackendConfig) backendType() string { - return "oss" -} - -type S3BackendConfig struct { - Endpoint string `json:"endpoint"` - Scheme string `json:"scheme,omitempty"` - AccessKeyID string `json:"access_key_id,omitempty"` - AccessKeySecret string `json:"access_key_secret,omitempty"` - Region string `json:"region"` - BucketName string `json:"bucket_name"` - MetaPrefix string `json:"meta_prefix"` - BlobPrefix string `json:"blob_prefix"` -} - -func (cfg *S3BackendConfig) rawMetaBackendCfg() []byte { - s3Config := backend.S3Config{ - AccessKeyID: cfg.AccessKeyID, - AccessKeySecret: cfg.AccessKeySecret, - Endpoint: cfg.Endpoint, - Scheme: cfg.Scheme, - BucketName: cfg.BucketName, - Region: cfg.Region, - ObjectPrefix: cfg.MetaPrefix, - } - b, _ := json.Marshal(s3Config) - return b -} - -func (cfg *S3BackendConfig) rawBlobBackendCfg() []byte { - s3Config := backend.S3Config{ - AccessKeyID: cfg.AccessKeyID, - AccessKeySecret: cfg.AccessKeySecret, - Endpoint: cfg.Endpoint, - Scheme: cfg.Scheme, - BucketName: cfg.BucketName, - Region: cfg.Region, - ObjectPrefix: cfg.BlobPrefix, - } - b, _ := json.Marshal(s3Config) - return b -} - -func (cfg *S3BackendConfig) backendType() string { - return "s3" -} +// Copyright 2022 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +// The Nydusify CLI tool converts an OCI container image from source registry into +// a Nydus image using `nydus-image` CLI layer by layer, then pushes Nydus image to +// target registry. 
+ +package packer + +import ( + "encoding/json" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" +) + +type BackendConfig interface { + rawMetaBackendCfg() []byte + rawBlobBackendCfg() []byte + backendType() string +} + +type OssBackendConfig struct { + Endpoint string `json:"endpoint"` + AccessKeyID string `json:"access_key_id"` + AccessKeySecret string `json:"access_key_secret"` + BucketName string `json:"bucket_name"` + MetaPrefix string `json:"meta_prefix"` + BlobPrefix string `json:"blob_prefix"` +} + +func (cfg *OssBackendConfig) rawMetaBackendCfg() []byte { + configMap := map[string]string{ + "endpoint": cfg.Endpoint, + "access_key_id": cfg.AccessKeyID, + "access_key_secret": cfg.AccessKeySecret, + "bucket_name": cfg.BucketName, + "object_prefix": cfg.MetaPrefix, + } + b, _ := json.Marshal(configMap) + return b +} + +func (cfg *OssBackendConfig) rawBlobBackendCfg() []byte { + configMap := map[string]string{ + "endpoint": cfg.Endpoint, + "access_key_id": cfg.AccessKeyID, + "access_key_secret": cfg.AccessKeySecret, + "bucket_name": cfg.BucketName, + "object_prefix": cfg.BlobPrefix, + } + b, _ := json.Marshal(configMap) + return b +} + +func (cfg *OssBackendConfig) backendType() string { + return "oss" +} + +type S3BackendConfig struct { + Endpoint string `json:"endpoint"` + Scheme string `json:"scheme,omitempty"` + AccessKeyID string `json:"access_key_id,omitempty"` + AccessKeySecret string `json:"access_key_secret,omitempty"` + Region string `json:"region"` + BucketName string `json:"bucket_name"` + MetaPrefix string `json:"meta_prefix"` + BlobPrefix string `json:"blob_prefix"` +} + +func (cfg *S3BackendConfig) rawMetaBackendCfg() []byte { + s3Config := backend.S3Config{ + AccessKeyID: cfg.AccessKeyID, + AccessKeySecret: cfg.AccessKeySecret, + Endpoint: cfg.Endpoint, + Scheme: cfg.Scheme, + BucketName: cfg.BucketName, + Region: cfg.Region, + ObjectPrefix: cfg.MetaPrefix, + } + b, _ := json.Marshal(s3Config) + return b +} + +func (cfg *S3BackendConfig) rawBlobBackendCfg() []byte { + s3Config := backend.S3Config{ + AccessKeyID: cfg.AccessKeyID, + AccessKeySecret: cfg.AccessKeySecret, + Endpoint: cfg.Endpoint, + Scheme: cfg.Scheme, + BucketName: cfg.BucketName, + Region: cfg.Region, + ObjectPrefix: cfg.BlobPrefix, + } + b, _ := json.Marshal(s3Config) + return b +} + +func (cfg *S3BackendConfig) backendType() string { + return "s3" +} diff --git a/contrib/nydusify/pkg/packer/backend_test.go b/contrib/nydusify/pkg/packer/backend_test.go index 9541370ee36..29b079efb23 100644 --- a/contrib/nydusify/pkg/packer/backend_test.go +++ b/contrib/nydusify/pkg/packer/backend_test.go @@ -1,30 +1,30 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package packer - -import ( - "testing" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/stretchr/testify/require" -) - -func TestS3BackendConfig(t *testing.T) { - s3BackendConfig := &S3BackendConfig{ - Endpoint: "s3.amazonaws.com", - Scheme: "https", - AccessKeyID: "testAK", - AccessKeySecret: "testSK", - Region: "region1", - BucketName: "test", - MetaPrefix: "meta", - BlobPrefix: "blob", - } - _, err := backend.NewBackend("s3", s3BackendConfig.rawMetaBackendCfg(), nil) - require.NoError(t, err) - _, err = backend.NewBackend("s3", s3BackendConfig.rawBlobBackendCfg(), nil) - require.NoError(t, err) - require.Equal(t, "s3", s3BackendConfig.backendType()) -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package packer + +import ( + "testing" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/stretchr/testify/require" +) + +func TestS3BackendConfig(t *testing.T) { + s3BackendConfig := &S3BackendConfig{ + Endpoint: "s3.amazonaws.com", + Scheme: "https", + AccessKeyID: "testAK", + AccessKeySecret: "testSK", + Region: "region1", + BucketName: "test", + MetaPrefix: "meta", + BlobPrefix: "blob", + } + _, err := backend.NewBackend("s3", s3BackendConfig.rawMetaBackendCfg(), nil) + require.NoError(t, err) + _, err = backend.NewBackend("s3", s3BackendConfig.rawBlobBackendCfg(), nil) + require.NoError(t, err) + require.Equal(t, "s3", s3BackendConfig.backendType()) +} diff --git a/contrib/nydusify/pkg/packer/packer.go b/contrib/nydusify/pkg/packer/packer.go index 6be564ad3bb..df38a01690d 100644 --- a/contrib/nydusify/pkg/packer/packer.go +++ b/contrib/nydusify/pkg/packer/packer.go @@ -1,330 +1,330 @@ -package packer - -import ( - "context" - "encoding/json" - "os" - "os/exec" - "path/filepath" - "strings" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/compactor" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -const ( - nydusBinaryName = "nydus-image" - defaultOutputDir = "./.nydus-build-output" -) - -var ( - ErrNydusImageBinaryNotFound = errors.New("failed to find nydus-image binary") - ErrInvalidChunkDictArgs = errors.New("invalid chunk-dict args") - ErrNoSupport = errors.New("invalid chunk-dict type") -) - -type Opt struct { - LogLevel logrus.Level - NydusImagePath string - OutputDir string - BackendConfig BackendConfig -} - -type Builder interface { - Run(option build.BuilderOption) error -} - -type Packer struct { - logger *logrus.Logger - nydusImagePath string - BackendConfig BackendConfig - pusher *Pusher - builder Builder - Artifact -} - -type BlobManifest struct { - Blobs []string `json:"blobs,omitempty"` -} - -type PackRequest struct { - SourceDir string - ImageName string - FsVersion string - Compressor string - ChunkSize string - PushToRemote bool - - ChunkDict string - Parent string - TryCompact bool - CompactConfigPath string -} - -type PackResult struct { - Meta string - Blob string -} - -func New(opt Opt) (*Packer, error) { - logger, err := initLogger(opt.LogLevel) - if err != nil { - return nil, errors.Wrap(err, "failed to init logger") - } - artifact, err := NewArtifact(opt.OutputDir) - if err != nil { - return nil, errors.Wrap(err, "failed to init artifact") - } - p := &Packer{ - Artifact: artifact, - BackendConfig: opt.BackendConfig, - logger: logger, - nydusImagePath: opt.NydusImagePath, - } - if err = p.ensureNydusImagePath(); err != nil { - return nil, err - } - p.builder = build.NewBuilder(p.nydusImagePath) - if p.BackendConfig != nil { - p.pusher, err = NewPusher(NewPusherOpt{ - Artifact: artifact, - BackendConfig: opt.BackendConfig, - Logger: p.logger, - }) - if err != nil { - return nil, err - } - } - return p, nil -} - -// get blobs from bootstrap -func (p *Packer) getBlobsFromBootstrap(bootstrap string) ([]string, error) { - var blobs []string - if bootstrap != "" { - inspector := tool.NewInspector(p.nydusImagePath) - item, err := inspector.Inspect(tool.InspectOption{ - Operation: tool.GetBlobs, - Bootstrap: bootstrap, - }) - if err != nil { - return []string{}, err - } - blobsInfo, _ := item.(tool.BlobInfoList) - p.logger.Infof("get blob list 
from bootstrap '%s': %v", bootstrap, blobsInfo) - for _, blobInfo := range blobsInfo { - blobs = append(blobs, blobInfo.BlobID) - } - } - return blobs, nil -} - -func (p *Packer) getChunkDictBlobs(chunkDict string) ([]string, error) { - if chunkDict == "" { - return []string{}, nil - } - // get chunk-dict file - info := strings.Split(chunkDict, "=") - if len(info) != 2 { - return []string{}, ErrInvalidChunkDictArgs - } - switch info[0] { - case "bootstrap": - return p.getBlobsFromBootstrap(info[1]) - default: - return []string{}, ErrNoSupport - } -} - -// getBlobHash will get blobs hash from output.json, the hash will be -// used oss key as blob -// ignore blobs already exist -func (p *Packer) getNewBlobsHash(exists []string) (string, error) { - // build tmp lookup map - m := make(map[string]bool) - for _, blob := range exists { - m[blob] = true - } - content, err := os.ReadFile(p.outputJSONPath()) - if err != nil { - return "", err - } - var manifest BlobManifest - if err = json.Unmarshal(content, &manifest); err != nil { - return "", err - } - for _, blob := range manifest.Blobs { - if _, ok := m[blob]; !ok { - return blob, nil - } - } - // return the latest blob hash - return "", nil -} - -func (p *Packer) dumpBlobBackendConfig(filePath string) (func(), error) { - file, err := os.OpenFile(filePath, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return nil, err - } - defer file.Close() - n, err := file.Write(p.BackendConfig.rawBlobBackendCfg()) - if err != nil { - return nil, err - } - return func() { - zeros := make([]byte, n) - file, err = os.OpenFile(filePath, os.O_WRONLY, 0644) - if err != nil { - logrus.Errorf("failed to open config file %s, err = %v", filePath, err) - return - } - file.Write(zeros) - file.Close() - os.Remove(filePath) - }, nil -} - -func (p *Packer) tryCompactParent(req *PackRequest) error { - if !req.TryCompact || req.Parent == "" { - return nil - } - if p.BackendConfig == nil { - return errors.Errorf("backend configuration is needed to compact parent bootstrap") - } - - // dumps backend config file - backendConfigPath := filepath.Join(p.OutputDir, "backend-config.json") - destroy, err := p.dumpBlobBackendConfig(backendConfigPath) - if err != nil { - return errors.Wrap(err, "failed to dump backend config file") - } - // destroy backend config file, because there are secrets - defer destroy() - c, err := compactor.NewCompactor(p.nydusImagePath, p.OutputDir, req.CompactConfigPath) - if err != nil { - return errors.Wrap(err, "failed to new compactor") - } - outputBootstrap, err := c.Compact(req.Parent, req.ChunkDict, p.BackendConfig.backendType(), backendConfigPath) - if err != nil { - return errors.Wrap(err, "failed to compact parent") - } - // check output bootstrap - _, err = os.Stat(outputBootstrap) - if err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "failed to stat target bootstrap") - } - if err == nil { - // parent --> output bootstrap - p.logger.Infof("successfully compacted bootstrap %s, use new parent %s", req.Parent, outputBootstrap) - req.Parent = outputBootstrap - } - - return nil -} - -func (p *Packer) Pack(_ context.Context, req PackRequest) (PackResult, error) { - p.logger.Infof("start to build image from source directory %q", req.SourceDir) - if err := p.tryCompactParent(&req); err != nil { - return PackResult{}, err - } - parentBlobs, err := p.getBlobsFromBootstrap(req.Parent) - if err != nil { - return PackResult{}, errors.Wrap(err, "failed to get blobs from parent bootstrap") - } - chunkDictBlobs, err := 
p.getChunkDictBlobs(req.ChunkDict) - if err != nil { - return PackResult{}, errors.Wrap(err, "failed to get blobs from chunk-dict") - } - blobPath := p.blobFilePath(req.ImageName, false) - bootstrapPath := p.bootstrapPath(req.ImageName) - if err = p.builder.Run(build.BuilderOption{ - ParentBootstrapPath: req.Parent, - ChunkDict: req.ChunkDict, - BootstrapPath: bootstrapPath, - BlobPath: blobPath, - OutputJSONPath: p.outputJSONPath(), - RootfsPath: req.SourceDir, - WhiteoutSpec: "oci", - Compressor: req.Compressor, - ChunkSize: req.ChunkSize, - FsVersion: req.FsVersion, - }); err != nil { - return PackResult{}, errors.Wrapf(err, "failed to build image from directory %s", req.SourceDir) - } - newBlobHash, err := p.getNewBlobsHash(append(parentBlobs, chunkDictBlobs...)) - if err != nil { - return PackResult{}, errors.Wrap(err, "failed to get hash value of Nydus blob") - } - if newBlobHash == "" { - blobPath = "" - } else { - if req.Parent != "" || req.PushToRemote { - p.logger.Infof("rename blob file into sha256 csum") - newBlobName := p.blobFilePath(newBlobHash, true) - if err = os.Rename(blobPath, newBlobName); err != nil { - return PackResult{}, errors.Wrap(err, "failed to rename blob file") - } - blobPath = newBlobName - } - } - if !req.PushToRemote { - // if we don't need to push meta and blob to remote, just return the local build artifact - return PackResult{ - Meta: bootstrapPath, - Blob: blobPath, - }, nil - } - - // if pusher is empty, that means backend config is not provided - if p.pusher == nil { - return PackResult{}, errors.New("can not push image to remote due to lack of backend configuration") - } - pushResult, err := p.pusher.Push(PushRequest{ - Meta: req.ImageName, - Blob: newBlobHash, - ParentBlobs: parentBlobs, - }) - if err != nil { - return PackResult{}, errors.Wrap(err, "failed to push pack result to remote") - } - return PackResult{ - Meta: pushResult.RemoteMeta, - Blob: pushResult.RemoteBlob, - }, nil -} - -// ensureNydusImagePath ensure nydus-image binary exists, the Precedence for nydus-image is as follows -// 1. if nydusImagePath is specified try nydusImagePath first -// 2. if nydusImagePath not exists, try to find nydus-image from $PATH -// 3. 
return ErrNydusImageBinaryNotFound -func (p *Packer) ensureNydusImagePath() error { - // if NydusImagePath is not empty, check if binary exists - if strings.TrimSpace(p.nydusImagePath) != "" { - // if we found nydus Image Path from - if _, err := os.Stat(p.nydusImagePath); err == nil { - p.logger.Infof("found 'nydus-image' binary at %s", p.nydusImagePath) - return nil - } - // if NydusImagePath not exists, check if nydus-image can be found in PATH - if nydusBinaryPath, err := exec.LookPath(nydusBinaryName); err == nil { - p.logger.Infof("found 'nydus-image' binary at %s", nydusBinaryPath) - p.nydusImagePath = nydusBinaryPath - return nil - } - } - return ErrNydusImageBinaryNotFound -} - -func initLogger(logLevel logrus.Level) (*logrus.Logger, error) { - logger := logrus.New() - logger.SetLevel(logLevel) - logger.SetFormatter(&logrus.TextFormatter{ - FullTimestamp: true, - }) - return logger, nil -} +package packer + +import ( + "context" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/compactor" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const ( + nydusBinaryName = "nydus-image" + defaultOutputDir = "./.nydus-build-output" +) + +var ( + ErrNydusImageBinaryNotFound = errors.New("failed to find nydus-image binary") + ErrInvalidChunkDictArgs = errors.New("invalid chunk-dict args") + ErrNoSupport = errors.New("invalid chunk-dict type") +) + +type Opt struct { + LogLevel logrus.Level + NydusImagePath string + OutputDir string + BackendConfig BackendConfig +} + +type Builder interface { + Run(option build.BuilderOption) error +} + +type Packer struct { + logger *logrus.Logger + nydusImagePath string + BackendConfig BackendConfig + pusher *Pusher + builder Builder + Artifact +} + +type BlobManifest struct { + Blobs []string `json:"blobs,omitempty"` +} + +type PackRequest struct { + SourceDir string + ImageName string + FsVersion string + Compressor string + ChunkSize string + PushToRemote bool + + ChunkDict string + Parent string + TryCompact bool + CompactConfigPath string +} + +type PackResult struct { + Meta string + Blob string +} + +func New(opt Opt) (*Packer, error) { + logger, err := initLogger(opt.LogLevel) + if err != nil { + return nil, errors.Wrap(err, "failed to init logger") + } + artifact, err := NewArtifact(opt.OutputDir) + if err != nil { + return nil, errors.Wrap(err, "failed to init artifact") + } + p := &Packer{ + Artifact: artifact, + BackendConfig: opt.BackendConfig, + logger: logger, + nydusImagePath: opt.NydusImagePath, + } + if err = p.ensureNydusImagePath(); err != nil { + return nil, err + } + p.builder = build.NewBuilder(p.nydusImagePath) + if p.BackendConfig != nil { + p.pusher, err = NewPusher(NewPusherOpt{ + Artifact: artifact, + BackendConfig: opt.BackendConfig, + Logger: p.logger, + }) + if err != nil { + return nil, err + } + } + return p, nil +} + +// get blobs from bootstrap +func (p *Packer) getBlobsFromBootstrap(bootstrap string) ([]string, error) { + var blobs []string + if bootstrap != "" { + inspector := tool.NewInspector(p.nydusImagePath) + item, err := inspector.Inspect(tool.InspectOption{ + Operation: tool.GetBlobs, + Bootstrap: bootstrap, + }) + if err != nil { + return []string{}, err + } + blobsInfo, _ := item.(tool.BlobInfoList) + p.logger.Infof("get blob list from bootstrap '%s': %v", bootstrap, blobsInfo) + for _, blobInfo := 
range blobsInfo { + blobs = append(blobs, blobInfo.BlobID) + } + } + return blobs, nil +} + +func (p *Packer) getChunkDictBlobs(chunkDict string) ([]string, error) { + if chunkDict == "" { + return []string{}, nil + } + // get chunk-dict file + info := strings.Split(chunkDict, "=") + if len(info) != 2 { + return []string{}, ErrInvalidChunkDictArgs + } + switch info[0] { + case "bootstrap": + return p.getBlobsFromBootstrap(info[1]) + default: + return []string{}, ErrNoSupport + } +} + +// getBlobHash will get blobs hash from output.json, the hash will be +// used oss key as blob +// ignore blobs already exist +func (p *Packer) getNewBlobsHash(exists []string) (string, error) { + // build tmp lookup map + m := make(map[string]bool) + for _, blob := range exists { + m[blob] = true + } + content, err := os.ReadFile(p.outputJSONPath()) + if err != nil { + return "", err + } + var manifest BlobManifest + if err = json.Unmarshal(content, &manifest); err != nil { + return "", err + } + for _, blob := range manifest.Blobs { + if _, ok := m[blob]; !ok { + return blob, nil + } + } + // return the latest blob hash + return "", nil +} + +func (p *Packer) dumpBlobBackendConfig(filePath string) (func(), error) { + file, err := os.OpenFile(filePath, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, err + } + defer file.Close() + n, err := file.Write(p.BackendConfig.rawBlobBackendCfg()) + if err != nil { + return nil, err + } + return func() { + zeros := make([]byte, n) + file, err = os.OpenFile(filePath, os.O_WRONLY, 0644) + if err != nil { + logrus.Errorf("failed to open config file %s, err = %v", filePath, err) + return + } + file.Write(zeros) + file.Close() + os.Remove(filePath) + }, nil +} + +func (p *Packer) tryCompactParent(req *PackRequest) error { + if !req.TryCompact || req.Parent == "" { + return nil + } + if p.BackendConfig == nil { + return errors.Errorf("backend configuration is needed to compact parent bootstrap") + } + + // dumps backend config file + backendConfigPath := filepath.Join(p.OutputDir, "backend-config.json") + destroy, err := p.dumpBlobBackendConfig(backendConfigPath) + if err != nil { + return errors.Wrap(err, "failed to dump backend config file") + } + // destroy backend config file, because there are secrets + defer destroy() + c, err := compactor.NewCompactor(p.nydusImagePath, p.OutputDir, req.CompactConfigPath) + if err != nil { + return errors.Wrap(err, "failed to new compactor") + } + outputBootstrap, err := c.Compact(req.Parent, req.ChunkDict, p.BackendConfig.backendType(), backendConfigPath) + if err != nil { + return errors.Wrap(err, "failed to compact parent") + } + // check output bootstrap + _, err = os.Stat(outputBootstrap) + if err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "failed to stat target bootstrap") + } + if err == nil { + // parent --> output bootstrap + p.logger.Infof("successfully compacted bootstrap %s, use new parent %s", req.Parent, outputBootstrap) + req.Parent = outputBootstrap + } + + return nil +} + +func (p *Packer) Pack(_ context.Context, req PackRequest) (PackResult, error) { + p.logger.Infof("start to build image from source directory %q", req.SourceDir) + if err := p.tryCompactParent(&req); err != nil { + return PackResult{}, err + } + parentBlobs, err := p.getBlobsFromBootstrap(req.Parent) + if err != nil { + return PackResult{}, errors.Wrap(err, "failed to get blobs from parent bootstrap") + } + chunkDictBlobs, err := p.getChunkDictBlobs(req.ChunkDict) + if err != nil { + return PackResult{}, 
errors.Wrap(err, "failed to get blobs from chunk-dict") + } + blobPath := p.blobFilePath(req.ImageName, false) + bootstrapPath := p.bootstrapPath(req.ImageName) + if err = p.builder.Run(build.BuilderOption{ + ParentBootstrapPath: req.Parent, + ChunkDict: req.ChunkDict, + BootstrapPath: bootstrapPath, + BlobPath: blobPath, + OutputJSONPath: p.outputJSONPath(), + RootfsPath: req.SourceDir, + WhiteoutSpec: "oci", + Compressor: req.Compressor, + ChunkSize: req.ChunkSize, + FsVersion: req.FsVersion, + }); err != nil { + return PackResult{}, errors.Wrapf(err, "failed to build image from directory %s", req.SourceDir) + } + newBlobHash, err := p.getNewBlobsHash(append(parentBlobs, chunkDictBlobs...)) + if err != nil { + return PackResult{}, errors.Wrap(err, "failed to get hash value of Nydus blob") + } + if newBlobHash == "" { + blobPath = "" + } else { + if req.Parent != "" || req.PushToRemote { + p.logger.Infof("rename blob file into sha256 csum") + newBlobName := p.blobFilePath(newBlobHash, true) + if err = os.Rename(blobPath, newBlobName); err != nil { + return PackResult{}, errors.Wrap(err, "failed to rename blob file") + } + blobPath = newBlobName + } + } + if !req.PushToRemote { + // if we don't need to push meta and blob to remote, just return the local build artifact + return PackResult{ + Meta: bootstrapPath, + Blob: blobPath, + }, nil + } + + // if pusher is empty, that means backend config is not provided + if p.pusher == nil { + return PackResult{}, errors.New("can not push image to remote due to lack of backend configuration") + } + pushResult, err := p.pusher.Push(PushRequest{ + Meta: req.ImageName, + Blob: newBlobHash, + ParentBlobs: parentBlobs, + }) + if err != nil { + return PackResult{}, errors.Wrap(err, "failed to push pack result to remote") + } + return PackResult{ + Meta: pushResult.RemoteMeta, + Blob: pushResult.RemoteBlob, + }, nil +} + +// ensureNydusImagePath ensure nydus-image binary exists, the Precedence for nydus-image is as follows +// 1. if nydusImagePath is specified try nydusImagePath first +// 2. if nydusImagePath not exists, try to find nydus-image from $PATH +// 3. return ErrNydusImageBinaryNotFound +func (p *Packer) ensureNydusImagePath() error { + // if NydusImagePath is not empty, check if binary exists + if strings.TrimSpace(p.nydusImagePath) != "" { + // if we found nydus Image Path from + if _, err := os.Stat(p.nydusImagePath); err == nil { + p.logger.Infof("found 'nydus-image' binary at %s", p.nydusImagePath) + return nil + } + // if NydusImagePath not exists, check if nydus-image can be found in PATH + if nydusBinaryPath, err := exec.LookPath(nydusBinaryName); err == nil { + p.logger.Infof("found 'nydus-image' binary at %s", nydusBinaryPath) + p.nydusImagePath = nydusBinaryPath + return nil + } + } + return ErrNydusImageBinaryNotFound +} + +func initLogger(logLevel logrus.Level) (*logrus.Logger, error) { + logger := logrus.New() + logger.SetLevel(logLevel) + logger.SetFormatter(&logrus.TextFormatter{ + FullTimestamp: true, + }) + return logger, nil +} diff --git a/contrib/nydusify/pkg/packer/packer_test.go b/contrib/nydusify/pkg/packer/packer_test.go index c52defb513f..0493241d4ac 100644 --- a/contrib/nydusify/pkg/packer/packer_test.go +++ b/contrib/nydusify/pkg/packer/packer_test.go @@ -1,231 +1,231 @@ -// Copyright 2023 Nydus Developers. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package packer - -import ( - "context" - "errors" - "io" - "os" - "os/exec" - "path/filepath" - "testing" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -type mockBuilder struct { - mock.Mock -} - -func (m *mockBuilder) Run(option build.BuilderOption) error { - args := m.Called(option) - return args.Error(0) -} - -func TestNew(t *testing.T) { - tmpDir, tearDown := setUpTmpDir(t) - defer tearDown() - - _, err := New(Opt{ - LogLevel: logrus.InfoLevel, - OutputDir: tmpDir, - NydusImagePath: filepath.Join(tmpDir, "nydus-image"), - }) - require.NoError(t, err) - - _, err = New(Opt{ - LogLevel: logrus.InfoLevel, - OutputDir: tmpDir, - }) - require.Error(t, err) - require.Contains(t, err.Error(), "failed to find nydus-image binary") - - _, err = New(Opt{ - LogLevel: logrus.InfoLevel, - OutputDir: "nil", - NydusImagePath: "nil/nydus-image", - }) - defer os.RemoveAll("nil") - if _, find := exec.LookPath("nydus-image"); find == nil { - require.NoError(t, err) - } else { - require.Error(t, err) - require.Contains(t, err.Error(), "failed to find nydus-image binary") - } - - _, err = New(Opt{ - LogLevel: logrus.InfoLevel, - OutputDir: tmpDir, - NydusImagePath: filepath.Join(tmpDir, "nydus-image"), - BackendConfig: &S3BackendConfig{ - Endpoint: "s3.amazonaws.com", - Scheme: "https", - AccessKeyID: "testAK", - AccessKeySecret: "testSK", - Region: "region1", - BucketName: "test", - MetaPrefix: "meta", - BlobPrefix: "blob", - }, - }) - require.NoError(t, err) -} - -func TestDumpBlobBackendConfig(t *testing.T) { - os.MkdirAll(t.Name(), 0755) - defer os.RemoveAll(t.Name()) - file, _ := os.Create(filepath.Join(t.Name(), "nydus-image")) - file.Write([]byte("for test")) - file.Close() - - p, err := New(Opt{ - OutputDir: t.Name(), - NydusImagePath: filepath.Join(t.Name(), "nydus-image"), - BackendConfig: &S3BackendConfig{ - Endpoint: "s3.amazonaws.com", - Scheme: "https", - AccessKeyID: "testAK", - AccessKeySecret: "testSK", - Region: "region1", - BucketName: "test", - MetaPrefix: "meta", - BlobPrefix: "blob", - }, - }) - require.NoError(t, err) - - _, err = p.dumpBlobBackendConfig(filepath.Join(t.Name(), "test.json")) - require.NoError(t, err) - data, err := os.ReadFile(filepath.Join(t.Name(), "test.json")) - require.NoError(t, err) - require.Equal(t, p.BackendConfig.rawBlobBackendCfg(), data) -} - -func copyFile(src, dst string) { - f1, err := os.Open(src) - if err != nil { - return - } - defer f1.Close() - f2, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - return - } - defer f2.Close() - io.Copy(f2, f1) -} - -func TestPack(t *testing.T) { - tmpDir, tearDown := setUpTmpDir(t) - defer tearDown() - p, err := New(Opt{ - LogLevel: logrus.InfoLevel, - OutputDir: tmpDir, - NydusImagePath: filepath.Join(tmpDir, "nydus-image"), - }) - copyFile("testdata/output.json", filepath.Join(tmpDir, "output.json")) - require.NoError(t, err) - - builder := &mockBuilder{} - p.builder = builder - builder.On("Run", mock.Anything).Return(nil) - res, err := p.Pack(context.Background(), PackRequest{ - SourceDir: tmpDir, - ImageName: "test.meta", - PushToRemote: false, - }) - require.NoError(t, err) - require.Equal(t, PackResult{ - Meta: "testdata/TestPack/test.meta", - Blob: "testdata/TestPack/test.blob", - }, res) - - errBuilder := &mockBuilder{} - p.builder = 
errBuilder - errBuilder.On("Run", mock.Anything).Return(errors.New("test")) - res, err = p.Pack(context.Background(), PackRequest{ - SourceDir: tmpDir, - ImageName: "test.meta", - PushToRemote: false, - }) - require.Error(t, err) - require.Empty(t, res) - - os.Create(filepath.Join(tmpDir, "test.meta")) - os.Create(filepath.Join(tmpDir, "test.blob")) - - p.builder = builder - _, err = p.Pack(context.Background(), PackRequest{ - SourceDir: tmpDir, - ImageName: "test.meta", - PushToRemote: true, - }) - require.Error(t, err) - require.Contains(t, err.Error(), "can not push image to remote due to lack of backend configuration") - - os.Create(filepath.Join(tmpDir, "test.meta")) - os.Create(filepath.Join(tmpDir, "test.blob")) - artifact, err := NewArtifact(tmpDir) - require.NoError(t, err) - mp := &mockBackend{} - p.pusher = &Pusher{ - Artifact: artifact, - cfg: &OssBackendConfig{ - BucketName: "testbucket", - BlobPrefix: "testblobprefix", - MetaPrefix: "testmetaprefix", - }, - logger: logrus.New(), - metaBackend: mp, - blobBackend: mp, - } - hash := "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" - - mp.On("Upload", mock.Anything, "test.meta", mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ - URLs: []string{"oss://testbucket/testmetaprefix/test.meta"}, - }, nil) - mp.On("Upload", mock.Anything, hash, mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ - URLs: []string{"oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090"}, - }, nil) - - res, err = p.Pack(context.Background(), PackRequest{ - SourceDir: tmpDir, - ImageName: "test.meta", - PushToRemote: true, - }) - require.NoError(t, err) - require.Equal(t, PackResult{ - Meta: "oss://testbucket/testmetaprefix/test.meta", - Blob: "oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", - }, res) -} - -func TestPusher_getBlobHash(t *testing.T) { - artifact, err := NewArtifact("testdata") - require.NoError(t, err) - pusher := Packer{ - Artifact: artifact, - logger: logrus.New(), - } - hash, err := pusher.getNewBlobsHash(nil) - require.NoError(t, err) - require.Equal(t, "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", hash) -} - -func setUpTmpDir(t *testing.T) (string, func()) { - tmpDir := filepath.Join("testdata", t.Name()) - os.MkdirAll(tmpDir, 0755) - file, _ := os.Create(filepath.Join(tmpDir, "nydus-image")) - file.Write([]byte("for test")) - file.Close() - return tmpDir, func() { - os.RemoveAll(tmpDir) - } -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package packer + +import ( + "context" + "errors" + "io" + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/build" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type mockBuilder struct { + mock.Mock +} + +func (m *mockBuilder) Run(option build.BuilderOption) error { + args := m.Called(option) + return args.Error(0) +} + +func TestNew(t *testing.T) { + tmpDir, tearDown := setUpTmpDir(t) + defer tearDown() + + _, err := New(Opt{ + LogLevel: logrus.InfoLevel, + OutputDir: tmpDir, + NydusImagePath: filepath.Join(tmpDir, "nydus-image"), + }) + require.NoError(t, err) + + _, err = New(Opt{ + LogLevel: logrus.InfoLevel, + OutputDir: tmpDir, + }) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to find nydus-image binary") + + _, err = New(Opt{ + LogLevel: logrus.InfoLevel, + OutputDir: "nil", + NydusImagePath: "nil/nydus-image", + }) + defer os.RemoveAll("nil") + if _, find := exec.LookPath("nydus-image"); find == nil { + require.NoError(t, err) + } else { + require.Error(t, err) + require.Contains(t, err.Error(), "failed to find nydus-image binary") + } + + _, err = New(Opt{ + LogLevel: logrus.InfoLevel, + OutputDir: tmpDir, + NydusImagePath: filepath.Join(tmpDir, "nydus-image"), + BackendConfig: &S3BackendConfig{ + Endpoint: "s3.amazonaws.com", + Scheme: "https", + AccessKeyID: "testAK", + AccessKeySecret: "testSK", + Region: "region1", + BucketName: "test", + MetaPrefix: "meta", + BlobPrefix: "blob", + }, + }) + require.NoError(t, err) +} + +func TestDumpBlobBackendConfig(t *testing.T) { + os.MkdirAll(t.Name(), 0755) + defer os.RemoveAll(t.Name()) + file, _ := os.Create(filepath.Join(t.Name(), "nydus-image")) + file.Write([]byte("for test")) + file.Close() + + p, err := New(Opt{ + OutputDir: t.Name(), + NydusImagePath: filepath.Join(t.Name(), "nydus-image"), + BackendConfig: &S3BackendConfig{ + Endpoint: "s3.amazonaws.com", + Scheme: "https", + AccessKeyID: "testAK", + AccessKeySecret: "testSK", + Region: "region1", + BucketName: "test", + MetaPrefix: "meta", + BlobPrefix: "blob", + }, + }) + require.NoError(t, err) + + _, err = p.dumpBlobBackendConfig(filepath.Join(t.Name(), "test.json")) + require.NoError(t, err) + data, err := os.ReadFile(filepath.Join(t.Name(), "test.json")) + require.NoError(t, err) + require.Equal(t, p.BackendConfig.rawBlobBackendCfg(), data) +} + +func copyFile(src, dst string) { + f1, err := os.Open(src) + if err != nil { + return + } + defer f1.Close() + f2, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return + } + defer f2.Close() + io.Copy(f2, f1) +} + +func TestPack(t *testing.T) { + tmpDir, tearDown := setUpTmpDir(t) + defer tearDown() + p, err := New(Opt{ + LogLevel: logrus.InfoLevel, + OutputDir: tmpDir, + NydusImagePath: filepath.Join(tmpDir, "nydus-image"), + }) + copyFile("testdata/output.json", filepath.Join(tmpDir, "output.json")) + require.NoError(t, err) + + builder := &mockBuilder{} + p.builder = builder + builder.On("Run", mock.Anything).Return(nil) + res, err := p.Pack(context.Background(), PackRequest{ + SourceDir: tmpDir, + ImageName: "test.meta", + PushToRemote: false, + }) + require.NoError(t, err) + require.Equal(t, PackResult{ + Meta: "testdata/TestPack/test.meta", + Blob: "testdata/TestPack/test.blob", + }, res) + + errBuilder := &mockBuilder{} + p.builder = 
errBuilder + errBuilder.On("Run", mock.Anything).Return(errors.New("test")) + res, err = p.Pack(context.Background(), PackRequest{ + SourceDir: tmpDir, + ImageName: "test.meta", + PushToRemote: false, + }) + require.Error(t, err) + require.Empty(t, res) + + os.Create(filepath.Join(tmpDir, "test.meta")) + os.Create(filepath.Join(tmpDir, "test.blob")) + + p.builder = builder + _, err = p.Pack(context.Background(), PackRequest{ + SourceDir: tmpDir, + ImageName: "test.meta", + PushToRemote: true, + }) + require.Error(t, err) + require.Contains(t, err.Error(), "can not push image to remote due to lack of backend configuration") + + os.Create(filepath.Join(tmpDir, "test.meta")) + os.Create(filepath.Join(tmpDir, "test.blob")) + artifact, err := NewArtifact(tmpDir) + require.NoError(t, err) + mp := &mockBackend{} + p.pusher = &Pusher{ + Artifact: artifact, + cfg: &OssBackendConfig{ + BucketName: "testbucket", + BlobPrefix: "testblobprefix", + MetaPrefix: "testmetaprefix", + }, + logger: logrus.New(), + metaBackend: mp, + blobBackend: mp, + } + hash := "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" + + mp.On("Upload", mock.Anything, "test.meta", mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ + URLs: []string{"oss://testbucket/testmetaprefix/test.meta"}, + }, nil) + mp.On("Upload", mock.Anything, hash, mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ + URLs: []string{"oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090"}, + }, nil) + + res, err = p.Pack(context.Background(), PackRequest{ + SourceDir: tmpDir, + ImageName: "test.meta", + PushToRemote: true, + }) + require.NoError(t, err) + require.Equal(t, PackResult{ + Meta: "oss://testbucket/testmetaprefix/test.meta", + Blob: "oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", + }, res) +} + +func TestPusher_getBlobHash(t *testing.T) { + artifact, err := NewArtifact("testdata") + require.NoError(t, err) + pusher := Packer{ + Artifact: artifact, + logger: logrus.New(), + } + hash, err := pusher.getNewBlobsHash(nil) + require.NoError(t, err) + require.Equal(t, "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", hash) +} + +func setUpTmpDir(t *testing.T) (string, func()) { + tmpDir := filepath.Join("testdata", t.Name()) + os.MkdirAll(tmpDir, 0755) + file, _ := os.Create(filepath.Join(tmpDir, "nydus-image")) + file.Write([]byte("for test")) + file.Close() + return tmpDir, func() { + os.RemoveAll(tmpDir) + } +} diff --git a/contrib/nydusify/pkg/packer/pusher.go b/contrib/nydusify/pkg/packer/pusher.go index 7f56dfa9abf..c70a0f74b37 100644 --- a/contrib/nydusify/pkg/packer/pusher.go +++ b/contrib/nydusify/pkg/packer/pusher.go @@ -1,168 +1,168 @@ -package packer - -import ( - "context" - "encoding/json" - "fmt" - "os" - "strings" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -type Pusher struct { - Artifact - cfg BackendConfig - blobBackend backend.Backend - metaBackend backend.Backend - logger *logrus.Logger -} - -type PushRequest struct { - Meta string - Blob string - - ParentBlobs []string -} - -type PushResult struct { - RemoteMeta string - RemoteBlob string -} - -type NewPusherOpt struct { - Artifact - BackendConfig BackendConfig - Logger *logrus.Logger -} - -func NewPusher(opt NewPusherOpt) (*Pusher, error) { - if 
utils.IsEmptyString(opt.OutputDir) { - return nil, errors.New("outputDir is required") - } - if !utils.IsPathExists(opt.OutputDir) { - return nil, errors.Errorf("outputDir %q does not exists", opt.OutputDir) - } - backendConfig := opt.BackendConfig - - metaBackend, err := backend.NewBackend(backendConfig.backendType(), backendConfig.rawMetaBackendCfg(), nil) - if err != nil { - return nil, errors.Wrapf(err, "failed to init backend for bootstrap blob") - } - blobBackend, err := backend.NewBackend(backendConfig.backendType(), backendConfig.rawBlobBackendCfg(), nil) - if err != nil { - return nil, errors.Wrapf(err, "failed to init backend for data blob") - } - - return &Pusher{ - Artifact: opt.Artifact, - logger: opt.Logger, - metaBackend: metaBackend, - blobBackend: blobBackend, - cfg: opt.BackendConfig, - }, nil -} - -// Push will push the meta and blob file to remote backend -// at this moment, only oss and s3 are the possible backends, the meta file name is user defined -// and blob file name is the hash of the blobfile that is extracted from output.json -func (p *Pusher) Push(req PushRequest) (pushResult PushResult, retErr error) { - p.logger.Info("start to push meta and blob to remote backend") - // todo: add a suitable timeout - ctx := context.Background() - // todo: use blob desc to build manifest - - defer func() { - if retErr != nil { - if err := p.blobBackend.Finalize(true); err != nil { - logrus.WithError(err).Warnf("Cancel blob backend upload") - } - if err := p.metaBackend.Finalize(true); err != nil { - logrus.WithError(err).Warnf("Cancel meta backend upload") - } - } - }() - - for _, blob := range req.ParentBlobs { - // try push parent blobs - if _, err := p.blobBackend.Upload(ctx, blob, p.blobFilePath(blob, true), 0, false); err != nil { - return PushResult{}, errors.Wrap(err, "failed to put blobfile to remote") - } - } - - p.logger.Infof("push blob %s", req.Blob) - if req.Blob != "" { - desc, err := p.blobBackend.Upload(ctx, req.Blob, p.blobFilePath(req.Blob, true), 0, false) - if err != nil { - return PushResult{}, errors.Wrap(err, "failed to put blobfile to remote") - } - if len(desc.URLs) > 0 { - pushResult.RemoteBlob = desc.URLs[0] - } - } - if retErr = p.blobBackend.Finalize(false); retErr != nil { - return PushResult{}, errors.Wrap(retErr, "Finalize blob backend upload") - } - - desc, retErr := p.metaBackend.Upload(ctx, req.Meta, p.bootstrapPath(req.Meta), 0, true) - if retErr != nil { - return PushResult{}, errors.Wrapf(retErr, "failed to put metafile to remote") - } - if len(desc.URLs) != 0 { - pushResult.RemoteMeta = desc.URLs[0] - } - if retErr = p.metaBackend.Finalize(false); retErr != nil { - return PushResult{}, errors.Wrap(retErr, "Finalize meta backend upload") - } - - return -} - -func ParseBackendConfig(backendType, backendConfigFile string) (BackendConfig, error) { - - cfgFile, err := os.Open(backendConfigFile) - if err != nil { - return nil, errors.Wrapf(err, "failed to open backend-config %s", backendConfigFile) - } - defer cfgFile.Close() - switch strings.ToLower(backendType) { - case "oss": - var cfg OssBackendConfig - if err = json.NewDecoder(cfgFile).Decode(&cfg); err != nil { - return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigFile) - } - return &cfg, nil - case "s3": - var cfg S3BackendConfig - if err = json.NewDecoder(cfgFile).Decode(&cfg); err != nil { - return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigFile) - } - return &cfg, nil - default: - return nil, fmt.Errorf("unsupported backend 
type %s", backendType) - } -} - -func ParseBackendConfigString(backendType, backendConfigContent string) (BackendConfig, error) { - switch strings.ToLower(backendType) { - case "oss": - var cfg OssBackendConfig - if err := json.Unmarshal([]byte(backendConfigContent), &cfg); err != nil { - return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigContent) - } - return &cfg, nil - - case "s3": - var cfg S3BackendConfig - if err := json.Unmarshal([]byte(backendConfigContent), &cfg); err != nil { - return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigContent) - } - return &cfg, nil - default: - return nil, fmt.Errorf("unsupported backend type %s", backendType) - } -} +package packer + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +type Pusher struct { + Artifact + cfg BackendConfig + blobBackend backend.Backend + metaBackend backend.Backend + logger *logrus.Logger +} + +type PushRequest struct { + Meta string + Blob string + + ParentBlobs []string +} + +type PushResult struct { + RemoteMeta string + RemoteBlob string +} + +type NewPusherOpt struct { + Artifact + BackendConfig BackendConfig + Logger *logrus.Logger +} + +func NewPusher(opt NewPusherOpt) (*Pusher, error) { + if utils.IsEmptyString(opt.OutputDir) { + return nil, errors.New("outputDir is required") + } + if !utils.IsPathExists(opt.OutputDir) { + return nil, errors.Errorf("outputDir %q does not exists", opt.OutputDir) + } + backendConfig := opt.BackendConfig + + metaBackend, err := backend.NewBackend(backendConfig.backendType(), backendConfig.rawMetaBackendCfg(), nil) + if err != nil { + return nil, errors.Wrapf(err, "failed to init backend for bootstrap blob") + } + blobBackend, err := backend.NewBackend(backendConfig.backendType(), backendConfig.rawBlobBackendCfg(), nil) + if err != nil { + return nil, errors.Wrapf(err, "failed to init backend for data blob") + } + + return &Pusher{ + Artifact: opt.Artifact, + logger: opt.Logger, + metaBackend: metaBackend, + blobBackend: blobBackend, + cfg: opt.BackendConfig, + }, nil +} + +// Push will push the meta and blob file to remote backend +// at this moment, only oss and s3 are the possible backends, the meta file name is user defined +// and blob file name is the hash of the blobfile that is extracted from output.json +func (p *Pusher) Push(req PushRequest) (pushResult PushResult, retErr error) { + p.logger.Info("start to push meta and blob to remote backend") + // todo: add a suitable timeout + ctx := context.Background() + // todo: use blob desc to build manifest + + defer func() { + if retErr != nil { + if err := p.blobBackend.Finalize(true); err != nil { + logrus.WithError(err).Warnf("Cancel blob backend upload") + } + if err := p.metaBackend.Finalize(true); err != nil { + logrus.WithError(err).Warnf("Cancel meta backend upload") + } + } + }() + + for _, blob := range req.ParentBlobs { + // try push parent blobs + if _, err := p.blobBackend.Upload(ctx, blob, p.blobFilePath(blob, true), 0, false); err != nil { + return PushResult{}, errors.Wrap(err, "failed to put blobfile to remote") + } + } + + p.logger.Infof("push blob %s", req.Blob) + if req.Blob != "" { + desc, err := p.blobBackend.Upload(ctx, req.Blob, p.blobFilePath(req.Blob, true), 0, false) + if err != nil { + return PushResult{}, errors.Wrap(err, "failed to put 
blobfile to remote") + } + if len(desc.URLs) > 0 { + pushResult.RemoteBlob = desc.URLs[0] + } + } + if retErr = p.blobBackend.Finalize(false); retErr != nil { + return PushResult{}, errors.Wrap(retErr, "Finalize blob backend upload") + } + + desc, retErr := p.metaBackend.Upload(ctx, req.Meta, p.bootstrapPath(req.Meta), 0, true) + if retErr != nil { + return PushResult{}, errors.Wrapf(retErr, "failed to put metafile to remote") + } + if len(desc.URLs) != 0 { + pushResult.RemoteMeta = desc.URLs[0] + } + if retErr = p.metaBackend.Finalize(false); retErr != nil { + return PushResult{}, errors.Wrap(retErr, "Finalize meta backend upload") + } + + return +} + +func ParseBackendConfig(backendType, backendConfigFile string) (BackendConfig, error) { + + cfgFile, err := os.Open(backendConfigFile) + if err != nil { + return nil, errors.Wrapf(err, "failed to open backend-config %s", backendConfigFile) + } + defer cfgFile.Close() + switch strings.ToLower(backendType) { + case "oss": + var cfg OssBackendConfig + if err = json.NewDecoder(cfgFile).Decode(&cfg); err != nil { + return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigFile) + } + return &cfg, nil + case "s3": + var cfg S3BackendConfig + if err = json.NewDecoder(cfgFile).Decode(&cfg); err != nil { + return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigFile) + } + return &cfg, nil + default: + return nil, fmt.Errorf("unsupported backend type %s", backendType) + } +} + +func ParseBackendConfigString(backendType, backendConfigContent string) (BackendConfig, error) { + switch strings.ToLower(backendType) { + case "oss": + var cfg OssBackendConfig + if err := json.Unmarshal([]byte(backendConfigContent), &cfg); err != nil { + return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigContent) + } + return &cfg, nil + + case "s3": + var cfg S3BackendConfig + if err := json.Unmarshal([]byte(backendConfigContent), &cfg); err != nil { + return nil, errors.Wrapf(err, "failed to decode backend-config %s", backendConfigContent) + } + return &cfg, nil + default: + return nil, fmt.Errorf("unsupported backend type %s", backendType) + } +} diff --git a/contrib/nydusify/pkg/packer/pusher_test.go b/contrib/nydusify/pkg/packer/pusher_test.go index 6bf6eb6ca7b..1468bc3d3f2 100644 --- a/contrib/nydusify/pkg/packer/pusher_test.go +++ b/contrib/nydusify/pkg/packer/pusher_test.go @@ -1,206 +1,206 @@ -// Copyright 2023 Nydus Developers. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package packer - -import ( - "context" - "io" - "os" - "path/filepath" - "testing" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -type mockBackend struct { - mock.Mock -} - -func (m *mockBackend) Upload(ctx context.Context, blobID, blobPath string, blobSize int64, forcePush bool) (*ocispec.Descriptor, error) { - args := m.Called(ctx, blobID, blobPath, blobSize, forcePush) - desc := args.Get(0) - return desc.(*ocispec.Descriptor), nil -} - -func (m *mockBackend) Finalize(_ bool) error { - return nil -} - -func (m *mockBackend) Check(_ string) (bool, error) { - return false, nil -} - -func (m *mockBackend) Type() backend.Type { - return backend.OssBackend -} - -func (m *mockBackend) Reader(_ string) (io.ReadCloser, error) { - panic("not implemented") -} - -func (m *mockBackend) Size(_ string) (int64, error) { - panic("not implemented") -} - -func Test_parseBackendConfig(t *testing.T) { - cfg, err := ParseBackendConfig("oss", filepath.Join("testdata", "backend-config.json")) - require.NoError(t, err) - require.Equal(t, &OssBackendConfig{ - Endpoint: "mock.aliyuncs.com", - AccessKeyID: "testid", - AccessKeySecret: "testkey", - BucketName: "testbucket", - MetaPrefix: "test/", - BlobPrefix: "", - }, cfg) -} - -func Test_parseBackendConfigString(t *testing.T) { - cfg, err := ParseBackendConfigString("oss", ` - { - "endpoint": "mock.aliyuncs.com", - "access_key_id": "testid", - "access_key_secret": "testkey", - "bucket_name": "testbucket", - "meta_prefix": "test/", - "blob_prefix": "" - }`) - require.NoError(t, err) - require.Equal(t, &OssBackendConfig{ - Endpoint: "mock.aliyuncs.com", - AccessKeyID: "testid", - AccessKeySecret: "testkey", - BucketName: "testbucket", - MetaPrefix: "test/", - BlobPrefix: "", - }, cfg) - - cfg, err = ParseBackendConfigString("s3", ` - { - "bucket_name": "test", - "endpoint": "s3.amazonaws.com", - "access_key_id": "testAK", - "access_key_secret": "testSK", - "object_prefix": "blob", - "scheme": "https", - "region": "region1", - "meta_prefix": "meta/", - "blob_prefix": "blob/" - }`) - require.NoError(t, err) - require.Equal(t, &S3BackendConfig{ - Endpoint: "s3.amazonaws.com", - AccessKeyID: "testAK", - AccessKeySecret: "testSK", - BucketName: "test", - Scheme: "https", - Region: "region1", - MetaPrefix: "meta/", - BlobPrefix: "blob/", - }, cfg) - - cfg, err = ParseBackendConfigString("registry", "") - require.Error(t, err) - require.Contains(t, err.Error(), "unsupported backend type") - require.Empty(t, cfg) -} - -func TestPusher_Push(t *testing.T) { - tmpDir, tearDown := setUpTmpDir(t) - defer tearDown() - - os.Create(filepath.Join(tmpDir, "mock.meta")) - os.Create(filepath.Join(tmpDir, "mock.blob")) - content, _ := os.ReadFile(filepath.Join("testdata", "output.json")) - os.WriteFile(filepath.Join(tmpDir, "output.json"), content, 0755) - - artifact, err := NewArtifact(tmpDir) - require.NoError(t, err) - - mp := &mockBackend{} - pusher := Pusher{ - Artifact: artifact, - cfg: &OssBackendConfig{ - BucketName: "testbucket", - BlobPrefix: "testblobprefix", - MetaPrefix: "testmetaprefix", - }, - logger: logrus.New(), - metaBackend: mp, - blobBackend: mp, - } - hash := "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" - mp.On("Upload", mock.Anything, "mock.meta", mock.Anything, mock.Anything, 
mock.Anything).Return(&ocispec.Descriptor{ - URLs: []string{"oss://testbucket/testmetaprefix/mock.meta"}, - }, nil) - mp.On("Upload", mock.Anything, hash, mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ - URLs: []string{"oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090"}, - }, nil) - - res, err := pusher.Push(PushRequest{ - Meta: "mock.meta", - Blob: hash, - }) - require.NoError(t, err) - require.Equal( - t, - PushResult{ - RemoteMeta: "oss://testbucket/testmetaprefix/mock.meta", - RemoteBlob: "oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", - }, - res, - ) -} - -func TestNewPusher(t *testing.T) { - backendConfig := &OssBackendConfig{ - Endpoint: "region.oss.com", - BucketName: "testbucket", - BlobPrefix: "testblobprefix", - MetaPrefix: "testmetaprefix", - } - tmpDir, tearDown := setUpTmpDir(t) - defer tearDown() - - artifact, err := NewArtifact(tmpDir) - require.NoError(t, err) - _, err = NewPusher(NewPusherOpt{ - Artifact: artifact, - BackendConfig: backendConfig, - Logger: logrus.New(), - }) - require.NoError(t, err) - - _, err = NewPusher(NewPusherOpt{ - BackendConfig: backendConfig, - Logger: logrus.New(), - }) - require.Error(t, err) - require.Contains(t, err.Error(), "outputDir is required") - - _, err = NewPusher(NewPusherOpt{ - Artifact: Artifact{OutputDir: "test"}, - BackendConfig: backendConfig, - Logger: logrus.New(), - }) - require.Error(t, err) - require.Contains(t, err.Error(), "does not exists") - - _, err = NewPusher(NewPusherOpt{ - Artifact: artifact, - BackendConfig: &OssBackendConfig{ - BucketName: "testbucket", - BlobPrefix: "testblobprefix", - MetaPrefix: "testmetaprefix", - }, - Logger: logrus.New(), - }) - require.Error(t, err) - require.Contains(t, err.Error(), "failed to init backend for bootstrap blob") -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package packer + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/backend" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type mockBackend struct { + mock.Mock +} + +func (m *mockBackend) Upload(ctx context.Context, blobID, blobPath string, blobSize int64, forcePush bool) (*ocispec.Descriptor, error) { + args := m.Called(ctx, blobID, blobPath, blobSize, forcePush) + desc := args.Get(0) + return desc.(*ocispec.Descriptor), nil +} + +func (m *mockBackend) Finalize(_ bool) error { + return nil +} + +func (m *mockBackend) Check(_ string) (bool, error) { + return false, nil +} + +func (m *mockBackend) Type() backend.Type { + return backend.OssBackend +} + +func (m *mockBackend) Reader(_ string) (io.ReadCloser, error) { + panic("not implemented") +} + +func (m *mockBackend) Size(_ string) (int64, error) { + panic("not implemented") +} + +func Test_parseBackendConfig(t *testing.T) { + cfg, err := ParseBackendConfig("oss", filepath.Join("testdata", "backend-config.json")) + require.NoError(t, err) + require.Equal(t, &OssBackendConfig{ + Endpoint: "mock.aliyuncs.com", + AccessKeyID: "testid", + AccessKeySecret: "testkey", + BucketName: "testbucket", + MetaPrefix: "test/", + BlobPrefix: "", + }, cfg) +} + +func Test_parseBackendConfigString(t *testing.T) { + cfg, err := ParseBackendConfigString("oss", ` + { + "endpoint": "mock.aliyuncs.com", + "access_key_id": "testid", + "access_key_secret": "testkey", + "bucket_name": "testbucket", + "meta_prefix": "test/", + "blob_prefix": "" + }`) + require.NoError(t, err) + require.Equal(t, &OssBackendConfig{ + Endpoint: "mock.aliyuncs.com", + AccessKeyID: "testid", + AccessKeySecret: "testkey", + BucketName: "testbucket", + MetaPrefix: "test/", + BlobPrefix: "", + }, cfg) + + cfg, err = ParseBackendConfigString("s3", ` + { + "bucket_name": "test", + "endpoint": "s3.amazonaws.com", + "access_key_id": "testAK", + "access_key_secret": "testSK", + "object_prefix": "blob", + "scheme": "https", + "region": "region1", + "meta_prefix": "meta/", + "blob_prefix": "blob/" + }`) + require.NoError(t, err) + require.Equal(t, &S3BackendConfig{ + Endpoint: "s3.amazonaws.com", + AccessKeyID: "testAK", + AccessKeySecret: "testSK", + BucketName: "test", + Scheme: "https", + Region: "region1", + MetaPrefix: "meta/", + BlobPrefix: "blob/", + }, cfg) + + cfg, err = ParseBackendConfigString("registry", "") + require.Error(t, err) + require.Contains(t, err.Error(), "unsupported backend type") + require.Empty(t, cfg) +} + +func TestPusher_Push(t *testing.T) { + tmpDir, tearDown := setUpTmpDir(t) + defer tearDown() + + os.Create(filepath.Join(tmpDir, "mock.meta")) + os.Create(filepath.Join(tmpDir, "mock.blob")) + content, _ := os.ReadFile(filepath.Join("testdata", "output.json")) + os.WriteFile(filepath.Join(tmpDir, "output.json"), content, 0755) + + artifact, err := NewArtifact(tmpDir) + require.NoError(t, err) + + mp := &mockBackend{} + pusher := Pusher{ + Artifact: artifact, + cfg: &OssBackendConfig{ + BucketName: "testbucket", + BlobPrefix: "testblobprefix", + MetaPrefix: "testmetaprefix", + }, + logger: logrus.New(), + metaBackend: mp, + blobBackend: mp, + } + hash := "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" + mp.On("Upload", mock.Anything, "mock.meta", mock.Anything, mock.Anything, 
mock.Anything).Return(&ocispec.Descriptor{ + URLs: []string{"oss://testbucket/testmetaprefix/mock.meta"}, + }, nil) + mp.On("Upload", mock.Anything, hash, mock.Anything, mock.Anything, mock.Anything).Return(&ocispec.Descriptor{ + URLs: []string{"oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090"}, + }, nil) + + res, err := pusher.Push(PushRequest{ + Meta: "mock.meta", + Blob: hash, + }) + require.NoError(t, err) + require.Equal( + t, + PushResult{ + RemoteMeta: "oss://testbucket/testmetaprefix/mock.meta", + RemoteBlob: "oss://testbucket/testblobprefix/3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090", + }, + res, + ) +} + +func TestNewPusher(t *testing.T) { + backendConfig := &OssBackendConfig{ + Endpoint: "region.oss.com", + BucketName: "testbucket", + BlobPrefix: "testblobprefix", + MetaPrefix: "testmetaprefix", + } + tmpDir, tearDown := setUpTmpDir(t) + defer tearDown() + + artifact, err := NewArtifact(tmpDir) + require.NoError(t, err) + _, err = NewPusher(NewPusherOpt{ + Artifact: artifact, + BackendConfig: backendConfig, + Logger: logrus.New(), + }) + require.NoError(t, err) + + _, err = NewPusher(NewPusherOpt{ + BackendConfig: backendConfig, + Logger: logrus.New(), + }) + require.Error(t, err) + require.Contains(t, err.Error(), "outputDir is required") + + _, err = NewPusher(NewPusherOpt{ + Artifact: Artifact{OutputDir: "test"}, + BackendConfig: backendConfig, + Logger: logrus.New(), + }) + require.Error(t, err) + require.Contains(t, err.Error(), "does not exists") + + _, err = NewPusher(NewPusherOpt{ + Artifact: artifact, + BackendConfig: &OssBackendConfig{ + BucketName: "testbucket", + BlobPrefix: "testblobprefix", + MetaPrefix: "testmetaprefix", + }, + Logger: logrus.New(), + }) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to init backend for bootstrap blob") +} diff --git a/contrib/nydusify/pkg/packer/testdata/backend-config.json b/contrib/nydusify/pkg/packer/testdata/backend-config.json index 7d263bd5067..bee2f0bf795 100644 --- a/contrib/nydusify/pkg/packer/testdata/backend-config.json +++ b/contrib/nydusify/pkg/packer/testdata/backend-config.json @@ -1,8 +1,8 @@ -{ - "endpoint": "mock.aliyuncs.com", - "access_key_id": "testid", - "access_key_secret": "testkey", - "bucket_name": "testbucket", - "meta_prefix": "test/", - "blob_prefix": "" +{ + "endpoint": "mock.aliyuncs.com", + "access_key_id": "testid", + "access_key_secret": "testkey", + "bucket_name": "testbucket", + "meta_prefix": "test/", + "blob_prefix": "" } \ No newline at end of file diff --git a/contrib/nydusify/pkg/packer/testdata/output.json b/contrib/nydusify/pkg/packer/testdata/output.json index 68507c659ca..af9ea0ca754 100644 --- a/contrib/nydusify/pkg/packer/testdata/output.json +++ b/contrib/nydusify/pkg/packer/testdata/output.json @@ -1,26 +1,26 @@ -{ - "version": "1.7.0-44dd2c425152e433c58f20575803d4d6fd5a3ea5", - "blobs": [ - "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" - ], - "trace": { - "consumed_time": { - "dump_blob": 0.00003558300159056671, - "dump_bootstrap": 8.193000212486368e-6, - "load_from_directory": 0.00003673899846035056, - "load_from_parent_bootstrap": 0.000011576000360946637, - "total_build": 0.00021919599385000765, - "validate_bootstrap": 0.00004726599945570342 - }, - "registered_events": { - "blob_compressed_size": 7, - "blob_decompressed_size": 7, - "dedup_chunks": 1, - "dedup_decompressed_size": 7, - "egid": "100", - "euid": "127824", - "load_from_directory": 2, - 
"load_from_parent_bootstrap": 2 - } - } -} +{ + "version": "1.7.0-44dd2c425152e433c58f20575803d4d6fd5a3ea5", + "blobs": [ + "3093776c78a21e47f0a8b4c80a1f019b1e838fc1ade274209332af1ca5f57090" + ], + "trace": { + "consumed_time": { + "dump_blob": 0.00003558300159056671, + "dump_bootstrap": 8.193000212486368e-6, + "load_from_directory": 0.00003673899846035056, + "load_from_parent_bootstrap": 0.000011576000360946637, + "total_build": 0.00021919599385000765, + "validate_bootstrap": 0.00004726599945570342 + }, + "registered_events": { + "blob_compressed_size": 7, + "blob_decompressed_size": 7, + "dedup_chunks": 1, + "dedup_decompressed_size": 7, + "egid": "100", + "euid": "127824", + "load_from_directory": 2, + "load_from_parent_bootstrap": 2 + } + } +} diff --git a/contrib/nydusify/pkg/parser/parser.go b/contrib/nydusify/pkg/parser/parser.go index 64834242d60..0ce200355cd 100644 --- a/contrib/nydusify/pkg/parser/parser.go +++ b/contrib/nydusify/pkg/parser/parser.go @@ -1,267 +1,267 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package parser - -import ( - "context" - "encoding/json" - "fmt" - "io" - "strings" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" - - "github.com/containerd/containerd/images" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -// Parser parses OCI & Nydus image manifest, manifest index and -// image config into Parsed object, see the Nydus image example: -// examples/manifest/index.json, examples/manifest/manifest.json. -type Parser struct { - Remote *remote.Remote - // Principle to select platform arch/os is that nydus only works on top of linux - // and interestedArch has to be specified in case of manifest list. So nydusify - // knows how to choose the source image. In case of single manifest, `interestedArch` - // is the same with origin. - interestedArch string -} - -// Image presents image contents. -type Image struct { - Desc ocispec.Descriptor - Manifest ocispec.Manifest - Config ocispec.Image -} - -// Parsed presents OCI and Nydus image manifest. -// Nydus image conversion only works on top of an existed oci image whose platform is linux/amd64 -type Parsed struct { - Index *ocispec.Index - // The base image from which to generate nydus image. - OCIImage *Image - NydusImage *Image -} - -// New creates Nydus image parser instance. 
-func New(remote *remote.Remote, interestedArch string) (*Parser, error) { - if !utils.IsSupportedArch(interestedArch) { - return nil, fmt.Errorf("invalid arch %s", interestedArch) - } - return &Parser{ - Remote: remote, - interestedArch: interestedArch, - }, nil -} - -// Try to find the topmost layer in Nydus manifest, it should -// be a Nydus bootstrap layer, see examples/manifest/manifest.json -func FindNydusBootstrapDesc(manifest *ocispec.Manifest) *ocispec.Descriptor { - layers := manifest.Layers - if len(layers) != 0 { - desc := &layers[len(layers)-1] - if (desc.MediaType == ocispec.MediaTypeImageLayerGzip || - desc.MediaType == images.MediaTypeDockerSchema2LayerGzip) && - desc.Annotations[utils.LayerAnnotationNydusBootstrap] == "true" { - return desc - } - } - return nil -} - -func (parser *Parser) pull(ctx context.Context, desc *ocispec.Descriptor, res interface{}) error { - reader, err := parser.Remote.Pull(ctx, *desc, true) - if err != nil { - return errors.Wrap(err, "pull image resource") - } - defer reader.Close() - - bytes, err := io.ReadAll(reader) - if err != nil { - return errors.Wrap(err, "read image resource") - } - - if err := json.Unmarshal(bytes, res); err != nil { - return errors.Wrap(err, "unmarshal image resource") - } - - return nil -} - -func (parser *Parser) pullManifest(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Manifest, error) { - var manifest ocispec.Manifest - if err := parser.pull(ctx, desc, &manifest); err != nil { - return nil, errors.Wrap(err, "pull image manifest") - } - return &manifest, nil -} - -func (parser *Parser) pullConfig(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Image, error) { - var config ocispec.Image - if err := parser.pull(ctx, desc, &config); err != nil { - return nil, errors.Wrap(err, "pull image config") - } - return &config, nil -} - -func (parser *Parser) pullIndex(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Index, error) { - var index ocispec.Index - if err := parser.pull(ctx, desc, &index); err != nil { - return nil, errors.Wrap(err, "pull image index") - } - return &index, nil -} - -func (parser *Parser) parseImage( - ctx context.Context, desc *ocispec.Descriptor, onlyManifest *ocispec.Manifest, ignoreArch bool, -) (*Image, error) { - var manifest *ocispec.Manifest - var err error - if onlyManifest != nil { - manifest = onlyManifest - } else { - manifest, err = parser.pullManifest(ctx, desc) - if err != nil { - return nil, errors.Wrap(err, "pull image manifest") - } - } - config, err := parser.pullConfig(ctx, &manifest.Config) - if err != nil { - return nil, errors.Wrap(err, "pull image config") - } - - if config.OS == "" || config.Architecture == "" { - err = errors.New("Source image configuration does not have os or architecture") - if ignoreArch { - logrus.WithError(err).Warn("Ignore image arch") - } else { - return nil, err - } - } - - // Just give user a simple hint telling option was ignored. - if config.Architecture != parser.interestedArch { - err = errors.Errorf("Found arch %s, but the specified target arch (--platform) is %s", config.Architecture, parser.interestedArch) - if ignoreArch { - logrus.WithError(err).Warn("Ignore image arch, attempting to continue converting") - } else { - return nil, err - } - } - - return &Image{ - Desc: *desc, - Manifest: *manifest, - Config: *config, - }, nil -} - -// PullNydusBootstrap pulls Nydus bootstrap layer from Nydus image. 
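A hedged usage sketch of the parsing flow defined in this file (the function itself continues unchanged below): the helper name, the destination path handling, and the choice of `amd64` are illustrative assumptions, while `DefaultRemote`, `parser.New`, `Parse`, and `PullNydusBootstrap` are the APIs shown in this diff.

```go
package example

import (
	"context"
	"fmt"
	"io"
	"os"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser"
	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
)

// saveBootstrap is a hypothetical end-to-end caller: it resolves the reference
// with a provider remote, parses it for the amd64 platform, and writes the raw
// Nydus bootstrap layer stream to dst. Decompressing the layer is out of scope.
func saveBootstrap(ctx context.Context, ref, dst string) error {
	rmt, err := provider.DefaultRemote(ref, false) // insecure = false
	if err != nil {
		return err
	}
	p, err := parser.New(rmt, "amd64") // interestedArch, validated by New
	if err != nil {
		return err
	}
	parsed, err := p.Parse(ctx)
	if err != nil {
		return err
	}
	if parsed.NydusImage == nil {
		return fmt.Errorf("reference %s does not carry a Nydus manifest", ref)
	}
	rc, err := p.PullNydusBootstrap(ctx, parsed.NydusImage)
	if err != nil {
		return err
	}
	defer rc.Close()

	f, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = io.Copy(f, rc) // the layer stream is written as-is
	return err
}
```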
-func (parser *Parser) PullNydusBootstrap(ctx context.Context, image *Image) (io.ReadCloser, error) { - bootstrapDesc := FindNydusBootstrapDesc(&image.Manifest) - if bootstrapDesc == nil { - return nil, fmt.Errorf("not found Nydus bootstrap layer in manifest") - } - reader, err := parser.Remote.Pull(ctx, *bootstrapDesc, true) - if err != nil { - return nil, errors.Wrap(err, "pull Nydus bootstrap layer") - } - return reader, nil -} - -func (parser *Parser) matchImagePlatform(desc *ocispec.Descriptor) bool { - if parser.interestedArch == desc.Platform.Architecture && desc.Platform.OS == "linux" { - return true - } - return false -} - -// Parse parses Nydus image reference into Parsed object. -func (parser *Parser) Parse(ctx context.Context) (*Parsed, error) { - logrus.Infof("Parsing image %s", parser.Remote.Ref) - - parsed := Parsed{} - - imageDesc, err := parser.Remote.Resolve(ctx) - if err != nil { - if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { - logrus.Warningln("try to enable \"--source-insecure\" / \"--target-insecure\" option") - } - return nil, errors.Wrap(err, "resolve image") - } - - var ociDesc *ocispec.Descriptor - var nydusDesc *ocispec.Descriptor - var onlyManifest *ocispec.Manifest - var ignoreArch bool - - switch imageDesc.MediaType { - // Handle image manifest - case ocispec.MediaTypeImageManifest, images.MediaTypeDockerSchema2Manifest: - // Because there is only one manifest, the source is determined, - // `interestedArch` does not have effect. - onlyManifest, err = parser.pullManifest(ctx, imageDesc) - if err != nil { - return nil, err - } - - bootstrapDesc := FindNydusBootstrapDesc(onlyManifest) - if bootstrapDesc != nil { - nydusDesc = imageDesc - } else { - ociDesc = imageDesc - } - // For a single manifest image, we just ignore the arch, so that allowing - // to do a default conversion on a different arch's host, for example - // converting an arm64 image on an amd64 host. - ignoreArch = true - - // Handle image manifest index - case ocispec.MediaTypeImageIndex, images.MediaTypeDockerSchema2ManifestList: - index, err := parser.pullIndex(ctx, imageDesc) - if err != nil { - return nil, err - } - parsed.Index = index - - for idx := range index.Manifests { - desc := index.Manifests[idx] - if desc.Platform != nil { - // Currently, parser only finds one interested image. - if parser.matchImagePlatform(&desc) { - if utils.IsNydusPlatform(desc.Platform) { - nydusDesc = &desc - } else { - ociDesc = &desc - } - } - } else { - // FIXME: Returning the first image without platform specified is subtle. - // It might not violate Image spec. - ociDesc = &desc - logrus.Warnf("Will cook a image without platform, %s", ociDesc.Digest) - } - } - } - - if ociDesc != nil { - parsed.OCIImage, err = parser.parseImage(ctx, ociDesc, onlyManifest, ignoreArch) - if err != nil { - return nil, errors.Wrap(err, "Parse OCI image") - } - } - - if nydusDesc != nil { - parsed.NydusImage, err = parser.parseImage(ctx, nydusDesc, onlyManifest, ignoreArch) - if err != nil { - return nil, errors.Wrap(err, "Parse Nydus image") - } - } - - return &parsed, nil -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package parser + +import ( + "context" + "encoding/json" + "fmt" + "io" + "strings" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" + + "github.com/containerd/containerd/images" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// Parser parses OCI & Nydus image manifest, manifest index and +// image config into Parsed object, see the Nydus image example: +// examples/manifest/index.json, examples/manifest/manifest.json. +type Parser struct { + Remote *remote.Remote + // Principle to select platform arch/os is that nydus only works on top of linux + // and interestedArch has to be specified in case of manifest list. So nydusify + // knows how to choose the source image. In case of single manifest, `interestedArch` + // is the same with origin. + interestedArch string +} + +// Image presents image contents. +type Image struct { + Desc ocispec.Descriptor + Manifest ocispec.Manifest + Config ocispec.Image +} + +// Parsed presents OCI and Nydus image manifest. +// Nydus image conversion only works on top of an existed oci image whose platform is linux/amd64 +type Parsed struct { + Index *ocispec.Index + // The base image from which to generate nydus image. + OCIImage *Image + NydusImage *Image +} + +// New creates Nydus image parser instance. +func New(remote *remote.Remote, interestedArch string) (*Parser, error) { + if !utils.IsSupportedArch(interestedArch) { + return nil, fmt.Errorf("invalid arch %s", interestedArch) + } + return &Parser{ + Remote: remote, + interestedArch: interestedArch, + }, nil +} + +// Try to find the topmost layer in Nydus manifest, it should +// be a Nydus bootstrap layer, see examples/manifest/manifest.json +func FindNydusBootstrapDesc(manifest *ocispec.Manifest) *ocispec.Descriptor { + layers := manifest.Layers + if len(layers) != 0 { + desc := &layers[len(layers)-1] + if (desc.MediaType == ocispec.MediaTypeImageLayerGzip || + desc.MediaType == images.MediaTypeDockerSchema2LayerGzip) && + desc.Annotations[utils.LayerAnnotationNydusBootstrap] == "true" { + return desc + } + } + return nil +} + +func (parser *Parser) pull(ctx context.Context, desc *ocispec.Descriptor, res interface{}) error { + reader, err := parser.Remote.Pull(ctx, *desc, true) + if err != nil { + return errors.Wrap(err, "pull image resource") + } + defer reader.Close() + + bytes, err := io.ReadAll(reader) + if err != nil { + return errors.Wrap(err, "read image resource") + } + + if err := json.Unmarshal(bytes, res); err != nil { + return errors.Wrap(err, "unmarshal image resource") + } + + return nil +} + +func (parser *Parser) pullManifest(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Manifest, error) { + var manifest ocispec.Manifest + if err := parser.pull(ctx, desc, &manifest); err != nil { + return nil, errors.Wrap(err, "pull image manifest") + } + return &manifest, nil +} + +func (parser *Parser) pullConfig(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Image, error) { + var config ocispec.Image + if err := parser.pull(ctx, desc, &config); err != nil { + return nil, errors.Wrap(err, "pull image config") + } + return &config, nil +} + +func (parser *Parser) pullIndex(ctx context.Context, desc *ocispec.Descriptor) (*ocispec.Index, error) { + var index ocispec.Index + if err := parser.pull(ctx, desc, &index); err != nil { + return nil, errors.Wrap(err, "pull image index") + } + 
return &index, nil +} + +func (parser *Parser) parseImage( + ctx context.Context, desc *ocispec.Descriptor, onlyManifest *ocispec.Manifest, ignoreArch bool, +) (*Image, error) { + var manifest *ocispec.Manifest + var err error + if onlyManifest != nil { + manifest = onlyManifest + } else { + manifest, err = parser.pullManifest(ctx, desc) + if err != nil { + return nil, errors.Wrap(err, "pull image manifest") + } + } + config, err := parser.pullConfig(ctx, &manifest.Config) + if err != nil { + return nil, errors.Wrap(err, "pull image config") + } + + if config.OS == "" || config.Architecture == "" { + err = errors.New("Source image configuration does not have os or architecture") + if ignoreArch { + logrus.WithError(err).Warn("Ignore image arch") + } else { + return nil, err + } + } + + // Just give user a simple hint telling option was ignored. + if config.Architecture != parser.interestedArch { + err = errors.Errorf("Found arch %s, but the specified target arch (--platform) is %s", config.Architecture, parser.interestedArch) + if ignoreArch { + logrus.WithError(err).Warn("Ignore image arch, attempting to continue converting") + } else { + return nil, err + } + } + + return &Image{ + Desc: *desc, + Manifest: *manifest, + Config: *config, + }, nil +} + +// PullNydusBootstrap pulls Nydus bootstrap layer from Nydus image. +func (parser *Parser) PullNydusBootstrap(ctx context.Context, image *Image) (io.ReadCloser, error) { + bootstrapDesc := FindNydusBootstrapDesc(&image.Manifest) + if bootstrapDesc == nil { + return nil, fmt.Errorf("not found Nydus bootstrap layer in manifest") + } + reader, err := parser.Remote.Pull(ctx, *bootstrapDesc, true) + if err != nil { + return nil, errors.Wrap(err, "pull Nydus bootstrap layer") + } + return reader, nil +} + +func (parser *Parser) matchImagePlatform(desc *ocispec.Descriptor) bool { + if parser.interestedArch == desc.Platform.Architecture && desc.Platform.OS == "linux" { + return true + } + return false +} + +// Parse parses Nydus image reference into Parsed object. +func (parser *Parser) Parse(ctx context.Context) (*Parsed, error) { + logrus.Infof("Parsing image %s", parser.Remote.Ref) + + parsed := Parsed{} + + imageDesc, err := parser.Remote.Resolve(ctx) + if err != nil { + if strings.Contains(err.Error(), "x509: certificate signed by unknown authority") { + logrus.Warningln("try to enable \"--source-insecure\" / \"--target-insecure\" option") + } + return nil, errors.Wrap(err, "resolve image") + } + + var ociDesc *ocispec.Descriptor + var nydusDesc *ocispec.Descriptor + var onlyManifest *ocispec.Manifest + var ignoreArch bool + + switch imageDesc.MediaType { + // Handle image manifest + case ocispec.MediaTypeImageManifest, images.MediaTypeDockerSchema2Manifest: + // Because there is only one manifest, the source is determined, + // `interestedArch` does not have effect. + onlyManifest, err = parser.pullManifest(ctx, imageDesc) + if err != nil { + return nil, err + } + + bootstrapDesc := FindNydusBootstrapDesc(onlyManifest) + if bootstrapDesc != nil { + nydusDesc = imageDesc + } else { + ociDesc = imageDesc + } + // For a single manifest image, we just ignore the arch, so that allowing + // to do a default conversion on a different arch's host, for example + // converting an arm64 image on an amd64 host. 
+ ignoreArch = true + + // Handle image manifest index + case ocispec.MediaTypeImageIndex, images.MediaTypeDockerSchema2ManifestList: + index, err := parser.pullIndex(ctx, imageDesc) + if err != nil { + return nil, err + } + parsed.Index = index + + for idx := range index.Manifests { + desc := index.Manifests[idx] + if desc.Platform != nil { + // Currently, parser only finds one interested image. + if parser.matchImagePlatform(&desc) { + if utils.IsNydusPlatform(desc.Platform) { + nydusDesc = &desc + } else { + ociDesc = &desc + } + } + } else { + // FIXME: Returning the first image without platform specified is subtle. + // It might not violate Image spec. + ociDesc = &desc + logrus.Warnf("Will cook a image without platform, %s", ociDesc.Digest) + } + } + } + + if ociDesc != nil { + parsed.OCIImage, err = parser.parseImage(ctx, ociDesc, onlyManifest, ignoreArch) + if err != nil { + return nil, errors.Wrap(err, "Parse OCI image") + } + } + + if nydusDesc != nil { + parsed.NydusImage, err = parser.parseImage(ctx, nydusDesc, onlyManifest, ignoreArch) + if err != nil { + return nil, errors.Wrap(err, "Parse Nydus image") + } + } + + return &parsed, nil +} diff --git a/contrib/nydusify/pkg/provider/logger.go b/contrib/nydusify/pkg/provider/logger.go index a640f3119cd..d11adfdd2c5 100644 --- a/contrib/nydusify/pkg/provider/logger.go +++ b/contrib/nydusify/pkg/provider/logger.go @@ -1,41 +1,41 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package provider - -import ( - "context" - "time" - - "github.com/sirupsen/logrus" -) - -// LoggerFields shows key-value like info in log line -type LoggerFields = map[string]interface{} - -// ProgressLogger displays the progress log of conversion -type ProgressLogger interface { - Log(ctx context.Context, msg string, fields LoggerFields) func(error) error -} - -type defaultLogger struct{} - -func (logger *defaultLogger) Log(_ context.Context, msg string, fields LoggerFields) func(err error) error { - if fields == nil { - fields = make(LoggerFields) - } - logrus.WithFields(fields).Info(msg) - start := time.Now() - return func(err error) error { - duration := time.Since(start) - fields["Time"] = duration.String() - logrus.WithFields(fields).Info(msg) - return err - } -} - -// DefaultLogger provides a basic logger outputted to stdout -func DefaultLogger() (ProgressLogger, error) { - return &defaultLogger{}, nil -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package provider + +import ( + "context" + "time" + + "github.com/sirupsen/logrus" +) + +// LoggerFields shows key-value like info in log line +type LoggerFields = map[string]interface{} + +// ProgressLogger displays the progress log of conversion +type ProgressLogger interface { + Log(ctx context.Context, msg string, fields LoggerFields) func(error) error +} + +type defaultLogger struct{} + +func (logger *defaultLogger) Log(_ context.Context, msg string, fields LoggerFields) func(err error) error { + if fields == nil { + fields = make(LoggerFields) + } + logrus.WithFields(fields).Info(msg) + start := time.Now() + return func(err error) error { + duration := time.Since(start) + fields["Time"] = duration.String() + logrus.WithFields(fields).Info(msg) + return err + } +} + +// DefaultLogger provides a basic logger outputted to stdout +func DefaultLogger() (ProgressLogger, error) { + return &defaultLogger{}, nil +} diff --git a/contrib/nydusify/pkg/provider/remote.go b/contrib/nydusify/pkg/provider/remote.go index 78076ddd376..2001ca4115b 100644 --- a/contrib/nydusify/pkg/provider/remote.go +++ b/contrib/nydusify/pkg/provider/remote.go @@ -1,115 +1,115 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package provider - -import ( - "crypto/tls" - "encoding/base64" - "net" - "net/http" - "os" - "strings" - "time" - - "github.com/containerd/containerd/remotes" - "github.com/containerd/containerd/remotes/docker" - dockerconfig "github.com/docker/cli/cli/config" - "github.com/pkg/errors" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" -) - -func newDefaultClient(skipTLSVerify bool) *http.Client { - return &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - DualStack: true, - }).DialContext, - MaxIdleConns: 10, - IdleConnTimeout: 30 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - DisableKeepAlives: true, - TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: skipTLSVerify, - }, - }, - } -} - -// withCredentialFunc accepts host url parameter and returns with -// username, password and error. -type withCredentialFunc = func(string) (string, string, error) - -// withRemote creates a remote instance, it uses the implementation of containerd -// docker remote to access image from remote registry. -func withRemote(ref string, insecure bool, credFunc withCredentialFunc) (*remote.Remote, error) { - resolverFunc := func(retryWithHTTP bool) remotes.Resolver { - registryHosts := docker.ConfigureDefaultRegistries( - docker.WithAuthorizer( - docker.NewDockerAuthorizer( - docker.WithAuthClient(newDefaultClient(insecure)), - docker.WithAuthCreds(credFunc), - ), - ), - docker.WithClient(newDefaultClient(insecure)), - docker.WithPlainHTTP(func(_ string) (bool, error) { - return retryWithHTTP, nil - }), - ) - - return docker.NewResolver(docker.ResolverOptions{ - Hosts: registryHosts, - }) - } - - return remote.New(ref, resolverFunc) -} - -// DefaultRemote creates a remote instance, it attempts to read docker auth config -// file `$DOCKER_CONFIG/config.json` to communicate with remote registry, `$DOCKER_CONFIG` -// defaults to `~/.docker`. 
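Stepping back to the `ProgressLogger` contract in logger.go above: `Log` is called once when a step starts and returns a closure that is invoked with the step's error, re-emitting the same fields plus a `Time` entry and passing the error through. A minimal sketch, assuming a hypothetical `buildLayer` step and made-up field values:

```go
package example

import (
	"context"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
)

// buildLayer is a hypothetical conversion step timed with a ProgressLogger.
func buildLayer(ctx context.Context, logger provider.ProgressLogger) error {
	done := logger.Log(ctx, "Building layer", provider.LoggerFields{"Layer": "sha256:abcd"})
	err := doWork(ctx) // placeholder for the real work being timed
	return done(err)   // logs the same message again with a "Time" field
}

func doWork(context.Context) error { return nil } // stub so the sketch compiles
```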
-func DefaultRemote(ref string, insecure bool) (*remote.Remote, error) { - return withRemote(ref, insecure, func(host string) (string, string, error) { - // The host of docker hub image will be converted to `registry-1.docker.io` in: - // github.com/containerd/containerd/remotes/docker/registry.go - // But we need use the key `https://index.docker.io/v1/` to find auth from docker config. - if host == "registry-1.docker.io" { - host = "https://index.docker.io/v1/" - } - - config := dockerconfig.LoadDefaultConfigFile(os.Stderr) - authConfig, err := config.GetAuthConfig(host) - if err != nil { - return "", "", err - } - - return authConfig.Username, authConfig.Password, nil - }) -} - -// DefaultRemoteWithAuth creates a remote instance, it parses base64 encoded auth string -// to communicate with remote registry. -func DefaultRemoteWithAuth(ref string, insecure bool, auth string) (*remote.Remote, error) { - return withRemote(ref, insecure, func(_ string) (string, string, error) { - // Leave auth empty if no authorization be required - if strings.TrimSpace(auth) == "" { - return "", "", nil - } - decoded, err := base64.StdEncoding.DecodeString(auth) - if err != nil { - return "", "", errors.Wrap(err, "Decode base64 encoded auth string") - } - ary := strings.Split(string(decoded), ":") - if len(ary) != 2 { - return "", "", errors.New("Invalid base64 encoded auth string") - } - return ary[0], ary[1], nil - }) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package provider + +import ( + "crypto/tls" + "encoding/base64" + "net" + "net/http" + "os" + "strings" + "time" + + "github.com/containerd/containerd/remotes" + "github.com/containerd/containerd/remotes/docker" + dockerconfig "github.com/docker/cli/cli/config" + "github.com/pkg/errors" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" +) + +func newDefaultClient(skipTLSVerify bool) *http.Client { + return &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + DualStack: true, + }).DialContext, + MaxIdleConns: 10, + IdleConnTimeout: 30 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 5 * time.Second, + DisableKeepAlives: true, + TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: skipTLSVerify, + }, + }, + } +} + +// withCredentialFunc accepts host url parameter and returns with +// username, password and error. +type withCredentialFunc = func(string) (string, string, error) + +// withRemote creates a remote instance, it uses the implementation of containerd +// docker remote to access image from remote registry. 
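A hedged sketch of the `withCredentialFunc` shape consumed here; since `withRemote` is unexported, callers normally supply credentials indirectly through `DefaultRemote` or `DefaultRemoteWithAuth` below, and the host and credentials in this sketch are placeholders:

```go
package example

// staticCreds has the withCredentialFunc shape: given a registry host it
// returns username, password, error. Host and credentials are placeholders.
func staticCreds(host string) (string, string, error) {
	if host == "registry.example.com" { // hypothetical private registry
		return "myuser", "mypassword", nil // placeholder credentials
	}
	return "", "", nil // empty credentials fall back to anonymous access
}
```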
+func withRemote(ref string, insecure bool, credFunc withCredentialFunc) (*remote.Remote, error) { + resolverFunc := func(retryWithHTTP bool) remotes.Resolver { + registryHosts := docker.ConfigureDefaultRegistries( + docker.WithAuthorizer( + docker.NewDockerAuthorizer( + docker.WithAuthClient(newDefaultClient(insecure)), + docker.WithAuthCreds(credFunc), + ), + ), + docker.WithClient(newDefaultClient(insecure)), + docker.WithPlainHTTP(func(_ string) (bool, error) { + return retryWithHTTP, nil + }), + ) + + return docker.NewResolver(docker.ResolverOptions{ + Hosts: registryHosts, + }) + } + + return remote.New(ref, resolverFunc) +} + +// DefaultRemote creates a remote instance, it attempts to read docker auth config +// file `$DOCKER_CONFIG/config.json` to communicate with remote registry, `$DOCKER_CONFIG` +// defaults to `~/.docker`. +func DefaultRemote(ref string, insecure bool) (*remote.Remote, error) { + return withRemote(ref, insecure, func(host string) (string, string, error) { + // The host of docker hub image will be converted to `registry-1.docker.io` in: + // github.com/containerd/containerd/remotes/docker/registry.go + // But we need use the key `https://index.docker.io/v1/` to find auth from docker config. + if host == "registry-1.docker.io" { + host = "https://index.docker.io/v1/" + } + + config := dockerconfig.LoadDefaultConfigFile(os.Stderr) + authConfig, err := config.GetAuthConfig(host) + if err != nil { + return "", "", err + } + + return authConfig.Username, authConfig.Password, nil + }) +} + +// DefaultRemoteWithAuth creates a remote instance, it parses base64 encoded auth string +// to communicate with remote registry. +func DefaultRemoteWithAuth(ref string, insecure bool, auth string) (*remote.Remote, error) { + return withRemote(ref, insecure, func(_ string) (string, string, error) { + // Leave auth empty if no authorization be required + if strings.TrimSpace(auth) == "" { + return "", "", nil + } + decoded, err := base64.StdEncoding.DecodeString(auth) + if err != nil { + return "", "", errors.Wrap(err, "Decode base64 encoded auth string") + } + ary := strings.Split(string(decoded), ":") + if len(ary) != 2 { + return "", "", errors.New("Invalid base64 encoded auth string") + } + return ary[0], ary[1], nil + }) +} diff --git a/contrib/nydusify/pkg/provider/source.go b/contrib/nydusify/pkg/provider/source.go index 8f54db49068..4849c8af0c1 100644 --- a/contrib/nydusify/pkg/provider/source.go +++ b/contrib/nydusify/pkg/provider/source.go @@ -1,205 +1,205 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -// Package provider abstracts interface to adapt to different build environments, -// the provider includes these components: -// - logger: output build progress for nydusify or buildkitd/buildctl; -// - remote: create a remote resolver, it communicates with remote registry; -// - source: responsible for getting image manifest, config, and mounting layer; -// Provider provides a default implementation, so we can use it in Nydusify -// directly, but we need to implement it in buildkit or other any projects -// which want to import nydusify package. 
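Looking back at `DefaultRemoteWithAuth` above: the auth string it decodes is the familiar base64-encoded `user:password` pair. A minimal sketch, with placeholder credentials and reference:

```go
package example

import (
	"encoding/base64"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote"
)

// newAuthedRemote shows the auth format DefaultRemoteWithAuth expects:
// base64("user:password"). Credentials here are placeholders.
func newAuthedRemote(ref string) (*remote.Remote, error) {
	auth := base64.StdEncoding.EncodeToString([]byte("myuser:mypassword"))
	return provider.DefaultRemoteWithAuth(ref, false, auth) // insecure = false
}
```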
-package provider - -import ( - "context" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/containerd/containerd/mount" - "github.com/opencontainers/go-digest" - "github.com/opencontainers/image-spec/identity" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -// SourceLayer is a layer of source image -type SourceLayer interface { - Mount(ctx context.Context) ([]mount.Mount, func() error, error) - Size() int64 - Digest() digest.Digest - ChainID() digest.Digest - ParentChainID() *digest.Digest -} - -// SourceProvider provides resource of source image -type SourceProvider interface { - Manifest(ctx context.Context) (*ocispec.Descriptor, error) - Config(ctx context.Context) (*ocispec.Image, error) - Layers(ctx context.Context) ([]SourceLayer, error) -} - -type defaultSourceProvider struct { - workDir string - image parser.Image - remote *remote.Remote -} - -type defaultSourceLayer struct { - remote *remote.Remote - mountDir string - desc ocispec.Descriptor - chainID digest.Digest - parentChainID *digest.Digest -} - -func (sp *defaultSourceProvider) Manifest(_ context.Context) (*ocispec.Descriptor, error) { - return &sp.image.Desc, nil -} - -func (sp *defaultSourceProvider) Config(_ context.Context) (*ocispec.Image, error) { - return &sp.image.Config, nil -} - -func (sp *defaultSourceProvider) Layers(_ context.Context) ([]SourceLayer, error) { - layers := sp.image.Manifest.Layers - diffIDs := sp.image.Config.RootFS.DiffIDs - if len(layers) != len(diffIDs) { - return nil, fmt.Errorf("Mismatched fs layers (%d) and diff ids (%d)", len(layers), len(diffIDs)) - } - - var parentChainID *digest.Digest - sourceLayers := []SourceLayer{} - - for i, desc := range layers { - chainID := identity.ChainID(diffIDs[:i+1]) - layer := &defaultSourceLayer{ - remote: sp.remote, - // Use layer ChainID as the mounted directory name, in case of - // the layers in the same Digest are removed by umount. 
- mountDir: filepath.Join(sp.workDir, chainID.String()), - desc: desc, - chainID: chainID, - parentChainID: parentChainID, - } - sourceLayers = append(sourceLayers, layer) - parentChainID = &chainID - } - - return sourceLayers, nil -} - -func (sl *defaultSourceLayer) Mount(ctx context.Context) ([]mount.Mount, func() error, error) { - digestStr := sl.desc.Digest.String() - - if err := utils.WithRetry(func() error { - // Pull the layer from source - reader, err := sl.remote.Pull(ctx, sl.desc, true) - if err != nil { - return errors.Wrap(err, fmt.Sprintf("Decompress source layer %s", digestStr)) - } - defer reader.Close() - - // Decompress layer from source stream - if err := utils.UnpackTargz(ctx, sl.mountDir, reader, false); err != nil { - return errors.Wrap(err, fmt.Sprintf("Decompress source layer %s", digestStr)) - } - - return nil - }); err != nil { - return nil, nil, err - } - - umount := func() error { - return os.RemoveAll(sl.mountDir) - } - - mounts := []mount.Mount{ - { - Type: "oci-directory", - Source: sl.mountDir, - }, - } - - return mounts, umount, nil -} - -func (sl *defaultSourceLayer) Digest() digest.Digest { - return sl.desc.Digest -} - -func (sl *defaultSourceLayer) Size() int64 { - return sl.desc.Size -} - -func (sl *defaultSourceLayer) ChainID() digest.Digest { - return sl.chainID -} - -func (sl *defaultSourceLayer) ParentChainID() *digest.Digest { - return sl.parentChainID -} - -// Input platform string should be formated like os/arch. -func ExtractOsArch(platform string) (string, string, error) { - - if len(strings.Split(platform, "/")) != 2 { - return "", "", fmt.Errorf("invalid platform format, %s", platform) - } - - p := strings.Split(platform, "/") - os := p[0] - arch := p[1] - - if os != "linux" { - return "", "", fmt.Errorf("not support os %s", os) - } - - if !utils.IsSupportedArch(arch) { - return "", "", fmt.Errorf("not support architecture %s", arch) - } - - return os, arch, nil -} - -// DefaultSource pulls image layers from specify image reference -func DefaultSource(ctx context.Context, remote *remote.Remote, workDir, platform string) ([]SourceProvider, error) { - - _, arch, err := ExtractOsArch(platform) - if err != nil { - return nil, err - } - - parser, err := parser.New(remote, arch) - if err != nil { - return nil, errors.Wrap(err, "failed to create parser") - } - parsed, err := parser.Parse(ctx) - if err != nil { - return nil, errors.Wrap(err, "Parse source image") - } - - if parsed.OCIImage == nil { - if parsed.NydusImage != nil { - return nil, fmt.Errorf("the source is an image that only included Nydus manifest") - } - return nil, fmt.Errorf("not found OCI %s manifest in source image", utils.SupportedOS+"/"+utils.SupportedArch) - } - - sp := []SourceProvider{ - &defaultSourceProvider{ - workDir: workDir, - image: *parsed.OCIImage, - remote: remote, - }, - } - - return sp, nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +// Package provider abstracts interface to adapt to different build environments, +// the provider includes these components: +// - logger: output build progress for nydusify or buildkitd/buildctl; +// - remote: create a remote resolver, it communicates with remote registry; +// - source: responsible for getting image manifest, config, and mounting layer; +// Provider provides a default implementation, so we can use it in Nydusify +// directly, but we need to implement it in buildkit or other any projects +// which want to import nydusify package. 
+package provider + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/containerd/containerd/mount" + "github.com/opencontainers/go-digest" + "github.com/opencontainers/image-spec/identity" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +// SourceLayer is a layer of source image +type SourceLayer interface { + Mount(ctx context.Context) ([]mount.Mount, func() error, error) + Size() int64 + Digest() digest.Digest + ChainID() digest.Digest + ParentChainID() *digest.Digest +} + +// SourceProvider provides resource of source image +type SourceProvider interface { + Manifest(ctx context.Context) (*ocispec.Descriptor, error) + Config(ctx context.Context) (*ocispec.Image, error) + Layers(ctx context.Context) ([]SourceLayer, error) +} + +type defaultSourceProvider struct { + workDir string + image parser.Image + remote *remote.Remote +} + +type defaultSourceLayer struct { + remote *remote.Remote + mountDir string + desc ocispec.Descriptor + chainID digest.Digest + parentChainID *digest.Digest +} + +func (sp *defaultSourceProvider) Manifest(_ context.Context) (*ocispec.Descriptor, error) { + return &sp.image.Desc, nil +} + +func (sp *defaultSourceProvider) Config(_ context.Context) (*ocispec.Image, error) { + return &sp.image.Config, nil +} + +func (sp *defaultSourceProvider) Layers(_ context.Context) ([]SourceLayer, error) { + layers := sp.image.Manifest.Layers + diffIDs := sp.image.Config.RootFS.DiffIDs + if len(layers) != len(diffIDs) { + return nil, fmt.Errorf("Mismatched fs layers (%d) and diff ids (%d)", len(layers), len(diffIDs)) + } + + var parentChainID *digest.Digest + sourceLayers := []SourceLayer{} + + for i, desc := range layers { + chainID := identity.ChainID(diffIDs[:i+1]) + layer := &defaultSourceLayer{ + remote: sp.remote, + // Use layer ChainID as the mounted directory name, in case of + // the layers in the same Digest are removed by umount. 
+ mountDir: filepath.Join(sp.workDir, chainID.String()), + desc: desc, + chainID: chainID, + parentChainID: parentChainID, + } + sourceLayers = append(sourceLayers, layer) + parentChainID = &chainID + } + + return sourceLayers, nil +} + +func (sl *defaultSourceLayer) Mount(ctx context.Context) ([]mount.Mount, func() error, error) { + digestStr := sl.desc.Digest.String() + + if err := utils.WithRetry(func() error { + // Pull the layer from source + reader, err := sl.remote.Pull(ctx, sl.desc, true) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Decompress source layer %s", digestStr)) + } + defer reader.Close() + + // Decompress layer from source stream + if err := utils.UnpackTargz(ctx, sl.mountDir, reader, false); err != nil { + return errors.Wrap(err, fmt.Sprintf("Decompress source layer %s", digestStr)) + } + + return nil + }); err != nil { + return nil, nil, err + } + + umount := func() error { + return os.RemoveAll(sl.mountDir) + } + + mounts := []mount.Mount{ + { + Type: "oci-directory", + Source: sl.mountDir, + }, + } + + return mounts, umount, nil +} + +func (sl *defaultSourceLayer) Digest() digest.Digest { + return sl.desc.Digest +} + +func (sl *defaultSourceLayer) Size() int64 { + return sl.desc.Size +} + +func (sl *defaultSourceLayer) ChainID() digest.Digest { + return sl.chainID +} + +func (sl *defaultSourceLayer) ParentChainID() *digest.Digest { + return sl.parentChainID +} + +// Input platform string should be formated like os/arch. +func ExtractOsArch(platform string) (string, string, error) { + + if len(strings.Split(platform, "/")) != 2 { + return "", "", fmt.Errorf("invalid platform format, %s", platform) + } + + p := strings.Split(platform, "/") + os := p[0] + arch := p[1] + + if os != "linux" { + return "", "", fmt.Errorf("not support os %s", os) + } + + if !utils.IsSupportedArch(arch) { + return "", "", fmt.Errorf("not support architecture %s", arch) + } + + return os, arch, nil +} + +// DefaultSource pulls image layers from specify image reference +func DefaultSource(ctx context.Context, remote *remote.Remote, workDir, platform string) ([]SourceProvider, error) { + + _, arch, err := ExtractOsArch(platform) + if err != nil { + return nil, err + } + + parser, err := parser.New(remote, arch) + if err != nil { + return nil, errors.Wrap(err, "failed to create parser") + } + parsed, err := parser.Parse(ctx) + if err != nil { + return nil, errors.Wrap(err, "Parse source image") + } + + if parsed.OCIImage == nil { + if parsed.NydusImage != nil { + return nil, fmt.Errorf("the source is an image that only included Nydus manifest") + } + return nil, fmt.Errorf("not found OCI %s manifest in source image", utils.SupportedOS+"/"+utils.SupportedArch) + } + + sp := []SourceProvider{ + &defaultSourceProvider{ + workDir: workDir, + image: *parsed.OCIImage, + remote: remote, + }, + } + + return sp, nil +} diff --git a/contrib/nydusify/pkg/remote/remote.go b/contrib/nydusify/pkg/remote/remote.go index edfbf552c21..4ab4e9bbb98 100644 --- a/contrib/nydusify/pkg/remote/remote.go +++ b/contrib/nydusify/pkg/remote/remote.go @@ -1,137 +1,137 @@ -// Copyright 2020 Ant Group. All rights reserved. 
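The `Layers()` implementation above derives each layer's mount directory from its chain ID. The following is a short, self-contained sketch of that derivation, using hypothetical diff IDs rather than a real image config.

```go
package main

import (
	"fmt"

	"github.com/opencontainers/go-digest"
	"github.com/opencontainers/image-spec/identity"
)

func main() {
	// Hypothetical diff IDs of a three-layer image (normally read from the image config's rootfs).
	diffIDs := []digest.Digest{
		digest.FromString("layer-0"),
		digest.FromString("layer-1"),
		digest.FromString("layer-2"),
	}

	// Mirror the loop in Layers(): the chain ID of layer i covers diffIDs[:i+1],
	// so it identifies the layer together with everything below it.
	var parent *digest.Digest
	for i := range diffIDs {
		chainID := identity.ChainID(diffIDs[:i+1])
		if parent == nil {
			fmt.Printf("layer %d: chainID=%s (no parent)\n", i, chainID)
		} else {
			fmt.Printf("layer %d: chainID=%s parent=%s\n", i, chainID, *parent)
		}
		parent = &chainID
	}
}
```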
-// -// SPDX-License-Identifier: Apache-2.0 - -package remote - -import ( - "context" - "fmt" - "io" - "strings" - "sync" - - "github.com/containerd/containerd/content" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/remotes" - "github.com/distribution/reference" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" -) - -// Remote provides the ability to access remote registry -type Remote struct { - // `Ref` is pointing to a remote image in formatted string host[:port]/[namespace/]repo[:tag] - Ref string - parsed reference.Named - // The resolver is used for image pull or fetches requests. The best practice - // in containerd is that each resolver instance is used only once for a request - // and is destroyed when the request completes. When a registry token expires, - // the resolver does not re-apply for a new token, so it's better to create a - // new resolver instance using resolverFunc for each request. - resolverFunc func(insecure bool) remotes.Resolver - pushed sync.Map - - retryWithHTTP bool -} - -// New creates remote instance from docker remote resolver -func New(ref string, resolverFunc func(bool) remotes.Resolver) (*Remote, error) { - parsed, err := reference.ParseNormalizedNamed(ref) - if err != nil { - return nil, err - } - - return &Remote{ - Ref: ref, - parsed: parsed, - resolverFunc: resolverFunc, - }, nil -} - -func (remote *Remote) MaybeWithHTTP(err error) { - parsed, _ := reference.ParseNormalizedNamed(remote.Ref) - if parsed != nil { - host := reference.Domain(parsed) - // If the error message includes the current registry host string, it - // implies that we can retry the request with plain HTTP. - if strings.Contains(err.Error(), fmt.Sprintf("/%s/", host)) { - remote.retryWithHTTP = true - } - } -} - -func (remote *Remote) IsWithHTTP() bool { - return remote.retryWithHTTP -} - -// Push pushes blob to registry -func (remote *Remote) Push(ctx context.Context, desc ocispec.Descriptor, byDigest bool, reader io.Reader) error { - // Concurrently push blob with same digest using containerd - // docker remote client will cause error: - // `failed commit on ref: unexpected size x, expected y` - // use ref key leveled mutex lock to avoid the issue. 
- refKey := remotes.MakeRefKey(ctx, desc) - lock, _ := remote.pushed.LoadOrStore(refKey, &sync.Mutex{}) - lock.(*sync.Mutex).Lock() - defer lock.(*sync.Mutex).Unlock() - - var ref string - if byDigest { - ref = remote.parsed.Name() - } else { - ref = reference.TagNameOnly(remote.parsed).String() - } - - // Create a new resolver instance for the request - pusher, err := remote.resolverFunc(remote.retryWithHTTP).Pusher(ctx, ref) - if err != nil { - return err - } - - writer, err := pusher.Push(ctx, desc) - if err != nil { - if errdefs.IsAlreadyExists(err) { - return nil - } - return err - } - defer writer.Close() - - return content.Copy(ctx, writer, reader, desc.Size, desc.Digest) -} - -// Pull pulls blob from registry -func (remote *Remote) Pull(ctx context.Context, desc ocispec.Descriptor, byDigest bool) (io.ReadCloser, error) { - var ref string - if byDigest { - ref = remote.parsed.Name() - } else { - ref = reference.TagNameOnly(remote.parsed).String() - } - - // Create a new resolver instance for the request - puller, err := remote.resolverFunc(remote.retryWithHTTP).Fetcher(ctx, ref) - if err != nil { - return nil, err - } - - reader, err := puller.Fetch(ctx, desc) - if err != nil { - return nil, err - } - - return reader, nil -} - -// Resolve parses descriptor for given image reference -func (remote *Remote) Resolve(ctx context.Context) (*ocispec.Descriptor, error) { - ref := reference.TagNameOnly(remote.parsed).String() - - // Create a new resolver instance for the request - _, desc, err := remote.resolverFunc(remote.retryWithHTTP).Resolve(ctx, ref) - if err != nil { - return nil, err - } - - return &desc, nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package remote + +import ( + "context" + "fmt" + "io" + "strings" + "sync" + + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/remotes" + "github.com/distribution/reference" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// Remote provides the ability to access remote registry +type Remote struct { + // `Ref` is pointing to a remote image in formatted string host[:port]/[namespace/]repo[:tag] + Ref string + parsed reference.Named + // The resolver is used for image pull or fetches requests. The best practice + // in containerd is that each resolver instance is used only once for a request + // and is destroyed when the request completes. When a registry token expires, + // the resolver does not re-apply for a new token, so it's better to create a + // new resolver instance using resolverFunc for each request. + resolverFunc func(insecure bool) remotes.Resolver + pushed sync.Map + + retryWithHTTP bool +} + +// New creates remote instance from docker remote resolver +func New(ref string, resolverFunc func(bool) remotes.Resolver) (*Remote, error) { + parsed, err := reference.ParseNormalizedNamed(ref) + if err != nil { + return nil, err + } + + return &Remote{ + Ref: ref, + parsed: parsed, + resolverFunc: resolverFunc, + }, nil +} + +func (remote *Remote) MaybeWithHTTP(err error) { + parsed, _ := reference.ParseNormalizedNamed(remote.Ref) + if parsed != nil { + host := reference.Domain(parsed) + // If the error message includes the current registry host string, it + // implies that we can retry the request with plain HTTP. 
+ if strings.Contains(err.Error(), fmt.Sprintf("/%s/", host)) { + remote.retryWithHTTP = true + } + } +} + +func (remote *Remote) IsWithHTTP() bool { + return remote.retryWithHTTP +} + +// Push pushes blob to registry +func (remote *Remote) Push(ctx context.Context, desc ocispec.Descriptor, byDigest bool, reader io.Reader) error { + // Concurrently push blob with same digest using containerd + // docker remote client will cause error: + // `failed commit on ref: unexpected size x, expected y` + // use ref key leveled mutex lock to avoid the issue. + refKey := remotes.MakeRefKey(ctx, desc) + lock, _ := remote.pushed.LoadOrStore(refKey, &sync.Mutex{}) + lock.(*sync.Mutex).Lock() + defer lock.(*sync.Mutex).Unlock() + + var ref string + if byDigest { + ref = remote.parsed.Name() + } else { + ref = reference.TagNameOnly(remote.parsed).String() + } + + // Create a new resolver instance for the request + pusher, err := remote.resolverFunc(remote.retryWithHTTP).Pusher(ctx, ref) + if err != nil { + return err + } + + writer, err := pusher.Push(ctx, desc) + if err != nil { + if errdefs.IsAlreadyExists(err) { + return nil + } + return err + } + defer writer.Close() + + return content.Copy(ctx, writer, reader, desc.Size, desc.Digest) +} + +// Pull pulls blob from registry +func (remote *Remote) Pull(ctx context.Context, desc ocispec.Descriptor, byDigest bool) (io.ReadCloser, error) { + var ref string + if byDigest { + ref = remote.parsed.Name() + } else { + ref = reference.TagNameOnly(remote.parsed).String() + } + + // Create a new resolver instance for the request + puller, err := remote.resolverFunc(remote.retryWithHTTP).Fetcher(ctx, ref) + if err != nil { + return nil, err + } + + reader, err := puller.Fetch(ctx, desc) + if err != nil { + return nil, err + } + + return reader, nil +} + +// Resolve parses descriptor for given image reference +func (remote *Remote) Resolve(ctx context.Context) (*ocispec.Descriptor, error) { + ref := reference.TagNameOnly(remote.parsed).String() + + // Create a new resolver instance for the request + _, desc, err := remote.resolverFunc(remote.retryWithHTTP).Resolve(ctx, ref) + if err != nil { + return nil, err + } + + return &desc, nil +} diff --git a/contrib/nydusify/pkg/utils/archive.go b/contrib/nydusify/pkg/utils/archive.go index df8b46aaec6..e2cb2dac44f 100644 --- a/contrib/nydusify/pkg/utils/archive.go +++ b/contrib/nydusify/pkg/utils/archive.go @@ -1,172 +1,172 @@ -// Copyright 2020 Ant Group. All rights reserved. 
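`MaybeWithHTTP` and `IsWithHTTP` let a caller fall back to plain HTTP after an HTTPS attempt fails. The helper below is an illustrative sketch of that flow; the wrapper function and the localhost reference are hypothetical, only the `Remote` methods come from this file.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/remote"
	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)

// resolveWithFallback resolves a descriptor and retries once over plain HTTP
// when the first attempt suggests the registry has no TLS endpoint.
func resolveWithFallback(ctx context.Context, rem *remote.Remote) (*ocispec.Descriptor, error) {
	desc, err := rem.Resolve(ctx)
	if err != nil {
		// Record the plain-HTTP hint; the next resolver is built with WithPlainHTTP(true).
		rem.MaybeWithHTTP(err)
		if rem.IsWithHTTP() {
			desc, err = rem.Resolve(ctx)
		}
	}
	return desc, err
}

func main() {
	rem, err := provider.DefaultRemote("localhost:5000/library/busybox:latest", true) // hypothetical registry
	if err != nil {
		log.Fatal(err)
	}
	desc, err := resolveWithFallback(context.Background(), rem)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("descriptor:", desc.Digest)
}
```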
-// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "archive/tar" - "compress/gzip" - "context" - "io" - "os" - "path/filepath" - - "golang.org/x/sys/unix" - - "github.com/containerd/containerd/archive" - "github.com/containerd/containerd/archive/compression" - "github.com/opencontainers/go-digest" -) - -// PackTargz makes .tar(.gz) stream of file named `name` and return reader -func PackTargz(src string, name string, compress bool) (io.ReadCloser, error) { - fi, err := os.Stat(src) - if err != nil { - return nil, err - } - - dirHdr := &tar.Header{ - Name: filepath.Dir(name), - Mode: 0770, - Typeflag: tar.TypeDir, - } - - hdr := &tar.Header{ - Name: name, - Mode: 0666, - Size: fi.Size(), - } - - reader, writer := io.Pipe() - - go func() { - // Prepare targz writer - var tw *tar.Writer - var gw *gzip.Writer - var err error - var file *os.File - - if compress { - gw = gzip.NewWriter(writer) - tw = tar.NewWriter(gw) - } else { - tw = tar.NewWriter(writer) - } - - defer func() { - err1 := tw.Close() - var err2 error - if gw != nil { - err2 = gw.Close() - } - - var finalErr error - - // Return the first error encountered to the other end and ignore others. - if err != nil { - finalErr = err - } else if err1 != nil { - finalErr = err1 - } else if err2 != nil { - finalErr = err2 - } - - writer.CloseWithError(finalErr) - }() - - file, err = os.Open(src) - if err != nil { - return - } - defer file.Close() - - // Write targz stream - if err = tw.WriteHeader(dirHdr); err != nil { - return - } - - if err = tw.WriteHeader(hdr); err != nil { - return - } - - if _, err = io.Copy(tw, file); err != nil { - return - } - }() - - return reader, nil -} - -// PackTargzInfo makes .tar(.gz) stream of file named `name` and return digest and size -func PackTargzInfo(src, name string, compress bool) (digest.Digest, int64, error) { - reader, err := PackTargz(src, name, compress) - if err != nil { - return "", 0, err - } - defer reader.Close() - - pipeReader, pipeWriter := io.Pipe() - - chanSize := make(chan int64) - chanErr := make(chan error) - go func() { - size, err := io.Copy(pipeWriter, reader) - if err != nil { - err = pipeWriter.CloseWithError(err) - } else { - err = pipeWriter.Close() - } - chanSize <- size - chanErr <- err - }() - - hash, err := digest.FromReader(pipeReader) - if err != nil { - return "", 0, err - } - defer pipeReader.Close() - - return hash, <-chanSize, <-chanErr -} - -// UnpackTargz unpacks .tar(.gz) stream, and write to dst path -func UnpackTargz(ctx context.Context, dst string, r io.Reader, overlay bool) error { - ds, err := compression.DecompressStream(r) - if err != nil { - return err - } - defer ds.Close() - - // Guarantee that umask won't affect file/directory creation - mask := unix.Umask(0) - defer unix.Umask(mask) - - if err := os.MkdirAll(dst, 0755); err != nil { - return err - } - - if overlay { - _, err = archive.Apply( - ctx, - dst, - ds, - archive.WithConvertWhiteout(archive.OverlayConvertWhiteout), - ) - } else { - _, err = archive.Apply( - ctx, - dst, - ds, - archive.WithConvertWhiteout(func(_ *tar.Header, _ string) (bool, error) { - return true, nil - }), - ) - } - - if err != nil { - return err - } - - return nil -} +// Copyright 2020 Ant Group. All rights reserved. 
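`PackTargz` and `PackTargzInfo` wrap a single file into a tar stream, optionally gzip compressed. Below is a minimal sketch of `PackTargzInfo`, with a temporary file standing in for a real bootstrap and the entry name taken from the constants in this package.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils"
)

func main() {
	// Temporary file standing in for a nydus bootstrap.
	tmp, err := os.CreateTemp("", "bootstrap-*")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(tmp.Name())
	if _, err := tmp.WriteString("nydus bootstrap data"); err != nil {
		log.Fatal(err)
	}
	tmp.Close()

	// Wrap the file as `image/image.boot` inside a gzip'd tar and report
	// the digest and size of the resulting archive stream.
	dgst, size, err := utils.PackTargzInfo(tmp.Name(), utils.BootstrapFileNameInLayer, true)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("layer digest=%s size=%d\n", dgst, size)
}
```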
+// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "archive/tar" + "compress/gzip" + "context" + "io" + "os" + "path/filepath" + + "golang.org/x/sys/unix" + + "github.com/containerd/containerd/archive" + "github.com/containerd/containerd/archive/compression" + "github.com/opencontainers/go-digest" +) + +// PackTargz makes .tar(.gz) stream of file named `name` and return reader +func PackTargz(src string, name string, compress bool) (io.ReadCloser, error) { + fi, err := os.Stat(src) + if err != nil { + return nil, err + } + + dirHdr := &tar.Header{ + Name: filepath.Dir(name), + Mode: 0770, + Typeflag: tar.TypeDir, + } + + hdr := &tar.Header{ + Name: name, + Mode: 0666, + Size: fi.Size(), + } + + reader, writer := io.Pipe() + + go func() { + // Prepare targz writer + var tw *tar.Writer + var gw *gzip.Writer + var err error + var file *os.File + + if compress { + gw = gzip.NewWriter(writer) + tw = tar.NewWriter(gw) + } else { + tw = tar.NewWriter(writer) + } + + defer func() { + err1 := tw.Close() + var err2 error + if gw != nil { + err2 = gw.Close() + } + + var finalErr error + + // Return the first error encountered to the other end and ignore others. + if err != nil { + finalErr = err + } else if err1 != nil { + finalErr = err1 + } else if err2 != nil { + finalErr = err2 + } + + writer.CloseWithError(finalErr) + }() + + file, err = os.Open(src) + if err != nil { + return + } + defer file.Close() + + // Write targz stream + if err = tw.WriteHeader(dirHdr); err != nil { + return + } + + if err = tw.WriteHeader(hdr); err != nil { + return + } + + if _, err = io.Copy(tw, file); err != nil { + return + } + }() + + return reader, nil +} + +// PackTargzInfo makes .tar(.gz) stream of file named `name` and return digest and size +func PackTargzInfo(src, name string, compress bool) (digest.Digest, int64, error) { + reader, err := PackTargz(src, name, compress) + if err != nil { + return "", 0, err + } + defer reader.Close() + + pipeReader, pipeWriter := io.Pipe() + + chanSize := make(chan int64) + chanErr := make(chan error) + go func() { + size, err := io.Copy(pipeWriter, reader) + if err != nil { + err = pipeWriter.CloseWithError(err) + } else { + err = pipeWriter.Close() + } + chanSize <- size + chanErr <- err + }() + + hash, err := digest.FromReader(pipeReader) + if err != nil { + return "", 0, err + } + defer pipeReader.Close() + + return hash, <-chanSize, <-chanErr +} + +// UnpackTargz unpacks .tar(.gz) stream, and write to dst path +func UnpackTargz(ctx context.Context, dst string, r io.Reader, overlay bool) error { + ds, err := compression.DecompressStream(r) + if err != nil { + return err + } + defer ds.Close() + + // Guarantee that umask won't affect file/directory creation + mask := unix.Umask(0) + defer unix.Umask(mask) + + if err := os.MkdirAll(dst, 0755); err != nil { + return err + } + + if overlay { + _, err = archive.Apply( + ctx, + dst, + ds, + archive.WithConvertWhiteout(archive.OverlayConvertWhiteout), + ) + } else { + _, err = archive.Apply( + ctx, + dst, + ds, + archive.WithConvertWhiteout(func(_ *tar.Header, _ string) (bool, error) { + return true, nil + }), + ) + } + + if err != nil { + return err + } + + return nil +} diff --git a/contrib/nydusify/pkg/utils/archive_test.go b/contrib/nydusify/pkg/utils/archive_test.go index 754cbc7c5ff..ed7dd406972 100644 --- a/contrib/nydusify/pkg/utils/archive_test.go +++ b/contrib/nydusify/pkg/utils/archive_test.go @@ -1,28 +1,28 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "os" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestPackTargzInfo(t *testing.T) { - file, err := os.CreateTemp("", "nydusify-archive-test") - assert.Nil(t, err) - defer os.RemoveAll(file.Name()) - - err = os.WriteFile(file.Name(), make([]byte, 1024*200), 0666) - assert.Nil(t, err) - - digest, size, err := PackTargzInfo(file.Name(), "test", true) - assert.Nil(t, err) - - assert.Equal(t, "sha256:6cdd1b26d54d5852fbea95a81cbb25383975b70b4ffad9f9b6d25c7a434a51eb", digest.String()) - assert.Equal(t, size, int64(315)) -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPackTargzInfo(t *testing.T) { + file, err := os.CreateTemp("", "nydusify-archive-test") + assert.Nil(t, err) + defer os.RemoveAll(file.Name()) + + err = os.WriteFile(file.Name(), make([]byte, 1024*200), 0666) + assert.Nil(t, err) + + digest, size, err := PackTargzInfo(file.Name(), "test", true) + assert.Nil(t, err) + + assert.Equal(t, "sha256:6cdd1b26d54d5852fbea95a81cbb25383975b70b4ffad9f9b6d25c7a434a51eb", digest.String()) + assert.Equal(t, size, int64(315)) +} diff --git a/contrib/nydusify/pkg/utils/constant.go b/contrib/nydusify/pkg/utils/constant.go index 87e6075c85a..d96793d8a7c 100644 --- a/contrib/nydusify/pkg/utils/constant.go +++ b/contrib/nydusify/pkg/utils/constant.go @@ -1,26 +1,26 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -const ( - ManifestOSFeatureNydus = "nydus.remoteimage.v1" - MediaTypeNydusBlob = "application/vnd.oci.image.layer.nydus.blob.v1" - BootstrapFileNameInLayer = "image/image.boot" - - ManifestNydusCache = "containerd.io/snapshot/nydus-cache" - - LayerAnnotationNydusBlob = "containerd.io/snapshot/nydus-blob" - LayerAnnotationNydusBlobDigest = "containerd.io/snapshot/nydus-blob-digest" - LayerAnnotationNydusBlobSize = "containerd.io/snapshot/nydus-blob-size" - LayerAnnotationNydusBootstrap = "containerd.io/snapshot/nydus-bootstrap" - LayerAnnotationNydusFsVersion = "containerd.io/snapshot/nydus-fs-version" - LayerAnnotationNydusSourceChainID = "containerd.io/snapshot/nydus-source-chainid" - - LayerAnnotationNydusReferenceBlobIDs = "containerd.io/snapshot/nydus-reference-blob-ids" - - LayerAnnotationUncompressed = "containerd.io/uncompressed" - - LayerAnnotationNydusCommitBlobs = "containerd.io/snapshot/nydus-commit-blobs" -) +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +const ( + ManifestOSFeatureNydus = "nydus.remoteimage.v1" + MediaTypeNydusBlob = "application/vnd.oci.image.layer.nydus.blob.v1" + BootstrapFileNameInLayer = "image/image.boot" + + ManifestNydusCache = "containerd.io/snapshot/nydus-cache" + + LayerAnnotationNydusBlob = "containerd.io/snapshot/nydus-blob" + LayerAnnotationNydusBlobDigest = "containerd.io/snapshot/nydus-blob-digest" + LayerAnnotationNydusBlobSize = "containerd.io/snapshot/nydus-blob-size" + LayerAnnotationNydusBootstrap = "containerd.io/snapshot/nydus-bootstrap" + LayerAnnotationNydusFsVersion = "containerd.io/snapshot/nydus-fs-version" + LayerAnnotationNydusSourceChainID = "containerd.io/snapshot/nydus-source-chainid" + + LayerAnnotationNydusReferenceBlobIDs = "containerd.io/snapshot/nydus-reference-blob-ids" + + LayerAnnotationUncompressed = "containerd.io/uncompressed" + + LayerAnnotationNydusCommitBlobs = "containerd.io/snapshot/nydus-commit-blobs" +) diff --git a/contrib/nydusify/pkg/utils/utils.go b/contrib/nydusify/pkg/utils/utils.go index 7be572a8034..176425b440a 100644 --- a/contrib/nydusify/pkg/utils/utils.go +++ b/contrib/nydusify/pkg/utils/utils.go @@ -1,205 +1,205 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "archive/tar" - "encoding/json" - "fmt" - "io" - "net/http" - "os" - "runtime" - "strings" - "syscall" - "time" - - "github.com/containerd/containerd/archive/compression" - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "lukechampine.com/blake3" -) - -const SupportedOS = "linux" -const SupportedArch = runtime.GOARCH - -const defaultRetryAttempts = 3 -const defaultRetryInterval = time.Second * 2 - -const ( - PlatformArchAMD64 string = "amd64" - PlatformArchARM64 string = "arm64" -) - -type FsVersion int - -const ( - V5 FsVersion = iota - V6 -) - -func GetNydusFsVersionOrDefault(annotations map[string]string, defaultVersion FsVersion) FsVersion { - if annotations == nil { - return defaultVersion - } - if v, ok := annotations[LayerAnnotationNydusFsVersion]; ok { - if v == "5" { - return V5 - } - if v == "6" { - return V6 - } - } - - return defaultVersion -} - -func WithRetry(op func() error) error { - var err error - attempts := defaultRetryAttempts - for attempts > 0 { - attempts-- - if err != nil { - if RetryWithHTTP(err) { - return err - } - logrus.Warnf("Retry due to error: %s", err) - time.Sleep(defaultRetryInterval) - } - if err = op(); err == nil { - break - } - } - return err -} - -func RetryWithHTTP(err error) bool { - return err != nil && (errors.Is(err, http.ErrSchemeMismatch) || errors.Is(err, syscall.ECONNREFUSED)) -} - -func MarshalToDesc(data interface{}, mediaType string) (*ocispec.Descriptor, []byte, error) { - bytes, err := json.Marshal(data) - if err != nil { - return nil, nil, err - } - - dataDigest := digest.FromBytes(bytes) - desc := ocispec.Descriptor{ - Digest: dataDigest, - Size: int64(len(bytes)), - MediaType: mediaType, - } - - return &desc, bytes, nil -} - -func IsNydusPlatform(platform *ocispec.Platform) bool { - if platform != nil && platform.OSFeatures != nil { - for _, key := range platform.OSFeatures { - if key == ManifestOSFeatureNydus { - return true - } - } - } - return false -} - -func IsSupportedArch(arch string) bool { - if arch != PlatformArchAMD64 && arch != PlatformArchARM64 { - return false - } - return true -} - 
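`WithRetry`, shown just above, re-runs an operation up to the default attempt count with a fixed sleep in between, but bails out early on errors that indicate a plain-HTTP registry (see `RetryWithHTTP`). A small sketch of the retry behaviour with a hypothetical closure that succeeds on its third attempt:

```go
package main

import (
	"fmt"
	"log"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils"
)

func main() {
	attempts := 0

	// The closure fails twice and then succeeds, staying within the
	// default retry budget of three attempts.
	err := utils.WithRetry(func() error {
		attempts++
		if attempts < 3 {
			return fmt.Errorf("transient failure on attempt %d", attempts)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("succeeded after", attempts, "attempts")
}
```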
-// A matched nydus image should match os/arch -func MatchNydusPlatform(dst *ocispec.Descriptor, os, arch string) bool { - if dst.Platform.Architecture != arch || dst.Platform.OS != os { - return false - } - - for _, feature := range dst.Platform.OSFeatures { - if feature == ManifestOSFeatureNydus { - return true - } - } - - return false -} - -func UnpackFile(reader io.Reader, source, target string) error { - rdr, err := compression.DecompressStream(reader) - if err != nil { - return err - } - defer rdr.Close() - - found := false - tr := tar.NewReader(rdr) - for { - hdr, err := tr.Next() - if err != nil { - if err == io.EOF { - break - } - } - if hdr.Name == source { - file, err := os.Create(target) - if err != nil { - return err - } - defer file.Close() - if _, err := io.Copy(file, tr); err != nil { - return err - } - found = true - break - } - } - - if !found { - return fmt.Errorf("Not found file %s in targz", source) - } - - return nil -} - -func IsEmptyString(str string) bool { - return strings.TrimSpace(str) == "" -} - -func IsPathExists(path string) bool { - if _, err := os.Stat(path); err == nil { - return true - } - return false -} - -func HashFile(path string) ([]byte, error) { - hasher := blake3.New(32, nil) - - file, err := os.Open(path) - if err != nil { - return nil, errors.Wrap(err, "open file before hashing file") - } - defer file.Close() - - buf := make([]byte, 2<<15) // 64KB - for { - n, err := file.Read(buf) - if err == io.EOF || n == 0 { - break - } - if err != nil { - return nil, errors.Wrap(err, "read file during hashing file") - } - if _, err := hasher.Write(buf); err != nil { - return nil, errors.Wrap(err, "calculate hash of file") - } - } - - return hasher.Sum(nil), nil -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "archive/tar" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "runtime" + "strings" + "syscall" + "time" + + "github.com/containerd/containerd/archive/compression" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "lukechampine.com/blake3" +) + +const SupportedOS = "linux" +const SupportedArch = runtime.GOARCH + +const defaultRetryAttempts = 3 +const defaultRetryInterval = time.Second * 2 + +const ( + PlatformArchAMD64 string = "amd64" + PlatformArchARM64 string = "arm64" +) + +type FsVersion int + +const ( + V5 FsVersion = iota + V6 +) + +func GetNydusFsVersionOrDefault(annotations map[string]string, defaultVersion FsVersion) FsVersion { + if annotations == nil { + return defaultVersion + } + if v, ok := annotations[LayerAnnotationNydusFsVersion]; ok { + if v == "5" { + return V5 + } + if v == "6" { + return V6 + } + } + + return defaultVersion +} + +func WithRetry(op func() error) error { + var err error + attempts := defaultRetryAttempts + for attempts > 0 { + attempts-- + if err != nil { + if RetryWithHTTP(err) { + return err + } + logrus.Warnf("Retry due to error: %s", err) + time.Sleep(defaultRetryInterval) + } + if err = op(); err == nil { + break + } + } + return err +} + +func RetryWithHTTP(err error) bool { + return err != nil && (errors.Is(err, http.ErrSchemeMismatch) || errors.Is(err, syscall.ECONNREFUSED)) +} + +func MarshalToDesc(data interface{}, mediaType string) (*ocispec.Descriptor, []byte, error) { + bytes, err := json.Marshal(data) + if err != nil { + return nil, nil, err + } + + dataDigest := digest.FromBytes(bytes) + desc := 
ocispec.Descriptor{ + Digest: dataDigest, + Size: int64(len(bytes)), + MediaType: mediaType, + } + + return &desc, bytes, nil +} + +func IsNydusPlatform(platform *ocispec.Platform) bool { + if platform != nil && platform.OSFeatures != nil { + for _, key := range platform.OSFeatures { + if key == ManifestOSFeatureNydus { + return true + } + } + } + return false +} + +func IsSupportedArch(arch string) bool { + if arch != PlatformArchAMD64 && arch != PlatformArchARM64 { + return false + } + return true +} + +// A matched nydus image should match os/arch +func MatchNydusPlatform(dst *ocispec.Descriptor, os, arch string) bool { + if dst.Platform.Architecture != arch || dst.Platform.OS != os { + return false + } + + for _, feature := range dst.Platform.OSFeatures { + if feature == ManifestOSFeatureNydus { + return true + } + } + + return false +} + +func UnpackFile(reader io.Reader, source, target string) error { + rdr, err := compression.DecompressStream(reader) + if err != nil { + return err + } + defer rdr.Close() + + found := false + tr := tar.NewReader(rdr) + for { + hdr, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + } + if hdr.Name == source { + file, err := os.Create(target) + if err != nil { + return err + } + defer file.Close() + if _, err := io.Copy(file, tr); err != nil { + return err + } + found = true + break + } + } + + if !found { + return fmt.Errorf("Not found file %s in targz", source) + } + + return nil +} + +func IsEmptyString(str string) bool { + return strings.TrimSpace(str) == "" +} + +func IsPathExists(path string) bool { + if _, err := os.Stat(path); err == nil { + return true + } + return false +} + +func HashFile(path string) ([]byte, error) { + hasher := blake3.New(32, nil) + + file, err := os.Open(path) + if err != nil { + return nil, errors.Wrap(err, "open file before hashing file") + } + defer file.Close() + + buf := make([]byte, 2<<15) // 64KB + for { + n, err := file.Read(buf) + if err == io.EOF || n == 0 { + break + } + if err != nil { + return nil, errors.Wrap(err, "read file during hashing file") + } + if _, err := hasher.Write(buf); err != nil { + return nil, errors.Wrap(err, "calculate hash of file") + } + } + + return hasher.Sum(nil), nil +} diff --git a/contrib/nydusify/pkg/utils/utils_test.go b/contrib/nydusify/pkg/utils/utils_test.go index dc9c5c314fc..e7d698c4758 100644 --- a/contrib/nydusify/pkg/utils/utils_test.go +++ b/contrib/nydusify/pkg/utils/utils_test.go @@ -1,272 +1,272 @@ -// Copyright 2023 Alibaba Cloud. All rights reserved. -// Copyright 2023 Nydus Developers. All rights reserved. 
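`UnpackFile` above scans a (possibly compressed) tar stream for a single entry and writes it to a target path. A hedged usage sketch follows; the input archive name and output path are placeholders, only the function and the `image/image.boot` constant come from this package.

```go
package main

import (
	"log"
	"os"

	"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils"
)

func main() {
	// Hypothetical tar.gz layer that contains the nydus bootstrap entry.
	layer, err := os.Open("layer.tar.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer layer.Close()

	// Extract the entry named `image/image.boot` into a local file.
	if err := utils.UnpackFile(layer, utils.BootstrapFileNameInLayer, "image.boot"); err != nil {
		log.Fatal(err)
	}
	log.Println("extracted bootstrap to image.boot")
}
```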
-// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "archive/tar" - "compress/gzip" - "io" - "net/http" - "os" - "strings" - "syscall" - "testing" - - "github.com/opencontainers/go-digest" - ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" - "github.com/stretchr/testify/require" -) - -func makePlatform(osArch string, nydus bool) *ocispec.Platform { - var platform *ocispec.Platform - if osArch == "" { - platform = &ocispec.Platform{ - OS: "", - Architecture: "", - } - } else { - platform = &ocispec.Platform{ - OS: strings.Split(osArch, "/")[0], - Architecture: strings.Split(osArch, "/")[1], - } - } - if nydus { - platform.OSFeatures = []string{ManifestOSFeatureNydus} - } else { - platform.OSFeatures = nil - } - return platform -} - -func makeDesc(id string, platform *ocispec.Platform) ocispec.Descriptor { - return ocispec.Descriptor{ - MediaType: ocispec.MediaTypeImageManifest, - Digest: digest.FromString("manifest-" + id), - Size: 10, - Platform: platform, - } -} - -func TestIsSupportedArch(t *testing.T) { - var arch string - arch = PlatformArchAMD64 - require.Equal(t, IsSupportedArch(arch), true) - arch = PlatformArchARM64 - require.Equal(t, IsSupportedArch(arch), true) - arch = "riscv64" - require.Equal(t, IsSupportedArch(arch), false) - arch = "unsupported" - require.Equal(t, IsSupportedArch(arch), false) -} - -func TestIsNydusPlatform(t *testing.T) { - var platform *ocispec.Platform - platform = makePlatform("linux/amd64", true) - require.Equal(t, IsNydusPlatform(platform), true) - platform = makePlatform("linux/arm64", true) - require.Equal(t, IsNydusPlatform(platform), true) - platform = makePlatform("linux/amd64", false) - require.Equal(t, IsNydusPlatform(platform), false) - platform = makePlatform("linux/arm64", false) - require.Equal(t, IsNydusPlatform(platform), false) -} - -func TestMatchNydusPlatform(t *testing.T) { - var desc ocispec.Descriptor - desc = makeDesc("nydus", makePlatform("linux/amd64", true)) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), true) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), false) - desc = makeDesc("nydus", makePlatform("linux/amd64", false)) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), false) - desc = makeDesc("nydus", makePlatform("windows/arm64", true)) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), true) - require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) - require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), false) -} - -func TestIsEmptyString(t *testing.T) { - var str = "" - require.Equal(t, IsEmptyString(str), true) - str = "test" - require.Equal(t, IsEmptyString(str), false) -} - -func TestIsPathExists(t *testing.T) { - var tempdir = "./test/" - err := os.MkdirAll(tempdir, 0666) - require.NoError(t, err) - defer os.RemoveAll(tempdir) - require.Equal(t, IsPathExists(tempdir), true) - var path = "UnexistFolder" - require.Equal(t, IsPathExists(path), false) -} - -func createArchive(files []string, buf io.Writer) error { - // Create new Writers for gzip and 
tar - // These writers are chained. Writing to the tar writer will - // write to the gzip writer which in turn will write to - // the "buf" writer - gw := gzip.NewWriter(buf) - defer gw.Close() - tw := tar.NewWriter(gw) - defer tw.Close() - // Iterate over files and add them to the tar archive - for _, file := range files { - err := addToArchive(tw, file) - if err != nil { - return err - } - } - return nil -} - -func addToArchive(tw *tar.Writer, filename string) error { - // Open the file which will be written into the archive - file, err := os.Open(filename) - if err != nil { - return err - } - defer file.Close() - // Get FileInfo about our file providing file size, mode, etc. - info, err := file.Stat() - if err != nil { - return err - } - // Create a tar Header from the FileInfo data - header, err := tar.FileInfoHeader(info, info.Name()) - if err != nil { - return err - } - // Use full path as name (FileInfoHeader only takes the basename) - // If we don't do this the directory structure would - // not be preserved - // https://golang.org/src/archive/tar/common.go?#L626 - header.Name = filename - // Write file header to the tar archive - err = tw.WriteHeader(header) - if err != nil { - return err - } - // Copy file content to tar archive - _, err = io.Copy(tw, file) - if err != nil { - return err - } - return nil -} - -func TestUnpackFile(t *testing.T) { - fileName := "example.txt" - dirName := "test" - mockData := "this is a test data" - // Create file1 - file1, err := os.Create(fileName) - require.NoError(t, err) - defer file1.Close() - defer os.RemoveAll(file1.Name()) - _, err = io.WriteString(file1, mockData) - require.NoError(t, err) - // Create file2 - err = os.MkdirAll(dirName, 0666) - defer os.RemoveAll(dirName) - require.NoError(t, err) - file2, err := os.Create(dirName + fileName) - require.NoError(t, err) - defer file2.Close() - defer os.RemoveAll(file2.Name()) - _, err = io.WriteString(file2, mockData) - require.NoError(t, err) - // Files which to include in the tar.gz archive - files := []string{file1.Name(), file2.Name()} - // Create output file - targzName := "output.tar.gz" - out, err := os.Create(targzName) - require.NoError(t, err) - defer out.Close() - defer os.Remove(targzName) - // Create the archive and write the output to the "out" Writer - err = createArchive(files, out) - require.NoError(t, err) - // Archive created successfully - targzFile, err := os.Open(out.Name()) - require.NoError(t, err) - defer targzFile.Close() - outputName := "output.txt" - err = UnpackFile(targzFile, file1.Name(), outputName) - require.NoError(t, err) - defer os.Remove(outputName) -} - -func TestHashFile(t *testing.T) { - file, err := os.CreateTemp("", "tempFile") - require.NoError(t, err) - defer os.RemoveAll(file.Name()) - - _, err = file.WriteString("123456") - require.NoError(t, err) - file.Sync() - - hashSum, err := HashFile(file.Name()) - require.NoError(t, err) - require.Len(t, hashSum, 32) -} - -func TestMarshalToDesc(t *testing.T) { - config := ocispec.Image{ - Config: ocispec.ImageConfig{}, - RootFS: ocispec.RootFS{ - Type: "layers", - // Layers from manifest must be match image config. 
- DiffIDs: []digest.Digest{}, - }, - } - configDesc, configBytes, err := MarshalToDesc(config, ocispec.MediaTypeImageConfig) - require.NoError(t, err) - require.Equal(t, "application/vnd.oci.image.config.v1+json", configDesc.MediaType) - require.Equal(t, "sha256:1475e1cf0118aa3ddadbc8ae05cd5d5e151b63784e1e062de226e70fced50a0f", configDesc.Digest.String()) - require.Equal(t, int64(len(configBytes)), configDesc.Size) -} - -func TestWithRetry(t *testing.T) { - err := WithRetry(func() error { - _, err := http.Get("http://localhost:5000") - return err - }) - require.ErrorIs(t, err, syscall.ECONNREFUSED) -} - -func TestRetryWithHTTP(t *testing.T) { - require.True(t, RetryWithHTTP(errors.Wrap(http.ErrSchemeMismatch, "parse Nydus image"))) - require.False(t, RetryWithHTTP(nil)) -} - -func TestGetNydusFsVersionOrDefault(t *testing.T) { - testAnnotations := make(map[string]string) - fsVersion := GetNydusFsVersionOrDefault(testAnnotations, V5) - require.Equal(t, fsVersion, V5) - - fsVersion = GetNydusFsVersionOrDefault(nil, V6) - require.Equal(t, fsVersion, V6) - - testAnnotations[LayerAnnotationNydusFsVersion] = "5" - fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V6) - require.Equal(t, fsVersion, V5) - - testAnnotations[LayerAnnotationNydusFsVersion] = "6" - fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V5) - require.Equal(t, fsVersion, V6) - - testAnnotations[LayerAnnotationNydusFsVersion] = "7" - fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V5) - require.Equal(t, fsVersion, V5) -} +// Copyright 2023 Alibaba Cloud. All rights reserved. +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "archive/tar" + "compress/gzip" + "io" + "net/http" + "os" + "strings" + "syscall" + "testing" + + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/stretchr/testify/require" +) + +func makePlatform(osArch string, nydus bool) *ocispec.Platform { + var platform *ocispec.Platform + if osArch == "" { + platform = &ocispec.Platform{ + OS: "", + Architecture: "", + } + } else { + platform = &ocispec.Platform{ + OS: strings.Split(osArch, "/")[0], + Architecture: strings.Split(osArch, "/")[1], + } + } + if nydus { + platform.OSFeatures = []string{ManifestOSFeatureNydus} + } else { + platform.OSFeatures = nil + } + return platform +} + +func makeDesc(id string, platform *ocispec.Platform) ocispec.Descriptor { + return ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageManifest, + Digest: digest.FromString("manifest-" + id), + Size: 10, + Platform: platform, + } +} + +func TestIsSupportedArch(t *testing.T) { + var arch string + arch = PlatformArchAMD64 + require.Equal(t, IsSupportedArch(arch), true) + arch = PlatformArchARM64 + require.Equal(t, IsSupportedArch(arch), true) + arch = "riscv64" + require.Equal(t, IsSupportedArch(arch), false) + arch = "unsupported" + require.Equal(t, IsSupportedArch(arch), false) +} + +func TestIsNydusPlatform(t *testing.T) { + var platform *ocispec.Platform + platform = makePlatform("linux/amd64", true) + require.Equal(t, IsNydusPlatform(platform), true) + platform = makePlatform("linux/arm64", true) + require.Equal(t, IsNydusPlatform(platform), true) + platform = makePlatform("linux/amd64", false) + require.Equal(t, IsNydusPlatform(platform), false) + platform = makePlatform("linux/arm64", false) + require.Equal(t, IsNydusPlatform(platform), false) +} + +func TestMatchNydusPlatform(t 
*testing.T) { + var desc ocispec.Descriptor + desc = makeDesc("nydus", makePlatform("linux/amd64", true)) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), true) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), false) + desc = makeDesc("nydus", makePlatform("linux/amd64", false)) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), false) + desc = makeDesc("nydus", makePlatform("windows/arm64", true)) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "arm64"), true) + require.Equal(t, MatchNydusPlatform(&desc, "windows", "amd64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "arm64"), false) + require.Equal(t, MatchNydusPlatform(&desc, "linux", "amd64"), false) +} + +func TestIsEmptyString(t *testing.T) { + var str = "" + require.Equal(t, IsEmptyString(str), true) + str = "test" + require.Equal(t, IsEmptyString(str), false) +} + +func TestIsPathExists(t *testing.T) { + var tempdir = "./test/" + err := os.MkdirAll(tempdir, 0666) + require.NoError(t, err) + defer os.RemoveAll(tempdir) + require.Equal(t, IsPathExists(tempdir), true) + var path = "UnexistFolder" + require.Equal(t, IsPathExists(path), false) +} + +func createArchive(files []string, buf io.Writer) error { + // Create new Writers for gzip and tar + // These writers are chained. Writing to the tar writer will + // write to the gzip writer which in turn will write to + // the "buf" writer + gw := gzip.NewWriter(buf) + defer gw.Close() + tw := tar.NewWriter(gw) + defer tw.Close() + // Iterate over files and add them to the tar archive + for _, file := range files { + err := addToArchive(tw, file) + if err != nil { + return err + } + } + return nil +} + +func addToArchive(tw *tar.Writer, filename string) error { + // Open the file which will be written into the archive + file, err := os.Open(filename) + if err != nil { + return err + } + defer file.Close() + // Get FileInfo about our file providing file size, mode, etc. 
+ info, err := file.Stat() + if err != nil { + return err + } + // Create a tar Header from the FileInfo data + header, err := tar.FileInfoHeader(info, info.Name()) + if err != nil { + return err + } + // Use full path as name (FileInfoHeader only takes the basename) + // If we don't do this the directory structure would + // not be preserved + // https://golang.org/src/archive/tar/common.go?#L626 + header.Name = filename + // Write file header to the tar archive + err = tw.WriteHeader(header) + if err != nil { + return err + } + // Copy file content to tar archive + _, err = io.Copy(tw, file) + if err != nil { + return err + } + return nil +} + +func TestUnpackFile(t *testing.T) { + fileName := "example.txt" + dirName := "test" + mockData := "this is a test data" + // Create file1 + file1, err := os.Create(fileName) + require.NoError(t, err) + defer file1.Close() + defer os.RemoveAll(file1.Name()) + _, err = io.WriteString(file1, mockData) + require.NoError(t, err) + // Create file2 + err = os.MkdirAll(dirName, 0666) + defer os.RemoveAll(dirName) + require.NoError(t, err) + file2, err := os.Create(dirName + fileName) + require.NoError(t, err) + defer file2.Close() + defer os.RemoveAll(file2.Name()) + _, err = io.WriteString(file2, mockData) + require.NoError(t, err) + // Files which to include in the tar.gz archive + files := []string{file1.Name(), file2.Name()} + // Create output file + targzName := "output.tar.gz" + out, err := os.Create(targzName) + require.NoError(t, err) + defer out.Close() + defer os.Remove(targzName) + // Create the archive and write the output to the "out" Writer + err = createArchive(files, out) + require.NoError(t, err) + // Archive created successfully + targzFile, err := os.Open(out.Name()) + require.NoError(t, err) + defer targzFile.Close() + outputName := "output.txt" + err = UnpackFile(targzFile, file1.Name(), outputName) + require.NoError(t, err) + defer os.Remove(outputName) +} + +func TestHashFile(t *testing.T) { + file, err := os.CreateTemp("", "tempFile") + require.NoError(t, err) + defer os.RemoveAll(file.Name()) + + _, err = file.WriteString("123456") + require.NoError(t, err) + file.Sync() + + hashSum, err := HashFile(file.Name()) + require.NoError(t, err) + require.Len(t, hashSum, 32) +} + +func TestMarshalToDesc(t *testing.T) { + config := ocispec.Image{ + Config: ocispec.ImageConfig{}, + RootFS: ocispec.RootFS{ + Type: "layers", + // Layers from manifest must be match image config. 
+ DiffIDs: []digest.Digest{}, + }, + } + configDesc, configBytes, err := MarshalToDesc(config, ocispec.MediaTypeImageConfig) + require.NoError(t, err) + require.Equal(t, "application/vnd.oci.image.config.v1+json", configDesc.MediaType) + require.Equal(t, "sha256:1475e1cf0118aa3ddadbc8ae05cd5d5e151b63784e1e062de226e70fced50a0f", configDesc.Digest.String()) + require.Equal(t, int64(len(configBytes)), configDesc.Size) +} + +func TestWithRetry(t *testing.T) { + err := WithRetry(func() error { + _, err := http.Get("http://localhost:5000") + return err + }) + require.ErrorIs(t, err, syscall.ECONNREFUSED) +} + +func TestRetryWithHTTP(t *testing.T) { + require.True(t, RetryWithHTTP(errors.Wrap(http.ErrSchemeMismatch, "parse Nydus image"))) + require.False(t, RetryWithHTTP(nil)) +} + +func TestGetNydusFsVersionOrDefault(t *testing.T) { + testAnnotations := make(map[string]string) + fsVersion := GetNydusFsVersionOrDefault(testAnnotations, V5) + require.Equal(t, fsVersion, V5) + + fsVersion = GetNydusFsVersionOrDefault(nil, V6) + require.Equal(t, fsVersion, V6) + + testAnnotations[LayerAnnotationNydusFsVersion] = "5" + fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V6) + require.Equal(t, fsVersion, V5) + + testAnnotations[LayerAnnotationNydusFsVersion] = "6" + fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V5) + require.Equal(t, fsVersion, V6) + + testAnnotations[LayerAnnotationNydusFsVersion] = "7" + fsVersion = GetNydusFsVersionOrDefault(testAnnotations, V5) + require.Equal(t, fsVersion, V5) +} diff --git a/contrib/nydusify/pkg/utils/worker.go b/contrib/nydusify/pkg/utils/worker.go index e7714f6dac7..92b190675f6 100644 --- a/contrib/nydusify/pkg/utils/worker.go +++ b/contrib/nydusify/pkg/utils/worker.go @@ -1,152 +1,152 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "sync" - "sync/atomic" -) - -type Job = func() error - -type RJob interface { - Do() error - Err() error -} - -// QueueWorkerPool creates a worker pool with fixed count, caller -// puts some jobs to the pool by a fixed order and then wait all -// jobs finish by the previous order -type QueueWorkerPool struct { - err atomic.Value - jobs chan RJob - rets []chan RJob -} - -// NewQueueWorkerPool creates a queued worker pool, `worker` is worker -// count, `total` is expected job count -func NewQueueWorkerPool(worker, total uint) *QueueWorkerPool { - pool := &QueueWorkerPool{ - jobs: make(chan RJob, total), - rets: make([]chan RJob, total), - } - - for idx := range pool.rets { - pool.rets[idx] = make(chan RJob, 1) - } - - current := uint(0) - var lock sync.Mutex - - for count := uint(0); count < worker; count++ { - go func() { - for { - lock.Lock() - current++ - if current > total { - lock.Unlock() - break - } - index := current - 1 - job, ok := <-pool.jobs - if !ok { - lock.Unlock() - break - } - lock.Unlock() - - err := job.Do() - pool.rets[index] <- job - if err != nil { - pool.err.Store(err) - break - } - } - }() - } - - return pool -} - -func (pool *QueueWorkerPool) Put(_job RJob) error { - e := pool.err.Load() - if e != nil { - return e.(error) - } - - pool.jobs <- _job - return nil -} - -func (pool *QueueWorkerPool) Waiter() []chan RJob { - return pool.rets -} - -type Once int32 - -func NewOnce() Once { - return Once(0) -} - -func (o *Once) Do(callback func()) { - if atomic.CompareAndSwapInt32((*int32)(o), 0, 1) { - callback() - } -} - -// WorkerPool creates a worker pool with fixed count, caller -// puts some jobs to the pool and then wait all jobs finish -type WorkerPool struct { - err chan error - wg sync.WaitGroup - queue chan Job -} - -// NewWorkerPool creates a worker pool, `worker` is worker -// count, `total` is expected job count -func NewWorkerPool(worker, total uint) *WorkerPool { - pool := &WorkerPool{ - queue: make(chan Job, total), - err: make(chan error, 1), - } - - once := NewOnce() - - for count := uint(0); count < worker; count++ { - pool.wg.Add(1) - go func() { - defer pool.wg.Done() - for { - job, ok := <-pool.queue - if !ok { - break - } - if err := job(); err != nil { - once.Do(func() { - pool.err <- err - }) - break - } - } - }() - } - - return pool -} - -func (pool *WorkerPool) Put(job Job) { - pool.queue <- job -} - -func (pool *WorkerPool) Err() chan error { - return pool.err -} - -func (pool *WorkerPool) Waiter() chan error { - close(pool.queue) - pool.wg.Wait() - close(pool.err) - return pool.err -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "sync" + "sync/atomic" +) + +type Job = func() error + +type RJob interface { + Do() error + Err() error +} + +// QueueWorkerPool creates a worker pool with fixed count, caller +// puts some jobs to the pool by a fixed order and then wait all +// jobs finish by the previous order +type QueueWorkerPool struct { + err atomic.Value + jobs chan RJob + rets []chan RJob +} + +// NewQueueWorkerPool creates a queued worker pool, `worker` is worker +// count, `total` is expected job count +func NewQueueWorkerPool(worker, total uint) *QueueWorkerPool { + pool := &QueueWorkerPool{ + jobs: make(chan RJob, total), + rets: make([]chan RJob, total), + } + + for idx := range pool.rets { + pool.rets[idx] = make(chan RJob, 1) + } + + current := uint(0) + var lock sync.Mutex + + for count := uint(0); count < worker; count++ { + go func() { + for { + lock.Lock() + current++ + if current > total { + lock.Unlock() + break + } + index := current - 1 + job, ok := <-pool.jobs + if !ok { + lock.Unlock() + break + } + lock.Unlock() + + err := job.Do() + pool.rets[index] <- job + if err != nil { + pool.err.Store(err) + break + } + } + }() + } + + return pool +} + +func (pool *QueueWorkerPool) Put(_job RJob) error { + e := pool.err.Load() + if e != nil { + return e.(error) + } + + pool.jobs <- _job + return nil +} + +func (pool *QueueWorkerPool) Waiter() []chan RJob { + return pool.rets +} + +type Once int32 + +func NewOnce() Once { + return Once(0) +} + +func (o *Once) Do(callback func()) { + if atomic.CompareAndSwapInt32((*int32)(o), 0, 1) { + callback() + } +} + +// WorkerPool creates a worker pool with fixed count, caller +// puts some jobs to the pool and then wait all jobs finish +type WorkerPool struct { + err chan error + wg sync.WaitGroup + queue chan Job +} + +// NewWorkerPool creates a worker pool, `worker` is worker +// count, `total` is expected job count +func NewWorkerPool(worker, total uint) *WorkerPool { + pool := &WorkerPool{ + queue: make(chan Job, total), + err: make(chan error, 1), + } + + once := NewOnce() + + for count := uint(0); count < worker; count++ { + pool.wg.Add(1) + go func() { + defer pool.wg.Done() + for { + job, ok := <-pool.queue + if !ok { + break + } + if err := job(); err != nil { + once.Do(func() { + pool.err <- err + }) + break + } + } + }() + } + + return pool +} + +func (pool *WorkerPool) Put(job Job) { + pool.queue <- job +} + +func (pool *WorkerPool) Err() chan error { + return pool.err +} + +func (pool *WorkerPool) Waiter() chan error { + close(pool.queue) + pool.wg.Wait() + close(pool.err) + return pool.err +} diff --git a/contrib/nydusify/pkg/utils/worker_test.go b/contrib/nydusify/pkg/utils/worker_test.go index 3e8d479c033..0565ad17e88 100644 --- a/contrib/nydusify/pkg/utils/worker_test.go +++ b/contrib/nydusify/pkg/utils/worker_test.go @@ -1,153 +1,153 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright 2023 Nydus Developers. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package utils - -import ( - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -type queueJob struct { - err error - before int - after int -} - -func (job *queueJob) Do() error { - if job.before == 1500 { - job.err = fmt.Errorf("Job error") - return job.err - } - time.Sleep(time.Microsecond * 1) - job.after = job.before - return nil -} - -func (job *queueJob) Err() error { - return job.err -} - -func TestQueueWorkerPool1(t *testing.T) { - pool := NewQueueWorkerPool(47, 1000) - - for i := 0; i < 1000; i++ { - job := &queueJob{ - before: i, - after: -1, - } - pool.Put(job) - } - - for idx, job := range pool.Waiter() { - ret := (<-job).(*queueJob) - require.Equal(t, ret.after, idx) - } -} - -func TestQueueWorkerPool2(t *testing.T) { - pool := NewQueueWorkerPool(47, 2000) - - for i := 0; i < 2000; i++ { - job := &queueJob{ - before: i, - after: -1, - } - pool.Put(job) - } - - for idx, _job := range pool.Waiter() { - job := <-_job - ret := job.(*queueJob) - if job.Err() != nil { - require.Equal(t, ret.before, 1500) - break - } - require.Equal(t, ret.after, idx) - } -} - -func TestWorkerPool1(t *testing.T) { - pool := NewWorkerPool(20, 50) - - for i := 0; i < 50; i++ { - pool.Put(func() error { - time.Sleep(time.Millisecond * 10) - return nil - }) - } - - require.Nil(t, <-pool.Waiter()) - require.Nil(t, <-pool.Err()) -} - -func TestWorkerPool2(t *testing.T) { - pool := NewWorkerPool(2, 2) - - pool.Put(func() error { - time.Sleep(time.Millisecond * 20) - return fmt.Errorf("Job error") - }) - - time.Sleep(time.Millisecond * 10) - - pool.Put(func() error { - time.Sleep(time.Millisecond * 30) - return nil - }) - - require.NotNil(t, <-pool.Waiter()) - require.Nil(t, <-pool.Err()) -} - -func TestWorkerPool3(t *testing.T) { - pool := NewWorkerPool(20, 5000) - - for i := 0; i < 5000; i++ { - pool.Put(func() error { - time.Sleep(time.Millisecond * 1) - return fmt.Errorf("Job error") - }) - } - - require.NotNil(t, <-pool.Waiter()) - require.Nil(t, <-pool.Err()) -} - -func TestWorkerPool4(t *testing.T) { - pool := NewWorkerPool(100, 50) - - for i := 0; i < 50; i++ { - pool.Put(func() error { - time.Sleep(time.Millisecond * 10) - return nil - }) - } - - require.Nil(t, <-pool.Waiter()) - require.Nil(t, <-pool.Err()) -} - -func TestWorkerPool5(t *testing.T) { - pool := NewWorkerPool(20, 51) - - for i := 0; i < 50; i++ { - pool.Put(func() error { - time.Sleep(time.Millisecond * 10) - return fmt.Errorf("Job error") - }) - } - - pool.Put(func() error { - time.Sleep(time.Second * 10) - return nil - }) - - require.NotNil(t, <-pool.Waiter()) - require.Nil(t, <-pool.Err()) -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +type queueJob struct { + err error + before int + after int +} + +func (job *queueJob) Do() error { + if job.before == 1500 { + job.err = fmt.Errorf("Job error") + return job.err + } + time.Sleep(time.Microsecond * 1) + job.after = job.before + return nil +} + +func (job *queueJob) Err() error { + return job.err +} + +func TestQueueWorkerPool1(t *testing.T) { + pool := NewQueueWorkerPool(47, 1000) + + for i := 0; i < 1000; i++ { + job := &queueJob{ + before: i, + after: -1, + } + pool.Put(job) + } + + for idx, job := range pool.Waiter() { + ret := (<-job).(*queueJob) + require.Equal(t, ret.after, idx) + } +} + +func TestQueueWorkerPool2(t *testing.T) { + pool := NewQueueWorkerPool(47, 2000) + + for i := 0; i < 2000; i++ { + job := &queueJob{ + before: i, + after: -1, + } + pool.Put(job) + } + + for idx, _job := range pool.Waiter() { + job := <-_job + ret := job.(*queueJob) + if job.Err() != nil { + require.Equal(t, ret.before, 1500) + break + } + require.Equal(t, ret.after, idx) + } +} + +func TestWorkerPool1(t *testing.T) { + pool := NewWorkerPool(20, 50) + + for i := 0; i < 50; i++ { + pool.Put(func() error { + time.Sleep(time.Millisecond * 10) + return nil + }) + } + + require.Nil(t, <-pool.Waiter()) + require.Nil(t, <-pool.Err()) +} + +func TestWorkerPool2(t *testing.T) { + pool := NewWorkerPool(2, 2) + + pool.Put(func() error { + time.Sleep(time.Millisecond * 20) + return fmt.Errorf("Job error") + }) + + time.Sleep(time.Millisecond * 10) + + pool.Put(func() error { + time.Sleep(time.Millisecond * 30) + return nil + }) + + require.NotNil(t, <-pool.Waiter()) + require.Nil(t, <-pool.Err()) +} + +func TestWorkerPool3(t *testing.T) { + pool := NewWorkerPool(20, 5000) + + for i := 0; i < 5000; i++ { + pool.Put(func() error { + time.Sleep(time.Millisecond * 1) + return fmt.Errorf("Job error") + }) + } + + require.NotNil(t, <-pool.Waiter()) + require.Nil(t, <-pool.Err()) +} + +func TestWorkerPool4(t *testing.T) { + pool := NewWorkerPool(100, 50) + + for i := 0; i < 50; i++ { + pool.Put(func() error { + time.Sleep(time.Millisecond * 10) + return nil + }) + } + + require.Nil(t, <-pool.Waiter()) + require.Nil(t, <-pool.Err()) +} + +func TestWorkerPool5(t *testing.T) { + pool := NewWorkerPool(20, 51) + + for i := 0; i < 50; i++ { + pool.Put(func() error { + time.Sleep(time.Millisecond * 10) + return fmt.Errorf("Job error") + }) + } + + pool.Put(func() error { + time.Sleep(time.Second * 10) + return nil + }) + + require.NotNil(t, <-pool.Waiter()) + require.Nil(t, <-pool.Err()) +} diff --git a/contrib/nydusify/pkg/viewer/viewer.go b/contrib/nydusify/pkg/viewer/viewer.go index 5f28b394864..7eb2086acc6 100644 --- a/contrib/nydusify/pkg/viewer/viewer.go +++ b/contrib/nydusify/pkg/viewer/viewer.go @@ -1,211 +1,211 @@ -package viewer - -import ( - "context" - "encoding/json" - "os" - "os/signal" - "path/filepath" - "syscall" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" -) - -func prettyDump(obj interface{}, name string) error { - bytes, err := json.MarshalIndent(obj, "", " ") - if err != nil { - return err - } - return os.WriteFile(name, bytes, 0644) -} - -// Opt defines fsViewer options, 
Target is the Nydus image reference -type Opt struct { - WorkDir string - Target string - TargetInsecure bool - - MountPath string - NydusdPath string - BackendType string - BackendConfig string - ExpectedArch string - FsVersion string -} - -// fsViewer provides complete view of file system in nydus image -type FsViewer struct { - Opt - Parser *parser.Parser - NydusdConfig tool.NydusdConfig -} - -// New creates fsViewer instance, Target is the Nydus image reference -func New(opt Opt) (*FsViewer, error) { - if opt.Target == "" { - return nil, errors.Errorf("missing target image reference, please add option '--target reference'") - } - targetRemote, err := provider.DefaultRemote(opt.Target, opt.TargetInsecure) - if err != nil { - return nil, errors.Wrap(err, "failed to create image provider") - } - targetParser, err := parser.New(targetRemote, opt.ExpectedArch) - if targetParser == nil { - return nil, errors.Wrap(err, "failed to create image reference parser") - } - - mode := "cached" - - nydusdConfig := tool.NydusdConfig{ - NydusdPath: opt.NydusdPath, - BackendType: opt.BackendType, - BackendConfig: opt.BackendConfig, - BootstrapPath: filepath.Join(opt.WorkDir, "nydus_bootstrap"), - ConfigPath: filepath.Join(opt.WorkDir, "fs/nydusd_config.json"), - BlobCacheDir: filepath.Join(opt.WorkDir, "fs/nydus_blobs"), - MountPath: opt.MountPath, - APISockPath: filepath.Join(opt.WorkDir, "fs/nydus_api.sock"), - Mode: mode, - } - - fsViewer := &FsViewer{ - Opt: opt, - Parser: targetParser, - NydusdConfig: nydusdConfig, - } - - return fsViewer, nil -} - -// Pull Bootstrap, includes nydus_manifest.json and nydus_config.json -func (fsViewer *FsViewer) PullBootstrap(ctx context.Context, targetParsed *parser.Parsed) error { - if err := os.RemoveAll(fsViewer.WorkDir); err != nil { - return errors.Wrap(err, "failed to clean up working directory") - } - - if err := os.MkdirAll(filepath.Join(fsViewer.WorkDir, "fs"), 0750); err != nil { - return errors.Wrap(err, "can't create working directory") - } - - if targetParsed.NydusImage != nil { - if err := prettyDump( - targetParsed.NydusImage.Manifest, - filepath.Join(fsViewer.WorkDir, "nydus_manifest.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus manifest file") - } - if err := prettyDump( - targetParsed.NydusImage.Config, - filepath.Join(fsViewer.WorkDir, "nydus_config.json"), - ); err != nil { - return errors.Wrap(err, "output Nydus config file") - } - - target := filepath.Join(fsViewer.WorkDir, "nydus_bootstrap") - logrus.Infof("Pulling Nydus bootstrap to %s", target) - bootstrapReader, err := fsViewer.Parser.PullNydusBootstrap(ctx, targetParsed.NydusImage) - if err != nil { - return errors.Wrap(err, "failed to pull Nydus bootstrap layer") - } - defer bootstrapReader.Close() - - if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, target); err != nil { - return errors.Wrap(err, "failed to unpack Nydus bootstrap layer") - } - } - - return nil -} - -// Mount nydus image. 
-func (fsViewer *FsViewer) MountImage() error { - logrus.Infof("Mounting Nydus image to %s", fsViewer.NydusdConfig.MountPath) - - if err := os.MkdirAll(fsViewer.NydusdConfig.BlobCacheDir, 0750); err != nil { - return errors.Wrap(err, "can't create blob cache directory for Nydusd") - } - - if err := os.MkdirAll(fsViewer.NydusdConfig.MountPath, 0750); err != nil { - return errors.Wrap(err, "can't create mountpoint directory of Nydus image") - } - - nydusd, err := tool.NewNydusd(fsViewer.NydusdConfig) - if err != nil { - return errors.Wrap(err, "can't create Nydusd daemon") - } - - if err := nydusd.Mount(); err != nil { - return errors.Wrap(err, "failed to mount Nydus image") - } - - return nil -} - -// View provides the structure of the file system in target nydus image -// It includes two steps, pull the boostrap of the image, and mount the -// image under specified path. -func (fsViewer *FsViewer) View(ctx context.Context) error { - if err := fsViewer.view(ctx); err != nil { - if utils.RetryWithHTTP(err) { - fsViewer.Parser.Remote.MaybeWithHTTP(err) - return fsViewer.view(ctx) - } - return err - - } - return nil -} - -func (fsViewer *FsViewer) view(ctx context.Context) error { - // Pull bootstrap - targetParsed, err := fsViewer.Parser.Parse(ctx) - if err != nil { - return errors.Wrap(err, "failed to parse image reference") - } - err = fsViewer.PullBootstrap(ctx, targetParsed) - if err != nil { - return errors.Wrap(err, "failed to pull Nydus image bootstrap") - } - - // Adjust nydusd parameters(DigestValidate) according to rafs format - nydusManifest := parser.FindNydusBootstrapDesc(&targetParsed.NydusImage.Manifest) - if nydusManifest != nil { - v := utils.GetNydusFsVersionOrDefault(nydusManifest.Annotations, utils.V5) - if v == utils.V5 { - // Digest validate is not currently supported for v6, - // but v5 supports it. In order to make the check more sufficient, - // this validate needs to be turned on for v5. 
- fsViewer.NydusdConfig.DigestValidate = true - } - } - - err = fsViewer.MountImage() - if err != nil { - return err - } - - // Block current goroutine in order to umount the file system and clean up workdir - sigs := make(chan os.Signal, 1) - done := make(chan bool, 1) - signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) - - go func() { - sig := <-sigs - logrus.Infof("Received Signal: %s", sig) - done <- true - }() - - logrus.Infof("Please send signal SIGINT/SIGTERM to umount the file system") - <-done - if err := os.RemoveAll(fsViewer.WorkDir); err != nil { - return errors.Wrap(err, "failed to clean up working directory") - } - - return nil -} +package viewer + +import ( + "context" + "encoding/json" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/checker/tool" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/parser" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider" + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/utils" +) + +func prettyDump(obj interface{}, name string) error { + bytes, err := json.MarshalIndent(obj, "", " ") + if err != nil { + return err + } + return os.WriteFile(name, bytes, 0644) +} + +// Opt defines fsViewer options, Target is the Nydus image reference +type Opt struct { + WorkDir string + Target string + TargetInsecure bool + + MountPath string + NydusdPath string + BackendType string + BackendConfig string + ExpectedArch string + FsVersion string +} + +// fsViewer provides complete view of file system in nydus image +type FsViewer struct { + Opt + Parser *parser.Parser + NydusdConfig tool.NydusdConfig +} + +// New creates fsViewer instance, Target is the Nydus image reference +func New(opt Opt) (*FsViewer, error) { + if opt.Target == "" { + return nil, errors.Errorf("missing target image reference, please add option '--target reference'") + } + targetRemote, err := provider.DefaultRemote(opt.Target, opt.TargetInsecure) + if err != nil { + return nil, errors.Wrap(err, "failed to create image provider") + } + targetParser, err := parser.New(targetRemote, opt.ExpectedArch) + if targetParser == nil { + return nil, errors.Wrap(err, "failed to create image reference parser") + } + + mode := "cached" + + nydusdConfig := tool.NydusdConfig{ + NydusdPath: opt.NydusdPath, + BackendType: opt.BackendType, + BackendConfig: opt.BackendConfig, + BootstrapPath: filepath.Join(opt.WorkDir, "nydus_bootstrap"), + ConfigPath: filepath.Join(opt.WorkDir, "fs/nydusd_config.json"), + BlobCacheDir: filepath.Join(opt.WorkDir, "fs/nydus_blobs"), + MountPath: opt.MountPath, + APISockPath: filepath.Join(opt.WorkDir, "fs/nydus_api.sock"), + Mode: mode, + } + + fsViewer := &FsViewer{ + Opt: opt, + Parser: targetParser, + NydusdConfig: nydusdConfig, + } + + return fsViewer, nil +} + +// Pull Bootstrap, includes nydus_manifest.json and nydus_config.json +func (fsViewer *FsViewer) PullBootstrap(ctx context.Context, targetParsed *parser.Parsed) error { + if err := os.RemoveAll(fsViewer.WorkDir); err != nil { + return errors.Wrap(err, "failed to clean up working directory") + } + + if err := os.MkdirAll(filepath.Join(fsViewer.WorkDir, "fs"), 0750); err != nil { + return errors.Wrap(err, "can't create working directory") + } + + if targetParsed.NydusImage != nil { + if err := prettyDump( + targetParsed.NydusImage.Manifest, + filepath.Join(fsViewer.WorkDir, "nydus_manifest.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus manifest file") + } + 
if err := prettyDump( + targetParsed.NydusImage.Config, + filepath.Join(fsViewer.WorkDir, "nydus_config.json"), + ); err != nil { + return errors.Wrap(err, "output Nydus config file") + } + + target := filepath.Join(fsViewer.WorkDir, "nydus_bootstrap") + logrus.Infof("Pulling Nydus bootstrap to %s", target) + bootstrapReader, err := fsViewer.Parser.PullNydusBootstrap(ctx, targetParsed.NydusImage) + if err != nil { + return errors.Wrap(err, "failed to pull Nydus bootstrap layer") + } + defer bootstrapReader.Close() + + if err := utils.UnpackFile(bootstrapReader, utils.BootstrapFileNameInLayer, target); err != nil { + return errors.Wrap(err, "failed to unpack Nydus bootstrap layer") + } + } + + return nil +} + +// Mount nydus image. +func (fsViewer *FsViewer) MountImage() error { + logrus.Infof("Mounting Nydus image to %s", fsViewer.NydusdConfig.MountPath) + + if err := os.MkdirAll(fsViewer.NydusdConfig.BlobCacheDir, 0750); err != nil { + return errors.Wrap(err, "can't create blob cache directory for Nydusd") + } + + if err := os.MkdirAll(fsViewer.NydusdConfig.MountPath, 0750); err != nil { + return errors.Wrap(err, "can't create mountpoint directory of Nydus image") + } + + nydusd, err := tool.NewNydusd(fsViewer.NydusdConfig) + if err != nil { + return errors.Wrap(err, "can't create Nydusd daemon") + } + + if err := nydusd.Mount(); err != nil { + return errors.Wrap(err, "failed to mount Nydus image") + } + + return nil +} + +// View provides the structure of the file system in target nydus image +// It includes two steps, pull the boostrap of the image, and mount the +// image under specified path. +func (fsViewer *FsViewer) View(ctx context.Context) error { + if err := fsViewer.view(ctx); err != nil { + if utils.RetryWithHTTP(err) { + fsViewer.Parser.Remote.MaybeWithHTTP(err) + return fsViewer.view(ctx) + } + return err + + } + return nil +} + +func (fsViewer *FsViewer) view(ctx context.Context) error { + // Pull bootstrap + targetParsed, err := fsViewer.Parser.Parse(ctx) + if err != nil { + return errors.Wrap(err, "failed to parse image reference") + } + err = fsViewer.PullBootstrap(ctx, targetParsed) + if err != nil { + return errors.Wrap(err, "failed to pull Nydus image bootstrap") + } + + // Adjust nydusd parameters(DigestValidate) according to rafs format + nydusManifest := parser.FindNydusBootstrapDesc(&targetParsed.NydusImage.Manifest) + if nydusManifest != nil { + v := utils.GetNydusFsVersionOrDefault(nydusManifest.Annotations, utils.V5) + if v == utils.V5 { + // Digest validate is not currently supported for v6, + // but v5 supports it. In order to make the check more sufficient, + // this validate needs to be turned on for v5. 
+ fsViewer.NydusdConfig.DigestValidate = true + } + } + + err = fsViewer.MountImage() + if err != nil { + return err + } + + // Block current goroutine in order to umount the file system and clean up workdir + sigs := make(chan os.Signal, 1) + done := make(chan bool, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + + go func() { + sig := <-sigs + logrus.Infof("Received Signal: %s", sig) + done <- true + }() + + logrus.Infof("Please send signal SIGINT/SIGTERM to umount the file system") + <-done + if err := os.RemoveAll(fsViewer.WorkDir); err != nil { + return errors.Wrap(err, "failed to clean up working directory") + } + + return nil +} diff --git a/contrib/nydusify/plugin/main.go b/contrib/nydusify/plugin/main.go index 6dc0547917f..c18dd019bf0 100644 --- a/contrib/nydusify/plugin/main.go +++ b/contrib/nydusify/plugin/main.go @@ -1,20 +1,20 @@ -package main - -import ( - "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/hook" -) - -type LocalHook struct { -} - -func (h *LocalHook) BeforePushManifest(_ *hook.Info) error { - return nil -} - -func (h *LocalHook) AfterPushManifest(_ *hook.Info) error { - return nil -} - -func main() { - hook.NewPlugin(&LocalHook{}) -} +package main + +import ( + "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/hook" +) + +type LocalHook struct { +} + +func (h *LocalHook) BeforePushManifest(_ *hook.Info) error { + return nil +} + +func (h *LocalHook) AfterPushManifest(_ *hook.Info) error { + return nil +} + +func main() { + hook.NewPlugin(&LocalHook{}) +} diff --git a/deny.toml b/deny.toml index 48c7f660a6c..6537b673c35 100644 --- a/deny.toml +++ b/deny.toml @@ -1,197 +1,197 @@ -# This template contains all of the possible sections and their default values - -# Note that all fields that take a lint level have these possible values: -# * deny - An error will be produced and the check will fail -# * warn - A warning will be produced, but the check will not fail -# * allow - No warning or error will be produced, though in some cases a note -# will be - -# The values provided in this template are the default values that will be used -# when any section or field is not specified in your own configuration - -# If 1 or more target triples (and optionally, target_features) are specified, -# only the specified targets will be checked when running `cargo deny check`. -# This means, if a particular package is only ever used as a target specific -# dependency, such as, for example, the `nix` crate only being used via the -# `target_family = "unix"` configuration, that only having windows targets in -# this list would mean the nix crate, as well as any of its exclusive -# dependencies not shared by any other crates, would be ignored, as the target -# list here is effectively saying which targets you are building for. -targets = [ - # The triple can be any string, but only the target triples built in to - # rustc (as of 1.40) can be checked against actual config expressions - #{ triple = "x86_64-unknown-linux-musl" }, - # You can also specify which target_features you promise are enabled for a - # particular target. target_features are currently not validated against - # the actual valid features supported by the target architecture. 
- #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, -] - -# This section is considered when running `cargo deny check advisories` -# More documentation for the advisories section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html -[advisories] -# The path where the advisory database is cloned/fetched into -db-path = "~/.cargo/advisory-db" -# The url(s) of the advisory databases to use -db-urls = ["https://github.com/rustsec/advisory-db"] -# The lint level for security vulnerabilities -vulnerability = "deny" -# The lint level for unmaintained crates -unmaintained = "warn" -# The lint level for crates that have been yanked from their source registry -yanked = "warn" -# The lint level for crates with security notices. Note that as of -# 2019-12-17 there are no security notice advisories in -# https://github.com/rustsec/advisory-db -notice = "warn" -# A list of advisory IDs to ignore. Note that ignored advisories will still -# output a note when they are encountered. -ignore = [ -] -# Threshold for security vulnerabilities, any vulnerability with a CVSS score -# lower than the range specified will be ignored. Note that ignored advisories -# will still output a note when they are encountered. -# * None - CVSS Score 0.0 -# * Low - CVSS Score 0.1 - 3.9 -# * Medium - CVSS Score 4.0 - 6.9 -# * High - CVSS Score 7.0 - 8.9 -# * Critical - CVSS Score 9.0 - 10.0 -#severity-threshold = - -# This section is considered when running `cargo deny check licenses` -# More documentation for the licenses section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html -[licenses] -# The lint level for crates which do not have a detectable license -unlicensed = "deny" -# List of explictly allowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -allow = [ - "MIT", - "Apache-2.0", - "BSD-3-Clause", - "BSD-2-Clause", - "CC0-1.0", - "Unicode-DFS-2016", -] -# List of explictly disallowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -deny = [ - #"Nokia", -] -# Lint level for licenses considered copyleft -copyleft = "deny" -# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses -# * both - The license will be approved if it is both OSI-approved *AND* FSF -# * either - The license will be approved if it is either OSI-approved *OR* FSF -# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF -# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved -# * neither - This predicate is ignored and the default lint level is used -allow-osi-fsf-free = "neither" -# Lint level used when no other predicates are matched -# 1. License isn't in the allow or deny lists -# 2. License isn't copyleft -# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" -default = "deny" -# The confidence threshold for detecting a license from license text. -# The higher the value, the more closely the license text must be to the -# canonical license text of a valid SPDX license file. -# [possible values: any between 0.0 and 1.0]. 
-confidence-threshold = 0.8 -# Allow 1 or more licenses on a per-crate basis, so that particular licenses -# aren't accepted for every possible crate as with the normal allow list -exceptions = [ - # Each entry is the crate and version constraint, and its specific allow - # list - #{ allow = ["Zlib"], name = "adler32", version = "*" }, -] - -# Some crates don't have (easily) machine readable licensing information, -# adding a clarification entry for it allows you to manually specify the -# licensing information -#[[licenses.clarify]] -# The name of the crate the clarification applies to -#name = "ring" -# The optional version constraint for the crate -#version = "*" -# The SPDX expression for the license requirements of the crate -#expression = "MIT AND ISC AND OpenSSL" -# One or more files in the crate's source used as the "source of truth" for -# the license expression. If the contents match, the clarification will be used -# when running the license check, otherwise the clarification will be ignored -# and the crate will be checked normally, which may produce warnings or errors -# depending on the rest of your configuration -#license-files = [ - # Each entry is a crate relative path, and the (opaque) hash of its contents - #{ path = "LICENSE", hash = 0xbd0eed23 } -#] - -[licenses.private] -# If true, ignores workspace crates that aren't published, or are only -# published to private registries -ignore = false -# One or more private registries that you might publish crates to, if a crate -# is only published to private registries, and ignore is true, the crate will -# not have its license(s) checked -registries = [ - #"https://sekretz.com/registry -] - -# This section is considered when running `cargo deny check bans`. -# More documentation about the 'bans' section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html -[bans] -# Lint level for when multiple versions of the same crate are detected -multiple-versions = "warn" -# Lint level for when a crate version requirement is `*` -wildcards = "allow" -# The graph highlighting used when creating dotgraphs for crates -# with multiple versions -# * lowest-version - The path to the lowest versioned duplicate is highlighted -# * simplest-path - The path to the version with the fewest edges is highlighted -# * all - Both lowest-version and simplest-path are used -highlight = "all" -# List of crates that are allowed. Use with care! -allow = [ - #{ name = "ansi_term", version = "=0.11.0" }, -] -# List of crates to deny -deny = [ - # Each entry the name of a crate and a version range. If version is - # not specified, all versions will be matched. - #{ name = "ansi_term", version = "=0.11.0" }, - # - # Wrapper crates can optionally be specified to allow the crate when it - # is a direct dependency of the otherwise banned crate - #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, -] -# Certain crates/versions that will be skipped when doing duplicate detection. -skip = [ - #{ name = "ansi_term", version = "=0.11.0" }, -] -# Similarly to `skip` allows you to skip certain crates during duplicate -# detection. Unlike skip, it also includes the entire tree of transitive -# dependencies starting at the specified crate, up to a certain depth, which is -# by default infinite -skip-tree = [ - #{ name = "ansi_term", version = "=0.11.0", depth = 20 }, -] - -# This section is considered when running `cargo deny check sources`. 
-# More documentation about the 'sources' section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html -[sources] -# Lint level for what to happen when a crate from a crate registry that is not -# in the allow list is encountered -unknown-registry = "warn" -# Lint level for what to happen when a crate from a git repository that is not -# in the allow list is encountered -unknown-git = "warn" -# List of URLs for allowed crate registries. Defaults to the crates.io index -# if not specified. If it is specified but empty, no registries are allowed. -allow-registry = ["https://github.com/rust-lang/crates.io-index"] -# List of URLs for allowed Git repositories -#allow-git = [ ] +# This template contains all of the possible sections and their default values + +# Note that all fields that take a lint level have these possible values: +# * deny - An error will be produced and the check will fail +# * warn - A warning will be produced, but the check will not fail +# * allow - No warning or error will be produced, though in some cases a note +# will be + +# The values provided in this template are the default values that will be used +# when any section or field is not specified in your own configuration + +# If 1 or more target triples (and optionally, target_features) are specified, +# only the specified targets will be checked when running `cargo deny check`. +# This means, if a particular package is only ever used as a target specific +# dependency, such as, for example, the `nix` crate only being used via the +# `target_family = "unix"` configuration, that only having windows targets in +# this list would mean the nix crate, as well as any of its exclusive +# dependencies not shared by any other crates, would be ignored, as the target +# list here is effectively saying which targets you are building for. +targets = [ + # The triple can be any string, but only the target triples built in to + # rustc (as of 1.40) can be checked against actual config expressions + #{ triple = "x86_64-unknown-linux-musl" }, + # You can also specify which target_features you promise are enabled for a + # particular target. target_features are currently not validated against + # the actual valid features supported by the target architecture. + #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, +] + +# This section is considered when running `cargo deny check advisories` +# More documentation for the advisories section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html +[advisories] +# The path where the advisory database is cloned/fetched into +db-path = "~/.cargo/advisory-db" +# The url(s) of the advisory databases to use +db-urls = ["https://github.com/rustsec/advisory-db"] +# The lint level for security vulnerabilities +vulnerability = "deny" +# The lint level for unmaintained crates +unmaintained = "warn" +# The lint level for crates that have been yanked from their source registry +yanked = "warn" +# The lint level for crates with security notices. Note that as of +# 2019-12-17 there are no security notice advisories in +# https://github.com/rustsec/advisory-db +notice = "warn" +# A list of advisory IDs to ignore. Note that ignored advisories will still +# output a note when they are encountered. +ignore = [ +] +# Threshold for security vulnerabilities, any vulnerability with a CVSS score +# lower than the range specified will be ignored. Note that ignored advisories +# will still output a note when they are encountered. 
+# * None - CVSS Score 0.0 +# * Low - CVSS Score 0.1 - 3.9 +# * Medium - CVSS Score 4.0 - 6.9 +# * High - CVSS Score 7.0 - 8.9 +# * Critical - CVSS Score 9.0 - 10.0 +#severity-threshold = + +# This section is considered when running `cargo deny check licenses` +# More documentation for the licenses section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html +[licenses] +# The lint level for crates which do not have a detectable license +unlicensed = "deny" +# List of explictly allowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. +allow = [ + "MIT", + "Apache-2.0", + "BSD-3-Clause", + "BSD-2-Clause", + "CC0-1.0", + "Unicode-DFS-2016", +] +# List of explictly disallowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. +deny = [ + #"Nokia", +] +# Lint level for licenses considered copyleft +copyleft = "deny" +# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses +# * both - The license will be approved if it is both OSI-approved *AND* FSF +# * either - The license will be approved if it is either OSI-approved *OR* FSF +# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF +# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved +# * neither - This predicate is ignored and the default lint level is used +allow-osi-fsf-free = "neither" +# Lint level used when no other predicates are matched +# 1. License isn't in the allow or deny lists +# 2. License isn't copyleft +# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" +default = "deny" +# The confidence threshold for detecting a license from license text. +# The higher the value, the more closely the license text must be to the +# canonical license text of a valid SPDX license file. +# [possible values: any between 0.0 and 1.0]. +confidence-threshold = 0.8 +# Allow 1 or more licenses on a per-crate basis, so that particular licenses +# aren't accepted for every possible crate as with the normal allow list +exceptions = [ + # Each entry is the crate and version constraint, and its specific allow + # list + #{ allow = ["Zlib"], name = "adler32", version = "*" }, +] + +# Some crates don't have (easily) machine readable licensing information, +# adding a clarification entry for it allows you to manually specify the +# licensing information +#[[licenses.clarify]] +# The name of the crate the clarification applies to +#name = "ring" +# The optional version constraint for the crate +#version = "*" +# The SPDX expression for the license requirements of the crate +#expression = "MIT AND ISC AND OpenSSL" +# One or more files in the crate's source used as the "source of truth" for +# the license expression. 
If the contents match, the clarification will be used +# when running the license check, otherwise the clarification will be ignored +# and the crate will be checked normally, which may produce warnings or errors +# depending on the rest of your configuration +#license-files = [ + # Each entry is a crate relative path, and the (opaque) hash of its contents + #{ path = "LICENSE", hash = 0xbd0eed23 } +#] + +[licenses.private] +# If true, ignores workspace crates that aren't published, or are only +# published to private registries +ignore = false +# One or more private registries that you might publish crates to, if a crate +# is only published to private registries, and ignore is true, the crate will +# not have its license(s) checked +registries = [ + #"https://sekretz.com/registry +] + +# This section is considered when running `cargo deny check bans`. +# More documentation about the 'bans' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html +[bans] +# Lint level for when multiple versions of the same crate are detected +multiple-versions = "warn" +# Lint level for when a crate version requirement is `*` +wildcards = "allow" +# The graph highlighting used when creating dotgraphs for crates +# with multiple versions +# * lowest-version - The path to the lowest versioned duplicate is highlighted +# * simplest-path - The path to the version with the fewest edges is highlighted +# * all - Both lowest-version and simplest-path are used +highlight = "all" +# List of crates that are allowed. Use with care! +allow = [ + #{ name = "ansi_term", version = "=0.11.0" }, +] +# List of crates to deny +deny = [ + # Each entry the name of a crate and a version range. If version is + # not specified, all versions will be matched. + #{ name = "ansi_term", version = "=0.11.0" }, + # + # Wrapper crates can optionally be specified to allow the crate when it + # is a direct dependency of the otherwise banned crate + #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, +] +# Certain crates/versions that will be skipped when doing duplicate detection. +skip = [ + #{ name = "ansi_term", version = "=0.11.0" }, +] +# Similarly to `skip` allows you to skip certain crates during duplicate +# detection. Unlike skip, it also includes the entire tree of transitive +# dependencies starting at the specified crate, up to a certain depth, which is +# by default infinite +skip-tree = [ + #{ name = "ansi_term", version = "=0.11.0", depth = 20 }, +] + +# This section is considered when running `cargo deny check sources`. +# More documentation about the 'sources' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html +[sources] +# Lint level for what to happen when a crate from a crate registry that is not +# in the allow list is encountered +unknown-registry = "warn" +# Lint level for what to happen when a crate from a git repository that is not +# in the allow list is encountered +unknown-git = "warn" +# List of URLs for allowed crate registries. Defaults to the crates.io index +# if not specified. If it is specified but empty, no registries are allowed. 
+allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# List of URLs for allowed Git repositories +#allow-git = [ ] diff --git a/docs/chunk-deduplication.md b/docs/chunk-deduplication.md index a33472d060c..0bfc8462979 100644 --- a/docs/chunk-deduplication.md +++ b/docs/chunk-deduplication.md @@ -1,153 +1,153 @@ -# Chunk-Level Deduplication: Storage Optimization for Nydus Images - -## Probntroduction - -In container images, there are often a large number of duplicate files or content, and these duplicate parts occupy a large amount of storage space, especially in high-density deployment scenarios. As the number of Nydus images grows, it will bring many problems such as low storage space utilization and excessive consumption of bandwidth resources. To do this, an effective deduplication mechanism (deduplication) needs to be designed to solve this problem. - -Unlike traditional OCI, which distributes images at a layer-granular level, the smallest unit of a Nydus image is a chunk, so the deduplication algorithm needs to be deduplicated in chunk units. At the same time, we want to deduplicate multiple aspects of the Nydus image, including between Nydus images and between different versions of the same Nydus image. No matter which deduplication method is essentially to deduplicate the repeated chunks in the image, only one duplicate chunk is retained, and the reference to the chunk is used instead of other duplicate chunks to reduce the storage space occupation, so as to maximize the data transmission and storage capabilities of Nydus and improve the access speed and efficiency of the image. - -## General idea - -The deduplication algorithm first needs to select the duplicate chunk in the image according to the image information such as the number of occurrences of chunk, chunk size, chunk image to which the chunk belongs and the corresponding version, and generate chunkdict, chunkdict records the unique identifier or fingerprint of chunk, only need to store chunkdict, other images can refer to chunk in chunkdict by reference. - -The deduplication algorithm is divided into two parts, the first part is the DBSCAN clustering algorithm, which deduplicates different images; The second part is the exponential smoothing algorithm, which deduplicates different versions within the image. - -**The general process is as follows:** - -1. Store the image information to the local database, -2. Extract the image information and call the DBSCAN clustering algorithm to deduplicate different images. -3. Deduplicate the dictionary content in 2, and call the exponential smoothing algorithm for each image separately for image version deduplication. -4. Get the deduplication dictionary generated by running the two algorithms and drop the disk. -5. 
Generate a chunkdict image and push it to the remote repository - -## Algorithm detailed process - -### Overall Input - -```shell -nydusify chunkdict generate --sources \ - registry.com/redis:nydus_7.0.1, \ - registry.com/redis:nydus_7.0.2, \ - registry.com/redis:nydus_7.0.3 \ - -- target registry.com/redis:nydus_chunkdict \ - --source-insecure --target-insecure - # Optional - --backend-config-file /path/to/backend-config.json \ - --backend-type oss -``` - -## Use the chunk dict image to reduce the incremental size of the new image - -``` -nydusify convert - --source registry.com/redis:OCI_7.0.4 \ - --target registry.com/redis:nydus_7.0.4 \ - --chunk-dict registry.com/redis:nydus_chunkdict -``` - -*** -`nydusify chunkdict generate` calls subcommand `nydus-image chunkdict generate` to store image information into the database and generate a new bootstrap as chunkdict bootstrap. - -Download multiple Nydus images in advance and put them into the repository as datasets, such as selecting 10 consecutive versions of redis and alpine as the image dataset, and execute the command `nydus-image chunkdict generate` to store the information of the chunk and blob in the chunk and blob table of the database. - -```shell -# Deposit multiple images into the database -nydus-image chunkdict generate --source \ - /path/localhost:5000:redis:nydus_7.0.1/nydus_bootstrap, \ - /path/localhost:5000:redis:nydus_7.0.2/nydus_bootstrap, \ - /path/localhost:5000:redis:nydus_7.0.3/nydus_bootstrap \ - --bootstrap /path/to/chunkdict_bootstrap\ - --database /path/to/database.db\ - --output-json /path/to/nydus_bootstrap_output.json -``` - -*** - -### Deduplication algorithm - -#### Algorithm 1 Deduplication between different images (DBSCAN clustering algorithm) - -*** -**Basic principle:** DBSCAN is a density-based clustering algorithm, which mainly investigates the connectivity between samples through sample density, samples of the same category, they are closely connected, in other words, there must be samples of the same category not far around any sample of the category. Therefore, it can group a group of objects with high density and close distance, can find clusters of arbitrary shapes, and does not need to specify the number of clusters in advance, which is suitable for high-density deployment scenarios. - -**Input:** Read the chunk information in the database and store it in the chunk list. Chunk information includes:image_name, version, chunk_blob_id, chunk_digest, chunk_compressed_size, and so on. - -**Output:** The chunk dictionary corresponding to each image cluster - -**Basic steps:** -**1.** Select a part of the version as the training set and the rest as the test set according to a certain proportion of all images. - -**2.** Divide all chunks in the training set into a new list according to the image_name, and each list corresponds to an image and all chunk sets in the image. - -**3.** These images are done using the DBSCAN (Density-Based Spatial Clustering of Applications with Noise) algorithm -Clustering. - -*** -3.1 Initialize the core point collection $Omega$ as an empty set,and set the clustering algorithm radius $gamma = 0.5$, and the sample number threshold $MinPts = 10$ - -3.2 Loop through each image and its corresponding chunk list,and calculate its distance from other images according to the following formula. 
-$$ distance (x,y)= \frac{\lvert C(R_x) \cup C(R_y) \rvert - \lvert C(R_x) \cap C(R_y) \rvert}{\lvert C(R_x) \cup C(R_y) \rvert }$$ -where $C(R_x)$ represents the unique chunk set of all training set images in the image. Calculate the number of images based on $distance(x,y) \leq \gamma$,If there are M y, such that $distance(x,y) \leq \gamma$, where $M \geq MinPts$, then add the imagex to the core point set, and image y is called the image in the neighborhood of the core image x; - -3.3 Initialize the number of cluster classes k=0, and then iterate the core point warehouse collection in turn, and add all the neighboring warehouses in the core point warehouse to the queue, if a warehouse in the neighborhood is also a core warehouse, all warehouses in its neighborhood join the queue, classify the warehouses in the above queue into a cluster class, and continue to traverse the core warehouse collection until all core warehouses are traversed. - -3.4 Calculate the frequency of chunks that appear in each class image. Add the chunk that appears in the image above $90%$ in the training set to the dictionary corresponding to the cluster class to generate a set of < cluster classes, and the dictionary > pairs. -*** -**4.** Adjust the neighborhood radius size and repeat step 3 to obtain multiple deduplication dictionaries. - -**5.** Use the test set to evaluate multiple deduplication dictionaries in 4, and select the chunk dictionary corresponding to the test set with the smallest storage space. - -**6.** Remove the chunk in the chunk dictionary selected in 5 for all images (training set and test set), and then repeat the operation 1-5 to generate the chunk dictionary until the maximum number of cycles is reached 7, or the discrete image ratio is greater than 80% of the total number of images. - -The principle of DBSCAN algorithm how to divide the cluster is shown in the diagram: -![dbscan algorithm](images/nydus_chunkdict_dbscan_algorithm.png) -**Remark:** This section of the picture and the associated DBSCAN algorithm description are referenced from : [https://en.wikipedia.org/wiki/DBSCAN](https://en.wikipedia.org/wiki/DBSCAN) - -#### Algorithm 2 Deduplication between different versions of the image (exponential smoothing algorithm) - -*** -**Basic principle:** Exponential smoothing algorithm is a method for time series data prediction and smoothing, the basic principle is to weighted average the data, give higher weight to the more recent repeated chunks, and constantly update the smoothing value, so the newer chunk has a greater impact on future forecasts, and the impact of older data will gradually weaken. - -**Input:** The training set and test set after deduplication in algorithm 1. - -**Output:** The chunk dictionary corresponding to each image. - -**Basic steps:** -**1.** Divide all chunks in the training set into a new list according to the image_name, and each list corresponds to an image and all chunk sets in the image. - -**2.** The different versions inside each image are sorted chronologically, and each chunk is scored according to the Exponential Smoothing formula. -$$S_0 =0 ,S_t = \alpha Y_{t-1} +(1- \alpha)S_{t-1} $$ -where, $\alpha=0.5$ , $Y_{t-1}$ indicates whether the chunk appeared in the previous image, 1 if it did, otherwise 0. - -**3.** Count the score for each chunk and select all chunks with a score greater than $THs$ as the chunk dictionary. Deduplicate the image version in the test set and calculate the storage space it occupies. 
- -**4.** Modify the value of $THs$ from 0.8 to 0.5 in steps of 0.05 and repeat steps 2 and 3 to generate multiple chunk dictionaries. - -**5.** Choose a chunk dictionary that minimizes the test set's storage space. -*** - -### Exponential Smoothing Algorithm Test - -#### Procedure - -**1.** Download 10 versions of each OCI image and count the total size in MB. -**2.** Convert the OCI images to Nydus format and then count the total size in MB after conversion. -**3.** Select three versions of each image to generate a chunk dictionary. Use the chunk dictionary to convert the remaining seven versions of the image, and then count the total size in MB after deduplication. - -#### Image Information Table - -| **Image Name** | **Number of Versions** | **Total Image Size (OCI)** | **Total Image Size (Nydus)** | -| :------------: | :--------------------: | :------------------------: | :--------------------------: | -| **Redis** | 10 | 341.78 MB | 419.37 MB | -| **Ubuntu** | 10 | 290.26 MB | 308.59 MB | -| **Alpine** | 10 | 26.9 MB | 27.55 MB | - -#### Deduplication Results Table - -| **Image Name** | **Chunkdict Image Size** | **Total Image Size (Nydus after Deduplicating)** | **Deduplicating Rate** | -| :------------: | :----------------------: | :----------------------------------------------: | :--------------------: | -| **Redis** | 41.87 MB | 319.48 MB | 23.82% | -| **Ubuntu** | 30.8 MB | 140.28 MB | 54.54% | -| **Alpine** | 2.74 MB | 24.7 MB | 10.34% | - -*** +# Chunk-Level Deduplication: Storage Optimization for Nydus Images + +## Probntroduction + +In container images, there are often a large number of duplicate files or content, and these duplicate parts occupy a large amount of storage space, especially in high-density deployment scenarios. As the number of Nydus images grows, it will bring many problems such as low storage space utilization and excessive consumption of bandwidth resources. To do this, an effective deduplication mechanism (deduplication) needs to be designed to solve this problem. + +Unlike traditional OCI, which distributes images at a layer-granular level, the smallest unit of a Nydus image is a chunk, so the deduplication algorithm needs to be deduplicated in chunk units. At the same time, we want to deduplicate multiple aspects of the Nydus image, including between Nydus images and between different versions of the same Nydus image. No matter which deduplication method is essentially to deduplicate the repeated chunks in the image, only one duplicate chunk is retained, and the reference to the chunk is used instead of other duplicate chunks to reduce the storage space occupation, so as to maximize the data transmission and storage capabilities of Nydus and improve the access speed and efficiency of the image. + +## General idea + +The deduplication algorithm first needs to select the duplicate chunk in the image according to the image information such as the number of occurrences of chunk, chunk size, chunk image to which the chunk belongs and the corresponding version, and generate chunkdict, chunkdict records the unique identifier or fingerprint of chunk, only need to store chunkdict, other images can refer to chunk in chunkdict by reference. + +The deduplication algorithm is divided into two parts, the first part is the DBSCAN clustering algorithm, which deduplicates different images; The second part is the exponential smoothing algorithm, which deduplicates different versions within the image. + +**The general process is as follows:** + +1. 
Store the image information in the local database.
+2. Extract the image information and run the DBSCAN clustering algorithm to deduplicate across different images.
+3. Deduplicate the dictionary content from step 2, then run the exponential smoothing algorithm on each image separately to deduplicate across image versions.
+4. Collect the deduplication dictionary produced by the two algorithms and write it to disk.
+5. Generate a chunkdict image and push it to the remote repository.
+
+## Algorithm detailed process
+
+### Overall Input
+
+```shell
+nydusify chunkdict generate --sources \
+    registry.com/redis:nydus_7.0.1, \
+    registry.com/redis:nydus_7.0.2, \
+    registry.com/redis:nydus_7.0.3 \
+    --target registry.com/redis:nydus_chunkdict \
+    --source-insecure --target-insecure
+    # Optional
+    --backend-config-file /path/to/backend-config.json \
+    --backend-type oss
+```
+
+## Use the chunk dict image to reduce the incremental size of the new image
+
+```shell
+nydusify convert \
+    --source registry.com/redis:OCI_7.0.4 \
+    --target registry.com/redis:nydus_7.0.4 \
+    --chunk-dict registry.com/redis:nydus_chunkdict
+```
+
+***
+`nydusify chunkdict generate` calls the subcommand `nydus-image chunkdict generate` to store image information in the database and to generate a new bootstrap, the chunkdict bootstrap.
+
+Download multiple Nydus images in advance and put them into the repository as a dataset (for example, 10 consecutive versions of redis and alpine), then execute `nydus-image chunkdict generate` to store the chunk and blob information in the chunk and blob tables of the database.
+
+```shell
+# Deposit multiple images into the database
+nydus-image chunkdict generate --source \
+    /path/localhost:5000:redis:nydus_7.0.1/nydus_bootstrap, \
+    /path/localhost:5000:redis:nydus_7.0.2/nydus_bootstrap, \
+    /path/localhost:5000:redis:nydus_7.0.3/nydus_bootstrap \
+    --bootstrap /path/to/chunkdict_bootstrap \
+    --database /path/to/database.db \
+    --output-json /path/to/nydus_bootstrap_output.json
+```
+
+***
+
+### Deduplication algorithm
+
+#### Algorithm 1 Deduplication between different images (DBSCAN clustering algorithm)
+
+***
+**Basic principle:** DBSCAN is a density-based clustering algorithm that measures the connectivity between samples through sample density: samples of the same category are closely connected, meaning that around any sample of a category there are other samples of that category nearby. It can therefore group objects that are dense and close together, can find clusters of arbitrary shape, and does not require the number of clusters to be specified in advance, which suits high-density deployment scenarios.
+
+**Input:** Read the chunk information from the database and store it in a chunk list. Chunk information includes: image_name, version, chunk_blob_id, chunk_digest, chunk_compressed_size, and so on.
+
+**Output:** The chunk dictionary corresponding to each image cluster.
+
+**Basic steps:**
+**1.** For every image, select a portion of its versions as the training set and the rest as the test set, according to a fixed proportion.
+
+**2.** Divide all chunks in the training set into lists by image_name, so that each list corresponds to one image and all of the chunk sets in that image.
+
+**3.** Cluster these images with the DBSCAN (Density-Based Spatial Clustering of Applications with Noise) algorithm; a small sketch of the distance it uses follows this step.
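+
+The following Go snippet is a minimal, illustrative sketch of the two scoring primitives described in steps 3.1–3.4 below and in Algorithm 2: the chunk-set distance and the exponential smoothing score. It is not the nydus-image implementation; the type and function names (`chunkSet`, `distance`, `isCore`, `smoothingScore`) are hypothetical, and chunks are represented simply by their `chunk_digest` strings.
+
+```go
+package main
+
+import "fmt"
+
+// chunkSet is the set of unique chunk digests of one image, keyed by chunk_digest.
+type chunkSet map[string]struct{}
+
+// distance computes the set distance from step 3.2:
+// (|C(Rx) ∪ C(Ry)| - |C(Rx) ∩ C(Ry)|) / |C(Rx) ∪ C(Ry)|,
+// i.e. the Jaccard distance between the chunk sets of two images.
+func distance(x, y chunkSet) float64 {
+	inter := 0
+	for d := range x {
+		if _, ok := y[d]; ok {
+			inter++
+		}
+	}
+	union := len(x) + len(y) - inter
+	if union == 0 {
+		return 0
+	}
+	return float64(union-inter) / float64(union)
+}
+
+// isCore reports whether image x is a DBSCAN core point: at least minPts
+// other images lie within radius gamma of x (step 3.2).
+func isCore(x chunkSet, others []chunkSet, gamma float64, minPts int) bool {
+	neighbors := 0
+	for _, y := range others {
+		if distance(x, y) <= gamma {
+			neighbors++
+		}
+	}
+	return neighbors >= minPts
+}
+
+// smoothingScore applies S_0 = 0, S_t = alpha*Y_{t-1} + (1-alpha)*S_{t-1}
+// from Algorithm 2: presence[i] is 1 if the chunk appeared in version i,
+// else 0, with versions ordered chronologically.
+func smoothingScore(presence []int, alpha float64) float64 {
+	s := 0.0
+	for _, y := range presence {
+		s = alpha*float64(y) + (1-alpha)*s
+	}
+	return s
+}
+
+func main() {
+	a := chunkSet{"c1": {}, "c2": {}, "c3": {}}
+	b := chunkSet{"c2": {}, "c3": {}, "c4": {}}
+	fmt.Printf("distance = %.2f\n", distance(a, b)) // 2 shared of 4 total chunks -> 0.50
+	fmt.Println(isCore(a, []chunkSet{b}, 0.5, 1))   // true with these toy thresholds
+	fmt.Printf("score = %.3f\n", smoothingScore([]int{1, 1, 0}, 0.5)) // 0.375
+}
+```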
+
+***
+3.1 Initialize the core point collection $\Omega$ as an empty set, and set the clustering radius $\gamma = 0.5$ and the sample number threshold $MinPts = 10$.
+
+3.2 Loop through each image and its corresponding chunk list, and calculate its distance from every other image according to the following formula:
+$$ distance(x,y)= \frac{\lvert C(R_x) \cup C(R_y) \rvert - \lvert C(R_x) \cap C(R_y) \rvert}{\lvert C(R_x) \cup C(R_y) \rvert }$$
+where $C(R_x)$ is the set of unique chunks of image $x$ over all of its training-set versions. If there are at least $M$ images $y$ with $distance(x,y) \leq \gamma$, where $M \geq MinPts$, add image $x$ to the core point set; each such image $y$ is said to be in the neighborhood of the core image $x$.
+
+3.3 Initialize the number of clusters $k=0$, then iterate over the core point set. For each core image, add all images in its neighborhood to a queue; if an image in the neighborhood is itself a core image, add all images in its neighborhood to the queue as well. Classify all images in the queue into one cluster, and continue traversing the core point set until every core image has been visited.
+
+3.4 Calculate the frequency with which each chunk appears in the images of each cluster. Add the chunks that appear in more than $90\%$ of the cluster's training-set images to the dictionary for that cluster, producing a set of <cluster, chunk dictionary> pairs.
+***
+**4.** Adjust the neighborhood radius and repeat step 3 to obtain multiple deduplication dictionaries.
+
+**5.** Evaluate the deduplication dictionaries from step 4 on the test set, and select the chunk dictionary that yields the smallest test-set storage space.
+
+**6.** Remove the chunks in the dictionary selected in step 5 from all images (training set and test set), then repeat steps 1-5 to generate further chunk dictionaries, until the maximum number of cycles (7) is reached or the proportion of discrete (noise) images exceeds 80% of the total number of images.
+
+How the DBSCAN algorithm divides clusters is illustrated in the diagram:
+![dbscan algorithm](images/nydus_chunkdict_dbscan_algorithm.png)
+**Remark:** The picture and the associated DBSCAN algorithm description are referenced from [https://en.wikipedia.org/wiki/DBSCAN](https://en.wikipedia.org/wiki/DBSCAN)
+
+#### Algorithm 2 Deduplication between different versions of the image (exponential smoothing algorithm)
+
+***
+**Basic principle:** Exponential smoothing is a method for smoothing and predicting time-series data. It takes a weighted average of the data, gives higher weight to more recently repeated chunks, and continuously updates the smoothed value, so newer chunks have a greater influence on the prediction while the influence of older data gradually fades.
+
+**Input:** The training set and test set after deduplication in Algorithm 1.
+
+**Output:** The chunk dictionary corresponding to each image.
+
+**Basic steps:**
+**1.** Divide all chunks in the training set into lists by image_name, so that each list corresponds to one image and all of the chunk sets in that image.
+
+**2.** Sort the versions inside each image chronologically, and score each chunk according to the exponential smoothing formula:
+$$S_0 =0 ,S_t = \alpha Y_{t-1} +(1- \alpha)S_{t-1} $$ +where, $\alpha=0.5$ , $Y_{t-1}$ indicates whether the chunk appeared in the previous image, 1 if it did, otherwise 0. + +**3.** Count the score for each chunk and select all chunks with a score greater than $THs$ as the chunk dictionary. Deduplicate the image version in the test set and calculate the storage space it occupies. + +**4.** Modify the value of $THs$ from 0.8 to 0.5 in steps of 0.05 and repeat steps 2 and 3 to generate multiple chunk dictionaries. + +**5.** Choose a chunk dictionary that minimizes the test set's storage space. +*** + +### Exponential Smoothing Algorithm Test + +#### Procedure + +**1.** Download 10 versions of each OCI image and count the total size in MB. +**2.** Convert the OCI images to Nydus format and then count the total size in MB after conversion. +**3.** Select three versions of each image to generate a chunk dictionary. Use the chunk dictionary to convert the remaining seven versions of the image, and then count the total size in MB after deduplication. + +#### Image Information Table + +| **Image Name** | **Number of Versions** | **Total Image Size (OCI)** | **Total Image Size (Nydus)** | +| :------------: | :--------------------: | :------------------------: | :--------------------------: | +| **Redis** | 10 | 341.78 MB | 419.37 MB | +| **Ubuntu** | 10 | 290.26 MB | 308.59 MB | +| **Alpine** | 10 | 26.9 MB | 27.55 MB | + +#### Deduplication Results Table + +| **Image Name** | **Chunkdict Image Size** | **Total Image Size (Nydus after Deduplicating)** | **Deduplicating Rate** | +| :------------: | :----------------------: | :----------------------------------------------: | :--------------------: | +| **Redis** | 41.87 MB | 319.48 MB | 23.82% | +| **Ubuntu** | 30.8 MB | 140.28 MB | 54.54% | +| **Alpine** | 2.74 MB | 24.7 MB | 10.34% | + +*** diff --git a/docs/containerd-env-setup.md b/docs/containerd-env-setup.md index 5d6eebcf02c..c6c0057d777 100644 --- a/docs/containerd-env-setup.md +++ b/docs/containerd-env-setup.md @@ -1,275 +1,275 @@ -# Nydus Setup for Containerd Environment - -This document will walk through how to setup a nydus image service to work with containerd. It assumes that you already have `containerd` installed. If not, please refer to [containerd documents](https://github.com/containerd/containerd/blob/master/docs/ops.md) on how to install and set it up. - -## Install All Nydus Binaries - -1. Get `nydus-image`, `nydusd`, `nydusify`, `nydusctl` and `nydus-overlayfs` binaries from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. - -```bash -sudo install -D -m 755 nydusd nydus-image nydusify nydusctl nydus-overlayfs /usr/bin -``` - -2. Get `containerd-nydus-grpc` (nydus snapshotter) binary from nydus-snapshotter [release](https://github.com/containerd/nydus-snapshotter/releases/latest) page. 
- -```bash -sudo install -D -m 755 containerd-nydus-grpc /usr/bin -``` - -## Start a Local Registry Container - -To make it easier to convert and run nydus images next, we can run a local registry service with docker: - -```bash -sudo docker run -d --restart=always -p 5000:5000 registry -``` - -## Convert/Build an Image to Nydus Format - -Nydus image can be created by converting from an existing OCI or docker v2 image stored in container registry or directly built from Dockerfile(with [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md)) - -Note: For private registry repo, please make sure you are authorized to pull and push the target registry. The basic method is to use `docker pull` and `docker push` to verify your access to the source or target registry. - -```bash -sudo nydusify convert --source ubuntu --target localhost:5000/ubuntu-nydus -``` - -For more details about how to build nydus image, please refer to [Nydusify](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusify.md) conversion tool, [Acceld](https://github.com/goharbor/acceleration-service) conversion service or [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md#build-nydus-image-using-nerdctl-image-convert). - -## Start Nydus Snapshotter - -Nydus provides a containerd remote snapshotter `containerd-nydus-grpc` (nydus snapshotter) to prepare container rootfs with nydus formatted images. - -1. Prepare a `nydusd` configuration to `/etc/nydus/nydusd-config.fusedev.json`: - -```bash -$ sudo tee /etc/nydus/nydusd-config.fusedev.json > /dev/null << EOF -{ - "device": { - "backend": { - "type": "registry", - "config": { - "scheme": "", - "skip_verify": true, - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 4, - "auth": "" - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "iostats_files": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 4 - } -} -EOF -``` - -Please refer to the nydusd [doc](./nydusd.md) to learn more options. - -⚠️ Note: - -- The `device.backend.config.scheme` is the URL scheme for the registry. Leave it empty for automatic detection, or specify `https` or `http` depending on your registry server configuration. -- The `device.backend.config.auth` is the base64 encoded `username:password` authentication string required by nydusd to lazily pull image data from an authenticated registry. The nydus snapshotter will automatically read it from the `$HOME/.docker/config.json` configuration file, or you can also fill it with your own. -- The `device.backend.config.skip_verify` allows you to skip the insecure https certificate checks for the registry, only set it to `true` when necessary. Note that enabling this option is a security risk for the connection to registry, so you should only use this when you are sure it is safe. -- The `fs_prefetch.enable` option enables nydusd to prefetch image data in background, which can make container startup faster when it needs to read a large amount of image data. Set this to `false` if you don't need this functionality when it brings disk and network pressure. - -2. [Optional] Cleanup snapshotter environment: - -Make sure the default nydus snapshotter root directory is clear. - -``` -sudo rm -rf /var/lib/containerd-nydus -``` - -3. 
Start `containerd-nydus-grpc` (nydus snapshotter): -Optionally, a TOML based nydus-snapshotter configuration file can be provided by appending `--config ` when starting nydus-snapshotter if you want fine-grained control items. An example configuration file can be found [here](https://github.com/containerd/nydus-snapshotter/blob/main/misc/snapshotter/config.toml) - -```bash -sudo /usr/bin/containerd-nydus-grpc \ - --nydusd-config /etc/nydus/nydusd-config.fusedev.json \ - --log-to-stdout -``` - -## [Option 1] Configure as Containerd Global Snapshotter - -Nydus depends on two features of Containerd: - -- Support remote snapshotter plugin -- Support passing annotations to remote snapshotter - -To enable them, add below configuration items to your `containerd` configuration file (default path is `/etc/containerd/config.toml`): - -```toml -[proxy_plugins] - [proxy_plugins.nydus] - type = "snapshot" - address = "/run/containerd-nydus/containerd-nydus-grpc.sock" -``` - -When working with Kubernetes CRI, please change the default snapshotter to `nydus` and enable snapshot annotations like below: - -For version 1 containerd config format: - -```toml -[plugins.cri] - [plugins.cri.containerd] - snapshotter = "nydus" - disable_snapshot_annotations = false - discard_unpacked_layers = false -``` - -For version 2 containerd config format: - -```toml -[plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "nydus" - disable_snapshot_annotations = false - discard_unpacked_layers = false -``` - -Then restart containerd, e.g.: - -```bash -sudo systemctl restart containerd -``` - -## [Option 2] Configure as Containerd Runtime-Level Snapshotter - -Note: this way only works on CRI based scenario (for example crictl or kubernetes). - -Containerd (>= v1.7.0) supports configuring the `runtime-level` snapshotter. By following the steps below, we can declare runtimes that use different snapshotters: - -### Step 1: Apply Containerd Patches - -[Patch](https://github.com/nydusaccelerator/containerd/commit/0959cdb0b190e35c058a0e5bc2e256e59b95b584): fixes the handle of sandbox run and container create for runtime-level snapshotter; - -### Step 2: Configure Containerd - -Only for version 2 containerd config format: - -```toml -[plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "overlayfs" - disable_snapshot_annotations = false - discard_unpacked_layers = false - - [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc-nydus] - snapshotter = "nydus" - -[proxy_plugins] - [proxy_plugins.nydus] - type = "snapshot" - address = "/run/containerd-nydus/containerd-nydus-grpc.sock" -``` - -Then restart containerd, e.g.: - -```bash -sudo systemctl restart containerd -``` - -### Step 3: Add an Extra Annotation in Sandbox Spec - -The annotation `"io.containerd.cri.runtime-handler": "runc-nydus"` must be set in sandbox spec. The `nydus-sandbox.yaml` looks like below: - -```yaml -metadata: - attempt: 1 - name: nydus-sandbox - namespace: default - uid: nydus-sandbox-test -log_directory: /tmp -linux: - security_context: - namespace_options: - network: 2 -annotations: - "io.containerd.cri.runtime-handler": "runc-nydus" -``` - -As shown above, the sandbox is declared with `"io.containerd.cri.runtime-handler": "runc-nydus"` annotation will use the `nydus` snapshotter, while others will use the default `overlayfs` snapshotter. 
- -## Multiple Snapshotter Switch Troubleshooting - -⚠️ You may encounter the following error when creating a Pod: - -``` -err="failed to \"StartContainer\" for \"xxx\" with CreateContainerError: \"failed to create containerd container: error unpacking image: failed to extract layer sha256:yyy: failed to get reader from content store: content digest sha256:zzz: not found\"" -``` - -One possible reason is some images in the Pod (including the Pause image) have used containerd's default snapshotter (such as the `overlayfs` snapshotter), and the `discard_unpacked_layers` option was previously set to `true` in containerd config, containerd has already deleted the blobs from the content store. To resolve this issue, you should first ensure that `discard_unpacked_layers=false`, then use the following command to restore the image: - -``` -ctr -n k8s.io content fetch pause:3.8 -``` - -Please note that `pause:3.8` is just an example image, you should also fetch all images used by the Pod to ensure that there are no issues. - -## Try Nydus with `nerdctl` - -Nydus snapshotter has been supported by [nerdctl](https://github.com/containerd/nerdctl)(requires >= v0.22), we can lazily start container with it. - -```bash -$ sudo nerdctl --snapshotter nydus run --rm -it localhost:5000/ubuntu-nydus:latest bash -``` - -## Create Pod with Nydus Image in Kubernetes - -For example, use the following `nydus-sandbox.yaml` and `nydus-container.yaml` - -The `nydus-sandbox.yaml` looks like below: - -```yaml -metadata: - attempt: 1 - name: nydus-sandbox - namespace: default - uid: nydus-sandbox-test -log_directory: /tmp -linux: - security_context: - namespace_options: - network: 2 -``` - -The `nydus-container.yaml` looks like below: - -```yaml -metadata: - name: nydus-container -image: - image: localhost:5000/ubuntu-nydus:latest -command: - - /bin/sleep -args: - - 600 -log_path: container.1.log -``` - -To create a pod with the just converted nydus image: - -```bash -$ sudo crictl pull localhost:5000/ubuntu-nydus:latest -$ pod=`sudo crictl runp nydus-sandbox.yaml` -$ container=`sudo crictl create $pod nydus-container.yaml nydus-sandbox.yaml` -$ sudo crictl start $container -$ sudo crictl ps -CONTAINER ID IMAGE CREATED STATE NAME ATTEMPT POD ID -f4a6c6dc47e34 localhost:5000/ubuntu-nydus:latest 9 seconds ago Running nydus-container 0 21b91779d551e -``` - -## Integrate P2P with Dragonfly - -Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P system, which can greatly reduce the network latency and the single point of network pressure for registry server, testing in the production environment shows that using Dragonfly can reduce network latency by more than 80%, to understand the performance test data and how to configure Nydus to use Dragonfly, please refer to the [doc](https://d7y.io/docs/setup/integration/nydus). +# Nydus Setup for Containerd Environment + +This document will walk through how to setup a nydus image service to work with containerd. It assumes that you already have `containerd` installed. If not, please refer to [containerd documents](https://github.com/containerd/containerd/blob/master/docs/ops.md) on how to install and set it up. + +## Install All Nydus Binaries + +1. Get `nydus-image`, `nydusd`, `nydusify`, `nydusctl` and `nydus-overlayfs` binaries from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. + +```bash +sudo install -D -m 755 nydusd nydus-image nydusify nydusctl nydus-overlayfs /usr/bin +``` + +2. 
Get `containerd-nydus-grpc` (nydus snapshotter) binary from nydus-snapshotter [release](https://github.com/containerd/nydus-snapshotter/releases/latest) page. + +```bash +sudo install -D -m 755 containerd-nydus-grpc /usr/bin +``` + +## Start a Local Registry Container + +To make it easier to convert and run nydus images next, we can run a local registry service with docker: + +```bash +sudo docker run -d --restart=always -p 5000:5000 registry +``` + +## Convert/Build an Image to Nydus Format + +Nydus image can be created by converting from an existing OCI or docker v2 image stored in container registry or directly built from Dockerfile(with [Buildkit](https://github.com/nydusaccelerator/buildkit/blob/master/docs/nydus.md)) + +Note: For private registry repo, please make sure you are authorized to pull and push the target registry. The basic method is to use `docker pull` and `docker push` to verify your access to the source or target registry. + +```bash +sudo nydusify convert --source ubuntu --target localhost:5000/ubuntu-nydus +``` + +For more details about how to build nydus image, please refer to [Nydusify](https://github.com/dragonflyoss/nydus/blob/master/docs/nydusify.md) conversion tool, [Acceld](https://github.com/goharbor/acceleration-service) conversion service or [Nerdctl](https://github.com/containerd/nerdctl/blob/master/docs/nydus.md#build-nydus-image-using-nerdctl-image-convert). + +## Start Nydus Snapshotter + +Nydus provides a containerd remote snapshotter `containerd-nydus-grpc` (nydus snapshotter) to prepare container rootfs with nydus formatted images. + +1. Prepare a `nydusd` configuration to `/etc/nydus/nydusd-config.fusedev.json`: + +```bash +$ sudo tee /etc/nydus/nydusd-config.fusedev.json > /dev/null << EOF +{ + "device": { + "backend": { + "type": "registry", + "config": { + "scheme": "", + "skip_verify": true, + "timeout": 5, + "connect_timeout": 5, + "retry_limit": 4, + "auth": "" + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "cache" + } + } + }, + "mode": "direct", + "digest_validate": false, + "iostats_files": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 4 + } +} +EOF +``` + +Please refer to the nydusd [doc](./nydusd.md) to learn more options. + +⚠️ Note: + +- The `device.backend.config.scheme` is the URL scheme for the registry. Leave it empty for automatic detection, or specify `https` or `http` depending on your registry server configuration. +- The `device.backend.config.auth` is the base64 encoded `username:password` authentication string required by nydusd to lazily pull image data from an authenticated registry. The nydus snapshotter will automatically read it from the `$HOME/.docker/config.json` configuration file, or you can also fill it with your own. +- The `device.backend.config.skip_verify` allows you to skip the insecure https certificate checks for the registry, only set it to `true` when necessary. Note that enabling this option is a security risk for the connection to registry, so you should only use this when you are sure it is safe. +- The `fs_prefetch.enable` option enables nydusd to prefetch image data in background, which can make container startup faster when it needs to read a large amount of image data. Set this to `false` if you don't need this functionality when it brings disk and network pressure. + +2. [Optional] Cleanup snapshotter environment: + +Make sure the default nydus snapshotter root directory is clear. 
+ +``` +sudo rm -rf /var/lib/containerd-nydus +``` + +3. Start `containerd-nydus-grpc` (nydus snapshotter): +Optionally, a TOML based nydus-snapshotter configuration file can be provided by appending `--config ` when starting nydus-snapshotter if you want fine-grained control items. An example configuration file can be found [here](https://github.com/containerd/nydus-snapshotter/blob/main/misc/snapshotter/config.toml) + +```bash +sudo /usr/bin/containerd-nydus-grpc \ + --nydusd-config /etc/nydus/nydusd-config.fusedev.json \ + --log-to-stdout +``` + +## [Option 1] Configure as Containerd Global Snapshotter + +Nydus depends on two features of Containerd: + +- Support remote snapshotter plugin +- Support passing annotations to remote snapshotter + +To enable them, add below configuration items to your `containerd` configuration file (default path is `/etc/containerd/config.toml`): + +```toml +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +``` + +When working with Kubernetes CRI, please change the default snapshotter to `nydus` and enable snapshot annotations like below: + +For version 1 containerd config format: + +```toml +[plugins.cri] + [plugins.cri.containerd] + snapshotter = "nydus" + disable_snapshot_annotations = false + discard_unpacked_layers = false +``` + +For version 2 containerd config format: + +```toml +[plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "nydus" + disable_snapshot_annotations = false + discard_unpacked_layers = false +``` + +Then restart containerd, e.g.: + +```bash +sudo systemctl restart containerd +``` + +## [Option 2] Configure as Containerd Runtime-Level Snapshotter + +Note: this way only works on CRI based scenario (for example crictl or kubernetes). + +Containerd (>= v1.7.0) supports configuring the `runtime-level` snapshotter. By following the steps below, we can declare runtimes that use different snapshotters: + +### Step 1: Apply Containerd Patches + +[Patch](https://github.com/nydusaccelerator/containerd/commit/0959cdb0b190e35c058a0e5bc2e256e59b95b584): fixes the handle of sandbox run and container create for runtime-level snapshotter; + +### Step 2: Configure Containerd + +Only for version 2 containerd config format: + +```toml +[plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "overlayfs" + disable_snapshot_annotations = false + discard_unpacked_layers = false + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc-nydus] + snapshotter = "nydus" + +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +``` + +Then restart containerd, e.g.: + +```bash +sudo systemctl restart containerd +``` + +### Step 3: Add an Extra Annotation in Sandbox Spec + +The annotation `"io.containerd.cri.runtime-handler": "runc-nydus"` must be set in sandbox spec. The `nydus-sandbox.yaml` looks like below: + +```yaml +metadata: + attempt: 1 + name: nydus-sandbox + namespace: default + uid: nydus-sandbox-test +log_directory: /tmp +linux: + security_context: + namespace_options: + network: 2 +annotations: + "io.containerd.cri.runtime-handler": "runc-nydus" +``` + +As shown above, the sandbox is declared with `"io.containerd.cri.runtime-handler": "runc-nydus"` annotation will use the `nydus` snapshotter, while others will use the default `overlayfs` snapshotter. 
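+
+As a quick, optional sanity check, the following is a minimal sketch (assuming the `nydus-sandbox.yaml` above and the default snapshotter root directory `/var/lib/containerd-nydus`; adjust the path if you changed it): create a sandbox from the annotated spec and see whether the nydus snapshotter's root directory gets populated.
+
+```bash
+# Create the sandbox; the runc-nydus handler should route its snapshots to the nydus snapshotter.
+pod=$(sudo crictl runp nydus-sandbox.yaml)
+
+# The nydus snapshotter root directory (default path shown) should now be populated.
+sudo ls /var/lib/containerd-nydus
+
+# Clean up the test sandbox.
+sudo crictl stopp "$pod" && sudo crictl rmp "$pod"
+```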
+ +## Multiple Snapshotter Switch Troubleshooting + +⚠️ You may encounter the following error when creating a Pod: + +``` +err="failed to \"StartContainer\" for \"xxx\" with CreateContainerError: \"failed to create containerd container: error unpacking image: failed to extract layer sha256:yyy: failed to get reader from content store: content digest sha256:zzz: not found\"" +``` + +One possible reason is some images in the Pod (including the Pause image) have used containerd's default snapshotter (such as the `overlayfs` snapshotter), and the `discard_unpacked_layers` option was previously set to `true` in containerd config, containerd has already deleted the blobs from the content store. To resolve this issue, you should first ensure that `discard_unpacked_layers=false`, then use the following command to restore the image: + +``` +ctr -n k8s.io content fetch pause:3.8 +``` + +Please note that `pause:3.8` is just an example image, you should also fetch all images used by the Pod to ensure that there are no issues. + +## Try Nydus with `nerdctl` + +Nydus snapshotter has been supported by [nerdctl](https://github.com/containerd/nerdctl)(requires >= v0.22), we can lazily start container with it. + +```bash +$ sudo nerdctl --snapshotter nydus run --rm -it localhost:5000/ubuntu-nydus:latest bash +``` + +## Create Pod with Nydus Image in Kubernetes + +For example, use the following `nydus-sandbox.yaml` and `nydus-container.yaml` + +The `nydus-sandbox.yaml` looks like below: + +```yaml +metadata: + attempt: 1 + name: nydus-sandbox + namespace: default + uid: nydus-sandbox-test +log_directory: /tmp +linux: + security_context: + namespace_options: + network: 2 +``` + +The `nydus-container.yaml` looks like below: + +```yaml +metadata: + name: nydus-container +image: + image: localhost:5000/ubuntu-nydus:latest +command: + - /bin/sleep +args: + - 600 +log_path: container.1.log +``` + +To create a pod with the just converted nydus image: + +```bash +$ sudo crictl pull localhost:5000/ubuntu-nydus:latest +$ pod=`sudo crictl runp nydus-sandbox.yaml` +$ container=`sudo crictl create $pod nydus-container.yaml nydus-sandbox.yaml` +$ sudo crictl start $container +$ sudo crictl ps +CONTAINER ID IMAGE CREATED STATE NAME ATTEMPT POD ID +f4a6c6dc47e34 localhost:5000/ubuntu-nydus:latest 9 seconds ago Running nydus-container 0 21b91779d551e +``` + +## Integrate P2P with Dragonfly + +Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P system, which can greatly reduce the network latency and the single point of network pressure for registry server, testing in the production environment shows that using Dragonfly can reduce network latency by more than 80%, to understand the performance test data and how to configure Nydus to use Dragonfly, please refer to the [doc](https://d7y.io/docs/setup/integration/nydus). diff --git a/docs/data-deduplication.md b/docs/data-deduplication.md index 45b259ad204..fe91470deb4 100644 --- a/docs/data-deduplication.md +++ b/docs/data-deduplication.md @@ -1,167 +1,167 @@ -# Overview - -A container image contains a packaged application, along with its dependencies, and information on what processes it runs when launched. -Container image provides an immutable and repeatable mechanism to distribute software. -Because every container image packs the application and all its dependencies, common dependencies used by multiple container images may be duplicated multiple times, thus causes heavy data duplication. 
- -The [OCI Image Format Specification V1](https://github.com/opencontainers/image-spec) tries to solve the data duplication issue by using image layers. -A container image may contain one or more image layers, and an image layer may be shared/reused between two container images if and only if: -- the two layers have the same parent layer -- the two layers have identical content - -Container image layer helps to reduce duplicated data, but it's not as efficiency as expected due to: -- It's too coarse-grained. Any change of file data or attribute generates a new layer. -- A layer depends on its parent. They are different layers if two layers have identical content but different parents. -- It depends on the way to generate the container images. In practice, it heavily depends on development workflow to reuse existing layers. - -So more fine-grained data deduplication technologies may be used to reduce duplicated content of container images further. - -## Data Deduplication -Data deduplication(Dedup) eliminates redundant data at the file- or chunk-level and identifies duplicate contents by their cryptographically secure hash signatures. -Fine-grained data dedup technologies may be classified into different categories. One commonly used classification is: -- File-Level Dedup -- Fixed-Size Chunking Dedup -- Content-Defined Chunking Dedup - -### File-Level Deduplication -File-level dedup watches for multiple copies of the same file, stores the first copy, and then just links the other references to the first file. Only one copy gets stored on the disk/tape archive. -Ultimately, the space you save on disk relates to how many copies of the file there were in the file system. -File-level dedup has both pros and cons: -- Pro: smaller index data. Indexes for file-level dedup are significantly smaller, which takes less computational time/memory resource when duplicates are being determined. -- Pro: less computation resource. File-level processes require less processing power due to the smaller index and reduced number of comparisons. -- Con: lower dedup rate. Any change within the file causes the whole file to be saved again. -- Con: not friend to on demand downloading. The whole bigger file must be downloaded to access any byte of the file. -- Con: not friend to Container Registry for garbage-collection. -- Con: not friend to CDN for data cache. - -### Fixed-Size Chunking Deduplication -The Fixed-Size Chunking dedup divides file content into fixed-size chunks/slices, and de-duplicates among those data chunks. -Compared with file-level dedup and Content-Defined Chunking(CDC), it's a balanced solution with: -- Mediate dedup rate, higher than file-level dedup, lower than CDC. -- Mediate index data size, bigger than file-level dedup, smaller than CDC. -- Mediate CPU/memory resource consumption when restoring the de-duplicated data. -- Pro: friend to on demand loading and random access. -- Pro: friend to implement in-kernel filesystem. -- Pro: friend to Container Registry for garbage-collection. -- Con: not friend to CDN for data cache. - -### Content-Defined Chunking Deduplication -Content-defined chunking (CDC) is a method to split files into variable length chunks, where the cut points are defined by some internal features of the files. -Unlike fixed-length chunks, variable-length chunks are more resistant to byte shifting. -Thus, it increases the probability of finding duplicate chunks within a file and between files. 
-However, CDC algorithms require additional computation to find the cut points which might be computationally expensive for some applications. -CDC also has Pros/Cons too: -- Pro: higher data dedup rate. -- Pro: friend to on demand loading -- Pro: friend to CDN for data cache. -- Con: bigger index data. -- Con: higher CPU/memory resource consumption when restoring the de-duplicated data. -- Con: not friend to Container Registry. -- Con: not friend to random access. - -### Data Deduplication Usage Scenarios -Different data dedup technologies have different Pros/Cons, and may be suitable for different usage scenarios. -The article [FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication](https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf) states: -``` -In general, chunk-level deduplication is more popular than file-level deduplication because it identifies and removes -redundancy at a finer granularity. For chunk-level deduplication, the simplest chunking approach is to cut the file or -data stream into equal, fixed-size chunks, referred to as Fixed-Size Chunking (FSC). Content-Defined Chunking (CDC) -based approaches are proposed to address the boundary-shift problem faced by the FSC approach. Specifically, CDC -declares chunk boundaries based on the byte contents of the data stream instead of on the byte offset, as in FSC, -and thus helps detect more redundancy for deduplication. According to some recent studies, CDC based deduplication -approaches are able to detect about 10-20 percent more redundancy than the FSC approach. -``` - -A container image is generated once, immutably distributed to many clients, and then accessed by those clients. -Though CDC may achieve higher data deduplication rate, but it consumes more resources when generating and accessing the de-duplicated data. -And FSC dedup also has advantages of being friend to in-kernel fs, on demand loading and registry garbage collection etc. -So we think FSC dedup is the right technology for container image. - -## Related Projects -There are several related projects which try to reduce duplicated data in container image/software package by using data dedup technologies. - -### casync - Content Addressable Data Synchronizer -The [casync](https://github.com/systemd/casync) project provides a Linux software utility designed to distribute frequently-updated file system images over the Internet. -It uses Content-Defined Chunking to reduce data transferred to/from the software repositories. - -### CernVM-FS -The [CernVM File System](https://cernvm.cern.ch/fs/) project provides a scalable, reliable and low-maintenance software distribution service. -It uses file-level deduplication do reduce data transferred to/from the HTTP server and data stored on local disk. - -### OSTree -The [OSTree](https://github.com/ostreedev/ostree) project provides an upgrade system for Linux-based operating systems that performs atomic upgrades of complete filesystem trees. -It uses file-level deduplication do reduce data transferred to/from the HTTP server and data stored on local disk. - -# Data Deduplication for Container Images -Data deduplication technologies may be used to reduce data generated, stored, transferred and loaded for container images and/or software packages. -When talking about data deduplication for container images, it may mean different things from different point of view. 
-For example, it may mean: -- O1: reduce data uploaded to container registries -- O2: reduce data stored in container registry storage backend -- O3: reduce data downloaded from container registries -- O4: reduce data stored on node local storage -- O5: reduce data loaded from node local storage into memory - -![container_data_dedup](images/container-data-deduplication.png) - -# Nydus Data Deduplication -The Nydus Image Service project aims to achieve O1, O2, O3, O4 and O5 altogether with two technologies. - -First, it develops a new image format, named Rafs (Registry Accelerated File System), which reduces duplicated data by `Fixed-Size Chunking Dedup` at container build time. -Rafs supports configurable chunking size, between 4K to 1M (`2^n, n >= 12 && n <= 20)`. -By default, it reduces duplicated chunks within the same image layer and between layers of the same image. -It may also reduce duplicated chunks among images if a reference image is given. - -Second, it develops a CAS(content addressable storage) system to reduce node-level duplicated data at container runtime. -We are still working on developing a CAS system for Nydus. - -![rafs_format](images/nydus-rafs-cas.svg) - -## Data Deduplication within an Image Layer - -It follows the process to create an image layer from a source filesystem/directory: -- create an image layer bootstrap file -- create an empty image layer data file -- create an empty `ChunkDigestHashTable` -- scan the fs tree and for each filesystem object found: - - get metadata about the filesystem object, such as type, name, size, permission, owner, atime/ctime etc, and append a metadata entry to the bootstrap file. - - if the object is a normal file with data, - - split the file data into fixed-size chunks - - compute cryptographically secure hash signature for each chunk - - append the chunk to the data file and add the chunk digest to the `ChunkDigestHashTable` if it doesn't exist in the hash table yet - -Finally, we will get one bootstrap file containing all fs metadata and one data file containing de-duplicated data chunks. -So all duplicated data chunks within the same layer will get de-duplicated. - -Data dedup within an image layer helps to achieve O1, O2, O3, O4 and O5. - -## Data Deduplication within a Multiple Layer Image -When creating the first layer of a multi-layer image, it follows the same process as above. -When creating the following-on layers of a multi-layer image, there's one difference: it reuses the `ChunkDigestHashTable` of parent layer instead creating an empty one. -By this way, all data chunks existing in ancestor layers will get de-duplicated. - -Data dedup among multiple image layers helps to achieve O1, O2, O3, O4 and O5. - -## Data Deduplication among Multiple Images -A referenced image may be used when creating an image by `nydus-image create --parent-bootstrap referenced-image-bootstrap ...`. -When creating the first layer of an image, and a referenced parent image is present, it reconstructs the `ChunkDigestHashTable` from the reference bootstrap file instead of creating an empty one. -By this way, all data chunks existing in the referenced image will get de-duplicated. -This is most valuable for software upgrading scenario because there may be many duplicated chunks between the existing version and the new version. - -Data dedup among multiple images may help to achieve O1, O2, O3, O4 and O5. - -## Data Deduplication on Node -The above three methods achieve data dedup during image building stage. 
-But multi container images may still contain duplicated data chunks, for example: -- images containing the same nodejs library. -- images containing the same java runtime. - -Even more, a container image may contain duplicated data with the host, say running a Ubuntu 16.04 based image on a Ubuntu 16.04 host. - -A node level CAS (content addressable storage) may help to de-duplicate data downloaded from the registry if it already exists in the node CAS system. -When downloading a container image, all data chunks already existing in local CAS will be skipped, and chunks downloaded from the registry will be added to the local CAS system. -So Nydus provides a node level CAS system to reduce data downloaded from the registry and data loaded into memory. - -The node level CAS system helps to achieve O4 and O5. - -# Node Level CAS System (WIP) +# Overview + +A container image contains a packaged application, along with its dependencies, and information on what processes it runs when launched. +Container image provides an immutable and repeatable mechanism to distribute software. +Because every container image packs the application and all its dependencies, common dependencies used by multiple container images may be duplicated multiple times, thus causes heavy data duplication. + +The [OCI Image Format Specification V1](https://github.com/opencontainers/image-spec) tries to solve the data duplication issue by using image layers. +A container image may contain one or more image layers, and an image layer may be shared/reused between two container images if and only if: +- the two layers have the same parent layer +- the two layers have identical content + +Container image layer helps to reduce duplicated data, but it's not as efficiency as expected due to: +- It's too coarse-grained. Any change of file data or attribute generates a new layer. +- A layer depends on its parent. They are different layers if two layers have identical content but different parents. +- It depends on the way to generate the container images. In practice, it heavily depends on development workflow to reuse existing layers. + +So more fine-grained data deduplication technologies may be used to reduce duplicated content of container images further. + +## Data Deduplication +Data deduplication(Dedup) eliminates redundant data at the file- or chunk-level and identifies duplicate contents by their cryptographically secure hash signatures. +Fine-grained data dedup technologies may be classified into different categories. One commonly used classification is: +- File-Level Dedup +- Fixed-Size Chunking Dedup +- Content-Defined Chunking Dedup + +### File-Level Deduplication +File-level dedup watches for multiple copies of the same file, stores the first copy, and then just links the other references to the first file. Only one copy gets stored on the disk/tape archive. +Ultimately, the space you save on disk relates to how many copies of the file there were in the file system. +File-level dedup has both pros and cons: +- Pro: smaller index data. Indexes for file-level dedup are significantly smaller, which takes less computational time/memory resource when duplicates are being determined. +- Pro: less computation resource. File-level processes require less processing power due to the smaller index and reduced number of comparisons. +- Con: lower dedup rate. Any change within the file causes the whole file to be saved again. +- Con: not friend to on demand downloading. 
The whole bigger file must be downloaded to access any byte of the file. +- Con: not friend to Container Registry for garbage-collection. +- Con: not friend to CDN for data cache. + +### Fixed-Size Chunking Deduplication +The Fixed-Size Chunking dedup divides file content into fixed-size chunks/slices, and de-duplicates among those data chunks. +Compared with file-level dedup and Content-Defined Chunking(CDC), it's a balanced solution with: +- Mediate dedup rate, higher than file-level dedup, lower than CDC. +- Mediate index data size, bigger than file-level dedup, smaller than CDC. +- Mediate CPU/memory resource consumption when restoring the de-duplicated data. +- Pro: friend to on demand loading and random access. +- Pro: friend to implement in-kernel filesystem. +- Pro: friend to Container Registry for garbage-collection. +- Con: not friend to CDN for data cache. + +### Content-Defined Chunking Deduplication +Content-defined chunking (CDC) is a method to split files into variable length chunks, where the cut points are defined by some internal features of the files. +Unlike fixed-length chunks, variable-length chunks are more resistant to byte shifting. +Thus, it increases the probability of finding duplicate chunks within a file and between files. +However, CDC algorithms require additional computation to find the cut points which might be computationally expensive for some applications. +CDC also has Pros/Cons too: +- Pro: higher data dedup rate. +- Pro: friend to on demand loading +- Pro: friend to CDN for data cache. +- Con: bigger index data. +- Con: higher CPU/memory resource consumption when restoring the de-duplicated data. +- Con: not friend to Container Registry. +- Con: not friend to random access. + +### Data Deduplication Usage Scenarios +Different data dedup technologies have different Pros/Cons, and may be suitable for different usage scenarios. +The article [FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication](https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf) states: +``` +In general, chunk-level deduplication is more popular than file-level deduplication because it identifies and removes +redundancy at a finer granularity. For chunk-level deduplication, the simplest chunking approach is to cut the file or +data stream into equal, fixed-size chunks, referred to as Fixed-Size Chunking (FSC). Content-Defined Chunking (CDC) +based approaches are proposed to address the boundary-shift problem faced by the FSC approach. Specifically, CDC +declares chunk boundaries based on the byte contents of the data stream instead of on the byte offset, as in FSC, +and thus helps detect more redundancy for deduplication. According to some recent studies, CDC based deduplication +approaches are able to detect about 10-20 percent more redundancy than the FSC approach. +``` + +A container image is generated once, immutably distributed to many clients, and then accessed by those clients. +Though CDC may achieve higher data deduplication rate, but it consumes more resources when generating and accessing the de-duplicated data. +And FSC dedup also has advantages of being friend to in-kernel fs, on demand loading and registry garbage collection etc. +So we think FSC dedup is the right technology for container image. + +## Related Projects +There are several related projects which try to reduce duplicated data in container image/software package by using data dedup technologies. 
+
+### casync - Content Addressable Data Synchronizer
+The [casync](https://github.com/systemd/casync) project provides a Linux software utility designed to distribute frequently-updated file system images over the Internet.
+It uses Content-Defined Chunking to reduce data transferred to/from the software repositories.
+
+### CernVM-FS
+The [CernVM File System](https://cernvm.cern.ch/fs/) project provides a scalable, reliable and low-maintenance software distribution service.
+It uses file-level deduplication to reduce data transferred to/from the HTTP server and data stored on local disk.
+
+### OSTree
+The [OSTree](https://github.com/ostreedev/ostree) project provides an upgrade system for Linux-based operating systems that performs atomic upgrades of complete filesystem trees.
+It uses file-level deduplication to reduce data transferred to/from the HTTP server and data stored on local disk.
+
+# Data Deduplication for Container Images
+Data deduplication technologies may be used to reduce data generated, stored, transferred and loaded for container images and/or software packages.
+When talking about data deduplication for container images, it may mean different things from different points of view.
+For example, it may mean:
+- O1: reduce data uploaded to container registries
+- O2: reduce data stored in container registry storage backend
+- O3: reduce data downloaded from container registries
+- O4: reduce data stored on node local storage
+- O5: reduce data loaded from node local storage into memory
+
+![container_data_dedup](images/container-data-deduplication.png)
+
+# Nydus Data Deduplication
+The Nydus Image Service project aims to achieve O1, O2, O3, O4 and O5 altogether with two technologies.
+
+First, it develops a new image format, named Rafs (Registry Accelerated File System), which reduces duplicated data by `Fixed-Size Chunking Dedup` at container build time.
+Rafs supports a configurable chunk size between 4KB and 1MB (`2^n, n >= 12 && n <= 20`).
+By default, it reduces duplicated chunks within the same image layer and between layers of the same image.
+It may also reduce duplicated chunks among images if a reference image is given.
+
+Second, it develops a CAS (content addressable storage) system to reduce node-level duplicated data at container runtime.
+We are still working on developing a CAS system for Nydus.
+
+![rafs_format](images/nydus-rafs-cas.svg)
+
+## Data Deduplication within an Image Layer
+
+The following process creates an image layer from a source filesystem/directory:
+- create an image layer bootstrap file
+- create an empty image layer data file
+- create an empty `ChunkDigestHashTable`
+- scan the fs tree and for each filesystem object found:
+  - get metadata about the filesystem object, such as type, name, size, permission, owner, atime/ctime etc., and append a metadata entry to the bootstrap file.
+  - if the object is a normal file with data,
+    - split the file data into fixed-size chunks
+    - compute a cryptographically secure hash signature for each chunk
+    - append the chunk to the data file and add the chunk digest to the `ChunkDigestHashTable` if it doesn't exist in the hash table yet
+
+Finally, we will get one bootstrap file containing all fs metadata and one data file containing de-duplicated data chunks.
+So all duplicated data chunks within the same layer get de-duplicated.
+
+Data dedup within an image layer helps to achieve O1, O2, O3, O4 and O5.
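+
+To get a rough feel for how much fixed-size chunking can deduplicate inside a directory tree, the following illustrative sketch splits every file into 1MB chunks and counts duplicate chunk digests. It is only an approximation of the idea, not how `nydus-image` is implemented, and it assumes GNU coreutils plus an example `rootfs/` directory:
+
+```bash
+# Split each regular file into fixed-size 1MB chunks, hash every chunk,
+# and report the chunk digests that occur more than once.
+mkdir -p /tmp/chunks
+find rootfs -type f | while read -r f; do
+    split -b 1M "$f" "/tmp/chunks/$(echo "$f" | sha256sum | cut -c1-8)."
+done
+find /tmp/chunks -type f -exec sha256sum {} + | awk '{print $1}' | sort | uniq -c | sort -rn | awk '$1 > 1' | head
+```
+
+Chunks that share a digest would be stored only once via the `ChunkDigestHashTable` described above.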
+ +## Data Deduplication within a Multiple Layer Image +When creating the first layer of a multi-layer image, it follows the same process as above. +When creating the following-on layers of a multi-layer image, there's one difference: it reuses the `ChunkDigestHashTable` of parent layer instead creating an empty one. +By this way, all data chunks existing in ancestor layers will get de-duplicated. + +Data dedup among multiple image layers helps to achieve O1, O2, O3, O4 and O5. + +## Data Deduplication among Multiple Images +A referenced image may be used when creating an image by `nydus-image create --parent-bootstrap referenced-image-bootstrap ...`. +When creating the first layer of an image, and a referenced parent image is present, it reconstructs the `ChunkDigestHashTable` from the reference bootstrap file instead of creating an empty one. +By this way, all data chunks existing in the referenced image will get de-duplicated. +This is most valuable for software upgrading scenario because there may be many duplicated chunks between the existing version and the new version. + +Data dedup among multiple images may help to achieve O1, O2, O3, O4 and O5. + +## Data Deduplication on Node +The above three methods achieve data dedup during image building stage. +But multi container images may still contain duplicated data chunks, for example: +- images containing the same nodejs library. +- images containing the same java runtime. + +Even more, a container image may contain duplicated data with the host, say running a Ubuntu 16.04 based image on a Ubuntu 16.04 host. + +A node level CAS (content addressable storage) may help to de-duplicate data downloaded from the registry if it already exists in the node CAS system. +When downloading a container image, all data chunks already existing in local CAS will be skipped, and chunks downloaded from the registry will be added to the local CAS system. +So Nydus provides a node level CAS system to reduce data downloaded from the registry and data loaded into memory. + +The node level CAS system helps to achieve O4 and O5. + +# Node Level CAS System (WIP) diff --git a/docs/docker-env-setup.md b/docs/docker-env-setup.md index 79abf9fccec..ac1492a71c3 100644 --- a/docs/docker-env-setup.md +++ b/docs/docker-env-setup.md @@ -1,97 +1,97 @@ -# Nydus Setup for Docker(Moby) Environment -## Install Nydus Snapshotter for Docker(Moby) with Systemd -1. Docker(Moby) newer than [5c1d6c957b97321c8577e10ddbffe6e01981617a](https://github.com/moby/moby/commit/5c1d6c957b97321c8577e10ddbffe6e01981617a) is needed on your host. The commit is expected to be included in Docker v24. -``` -git clone https://github.com/moby/moby -cd moby -make binary -cd bundles/binary -sudo systemctl stop docker -sudo systemctl stop containerd -sudo cp ./* /usr/bin/ -``` - -2. Download nydus-snapshotter release tarball from [the release page](https://github.com/containerd/nydus-snapshotter/releases). -``` -# Get the latest version. 
If this version does not work for you, you can try v0.6.0 -TAG=`curl -s https://api.github.com/repos/containerd/nydus-snapshotter/releases/latest | grep tag_name | cut -f4 -d "\""` -wget https://github.com/containerd/nydus-snapshotter/releases/download/"$TAG"/nydus-snapshotter-"$TAG"-linux-amd64.tar.gz -tar -xzvf nydus-snapshotter-"$TAG"-linux-amd64.tar.gz -sudo install -D -m 755 bin/containerd-nydus-grpc /usr/local/bin - -wget -O /etc/nydus/nydusd-config.fusedev.json https://raw.githubusercontent.com/containerd/nydus-snapshotter/"$TAG"/misc/snapshotter/nydusd-config.fusedev.json -wget -O /etc/nydus/config.toml https://raw.githubusercontent.com/containerd/nydus-snapshotter/"$TAG"/misc/snapshotter/config.toml -``` - -3. Download nydus image service release tarball from [the release page](https://github.com/dragonflyoss/nydus/releases). -``` -# Get the latest version. If this version does not work for you, you can try v2.1.4 -TAG=`curl -s https://api.github.com/repos/dragonflyoss/nydus/releases/latest | grep tag_name | cut -f4 -d "\""` -wget https://github.com/dragonflyoss/nydus/releases/download/"$TAG"/nydus-static-"$TAG"-linux-amd64.tgz -tar -xzvf nydus-static-"$TAG"-linux-amd64.tgz -sudo install -D -m 755 nydus-static/* /usr/local/bin -``` - -4. Enable `containerd-snapshotter` feature and `nydus`snapshotter in Docker. Add the following to docker's configuration file (typically: /etc/docker/daemon.json). -```json -{ - "features": { - "containerd-snapshotter": true - }, - "storage-driver": "nydus" -} -``` - -5. Enable nydus snapshotter in containerd. Add the following configuration to containerd's configuration file (typically: /etc/containerd/config.toml). -```toml -version = 2 - -# Plug nydus snapshotter into containerd -[proxy_plugins] - [proxy_plugins.nydus] - type = "snapshot" - address = "/run/containerd-nydus/containerd-nydus-grpc.sock" -``` - -6. Install fuse -- centos -```bash -# centos 7 -sudo yum install fuse -# centos 8 -sudo dnf install fuse - -sudo modprobe fuse -``` - -- ubuntu -```bash -sudo apt-get install fuse -sudo modprobe fuse -``` - -7. Start nydus-snapshotter and restart containerd and docker -``` -# install nydus snapshotter service -wget -O /etc/systemd/system/nydus-snapshotter.service https://raw.githubusercontent.com/containerd/nydus-snapshotter/main/misc/snapshotter/nydus-snapshotter.fusedev.service -sudo systemctl enable --now nydus-snapshotter -sudo systemctl restart containerd - -sudo sed -i "s/fd:/unix:/g" /lib/systemd/system/docker.service -sudo systemctl daemon-reload -sudo systemctl restart docker -``` - 8. Run nydus image in docker -``` -# Start local registry -sudo docker run -d --restart=always -p 5000:5000 registry -# Convert Nydus image -sudo nydusify convert --source ubuntu --target localhost:5000/ubuntu-nydus -# Run Nydus image -sudo docker run --rm -it localhost:5000/ubuntu-nydus:latest bash -``` - -## Install Docker Nydus Graph Driver for Docker [Experimental] -This feature is currently **experimental**, please do not use it in a production environment. - +# Nydus Setup for Docker(Moby) Environment +## Install Nydus Snapshotter for Docker(Moby) with Systemd +1. Docker(Moby) newer than [5c1d6c957b97321c8577e10ddbffe6e01981617a](https://github.com/moby/moby/commit/5c1d6c957b97321c8577e10ddbffe6e01981617a) is needed on your host. The commit is expected to be included in Docker v24. 
+``` +git clone https://github.com/moby/moby +cd moby +make binary +cd bundles/binary +sudo systemctl stop docker +sudo systemctl stop containerd +sudo cp ./* /usr/bin/ +``` + +2. Download nydus-snapshotter release tarball from [the release page](https://github.com/containerd/nydus-snapshotter/releases). +``` +# Get the latest version. If this version does not work for you, you can try v0.6.0 +TAG=`curl -s https://api.github.com/repos/containerd/nydus-snapshotter/releases/latest | grep tag_name | cut -f4 -d "\""` +wget https://github.com/containerd/nydus-snapshotter/releases/download/"$TAG"/nydus-snapshotter-"$TAG"-linux-amd64.tar.gz +tar -xzvf nydus-snapshotter-"$TAG"-linux-amd64.tar.gz +sudo install -D -m 755 bin/containerd-nydus-grpc /usr/local/bin + +wget -O /etc/nydus/nydusd-config.fusedev.json https://raw.githubusercontent.com/containerd/nydus-snapshotter/"$TAG"/misc/snapshotter/nydusd-config.fusedev.json +wget -O /etc/nydus/config.toml https://raw.githubusercontent.com/containerd/nydus-snapshotter/"$TAG"/misc/snapshotter/config.toml +``` + +3. Download nydus image service release tarball from [the release page](https://github.com/dragonflyoss/nydus/releases). +``` +# Get the latest version. If this version does not work for you, you can try v2.1.4 +TAG=`curl -s https://api.github.com/repos/dragonflyoss/nydus/releases/latest | grep tag_name | cut -f4 -d "\""` +wget https://github.com/dragonflyoss/nydus/releases/download/"$TAG"/nydus-static-"$TAG"-linux-amd64.tgz +tar -xzvf nydus-static-"$TAG"-linux-amd64.tgz +sudo install -D -m 755 nydus-static/* /usr/local/bin +``` + +4. Enable `containerd-snapshotter` feature and `nydus`snapshotter in Docker. Add the following to docker's configuration file (typically: /etc/docker/daemon.json). +```json +{ + "features": { + "containerd-snapshotter": true + }, + "storage-driver": "nydus" +} +``` + +5. Enable nydus snapshotter in containerd. Add the following configuration to containerd's configuration file (typically: /etc/containerd/config.toml). +```toml +version = 2 + +# Plug nydus snapshotter into containerd +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +``` + +6. Install fuse +- centos +```bash +# centos 7 +sudo yum install fuse +# centos 8 +sudo dnf install fuse + +sudo modprobe fuse +``` + +- ubuntu +```bash +sudo apt-get install fuse +sudo modprobe fuse +``` + +7. Start nydus-snapshotter and restart containerd and docker +``` +# install nydus snapshotter service +wget -O /etc/systemd/system/nydus-snapshotter.service https://raw.githubusercontent.com/containerd/nydus-snapshotter/main/misc/snapshotter/nydus-snapshotter.fusedev.service +sudo systemctl enable --now nydus-snapshotter +sudo systemctl restart containerd + +sudo sed -i "s/fd:/unix:/g" /lib/systemd/system/docker.service +sudo systemctl daemon-reload +sudo systemctl restart docker +``` + 8. Run nydus image in docker +``` +# Start local registry +sudo docker run -d --restart=always -p 5000:5000 registry +# Convert Nydus image +sudo nydusify convert --source ubuntu --target localhost:5000/ubuntu-nydus +# Run Nydus image +sudo docker run --rm -it localhost:5000/ubuntu-nydus:latest bash +``` + +## Install Docker Nydus Graph Driver for Docker [Experimental] +This feature is currently **experimental**, please do not use it in a production environment. + 1. For older versions of Docker(Moby) lower than v24, please use [Docker Nydus Graph Driver](https://github.com/nydusaccelerator/docker-nydus-graphdriver). 
\ No newline at end of file diff --git a/docs/images/crate-dependency.svg b/docs/images/crate-dependency.svg index 73fdf6ef5c6..b3ec5901676 100644 --- a/docs/images/crate-dependency.svg +++ b/docs/images/crate-dependency.svg @@ -1,4 +1,4 @@ - - - + + +
[crate-dependency.svg diagram text labels: nydus-error, nydus-app, nydus-utils, nydus-api, nydus-rafs, nydus-blobfs, nydus-storage, nydusd]
\ No newline at end of file diff --git a/docs/images/nydus-architecture-overview.svg b/docs/images/nydus-architecture-overview.svg index e25bb5e97ae..60f7f7c8226 100644 --- a/docs/images/nydus-architecture-overview.svg +++ b/docs/images/nydus-architecture-overview.svg @@ -1,4 +1,4 @@ - - - + + +
[nydus-architecture-overview.svg diagram text labels: Containerd Snapshotter, NPM, Sealer, Docker GraphDriver; Nydusd on Host, Nydusd in Guest; Rafs, BlobFs, FileSystem, FsCache, FsCached Handler, EROFS, FUSE, VirtioFs; Kata Secure Container, Kata Confidential Container, RunC Native Container, Guest OS, Host OS; Storage: Cache, Registry, State, OSS, LocalFs, NAS, CEPH, Gluster, DragonFly, P2P, Local Disk; Nydus Data Blob, Stargz Data Blob]
\ No newline at end of file diff --git a/docs/images/nydus-rafs-cas.svg b/docs/images/nydus-rafs-cas.svg index 10c70005766..ce3e9e2ef52 100644 --- a/docs/images/nydus-rafs-cas.svg +++ b/docs/images/nydus-rafs-cas.svg @@ -1,4 +1,4 @@ - - - + + +
[nydus-rafs-cas.svg diagram text labels: Image 1 on Registry, Image 2 on Registry, Image on Node with Local CAS; metadata trees (/, dir1, dir2, file1 through file5, SHA); Image 1 Metadata, Image 2 Metadata, Data 1, Data 2; CAS Database (PK, UniqueID, Row 1, Row 2)]
\ No newline at end of file diff --git a/docs/images/nydus-storage-architecture.svg b/docs/images/nydus-storage-architecture.svg index bc4971f5f5c..7ffc340a95b 100644 --- a/docs/images/nydus-storage-architecture.svg +++ b/docs/images/nydus-storage-architecture.svg @@ -1,4 +1,4 @@ - - - + + +
[SVG label text: Nydus storage subsystem diagram — callers Rafs, BlobFs and FsCache; API layer (BlobDevice, BlobDeviceIoVec, BlobObject, BlobFactory); cache layer (BlobCache, FileCache, DummyCache, FsCache, PrefetchWorker, BlobMetaInfo); cache state layer (ChunkMap, NoopChunkMap, BlobChunkMap, IndexedChunkMap, DigestedChunkMap, PersistMap); backend layer (BlobReader, OSS, Registry, LocalFs); legend: Struct, Trait, call, impl]
\ No newline at end of file diff --git a/docs/nydus-design.md b/docs/nydus-design.md index fe245e2d73a..049745c1145 100644 --- a/docs/nydus-design.md +++ b/docs/nydus-design.md @@ -1,414 +1,414 @@ -# Nydus - -# I. High Level Design -## 0. Overview -Dragonfly image service is named as `nydus`, [GitHub repo](https://github.com/dragonflyoss/nydus) - -Nydus consists of two parts, -* a userspace filesystem called `rafs` on top of a container image format -* an image manifest that is compatible with OCI spec of image and distribution - -Its key features include: - -* Container images are downloaded on demand -* Chunk level data deduplication -* Flatten image metadata and data to remove all intermediate layers -* Only usable image data is saved when building a container image -* Only usable image data is downloaded when running a container -* End-to-end image data integrity -* Compatible with the OCI artifacts spec and distribution spec -* Integrated with existing CNCF project Dragonfly to support image distribution in large clusters -* Different container image storage backends are supported - -## 1. Architecture -Nydus takes in either [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) or [virtiofs](https://virtio-fs.gitlab.io/) protocol to service POD created by conventional runc containers or vm-based [Kata Containers](https://katacontainers.io/). It supports pulling container image data from container image registry, [OSS](https://www.alibabacloud.com/product/oss), NAS, as well as Dragonfly supernode and node peers. It can also optionally use a local directory to cache all container image data to speed up future container creation. - -![architecture](images/nydusd-arch.png) - -## 2. Rafs -Rafs is a filesystem image containing a separated metadata blob and several data-deduplicated content-addressable data blobs. In a typical rafs filesystem, the metadata is stored in `bootstrap` while the data is stored in `blobfile`. Nydus splits container image into two parts, metadata and data, where metadata contains everything a container needs to start with, while data is stored in chunks with chunk size being 1MB. Currently, Rafs has two versions called `Rafs v5` (which is a FUSE-based filesystem) and `Rafs v6` (which is compatible with the in-kernel EROFS filesystem). Note that, the following details are all about `Rafs v5` and meterials about `Raft v6` are still working in progress. - -![rafs](./images/rafs-format.png) - - * bootstrap - * The metadata is a merkle tree whose nodes represents a regular filesystem's directory/file - * a leaf node refers to a file and contains hash value of its file data - * Root node and internal nodes refer to directories and contain the hash value of their children nodes. -``` -+-----------------+-----------------+--------------+----------------+-------+-------+--------+---------------------+ -| | | | | | | | | -|OndiskSuperBlock |OndiskInodeTable |PrefetchTable |OndiskBlobTable |node 1 |node 2 | ...... | | -| | |(optional) | | | | | | -+-----------------+--------------------------------+----------------+-------+-------+--------+---------------------+ -``` - -* node - -``` -+-------------------+-------+----------------+----------------+ -| | | | | -|OndiskInodeWrapper |XAttrs |OndiskChunkInfo |...... 
| -| | | | | -+-------------------+-------+----------------+----------------+ -``` -* OndiskInodeWrapper -``` -+------------+-------+-----------+ -| | | | -|OndiskInode | name | symlink | -| | | | -+------------+-------+-----------+ - - ``` - * blob file -``` -+------------+-----------+-------------------------------------------------+ -| | | | -|chunk_data |chunk_data |...... | -| | | | -+------------+-----------+-------------------------------------------------+ -``` - -## 3. Integrity Validation -### 3.1 Metadata Integrity Validation -Firstly, Nydus does basic verification of metadata values, looking for values that are in range (and hence not detected by automated verification checks) but are not correct. - -Secondly, as a primary concern, metadata needs some form of overall integrity checking. We cannot trust the metadata if we cannot verify that it has not been changed as a result of external influences. Hence we need some form of integrity check, and this is done by adding one of the two digest validations (Sha256 and blake3) to the metadata. - -Validation of the metadata takes place at runtime when metadata is accessed. By the nature of container image, only read verification is required. - -The read verification is doing sanity checking on metadata's fields and determining whether digest validating is necessary. If it is, the digest is calculated with the chosen hash algorithm and compared against the value stored in the object itself. If any of these checks fail, then the buffer is considered corrupt and the EINVAL error is set appropriately. -### 3.2 Data Integrity Validation -Data is split into chunks and each chunk has a saved digest in chunk info, the way of metadata digest validation applies to chunks as well. - -## 4. Prefetch -As a lazily fetch solution, prefetch plays an important role to mitigate the impact of failing to fetch data after containers run. In order to do it, we need to record hints in container image about which files and directories need prefetching, according to the information, at runtime nydus daemon will fetch these files and directories in the background into local storage. - -The image build tool `nydusify` accepts prefetch hints from `stdin`. -## 5. Blob and Blob Cache - -* blob - -Blob is the data part of a container image, it consists of files' data. Nydus has splitted a file's data into one or more fixed-length (1MB) chunks. - -* blob cache - -Nydus can be configured to set up a cache for blob, called `blobcache`. With `blobcache`, fetched blob data is saved to a `work dir` and won't be repeatedly fetched. Given the assumption that only a small portion of image is fetched, there is no cache eviction for `blobcache`. - -## 6. Compression -Nydus can be configured to save either compressed chunk or noncompressed chunk, with compressed chunk is the default configuration. - -The compression algorithm is lz4, gzip and zstd, `None` stands for noncompression. - -```rust -pub enum Algorithm { - None, - LZ4Block, - GZip, - Zstd, -} -``` - -# II. Global Structures - ## 1. Rafs Superblock - Rafs superblock is located at the first 8K of the `bootstrap` file. - -```rust -pub struct OndiskSuperBlock { - /// RAFS super magic - s_magic: u32, - /// RAFS version - s_fs_version: u32, - /// superblock on disk size - s_sb_size: u32, - /// block size - s_block_size: u32, - /// superblock flags - s_flags: u64, - /// V5: Number of unique inodes(hard link counts as 1). 
- s_inodes_count: u64, - /// V5: Offset of inode table - s_inode_table_offset: u64, - /// Those inodes which need to prefetch will have there indexes put into this table. - /// Then Rafs has a hint to prefetch inodes and doesn't have to load all inodes to page cache - /// under *direct* metadata mode. It helps save memory usage. - /// [idx1:u32, idx2:u32, idx3:u32 ...] - s_prefetch_table_offset: u64, - /// V5: Offset of blob table - s_blob_table_offset: u64, - /// V5: Size of inode table - s_inode_table_entries: u32, - s_prefetch_table_entries: u32, - /// V5: Entries of blob table - s_blob_table_size: u32, - s_reserved: u32, - /// Unused area - s_reserved2: [u8; RAFS_SUPERBLOCK_RESERVED_SIZE], -} -``` - -`s_flags` offers several flags to choose which compression algorithm, metadata hash algorithm and xattr will be used. - -```rust -bitflags! { - pub struct RafsSuperFlags: u64 { - /// Data chunks are not compressed. - const COMPRESS_NONE = 0x0000_0001; - /// Data chunks are compressed with lz4_block. - const COMPRESS_LZ4_BLOCK = 0x0000_0002; - /// Use blake3 hash algorithm to calculate digest. - const DIGESTER_BLAKE3 = 0x0000_0004; - /// Use sha256 hash algorithm to calculate digest. - const DIGESTER_SHA256 = 0x0000_0008; - /// Inode has explicit uid gid fields. - /// If unset, use nydusd process euid/egid for all - /// inodes at runtime. - const EXPLICIT_UID_GID = 0x0000_0010; - /// Some inode has xattr. - /// If unset, nydusd may return ENOSYS for getxattr/listxattr - /// calls. - const HAS_XATTR = 0x0000_0020; - // Data chunks are compressed with gzip - const COMPRESS_GZIP = 0x0000_0040; - // Data chunks are compressed with zstd - const COMPRESS_ZSTD = 0x0000_0080; - } -} -``` - - ## 2. Rafs Inode - -```rust -pub struct OndiskInodeWrapper<'a> { - pub name: &'a OsStr, - pub symlink: Option<&'a OsStr>, - pub inode: &'a OndiskInode, -} -``` - -The OndiskInode struct size is padded to 128 bytes. - -* If it's a directory, all its children are indexed contiguously in `inode table`, and `i_child_index` is the index of the first child and `i_child_count` is the amount of its children. -* If it's a file, `i_child_index` is not used. -*`i_name_size` is the length of its name. -* `i_symlink_size` is the length of its symlink path. - -```rust -pub struct OndiskInode { - /// sha256(sha256(chunk) + ...), [char; RAFS_SHA256_LENGTH] - pub i_digest: RafsDigest, // 32 - /// parent inode number - pub i_parent: u64, - /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. - pub i_ino: u64, - pub i_uid: u32, - pub i_gid: u32, - pub i_projid: u32, - pub i_mode: u32, // 64 - pub i_size: u64, - pub i_blocks: u64, - /// HARDLINK | SYMLINK | PREFETCH_HINT - pub i_flags: RafsInodeFlags, - pub i_nlink: u32, - /// for dir, child start index - pub i_child_index: u32, // 96 - /// for dir, means child count. - /// for regular file, means chunk info count. - pub i_child_count: u32, - /// file name size, [char; i_name_size] - pub i_name_size: u16, - /// symlink path size, [char; i_symlink_size] - pub i_symlink_size: u16, // 104 - pub i_reserved: [u8; 24], // 128 -} -``` - -`i_flags` indicates whether the inode is a symlink or a hardlink, whether it has xattr, and whether it has hole between its chunks. - -```rust -bitflags! { - pub struct RafsInodeFlags: u64 { - /// Inode is a symlink. - const SYMLINK = 0x0000_0001; - /// Inode has hardlinks. - const HARDLINK = 0x0000_0002; - /// Inode has extended attributes. - const XATTR = 0x0000_0004; - /// Inode chunks has holes. 
- const HAS_HOLE = 0x0000_0008; - } -} -``` - - `OndiskXAttrs` and xattr are stored right after `OndiskInodeWrapper` in the bootstrap file. - -```rust -pub struct OndiskXAttrs { - pub size: u64, -} -``` - -A list of `OndiskChunkInfo` is also stored after xattr if the inode contains file data. Each chunk info tells us where to find data in blob file, it contains -- the hash value `block_id` calculated from the chunk data, -- the blob file it belongs to, -- whether the chunk is compressed, -- whether the chunk has holes, -- the offset in the blob file, -- the file offset. - - -```rust -pub struct OndiskChunkInfo { - /// sha256(chunk), [char; RAFS_SHA256_LENGTH] - pub block_id: RafsDigest, - /// blob index (blob_id = blob_table[blob_index]) - pub blob_index: u32, - /// chunk flags - pub flags: RafsChunkFlags, - - /// compressed size in blob - pub compress_size: u32, - /// decompressed size in blob - pub decompress_size: u32, - /// compressed offset in blob - pub compress_offset: u64, - /// decompressed offset in blob - pub decompress_offset: u64, - - /// offset in file - pub file_offset: u64, - /// reserved - pub reserved: u64, -} - -bitflags! { - pub struct RafsChunkFlags: u32 { - /// chunk is compressed - const COMPRESSED = 0x0000_0001; - const HOLECHUNK = 0x0000_0002; - } -} -``` - ## 3. Rafs Inode Table -Inode table is a mapping from inode index to `OndiskInode`, specifically a hardlink file shares the same inode number but has a different inode index. - -```rust -pub struct OndiskInodeTable { - pub(crate) data: Vec, -} -``` - ## 4. Rafs Prefetch Table - This is where we record hints in container image about which files and directories need prefetching upon starting. - -```rust -pub struct PrefetchTable { - pub inode_indexes: Vec, -} -``` - ## 5. Rafs Blob Table - Blob table is the mapping from blob index of `OndiskInode` to blob id so that we don't have to record blob id inside `OndiskInode` (note that different inodes' data chunk can reside in the same blob). - -```rust -pub struct OndiskBlobTableEntry { - pub readahead_offset: u32, - pub readahead_size: u32, - pub blob_id: String, -} - -pub struct OndiskBlobTable { - pub entries: Vec, -} -``` -# III. Manifest of Nydus Format Image -Nydus manifest is designed to be fully compatible with OCI image spec and distribution spec by adding an extra manifest file to store the pointers of nydus bootstrap (i.e. metadata) and blobfile (i.e. data). - -## 1. Image Index -A typical image index enabling nydus points to two manifest files, one is the traditional OCI v1 image manifest, the other is the nydus manifest that takes advantage of `platform` and puts `os.features: ["nydus.remoteimage.v1"]` field under `platform`. - -```json -{ - "schemaVersion": 2, - "manifests": [ - { - "mediaType": "application/vnd.docker.distribution.manifest.v2+json", - "size": 1152, - "digest": "sha256:c95b7b93ccd48c3bfd97f8cac6d5ca8053ced584c9e8e6431861ca30b0d73114", - "platform": { - "architecture": "amd64", - "os": "linux" - } - }, - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "size": 1072, - "digest": "sha256:9e2bcf20f78c9ca1a5968a9228d73d85f27846904ddd9f6c10ef2263e13cec4f", - "platform": { - "architecture": "amd64", - "os": "linux", - "os.features": [ - "nydus.remoteimage.v1" - ] - } - } - ] -} -``` -## 2. 
Image Manifest - -A typical image manifest of nydus consists of `config.json`, one nydus metadata layer (`"mediaType": "application/vnd.oci.image.layer.v1.tar.gz"`) and one or more nydus data layers (`"mediaType": "application/vnd.oci.image.layer.nydus.blob.v1"`). - -* nydus metadata layer - -This layer refers to the metadata part of files and directories in the image, including rafs filesystem metadata and digest for validation purpose. - -* nydus data layer - -This layer refers to the data part, please note that the data layers of an image can be owned solely by this image or shared by others, similarly, each data layer is annotated with `"containerd.io/snapshot/nydus-blob": "true"`, which can be used to tell containerd's snapshotter to skip downloading them. - -The manifest is designed to be compatible with the dependency architect and garbage collection algorithm widely used by containerd and registry. - -```json -{ - "schemaVersion": 2, - "mediaType": "", - "config": { - "mediaType": "application/vnd.docker.container.image.v1+json", - "size": 981, - "digest": "sha256:a27f27be5546ba699ec38344a3fcbeb92ccfe7bdf0ac13d62ce630dea0178bbd" - }, - "layers": [ - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "size": 51522, - "digest": "sha256:8a44bc8c2e35502f68d1ad692f7bf247eb9e21dca2742b6b0df58ba7b6a96ef3", - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "size": 524, - "digest": "sha256:1d51ac9ebde626252c1b02fc2d446a5e328eadcb1ca26942bfbd482b5e386e49", - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", - "size": 7, - "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", - "annotations": { - "containerd.io/snapshot/nydus-blob": "true" - } - }, - { - "mediaType": "application/vnd.oci.image.layer.v1.tar.gz", - "size": 664576, - "digest": "sha256:35bdd331b926eccd78440b0060c8484355ad69a8e6f38290fed4d0a3491ba76e", - "annotations": { - "containerd.io/snapshot/nydus-bootstrap": "true" - } - } - ] -} -``` +# Nydus + +# I. High Level Design +## 0. Overview +Dragonfly image service is named as `nydus`, [GitHub repo](https://github.com/dragonflyoss/nydus) + +Nydus consists of two parts, +* a userspace filesystem called `rafs` on top of a container image format +* an image manifest that is compatible with OCI spec of image and distribution + +Its key features include: + +* Container images are downloaded on demand +* Chunk level data deduplication +* Flatten image metadata and data to remove all intermediate layers +* Only usable image data is saved when building a container image +* Only usable image data is downloaded when running a container +* End-to-end image data integrity +* Compatible with the OCI artifacts spec and distribution spec +* Integrated with existing CNCF project Dragonfly to support image distribution in large clusters +* Different container image storage backends are supported + +## 1. Architecture +Nydus takes in either [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) or [virtiofs](https://virtio-fs.gitlab.io/) protocol to service POD created by conventional runc containers or vm-based [Kata Containers](https://katacontainers.io/). It supports pulling container image data from container image registry, [OSS](https://www.alibabacloud.com/product/oss), NAS, as well as Dragonfly supernode and node peers. 
It can also optionally use a local directory to cache all container image data to speed up future container creation. + +![architecture](images/nydusd-arch.png) + +## 2. Rafs +Rafs is a filesystem image containing a separated metadata blob and several data-deduplicated content-addressable data blobs. In a typical rafs filesystem, the metadata is stored in `bootstrap` while the data is stored in `blobfile`. Nydus splits container image into two parts, metadata and data, where metadata contains everything a container needs to start with, while data is stored in chunks with chunk size being 1MB. Currently, Rafs has two versions called `Rafs v5` (which is a FUSE-based filesystem) and `Rafs v6` (which is compatible with the in-kernel EROFS filesystem). Note that, the following details are all about `Rafs v5` and meterials about `Raft v6` are still working in progress. + +![rafs](./images/rafs-format.png) + + * bootstrap + * The metadata is a merkle tree whose nodes represents a regular filesystem's directory/file + * a leaf node refers to a file and contains hash value of its file data + * Root node and internal nodes refer to directories and contain the hash value of their children nodes. +``` ++-----------------+-----------------+--------------+----------------+-------+-------+--------+---------------------+ +| | | | | | | | | +|OndiskSuperBlock |OndiskInodeTable |PrefetchTable |OndiskBlobTable |node 1 |node 2 | ...... | | +| | |(optional) | | | | | | ++-----------------+--------------------------------+----------------+-------+-------+--------+---------------------+ +``` + +* node + +``` ++-------------------+-------+----------------+----------------+ +| | | | | +|OndiskInodeWrapper |XAttrs |OndiskChunkInfo |...... | +| | | | | ++-------------------+-------+----------------+----------------+ +``` +* OndiskInodeWrapper +``` ++------------+-------+-----------+ +| | | | +|OndiskInode | name | symlink | +| | | | ++------------+-------+-----------+ + + ``` + * blob file +``` ++------------+-----------+-------------------------------------------------+ +| | | | +|chunk_data |chunk_data |...... | +| | | | ++------------+-----------+-------------------------------------------------+ +``` + +## 3. Integrity Validation +### 3.1 Metadata Integrity Validation +Firstly, Nydus does basic verification of metadata values, looking for values that are in range (and hence not detected by automated verification checks) but are not correct. + +Secondly, as a primary concern, metadata needs some form of overall integrity checking. We cannot trust the metadata if we cannot verify that it has not been changed as a result of external influences. Hence we need some form of integrity check, and this is done by adding one of the two digest validations (Sha256 and blake3) to the metadata. + +Validation of the metadata takes place at runtime when metadata is accessed. By the nature of container image, only read verification is required. + +The read verification is doing sanity checking on metadata's fields and determining whether digest validating is necessary. If it is, the digest is calculated with the chosen hash algorithm and compared against the value stored in the object itself. If any of these checks fail, then the buffer is considered corrupt and the EINVAL error is set appropriately. +### 3.2 Data Integrity Validation +Data is split into chunks and each chunk has a saved digest in chunk info, the way of metadata digest validation applies to chunks as well. + +## 4. 
Prefetch +As a lazily fetch solution, prefetch plays an important role to mitigate the impact of failing to fetch data after containers run. In order to do it, we need to record hints in container image about which files and directories need prefetching, according to the information, at runtime nydus daemon will fetch these files and directories in the background into local storage. + +The image build tool `nydusify` accepts prefetch hints from `stdin`. +## 5. Blob and Blob Cache + +* blob + +Blob is the data part of a container image, it consists of files' data. Nydus has splitted a file's data into one or more fixed-length (1MB) chunks. + +* blob cache + +Nydus can be configured to set up a cache for blob, called `blobcache`. With `blobcache`, fetched blob data is saved to a `work dir` and won't be repeatedly fetched. Given the assumption that only a small portion of image is fetched, there is no cache eviction for `blobcache`. + +## 6. Compression +Nydus can be configured to save either compressed chunk or noncompressed chunk, with compressed chunk is the default configuration. + +The compression algorithm is lz4, gzip and zstd, `None` stands for noncompression. + +```rust +pub enum Algorithm { + None, + LZ4Block, + GZip, + Zstd, +} +``` + +# II. Global Structures + ## 1. Rafs Superblock + Rafs superblock is located at the first 8K of the `bootstrap` file. + +```rust +pub struct OndiskSuperBlock { + /// RAFS super magic + s_magic: u32, + /// RAFS version + s_fs_version: u32, + /// superblock on disk size + s_sb_size: u32, + /// block size + s_block_size: u32, + /// superblock flags + s_flags: u64, + /// V5: Number of unique inodes(hard link counts as 1). + s_inodes_count: u64, + /// V5: Offset of inode table + s_inode_table_offset: u64, + /// Those inodes which need to prefetch will have there indexes put into this table. + /// Then Rafs has a hint to prefetch inodes and doesn't have to load all inodes to page cache + /// under *direct* metadata mode. It helps save memory usage. + /// [idx1:u32, idx2:u32, idx3:u32 ...] + s_prefetch_table_offset: u64, + /// V5: Offset of blob table + s_blob_table_offset: u64, + /// V5: Size of inode table + s_inode_table_entries: u32, + s_prefetch_table_entries: u32, + /// V5: Entries of blob table + s_blob_table_size: u32, + s_reserved: u32, + /// Unused area + s_reserved2: [u8; RAFS_SUPERBLOCK_RESERVED_SIZE], +} +``` + +`s_flags` offers several flags to choose which compression algorithm, metadata hash algorithm and xattr will be used. + +```rust +bitflags! { + pub struct RafsSuperFlags: u64 { + /// Data chunks are not compressed. + const COMPRESS_NONE = 0x0000_0001; + /// Data chunks are compressed with lz4_block. + const COMPRESS_LZ4_BLOCK = 0x0000_0002; + /// Use blake3 hash algorithm to calculate digest. + const DIGESTER_BLAKE3 = 0x0000_0004; + /// Use sha256 hash algorithm to calculate digest. + const DIGESTER_SHA256 = 0x0000_0008; + /// Inode has explicit uid gid fields. + /// If unset, use nydusd process euid/egid for all + /// inodes at runtime. + const EXPLICIT_UID_GID = 0x0000_0010; + /// Some inode has xattr. + /// If unset, nydusd may return ENOSYS for getxattr/listxattr + /// calls. + const HAS_XATTR = 0x0000_0020; + // Data chunks are compressed with gzip + const COMPRESS_GZIP = 0x0000_0040; + // Data chunks are compressed with zstd + const COMPRESS_ZSTD = 0x0000_0080; + } +} +``` + + ## 2. 
Rafs Inode + +```rust +pub struct OndiskInodeWrapper<'a> { + pub name: &'a OsStr, + pub symlink: Option<&'a OsStr>, + pub inode: &'a OndiskInode, +} +``` + +The OndiskInode struct size is padded to 128 bytes. + +* If it's a directory, all its children are indexed contiguously in `inode table`, and `i_child_index` is the index of the first child and `i_child_count` is the amount of its children. +* If it's a file, `i_child_index` is not used. +*`i_name_size` is the length of its name. +* `i_symlink_size` is the length of its symlink path. + +```rust +pub struct OndiskInode { + /// sha256(sha256(chunk) + ...), [char; RAFS_SHA256_LENGTH] + pub i_digest: RafsDigest, // 32 + /// parent inode number + pub i_parent: u64, + /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. + pub i_ino: u64, + pub i_uid: u32, + pub i_gid: u32, + pub i_projid: u32, + pub i_mode: u32, // 64 + pub i_size: u64, + pub i_blocks: u64, + /// HARDLINK | SYMLINK | PREFETCH_HINT + pub i_flags: RafsInodeFlags, + pub i_nlink: u32, + /// for dir, child start index + pub i_child_index: u32, // 96 + /// for dir, means child count. + /// for regular file, means chunk info count. + pub i_child_count: u32, + /// file name size, [char; i_name_size] + pub i_name_size: u16, + /// symlink path size, [char; i_symlink_size] + pub i_symlink_size: u16, // 104 + pub i_reserved: [u8; 24], // 128 +} +``` + +`i_flags` indicates whether the inode is a symlink or a hardlink, whether it has xattr, and whether it has hole between its chunks. + +```rust +bitflags! { + pub struct RafsInodeFlags: u64 { + /// Inode is a symlink. + const SYMLINK = 0x0000_0001; + /// Inode has hardlinks. + const HARDLINK = 0x0000_0002; + /// Inode has extended attributes. + const XATTR = 0x0000_0004; + /// Inode chunks has holes. + const HAS_HOLE = 0x0000_0008; + } +} +``` + + `OndiskXAttrs` and xattr are stored right after `OndiskInodeWrapper` in the bootstrap file. + +```rust +pub struct OndiskXAttrs { + pub size: u64, +} +``` + +A list of `OndiskChunkInfo` is also stored after xattr if the inode contains file data. Each chunk info tells us where to find data in blob file, it contains +- the hash value `block_id` calculated from the chunk data, +- the blob file it belongs to, +- whether the chunk is compressed, +- whether the chunk has holes, +- the offset in the blob file, +- the file offset. + + +```rust +pub struct OndiskChunkInfo { + /// sha256(chunk), [char; RAFS_SHA256_LENGTH] + pub block_id: RafsDigest, + /// blob index (blob_id = blob_table[blob_index]) + pub blob_index: u32, + /// chunk flags + pub flags: RafsChunkFlags, + + /// compressed size in blob + pub compress_size: u32, + /// decompressed size in blob + pub decompress_size: u32, + /// compressed offset in blob + pub compress_offset: u64, + /// decompressed offset in blob + pub decompress_offset: u64, + + /// offset in file + pub file_offset: u64, + /// reserved + pub reserved: u64, +} + +bitflags! { + pub struct RafsChunkFlags: u32 { + /// chunk is compressed + const COMPRESSED = 0x0000_0001; + const HOLECHUNK = 0x0000_0002; + } +} +``` + ## 3. Rafs Inode Table +Inode table is a mapping from inode index to `OndiskInode`, specifically a hardlink file shares the same inode number but has a different inode index. + +```rust +pub struct OndiskInodeTable { + pub(crate) data: Vec, +} +``` + ## 4. Rafs Prefetch Table + This is where we record hints in container image about which files and directories need prefetching upon starting. 
+ +```rust +pub struct PrefetchTable { + pub inode_indexes: Vec, +} +``` + ## 5. Rafs Blob Table + Blob table is the mapping from blob index of `OndiskInode` to blob id so that we don't have to record blob id inside `OndiskInode` (note that different inodes' data chunk can reside in the same blob). + +```rust +pub struct OndiskBlobTableEntry { + pub readahead_offset: u32, + pub readahead_size: u32, + pub blob_id: String, +} + +pub struct OndiskBlobTable { + pub entries: Vec, +} +``` +# III. Manifest of Nydus Format Image +Nydus manifest is designed to be fully compatible with OCI image spec and distribution spec by adding an extra manifest file to store the pointers of nydus bootstrap (i.e. metadata) and blobfile (i.e. data). + +## 1. Image Index +A typical image index enabling nydus points to two manifest files, one is the traditional OCI v1 image manifest, the other is the nydus manifest that takes advantage of `platform` and puts `os.features: ["nydus.remoteimage.v1"]` field under `platform`. + +```json +{ + "schemaVersion": 2, + "manifests": [ + { + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "size": 1152, + "digest": "sha256:c95b7b93ccd48c3bfd97f8cac6d5ca8053ced584c9e8e6431861ca30b0d73114", + "platform": { + "architecture": "amd64", + "os": "linux" + } + }, + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 1072, + "digest": "sha256:9e2bcf20f78c9ca1a5968a9228d73d85f27846904ddd9f6c10ef2263e13cec4f", + "platform": { + "architecture": "amd64", + "os": "linux", + "os.features": [ + "nydus.remoteimage.v1" + ] + } + } + ] +} +``` +## 2. Image Manifest + +A typical image manifest of nydus consists of `config.json`, one nydus metadata layer (`"mediaType": "application/vnd.oci.image.layer.v1.tar.gz"`) and one or more nydus data layers (`"mediaType": "application/vnd.oci.image.layer.nydus.blob.v1"`). + +* nydus metadata layer + +This layer refers to the metadata part of files and directories in the image, including rafs filesystem metadata and digest for validation purpose. + +* nydus data layer + +This layer refers to the data part, please note that the data layers of an image can be owned solely by this image or shared by others, similarly, each data layer is annotated with `"containerd.io/snapshot/nydus-blob": "true"`, which can be used to tell containerd's snapshotter to skip downloading them. + +The manifest is designed to be compatible with the dependency architect and garbage collection algorithm widely used by containerd and registry. 
+ +```json +{ + "schemaVersion": 2, + "mediaType": "", + "config": { + "mediaType": "application/vnd.docker.container.image.v1+json", + "size": 981, + "digest": "sha256:a27f27be5546ba699ec38344a3fcbeb92ccfe7bdf0ac13d62ce630dea0178bbd" + }, + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "size": 51522, + "digest": "sha256:8a44bc8c2e35502f68d1ad692f7bf247eb9e21dca2742b6b0df58ba7b6a96ef3", + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "size": 524, + "digest": "sha256:1d51ac9ebde626252c1b02fc2d446a5e328eadcb1ca26942bfbd482b5e386e49", + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.nydus.blob.v1", + "size": 7, + "digest": "sha256:00d151e7d392e68e2c756a6fc42640006ddc0a98d37dba3f90a7b73f63188bbd", + "annotations": { + "containerd.io/snapshot/nydus-blob": "true" + } + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar.gz", + "size": 664576, + "digest": "sha256:35bdd331b926eccd78440b0060c8484355ad69a8e6f38290fed4d0a3491ba76e", + "annotations": { + "containerd.io/snapshot/nydus-bootstrap": "true" + } + } + ] +} +``` diff --git a/docs/nydus-dev.md b/docs/nydus-dev.md index d77dc076147..f00458ac2e3 100644 --- a/docs/nydus-dev.md +++ b/docs/nydus-dev.md @@ -1,60 +1,60 @@ -## Architecture Overview - -![overview](images/nydus-architecture-overview.svg) - -### Crate Dependency - -The dependency among Nydus crates are shown below: - -![dependency](images/crate-dependency.svg) - -To ease crate publishing process and avoid frequent dependency failures, please follow the rules below to specify dependencies: -- Library crates only specify major and minor version numbers, such as `nydus-error = "0.2"`. -- Binary crates specify major, minor and patch version numbers, such as `nydus-error = "0.2.1"`. - - -## Storage Subsystem -The storage subsystem is the core of [Nydus Image Service](https://nydus.dev/) project, which caches data fetched from storage backends and provides data services to rafs, blobfs and fscache etc. -The storage subsystem contains several sub-modules: -- API Layer: define external APIs for clients. - - [BlobFactory](https://docs.rs/nydus-storage/latest/nydus_storage/factory/struct.BlobFactory.html): create and garbage-collect cached blob objects. - - [BlobDevice](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobDevice.html) and [BlobIoVec](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobIoVec.html): provide data services for rafs and blobfs in unit of data chunks. - - [BlobObject](https://docs.rs/nydus-storage/latest/nydus_storage/device/trait.BlobObject.html): provide data services in unit of address ranges. -- Cache Layer: decompress, decrypt and cache blob data fetched from storage backends. - - `PrefetchWorker`: workers to prefetch blob data from storage backend in background. - - [BlobCache](https://docs.rs/nydus-storage/latest/nydus_storage/cache/trait.BlobCache.html): define the core interface to access cached blob object. - - [DummyCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.DummyCacheMgr.html): a dummy cache manager to fetch data from storage backend on demand and never caches data locally. - - [FileCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.FileCacheMgr.html): a generic cache manager to cache data fetched from storage backend by using local files. 
- - [FsCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.FsCacheMgr.html): a special cache manager to cooperate Linux fscache driver to cache data from storage backend by using local files. - - [BlobMetaInfo](https://docs.rs/nydus-storage/latest/nydus_storage/meta/struct.BlobMetaInfo.html): blob metadata to support fetch, decompress and decrypt blob data on demand. -- Cache State Layer: track status of local cached blob data. - - [ChunkMap](https://docs.rs/nydus-storage/latest/nydus_storage/cache/state/trait.ChunkMap.html): maintain data cache status for data chunks. - - [RangeMap](https://docs.rs/nydus-storage/latest/nydus_storage/cache/state/trait.RangeMap.html): maintain data cache status for data ranges. -- Backend Layer: fetch blob data from storage backends. - - [BlobBackend](https://docs.rs/nydus-storage/latest/nydus_storage/backend/trait.BlobBackend.html): manager to get access to data blobs from storage backend. - - [BlobReader](https://docs.rs/nydus-storage/latest/nydus_storage/backend/trait.BlobReader.html): reader to access blob data from storage backend. - - [LocalFs](https://docs.rs/nydus-storage/latest/nydus_storage/backend/localfs/struct.LocalFs.html): storage backend to access blobs stored in local accessible filesystems, such ext4, xfs, NFS, CephFS, GlusterFS etc. - - [Oss](https://docs.rs/nydus-storage/latest/nydus_storage/backend/oss/index.html): storage backend to access blobs from object storage systems. - - [Registry](https://docs.rs/nydus-storage/latest/nydus_storage/backend/registry/index.html): storage backend to access blobs from container image registries. - -![storage](images/nydus-storage-architecture.svg) - -### IO Mode - -When the Nydus Image Service is created in 2019, the rust async io ecosystem is still under development. -So the Nydus storage subsystem has used multi-threaded, synchronous IO mode for IO operations. -This mode has several advantages, especially easy to develop and maintain. -But it also has drawbacks too, too many worker threads under heavy loads, hard to use AIO/io uring asynchronous IO framework. - -Now the rust async IO ecosystem is much more mature, and is suitable for IO intensive workloads like Nydus. -It uses the most popular tokio async io framework. Tokio supports two types of runtime engines: multi-threaded and current thread. -The tokio multi-threaded Runtime will automatically dispatch work items among worker threads. -And the tokio current thread Runtime runs pending work items by current thread, and doesn't dispatch work items to different worker threads. - -On the other hand, tokio-uring to do asynchronous IO by using io-uring only supports tokio current thread Runtime engine. -Thus tokio current thread Runtime is adopted to support tokio-uring in future. -And an asynchronous Multi-Producer, Multi-Consumer channel is used to dispatch work items among different tokio current thread Runtimes. - -TODO: enable io-uring for asynchronous IO. It's a challenging task due to data buffer lifetime management. 
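The dispatch model described above can be sketched as follows. This is a minimal illustration rather than the actual nydusd code, and it assumes the `tokio` and `async-channel` crates: a few OS threads each drive a tokio current-thread runtime, and all of them pull work items from one shared async MPMC channel. Keeping every runtime on its own thread is also what leaves room for adopting tokio-uring later, since tokio-uring only supports the current-thread runtime.

```rust
use std::thread;

fn main() {
    // Bounded async MPMC channel shared by all worker runtimes.
    let (tx, rx) = async_channel::bounded::<u64>(128);

    // Each worker thread owns a tokio current-thread runtime; a work item goes
    // to whichever worker receives it from the channel first.
    let workers: Vec<_> = (0..4)
        .map(|id| {
            let rx = rx.clone();
            thread::spawn(move || {
                let rt = tokio::runtime::Builder::new_current_thread()
                    .enable_all()
                    .build()
                    .expect("build current-thread runtime");
                rt.block_on(async move {
                    while let Ok(item) = rx.recv().await {
                        println!("worker {id}: handling item {item}");
                    }
                });
            })
        })
        .collect();

    // Producer side: dispatch work items, then drop the sender so that the
    // channel closes and the workers exit.
    let rt = tokio::runtime::Builder::new_current_thread()
        .build()
        .expect("build producer runtime");
    rt.block_on(async {
        for item in 0..16u64 {
            tx.send(item).await.expect("all receivers dropped");
        }
    });
    drop(tx);

    for w in workers {
        w.join().expect("worker thread panicked");
    }
}
```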
- -### Blob Data Format (TODO: nydus data blob, stargz data blob, compression algorithms, encryption algorithms, digest algorithms) +## Architecture Overview + +![overview](images/nydus-architecture-overview.svg) + +### Crate Dependency + +The dependency among Nydus crates are shown below: + +![dependency](images/crate-dependency.svg) + +To ease crate publishing process and avoid frequent dependency failures, please follow the rules below to specify dependencies: +- Library crates only specify major and minor version numbers, such as `nydus-error = "0.2"`. +- Binary crates specify major, minor and patch version numbers, such as `nydus-error = "0.2.1"`. + + +## Storage Subsystem +The storage subsystem is the core of [Nydus Image Service](https://nydus.dev/) project, which caches data fetched from storage backends and provides data services to rafs, blobfs and fscache etc. +The storage subsystem contains several sub-modules: +- API Layer: define external APIs for clients. + - [BlobFactory](https://docs.rs/nydus-storage/latest/nydus_storage/factory/struct.BlobFactory.html): create and garbage-collect cached blob objects. + - [BlobDevice](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobDevice.html) and [BlobIoVec](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobIoVec.html): provide data services for rafs and blobfs in unit of data chunks. + - [BlobObject](https://docs.rs/nydus-storage/latest/nydus_storage/device/trait.BlobObject.html): provide data services in unit of address ranges. +- Cache Layer: decompress, decrypt and cache blob data fetched from storage backends. + - `PrefetchWorker`: workers to prefetch blob data from storage backend in background. + - [BlobCache](https://docs.rs/nydus-storage/latest/nydus_storage/cache/trait.BlobCache.html): define the core interface to access cached blob object. + - [DummyCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.DummyCacheMgr.html): a dummy cache manager to fetch data from storage backend on demand and never caches data locally. + - [FileCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.FileCacheMgr.html): a generic cache manager to cache data fetched from storage backend by using local files. + - [FsCacheMgr](https://docs.rs/nydus-storage/latest/nydus_storage/cache/struct.FsCacheMgr.html): a special cache manager to cooperate Linux fscache driver to cache data from storage backend by using local files. + - [BlobMetaInfo](https://docs.rs/nydus-storage/latest/nydus_storage/meta/struct.BlobMetaInfo.html): blob metadata to support fetch, decompress and decrypt blob data on demand. +- Cache State Layer: track status of local cached blob data. + - [ChunkMap](https://docs.rs/nydus-storage/latest/nydus_storage/cache/state/trait.ChunkMap.html): maintain data cache status for data chunks. + - [RangeMap](https://docs.rs/nydus-storage/latest/nydus_storage/cache/state/trait.RangeMap.html): maintain data cache status for data ranges. +- Backend Layer: fetch blob data from storage backends. + - [BlobBackend](https://docs.rs/nydus-storage/latest/nydus_storage/backend/trait.BlobBackend.html): manager to get access to data blobs from storage backend. + - [BlobReader](https://docs.rs/nydus-storage/latest/nydus_storage/backend/trait.BlobReader.html): reader to access blob data from storage backend. 
+ - [LocalFs](https://docs.rs/nydus-storage/latest/nydus_storage/backend/localfs/struct.LocalFs.html): storage backend to access blobs stored in local accessible filesystems, such ext4, xfs, NFS, CephFS, GlusterFS etc. + - [Oss](https://docs.rs/nydus-storage/latest/nydus_storage/backend/oss/index.html): storage backend to access blobs from object storage systems. + - [Registry](https://docs.rs/nydus-storage/latest/nydus_storage/backend/registry/index.html): storage backend to access blobs from container image registries. + +![storage](images/nydus-storage-architecture.svg) + +### IO Mode + +When the Nydus Image Service is created in 2019, the rust async io ecosystem is still under development. +So the Nydus storage subsystem has used multi-threaded, synchronous IO mode for IO operations. +This mode has several advantages, especially easy to develop and maintain. +But it also has drawbacks too, too many worker threads under heavy loads, hard to use AIO/io uring asynchronous IO framework. + +Now the rust async IO ecosystem is much more mature, and is suitable for IO intensive workloads like Nydus. +It uses the most popular tokio async io framework. Tokio supports two types of runtime engines: multi-threaded and current thread. +The tokio multi-threaded Runtime will automatically dispatch work items among worker threads. +And the tokio current thread Runtime runs pending work items by current thread, and doesn't dispatch work items to different worker threads. + +On the other hand, tokio-uring to do asynchronous IO by using io-uring only supports tokio current thread Runtime engine. +Thus tokio current thread Runtime is adopted to support tokio-uring in future. +And an asynchronous Multi-Producer, Multi-Consumer channel is used to dispatch work items among different tokio current thread Runtimes. + +TODO: enable io-uring for asynchronous IO. It's a challenging task due to data buffer lifetime management. + +### Blob Data Format (TODO: nydus data blob, stargz data blob, compression algorithms, encryption algorithms, digest algorithms) diff --git a/docs/nydus-failover-upgrade.md b/docs/nydus-failover-upgrade.md index d0641f823d6..992ed3a5b03 100644 --- a/docs/nydus-failover-upgrade.md +++ b/docs/nydus-failover-upgrade.md @@ -1,62 +1,62 @@ -# Nydus Failover and Hot Upgrade - -Currently, nydusd supports failover and hot upgrade under fusedev and fscache modes, specifically: - -- Nydusd can automatically recover from crashes and continue serving container I/O operations. -- Users can upgrade nydusd without disrupting container I/O operations, once the old nydusd process exits and the new one starts, it can immediately serve container I/O. - -[nydus-snapshotter](https://github.com/containerd/nydus-snapshotter.git) already natively supports the above features. - -## How to Use - -### Failover - -Ensure that the `recover_policy` value in the configuration file of nydus-snapshotter is set to `failover`. - -```toml -[daemon] -# How to process when daemon dies: "none", "restart" or "failover" -recover_policy = "failover" -``` - -Then restart the nydus-snapshotter service. In this way, when the nydusd process crashes (for example, OOM Killed), nydus-snapshotter will automatically restart and recover the nydusd process. - -### Hot Upgrade - -By making an HTTP call to the system unix domain socket exposed by nydus-snapshotter (default is `/run/containerd-nydus/system.sock`), you can upgrade the version of the nydusd binary file used by the nydusd process. 
- -The method and path of this HTTP call are: `PUT /api/v1/nydusd/upgrade` - -Example request body: - -```json -{ - "nydusd_path": "/path/to/new/nydusd" -} -``` - -By now only the field `nydusd_path` is required in the request body. More fields (like `version`, `policy`, etc) may be used in the future. - -## Design - -### State Machine of Nydusd - -![state machine of nydusd](images/nydusd-sate-machine.png) - -Under normal circumstances, executing the nydusd binary file will create a new nydusd process. At this time, the process is in the `Init` state. Then nydusd issues a `Mount` command to itself, entering the `Ready` state. Subsequently, an external controller (such as nydus-snapshotter) will call the control interface exposed by nydusd and issue a `Start` command, making nydusd enter the `Running` state. - -When an external controller (such as nydus-snapshotter) performs failover or hot upgrade, the command to start a new nydusd process will include the `--upgrade` parameter. Thus, after the new nydusd process starts, it will stay in the `Init` state. The external controller (such as nydus-snapshotter) can then issue a `Takeover` command to the new nydusd process, instructing the new nydusd process to perform a `Restore` operation. - -### State Saving and Recovery - -#### State Information to be Saved and Recovered - -1. File handler. In fusedev mode, it's the fuse file; in fscache mode, it's the file handler which is used to cooperate with the linux fscache driver to manage cached blob objects. - -2. State of the backend filesystem. For example, the VFS state in fusedev mode, etc. - -#### How to Save and Recover - -Usually, after the nydusd process enters the `Running` state, the external controller (such as nydus-snapshotter) will notify the nydusd process to save its state information and return it. For nydus-snapshotter, it provides a unix domain socket path to the nydusd process. Nydusd serializes the state information using [the dbs-snapshot crate](https://github.com/kata-containers/dbs-snapshot) and then sends the serialized byte array, using [the sendfd crate](https://github.com/standard-ai/sendfd) (the fd used here is the File Handler referred to in the previous section), through the unix domain socket path provided by nydus-snapshotter. Nydus-snapshotter will store the received state information byte array in memory. - +# Nydus Failover and Hot Upgrade + +Currently, nydusd supports failover and hot upgrade under fusedev and fscache modes, specifically: + +- Nydusd can automatically recover from crashes and continue serving container I/O operations. +- Users can upgrade nydusd without disrupting container I/O operations, once the old nydusd process exits and the new one starts, it can immediately serve container I/O. + +[nydus-snapshotter](https://github.com/containerd/nydus-snapshotter.git) already natively supports the above features. + +## How to Use + +### Failover + +Ensure that the `recover_policy` value in the configuration file of nydus-snapshotter is set to `failover`. + +```toml +[daemon] +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "failover" +``` + +Then restart the nydus-snapshotter service. In this way, when the nydusd process crashes (for example, OOM Killed), nydus-snapshotter will automatically restart and recover the nydusd process. 
+ +### Hot Upgrade + +By making an HTTP call to the system unix domain socket exposed by nydus-snapshotter (default is `/run/containerd-nydus/system.sock`), you can upgrade the version of the nydusd binary file used by the nydusd process. + +The method and path of this HTTP call are: `PUT /api/v1/nydusd/upgrade` + +Example request body: + +```json +{ + "nydusd_path": "/path/to/new/nydusd" +} +``` + +By now only the field `nydusd_path` is required in the request body. More fields (like `version`, `policy`, etc) may be used in the future. + +## Design + +### State Machine of Nydusd + +![state machine of nydusd](images/nydusd-sate-machine.png) + +Under normal circumstances, executing the nydusd binary file will create a new nydusd process. At this time, the process is in the `Init` state. Then nydusd issues a `Mount` command to itself, entering the `Ready` state. Subsequently, an external controller (such as nydus-snapshotter) will call the control interface exposed by nydusd and issue a `Start` command, making nydusd enter the `Running` state. + +When an external controller (such as nydus-snapshotter) performs failover or hot upgrade, the command to start a new nydusd process will include the `--upgrade` parameter. Thus, after the new nydusd process starts, it will stay in the `Init` state. The external controller (such as nydus-snapshotter) can then issue a `Takeover` command to the new nydusd process, instructing the new nydusd process to perform a `Restore` operation. + +### State Saving and Recovery + +#### State Information to be Saved and Recovered + +1. File handler. In fusedev mode, it's the fuse file; in fscache mode, it's the file handler which is used to cooperate with the linux fscache driver to manage cached blob objects. + +2. State of the backend filesystem. For example, the VFS state in fusedev mode, etc. + +#### How to Save and Recover + +Usually, after the nydusd process enters the `Running` state, the external controller (such as nydus-snapshotter) will notify the nydusd process to save its state information and return it. For nydus-snapshotter, it provides a unix domain socket path to the nydusd process. Nydusd serializes the state information using [the dbs-snapshot crate](https://github.com/kata-containers/dbs-snapshot) and then sends the serialized byte array, using [the sendfd crate](https://github.com/standard-ai/sendfd) (the fd used here is the File Handler referred to in the previous section), through the unix domain socket path provided by nydus-snapshotter. Nydus-snapshotter will store the received state information byte array in memory. + During recovery, the nydusd process is in the `Init` state. Upon receiving the `Takeover` command from nydus-snapshotter, it will receive the state information saved by the old nydusd process sent by nydus-snapshotter through the unix domain socket path. After receiving the state information, nydusd will deserialize it and use it to restore the backend filesystem. \ No newline at end of file diff --git a/docs/nydus-fscache.md b/docs/nydus-fscache.md index 4e3a6252c08..5b1ad0a7607 100644 --- a/docs/nydus-fscache.md +++ b/docs/nydus-fscache.md @@ -1,226 +1,226 @@ -# Nydus EROFS fscache user guide - -This guide shows you how to use fscache-based EROFS nydus image service to launch containers with the fscache-enabled in-kernel EROFS on-demand download feature. 
- -## Prepare the kernel - -### (1) Compile kernel by yourself -Be aware of using the fscache-enabled EROFS kernel (Linux 5.19+), it can be built with the following steps: - -1. ``git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git`` \ - or (mirror in china): ``git://kernel.source.codeaurora.cn/pub/scm/linux/kernel/git/torvalds/linux.git`` - -2. ``make olddefconfig`` - -3. `make menuconfig` to update _.config_ to enable the follow kernel configurations: -``` -CONFIG_FSCACHE=m -CONFIG_CACHEFILES=m -CONFIG_CACHEFILES_ONDEMAND=y -CONFIG_EROFS_FS=m -CONFIG_EROFS_FS_ONDEMAND=y -``` - -5. ``make -jX`` - -6. ``make modules_install && make install`` - -7. Reboot to the kernel just built - -8. ``modprobe cachefiles`` if cachefiles is built as module - -9. ``[ -c /dev/cachefiles ] && echo ok`` - -### (2) Use OpenAnolis kernel -If you want to use an existing kernel, you could use [OpenAnolis](https://openanolis.cn/download?lang=en), it adds support for fscache mode since kernel version 4.19.91-27 or 5.10.134-12. - -Update the kernel to 4.19.91-27 on OpenAnolis 8 as an example: - -``` -$ sudo yum list --showduplicate kernel --enablerepo Plus - -Installed Packages -kernel.x86_64 4.19.91-26.an8 -Available Packages -kernel.x86_64 4.18.0-372.32.1.an8_6 -kernel.x86_64 4.18.0-425.13.1.0.1.an8 -kernel.x86_64 4.18.0-425.19.2.0.1.an8 -kernel.x86_64 4.18.0-477.10.1.0.1.an8 -kernel.x86_64 4.18.0-477.13.1.0.1.an8 -kernel.x86_64 4.19.91-27.an8 -kernel.x86_64 4.19.91-27.1.an8 -kernel.x86_64 4.19.91-27.2.an8 -kernel.x86_64 4.19.91-27.3.an8 -kernel.x86_64 4.19.91-27.4.an8 - -$ sudo yum update kernel --enablerepo Plus - -$ sudo reboot -``` - -## Enable fscache - -1. ``[ -c /dev/cachefiles ] && echo ok`` to test fscache is enable or not. If your result shows `ok`, then fscache has been already enabled; otherwise, please follow the following steps to enable fscache. - -2. Download cachefilesd package. For centos users, the command is: -``` -sudo yum install cachefilesd -``` - -3. Start cachefilesd deamon. -``` -sudo systemctl start cachefilesd -sudo systemctl status cachefilesd -``` - -4. Ensure the device file `/dev/cachefiles` is not occupied. If your result is not empty, please kill all processes the result shows. -``` -sudo lsof /dev/cachefiles -``` - -## Get ctr-remote and the fscache-supported nydusd - -1. Make sure you have installed _rust 1.52.1_ version and golang. - -2. Check out the latest nydus source code with \ -``git clone https://github.com/dragonflyoss/nydus.git`` - -3. Build nydusd and nydus-image with - -``` bash -cd nydus -make release -``` - -4. Copy the "nydus-image" binary file compiled in Step 3 into _$PATH_ e.g. /usr/bin with \ -``cp target/release/nydus-image /usr/bin`` - -5. Build ctr-remote with - -``` bash -cd contrib/ctr-remote -make -``` - -## Run container with nydus snapshotter - -1. Make sure your containerd version is 1.4 or above. - -2. Get nydus snapshotter with EROFS supported: - ```shell - # clone code - git clone https://github.com/containerd/nydus-snapshotter.git - # compile binary to ./bin/containerd-nydus-grpc - cd nydus-snapshotter - make - ``` - -3. Prepare a configuration json like below, named as `/etc/nydus/nydusd-config.fscache.json`: - -```json -{ - "type": "bootstrap", - "config": { - "backend_type": "registry", - "backend_config": { - "scheme": "https" - }, - "cache_type": "fscache" - } -} -``` - -4. Start nydus snapshotter with the command below: - -``` -# make sure the directory exists. 
-mkdir -p /var/lib/containerd-nydus - -./bin/containerd-nydus-grpc \ - --nydusd-config /etc/nydus/nydusd-config.fscache.json \ - --fs-driver fscache \ - --nydusd /path/to/nydusd \ - --log-to-stdout -``` - -5. Configure containerd to use `nydus-snapshotter` by editing - `/etc/containerd/config.toml` like below: - -``` toml -version = 2 - -[plugins] - [plugins."io.containerd.grpc.v1.cri"] - [plugins."io.containerd.grpc.v1.cri".cni] - bin_dir = "/usr/lib/cni" - conf_dir = "/etc/cni/net.d" - [plugins."io.containerd.internal.v1.opt"] - path = "/var/lib/containerd/opt" - -[proxy_plugins] - [proxy_plugins.nydus] - type = "snapshot" - address = "/run/containerd-nydus/containerd-nydus-grpc.sock" - -[plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "nydus" - disable_snapshot_annotations = false -``` - -For more information on how to configure containerd to use nydus snapshotter please refer to [here](./containerd-env-setup.md). - -6. Restart containerd with - `service containerd restart` - -7. Run container with [ctr-remote](../contrib/ctr-remote) - -``` shell -# pull nydus image -contrib/ctr-remote/bin/ctr-remote images rpull docker.io/hsiangkao/ubuntu:20.04-rafs-v6 - -# run nydus image -ctr run --rm -t --snapshotter=nydus docker.io/hsiangkao/ubuntu:20.04-rafs-v6 ubuntu /bin/bash - -# remove nydus image -ctr images rm docker.io/hsiangkao/ubuntu:20.04-rafs-v6 -``` - -Some RAFS v6 referenced images (in Zstd algorithms): -``` -docker.io/hsiangkao/ubuntu:20.04-rafs-v6 -docker.io/hsiangkao/ubuntu:22.04-rafs-v6 -docker.io/hsiangkao/wordpress:5.7-rafs-v6 -docker.io/hsiangkao/wordpress:6.0-rafs-v6 -``` - -## Try to convert a new image to RAFS v6 - -1. Get nydus image conversion tool `accelctl` - -``` shell -# clone acceld code -git clone https://github.com/goharbor/acceleration-service.git - -# compile binary to ./accelctl -cd acceleration-service -make -``` - -2. Convert to nydus image - -Duplicate `./misc/config/config.yaml.nydus.tmpl` configuration file as `path/to/config.yaml`, make sure that the `rafs_version` option in `converter.driver.config` is changed to `6` and the registry auth have been configured in `provider.source`. - -``` shell -# convert to nydus image -./accelctl convert --config path/to/config.yaml /ubuntu:latest -``` - -## Recordings - -1. Pull Nydus / OCI wordpress images - -[![asciicast](https://asciinema.org/a/1a6aQA6rOFsoAgivDh9mBV0lE.svg)](https://asciinema.org/a/1a6aQA6rOFsoAgivDh9mBV0lE?speed=2) - -2. Pull ZRAN-indexed OCI / OCI wordpress images - -[![asciicast](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f.svg)](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f?speed=2) +# Nydus EROFS fscache user guide + +This guide shows you how to use fscache-based EROFS nydus image service to launch containers with the fscache-enabled in-kernel EROFS on-demand download feature. + +## Prepare the kernel + +### (1) Compile kernel by yourself +Be aware of using the fscache-enabled EROFS kernel (Linux 5.19+), it can be built with the following steps: + +1. ``git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git`` \ + or (mirror in china): ``git://kernel.source.codeaurora.cn/pub/scm/linux/kernel/git/torvalds/linux.git`` + +2. ``make olddefconfig`` + +3. `make menuconfig` to update _.config_ to enable the follow kernel configurations: +``` +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_CACHEFILES_ONDEMAND=y +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_ONDEMAND=y +``` + +5. ``make -jX`` + +6. ``make modules_install && make install`` + +7. 
Reboot to the kernel just built + +8. ``modprobe cachefiles`` if cachefiles is built as module + +9. ``[ -c /dev/cachefiles ] && echo ok`` + +### (2) Use OpenAnolis kernel +If you want to use an existing kernel, you could use [OpenAnolis](https://openanolis.cn/download?lang=en), it adds support for fscache mode since kernel version 4.19.91-27 or 5.10.134-12. + +Update the kernel to 4.19.91-27 on OpenAnolis 8 as an example: + +``` +$ sudo yum list --showduplicate kernel --enablerepo Plus + +Installed Packages +kernel.x86_64 4.19.91-26.an8 +Available Packages +kernel.x86_64 4.18.0-372.32.1.an8_6 +kernel.x86_64 4.18.0-425.13.1.0.1.an8 +kernel.x86_64 4.18.0-425.19.2.0.1.an8 +kernel.x86_64 4.18.0-477.10.1.0.1.an8 +kernel.x86_64 4.18.0-477.13.1.0.1.an8 +kernel.x86_64 4.19.91-27.an8 +kernel.x86_64 4.19.91-27.1.an8 +kernel.x86_64 4.19.91-27.2.an8 +kernel.x86_64 4.19.91-27.3.an8 +kernel.x86_64 4.19.91-27.4.an8 + +$ sudo yum update kernel --enablerepo Plus + +$ sudo reboot +``` + +## Enable fscache + +1. ``[ -c /dev/cachefiles ] && echo ok`` to test fscache is enable or not. If your result shows `ok`, then fscache has been already enabled; otherwise, please follow the following steps to enable fscache. + +2. Download cachefilesd package. For centos users, the command is: +``` +sudo yum install cachefilesd +``` + +3. Start cachefilesd deamon. +``` +sudo systemctl start cachefilesd +sudo systemctl status cachefilesd +``` + +4. Ensure the device file `/dev/cachefiles` is not occupied. If your result is not empty, please kill all processes the result shows. +``` +sudo lsof /dev/cachefiles +``` + +## Get ctr-remote and the fscache-supported nydusd + +1. Make sure you have installed _rust 1.52.1_ version and golang. + +2. Check out the latest nydus source code with \ +``git clone https://github.com/dragonflyoss/nydus.git`` + +3. Build nydusd and nydus-image with + +``` bash +cd nydus +make release +``` + +4. Copy the "nydus-image" binary file compiled in Step 3 into _$PATH_ e.g. /usr/bin with \ +``cp target/release/nydus-image /usr/bin`` + +5. Build ctr-remote with + +``` bash +cd contrib/ctr-remote +make +``` + +## Run container with nydus snapshotter + +1. Make sure your containerd version is 1.4 or above. + +2. Get nydus snapshotter with EROFS supported: + ```shell + # clone code + git clone https://github.com/containerd/nydus-snapshotter.git + # compile binary to ./bin/containerd-nydus-grpc + cd nydus-snapshotter + make + ``` + +3. Prepare a configuration json like below, named as `/etc/nydus/nydusd-config.fscache.json`: + +```json +{ + "type": "bootstrap", + "config": { + "backend_type": "registry", + "backend_config": { + "scheme": "https" + }, + "cache_type": "fscache" + } +} +``` + +4. Start nydus snapshotter with the command below: + +``` +# make sure the directory exists. +mkdir -p /var/lib/containerd-nydus + +./bin/containerd-nydus-grpc \ + --nydusd-config /etc/nydus/nydusd-config.fscache.json \ + --fs-driver fscache \ + --nydusd /path/to/nydusd \ + --log-to-stdout +``` + +5. 
Configure containerd to use `nydus-snapshotter` by editing + `/etc/containerd/config.toml` like below: + +``` toml +version = 2 + +[plugins] + [plugins."io.containerd.grpc.v1.cri"] + [plugins."io.containerd.grpc.v1.cri".cni] + bin_dir = "/usr/lib/cni" + conf_dir = "/etc/cni/net.d" + [plugins."io.containerd.internal.v1.opt"] + path = "/var/lib/containerd/opt" + +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" + +[plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "nydus" + disable_snapshot_annotations = false +``` + +For more information on how to configure containerd to use nydus snapshotter please refer to [here](./containerd-env-setup.md). + +6. Restart containerd with + `service containerd restart` + +7. Run container with [ctr-remote](../contrib/ctr-remote) + +``` shell +# pull nydus image +contrib/ctr-remote/bin/ctr-remote images rpull docker.io/hsiangkao/ubuntu:20.04-rafs-v6 + +# run nydus image +ctr run --rm -t --snapshotter=nydus docker.io/hsiangkao/ubuntu:20.04-rafs-v6 ubuntu /bin/bash + +# remove nydus image +ctr images rm docker.io/hsiangkao/ubuntu:20.04-rafs-v6 +``` + +Some RAFS v6 referenced images (in Zstd algorithms): +``` +docker.io/hsiangkao/ubuntu:20.04-rafs-v6 +docker.io/hsiangkao/ubuntu:22.04-rafs-v6 +docker.io/hsiangkao/wordpress:5.7-rafs-v6 +docker.io/hsiangkao/wordpress:6.0-rafs-v6 +``` + +## Try to convert a new image to RAFS v6 + +1. Get nydus image conversion tool `accelctl` + +``` shell +# clone acceld code +git clone https://github.com/goharbor/acceleration-service.git + +# compile binary to ./accelctl +cd acceleration-service +make +``` + +2. Convert to nydus image + +Duplicate `./misc/config/config.yaml.nydus.tmpl` configuration file as `path/to/config.yaml`, make sure that the `rafs_version` option in `converter.driver.config` is changed to `6` and the registry auth have been configured in `provider.source`. + +``` shell +# convert to nydus image +./accelctl convert --config path/to/config.yaml /ubuntu:latest +``` + +## Recordings + +1. Pull Nydus / OCI wordpress images + +[![asciicast](https://asciinema.org/a/1a6aQA6rOFsoAgivDh9mBV0lE.svg)](https://asciinema.org/a/1a6aQA6rOFsoAgivDh9mBV0lE?speed=2) + +2. Pull ZRAN-indexed OCI / OCI wordpress images + +[![asciicast](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f.svg)](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f?speed=2) diff --git a/docs/nydus-image.md b/docs/nydus-image.md index 7e7974eeed6..eff5c2c5962 100644 --- a/docs/nydus-image.md +++ b/docs/nydus-image.md @@ -1,304 +1,304 @@ -# Toolset for Working with RAFS Filesystems and Nydus Container Images - -The `nydus-image` toolset provides tools to build, check, inspect and export RAFS filesystems and Nydus container images. - -Logically, a RAFS filesystem consists of two parts: -- a bootstrap/meta blob, containing filesystem metadata, such directory name, file attributes etc. -- one or more data blobs, containing file contents. - -Physically, RAFS bootstrap/meta blobs can be stored as separate files, or inlined into data blob files. -Therefore, a RAFS file system may consist of the following parts: -- a blob file containing both RAFS metadata and data -- a blob file containing RAFS metadata, and one or more blob files for RAFS data -- a blob file containing RAFS metadata, and one or more blob files for RAFS data, and associated targz files for RAFS ZRAN mode. 
- -## Installation - -Get latest `nydus-image` binary from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. - -## Nydus Image Builder - -The `nydus-image create` subcommand creates a RAFS filesystem or a layer of Nydus image from a tar file or from a directory. - -RAFS filesystem/Nydus image has three modes: Native, Zran and Tarfs. Each mode has different features and may be used for different scenarios. - -| Mode | Blobs in Registry | Local Cache File | Runtime Integrity | Lazy-loading | Chunk Dedup | Encryption | OCIv1 Compatible | -|:------:|:-----------------------:|:-----------------------------:|:-----------------:|:------------:|:-----------:|:----------:|:----------------:| -| Tarfs | tar.gz / tar.zst | nydus.meta & tar | Optional | No | No | No | Yes | -| Zran | tar.gz & nydus.meta | nydus.meta & nydus.data.cache | Yes | Yes | Yes | No | Yes | -| Native | nydus.data & nydus.meta | nydus.meta & nydus.data.cache | Yes | Yes | Yes | Yes | No | - -### Specify Data Blob Output Path - -There are two ways to specify where to save the resulting data blob: - -- Specify the file path via `--blob `. It could be a regular file into which the data blob contents are dumped. It can also be a fifo (named pipe) from which "nydusify" or other tools can receive the generated blob content. - -- Specify a directory with `-D/--blob-dir BLOB_DIR`. `nydus-image` will use the sha256 digest of the resulting data blob as the filename, concatenated to the directory path. This is useful when you don't want to set a custom name or you are building a layered nydus image. Please create `BLOB_DIR` before executing the command. - -### Build RAFS Filesystem in Native Mode from a Directory -```shell -nydus-image create -t dir-rafs \ - -D /path/to/output/directory \ - /path/to/source/dir - -[root@image-service]# nydus-image create -t dir-rafs -D images/ src -[2023-03-29 16:34:28.092347 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/f62a7e668c7f306655233367f8b6e4073d7fa94a6f57826069db3e745e2fd327 -data blob size: 0xe32b -data blobs: ["e9d3d45f6ad9f647cc1a2e2f699a46f553ce87b1136026d53d474c6142f80763"] -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 58155 3月 29 16:34 e9d3d45f6ad9f647cc1a2e2f699a46f553ce87b1136026d53d474c6142f80763 --rw-r--r-- 1 root root 20480 3月 29 16:34 f62a7e668c7f306655233367f8b6e4073d7fa94a6f57826069db3e745e2fd327 -``` - -### Build RAFS Filesystem in Native Mode with Inlined Metadata from a Directory -```shell -nydus-image create -t dir-rafs \ - --blob-inline-meta \ - -D /path/to/output/directory \ - /path/to/source/dir - -[root@image-service]# nydus-image create -t dir-rafs --blob-inline-meta -D images/ src -[2023-03-29 16:36:14.629372 +08:00] INFO successfully built RAFS filesystem: -meta blob path: -data blob size: 0x1392b -data blobs: ["903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4"] -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 80171 3月 29 16:36 903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 -``` - -### Build RAFS Filesystem in Native Mode from a tar.gz File -```shell -nydus-image create -t targz-rafs \ - -D /path/to/output/directory \ - /path/to/source/targz.file - -[root@image-service]# nydus-image create -t targz-rafs -D images/ src.tar.gz -[2023-03-29 16:40:20.484997 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/94bf66fc81425bfb72939c942ee1ead90e2e2ac9f09f08f369db15afde163b3b -data blob size: 0xe328 -data blobs: 
["d3bb8a2cdb6778cbdc31d97be88ef00217d29e4c119f41ef0a4d9f202088d813"] -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 20480 3月 29 16:40 94bf66fc81425bfb72939c942ee1ead90e2e2ac9f09f08f369db15afde163b3b --rw-r--r-- 1 root root 58152 3月 29 16:40 d3bb8a2cdb6778cbdc31d97be88ef00217d29e4c119f41ef0a4d9f202088d813 -``` - -### Build RAFS Filesystem in Zran Mode from a tar.gz File -```shell -nydus-image create -t targz-ref \ - -D /path/to/output/directory \ - /path/to/source/targz.file - -[root@image-service]# sha256sum src.tar.gz -13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b src.tar.gz -[root@image-service]# cp src.tar.gz images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b -[root@image-service]# file images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b -images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b: gzip compressed data, last modified: Wed Mar 29 08:39:20 2023, from Unix, original size 245760 -[root@image-service]# nydus-image create -t targz-ref -D images/ images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b -[2023-03-29 16:48:51.656612 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/606e8f8fbce6496b676f09f6b5231d15c301424af5b54a0433b2e9071bbe857d -data blob size: 0xb008 -data blobs: ["13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b"] -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 45064 3月 29 16:48 13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b --rw-r--r-- 1 root root 4343 3月 29 16:48 2ae4b87374bbb7be0f10300c20617bc7f40d96a8a12a43445f88d95dd326c7dd --rw-r--r-- 1 root root 20480 3月 29 16:48 606e8f8fbce6496b676f09f6b5231d15c301424af5b54a0433b2e9071bbe857d -``` - -### Build RAFS Filesystem in Tarfs Mode from a tar File -```shell -nydus-image create -t tar-tarfs \ - -D /path/to/output/directory \ - /path/to/source/tar.file - -[root@image-service]# sha256sum src.tar -0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a src.tar -[root@image-service]# cp src.tar images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a -[root@image-service]# nydus-image create -t tar-tarfs -D images/ images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a -[2023-03-29 16:52:44.251252 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 -data blob size: 0x3c000 -data blobs: ["0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a"] -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 245760 3月 29 16:52 0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a --rw-r--r-- 1 root root 20480 3月 29 16:52 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 -``` - -### Layered Build Nydus Image - -`nydus-image` tool supports to build Nydus image from multiple layers of image: - - ```shell - # Build from lower layer - nydus-image create \ - -D /path/to/output/dir \ - /path/to/lower/dir - # Build from upper layer based on lower layer - nydus-image create \ - --parent-bootstrap /path/to/parent-bootstrap \ - -D /path/to/output/dir \ - /path/to/upper/dir -``` - -### Build Nydus Image With Chunk-Dict -`nydus-image` tool supports to build Nydus image with chunk-dict for chunk deduplication: -1. reference chunks which are same as chunks in chunk-dict to blobs in chunk-dict -2. new dumped blob would be smaller than without using chunk-dict -3. 
save space of remote storage because of chunk-deduplication between images (e.g. oss, registry) -```shell -# Build with bootstrap type chunk-dict -nydus-image create \ - --chunk-dict bootstrap=/path/to/dict.boot \ - -D /path/to/output/dir \ - /path/to/lower/dir -``` - -## Merge Multiple RAFS Filesystems into One - -`nydus-image` tool supports to build Nydus image from multiple layers of image: -The `nydus-image merge` subcommand supports merging multiple RAFS filesystems into one. -It applies the overlay rules defined the OCI Image Spec or the overlayfs, to avoid using `overlayfs` at runtime. - -```shell -nydus-image merge \ - -D /path/to/output/dir \ - /path/to/bootstrap1 /path/to/bootstrap2 - -[root@image-service]# nydus-image create --blob-inline-meta -D images/ src -[2023-03-29 17:02:06.231478 +08:00] INFO successfully built RAFS filesystem: -meta blob path: -data blob size: 0x1392b -data blobs: ["903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4"] -[root@image-service]# nydus-image create --blob-inline-meta -D images/ blobfs/ -[2023-03-29 17:02:08.980743 +08:00] INFO successfully built RAFS filesystem: -meta blob path: -data blob size: 0x86ba -data blobs: ["9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762"] -[root@image-service]# nydus-image merge -D images/ images/903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 images/9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762 -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 80171 3月 29 17:02 903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 --rw-r--r-- 1 root root 34490 3月 29 17:02 9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762 --rw-r--r-- 1 root root 20480 3月 29 17:02 df01f389850b79cd5a6ca6db98495bb457aa0821b0558351c55537551322fb96 -``` - -## Compact Nydus Image -`nydus-image` tool supports to compact Nydus image for -1. reduce number of blobs -2. optimize size of blobs (remove unused chunks in blob, merge small blobs) -```shell -# backend config for getting chunk data from remote storage -# e.g. OSS backend config -cat /path/to/backend-config.json -{ - "endpoint": "region.aliyuncs.com", - "scheme": "https", - "access_key_id": "", - "access_key_secret": "", - "bucket_name": "", - "object_prefix": "nydus/" -} - -# min_used_ratio: -# rebuild blobs whose used_ratio < min_used_ratio -# used_ratio = (compress_size of all chunks which are referenced by bootstrap) / blob_compress_size -# available value: 0-99, 0 means disable -# compact_blob_size: -# we only compact blob whose compress_size < compact_blob_size -# max_compact_size: -# final merged blob compress_size <= max_compact_size -# layers_to_compact: -# if number of blobs >= layers_to_compact, try compact nydus image -# 0 means always try compact -cat /path/to/compact.json -{ - "min_used_ratio": 10, - "compact_blob_size": 10485760, - "max_compact_size": 104857600, - "layers_to_compact": 32, - "blobs_dir": "/path/to/blobs" -} - -# Compact Nydus image with chunk-dict -nydus-image create \ - --bootstrap /path/to/bootstrap \ - --chunk-dict bootstrap=/path/to/dict \ - --config /path/to/compact.json \ - --backend-config-file /path/to/backend-config.json \ - --backend-type oss \ - /path/to/lower/dir -``` - -## Export RAFS Filesystem into Other Formats - -### Export RAFS Filesystem as Raw Block Device Image - -A RAFS filesystem can be exported as a raw block device image, so it can be exposed as block device through loop device, NBD and virtio-blk etc. 
-If the `--verity` option is given, it will also generate verification data to enable `dm-verity`. - -```shell - nydus-image export --block --verity \ - -D /path/to/output/dir \ - -B /path/to/bootstrap - -[root@image-service]# nydus-image create -t tar-tarfs -D images/ src.tar -[2023-03-29 17:20:02.500167 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 -data blob size: 0x3c000 -data blobs: ["0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a"] -[root@image-service]# cp src.tar images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a -[root@image-service]# nydus-image export --block --verity -D images/ -B images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 -[2023-03-29 17:20:47.676914 +08:00] INFO RAFS features: COMPRESSION_NONE | HASH_SHA256 | EXPLICIT_UID_GID | TARTFS_MODE -dm-verity options: --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 4576 --hash-offset 2342912 6b5743e7da406a33ab3a8bb03b65e67d1c1951b2d7ebc5026e0de3fb44a7cc20 -[root@image-service]# ls -l images/ --rw-r--r-- 1 root root 245760 3月 29 17:20 0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a --rw-r--r-- 1 root root 20480 3月 29 17:20 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 --rw-r--r-- 1 root root 2494464 3月 29 17:20 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47.disk -[root@image-service]# losetup /dev/loop1 images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47.disk -[root@image-service]# veritysetup open --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 4576 --hash-offset 2342912 /dev/loop1 verity /dev/loop1 6b5743e7da406a33ab3a8bb03b65e67d1c1951b2d7ebc5026e0de3fb44a7cc20 -[root@image-service]# lsblk -NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT -loop1 7:1 0 2.4M 0 loop -└─verity 252:0 0 2.2M 1 crypt /root/nydus/mnt -[root@image-service]# mount -t erofs -r /dev/dm-0 mnt/ -``` - -**Note**: the argument value of image layer id specified in nydus-image CLI should omit `sha256:` prefix. - -## Check RAFS Filesystem - -### Check and Validate RAFS filesystem metadata - -`nydus-image check` command support output rafs filesystem metadata info into json. 
- -```shell -[root@image-service]# nydus-image create -t dir-rafs -D images/ src -[2024-03-13 20:18:16.611453 +08:00] INFO successfully built RAFS filesystem: -meta blob path: images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157 -data blob size: 0x1025 -data blobs: ["d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3"] - -[root@image-service]# nydus-image check --bootstrap images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157 -J ~/output.json -[2024-03-13 20:19:08.235328 +08:00] INFO RAFS features: HASH_BLAKE3 | EXPLICIT_UID_GID | COMPRESSION_ZSTD -RAFS filesystem metadata is valid, referenced data blobs: - 0: d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3, compressed data size 0x15, compressed file size 0x1025, uncompressed file size 0x1000, chunks: 0x1, features: aligned cap_toc - -[root@image-service]# cat ~/output.json -{ - "version": "unknown-baf7148a2721320d0b09f5fca0442b7baf99cbba", - "bootstrap": "./images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157", - "blobs": [ - "d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3" - ], - "trace": { - "consumed_time": { - "load_tree_from_bootstrap": 0.0004384600033517927 - }, - "registered_events": { - - } - }, - "fs_version": "6", - "compressor": "Zstd" -} +# Toolset for Working with RAFS Filesystems and Nydus Container Images + +The `nydus-image` toolset provides tools to build, check, inspect and export RAFS filesystems and Nydus container images. + +Logically, a RAFS filesystem consists of two parts: +- a bootstrap/meta blob, containing filesystem metadata, such directory name, file attributes etc. +- one or more data blobs, containing file contents. + +Physically, RAFS bootstrap/meta blobs can be stored as separate files, or inlined into data blob files. +Therefore, a RAFS file system may consist of the following parts: +- a blob file containing both RAFS metadata and data +- a blob file containing RAFS metadata, and one or more blob files for RAFS data +- a blob file containing RAFS metadata, and one or more blob files for RAFS data, and associated targz files for RAFS ZRAN mode. + +## Installation + +Get latest `nydus-image` binary from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. + +## Nydus Image Builder + +The `nydus-image create` subcommand creates a RAFS filesystem or a layer of Nydus image from a tar file or from a directory. + +RAFS filesystem/Nydus image has three modes: Native, Zran and Tarfs. Each mode has different features and may be used for different scenarios. + +| Mode | Blobs in Registry | Local Cache File | Runtime Integrity | Lazy-loading | Chunk Dedup | Encryption | OCIv1 Compatible | +|:------:|:-----------------------:|:-----------------------------:|:-----------------:|:------------:|:-----------:|:----------:|:----------------:| +| Tarfs | tar.gz / tar.zst | nydus.meta & tar | Optional | No | No | No | Yes | +| Zran | tar.gz & nydus.meta | nydus.meta & nydus.data.cache | Yes | Yes | Yes | No | Yes | +| Native | nydus.data & nydus.meta | nydus.meta & nydus.data.cache | Yes | Yes | Yes | Yes | No | + +### Specify Data Blob Output Path + +There are two ways to specify where to save the resulting data blob: + +- Specify the file path via `--blob `. It could be a regular file into which the data blob contents are dumped. It can also be a fifo (named pipe) from which "nydusify" or other tools can receive the generated blob content. + +- Specify a directory with `-D/--blob-dir BLOB_DIR`. 
`nydus-image` will use the sha256 digest of the resulting data blob as the filename, concatenated to the directory path. This is useful when you don't want to set a custom name or you are building a layered nydus image. Please create `BLOB_DIR` before executing the command. + +### Build RAFS Filesystem in Native Mode from a Directory +```shell +nydus-image create -t dir-rafs \ + -D /path/to/output/directory \ + /path/to/source/dir + +[root@image-service]# nydus-image create -t dir-rafs -D images/ src +[2023-03-29 16:34:28.092347 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/f62a7e668c7f306655233367f8b6e4073d7fa94a6f57826069db3e745e2fd327 +data blob size: 0xe32b +data blobs: ["e9d3d45f6ad9f647cc1a2e2f699a46f553ce87b1136026d53d474c6142f80763"] +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 58155 3月 29 16:34 e9d3d45f6ad9f647cc1a2e2f699a46f553ce87b1136026d53d474c6142f80763 +-rw-r--r-- 1 root root 20480 3月 29 16:34 f62a7e668c7f306655233367f8b6e4073d7fa94a6f57826069db3e745e2fd327 +``` + +### Build RAFS Filesystem in Native Mode with Inlined Metadata from a Directory +```shell +nydus-image create -t dir-rafs \ + --blob-inline-meta \ + -D /path/to/output/directory \ + /path/to/source/dir + +[root@image-service]# nydus-image create -t dir-rafs --blob-inline-meta -D images/ src +[2023-03-29 16:36:14.629372 +08:00] INFO successfully built RAFS filesystem: +meta blob path: +data blob size: 0x1392b +data blobs: ["903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4"] +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 80171 3月 29 16:36 903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 +``` + +### Build RAFS Filesystem in Native Mode from a tar.gz File +```shell +nydus-image create -t targz-rafs \ + -D /path/to/output/directory \ + /path/to/source/targz.file + +[root@image-service]# nydus-image create -t targz-rafs -D images/ src.tar.gz +[2023-03-29 16:40:20.484997 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/94bf66fc81425bfb72939c942ee1ead90e2e2ac9f09f08f369db15afde163b3b +data blob size: 0xe328 +data blobs: ["d3bb8a2cdb6778cbdc31d97be88ef00217d29e4c119f41ef0a4d9f202088d813"] +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 20480 3月 29 16:40 94bf66fc81425bfb72939c942ee1ead90e2e2ac9f09f08f369db15afde163b3b +-rw-r--r-- 1 root root 58152 3月 29 16:40 d3bb8a2cdb6778cbdc31d97be88ef00217d29e4c119f41ef0a4d9f202088d813 +``` + +### Build RAFS Filesystem in Zran Mode from a tar.gz File +```shell +nydus-image create -t targz-ref \ + -D /path/to/output/directory \ + /path/to/source/targz.file + +[root@image-service]# sha256sum src.tar.gz +13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b src.tar.gz +[root@image-service]# cp src.tar.gz images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b +[root@image-service]# file images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b +images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b: gzip compressed data, last modified: Wed Mar 29 08:39:20 2023, from Unix, original size 245760 +[root@image-service]# nydus-image create -t targz-ref -D images/ images/13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b +[2023-03-29 16:48:51.656612 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/606e8f8fbce6496b676f09f6b5231d15c301424af5b54a0433b2e9071bbe857d +data blob size: 0xb008 +data blobs: ["13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b"] 
+[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 45064 3月 29 16:48 13111d487b1958281514769eedea840d14e5f27f0d7c2c97b8a286d62645766b +-rw-r--r-- 1 root root 4343 3月 29 16:48 2ae4b87374bbb7be0f10300c20617bc7f40d96a8a12a43445f88d95dd326c7dd +-rw-r--r-- 1 root root 20480 3月 29 16:48 606e8f8fbce6496b676f09f6b5231d15c301424af5b54a0433b2e9071bbe857d +``` + +### Build RAFS Filesystem in Tarfs Mode from a tar File +```shell +nydus-image create -t tar-tarfs \ + -D /path/to/output/directory \ + /path/to/source/tar.file + +[root@image-service]# sha256sum src.tar +0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a src.tar +[root@image-service]# cp src.tar images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a +[root@image-service]# nydus-image create -t tar-tarfs -D images/ images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a +[2023-03-29 16:52:44.251252 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 +data blob size: 0x3c000 +data blobs: ["0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a"] +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 245760 3月 29 16:52 0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a +-rw-r--r-- 1 root root 20480 3月 29 16:52 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 +``` + +### Layered Build Nydus Image + +`nydus-image` tool supports to build Nydus image from multiple layers of image: + + ```shell + # Build from lower layer + nydus-image create \ + -D /path/to/output/dir \ + /path/to/lower/dir + # Build from upper layer based on lower layer + nydus-image create \ + --parent-bootstrap /path/to/parent-bootstrap \ + -D /path/to/output/dir \ + /path/to/upper/dir +``` + +### Build Nydus Image With Chunk-Dict +`nydus-image` tool supports to build Nydus image with chunk-dict for chunk deduplication: +1. reference chunks which are same as chunks in chunk-dict to blobs in chunk-dict +2. new dumped blob would be smaller than without using chunk-dict +3. save space of remote storage because of chunk-deduplication between images (e.g. oss, registry) +```shell +# Build with bootstrap type chunk-dict +nydus-image create \ + --chunk-dict bootstrap=/path/to/dict.boot \ + -D /path/to/output/dir \ + /path/to/lower/dir +``` + +## Merge Multiple RAFS Filesystems into One + +`nydus-image` tool supports to build Nydus image from multiple layers of image: +The `nydus-image merge` subcommand supports merging multiple RAFS filesystems into one. +It applies the overlay rules defined the OCI Image Spec or the overlayfs, to avoid using `overlayfs` at runtime. 
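+
+To see how the overlay rules play out, here is a minimal hypothetical sketch before the generic usage shown below (directory and bootstrap names such as `lower`, `upper` and `images/lower.boot` are placeholders, and it assumes bootstraps are listed from the lowest to the uppermost layer): a path present in more than one layer is taken from the later (upper) bootstrap.
+
+```shell
+# lower layer provides /etc/motd, upper layer overrides it
+mkdir -p lower/etc upper/etc
+echo "from lower" > lower/etc/motd
+echo "from upper" > upper/etc/motd
+
+# build one RAFS bootstrap per layer
+# (assuming --bootstrap can be combined with -D to name the meta blob explicitly)
+nydus-image create --bootstrap images/lower.boot -D images/ lower
+nydus-image create --bootstrap images/upper.boot -D images/ upper
+
+# merge with the lower bootstrap first and the upper bootstrap last;
+# the merged filesystem exposes the upper layer's /etc/motd
+nydus-image merge -D images/ images/lower.boot images/upper.boot
+```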
+ +```shell +nydus-image merge \ + -D /path/to/output/dir \ + /path/to/bootstrap1 /path/to/bootstrap2 + +[root@image-service]# nydus-image create --blob-inline-meta -D images/ src +[2023-03-29 17:02:06.231478 +08:00] INFO successfully built RAFS filesystem: +meta blob path: +data blob size: 0x1392b +data blobs: ["903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4"] +[root@image-service]# nydus-image create --blob-inline-meta -D images/ blobfs/ +[2023-03-29 17:02:08.980743 +08:00] INFO successfully built RAFS filesystem: +meta blob path: +data blob size: 0x86ba +data blobs: ["9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762"] +[root@image-service]# nydus-image merge -D images/ images/903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 images/9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762 +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 80171 3月 29 17:02 903c62564da0cb18997a4d4c40f25d73c0ab9baef2177f9030d5e0c06ac26fa4 +-rw-r--r-- 1 root root 34490 3月 29 17:02 9e50ae5ac02b2ef6ffb86075720e49d95d8240eed4717dd8ac9c68cadba00762 +-rw-r--r-- 1 root root 20480 3月 29 17:02 df01f389850b79cd5a6ca6db98495bb457aa0821b0558351c55537551322fb96 +``` + +## Compact Nydus Image +`nydus-image` tool supports to compact Nydus image for +1. reduce number of blobs +2. optimize size of blobs (remove unused chunks in blob, merge small blobs) +```shell +# backend config for getting chunk data from remote storage +# e.g. OSS backend config +cat /path/to/backend-config.json +{ + "endpoint": "region.aliyuncs.com", + "scheme": "https", + "access_key_id": "", + "access_key_secret": "", + "bucket_name": "", + "object_prefix": "nydus/" +} + +# min_used_ratio: +# rebuild blobs whose used_ratio < min_used_ratio +# used_ratio = (compress_size of all chunks which are referenced by bootstrap) / blob_compress_size +# available value: 0-99, 0 means disable +# compact_blob_size: +# we only compact blob whose compress_size < compact_blob_size +# max_compact_size: +# final merged blob compress_size <= max_compact_size +# layers_to_compact: +# if number of blobs >= layers_to_compact, try compact nydus image +# 0 means always try compact +cat /path/to/compact.json +{ + "min_used_ratio": 10, + "compact_blob_size": 10485760, + "max_compact_size": 104857600, + "layers_to_compact": 32, + "blobs_dir": "/path/to/blobs" +} + +# Compact Nydus image with chunk-dict +nydus-image create \ + --bootstrap /path/to/bootstrap \ + --chunk-dict bootstrap=/path/to/dict \ + --config /path/to/compact.json \ + --backend-config-file /path/to/backend-config.json \ + --backend-type oss \ + /path/to/lower/dir +``` + +## Export RAFS Filesystem into Other Formats + +### Export RAFS Filesystem as Raw Block Device Image + +A RAFS filesystem can be exported as a raw block device image, so it can be exposed as block device through loop device, NBD and virtio-blk etc. +If the `--verity` option is given, it will also generate verification data to enable `dm-verity`. 
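+
+For a quick local test without integrity checking, the raw image can also be attached through a loop device; this is only a sketch, assuming `--block` works without `--verity` and that the exported image is written as `<bootstrap-name>.disk` next to the bootstrap in the output directory, as in the full dm-verity walkthrough below:
+
+```shell
+nydus-image export --block \
+    -D /path/to/output/dir \
+    -B /path/to/bootstrap
+
+# attach the generated raw image to a free loop device and mount it read-only as EROFS
+LOOP=$(losetup --find --show /path/to/output/dir/<bootstrap-name>.disk)
+mount -t erofs -r "$LOOP" /path/to/mnt
+```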
+ +```shell + nydus-image export --block --verity \ + -D /path/to/output/dir \ + -B /path/to/bootstrap + +[root@image-service]# nydus-image create -t tar-tarfs -D images/ src.tar +[2023-03-29 17:20:02.500167 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 +data blob size: 0x3c000 +data blobs: ["0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a"] +[root@image-service]# cp src.tar images/0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a +[root@image-service]# nydus-image export --block --verity -D images/ -B images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 +[2023-03-29 17:20:47.676914 +08:00] INFO RAFS features: COMPRESSION_NONE | HASH_SHA256 | EXPLICIT_UID_GID | TARTFS_MODE +dm-verity options: --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 4576 --hash-offset 2342912 6b5743e7da406a33ab3a8bb03b65e67d1c1951b2d7ebc5026e0de3fb44a7cc20 +[root@image-service]# ls -l images/ +-rw-r--r-- 1 root root 245760 3月 29 17:20 0e2dbe8b6e0f55f42c75034ed9dfc582ad0a94098cfc248c968522e7ef02e00a +-rw-r--r-- 1 root root 20480 3月 29 17:20 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47 +-rw-r--r-- 1 root root 2494464 3月 29 17:20 90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47.disk +[root@image-service]# losetup /dev/loop1 images/90f0e6e7e0ff822d4acddf30c36ac77fe06f549fe58f89a818fa824b19f70d47.disk +[root@image-service]# veritysetup open --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 4576 --hash-offset 2342912 /dev/loop1 verity /dev/loop1 6b5743e7da406a33ab3a8bb03b65e67d1c1951b2d7ebc5026e0de3fb44a7cc20 +[root@image-service]# lsblk +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT +loop1 7:1 0 2.4M 0 loop +└─verity 252:0 0 2.2M 1 crypt /root/nydus/mnt +[root@image-service]# mount -t erofs -r /dev/dm-0 mnt/ +``` + +**Note**: the argument value of image layer id specified in nydus-image CLI should omit `sha256:` prefix. + +## Check RAFS Filesystem + +### Check and Validate RAFS filesystem metadata + +`nydus-image check` command support output rafs filesystem metadata info into json. 
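+
+The JSON report written via `-J` (see the example run below) can be post-processed with ordinary tooling; a hypothetical snippet, assuming `jq` is installed and the referenced data blobs live in `images/`:
+
+```shell
+# verify that every data blob referenced by the bootstrap is present locally
+for blob in $(jq -r '.blobs[]' ~/output.json); do
+    [ -f "images/$blob" ] && echo "ok      $blob" || echo "missing $blob"
+done
+
+# print the RAFS version and compressor recorded in the report
+jq -r '"fs_version=\(.fs_version) compressor=\(.compressor)"' ~/output.json
+```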
+ +```shell +[root@image-service]# nydus-image create -t dir-rafs -D images/ src +[2024-03-13 20:18:16.611453 +08:00] INFO successfully built RAFS filesystem: +meta blob path: images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157 +data blob size: 0x1025 +data blobs: ["d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3"] + +[root@image-service]# nydus-image check --bootstrap images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157 -J ~/output.json +[2024-03-13 20:19:08.235328 +08:00] INFO RAFS features: HASH_BLAKE3 | EXPLICIT_UID_GID | COMPRESSION_ZSTD +RAFS filesystem metadata is valid, referenced data blobs: + 0: d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3, compressed data size 0x15, compressed file size 0x1025, uncompressed file size 0x1000, chunks: 0x1, features: aligned cap_toc + +[root@image-service]# cat ~/output.json +{ + "version": "unknown-baf7148a2721320d0b09f5fca0442b7baf99cbba", + "bootstrap": "./images/05533d7dfe183435d34e862367c32352401f8305bb0ab90bf9e9bfddd5a52157", + "blobs": [ + "d8c052b11ef830a4655d7c9af3e396c5ce4fb8d4b4708701217845ec9fb2fbb3" + ], + "trace": { + "consumed_time": { + "load_tree_from_bootstrap": 0.0004384600033517927 + }, + "registered_events": { + + } + }, + "fs_version": "6", + "compressor": "Zstd" +} ``` \ No newline at end of file diff --git a/docs/nydus-overlayfs.md b/docs/nydus-overlayfs.md index 7229a2bbb58..c8059ed5d4a 100644 --- a/docs/nydus-overlayfs.md +++ b/docs/nydus-overlayfs.md @@ -1,42 +1,42 @@ -# NydusOverlayfs - -`nydus-overlayfs` is a FUSE(Filesystem in UserSpacE) mount helper command for containerd to use with Nydus. The document explains in a nutshell on how it works. - -When the `--enable-nydus-overlayfs` option is specified, `nydus-snapshotter`'s `Mount()` method returns a mount slice like - -```json -[ - { - Type: "fuse.nydus-overlayfs", - Source: "overlay", - Options: [lowerdir=lower_A:lower_B,upperdir=upper_A,workdir=work_A,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx})], - } -] -``` - -Compared with a mount slice returned by the `overlayfs` snapshotter, there is an extra `extraoption` option encoded in base64 format. The `nydus-overlayfs` mount helper is used to help containerd to ignore the extra mount option. - -There are three calling stacks when handling a `nydus-snapshotter` mount slice. -1. `containerd` -> `mount.fuse` -> `nydus-overlay` -2. `containerd` -> `containerd-shim-runc-v2` -> `mount.fuse` -> `nydus-overlay` -3. 
`containerd` -> `containerd-shim-kata-v2` -> `nydusd` - -Per [containerd](https://github.com/containerd/containerd/blob/v1.5.7/mount/mount_linux.go#L384), `containerd` and `containerd-shim-runc-v2` call `mount.fuse` or `mount.fuse3` when `Type` has prefix `fuse` or `fuse3`, with a command format like -```shell -mount.fuse overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx}) -t nydus-overlayfs -``` - -When `mount.fuse` starts, it calls the below command to do the real mount -```shell -nydus-overlayfs overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx}),dev,suid -``` - -And `nydus-overlayfs` parses the mount options, filters out `extraoption`, and calls the `mount` syscall in a format equivalent to -```shell -mount -t overlay overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,dev,suid -``` - -Meanwhile, when containerd passes the `nydus-snapshotter` mount slice to `containerd-shim-kata-v2`, it can parse the mount slice and pass the `extraoption` to `nydusd`, to support nydus image format natively. - -So in summary, `containerd` and `containerd-shim-runc-v2` rely on the `nydus-overlay` mount helper to handle the mount slice returned by `nydus-snapshotter`, while `containerd-shim-kata-v2` can parse and handle it on its own. - +# NydusOverlayfs + +`nydus-overlayfs` is a FUSE(Filesystem in UserSpacE) mount helper command for containerd to use with Nydus. The document explains in a nutshell on how it works. + +When the `--enable-nydus-overlayfs` option is specified, `nydus-snapshotter`'s `Mount()` method returns a mount slice like + +```json +[ + { + Type: "fuse.nydus-overlayfs", + Source: "overlay", + Options: [lowerdir=lower_A:lower_B,upperdir=upper_A,workdir=work_A,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx})], + } +] +``` + +Compared with a mount slice returned by the `overlayfs` snapshotter, there is an extra `extraoption` option encoded in base64 format. The `nydus-overlayfs` mount helper is used to help containerd to ignore the extra mount option. + +There are three calling stacks when handling a `nydus-snapshotter` mount slice. +1. `containerd` -> `mount.fuse` -> `nydus-overlay` +2. `containerd` -> `containerd-shim-runc-v2` -> `mount.fuse` -> `nydus-overlay` +3. 
`containerd` -> `containerd-shim-kata-v2` -> `nydusd` + +Per [containerd](https://github.com/containerd/containerd/blob/v1.5.7/mount/mount_linux.go#L384), `containerd` and `containerd-shim-runc-v2` call `mount.fuse` or `mount.fuse3` when `Type` has prefix `fuse` or `fuse3`, with a command format like +```shell +mount.fuse overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx}) -t nydus-overlayfs +``` + +When `mount.fuse` starts, it calls the below command to do the real mount +```shell +nydus-overlayfs overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,extraoption=base64({source:xxx,config:xxx,snapshotdir:xxx}),dev,suid +``` + +And `nydus-overlayfs` parses the mount options, filters out `extraoption`, and calls the `mount` syscall in a format equivalent to +```shell +mount -t overlay overlay ./foo/merged -o lowerdir=./foo/lower2:./foo/lower1,upperdir=./foo/upper,workdir=./foo/work,dev,suid +``` + +Meanwhile, when containerd passes the `nydus-snapshotter` mount slice to `containerd-shim-kata-v2`, it can parse the mount slice and pass the `extraoption` to `nydusd`, to support nydus image format natively. + +So in summary, `containerd` and `containerd-shim-runc-v2` rely on the `nydus-overlay` mount helper to handle the mount slice returned by `nydus-snapshotter`, while `containerd-shim-kata-v2` can parse and handle it on its own. + diff --git a/docs/nydus-zran.md b/docs/nydus-zran.md index e02d9b152f0..5205262bfe7 100644 --- a/docs/nydus-zran.md +++ b/docs/nydus-zran.md @@ -1,63 +1,63 @@ -# Nydus zran artifact user guide - -This guide explains how to create a tiny **nydus zran artifact** from an existing OCI image, which can be used to accelerate the image directly with lazy pulling. It provides several advantages because of reusing the OCI image blobs: - -- It eliminates the need to store two separate full images, which can save massive space in your image registry. -- Generating such artifact is faster than converting the full accelerated image. - -A simple test result is shown below: - -- Image: node:19.0 -- Workload: node -v -- Registry Network: 3MB/s - -| image type | image size | nydusify conversion | nydusify push | nerdctl run | read data | -| ---------------------- | ---------- | ------------------- | ------------- | ----------- | --------- | -| OCI v1 | 353.05MB | - | - | 126s | 353.05MB | -| Nydus (Native RAFS v6) | 337.94MB | 29s | 1m58s | 11s | 21.18MB | -| Nydus (Zran) | 14MB | 11s | 12s | 15s | 28.78MB | - -## Generate nydus zran artifact - -1. Get nydus components `nydusd`, `nydus-image`, `nydusify` from [release](https://github.com/dragonflyoss/nydus/releases) page (requires >= v2.2). - -``` -sudo install -D -m 755 nydusd nydus-image nydusify /usr/bin -``` - -2. 
Get nydus zran artifact: - -There are some pre-generated nydus zran artifacts under the same OCI image repo available for testing: - -- `docker.io/hsiangkao/wordpress:6.0` -> `docker.io/hsiangkao/wordpress:6.0-nydus-oci-ref` -- `docker.io/hsiangkao/node:18` -> `docker.io/hsiangkao/node:18-nydus-oci-ref` -- `docker.io/hsiangkao/gcc:12.2.0` -> `docker.io/hsiangkao/gcc:12.2.0-nydus-oci-ref` - -Or you can generate one by `nydusify` tool: - -``` bash -# Convert the existing OCI image `your-registry.com/node:19.0` to `your-registry.com/node:19.0-nydus-oci-ref`: -sudo nydusify convert --oci-ref --source your-registry.com/node:19.0 --target your-registry.com/node:19.0-nydus-oci-ref -``` - -**Tips**: -- Nydus ZRAN artifacts must be in the same namespace with the OCI image. - -## Run nydus zran artifact: - -Follow the [documentation](https://github.com/dragonflyoss/nydus/blob/master/docs/containerd-env-setup.md) to configure `containerd` and `nydus-snapshotter` (containerd-nydus-grpc): - -``` bash -# Run nydus zran artifact -sudo nerdctl --snapshotter nydus run --rm -it docker.io/hsiangkao/node:18-nydus-oci-ref node -v -``` - -## Compression method - -Currently ZRAN only supports OCI images that use gzip as their compression method. - -## Recording - -Pull ZRAN-indexed OCI / OCI wordpress images - -[![asciicast](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f.svg)](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f?speed=2) +# Nydus zran artifact user guide + +This guide explains how to create a tiny **nydus zran artifact** from an existing OCI image, which can be used to accelerate the image directly with lazy pulling. It provides several advantages because of reusing the OCI image blobs: + +- It eliminates the need to store two separate full images, which can save massive space in your image registry. +- Generating such artifact is faster than converting the full accelerated image. + +A simple test result is shown below: + +- Image: node:19.0 +- Workload: node -v +- Registry Network: 3MB/s + +| image type | image size | nydusify conversion | nydusify push | nerdctl run | read data | +| ---------------------- | ---------- | ------------------- | ------------- | ----------- | --------- | +| OCI v1 | 353.05MB | - | - | 126s | 353.05MB | +| Nydus (Native RAFS v6) | 337.94MB | 29s | 1m58s | 11s | 21.18MB | +| Nydus (Zran) | 14MB | 11s | 12s | 15s | 28.78MB | + +## Generate nydus zran artifact + +1. Get nydus components `nydusd`, `nydus-image`, `nydusify` from [release](https://github.com/dragonflyoss/nydus/releases) page (requires >= v2.2). + +``` +sudo install -D -m 755 nydusd nydus-image nydusify /usr/bin +``` + +2. Get nydus zran artifact: + +There are some pre-generated nydus zran artifacts under the same OCI image repo available for testing: + +- `docker.io/hsiangkao/wordpress:6.0` -> `docker.io/hsiangkao/wordpress:6.0-nydus-oci-ref` +- `docker.io/hsiangkao/node:18` -> `docker.io/hsiangkao/node:18-nydus-oci-ref` +- `docker.io/hsiangkao/gcc:12.2.0` -> `docker.io/hsiangkao/gcc:12.2.0-nydus-oci-ref` + +Or you can generate one by `nydusify` tool: + +``` bash +# Convert the existing OCI image `your-registry.com/node:19.0` to `your-registry.com/node:19.0-nydus-oci-ref`: +sudo nydusify convert --oci-ref --source your-registry.com/node:19.0 --target your-registry.com/node:19.0-nydus-oci-ref +``` + +**Tips**: +- Nydus ZRAN artifacts must be in the same namespace with the OCI image. 
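+
+In other words, only the tag of the `--target` reference may differ from `--source`; a hypothetical sketch (registry and image names are placeholders):
+
+``` bash
+# OK: the zran artifact stays in the same repository, so the registry can
+# serve the original gzip blobs that the artifact references
+sudo nydusify convert --oci-ref \
+  --source your-registry.com/node:19.0 \
+  --target your-registry.com/node:19.0-nydus-oci-ref
+
+# Not OK: a different repository (namespace) cannot reference the source blobs
+#   --target your-registry.com/mirror/node:19.0-nydus-oci-ref
+```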
+ +## Run nydus zran artifact: + +Follow the [documentation](https://github.com/dragonflyoss/nydus/blob/master/docs/containerd-env-setup.md) to configure `containerd` and `nydus-snapshotter` (containerd-nydus-grpc): + +``` bash +# Run nydus zran artifact +sudo nerdctl --snapshotter nydus run --rm -it docker.io/hsiangkao/node:18-nydus-oci-ref node -v +``` + +## Compression method + +Currently ZRAN only supports OCI images that use gzip as their compression method. + +## Recording + +Pull ZRAN-indexed OCI / OCI wordpress images + +[![asciicast](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f.svg)](https://asciinema.org/a/7IOWhUk8Rna0Ju1avcamu7T5f?speed=2) diff --git a/docs/nydus_with_macos.md b/docs/nydus_with_macos.md index 5a6de4a46fb..2a9666855f1 100644 --- a/docs/nydus_with_macos.md +++ b/docs/nydus_with_macos.md @@ -1,26 +1,26 @@ -# Nydus with macos - -## Prepare - -Please install macfuse(a.k.a osxfuse). The release can be found with https://osxfuse.github.io/. - -## Env -- macfuse@4.2.4 has been tested. -- macos 11(Big Sur)/12(Monterey) has been tested. - -## Support bin - -For now only `nydusd` works on macos, other bin is open for pr. - -## Support features -Only `fusedev` works on macos, by the way passthrough file system not work(passthrough fs has lot syscall is linux specific). - -## Build instruction -```shell -cargo build --release --bin=nydusd -``` -or -``` -make release -make install -``` +# Nydus with macos + +## Prepare + +Please install macfuse(a.k.a osxfuse). The release can be found with https://osxfuse.github.io/. + +## Env +- macfuse@4.2.4 has been tested. +- macos 11(Big Sur)/12(Monterey) has been tested. + +## Support bin + +For now only `nydusd` works on macos, other bin is open for pr. + +## Support features +Only `fusedev` works on macos, by the way passthrough file system not work(passthrough fs has lot syscall is linux specific). + +## Build instruction +```shell +cargo build --release --bin=nydusd +``` +or +``` +make release +make install +``` diff --git a/docs/nydusd.md b/docs/nydusd.md index bcd51f61231..3cf9e5cef7c 100644 --- a/docs/nydusd.md +++ b/docs/nydusd.md @@ -1,512 +1,512 @@ -# Nydusd - -`nydusd` running as daemon to expose a [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) mountpoint or a [Virtio-FS](https://virtio-fs.gitlab.io/) mountpoint inside guest for containers to access. - -### Get binary from release page - -Get `nydusd` binary from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. - -## Run Nydusd Daemon - -```shell -# Prepare nydusd configuration -sudo tee /etc/nydus/nydusd-config.localfs.json > /dev/null << EOF -{ - "device": { - "backend": { - "type": "localfs", - "config": { - "dir": "/var/lib/nydus/blobs" - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "/var/lib/nydus/cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "iostats_files": false, - "enable_xattr": true -} - -EOF -``` - -### Run With FUSE -If no `/path/to/bootstrap` is available, please refer to [nydus-image.md](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) for more details. - -``` shell -sudo mkdir -p /var/lib/nydus/blobs/ -sudo mkdir -p /var/lib/nydus/cache/ -sudo nydusd \ - --config /etc/nydus/nydusd-config.localfs.json \ - --mountpoint /path/to/mnt \ - --bootstrap /path/to/bootstrap \ - --log-level info -``` - -For registry backend, we can set authorization with environment variable `IMAGE_PULL_AUTH` to avoid loading `auth` from nydusd configuration file. 
- -### Run With Virtio-FS -If no `/path/to/bootstrap` is available, please refer to [nydus-image.md](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) for more details. - -Virtio-fs is supported by both [QEMU](https://www.qemu.org/) and [Cloud-hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor). To run `nydusd` with virtio-fs support, first start it with `--sock` option to expose a virtio-fs socket endpoint. - -``` shell -sudo nydusd \ - --config /etc/nydus/nydusd-config.localfs.json \ - --sock /path/to/vhost-user-fs.sock \ - --bootstrap /path/to/bootstrap \ - --log-level info -``` - -Then start a qemu process with a `vhost-user-fs-pci` device, run something like: - -``` shell -./qemu-system-x86_64 -M pc -cpu host --enable-kvm -smp 2 \ - -m 2G,maxmem=16G -object memory-backend-file,id=mem,size=2G,mem-path=/dev/shm,share=on -numa node,memdev=mem \ - -chardev socket,id=char0,path=/path/to/vhost-user-fs.sock \ - -device vhost-user-fs-pci,chardev=char0,tag=nydus,queue-size=1024,indirect_desc=false,event_idx=false \ - -serial mon:stdio -vga none -nographic -curses -kernel ./kernel \ - -append 'console=ttyS0 root=/dev/vda1 virtio_fs.dyndbg="+pfl" fuse.dyndbg="+pfl"' \ - -device virtio-net-pci,netdev=net0,mac=AE:AD:BE:EF:6C:FB -netdev type=user,id=net0 \ - -qmp unix:/path/to/qmp.sock,server,nowait \ - -drive if=virtio,file=./bionic-server-cloudimg-amd64.img -``` - -Then we can mount nydus virtio-fs inside the guest with: - -``` shell -mount -t virtio_fs none /mnt -o tag=nydus,default_permissions,allow_other,rootmode=040000,user_id=0,group_id=0,nodev -``` - -Or simply below if you are running newer guest kernel: - -``` shell -mount -t virtiofs nydus /mnt -``` - -We are working on enabling cloud-hypervisor support for nydus. - -### Nydus Configuration - -#### Common Fields In Config - -``` -{ - "device": { - "backend": { - // localfs | oss | registry - "type": "localfs", - "config": { - // Drop the read request once http request timeout, in seconds - "timeout": 5, - // Drop the read request once http connection timeout, in seconds - "connect_timeout": 5, - // Retry count when read request failed - "retry_limit": 0, - } - }, - "cache": { - // Blobcache: enable local fs cache - // Dummycache: disable cache, access remote storage backend directly - "type": "blobcache", - // Enable cache compression - "compressed": true, - "config": { - // Directory of cache files, only for blobcache - "work_dir": "/cache" - } - } - }, - // direct | cached - "mode": "direct", - // Validate inode tree digest and chunk digest on demand - "digest_validate": false, - // Enable file IO metric - "iostats_files": true, - // Enable support of fs extended attributes - "enable_xattr": false, - // Amplified user IO request batch size to read data from remote storage backend / local cache - // in unit of Bytes, valid values: 0-268435456, default: 1048576 - "amplify_io": 1048576, - "fs_prefetch": { - // Enable blob prefetch - "enable": false, - // Prefetch thread count - "threads_count": 10, - // Maximal read size per prefetch request, e.g. 128kb - "merging_size": 131072, - // Limit prefetch bandwidth to 1MB/S, it aims at reducing congestion with normal user io - "bandwidth_rate": 1048576 - } -} -``` - -#### Use Different Storage Backends - -Using different storage backend means that the nydus image metadata (bootstrap) layer is stored in the image registry, but the data layer will be stored on the external storage. 
Therefore, the option `--target` for `nydusify convert` is still required, the registry image reference is needed to store the metadata layer. - -##### Localfs Backend - -``` -{ - "device": { - "backend": { - "type": "localfs", - "config": { - // The directory included all blob files declared in bootstrap - "dir": "/path/to/blobs/", - // Record read access log, prefetch data on next time - "readahead": true, - // Duration of recording access log - "readahead_sec": 10 - } - }, - ... - }, - ... -} -``` - -##### Localdisk Backend (Experimental) -Using this backend enables Nydus to support reading blobs from block devices. This feature will be useful in Confidential Computing or Hybrid Image scenarios. - -The localdisk backend adds support for storing images in disks. In this scenario, each layer of the blob is stored in partitions, and multiple partitions are addressed in the local raw disk via the GUID partition table (GPT), which means that this disk stores the entire image. - -Currently, generating a localdisk image through nydusify is not supported for the time being. You need to use the nydus-localdisk tool to complete this step. -Document located at: https://github.com/adamqqqplay/nydus-localdisk/blob/master/README.md - -``` -{ - "device": { - "backend": { - "type": "localdisk", - "config": { - // Mounted block device path or original localdisk image file path. - "device_path": "/dev/loop1" - //"device_path": "/home/user/ubuntu.img" - } - }, - ... - }, - ... -} -``` - -##### OSS Backend - -``` -{ - "device": { - "backend": { - "type": "oss", - "config": { - ... - "endpoint": "region.aliyuncs.com", - "scheme": "https", - "access_key_id": "", - "access_key_secret": "", - "bucket_name": "", - "object_prefix": "nydus/" - } - }, - ... - }, - ... -} -``` - -##### S3 Backend - -``` -{ - "device": { - "backend": { - "type": "s3", - "config": { - ... - "endpoint": "s3.amazonaws.com", - "scheme": "https", - "access_key_id": "", - "access_key_secret": "", - "bucket_name": "", - "region": "", - "object_prefix": "nydus/" - } - }, - ... - }, - ... -} -``` - -##### Registry Backend - -``` -{ - "device": { - "backend": { - "type": "registry", - "config": { - ... - // Registry url scheme, leave empty to automatically detect, otherwise specify to https or http. - "scheme": "", - // Registry hostname with format `$host:$port` - "host": "my-registry:5000", - // Skip SSL certificate validation for HTTPS scheme - "skip_verify": false, - // Use format `$namespace/$repo` (no image tag) - "repo": "test/repo", - // Username and password for auth - // base64(username:password), optional - "auth": "", - // Bearer token for auth, optional - "registry_token": "" - // Redirected blob download host, optional - "blob_redirected_host": "" - } - }, - ... - }, - ... -} -``` -Note: The value of `device.backend.config.auth` will be overwrite if running the nydusd with environment variable `IMAGE_PULL_AUTH`. - -#### HTTP Proxy Backend - -The `HttpProxy` backend can access blobs through a http proxy server which can be local (using unix socket) or remote (using `https://` or using `http://`). - -`HttpProxy` uses two API endpoints to access the blobs: -- `HEAD /path/to/blobs` to get the blob size -- `GET /path/to/blobs` to read the blob - -The http proxy server should respect [the `Range` header](https://www.rfc-editor.org/rfc/rfc9110.html#name-range) to compute the offset and length of the blob. 
- -The example config files for the `HttpProxy` backend may be: - -``` -// for remote usage -{ - "device": { - "backend": { - "type": "http-proxy", - "config": { - "addr": "http://127.0.0.1:9977", - "path": "/namespace//blobs" - } - } - } -} -``` - -or - -``` -// for local usage -{ - "device": { - "backend": { - "type": "http-proxy", - "config": { - "addr": "/path/to/unix.sock", - } - } - } -} -``` - -The `HttpProxy` backend also supports the `Proxy` and `Mirrors` configurations for remote usage like the `Registry backend` described above. - -##### Enable Mirrors for Storage Backend (Recommend) - -Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P mirror mode, please refer the [doc](https://d7y.io/docs/next/operations/integrations/container-runtime/nydus/) to learn how configuring Nydus to use Dragonfly. - -Add `device.backend.config.mirrors` field to enable mirrors for storage backend. The mirror can be a P2P distribution server or registry. If the request to mirror server failed, it will fall back to the original registry. -Currently, the mirror mode is only tested in the registry backend, and in theory, the OSS backend also supports it. - -!! The `mirrors` field conflicts with `proxy` field. - -``` -{ - "device": { - "backend": { - "type": "registry", - "config": { - "mirrors": [ - { - // Mirror server URL (including scheme), e.g. Dragonfly dfdaemon server URL - "host": "http://dragonfly1.io:65001", - // Headers for mirror server - "headers": { - // For Dragonfly dfdaemon server URL, we need to specify "X-Dragonfly-Registry" (including scheme). - // When Dragonfly does not cache data, nydusd will pull it from "X-Dragonfly-Registry". - // If not set "X-Dragonfly-Registry", Dragonfly will pull data from proxy.registryMirror.url. - "X-Dragonfly-Registry": "https://index.docker.io" - }, - // This URL endpoint is used to check the health of mirror server, and if the mirror is unhealthy, - // the request will fallback to the next mirror or the original registry server. - // Use $host/v2 as default if left empty. - "ping_url": "http://127.0.0.1:40901/server/ping", - // Interval time (s) to check and recover unavailable mirror. Use 5 as default if left empty. - "health_check_interval": 5, - // Failure counts before disabling this mirror. Use 5 as default if left empty. - "failure_limit": 5, - // Elapsed time to pause mirror health check when the request is inactive, in seconds. - // Use 300 as default if left empty. - "health_check_pause_elapsed": 300, - }, - { - "host": "http://dragonfly2.io:65001", - "headers": { - "X-Dragonfly-Registry": "https://index.docker.io" - }, - } - ], - ... - } - }, - ... - }, - ... -} -``` - - -##### Enable P2P Proxy for Storage Backend - -Add `device.backend.config.proxy` field to enable HTTP proxy for storage backend. For example, use P2P distribution service to reduce network workload and latency in large scale container cluster using [Dragonfly](https://d7y.io/) (enable centralized dfdaemon mode). - -``` -{ - "device": { - "backend": { - "type": "registry", - "config": { - "proxy": { - // Access remote storage backend via P2P proxy, e.g. Dragonfly dfdaemon server URL - "url": "http://p2p-proxy:65001", - // Fallback to remote storage backend if P2P proxy ping failed - "fallback": true, - // Endpoint of P2P proxy health checking - "ping_url": "http://p2p-proxy:40901/server/ping", - // Interval of P2P proxy health checking, in seconds - "check_interval": 5 - // Elapsed time to pause proxy health check when the request is inactive, in seconds. 
- // Use 300 as default if left empty. - "check_pause_elapsed": 300, - }, - ... - } - }, - ... - }, - ... -} -``` - -Once the configuration is loaded successfully on nydusd starting, we will see the log as shown below: - -``` -INFO [storage/src/backend/connection.rs:136] backend config: CommonConfig { proxy: ProxyConfig { url: "http://p2p-proxy:65001", ping_url: "http://p2p-proxy:40901/server/ping", fallback: true, check_interval: 5 }, timeout: 5, connect_timeout: 5, retry_limit: 0 } -``` - -### Mount writable Overlay FS - -`Nydusd` itself has a native userspace Overlay FS implementation, which can be enabled with several extra configurations. - -An example configuration `/etc/nydus/nydusd-config.overlay.json` is as follows: - -```json -{ - "version": 2, - "backend": { - "type": "localfs", - "localfs": "/var/lib/nydus/blobs" - }, - "cache": { - "type": "blobcache", - "filecache": { - "work_dir": "/var/lib/nydus/cache" - } - }, - "rafs": { - "mode": "direct", - "enable_xattr": true - }, - "overlay": { - "upper_dir": "/path/to/upperdir", - "work_dir": "/path/to/workdir" - } -} -``` - -An extra field `overlay` is added to the Nydusd configuration, which specifies the `upper_dir` and `work_dir` for the overlay filesystem. - -You can start `nydusd` with the above configuration and such command: - -```bash -sudo nydusd \ - --config /etc/nydus/nydusd-config.overlay.json \ - --mountpoint /path/to/mnt/ \ - --bootstrap /path/to/bootstrap \ - --log-level info \ - --writable -``` - -This will create a FUSE overlay mountpoint at `/path/to/mnt/`, with one `Nydus` image as readonly lower layer and the `/path/to/upperdir` as writable upper layer, so that it can take over whole rootfs of a container, any contents writen from container will be stored in `/path/to/upperdir`. - -Removing `--writable` flag will make the overlay filesystem readonly if you wish. - -### Mount Bootstrap Via API - -To mount a bootstrap via api, first launch nydusd without a bootstrap: - -``` shell -sudo nydusd \ - --apisock /path/to/api.sock \ - --config /path/to/config.json \ - --mountpoint /path/to/mountpoint -``` - -Then use curl to mount a bootstrap to `/path/to/mountpoint/sub`: - -``` shell -curl --unix-socket api.sock \ - -X POST "http://localhost/api/v1/mount?mountpoint=/sub" \ - -H "Content-Type: application/json" \ - -d '{ - "source":"/path/to/bootstrap", - "fs_type":"rafs", - "config":"{\"device\":{\"backend\":{\"type\":\"localfs\",\"config\":{\"dir\":\"blobs\"}},\"cache\":{\"type\":\"blobcache\",\"config\":{\"work_dir\":\"cache\"}}},\"mode\":\"direct\",\"digest_validate\":true}" - }' -``` - -The `config` field is a JSON format string that can be obtained by `cat rafs.config | jq tostring`. - -### Multiple Pseudo Mounts - -One single nydusd can have multiple pseudo mounts within a mountpoint. - -To achieve that, you can trigger backend fs (e.g., rafs) mount through the HTTP interfaces using curl command. - -When starting nydusd without the --bootstrap option, there will be no backend file system in a nydus mountpoint. You can use curl command to mount multiple backend fs at different sub-directories. - -#### Example - -Given that your mountpoint is `/mnt` which can be a directory in local host or inside guest. - -When you have two pseudo mounts which are named "pseudo_1" and "pseudo_2" identified in http request body. - -pseudo_1 and pseudo_2 correspond to bootstrap respectively. 
- -``` shell -tree -L 1 mnt -mnt -├── pseudo_1 -└── pseudo_2 -``` +# Nydusd + +`nydusd` running as daemon to expose a [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) mountpoint or a [Virtio-FS](https://virtio-fs.gitlab.io/) mountpoint inside guest for containers to access. + +### Get binary from release page + +Get `nydusd` binary from [release](https://github.com/dragonflyoss/nydus/releases/latest) page. + +## Run Nydusd Daemon + +```shell +# Prepare nydusd configuration +sudo tee /etc/nydus/nydusd-config.localfs.json > /dev/null << EOF +{ + "device": { + "backend": { + "type": "localfs", + "config": { + "dir": "/var/lib/nydus/blobs" + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "/var/lib/nydus/cache" + } + } + }, + "mode": "direct", + "digest_validate": false, + "iostats_files": false, + "enable_xattr": true +} + +EOF +``` + +### Run With FUSE +If no `/path/to/bootstrap` is available, please refer to [nydus-image.md](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) for more details. + +``` shell +sudo mkdir -p /var/lib/nydus/blobs/ +sudo mkdir -p /var/lib/nydus/cache/ +sudo nydusd \ + --config /etc/nydus/nydusd-config.localfs.json \ + --mountpoint /path/to/mnt \ + --bootstrap /path/to/bootstrap \ + --log-level info +``` + +For registry backend, we can set authorization with environment variable `IMAGE_PULL_AUTH` to avoid loading `auth` from nydusd configuration file. + +### Run With Virtio-FS +If no `/path/to/bootstrap` is available, please refer to [nydus-image.md](https://github.com/dragonflyoss/nydus/blob/master/docs/nydus-image.md) for more details. + +Virtio-fs is supported by both [QEMU](https://www.qemu.org/) and [Cloud-hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor). To run `nydusd` with virtio-fs support, first start it with `--sock` option to expose a virtio-fs socket endpoint. + +``` shell +sudo nydusd \ + --config /etc/nydus/nydusd-config.localfs.json \ + --sock /path/to/vhost-user-fs.sock \ + --bootstrap /path/to/bootstrap \ + --log-level info +``` + +Then start a qemu process with a `vhost-user-fs-pci` device, run something like: + +``` shell +./qemu-system-x86_64 -M pc -cpu host --enable-kvm -smp 2 \ + -m 2G,maxmem=16G -object memory-backend-file,id=mem,size=2G,mem-path=/dev/shm,share=on -numa node,memdev=mem \ + -chardev socket,id=char0,path=/path/to/vhost-user-fs.sock \ + -device vhost-user-fs-pci,chardev=char0,tag=nydus,queue-size=1024,indirect_desc=false,event_idx=false \ + -serial mon:stdio -vga none -nographic -curses -kernel ./kernel \ + -append 'console=ttyS0 root=/dev/vda1 virtio_fs.dyndbg="+pfl" fuse.dyndbg="+pfl"' \ + -device virtio-net-pci,netdev=net0,mac=AE:AD:BE:EF:6C:FB -netdev type=user,id=net0 \ + -qmp unix:/path/to/qmp.sock,server,nowait \ + -drive if=virtio,file=./bionic-server-cloudimg-amd64.img +``` + +Then we can mount nydus virtio-fs inside the guest with: + +``` shell +mount -t virtio_fs none /mnt -o tag=nydus,default_permissions,allow_other,rootmode=040000,user_id=0,group_id=0,nodev +``` + +Or simply below if you are running newer guest kernel: + +``` shell +mount -t virtiofs nydus /mnt +``` + +We are working on enabling cloud-hypervisor support for nydus. 
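+
+Regardless of the VMM, a quick way to sanity-check the virtio-fs setup is to confirm the vhost-user socket on the host before launching the guest, and the mount inside the guest afterwards (paths and the `nydus` tag follow the examples above):
+
+``` shell
+# On the host: nydusd should have created the vhost-user-fs socket
+ls -l /path/to/vhost-user-fs.sock
+
+# Inside the guest: confirm the mount and list the image rootfs
+findmnt /mnt
+ls /mnt
+```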
+ +### Nydus Configuration + +#### Common Fields In Config + +``` +{ + "device": { + "backend": { + // localfs | oss | registry + "type": "localfs", + "config": { + // Drop the read request once http request timeout, in seconds + "timeout": 5, + // Drop the read request once http connection timeout, in seconds + "connect_timeout": 5, + // Retry count when read request failed + "retry_limit": 0, + } + }, + "cache": { + // Blobcache: enable local fs cache + // Dummycache: disable cache, access remote storage backend directly + "type": "blobcache", + // Enable cache compression + "compressed": true, + "config": { + // Directory of cache files, only for blobcache + "work_dir": "/cache" + } + } + }, + // direct | cached + "mode": "direct", + // Validate inode tree digest and chunk digest on demand + "digest_validate": false, + // Enable file IO metric + "iostats_files": true, + // Enable support of fs extended attributes + "enable_xattr": false, + // Amplified user IO request batch size to read data from remote storage backend / local cache + // in unit of Bytes, valid values: 0-268435456, default: 1048576 + "amplify_io": 1048576, + "fs_prefetch": { + // Enable blob prefetch + "enable": false, + // Prefetch thread count + "threads_count": 10, + // Maximal read size per prefetch request, e.g. 128kb + "merging_size": 131072, + // Limit prefetch bandwidth to 1MB/S, it aims at reducing congestion with normal user io + "bandwidth_rate": 1048576 + } +} +``` + +#### Use Different Storage Backends + +Using different storage backend means that the nydus image metadata (bootstrap) layer is stored in the image registry, but the data layer will be stored on the external storage. Therefore, the option `--target` for `nydusify convert` is still required, the registry image reference is needed to store the metadata layer. + +##### Localfs Backend + +``` +{ + "device": { + "backend": { + "type": "localfs", + "config": { + // The directory included all blob files declared in bootstrap + "dir": "/path/to/blobs/", + // Record read access log, prefetch data on next time + "readahead": true, + // Duration of recording access log + "readahead_sec": 10 + } + }, + ... + }, + ... +} +``` + +##### Localdisk Backend (Experimental) +Using this backend enables Nydus to support reading blobs from block devices. This feature will be useful in Confidential Computing or Hybrid Image scenarios. + +The localdisk backend adds support for storing images in disks. In this scenario, each layer of the blob is stored in partitions, and multiple partitions are addressed in the local raw disk via the GUID partition table (GPT), which means that this disk stores the entire image. + +Currently, generating a localdisk image through nydusify is not supported for the time being. You need to use the nydus-localdisk tool to complete this step. +Document located at: https://github.com/adamqqqplay/nydus-localdisk/blob/master/README.md + +``` +{ + "device": { + "backend": { + "type": "localdisk", + "config": { + // Mounted block device path or original localdisk image file path. + "device_path": "/dev/loop1" + //"device_path": "/home/user/ubuntu.img" + } + }, + ... + }, + ... +} +``` + +##### OSS Backend + +``` +{ + "device": { + "backend": { + "type": "oss", + "config": { + ... + "endpoint": "region.aliyuncs.com", + "scheme": "https", + "access_key_id": "", + "access_key_secret": "", + "bucket_name": "", + "object_prefix": "nydus/" + } + }, + ... + }, + ... 
+} +``` + +##### S3 Backend + +``` +{ + "device": { + "backend": { + "type": "s3", + "config": { + ... + "endpoint": "s3.amazonaws.com", + "scheme": "https", + "access_key_id": "", + "access_key_secret": "", + "bucket_name": "", + "region": "", + "object_prefix": "nydus/" + } + }, + ... + }, + ... +} +``` + +##### Registry Backend + +``` +{ + "device": { + "backend": { + "type": "registry", + "config": { + ... + // Registry url scheme, leave empty to automatically detect, otherwise specify to https or http. + "scheme": "", + // Registry hostname with format `$host:$port` + "host": "my-registry:5000", + // Skip SSL certificate validation for HTTPS scheme + "skip_verify": false, + // Use format `$namespace/$repo` (no image tag) + "repo": "test/repo", + // Username and password for auth + // base64(username:password), optional + "auth": "", + // Bearer token for auth, optional + "registry_token": "" + // Redirected blob download host, optional + "blob_redirected_host": "" + } + }, + ... + }, + ... +} +``` +Note: The value of `device.backend.config.auth` will be overwrite if running the nydusd with environment variable `IMAGE_PULL_AUTH`. + +#### HTTP Proxy Backend + +The `HttpProxy` backend can access blobs through a http proxy server which can be local (using unix socket) or remote (using `https://` or using `http://`). + +`HttpProxy` uses two API endpoints to access the blobs: +- `HEAD /path/to/blobs` to get the blob size +- `GET /path/to/blobs` to read the blob + +The http proxy server should respect [the `Range` header](https://www.rfc-editor.org/rfc/rfc9110.html#name-range) to compute the offset and length of the blob. + +The example config files for the `HttpProxy` backend may be: + +``` +// for remote usage +{ + "device": { + "backend": { + "type": "http-proxy", + "config": { + "addr": "http://127.0.0.1:9977", + "path": "/namespace//blobs" + } + } + } +} +``` + +or + +``` +// for local usage +{ + "device": { + "backend": { + "type": "http-proxy", + "config": { + "addr": "/path/to/unix.sock", + } + } + } +} +``` + +The `HttpProxy` backend also supports the `Proxy` and `Mirrors` configurations for remote usage like the `Registry backend` described above. + +##### Enable Mirrors for Storage Backend (Recommend) + +Nydus is deeply integrated with [Dragonfly](https://d7y.io/) P2P mirror mode, please refer the [doc](https://d7y.io/docs/next/operations/integrations/container-runtime/nydus/) to learn how configuring Nydus to use Dragonfly. + +Add `device.backend.config.mirrors` field to enable mirrors for storage backend. The mirror can be a P2P distribution server or registry. If the request to mirror server failed, it will fall back to the original registry. +Currently, the mirror mode is only tested in the registry backend, and in theory, the OSS backend also supports it. + +!! The `mirrors` field conflicts with `proxy` field. + +``` +{ + "device": { + "backend": { + "type": "registry", + "config": { + "mirrors": [ + { + // Mirror server URL (including scheme), e.g. Dragonfly dfdaemon server URL + "host": "http://dragonfly1.io:65001", + // Headers for mirror server + "headers": { + // For Dragonfly dfdaemon server URL, we need to specify "X-Dragonfly-Registry" (including scheme). + // When Dragonfly does not cache data, nydusd will pull it from "X-Dragonfly-Registry". + // If not set "X-Dragonfly-Registry", Dragonfly will pull data from proxy.registryMirror.url. 
+ "X-Dragonfly-Registry": "https://index.docker.io" + }, + // This URL endpoint is used to check the health of mirror server, and if the mirror is unhealthy, + // the request will fallback to the next mirror or the original registry server. + // Use $host/v2 as default if left empty. + "ping_url": "http://127.0.0.1:40901/server/ping", + // Interval time (s) to check and recover unavailable mirror. Use 5 as default if left empty. + "health_check_interval": 5, + // Failure counts before disabling this mirror. Use 5 as default if left empty. + "failure_limit": 5, + // Elapsed time to pause mirror health check when the request is inactive, in seconds. + // Use 300 as default if left empty. + "health_check_pause_elapsed": 300, + }, + { + "host": "http://dragonfly2.io:65001", + "headers": { + "X-Dragonfly-Registry": "https://index.docker.io" + }, + } + ], + ... + } + }, + ... + }, + ... +} +``` + + +##### Enable P2P Proxy for Storage Backend + +Add `device.backend.config.proxy` field to enable HTTP proxy for storage backend. For example, use P2P distribution service to reduce network workload and latency in large scale container cluster using [Dragonfly](https://d7y.io/) (enable centralized dfdaemon mode). + +``` +{ + "device": { + "backend": { + "type": "registry", + "config": { + "proxy": { + // Access remote storage backend via P2P proxy, e.g. Dragonfly dfdaemon server URL + "url": "http://p2p-proxy:65001", + // Fallback to remote storage backend if P2P proxy ping failed + "fallback": true, + // Endpoint of P2P proxy health checking + "ping_url": "http://p2p-proxy:40901/server/ping", + // Interval of P2P proxy health checking, in seconds + "check_interval": 5 + // Elapsed time to pause proxy health check when the request is inactive, in seconds. + // Use 300 as default if left empty. + "check_pause_elapsed": 300, + }, + ... + } + }, + ... + }, + ... +} +``` + +Once the configuration is loaded successfully on nydusd starting, we will see the log as shown below: + +``` +INFO [storage/src/backend/connection.rs:136] backend config: CommonConfig { proxy: ProxyConfig { url: "http://p2p-proxy:65001", ping_url: "http://p2p-proxy:40901/server/ping", fallback: true, check_interval: 5 }, timeout: 5, connect_timeout: 5, retry_limit: 0 } +``` + +### Mount writable Overlay FS + +`Nydusd` itself has a native userspace Overlay FS implementation, which can be enabled with several extra configurations. + +An example configuration `/etc/nydus/nydusd-config.overlay.json` is as follows: + +```json +{ + "version": 2, + "backend": { + "type": "localfs", + "localfs": "/var/lib/nydus/blobs" + }, + "cache": { + "type": "blobcache", + "filecache": { + "work_dir": "/var/lib/nydus/cache" + } + }, + "rafs": { + "mode": "direct", + "enable_xattr": true + }, + "overlay": { + "upper_dir": "/path/to/upperdir", + "work_dir": "/path/to/workdir" + } +} +``` + +An extra field `overlay` is added to the Nydusd configuration, which specifies the `upper_dir` and `work_dir` for the overlay filesystem. 
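+
+If the upper and work directories do not exist yet, you may need to create them first (the paths below are the placeholders from the example configuration):
+
+```bash
+sudo mkdir -p /path/to/upperdir /path/to/workdir
+```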
+ +You can start `nydusd` with the above configuration and such command: + +```bash +sudo nydusd \ + --config /etc/nydus/nydusd-config.overlay.json \ + --mountpoint /path/to/mnt/ \ + --bootstrap /path/to/bootstrap \ + --log-level info \ + --writable +``` + +This will create a FUSE overlay mountpoint at `/path/to/mnt/`, with one `Nydus` image as readonly lower layer and the `/path/to/upperdir` as writable upper layer, so that it can take over whole rootfs of a container, any contents writen from container will be stored in `/path/to/upperdir`. + +Removing `--writable` flag will make the overlay filesystem readonly if you wish. + +### Mount Bootstrap Via API + +To mount a bootstrap via api, first launch nydusd without a bootstrap: + +``` shell +sudo nydusd \ + --apisock /path/to/api.sock \ + --config /path/to/config.json \ + --mountpoint /path/to/mountpoint +``` + +Then use curl to mount a bootstrap to `/path/to/mountpoint/sub`: + +``` shell +curl --unix-socket api.sock \ + -X POST "http://localhost/api/v1/mount?mountpoint=/sub" \ + -H "Content-Type: application/json" \ + -d '{ + "source":"/path/to/bootstrap", + "fs_type":"rafs", + "config":"{\"device\":{\"backend\":{\"type\":\"localfs\",\"config\":{\"dir\":\"blobs\"}},\"cache\":{\"type\":\"blobcache\",\"config\":{\"work_dir\":\"cache\"}}},\"mode\":\"direct\",\"digest_validate\":true}" + }' +``` + +The `config` field is a JSON format string that can be obtained by `cat rafs.config | jq tostring`. + +### Multiple Pseudo Mounts + +One single nydusd can have multiple pseudo mounts within a mountpoint. + +To achieve that, you can trigger backend fs (e.g., rafs) mount through the HTTP interfaces using curl command. + +When starting nydusd without the --bootstrap option, there will be no backend file system in a nydus mountpoint. You can use curl command to mount multiple backend fs at different sub-directories. + +#### Example + +Given that your mountpoint is `/mnt` which can be a directory in local host or inside guest. + +When you have two pseudo mounts which are named "pseudo_1" and "pseudo_2" identified in http request body. + +pseudo_1 and pseudo_2 correspond to bootstrap respectively. + +``` shell +tree -L 1 mnt +mnt +├── pseudo_1 +└── pseudo_2 +``` diff --git a/docs/nydusify.md b/docs/nydusify.md index 0da352b0562..e74220922df 100644 --- a/docs/nydusify.md +++ b/docs/nydusify.md @@ -1,253 +1,253 @@ -# Nydusify - -The Nydusify CLI tool supports: -1. Convert an OCI container image from source registry into a Nydus image using `nydus-image` CLI layer by layer, then push Nydus image to target registry. -2. Convert local file system dictionary into Nydus image using `nydus-image`, then push Nydus-image to target remote storage(e.g. oss) optionally. - -### Get binaries from release page - -Get `nydus-image`, `nydusd` and `nydusify` binaries from [release](https://github.com/dragonflyoss/nydus/releases/latest) page and install them to system PATH like `/usr/bin` or `/usr/local/bin`. - -## Basic Usage - -Convert oci image: -``` -nydusify convert \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus -``` -Pack local file system dictionary: -``` -nydusify pack \ - --bootstrap target.bootstrap \ - --target-dir /path/to/target \ - --output-dir /path/to/output -``` - -## Upload blob to storage backend - -Nydusify uploads Nydus blob to registry by default, change this behavior by specifying `--backend-type` option. 
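+
+For instance, assuming nydusd was started without `--bootstrap` as in the previous section (API socket `api.sock`, mountpoint `/mnt`), each pseudo mount is created with its own mount API call. The bootstrap paths below are placeholders, and the `config` string reuses the format shown in "Mount Bootstrap Via API":
+
+``` shell
+curl --unix-socket api.sock \
+    -X POST "http://localhost/api/v1/mount?mountpoint=/pseudo_1" \
+    -H "Content-Type: application/json" \
+    -d '{
+        "source":"/path/to/bootstrap_1",
+        "fs_type":"rafs",
+        "config":"{\"device\":{\"backend\":{\"type\":\"localfs\",\"config\":{\"dir\":\"blobs\"}},\"cache\":{\"type\":\"blobcache\",\"config\":{\"work_dir\":\"cache\"}}},\"mode\":\"direct\",\"digest_validate\":true}"
+    }'
+
+curl --unix-socket api.sock \
+    -X POST "http://localhost/api/v1/mount?mountpoint=/pseudo_2" \
+    -H "Content-Type: application/json" \
+    -d '{
+        "source":"/path/to/bootstrap_2",
+        "fs_type":"rafs",
+        "config":"{\"device\":{\"backend\":{\"type\":\"localfs\",\"config\":{\"dir\":\"blobs\"}},\"cache\":{\"type\":\"blobcache\",\"config\":{\"work_dir\":\"cache\"}}},\"mode\":\"direct\",\"digest_validate\":true}"
+    }'
+```
+
+Each pseudo mount then shows up as a sub-directory under the mountpoint: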
- -### OSS Backend - -``` shell -cat /path/to/backend-config.json -{ - "endpoint": "region.aliyuncs.com", - "scheme": "https", - "access_key_id": "", - "access_key_secret": "", - "bucket_name": "", - "object_prefix": "nydus/" -} -``` - -``` shell -nydusify convert \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus \ - --backend-type oss \ - --backend-config-file /path/to/backend-config.json -``` - -### S3 Backend - -`nydusify convert` can upload blob to the aws s3 service or other s3 compatible services (for example minio, ceph s3 gateway, etc.) by specifying `--backend-type s3` option. - -The `endpoint` field of the `backend-config.json` is optional when using aws s3 service. - -``` shell -cat /path/to/backend-config.json -{ - "endpoint": "localhost:9000", - "scheme": "http", - "access_key_id": "", - "access_key_secret": "", - "bucket_name": "", - "object_prefix": "nydus/" -} -``` - -Note: the `endpoint` in the s3 `backend-config.json` **should not** contains the scheme prefix. - -``` shell -nydusify convert \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus \ - --backend-type s3 \ - --backend-config-file /path/to/backend-config.json -``` - -## Push Nydus Image to storage backend with subcommand pack - -### OSS - -``` shell -# meta_prefix: -# push bootstrap into oss://$bucket_name/$meta_prefix$bootstrap_name -# object_prefix: -# push blobs into oss://$bucket_name/$object_prefix$blob_id -cat /path/to/backend-config.json -{ - "bucket_name": "", - "endpoint": "region.aliyuncs.com", - "access_key_id": "", - "access_key_secret": "", - "meta_prefix": "meta/", - "object_prefix": "nydus/" -} - -nydusify pack --bootstrap target.bootstrap \ - --backend-push \ - --backend-type oss \ - --backend-config-file /path/to/backend-config.json \ - --target-dir /path/to/target \ - --output-dir /path/to/output -``` - -### S3 - -``` shell -# meta_prefix: -# push bootstrap into s3://$bucket_name/$meta_prefix$bootstrap_name -# object_prefix: -# push blobs into s3://$bucket_name/$object_prefix$blob_id -cat /path/to/backend-config.json -{ - "bucket_name": "", - "endpoint": "my-s3-service.net", - "access_key_id": "", - "access_key_secret": "", - "meta_prefix": "meta/", - "object_prefix": "nydus/" -} - -nydusify pack --bootstrap target.bootstrap \ - --backend-push \ - --backend-type s3 \ - --backend-config-file /path/to/backend-config.json \ - --target-dir /path/to/target \ - --output-dir /path/to/output -``` - -## Check Nydus image - -Nydusify provides a checker to validate Nydus image, the checklist includes image manifest, Nydus bootstrap, file metadata, and data consistency in rootfs with the original OCI image. Meanwhile, the checker dumps OCI & Nydus image information to `output` (default) directory. 
- -Only check the manifest and bootstrap of Nydus image: - -``` shell -nydusify check \ - --target myregistry/repo:tag-nydus -``` - -You can find parsed image manifest, image config, and Nydus bootstrap file in `output` (default) directory: - -``` shell -$ tree ./output - -./output -├── nydus_bootstrap -├── nydus_bootstrap_debug.json -├── nydus_config.json -├── nydus_manifest.json -├── oci_config.json -└── oci_manifest.json -``` - -Specify `--source` and options to walk the rootfs of OCI image and Nydus image to compare file metadata: - -``` shell -nydusify check \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus -``` - -Specify `--backend-type` and `--backend-config` options to compare file metadata and file data consistency: - -``` shell -nydusify check \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus \ - --backend-type oss \ - --backend-config-file /path/to/backend-config.json -``` - - -## Mount the nydus image as a filesystem - -The nydusify mount command can mount a nydus image stored in the backend as a filesystem. Now the supported backend types include Registry (default backend), s3 and oss. - -When using Registy as the backend, you don't need specify the `--backend-type` . - -``` shell -nydusify mount \ - --target myregistry/repo:tag-nydus -``` - -Specify `--backend-type` and `--backend-config` options to mount for other backends:: - -``` shell -nydusify mount \ - --target mybackend/repo:tag-nydus \ - --backend-type oss \ - --backend-config-file /path/to/backend-config.json -``` - -## Copy image between registry repositories - -``` shell -nydusify copy \ - --source myregistry/repo:tag-nydus \ - --target myregistry/repo:tag-nydus-copy -``` - -It supports copying OCI v1 or Nydus images, use the options `--all-platforms` / `--platform` to copy the images of specific platforms. - -## Commit nydus image from container's changes - -The nydusify commit command can commit a nydus image from a nydus container, like `nerdctl commit` command. - -``` shell -nydusify convert \ - --source myregistry/repo:tag \ - --target myregistry/repo:tag-nydus - -nerdctl --snapshotter nydus run \ - -dt myregistry/repo:tag-nydus sh - -nydusify commit \ - --container containerID - --target myregistry/repo:tag-nydus-committed - -nerdctl --snapshotter nydus run \ - -dt myregistry/repo:tag-nydus-committed sh -``` - -The original container ID need to be a full container ID rather than an abbreviation. - -## More Nydusify Options - -See `nydusify convert/check/mount --help` - -## Use Nydusify as a package - -``` -See `contrib/nydusify/examples/converter/main.go` -``` - -## Hook Plugin (Experimental) - -Nydusify supports the hook function execution as [go-plugin](https://github.com/hashicorp/go-plugin) at key stages of image conversion. - -Write a hook plugin go file like [plugin/main.go](../contrib/nydusify/plugin/main.go), then build with the below command line: - -``` -go build -o nydus-hook-plugin ./plugin -``` - -And run `nydusify` with environment variable `NYDUS_HOOK_PLUGIN_PATH` (optional): - -``` -NYDUS_HOOK_PLUGIN_PATH=./nydus-hook-plugin nydusify convert --source ... --target ... -``` +# Nydusify + +The Nydusify CLI tool supports: +1. Convert an OCI container image from source registry into a Nydus image using `nydus-image` CLI layer by layer, then push Nydus image to target registry. +2. Convert local file system dictionary into Nydus image using `nydus-image`, then push Nydus-image to target remote storage(e.g. oss) optionally. 
+ +### Get binaries from release page + +Get `nydus-image`, `nydusd` and `nydusify` binaries from [release](https://github.com/dragonflyoss/nydus/releases/latest) page and install them to system PATH like `/usr/bin` or `/usr/local/bin`. + +## Basic Usage + +Convert oci image: +``` +nydusify convert \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus +``` +Pack local file system dictionary: +``` +nydusify pack \ + --bootstrap target.bootstrap \ + --target-dir /path/to/target \ + --output-dir /path/to/output +``` + +## Upload blob to storage backend + +Nydusify uploads Nydus blob to registry by default, change this behavior by specifying `--backend-type` option. + +### OSS Backend + +``` shell +cat /path/to/backend-config.json +{ + "endpoint": "region.aliyuncs.com", + "scheme": "https", + "access_key_id": "", + "access_key_secret": "", + "bucket_name": "", + "object_prefix": "nydus/" +} +``` + +``` shell +nydusify convert \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus \ + --backend-type oss \ + --backend-config-file /path/to/backend-config.json +``` + +### S3 Backend + +`nydusify convert` can upload blob to the aws s3 service or other s3 compatible services (for example minio, ceph s3 gateway, etc.) by specifying `--backend-type s3` option. + +The `endpoint` field of the `backend-config.json` is optional when using aws s3 service. + +``` shell +cat /path/to/backend-config.json +{ + "endpoint": "localhost:9000", + "scheme": "http", + "access_key_id": "", + "access_key_secret": "", + "bucket_name": "", + "object_prefix": "nydus/" +} +``` + +Note: the `endpoint` in the s3 `backend-config.json` **should not** contains the scheme prefix. + +``` shell +nydusify convert \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus \ + --backend-type s3 \ + --backend-config-file /path/to/backend-config.json +``` + +## Push Nydus Image to storage backend with subcommand pack + +### OSS + +``` shell +# meta_prefix: +# push bootstrap into oss://$bucket_name/$meta_prefix$bootstrap_name +# object_prefix: +# push blobs into oss://$bucket_name/$object_prefix$blob_id +cat /path/to/backend-config.json +{ + "bucket_name": "", + "endpoint": "region.aliyuncs.com", + "access_key_id": "", + "access_key_secret": "", + "meta_prefix": "meta/", + "object_prefix": "nydus/" +} + +nydusify pack --bootstrap target.bootstrap \ + --backend-push \ + --backend-type oss \ + --backend-config-file /path/to/backend-config.json \ + --target-dir /path/to/target \ + --output-dir /path/to/output +``` + +### S3 + +``` shell +# meta_prefix: +# push bootstrap into s3://$bucket_name/$meta_prefix$bootstrap_name +# object_prefix: +# push blobs into s3://$bucket_name/$object_prefix$blob_id +cat /path/to/backend-config.json +{ + "bucket_name": "", + "endpoint": "my-s3-service.net", + "access_key_id": "", + "access_key_secret": "", + "meta_prefix": "meta/", + "object_prefix": "nydus/" +} + +nydusify pack --bootstrap target.bootstrap \ + --backend-push \ + --backend-type s3 \ + --backend-config-file /path/to/backend-config.json \ + --target-dir /path/to/target \ + --output-dir /path/to/output +``` + +## Check Nydus image + +Nydusify provides a checker to validate Nydus image, the checklist includes image manifest, Nydus bootstrap, file metadata, and data consistency in rootfs with the original OCI image. Meanwhile, the checker dumps OCI & Nydus image information to `output` (default) directory. 
+ +Only check the manifest and bootstrap of Nydus image: + +``` shell +nydusify check \ + --target myregistry/repo:tag-nydus +``` + +You can find parsed image manifest, image config, and Nydus bootstrap file in `output` (default) directory: + +``` shell +$ tree ./output + +./output +├── nydus_bootstrap +├── nydus_bootstrap_debug.json +├── nydus_config.json +├── nydus_manifest.json +├── oci_config.json +└── oci_manifest.json +``` + +Specify `--source` and options to walk the rootfs of OCI image and Nydus image to compare file metadata: + +``` shell +nydusify check \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus +``` + +Specify `--backend-type` and `--backend-config` options to compare file metadata and file data consistency: + +``` shell +nydusify check \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus \ + --backend-type oss \ + --backend-config-file /path/to/backend-config.json +``` + + +## Mount the nydus image as a filesystem + +The nydusify mount command can mount a nydus image stored in the backend as a filesystem. Now the supported backend types include Registry (default backend), s3 and oss. + +When using Registy as the backend, you don't need specify the `--backend-type` . + +``` shell +nydusify mount \ + --target myregistry/repo:tag-nydus +``` + +Specify `--backend-type` and `--backend-config` options to mount for other backends:: + +``` shell +nydusify mount \ + --target mybackend/repo:tag-nydus \ + --backend-type oss \ + --backend-config-file /path/to/backend-config.json +``` + +## Copy image between registry repositories + +``` shell +nydusify copy \ + --source myregistry/repo:tag-nydus \ + --target myregistry/repo:tag-nydus-copy +``` + +It supports copying OCI v1 or Nydus images, use the options `--all-platforms` / `--platform` to copy the images of specific platforms. + +## Commit nydus image from container's changes + +The nydusify commit command can commit a nydus image from a nydus container, like `nerdctl commit` command. + +``` shell +nydusify convert \ + --source myregistry/repo:tag \ + --target myregistry/repo:tag-nydus + +nerdctl --snapshotter nydus run \ + -dt myregistry/repo:tag-nydus sh + +nydusify commit \ + --container containerID + --target myregistry/repo:tag-nydus-committed + +nerdctl --snapshotter nydus run \ + -dt myregistry/repo:tag-nydus-committed sh +``` + +The original container ID need to be a full container ID rather than an abbreviation. + +## More Nydusify Options + +See `nydusify convert/check/mount --help` + +## Use Nydusify as a package + +``` +See `contrib/nydusify/examples/converter/main.go` +``` + +## Hook Plugin (Experimental) + +Nydusify supports the hook function execution as [go-plugin](https://github.com/hashicorp/go-plugin) at key stages of image conversion. + +Write a hook plugin go file like [plugin/main.go](../contrib/nydusify/plugin/main.go), then build with the below command line: + +``` +go build -o nydus-hook-plugin ./plugin +``` + +And run `nydusify` with environment variable `NYDUS_HOOK_PLUGIN_PATH` (optional): + +``` +NYDUS_HOOK_PLUGIN_PATH=./nydus-hook-plugin nydusify convert --source ... --target ... +``` diff --git a/docs/prefetch.md b/docs/prefetch.md index 4fbc110716f..bbe8d4e5122 100644 --- a/docs/prefetch.md +++ b/docs/prefetch.md @@ -1,76 +1,76 @@ -# Cache and Prefetch - -Nydus stores blobs in OCI compatible registry, OSS (Aliyun Object Storage Service) and local file system. Regarding to local filesystem storage backend, it doesn't mean blob can only be stored on local disk. 
It can surely be stored in the NAS device, which can be accessed by POSIX file system interfaces. - -Nydus divides a single regular file into segments by 1MB size which will be compressed by a configurable compressor like lz4, etc. The compressed segments are called chunk. Chunks are contiguously arranged within a blob file. - -The underlying technique of prefetch is that it tries to merge several backend reads into one that spans a wider range. Moreover, users usually know what files are more likely to be read when container starts. - -## Prefetch policy: - -When converting or creating nydus image, we can add an option to tell `nydus-image` which files should be prefetched when nydusd is started. This is accomplished by option `--prefetch-policy `. Prefetch-policy has 3 possible values: - -- fs -- blob -- none - -With option `prefetch-policy`, `nydus-image` tries to read stdin to gather a list of files that are proposed to prefetch. The list can have both regular files and directories, even a file belongs to a directory that is also in the same list. - -Note that, `fs_prefetch` has to be enabled in rafs configuration file if prefetch is required. - -### 1. File System Level - -Nydus issues prefetch requests to backend and pulls needed chunks to local storage. So read IO can hit the blobcache which was previously filled by prefetch. Speaking of file system level prefetch, the prefetch request is issued from Rafs layer. So it is easier to better understand about files layout on disk, the relationship between files and directories. Prefetch works on top of file system is born of agility and very nimble. - -Prefetch is configurable by Rafs configuration file. - -- threads_count - - It has its own background workers to do prefetch. By this parameter, nydus can start different quantity of threads to issue read from backend storage in parallel. - -- merging_size - - The upper limit of request size to backend storage. In unit of bytes. - -- bandwidth_rate - - In unit of bytes. - In order to mitigate possible backend bandwidth contention, we can give a bandwidth rate limit to prefetch. Note that the `bandwidth_rate` sets the limit to the aggregated backend bandwidth consumed by all the threads configured by `threads_count`. So with a lower `bandwidth_rate` limit, more prefetch threads might be meaningless. - -A rafs configuration file (only `$.fs_prefetch` shows, other properties are omitted) follows: - -```json -{ - "fs_prefetch": { - "enable": true, - "threads_count": 4, - "merging_size": 131072, - "bandwidth_rate": 10485760 - } -} -``` - -#### 1.1 Prefetch Hints - -`nydus-image` statically and permanently writes a list of inode numbers to prefetch table of minimal size to bootstrap. The prefetch table will give a hint to nydus when it is mounted how to prefetch files from storage backend. - -#### 1.2 Dynamically Specified Files - -Thanks to rafs disk layout, even no prefetch hint was given when creating nydus image, we can still provide option `--prefetch-files ...` to `nydusd`. Afterwards rafs will prefetch those files specified in the list when the mount is initiated. If fortunately enough, rafs tries best to merge backend read requests to reduce latency. A good practice for this is to provide directories which is more possible to get merged to raise prefetch efficiency. -Please be aware of the fact that this method to initiate prefetch does not conflict with "prefetch hints" stored in bootstrap prefetch table. 
In fact, rafs will firstly try to load prefetch table and then takes the specified files list into account. - -#### 1.3 Prefetch policy (future work) - -Nydus can now only prefetch data from backend by an explicit hint either from prefetch table or command line starting flag. No globally configured prefetch policy as below is available: - -- Prefetch all the blobs entirely from backend slowly and in low priority. -- User IO triggered, block-level readahead. -- Prefetch the parent directory if one of its child is read. - -### 2. Blob Level - -Not like file system level prefetch, blob level prefetch directly pre-fetches a contiguous region from blob when nydusd started. This prefetch procedure is not aware of files, directories and chunks structures. When creating nydus image, a range descriptor composed of `readahead_offset` and `readahead_length` is written bootstrap. But blob level prefetch **won't** cache any data into blobcache or any other kind of cache. It works at `StorageBackend` level which is lower than `RafsCache`. For now, blob level prefetch only benefits `LocalFs` specific backend. In particular, `LocalFs` backend can perform Linux system call `readahead(2)` to load data from `readahead_offset` up to `readahead_length` bytes. - -## Sequence Diagram demonstrating prefetch workflow - -![Container Cold Startup](../misc/prefetch_seq_diagram.jpg) +# Cache and Prefetch + +Nydus stores blobs in OCI compatible registry, OSS (Aliyun Object Storage Service) and local file system. Regarding to local filesystem storage backend, it doesn't mean blob can only be stored on local disk. It can surely be stored in the NAS device, which can be accessed by POSIX file system interfaces. + +Nydus divides a single regular file into segments by 1MB size which will be compressed by a configurable compressor like lz4, etc. The compressed segments are called chunk. Chunks are contiguously arranged within a blob file. + +The underlying technique of prefetch is that it tries to merge several backend reads into one that spans a wider range. Moreover, users usually know what files are more likely to be read when container starts. + +## Prefetch policy: + +When converting or creating nydus image, we can add an option to tell `nydus-image` which files should be prefetched when nydusd is started. This is accomplished by option `--prefetch-policy `. Prefetch-policy has 3 possible values: + +- fs +- blob +- none + +With option `prefetch-policy`, `nydus-image` tries to read stdin to gather a list of files that are proposed to prefetch. The list can have both regular files and directories, even a file belongs to a directory that is also in the same list. + +Note that, `fs_prefetch` has to be enabled in rafs configuration file if prefetch is required. + +### 1. File System Level + +Nydus issues prefetch requests to backend and pulls needed chunks to local storage. So read IO can hit the blobcache which was previously filled by prefetch. Speaking of file system level prefetch, the prefetch request is issued from Rafs layer. So it is easier to better understand about files layout on disk, the relationship between files and directories. Prefetch works on top of file system is born of agility and very nimble. + +Prefetch is configurable by Rafs configuration file. + +- threads_count + + It has its own background workers to do prefetch. By this parameter, nydus can start different quantity of threads to issue read from backend storage in parallel. + +- merging_size + + The upper limit of request size to backend storage. 
In unit of bytes. + +- bandwidth_rate + + In unit of bytes. + In order to mitigate possible backend bandwidth contention, we can give a bandwidth rate limit to prefetch. Note that the `bandwidth_rate` sets the limit to the aggregated backend bandwidth consumed by all the threads configured by `threads_count`. So with a lower `bandwidth_rate` limit, more prefetch threads might be meaningless. + +A rafs configuration file (only `$.fs_prefetch` shows, other properties are omitted) follows: + +```json +{ + "fs_prefetch": { + "enable": true, + "threads_count": 4, + "merging_size": 131072, + "bandwidth_rate": 10485760 + } +} +``` + +#### 1.1 Prefetch Hints + +`nydus-image` statically and permanently writes a list of inode numbers to prefetch table of minimal size to bootstrap. The prefetch table will give a hint to nydus when it is mounted how to prefetch files from storage backend. + +#### 1.2 Dynamically Specified Files + +Thanks to rafs disk layout, even no prefetch hint was given when creating nydus image, we can still provide option `--prefetch-files ...` to `nydusd`. Afterwards rafs will prefetch those files specified in the list when the mount is initiated. If fortunately enough, rafs tries best to merge backend read requests to reduce latency. A good practice for this is to provide directories which is more possible to get merged to raise prefetch efficiency. +Please be aware of the fact that this method to initiate prefetch does not conflict with "prefetch hints" stored in bootstrap prefetch table. In fact, rafs will firstly try to load prefetch table and then takes the specified files list into account. + +#### 1.3 Prefetch policy (future work) + +Nydus can now only prefetch data from backend by an explicit hint either from prefetch table or command line starting flag. No globally configured prefetch policy as below is available: + +- Prefetch all the blobs entirely from backend slowly and in low priority. +- User IO triggered, block-level readahead. +- Prefetch the parent directory if one of its child is read. + +### 2. Blob Level + +Not like file system level prefetch, blob level prefetch directly pre-fetches a contiguous region from blob when nydusd started. This prefetch procedure is not aware of files, directories and chunks structures. When creating nydus image, a range descriptor composed of `readahead_offset` and `readahead_length` is written bootstrap. But blob level prefetch **won't** cache any data into blobcache or any other kind of cache. It works at `StorageBackend` level which is lower than `RafsCache`. For now, blob level prefetch only benefits `LocalFs` specific backend. In particular, `LocalFs` backend can perform Linux system call `readahead(2)` to load data from `readahead_offset` up to `readahead_length` bytes. 
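+
+To illustrate what that system call does, a minimal standalone sketch follows (made-up blob path and range, not nydusd's actual implementation): it simply asks the kernel to pre-populate the page cache for a byte range of a local blob file.
+
+```go
+package main
+
+import (
+    "log"
+    "os"
+
+    "golang.org/x/sys/unix"
+)
+
+func main() {
+    // Hypothetical blob file; nydusd derives the real path, readahead_offset
+    // and readahead_length from the bootstrap metadata.
+    f, err := os.Open("/var/lib/nydus/blobs/blob1")
+    if err != nil {
+        log.Fatal(err)
+    }
+    defer f.Close()
+
+    // readahead(2): populate the kernel page cache for [offset, offset+length).
+    // This only warms the page cache; nothing is written to blobcache.
+    if err := unix.Readahead(int(f.Fd()), 0, 16<<20); err != nil {
+        log.Fatal(err)
+    }
+}
+```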
+ +## Sequence Diagram demonstrating prefetch workflow + +![Container Cold Startup](../misc/prefetch_seq_diagram.jpg) diff --git a/go.work b/go.work index 6f89070aac9..4f6dbfca0ea 100644 --- a/go.work +++ b/go.work @@ -1,8 +1,8 @@ -go 1.21 - -use ( - ./contrib/ctr-remote - ./contrib/nydus-overlayfs - ./contrib/nydusify - ./smoke -) +go 1.21 + +use ( + ./contrib/ctr-remote + ./contrib/nydus-overlayfs + ./contrib/nydusify + ./smoke +) diff --git a/misc/configs/nydusd-blob-cache-entry-configuration-v2.toml b/misc/configs/nydusd-blob-cache-entry-configuration-v2.toml index ab434f60733..3b41666078a 100644 --- a/misc/configs/nydusd-blob-cache-entry-configuration-v2.toml +++ b/misc/configs/nydusd-blob-cache-entry-configuration-v2.toml @@ -1,139 +1,139 @@ -# Configuration file for Nydus Image Service - -# Configuration file format version number, must be 2. -version = 2 -# Identifier for the instance. -id = "my_id" -# Optional file path for metadata blobs, for BlobCacheEntry only. -metadata_path = "/path/to/rafs/meta/data/blob" - -[backend] -# Type of storage backend, valid values: "localfs", "oss", "registry" -type = "localfs" - -[backend.localfs] -blob_file = "/tmp/nydus.blob.data" -dir = "/tmp" -alt_dirs = ["/var/nydus/cache"] - -[backend.oss] -# Oss http scheme, either 'http' or 'https' -scheme = "http" -# Oss endpoint -endpoint = "my_endpoint" -# Oss bucket name -bucket_name = "my_bucket_name" -# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: -object_prefix = "my_object_prefix" -# Oss access key -access_key_id = "my_access_key_id" -# Oss secret -access_key_secret = "my_access_key_secret" -# Skip SSL certificate validation for HTTPS scheme. -skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. -retry_limit = 5 - -[backend.oss.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[backend.oss.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[backend.registy] -# Registry http scheme, either 'http' or 'https' -scheme = "https" -# Registry url host -host = "my.registry.com" -# Registry image name, like 'library/ubuntu' -repo = "nydus" -# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. -auth = "base64_encoded" -# Skip SSL certificate validation for HTTPS scheme. -skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. 
-retry_limit = 5 -# The field is a bearer token to be sent to registry to authorize registry requests. -registry_token = "bear_token" -# The http scheme to access blobs. -# It is used to workaround some P2P subsystem that requires a different scheme than the registry. -blob_url_scheme = "https" -# Redirect blob access to a different host regardless of the one specified in 'host'. -blob_redirected_host = "redirect.registry.com" - -[backend.registry.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[backend.registry.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[cache] -# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" -type = "filecache" -# Whether to cache compressed or uncompressed data. -compressed = true -# Whether to validate data read from the cache. -validate = true - -[cache.filecache] -work_dir = "." -enable_encryption = true -enable_convergent_encryption = true -encryption_key = "fc4a7db5614afc2f400e9478bebed1aefdbc9d7cd03210b84f144683a7a6fd1a" - -[cache.fscache] -work_dir = "." - -[cache.prefetch] -# Whether to enable blob data prefetching. -enable = true -# Number of data prefetching working threads, valid values: 1-1024. -threads = 8 -# The batch size to prefetch data from backend, valid values: 0-0x10000000. -batch_size = 1000000 -# Network bandwidth rate limit in unit of Bytes and Zero means no limit. -bandwidth_limit = 10000000 +# Configuration file for Nydus Image Service + +# Configuration file format version number, must be 2. +version = 2 +# Identifier for the instance. +id = "my_id" +# Optional file path for metadata blobs, for BlobCacheEntry only. +metadata_path = "/path/to/rafs/meta/data/blob" + +[backend] +# Type of storage backend, valid values: "localfs", "oss", "registry" +type = "localfs" + +[backend.localfs] +blob_file = "/tmp/nydus.blob.data" +dir = "/tmp" +alt_dirs = ["/var/nydus/cache"] + +[backend.oss] +# Oss http scheme, either 'http' or 'https' +scheme = "http" +# Oss endpoint +endpoint = "my_endpoint" +# Oss bucket name +bucket_name = "my_bucket_name" +# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: +object_prefix = "my_object_prefix" +# Oss access key +access_key_id = "my_access_key_id" +# Oss secret +access_key_secret = "my_access_key_secret" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 + +[backend.oss.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. 
+ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. +check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. +use_http = false + +[[backend.oss.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[backend.registy] +# Registry http scheme, either 'http' or 'https' +scheme = "https" +# Registry url host +host = "my.registry.com" +# Registry image name, like 'library/ubuntu' +repo = "nydus" +# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. +auth = "base64_encoded" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 +# The field is a bearer token to be sent to registry to authorize registry requests. +registry_token = "bear_token" +# The http scheme to access blobs. +# It is used to workaround some P2P subsystem that requires a different scheme than the registry. +blob_url_scheme = "https" +# Redirect blob access to a different host regardless of the one specified in 'host'. +blob_redirected_host = "redirect.registry.com" + +[backend.registry.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. +ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. +check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. +use_http = false + +[[backend.registry.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[cache] +# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" +type = "filecache" +# Whether to cache compressed or uncompressed data. +compressed = true +# Whether to validate data read from the cache. +validate = true + +[cache.filecache] +work_dir = "." +enable_encryption = true +enable_convergent_encryption = true +encryption_key = "fc4a7db5614afc2f400e9478bebed1aefdbc9d7cd03210b84f144683a7a6fd1a" + +[cache.fscache] +work_dir = "." + +[cache.prefetch] +# Whether to enable blob data prefetching. +enable = true +# Number of data prefetching working threads, valid values: 1-1024. +threads = 8 +# The batch size to prefetch data from backend, valid values: 0-0x10000000. +batch_size = 1000000 +# Network bandwidth rate limit in unit of Bytes and Zero means no limit. 
+bandwidth_limit = 10000000 diff --git a/misc/configs/nydusd-blob-cache-entry.toml b/misc/configs/nydusd-blob-cache-entry.toml index dcab4707dfc..e74ca86d3e0 100644 --- a/misc/configs/nydusd-blob-cache-entry.toml +++ b/misc/configs/nydusd-blob-cache-entry.toml @@ -1,141 +1,141 @@ -# Configuration file for Nydus Image Service - -type = "bootstrap" -id = "image1" -domain_id = "domain1" - -# Configuration file format version number, must be 2. -[config_v2] -version = 2 -# Identifier for the instance. -id = "my_id" -# Optional file path for metadata blobs, for BlobCacheEntry only. -metadata_path = "/path/to/rafs/meta/data/blob" - -[config_v2.backend] -# Type of storage backend, valid values: "localfs", "oss", "registry" -type = "localfs" - -[config_v2.backend.localfs] -blob_file = "/tmp/nydus.blob.data" -dir = "/tmp" -alt_dirs = ["/var/nydus/cache"] - -[config_v2.backend.oss] -# Oss http scheme, either 'http' or 'https' -scheme = "http" -# Oss endpoint -endpoint = "my_endpoint" -# Oss bucket name -bucket_name = "my_bucket_name" -# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: -object_prefix = "my_object_prefix" -# Oss access key -access_key_id = "my_access_key_id" -# Oss secret -access_key_secret = "my_access_key_secret" -# Skip SSL certificate validation for HTTPS scheme. -skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. -retry_limit = 5 - -[config_v2.backend.oss.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[config_v2.backend.oss.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[config_v2.backend.registry] -# Registry http scheme, either 'http' or 'https' -scheme = "https" -# Registry url host -host = "my.registry.com" -# Registry image name, like 'library/ubuntu' -repo = "nydus" -# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. -auth = "base64_encoded" -# Skip SSL certificate validation for HTTPS scheme. -skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. -retry_limit = 5 -# The field is a bearer token to be sent to registry to authorize registry requests. -registry_token = "bear_token" -# The http scheme to access blobs. -# It is used to workaround some P2P subsystem that requires a different scheme than the registry. -blob_url_scheme = "https" -# Redirect blob access to a different host regardless of the one specified in 'host'. 
-blob_redirected_host = "redirect.registry.com" - -[config_v2.backend.registry.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[config_v2.backend.registry.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[config_v2.cache] -# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" -type = "filecache" -# Whether to cache compressed or uncompressed data. -compressed = true -# Whether to validate data read from the cache. -validate = true - -[config_v2.cache.filecache] -work_dir = "." - -[config_v2.cache.fscache] -work_dir = "." - -[config_v2.cache.prefetch] -# Whether to enable blob data prefetching. -enable = true -# Number of data prefetching working threads, valid values: 1-1024. -threads = 8 -# The batch size to prefetch data from backend, valid values: 0-0x10000000. -batch_size = 1000000 -# Network bandwidth rate limit in unit of Bytes and Zero means no limit. -bandwidth_limit = 10000000 +# Configuration file for Nydus Image Service + +type = "bootstrap" +id = "image1" +domain_id = "domain1" + +# Configuration file format version number, must be 2. +[config_v2] +version = 2 +# Identifier for the instance. +id = "my_id" +# Optional file path for metadata blobs, for BlobCacheEntry only. +metadata_path = "/path/to/rafs/meta/data/blob" + +[config_v2.backend] +# Type of storage backend, valid values: "localfs", "oss", "registry" +type = "localfs" + +[config_v2.backend.localfs] +blob_file = "/tmp/nydus.blob.data" +dir = "/tmp" +alt_dirs = ["/var/nydus/cache"] + +[config_v2.backend.oss] +# Oss http scheme, either 'http' or 'https' +scheme = "http" +# Oss endpoint +endpoint = "my_endpoint" +# Oss bucket name +bucket_name = "my_bucket_name" +# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: +object_prefix = "my_object_prefix" +# Oss access key +access_key_id = "my_access_key_id" +# Oss secret +access_key_secret = "my_access_key_secret" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 + +[config_v2.backend.oss.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. +ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. +check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. 
+use_http = false + +[[config_v2.backend.oss.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[config_v2.backend.registry] +# Registry http scheme, either 'http' or 'https' +scheme = "https" +# Registry url host +host = "my.registry.com" +# Registry image name, like 'library/ubuntu' +repo = "nydus" +# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. +auth = "base64_encoded" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 +# The field is a bearer token to be sent to registry to authorize registry requests. +registry_token = "bear_token" +# The http scheme to access blobs. +# It is used to workaround some P2P subsystem that requires a different scheme than the registry. +blob_url_scheme = "https" +# Redirect blob access to a different host regardless of the one specified in 'host'. +blob_redirected_host = "redirect.registry.com" + +[config_v2.backend.registry.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. +ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. +check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. +use_http = false + +[[config_v2.backend.registry.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[config_v2.cache] +# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" +type = "filecache" +# Whether to cache compressed or uncompressed data. +compressed = true +# Whether to validate data read from the cache. +validate = true + +[config_v2.cache.filecache] +work_dir = "." + +[config_v2.cache.fscache] +work_dir = "." + +[config_v2.cache.prefetch] +# Whether to enable blob data prefetching. +enable = true +# Number of data prefetching working threads, valid values: 1-1024. +threads = 8 +# The batch size to prefetch data from backend, valid values: 0-0x10000000. +batch_size = 1000000 +# Network bandwidth rate limit in unit of Bytes and Zero means no limit. 
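Before pointing nydusd at an edited copy of these sample files, a quick parse check can catch TOML or JSON syntax slips early. A minimal sketch using only the Python standard library (assuming Python 3.11+ for `tomllib`; the paths are the sample files from this tree):

```bash
# Fail fast if the blob-cache-entry TOML sample does not parse (requires Python >= 3.11).
python3 -c 'import sys, tomllib; tomllib.load(open(sys.argv[1], "rb"))' \
    misc/configs/nydusd-blob-cache-entry.toml && echo "TOML OK"

# The JSON blob-cache entry can be checked the same way.
python3 -m json.tool misc/configs/nydusd-boostrap-blob-cache-entry.json >/dev/null && echo "JSON OK"
```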
+bandwidth_limit = 10000000 diff --git a/misc/configs/nydusd-boostrap-blob-cache-entry.json b/misc/configs/nydusd-boostrap-blob-cache-entry.json index 3c0973bb717..132216a924a 100644 --- a/misc/configs/nydusd-boostrap-blob-cache-entry.json +++ b/misc/configs/nydusd-boostrap-blob-cache-entry.json @@ -1,17 +1,17 @@ -{ - "type": "bootstrap", - "id": "bootstrap1", - "domain_id": "userid1", - "config": { - "id": "factory1", - "backend_type": "localfs", - "backend_config": { - "dir": "/tmp/nydus" - }, - "cache_type": "fscache", - "cache_config": { - "work_dir": "/tmp/nydus" - }, - "metadata_file": "/tmp/nydus/bootstrap1" - } -} +{ + "type": "bootstrap", + "id": "bootstrap1", + "domain_id": "userid1", + "config": { + "id": "factory1", + "backend_type": "localfs", + "backend_config": { + "dir": "/tmp/nydus" + }, + "cache_type": "fscache", + "cache_config": { + "work_dir": "/tmp/nydus" + }, + "metadata_file": "/tmp/nydus/bootstrap1" + } +} diff --git a/misc/configs/nydusd-config-v2.toml b/misc/configs/nydusd-config-v2.toml index 30e8bffca64..396227c13af 100644 --- a/misc/configs/nydusd-config-v2.toml +++ b/misc/configs/nydusd-config-v2.toml @@ -1,170 +1,170 @@ -# Configuration file for Nydus Image Service - -# Configuration file format version number, must be 2. -version = 2 -# Identifier for the instance. -id = "my_id" - -[backend] -# Type of storage backend, valid values: "localfs", "oss", "registry" -type = "localfs" - -[backend.localfs] -blob_file = "/tmp/nydus.blob.data" -dir = "/tmp" -alt_dirs = ["/var/nydus/cache"] - -[backend.oss] -# Oss http scheme, either 'http' or 'https' -scheme = "http" -# Oss endpoint -endpoint = "my_endpoint" -# Oss bucket name -bucket_name = "my_bucket_name" -# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: -object_prefix = "my_object_prefix" -# Oss access key -access_key_id = "my_access_key_id" -# Oss secret -access_key_secret = "my_access_key_secret" -# Skip SSL certificate validation for HTTPS scheme. -skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. -retry_limit = 5 - -[backend.oss.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[backend.oss.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[backend.registy] -# Registry http scheme, either 'http' or 'https' -scheme = "https" -# Registry url host -host = "my.registry.com" -# Registry image name, like 'library/ubuntu' -repo = "nydus" -# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. -auth = "base64_encoded" -# Skip SSL certificate validation for HTTPS scheme. 
-skip_verify = true -# Drop the read request once http request timeout, in seconds. -timeout = 10 -# Drop the read request once http connection timeout, in seconds. -connect_timeout = 10 -# Retry count when read request failed. -retry_limit = 5 -# The field is a bearer token to be sent to registry to authorize registry requests. -registry_token = "bear_token" -# The http scheme to access blobs. -# It is used to workaround some P2P subsystem that requires a different scheme than the registry. -blob_url_scheme = "https" -# Redirect blob access to a different host regardless of the one specified in 'host'. -blob_redirected_host = "redirect.registry.com" - -[backend.registry.proxy] -# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. -url = "localhost:6789" -# Proxy health checking endpoint. -ping_url = "localhost:6789/ping" -# Fallback to remote storage backend if proxy ping failed. -fallback = true -# Interval for proxy health checking, in seconds. -check_interval = 5 -# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. -use_http = false - -[[backend.registry.mirrors]] -# Mirror server URL, for example http://127.0.0.1:65001. -host = "http://127.0.0.1:65001" -# Ping URL to check mirror server health. -ping_url = "http://127.0.0.1:65001/ping" -# HTTP request headers to be passed to mirror server. -# headers = -# Interval for mirror health checking, in seconds. -health_check_interval = 5 -# Maximum number of failures before marking a mirror as unusable. -failure_limit = 5 - -[cache] -# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" -type = "filecache" -# Whether to cache compressed or uncompressed data. -compressed = true -# Whether to validate data read from the cache. -validate = true -# Enable encryption data written to the cache file. -enable_encryption = true -# Enable convergent encryption for chunk deduplication. -enable_convergent_encryption = true -# Key for data encryption, a heximal representation of [u8; 32]. -encryption_key = "fc4a7db5614afc2f400e9478bebed1aefdbc9d7cd03210b84f144683a7a6fd1a" - -[cache.filecache] -work_dir = "." - -[cache.fscache] -work_dir = "." - -[cache.prefetch] -# Whether to enable blob data prefetching. -enable = true -# Number of data prefetching working threads, valid values: 1-1024. -threads = 8 -# The batch size to prefetch data from backend, valid values: 0-0x10000000. -batch_size = 1000000 -# Network bandwidth rate limit in unit of Bytes and Zero means no limit. -bandwidth_limit = 10000000 - -[rafs] -# Filesystem metadata cache mode, "direct" or "cached". "direct" is almost what you want. -mode = "direct" -# Amplified user IO request batch size to read data from remote storage backend / local cache, -# valid values: 0-0x10000000 -batch_size = 1000000 -# Whether to validate data digest. -validate = true -# Enable support of extended attributes. -enable_xattr = true -# Enable statistics for file IOs. -iostats_files = false -# Record filesystem access pattern. -access_pattern = false -# Record file name if file access trace log. -latest_read_files = false - -[rafs.prefetch] -# Whether to enable RAFS filesystem layer prefetching. -enable = true -# Number of data prefetching working threads, valid values: 1-1024. -threads = 8 -# The batch size to prefetch data from backend, valid values: 0-0x10000000. -batch_size = 1000000 -# Network bandwidth rate limit in unit of Bytes and Zero means no limit. 
-bandwidth_limit = 10000000 -# Prefetch all data from backend. -prefetch_all = true - +# Configuration file for Nydus Image Service + +# Configuration file format version number, must be 2. +version = 2 +# Identifier for the instance. +id = "my_id" + +[backend] +# Type of storage backend, valid values: "localfs", "oss", "registry" +type = "localfs" + +[backend.localfs] +blob_file = "/tmp/nydus.blob.data" +dir = "/tmp" +alt_dirs = ["/var/nydus/cache"] + +[backend.oss] +# Oss http scheme, either 'http' or 'https' +scheme = "http" +# Oss endpoint +endpoint = "my_endpoint" +# Oss bucket name +bucket_name = "my_bucket_name" +# Prefix object_prefix to OSS object key, for example the simulation of subdirectory: +object_prefix = "my_object_prefix" +# Oss access key +access_key_id = "my_access_key_id" +# Oss secret +access_key_secret = "my_access_key_secret" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 + +[backend.oss.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. +ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. +check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. +use_http = false + +[[backend.oss.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[backend.registy] +# Registry http scheme, either 'http' or 'https' +scheme = "https" +# Registry url host +host = "my.registry.com" +# Registry image name, like 'library/ubuntu' +repo = "nydus" +# Base64_encoded(username:password), the field should be sent to registry auth server to get a bearer token. +auth = "base64_encoded" +# Skip SSL certificate validation for HTTPS scheme. +skip_verify = true +# Drop the read request once http request timeout, in seconds. +timeout = 10 +# Drop the read request once http connection timeout, in seconds. +connect_timeout = 10 +# Retry count when read request failed. +retry_limit = 5 +# The field is a bearer token to be sent to registry to authorize registry requests. +registry_token = "bear_token" +# The http scheme to access blobs. +# It is used to workaround some P2P subsystem that requires a different scheme than the registry. +blob_url_scheme = "https" +# Redirect blob access to a different host regardless of the one specified in 'host'. +blob_redirected_host = "redirect.registry.com" + +[backend.registry.proxy] +# Access remote storage backend via proxy, e.g. Dragonfly dfdaemon server URL. +url = "localhost:6789" +# Proxy health checking endpoint. +ping_url = "localhost:6789/ping" +# Fallback to remote storage backend if proxy ping failed. +fallback = true +# Interval for proxy health checking, in seconds. 
+check_interval = 5 +# Replace URL to http to request source registry with proxy, and allow fallback to https if the proxy is unhealthy. +use_http = false + +[[backend.registry.mirrors]] +# Mirror server URL, for example http://127.0.0.1:65001. +host = "http://127.0.0.1:65001" +# Ping URL to check mirror server health. +ping_url = "http://127.0.0.1:65001/ping" +# HTTP request headers to be passed to mirror server. +# headers = +# Interval for mirror health checking, in seconds. +health_check_interval = 5 +# Maximum number of failures before marking a mirror as unusable. +failure_limit = 5 + +[cache] +# Type of blob cache: "blobcache", "filecache", "fscache", "dummycache" or "" +type = "filecache" +# Whether to cache compressed or uncompressed data. +compressed = true +# Whether to validate data read from the cache. +validate = true +# Enable encryption data written to the cache file. +enable_encryption = true +# Enable convergent encryption for chunk deduplication. +enable_convergent_encryption = true +# Key for data encryption, a heximal representation of [u8; 32]. +encryption_key = "fc4a7db5614afc2f400e9478bebed1aefdbc9d7cd03210b84f144683a7a6fd1a" + +[cache.filecache] +work_dir = "." + +[cache.fscache] +work_dir = "." + +[cache.prefetch] +# Whether to enable blob data prefetching. +enable = true +# Number of data prefetching working threads, valid values: 1-1024. +threads = 8 +# The batch size to prefetch data from backend, valid values: 0-0x10000000. +batch_size = 1000000 +# Network bandwidth rate limit in unit of Bytes and Zero means no limit. +bandwidth_limit = 10000000 + +[rafs] +# Filesystem metadata cache mode, "direct" or "cached". "direct" is almost what you want. +mode = "direct" +# Amplified user IO request batch size to read data from remote storage backend / local cache, +# valid values: 0-0x10000000 +batch_size = 1000000 +# Whether to validate data digest. +validate = true +# Enable support of extended attributes. +enable_xattr = true +# Enable statistics for file IOs. +iostats_files = false +# Record filesystem access pattern. +access_pattern = false +# Record file name if file access trace log. +latest_read_files = false + +[rafs.prefetch] +# Whether to enable RAFS filesystem layer prefetching. +enable = true +# Number of data prefetching working threads, valid values: 1-1024. +threads = 8 +# The batch size to prefetch data from backend, valid values: 0-0x10000000. +batch_size = 1000000 +# Network bandwidth rate limit in unit of Bytes and Zero means no limit. +bandwidth_limit = 10000000 +# Prefetch all data from backend. 
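The `encryption_key` in the `[cache]` section above is documented as a hex representation of `[u8; 32]`, i.e. 64 hexadecimal characters. One way to generate a fresh key of that shape (assuming OpenSSL is available; any 64-hex-character string of your own works as well):

```bash
# Emit 32 random bytes as 64 hex characters, the shape expected by encryption_key.
openssl rand -hex 32

# Sanity-check the length of an existing key (should print 64).
printf '%s' "fc4a7db5614afc2f400e9478bebed1aefdbc9d7cd03210b84f144683a7a6fd1a" | wc -c
```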
+prefetch_all = true + diff --git a/misc/configs/nydusd-config.json b/misc/configs/nydusd-config.json index 044a0e91bc9..b37bdd6f475 100644 --- a/misc/configs/nydusd-config.json +++ b/misc/configs/nydusd-config.json @@ -1,28 +1,28 @@ -{ - "device": { - "backend": { - "type": "registry", - "config": { - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 2 - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "/var/lib/nydus/cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "iostats_files": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 8, - "merging_size": 1048576, - "prefetch_all": true - } +{ + "device": { + "backend": { + "type": "registry", + "config": { + "timeout": 5, + "connect_timeout": 5, + "retry_limit": 2 + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "/var/lib/nydus/cache" + } + } + }, + "mode": "direct", + "digest_validate": false, + "iostats_files": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 8, + "merging_size": 1048576, + "prefetch_all": true + } } \ No newline at end of file diff --git a/misc/fscache/setup.sh b/misc/fscache/setup.sh index da312b53ece..f1b74b9be3a 100644 --- a/misc/fscache/setup.sh +++ b/misc/fscache/setup.sh @@ -1,17 +1,17 @@ -#!/bin/bash - -# This script should be executed in root mode! - -apt update -apt install -y cachefilesd -apt list --installed | grep cachefilesd -chmod a+w /etc/default/cachefilesd -sed -i 's/#RUN=yes/RUN=yes/' /etc/default/cachefilesd -cat /etc/default/cachefilesd -/sbin/modprobe -qab cachefiles -/sbin/cachefilesd -f /etc/cachefilesd.conf -systemctl status cachefilesd -[ -c /dev/cachefiles ] && echo "cachefilesd is successfully enabled" -pid=$(lsof /dev/cachefiles | awk '{if (NR>1) {print $2}}') -kill -9 $pid -echo "/dev/cachefiles is available now" +#!/bin/bash + +# This script should be executed in root mode! 
+ +apt update +apt install -y cachefilesd +apt list --installed | grep cachefilesd +chmod a+w /etc/default/cachefilesd +sed -i 's/#RUN=yes/RUN=yes/' /etc/default/cachefilesd +cat /etc/default/cachefilesd +/sbin/modprobe -qab cachefiles +/sbin/cachefilesd -f /etc/cachefilesd.conf +systemctl status cachefilesd +[ -c /dev/cachefiles ] && echo "cachefilesd is successfully enabled" +pid=$(lsof /dev/cachefiles | awk '{if (NR>1) {print $2}}') +kill -9 $pid +echo "/dev/cachefiles is available now" diff --git a/misc/musl-static/Dockerfile b/misc/musl-static/Dockerfile index 5109c9634d0..d13cadec2f1 100644 --- a/misc/musl-static/Dockerfile +++ b/misc/musl-static/Dockerfile @@ -1,12 +1,12 @@ -FROM clux/muslrust:1.72.1 - -ARG RUST_TARGET=x86_64-unknown-linux-musl - -RUN apt update && apt install -y cmake - -WORKDIR /nydus-rs - -CMD rustup component add clippy && \ - rustup component add rustfmt && \ - rustup target add $RUST_TARGET && \ - make static-release +FROM clux/muslrust:1.72.1 + +ARG RUST_TARGET=x86_64-unknown-linux-musl + +RUN apt update && apt install -y cmake + +WORKDIR /nydus-rs + +CMD rustup component add clippy && \ + rustup component add rustfmt && \ + rustup target add $RUST_TARGET && \ + make static-release diff --git a/misc/performance/containerd_config.toml b/misc/performance/containerd_config.toml index 129a0e6a539..5de6d72a1d0 100644 --- a/misc/performance/containerd_config.toml +++ b/misc/performance/containerd_config.toml @@ -1,17 +1,17 @@ -version = 2 -root = "/var/lib/containerd" -state = "/run/containerd" -oom_score = 0 - -[debug] - level = "debug" - -[plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "nydus" - disable_snapshot_annotations = false - discard_unpacked_layers = false - -[proxy_plugins] - [proxy_plugins.nydus] - type = "snapshot" - address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +version = 2 +root = "/var/lib/containerd" +state = "/run/containerd" +oom_score = 0 + +[debug] + level = "debug" + +[plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "nydus" + disable_snapshot_annotations = false + discard_unpacked_layers = false + +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" diff --git a/misc/performance/nydus-snapshotter.service b/misc/performance/nydus-snapshotter.service index 3abdb555986..204d6b72f50 100644 --- a/misc/performance/nydus-snapshotter.service +++ b/misc/performance/nydus-snapshotter.service @@ -1,18 +1,18 @@ -[Unit] -Description=nydus snapshotter -After=network.target -Before=containerd.service - -[Service] -Type=simple -Environment=HOME=/root -ExecStart=/usr/local/bin/containerd-nydus-grpc --config /etc/nydus/config.toml -Restart=always -RestartSec=1 -KillMode=process -OOMScoreAdjust=-999 -StandardOutput=journal -StandardError=journal - -[Install] -WantedBy=multi-user.target +[Unit] +Description=nydus snapshotter +After=network.target +Before=containerd.service + +[Service] +Type=simple +Environment=HOME=/root +ExecStart=/usr/local/bin/containerd-nydus-grpc --config /etc/nydus/config.toml +Restart=always +RestartSec=1 +KillMode=process +OOMScoreAdjust=-999 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/misc/performance/nydusd_config.json b/misc/performance/nydusd_config.json index 1f7fd38927b..040d8708cee 100644 --- a/misc/performance/nydusd_config.json +++ b/misc/performance/nydusd_config.json @@ -1,30 +1,30 @@ -{ - "device": { - "backend": { - "type": "registry", - "config": { - 
"scheme": "http", - "host": "localhost:5077", - "skip_verify": true, - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 4 - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "/var/lib/containerd-nydus/cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "enable_xattr": true, - "iostats_files": false, - "access_pattern": false, - "latest_read_files": false, - "fs_prefetch": { - "enable": false - } -} +{ + "device": { + "backend": { + "type": "registry", + "config": { + "scheme": "http", + "host": "localhost:5077", + "skip_verify": true, + "timeout": 5, + "connect_timeout": 5, + "retry_limit": 4 + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "/var/lib/containerd-nydus/cache" + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "iostats_files": false, + "access_pattern": false, + "latest_read_files": false, + "fs_prefetch": { + "enable": false + } +} diff --git a/misc/performance/snapshotter_config.toml b/misc/performance/snapshotter_config.toml index e8d3c118d9c..68522ab5405 100644 --- a/misc/performance/snapshotter_config.toml +++ b/misc/performance/snapshotter_config.toml @@ -1,132 +1,132 @@ -version = 1 -# Snapshotter's own home directory where it stores and creates necessary resources -root = "/var/lib/containerd-nydus" -# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket -address = "/run/containerd-nydus/containerd-nydus-grpc.sock" -daemon_mode = "dedicated" -# Whether snapshotter should try to clean up resources when it is closed -cleanup_on_close = false - -[system] -# Snapshotter's debug and trace HTTP server interface -enable = true -# Unix domain socket path where system controller is listening on -address = "/run/containerd-nydus/system.sock" - -[system.debug] -# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. -# This option specifies the profile duration when nydusd is downloading and uncomproessing data. -daemon_cpu_profile_duration_secs = 5 -# Enable by assigning an address, empty indicates pprof server is disabled -pprof_address = "" - -[daemon] -# Specify a configuration file for nydusd -nydusd_config = "/etc/nydus/nydusd-config.fusedev.json" -nydusd_path = "/usr/local/bin/nydusd" -nydusimage_path = "/usr/local/bin/nydus-image" -# fusedev or fscache -fs_driver = "fusedev" -# How to process when daemon dies: "none", "restart" or "failover" -recover_policy = "restart" -# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. -# Setting to 0 will use the default configuration of nydusd. -threads_number = 4 -# Log rotation size for nydusd, in unit MB(megabytes) -log_rotation_size = 100 - -[cgroup] -# Whether to use separate cgroup for nydusd. -enable = true -# The memory limit for nydusd cgroup, which contains all nydusd processes. -# Percentage is supported as well, please ensure it is end with "%". -# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". 
-memory_limit = "" - -[log] -# Print logs to stdout rather than logging files -log_to_stdout = false -# Snapshotter's log level -level = "info" -log_rotation_compress = true -log_rotation_local_time = true -# Max number of days to retain logs -log_rotation_max_age = 7 -log_rotation_max_backups = 5 -# In unit MB(megabytes) -log_rotation_max_size = 100 - -[metrics] -# Enable by assigning an address, empty indicates metrics server is disabled -address = ":9110" - -[remote] -convert_vpc_registry = false - -[remote.mirrors_config] -# Snapshotter will overwrite daemon's mirrors configuration -# if the values loaded from this driectory are not null before starting a daemon. -# Set to "" or an empty directory to disable it. -#dir = "/etc/nydus/certs.d" - -[remote.auth] -# Fetch the private registry auth by listening to K8s API server -enable_kubeconfig_keychain = false -# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) -kubeconfig_path = "" -# Fetch the private registry auth as CRI image service proxy -enable_cri_keychain = false -# the target image service when using image proxy -#image_service_address = "/run/containerd/containerd.sock" - -[snapshot] -# Let containerd use nydus-overlayfs mount helper -enable_nydus_overlayfs = false -# Insert Kata Virtual Volume option to `Mount.Options` -enable_kata_volume = false -# Whether to remove resources when a snapshot is removed -sync_remove = false - -[cache_manager] -# Disable or enable recyclebin -disable = false -# How long to keep deleted files in recyclebin -gc_period = "24h" -# Directory to host cached files -cache_dir = "" - -[image] -public_key_file = "" -validate_signature = false - -# The configuraions for features that are not production ready -[experimental] -# Whether to enable stargz support -enable_stargz = false -# Whether to enable referrers support -# The option enables trying to fetch the Nydus image associated with the OCI image and run it. -# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers -enable_referrer_detect = false -# Whether to enable authentication support -# The option enables nydus snapshot to provide backend information to nydusd. -enable_backend_source = false -[experimental.tarfs] -# Whether to enable nydus tarfs mode. 
Tarfs is supported by: -# - The EROFS filesystem driver since Linux 6.4 -# - Nydus Image Service release v2.3 -enable_tarfs = false -# Mount rafs on host by loopdev and EROFS -mount_tarfs_on_host = false -# Only enable nydus tarfs mode for images with `tarfs hint` label when true -tarfs_hint = false -# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default -max_concurrent_proc = 0 -# Mode to export tarfs images: -# - "none" or "": do not export tarfs -# - "layer_verity_only": only generate disk verity information for a layer blob -# - "image_verity_only": only generate disk verity information for all blobs of an image -# - "layer_block": generate a raw block disk image with tarfs for a layer -# - "image_block": generate a raw block disk image with tarfs for an image -# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info -# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info -export_mode = "" +version = 1 +# Snapshotter's own home directory where it stores and creates necessary resources +root = "/var/lib/containerd-nydus" +# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket +address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +daemon_mode = "dedicated" +# Whether snapshotter should try to clean up resources when it is closed +cleanup_on_close = false + +[system] +# Snapshotter's debug and trace HTTP server interface +enable = true +# Unix domain socket path where system controller is listening on +address = "/run/containerd-nydus/system.sock" + +[system.debug] +# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. +# This option specifies the profile duration when nydusd is downloading and uncomproessing data. +daemon_cpu_profile_duration_secs = 5 +# Enable by assigning an address, empty indicates pprof server is disabled +pprof_address = "" + +[daemon] +# Specify a configuration file for nydusd +nydusd_config = "/etc/nydus/nydusd-config.fusedev.json" +nydusd_path = "/usr/local/bin/nydusd" +nydusimage_path = "/usr/local/bin/nydus-image" +# fusedev or fscache +fs_driver = "fusedev" +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "restart" +# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. +# Setting to 0 will use the default configuration of nydusd. +threads_number = 4 +# Log rotation size for nydusd, in unit MB(megabytes) +log_rotation_size = 100 + +[cgroup] +# Whether to use separate cgroup for nydusd. +enable = true +# The memory limit for nydusd cgroup, which contains all nydusd processes. +# Percentage is supported as well, please ensure it is end with "%". +# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". 
+memory_limit = "" + +[log] +# Print logs to stdout rather than logging files +log_to_stdout = false +# Snapshotter's log level +level = "info" +log_rotation_compress = true +log_rotation_local_time = true +# Max number of days to retain logs +log_rotation_max_age = 7 +log_rotation_max_backups = 5 +# In unit MB(megabytes) +log_rotation_max_size = 100 + +[metrics] +# Enable by assigning an address, empty indicates metrics server is disabled +address = ":9110" + +[remote] +convert_vpc_registry = false + +[remote.mirrors_config] +# Snapshotter will overwrite daemon's mirrors configuration +# if the values loaded from this driectory are not null before starting a daemon. +# Set to "" or an empty directory to disable it. +#dir = "/etc/nydus/certs.d" + +[remote.auth] +# Fetch the private registry auth by listening to K8s API server +enable_kubeconfig_keychain = false +# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) +kubeconfig_path = "" +# Fetch the private registry auth as CRI image service proxy +enable_cri_keychain = false +# the target image service when using image proxy +#image_service_address = "/run/containerd/containerd.sock" + +[snapshot] +# Let containerd use nydus-overlayfs mount helper +enable_nydus_overlayfs = false +# Insert Kata Virtual Volume option to `Mount.Options` +enable_kata_volume = false +# Whether to remove resources when a snapshot is removed +sync_remove = false + +[cache_manager] +# Disable or enable recyclebin +disable = false +# How long to keep deleted files in recyclebin +gc_period = "24h" +# Directory to host cached files +cache_dir = "" + +[image] +public_key_file = "" +validate_signature = false + +# The configuraions for features that are not production ready +[experimental] +# Whether to enable stargz support +enable_stargz = false +# Whether to enable referrers support +# The option enables trying to fetch the Nydus image associated with the OCI image and run it. +# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers +enable_referrer_detect = false +# Whether to enable authentication support +# The option enables nydus snapshot to provide backend information to nydusd. +enable_backend_source = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Mount rafs on host by loopdev and EROFS +mount_tarfs_on_host = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" diff --git a/misc/prepare.sh b/misc/prepare.sh index 80d70e74c4f..355e2cbce40 100644 --- a/misc/prepare.sh +++ b/misc/prepare.sh @@ -1,28 +1,28 @@ -#!/bin/bash - -SNAPSHOTTER_CONFIG="misc/performance/snapshotter_config.toml" -if [ "$1" == "takeover_test" ]; then - SNAPSHOTTER_CONFIG="misc/takeover/snapshotter_config.toml" -fi - -readonly SNAPSHOTTER_VERSION=0.13.13 -readonly NERDCTL_VERSION=1.7.6 -readonly CNI_PLUGINS_VERSION=1.5.0 - -# setup nerdctl and nydusd env -sudo install -D -m 755 contrib/nydusify/cmd/nydusify /usr/local/bin -sudo install -D -m 755 target/release/nydusd target/release/nydus-image /usr/local/bin -wget https://github.com/containerd/nydus-snapshotter/releases/download/v$SNAPSHOTTER_VERSION/nydus-snapshotter-v$SNAPSHOTTER_VERSION-linux-amd64.tar.gz -tar zxvf nydus-snapshotter-v$SNAPSHOTTER_VERSION-linux-amd64.tar.gz -sudo install -D -m 755 bin/containerd-nydus-grpc /usr/local/bin -sudo wget https://github.com/containerd/nerdctl/releases/download/v$NERDCTL_VERSION/nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz -sudo tar -xzvf nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz -C /usr/local/bin -sudo mkdir -p /opt/cni/bin -sudo wget https://github.com/containernetworking/plugins/releases/download/v$CNI_PLUGINS_VERSION/cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz -sudo tar -xzvf cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz -C /opt/cni/bin -sudo install -D misc/performance/containerd_config.toml /etc/containerd/config.toml -sudo systemctl restart containerd -sudo install -D misc/performance/nydusd_config.json /etc/nydus/nydusd-config.fusedev.json -sudo install -D $SNAPSHOTTER_CONFIG /etc/nydus/config.toml -sudo install -D misc/performance/nydus-snapshotter.service /etc/systemd/system/nydus-snapshotter.service -sudo systemctl start nydus-snapshotter +#!/bin/bash + +SNAPSHOTTER_CONFIG="misc/performance/snapshotter_config.toml" +if [ "$1" == "takeover_test" ]; then + SNAPSHOTTER_CONFIG="misc/takeover/snapshotter_config.toml" +fi + +readonly SNAPSHOTTER_VERSION=0.13.13 +readonly NERDCTL_VERSION=1.7.6 +readonly CNI_PLUGINS_VERSION=1.5.0 + +# setup nerdctl and nydusd env +sudo install -D -m 755 contrib/nydusify/cmd/nydusify /usr/local/bin +sudo install -D -m 755 target/release/nydusd target/release/nydus-image /usr/local/bin +wget https://github.com/containerd/nydus-snapshotter/releases/download/v$SNAPSHOTTER_VERSION/nydus-snapshotter-v$SNAPSHOTTER_VERSION-linux-amd64.tar.gz +tar zxvf nydus-snapshotter-v$SNAPSHOTTER_VERSION-linux-amd64.tar.gz +sudo install -D -m 755 
bin/containerd-nydus-grpc /usr/local/bin +sudo wget https://github.com/containerd/nerdctl/releases/download/v$NERDCTL_VERSION/nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz +sudo tar -xzvf nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz -C /usr/local/bin +sudo mkdir -p /opt/cni/bin +sudo wget https://github.com/containernetworking/plugins/releases/download/v$CNI_PLUGINS_VERSION/cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz +sudo tar -xzvf cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz -C /opt/cni/bin +sudo install -D misc/performance/containerd_config.toml /etc/containerd/config.toml +sudo systemctl restart containerd +sudo install -D misc/performance/nydusd_config.json /etc/nydus/nydusd-config.fusedev.json +sudo install -D $SNAPSHOTTER_CONFIG /etc/nydus/config.toml +sudo install -D misc/performance/nydus-snapshotter.service /etc/systemd/system/nydus-snapshotter.service +sudo systemctl start nydus-snapshotter diff --git a/misc/takeover/snapshotter_config.toml b/misc/takeover/snapshotter_config.toml index 1138d652d87..005b93408cc 100644 --- a/misc/takeover/snapshotter_config.toml +++ b/misc/takeover/snapshotter_config.toml @@ -1,132 +1,132 @@ -version = 1 -# Snapshotter's own home directory where it stores and creates necessary resources -root = "/var/lib/containerd-nydus" -# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket -address = "/run/containerd-nydus/containerd-nydus-grpc.sock" -daemon_mode = "dedicated" -# Whether snapshotter should try to clean up resources when it is closed -cleanup_on_close = false - -[system] -# Snapshotter's debug and trace HTTP server interface -enable = true -# Unix domain socket path where system controller is listening on -address = "/run/containerd-nydus/system.sock" - -[system.debug] -# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. -# This option specifies the profile duration when nydusd is downloading and uncomproessing data. -daemon_cpu_profile_duration_secs = 5 -# Enable by assigning an address, empty indicates pprof server is disabled -pprof_address = "" - -[daemon] -# Specify a configuration file for nydusd -nydusd_config = "/etc/nydus/nydusd-config.fusedev.json" -nydusd_path = "/usr/local/bin/nydusd" -nydusimage_path = "/usr/local/bin/nydus-image" -# fusedev or fscache -fs_driver = "fusedev" -# How to process when daemon dies: "none", "restart" or "failover" -recover_policy = "failover" -# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. -# Setting to 0 will use the default configuration of nydusd. -threads_number = 4 -# Log rotation size for nydusd, in unit MB(megabytes) -log_rotation_size = 100 - -[cgroup] -# Whether to use separate cgroup for nydusd. -enable = true -# The memory limit for nydusd cgroup, which contains all nydusd processes. -# Percentage is supported as well, please ensure it is end with "%". -# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". 
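The `misc/prepare.sh` script above installs the binaries, the configs and the nydus-snapshotter systemd unit; before relying on that setup for benchmark or takeover runs, a short smoke test confirms the snapshotter is actually serving containerd. A sketch (the image reference is a placeholder; substitute any image already converted to the Nydus format):

```bash
# The unit installed by prepare.sh should be active and listening on its socket.
sudo systemctl --no-pager status nydus-snapshotter
ls -l /run/containerd-nydus/containerd-nydus-grpc.sock

# Run a container through the nydus snapshotter; replace the reference with a real Nydus image.
sudo nerdctl --snapshotter nydus run --rm -it example.registry.com/library/app:nydus sh
```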
-memory_limit = "" - -[log] -# Print logs to stdout rather than logging files -log_to_stdout = false -# Snapshotter's log level -level = "info" -log_rotation_compress = true -log_rotation_local_time = true -# Max number of days to retain logs -log_rotation_max_age = 7 -log_rotation_max_backups = 5 -# In unit MB(megabytes) -log_rotation_max_size = 100 - -[metrics] -# Enable by assigning an address, empty indicates metrics server is disabled -address = ":9110" - -[remote] -convert_vpc_registry = false - -[remote.mirrors_config] -# Snapshotter will overwrite daemon's mirrors configuration -# if the values loaded from this driectory are not null before starting a daemon. -# Set to "" or an empty directory to disable it. -#dir = "/etc/nydus/certs.d" - -[remote.auth] -# Fetch the private registry auth by listening to K8s API server -enable_kubeconfig_keychain = false -# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) -kubeconfig_path = "" -# Fetch the private registry auth as CRI image service proxy -enable_cri_keychain = false -# the target image service when using image proxy -#image_service_address = "/run/containerd/containerd.sock" - -[snapshot] -# Let containerd use nydus-overlayfs mount helper -enable_nydus_overlayfs = false -# Insert Kata Virtual Volume option to `Mount.Options` -enable_kata_volume = false -# Whether to remove resources when a snapshot is removed -sync_remove = false - -[cache_manager] -# Disable or enable recyclebin -disable = false -# How long to keep deleted files in recyclebin -gc_period = "24h" -# Directory to host cached files -cache_dir = "" - -[image] -public_key_file = "" -validate_signature = false - -# The configuraions for features that are not production ready -[experimental] -# Whether to enable stargz support -enable_stargz = false -# Whether to enable referrers support -# The option enables trying to fetch the Nydus image associated with the OCI image and run it. -# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers -enable_referrer_detect = false -# Whether to enable authentication support -# The option enables nydus snapshot to provide backend information to nydusd. -enable_backend_source = false -[experimental.tarfs] -# Whether to enable nydus tarfs mode. 
Tarfs is supported by: -# - The EROFS filesystem driver since Linux 6.4 -# - Nydus Image Service release v2.3 -enable_tarfs = false -# Mount rafs on host by loopdev and EROFS -mount_tarfs_on_host = false -# Only enable nydus tarfs mode for images with `tarfs hint` label when true -tarfs_hint = false -# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default -max_concurrent_proc = 0 -# Mode to export tarfs images: -# - "none" or "": do not export tarfs -# - "layer_verity_only": only generate disk verity information for a layer blob -# - "image_verity_only": only generate disk verity information for all blobs of an image -# - "layer_block": generate a raw block disk image with tarfs for a layer -# - "image_block": generate a raw block disk image with tarfs for an image -# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info -# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info -export_mode = "" +version = 1 +# Snapshotter's own home directory where it stores and creates necessary resources +root = "/var/lib/containerd-nydus" +# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket +address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +daemon_mode = "dedicated" +# Whether snapshotter should try to clean up resources when it is closed +cleanup_on_close = false + +[system] +# Snapshotter's debug and trace HTTP server interface +enable = true +# Unix domain socket path where system controller is listening on +address = "/run/containerd-nydus/system.sock" + +[system.debug] +# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. +# This option specifies the profile duration when nydusd is downloading and uncomproessing data. +daemon_cpu_profile_duration_secs = 5 +# Enable by assigning an address, empty indicates pprof server is disabled +pprof_address = "" + +[daemon] +# Specify a configuration file for nydusd +nydusd_config = "/etc/nydus/nydusd-config.fusedev.json" +nydusd_path = "/usr/local/bin/nydusd" +nydusimage_path = "/usr/local/bin/nydus-image" +# fusedev or fscache +fs_driver = "fusedev" +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "failover" +# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. +# Setting to 0 will use the default configuration of nydusd. +threads_number = 4 +# Log rotation size for nydusd, in unit MB(megabytes) +log_rotation_size = 100 + +[cgroup] +# Whether to use separate cgroup for nydusd. +enable = true +# The memory limit for nydusd cgroup, which contains all nydusd processes. +# Percentage is supported as well, please ensure it is end with "%". +# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". 
+memory_limit = "" + +[log] +# Print logs to stdout rather than logging files +log_to_stdout = false +# Snapshotter's log level +level = "info" +log_rotation_compress = true +log_rotation_local_time = true +# Max number of days to retain logs +log_rotation_max_age = 7 +log_rotation_max_backups = 5 +# In unit MB(megabytes) +log_rotation_max_size = 100 + +[metrics] +# Enable by assigning an address, empty indicates metrics server is disabled +address = ":9110" + +[remote] +convert_vpc_registry = false + +[remote.mirrors_config] +# Snapshotter will overwrite daemon's mirrors configuration +# if the values loaded from this driectory are not null before starting a daemon. +# Set to "" or an empty directory to disable it. +#dir = "/etc/nydus/certs.d" + +[remote.auth] +# Fetch the private registry auth by listening to K8s API server +enable_kubeconfig_keychain = false +# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) +kubeconfig_path = "" +# Fetch the private registry auth as CRI image service proxy +enable_cri_keychain = false +# the target image service when using image proxy +#image_service_address = "/run/containerd/containerd.sock" + +[snapshot] +# Let containerd use nydus-overlayfs mount helper +enable_nydus_overlayfs = false +# Insert Kata Virtual Volume option to `Mount.Options` +enable_kata_volume = false +# Whether to remove resources when a snapshot is removed +sync_remove = false + +[cache_manager] +# Disable or enable recyclebin +disable = false +# How long to keep deleted files in recyclebin +gc_period = "24h" +# Directory to host cached files +cache_dir = "" + +[image] +public_key_file = "" +validate_signature = false + +# The configuraions for features that are not production ready +[experimental] +# Whether to enable stargz support +enable_stargz = false +# Whether to enable referrers support +# The option enables trying to fetch the Nydus image associated with the OCI image and run it. +# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers +enable_referrer_detect = false +# Whether to enable authentication support +# The option enables nydus snapshot to provide backend information to nydusd. +enable_backend_source = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Mount rafs on host by loopdev and EROFS +mount_tarfs_on_host = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" diff --git a/misc/top_images/fsck.patch b/misc/top_images/fsck.patch index f4b30caba74..a43bc789e51 100644 --- a/misc/top_images/fsck.patch +++ b/misc/top_images/fsck.patch @@ -1,21 +1,21 @@ -diff --git a/lib/super.c b/lib/super.c -index f486eb7..2c83a77 100644 ---- a/lib/super.c -+++ b/lib/super.c -@@ -36,11 +36,11 @@ static int erofs_init_devices(struct erofs_sb_info *sbi, - else - ondisk_extradevs = le16_to_cpu(dsb->extra_devices); - -- if (ondisk_extradevs != sbi->extra_devices) { -- erofs_err("extra devices don't match (ondisk %u, given %u)", -- ondisk_extradevs, sbi->extra_devices); -- return -EINVAL; -- } -+ // if (ondisk_extradevs != sbi->extra_devices) { -+ // erofs_err("extra devices don't match (ondisk %u, given %u)", -+ // ondisk_extradevs, sbi->extra_devices); -+ // return -EINVAL; -+ // } - if (!ondisk_extradevs) - return 0; - +diff --git a/lib/super.c b/lib/super.c +index f486eb7..2c83a77 100644 +--- a/lib/super.c ++++ b/lib/super.c +@@ -36,11 +36,11 @@ static int erofs_init_devices(struct erofs_sb_info *sbi, + else + ondisk_extradevs = le16_to_cpu(dsb->extra_devices); + +- if (ondisk_extradevs != sbi->extra_devices) { +- erofs_err("extra devices don't match (ondisk %u, given %u)", +- ondisk_extradevs, sbi->extra_devices); +- return -EINVAL; +- } ++ // if (ondisk_extradevs != sbi->extra_devices) { ++ // erofs_err("extra devices don't match (ondisk %u, given %u)", ++ // ondisk_extradevs, sbi->extra_devices); ++ // return -EINVAL; ++ // } + if (!ondisk_extradevs) + return 0; + diff --git a/misc/top_images/image_list.txt b/misc/top_images/image_list.txt index 9b6e814b3bd..e34532a3939 100644 --- a/misc/top_images/image_list.txt +++ b/misc/top_images/image_list.txt @@ -1,47 +1,47 @@ -alpine -postgres -ubuntu -nginx -busybox -python -traefik -redis -httpd -node -mongo -mysql -memcached -mariadb -amazoncorretto -docker -rabbitmq -centos -hello-world -registry -debian -sonarqube -wordpress -influxdb -tomcat -amazonlinux -maven -nextcloud -haproxy -php -bash -caddy -telegraf -hashicorp/vault -couchdb -eclipse-mosquitto -cassandra -chronograf -gradle -adminer -ghost -kong -solr -sentry -zookeeper -ghcr.io/dragonflyoss/image-service/pax-uid-test +alpine +postgres +ubuntu +nginx +busybox +python +traefik +redis +httpd +node +mongo +mysql +memcached +mariadb +amazoncorretto +docker +rabbitmq +centos +hello-world +registry +debian +sonarqube +wordpress +influxdb +tomcat +amazonlinux +maven +nextcloud +haproxy +php +bash +caddy +telegraf +hashicorp/vault +couchdb 
+eclipse-mosquitto +cassandra +chronograf +gradle +adminer +ghost +kong +solr +sentry +zookeeper +ghcr.io/dragonflyoss/image-service/pax-uid-test cgr.dev/chainguard/busybox \ No newline at end of file diff --git a/rafs/Cargo.toml b/rafs/Cargo.toml index 5cbb972bb3a..09b01e28b45 100644 --- a/rafs/Cargo.toml +++ b/rafs/Cargo.toml @@ -1,46 +1,46 @@ -[package] -name = "nydus-rafs" -version = "0.3.2" -description = "The RAFS filesystem format for Nydus Image Service" -authors = ["The Nydus Developers"] -license = "Apache-2.0 OR BSD-3-Clause" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -anyhow = "1.0.35" -arc-swap = "1.5" -bitflags = "1.2.1" -lazy_static = "1.4.0" -libc = "0.2" -log = "0.4" -nix = "0.24" -serde = { version = "1.0.110", features = ["serde_derive", "rc"] } -serde_json = "1.0.53" -vm-memory = "0.10" -fuse-backend-rs = "^0.12.0" -thiserror = "1" - -nydus-api = { version = "0.3", path = "../api" } -nydus-storage = { version = "0.6", path = "../storage", features = [ - "backend-localfs", -] } -nydus-utils = { version = "0.4", path = "../utils" } - -[dev-dependencies] -vmm-sys-util = "0.11" -assert_matches = "1.5.0" - -[features] -fusedev = ["fuse-backend-rs/fusedev"] -virtio-fs = ["fuse-backend-rs/virtiofs", "vm-memory/backend-mmap"] -vhost-user-fs = ["fuse-backend-rs/vhost-user-fs"] - -[package.metadata.docs.rs] -all-features = true -targets = [ - "x86_64-unknown-linux-gnu", - "aarch64-unknown-linux-gnu", - "aarch64-apple-darwin", -] +[package] +name = "nydus-rafs" +version = "0.3.2" +description = "The RAFS filesystem format for Nydus Image Service" +authors = ["The Nydus Developers"] +license = "Apache-2.0 OR BSD-3-Clause" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +anyhow = "1.0.35" +arc-swap = "1.5" +bitflags = "1.2.1" +lazy_static = "1.4.0" +libc = "0.2" +log = "0.4" +nix = "0.24" +serde = { version = "1.0.110", features = ["serde_derive", "rc"] } +serde_json = "1.0.53" +vm-memory = "0.10" +fuse-backend-rs = "^0.12.0" +thiserror = "1" + +nydus-api = { version = "0.3", path = "../api" } +nydus-storage = { version = "0.6", path = "../storage", features = [ + "backend-localfs", +] } +nydus-utils = { version = "0.4", path = "../utils" } + +[dev-dependencies] +vmm-sys-util = "0.11" +assert_matches = "1.5.0" + +[features] +fusedev = ["fuse-backend-rs/fusedev"] +virtio-fs = ["fuse-backend-rs/virtiofs", "vm-memory/backend-mmap"] +vhost-user-fs = ["fuse-backend-rs/vhost-user-fs"] + +[package.metadata.docs.rs] +all-features = true +targets = [ + "x86_64-unknown-linux-gnu", + "aarch64-unknown-linux-gnu", + "aarch64-apple-darwin", +] diff --git a/rafs/README.md b/rafs/README.md index 315e24c6d92..df72f386506 100644 --- a/rafs/README.md +++ b/rafs/README.md @@ -1,17 +1,17 @@ -# nydus-rafs - -The RAFS Fuse filesystem for [Nydus Image Service](https://nydus.dev/). - -## Support - -**Platforms**: -- x86_64 -- aarch64 - -**Operating Systems**: -- Linux -- MacOS - -## License - -This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). +# nydus-rafs + +The RAFS Fuse filesystem for [Nydus Image Service](https://nydus.dev/). + +## Support + +**Platforms**: +- x86_64 +- aarch64 + +**Operating Systems**: +- Linux +- MacOS + +## License + +This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). 
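The `rafs/Cargo.toml` diffed above declares `fusedev`, `virtio-fs` and `vhost-user-fs` as opt-in cargo features with no default set, so they have to be requested explicitly when building or testing the crate. A minimal sketch from the repository root, using the package and feature names declared in that manifest:

```bash
# Build the RAFS crate with the FUSE (fusedev) transport enabled.
cargo build -p nydus-rafs --release --features fusedev

# Run its unit tests with the same feature selection.
cargo test -p nydus-rafs --features fusedev
```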
diff --git a/rafs/src/blobfs/mod.rs b/rafs/src/blobfs/mod.rs index fa5a3c19047..d9b79541fd2 100644 --- a/rafs/src/blobfs/mod.rs +++ b/rafs/src/blobfs/mod.rs @@ -1,380 +1,380 @@ -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Fuse blob passthrough file system, mirroring an existing FS hierarchy. -//! -//! This file system mirrors the existing file system hierarchy of the system, starting at the -//! root file system. This is implemented by just "passing through" all requests to the -//! corresponding underlying file system. -//! -//! The code is derived from the -//! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, -//! with heavy modification/enhancements from Alibaba Cloud OS team. - -use std::any::Any; -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::fs::{create_dir_all, File}; -use std::io; -use std::mem::MaybeUninit; -use std::os::fd::{AsRawFd, FromRawFd}; -use std::os::unix::ffi::OsStrExt; -use std::path::Path; -use std::str::FromStr; -use std::sync::{Arc, Mutex, RwLock}; -use std::thread; - -use fuse_backend_rs::api::{filesystem::*, BackendFileSystem, VFS_MAX_INO}; -use fuse_backend_rs::{passthrough::Config as PassthroughConfig, passthrough::PassthroughFs}; -use nix::NixPath; -use nydus_api::{einval, ConfigV2}; -use nydus_storage::device::BlobPrefetchRequest; -use serde::Deserialize; - -use crate::fs::Rafs; -use crate::metadata::Inode; -use crate::RafsError; - -mod sync_io; - -const EMPTY_CSTR: &[u8] = b"\0"; - -/// Configuration information for blobfs instance. -#[derive(Clone, Default, Deserialize)] -pub struct BlobOndemandConfig { - /// RAFS filesystem configuration to configure backend, cache and fuse. - /// The rafs config used to set up rafs device for the purpose of `on demand read`. - pub rafs_conf: ConfigV2, - - /// Meta blob file path for a RAFS filesystem. - #[serde(default)] - pub bootstrap_path: String, - - /// Blob cache directory path. - #[serde(default)] - pub blob_cache_dir: String, -} - -impl FromStr for BlobOndemandConfig { - type Err = io::Error; - - fn from_str(s: &str) -> io::Result { - serde_json::from_str(s).map_err(|e| { - einval!(format!( - "blobfs: failed to load blobfs configuration, {}", - e - )) - }) - } -} - -/// Options that configure the behavior of the blobfs fuse file system. -#[derive(Default, Debug, Clone, PartialEq)] -pub struct Config { - /// Blobfs config is embedded with passthrough config - pub ps_config: PassthroughConfig, - /// This provides on demand config of blob management. - pub blob_ondemand_cfg: String, -} - -struct RafsHandle { - rafs: Option, - thread: Option>>, -} - -struct BlobfsState { - #[allow(unused)] - blob_cache_dir: String, - rafs_handle: RwLock, - inode_map: Mutex>, -} - -impl BlobfsState { - fn get_rafs_handle(&self) -> io::Result<()> { - let mut rafs_handle = self.rafs_handle.write().unwrap(); - - if let Some(handle) = rafs_handle.thread.take() { - match handle.join() { - Ok(v) => match v { - Ok(rafs) => rafs_handle.rafs = Some(rafs), - Err(e) => { - return Err(eio!(format!( - "blobfs: failed to get RAFS filesystem handle, {}", - e - ))) - } - }, - Err(e) => { - return Err(eio!(format!( - "blobfs: failed to get RAFS filesystem handle, {:?}", - e - ))) - } - } - } - - if rafs_handle.rafs.is_none() { - Err(eio!("blobfs: failed to get RAFS filesystem handle")) - } else { - Ok(()) - } - } -} - -/// A file system that simply "passes through" all requests it receives to the underlying file -/// system. 
-/// -/// To keep the implementation simple it servers the contents of its root directory. Users -/// that wish to serve only a specific directory should set up the environment so that that -/// directory ends up as the root of the file system process. One way to accomplish this is via a -/// combination of mount namespaces and the pivot_root system call. -pub struct BlobFs { - state: BlobfsState, - pfs: PassthroughFs, -} - -impl BlobFs { - /// Create a Blob file system instance. - pub fn new(cfg: Config) -> io::Result { - let bootstrap_args = Self::load_bootstrap(&cfg)?; - let pfs = PassthroughFs::new(cfg.ps_config)?; - - Ok(BlobFs { - pfs, - state: bootstrap_args, - }) - } - - /// Initialize the blobfs instance. - pub fn import(&self) -> io::Result<()> { - self.pfs.import() - } - - fn ensure_path_exist(path: &Path) -> io::Result<()> { - if path.is_empty() { - return Err(einval!("blobfs: path is empty")); - } - if !path.exists() { - create_dir_all(path).map_err(|e| { - error!("blobfs: failed to create dir {}, {}", path.display(), e); - e - })?; - } - - Ok(()) - } - - fn load_bootstrap(cfg: &Config) -> io::Result { - let blob_ondemand_conf = BlobOndemandConfig::from_str(&cfg.blob_ondemand_cfg)?; - if !blob_ondemand_conf.rafs_conf.validate() { - return Err(einval!("blobfs: invalidate configuration for blobfs")); - } - let rafs_cfg = blob_ondemand_conf.rafs_conf.get_rafs_config()?; - if rafs_cfg.mode != "direct" { - return Err(einval!("blobfs: only 'direct' mode is supported")); - } - - // check if blob cache dir exists. - let path = Path::new(blob_ondemand_conf.blob_cache_dir.as_str()); - Self::ensure_path_exist(path)?; - - let path = Path::new(blob_ondemand_conf.bootstrap_path.as_str()); - if blob_ondemand_conf.bootstrap_path.is_empty() || !path.is_file() { - return Err(einval!(format!( - "blobfs: bootstrap file {} is invalid", - path.display() - ))); - } - - let bootstrap_path = blob_ondemand_conf.bootstrap_path.clone(); - let config = Arc::new(blob_ondemand_conf.rafs_conf.clone()); - - trace!("blobfs: async create Rafs start!"); - let rafs_join_handle = std::thread::spawn(move || { - let (mut rafs, reader) = Rafs::new(&config, "blobfs", Path::new(&bootstrap_path))?; - rafs.import(reader, None)?; - Ok(rafs) - }); - - let rafs_handle = RafsHandle { - rafs: None, - thread: Some(rafs_join_handle), - }; - - Ok(BlobfsState { - blob_cache_dir: blob_ondemand_conf.blob_cache_dir.clone(), - rafs_handle: RwLock::new(rafs_handle), - inode_map: Mutex::new(HashMap::new()), - }) - } - - fn get_blob_id_and_size(&self, inode: Inode) -> io::Result<(String, u64)> { - let mut map = self.state.inode_map.lock().unwrap(); - match map.entry(inode) { - std::collections::hash_map::Entry::Occupied(v) => { - let (sz, blob_id) = v.get(); - Ok((blob_id.to_string(), *sz)) - } - std::collections::hash_map::Entry::Vacant(entry) => { - // locate blob file that the inode refers to - let blob_id_full_path = self.pfs.readlinkat_proc_file(inode)?; - let blob_file = Self::open_file( - libc::AT_FDCWD, - blob_id_full_path.as_path(), - libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, - 0, - ) - .map_err(|e| einval!(e))?; - let st = Self::stat(&blob_file).map_err(|e| { - error!("get_blob_id_and_size: stat failed {:?}", e); - e - })?; - if st.st_size < 0 { - return Err(einval!(format!( - "load_chunks_on_demand: blob_id {:?}, size: {:?} is less than 0", - blob_id_full_path.display(), - st.st_size - ))); - } - - let blob_id = blob_id_full_path - .file_name() - .ok_or_else(|| einval!("blobfs: failed to find blob file"))?; - let 
blob_id = blob_id - .to_os_string() - .into_string() - .map_err(|_e| einval!("blobfs: failed to get blob id from file name"))?; - trace!("load_chunks_on_demand: blob_id {}", blob_id); - entry.insert((st.st_size as u64, blob_id.clone())); - - Ok((blob_id, st.st_size as u64)) - } - } - } - - fn stat(f: &File) -> io::Result { - // Safe because this is a constant value and a valid C string. - let pathname = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; - let mut st = MaybeUninit::::zeroed(); - - // Safe because the kernel will only write data in `st` and we check the return value. - let res = unsafe { - libc::fstatat64( - f.as_raw_fd(), - pathname.as_ptr(), - st.as_mut_ptr(), - libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, - ) - }; - if res >= 0 { - // Safe because the kernel guarantees that the struct is now fully initialized. - Ok(unsafe { st.assume_init() }) - } else { - Err(io::Error::last_os_error()) - } - } - - fn open_file(dfd: i32, pathname: &Path, flags: i32, mode: u32) -> io::Result { - let pathname = CString::new(pathname.as_os_str().as_bytes()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - let fd = if flags & libc::O_CREAT == libc::O_CREAT { - unsafe { libc::openat(dfd, pathname.as_ptr(), flags, mode) } - } else { - unsafe { libc::openat(dfd, pathname.as_ptr(), flags) } - }; - - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd. - Ok(unsafe { File::from_raw_fd(fd) }) - } -} - -impl BackendFileSystem for BlobFs { - fn mount(&self) -> io::Result<(Entry, u64)> { - let ctx = &Context::default(); - let name = CString::new(".").unwrap(); - let entry = self.lookup(ctx, ROOT_ID, name.as_c_str())?; - - Ok((entry, VFS_MAX_INO)) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -#[cfg(test)] -mod tests { - use crate::metadata::{RafsMode, RafsSuper}; - use crate::{RafsIoRead, RafsIoReader, RafsIoWrite, RafsIterator}; - use std::fs::OpenOptions; - use std::io::Write; - use std::path::PathBuf; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_rafs_io_writer() { - let mut file = TempFile::new().unwrap().into_file(); - - assert!(file.validate_alignment(2, 8).is_err()); - assert!(file.validate_alignment(7, 8).is_err()); - assert!(file.validate_alignment(9, 8).is_err()); - assert!(file.validate_alignment(8, 8).is_ok()); - - file.write_all(&[0x0u8; 7]).unwrap(); - assert!(file.validate_alignment(8, 8).is_err()); - { - let obj: &mut dyn RafsIoWrite = &mut file; - obj.write_padding(1).unwrap(); - } - assert!(file.validate_alignment(8, 8).is_ok()); - file.write_all(&[0x0u8; 1]).unwrap(); - assert!(file.validate_alignment(8, 8).is_err()); - - let obj: &mut dyn RafsIoRead = &mut file; - assert_eq!(obj.seek_to_offset(0).unwrap(), 0); - assert_eq!(obj.seek_plus_offset(7).unwrap(), 7); - assert_eq!(obj.seek_to_next_aligned(7, 8).unwrap(), 8); - assert_eq!(obj.seek_plus_offset(7).unwrap(), 15); - } - - #[test] - fn test_rafs_iterator() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); - let bootstrap = OpenOptions::new() - .read(true) - .write(false) - .open(&path) - .unwrap(); - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: false, - ..Default::default() - }; - rs.load(&mut (Box::new(bootstrap) as RafsIoReader)).unwrap(); - let iter = RafsIterator::new(&rs); - - let mut last = false; - for (idx, (_node, path)) in iter.enumerate() { - assert!(!last); - if idx == 
1 { - assert_eq!(path, PathBuf::from("/bin")); - } else if idx == 2 { - assert_eq!(path, PathBuf::from("/boot")); - } else if idx == 3 { - assert_eq!(path, PathBuf::from("/dev")); - } else if idx == 10 { - assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.256color")); - } else if idx == 11 { - assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.lightbgcolor")); - } else if path == PathBuf::from("/var/yp") { - last = true; - } - } - assert!(last); - } -} +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Fuse blob passthrough file system, mirroring an existing FS hierarchy. +//! +//! This file system mirrors the existing file system hierarchy of the system, starting at the +//! root file system. This is implemented by just "passing through" all requests to the +//! corresponding underlying file system. +//! +//! The code is derived from the +//! [CrosVM](https://chromium.googlesource.com/chromiumos/platform/crosvm/) project, +//! with heavy modification/enhancements from Alibaba Cloud OS team. + +use std::any::Any; +use std::collections::HashMap; +use std::ffi::{CStr, CString}; +use std::fs::{create_dir_all, File}; +use std::io; +use std::mem::MaybeUninit; +use std::os::fd::{AsRawFd, FromRawFd}; +use std::os::unix::ffi::OsStrExt; +use std::path::Path; +use std::str::FromStr; +use std::sync::{Arc, Mutex, RwLock}; +use std::thread; + +use fuse_backend_rs::api::{filesystem::*, BackendFileSystem, VFS_MAX_INO}; +use fuse_backend_rs::{passthrough::Config as PassthroughConfig, passthrough::PassthroughFs}; +use nix::NixPath; +use nydus_api::{einval, ConfigV2}; +use nydus_storage::device::BlobPrefetchRequest; +use serde::Deserialize; + +use crate::fs::Rafs; +use crate::metadata::Inode; +use crate::RafsError; + +mod sync_io; + +const EMPTY_CSTR: &[u8] = b"\0"; + +/// Configuration information for blobfs instance. +#[derive(Clone, Default, Deserialize)] +pub struct BlobOndemandConfig { + /// RAFS filesystem configuration to configure backend, cache and fuse. + /// The rafs config used to set up rafs device for the purpose of `on demand read`. + pub rafs_conf: ConfigV2, + + /// Meta blob file path for a RAFS filesystem. + #[serde(default)] + pub bootstrap_path: String, + + /// Blob cache directory path. + #[serde(default)] + pub blob_cache_dir: String, +} + +impl FromStr for BlobOndemandConfig { + type Err = io::Error; + + fn from_str(s: &str) -> io::Result { + serde_json::from_str(s).map_err(|e| { + einval!(format!( + "blobfs: failed to load blobfs configuration, {}", + e + )) + }) + } +} + +/// Options that configure the behavior of the blobfs fuse file system. +#[derive(Default, Debug, Clone, PartialEq)] +pub struct Config { + /// Blobfs config is embedded with passthrough config + pub ps_config: PassthroughConfig, + /// This provides on demand config of blob management. 
+ pub blob_ondemand_cfg: String, +} + +struct RafsHandle { + rafs: Option, + thread: Option>>, +} + +struct BlobfsState { + #[allow(unused)] + blob_cache_dir: String, + rafs_handle: RwLock, + inode_map: Mutex>, +} + +impl BlobfsState { + fn get_rafs_handle(&self) -> io::Result<()> { + let mut rafs_handle = self.rafs_handle.write().unwrap(); + + if let Some(handle) = rafs_handle.thread.take() { + match handle.join() { + Ok(v) => match v { + Ok(rafs) => rafs_handle.rafs = Some(rafs), + Err(e) => { + return Err(eio!(format!( + "blobfs: failed to get RAFS filesystem handle, {}", + e + ))) + } + }, + Err(e) => { + return Err(eio!(format!( + "blobfs: failed to get RAFS filesystem handle, {:?}", + e + ))) + } + } + } + + if rafs_handle.rafs.is_none() { + Err(eio!("blobfs: failed to get RAFS filesystem handle")) + } else { + Ok(()) + } + } +} + +/// A file system that simply "passes through" all requests it receives to the underlying file +/// system. +/// +/// To keep the implementation simple it servers the contents of its root directory. Users +/// that wish to serve only a specific directory should set up the environment so that that +/// directory ends up as the root of the file system process. One way to accomplish this is via a +/// combination of mount namespaces and the pivot_root system call. +pub struct BlobFs { + state: BlobfsState, + pfs: PassthroughFs, +} + +impl BlobFs { + /// Create a Blob file system instance. + pub fn new(cfg: Config) -> io::Result { + let bootstrap_args = Self::load_bootstrap(&cfg)?; + let pfs = PassthroughFs::new(cfg.ps_config)?; + + Ok(BlobFs { + pfs, + state: bootstrap_args, + }) + } + + /// Initialize the blobfs instance. + pub fn import(&self) -> io::Result<()> { + self.pfs.import() + } + + fn ensure_path_exist(path: &Path) -> io::Result<()> { + if path.is_empty() { + return Err(einval!("blobfs: path is empty")); + } + if !path.exists() { + create_dir_all(path).map_err(|e| { + error!("blobfs: failed to create dir {}, {}", path.display(), e); + e + })?; + } + + Ok(()) + } + + fn load_bootstrap(cfg: &Config) -> io::Result { + let blob_ondemand_conf = BlobOndemandConfig::from_str(&cfg.blob_ondemand_cfg)?; + if !blob_ondemand_conf.rafs_conf.validate() { + return Err(einval!("blobfs: invalidate configuration for blobfs")); + } + let rafs_cfg = blob_ondemand_conf.rafs_conf.get_rafs_config()?; + if rafs_cfg.mode != "direct" { + return Err(einval!("blobfs: only 'direct' mode is supported")); + } + + // check if blob cache dir exists. 
+ let path = Path::new(blob_ondemand_conf.blob_cache_dir.as_str()); + Self::ensure_path_exist(path)?; + + let path = Path::new(blob_ondemand_conf.bootstrap_path.as_str()); + if blob_ondemand_conf.bootstrap_path.is_empty() || !path.is_file() { + return Err(einval!(format!( + "blobfs: bootstrap file {} is invalid", + path.display() + ))); + } + + let bootstrap_path = blob_ondemand_conf.bootstrap_path.clone(); + let config = Arc::new(blob_ondemand_conf.rafs_conf.clone()); + + trace!("blobfs: async create Rafs start!"); + let rafs_join_handle = std::thread::spawn(move || { + let (mut rafs, reader) = Rafs::new(&config, "blobfs", Path::new(&bootstrap_path))?; + rafs.import(reader, None)?; + Ok(rafs) + }); + + let rafs_handle = RafsHandle { + rafs: None, + thread: Some(rafs_join_handle), + }; + + Ok(BlobfsState { + blob_cache_dir: blob_ondemand_conf.blob_cache_dir.clone(), + rafs_handle: RwLock::new(rafs_handle), + inode_map: Mutex::new(HashMap::new()), + }) + } + + fn get_blob_id_and_size(&self, inode: Inode) -> io::Result<(String, u64)> { + let mut map = self.state.inode_map.lock().unwrap(); + match map.entry(inode) { + std::collections::hash_map::Entry::Occupied(v) => { + let (sz, blob_id) = v.get(); + Ok((blob_id.to_string(), *sz)) + } + std::collections::hash_map::Entry::Vacant(entry) => { + // locate blob file that the inode refers to + let blob_id_full_path = self.pfs.readlinkat_proc_file(inode)?; + let blob_file = Self::open_file( + libc::AT_FDCWD, + blob_id_full_path.as_path(), + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + 0, + ) + .map_err(|e| einval!(e))?; + let st = Self::stat(&blob_file).map_err(|e| { + error!("get_blob_id_and_size: stat failed {:?}", e); + e + })?; + if st.st_size < 0 { + return Err(einval!(format!( + "load_chunks_on_demand: blob_id {:?}, size: {:?} is less than 0", + blob_id_full_path.display(), + st.st_size + ))); + } + + let blob_id = blob_id_full_path + .file_name() + .ok_or_else(|| einval!("blobfs: failed to find blob file"))?; + let blob_id = blob_id + .to_os_string() + .into_string() + .map_err(|_e| einval!("blobfs: failed to get blob id from file name"))?; + trace!("load_chunks_on_demand: blob_id {}", blob_id); + entry.insert((st.st_size as u64, blob_id.clone())); + + Ok((blob_id, st.st_size as u64)) + } + } + } + + fn stat(f: &File) -> io::Result { + // Safe because this is a constant value and a valid C string. + let pathname = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; + let mut st = MaybeUninit::::zeroed(); + + // Safe because the kernel will only write data in `st` and we check the return value. + let res = unsafe { + libc::fstatat64( + f.as_raw_fd(), + pathname.as_ptr(), + st.as_mut_ptr(), + libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, + ) + }; + if res >= 0 { + // Safe because the kernel guarantees that the struct is now fully initialized. + Ok(unsafe { st.assume_init() }) + } else { + Err(io::Error::last_os_error()) + } + } + + fn open_file(dfd: i32, pathname: &Path, flags: i32, mode: u32) -> io::Result { + let pathname = CString::new(pathname.as_os_str().as_bytes()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + let fd = if flags & libc::O_CREAT == libc::O_CREAT { + unsafe { libc::openat(dfd, pathname.as_ptr(), flags, mode) } + } else { + unsafe { libc::openat(dfd, pathname.as_ptr(), flags) } + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. 
+ Ok(unsafe { File::from_raw_fd(fd) }) + } +} + +impl BackendFileSystem for BlobFs { + fn mount(&self) -> io::Result<(Entry, u64)> { + let ctx = &Context::default(); + let name = CString::new(".").unwrap(); + let entry = self.lookup(ctx, ROOT_ID, name.as_c_str())?; + + Ok((entry, VFS_MAX_INO)) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +#[cfg(test)] +mod tests { + use crate::metadata::{RafsMode, RafsSuper}; + use crate::{RafsIoRead, RafsIoReader, RafsIoWrite, RafsIterator}; + use std::fs::OpenOptions; + use std::io::Write; + use std::path::PathBuf; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_rafs_io_writer() { + let mut file = TempFile::new().unwrap().into_file(); + + assert!(file.validate_alignment(2, 8).is_err()); + assert!(file.validate_alignment(7, 8).is_err()); + assert!(file.validate_alignment(9, 8).is_err()); + assert!(file.validate_alignment(8, 8).is_ok()); + + file.write_all(&[0x0u8; 7]).unwrap(); + assert!(file.validate_alignment(8, 8).is_err()); + { + let obj: &mut dyn RafsIoWrite = &mut file; + obj.write_padding(1).unwrap(); + } + assert!(file.validate_alignment(8, 8).is_ok()); + file.write_all(&[0x0u8; 1]).unwrap(); + assert!(file.validate_alignment(8, 8).is_err()); + + let obj: &mut dyn RafsIoRead = &mut file; + assert_eq!(obj.seek_to_offset(0).unwrap(), 0); + assert_eq!(obj.seek_plus_offset(7).unwrap(), 7); + assert_eq!(obj.seek_to_next_aligned(7, 8).unwrap(), 8); + assert_eq!(obj.seek_plus_offset(7).unwrap(), 15); + } + + #[test] + fn test_rafs_iterator() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); + let bootstrap = OpenOptions::new() + .read(true) + .write(false) + .open(&path) + .unwrap(); + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: false, + ..Default::default() + }; + rs.load(&mut (Box::new(bootstrap) as RafsIoReader)).unwrap(); + let iter = RafsIterator::new(&rs); + + let mut last = false; + for (idx, (_node, path)) in iter.enumerate() { + assert!(!last); + if idx == 1 { + assert_eq!(path, PathBuf::from("/bin")); + } else if idx == 2 { + assert_eq!(path, PathBuf::from("/boot")); + } else if idx == 3 { + assert_eq!(path, PathBuf::from("/dev")); + } else if idx == 10 { + assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.256color")); + } else if idx == 11 { + assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.lightbgcolor")); + } else if path == PathBuf::from("/var/yp") { + last = true; + } + } + assert!(last); + } +} diff --git a/rafs/src/blobfs/sync_io.rs b/rafs/src/blobfs/sync_io.rs index 70860b0e682..4ec1226c0f3 100644 --- a/rafs/src/blobfs/sync_io.rs +++ b/rafs/src/blobfs/sync_io.rs @@ -1,422 +1,422 @@ -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE-BSD-3-Clause file. 
- -use std::ffi::CStr; -use std::io; -use std::time::Duration; - -use fuse_backend_rs::abi::fuse_abi::{CreateIn, FsOptions, OpenOptions, SetattrValid}; -use fuse_backend_rs::abi::virtio_fs; -use fuse_backend_rs::api::filesystem::{ - Context, DirEntry, Entry, FileSystem, GetxattrReply, ListxattrReply, ZeroCopyReader, - ZeroCopyWriter, -}; -use fuse_backend_rs::transport::FsCacheReqHandler; -use nydus_api::eacces; -use nydus_utils::{round_down, round_up}; - -use super::*; -use crate::fs::Handle; -use crate::metadata::Inode; - -const MAPPING_UNIT_SIZE: u64 = 0x200000; - -impl BlobfsState { - fn fetch_range_sync(&self, prefetches: &[BlobPrefetchRequest]) -> io::Result<()> { - let rafs_handle = self.rafs_handle.read().unwrap(); - match rafs_handle.rafs.as_ref() { - Some(rafs) => rafs.fetch_range_synchronous(prefetches), - None => Err(einval!("blobfs: failed to initialize RAFS filesystem.")), - } - } -} - -impl BlobFs { - // prepare BlobPrefetchRequest and call device.prefetch(). - // Make sure prefetch doesn't use delay_persist as we need the data immediately. - fn load_chunks_on_demand(&self, inode: Inode, offset: u64, len: u64) -> io::Result<()> { - let (blob_id, size) = self.get_blob_id_and_size(inode)?; - if size <= offset || offset.checked_add(len).is_none() { - return Err(einval!(format!( - "blobfs: blob_id {:?}, offset {:?} is larger than size {:?}", - blob_id, offset, size - ))); - } - - let end = std::cmp::min(offset + len, size); - let len = end - offset; - let req = BlobPrefetchRequest { - blob_id, - offset, - len, - }; - - self.state.fetch_range_sync(&[req]).map_err(|e| { - warn!("blobfs: failed to load data, {:?}", e); - e - }) - } -} - -impl FileSystem for BlobFs { - type Inode = Inode; - type Handle = Handle; - - fn init(&self, capable: FsOptions) -> io::Result { - self.state.get_rafs_handle()?; - self.pfs.init(capable) - } - - fn destroy(&self) { - self.pfs.destroy() - } - - fn lookup(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result { - self.pfs.lookup(_ctx, parent, name) - } - - fn forget(&self, _ctx: &Context, inode: Inode, count: u64) { - self.pfs.forget(_ctx, inode, count) - } - - fn batch_forget(&self, _ctx: &Context, requests: Vec<(Inode, u64)>) { - self.pfs.batch_forget(_ctx, requests) - } - - fn getattr( - &self, - _ctx: &Context, - inode: Inode, - _handle: Option, - ) -> io::Result<(libc::stat64, Duration)> { - self.pfs.getattr(_ctx, inode, _handle) - } - - fn setattr( - &self, - _ctx: &Context, - _inode: Inode, - _attr: libc::stat64, - _handle: Option, - _valid: SetattrValid, - ) -> io::Result<(libc::stat64, Duration)> { - Err(eacces!("Setattr request is not allowed in blobfs")) - } - - fn readlink(&self, _ctx: &Context, inode: Inode) -> io::Result> { - self.pfs.readlink(_ctx, inode) - } - - fn symlink( - &self, - _ctx: &Context, - _linkname: &CStr, - _parent: Inode, - _name: &CStr, - ) -> io::Result { - Err(eacces!("Symlink request is not allowed in blobfs")) - } - - fn mknod( - &self, - _ctx: &Context, - _parent: Inode, - _name: &CStr, - _mode: u32, - _rdev: u32, - _umask: u32, - ) -> io::Result { - Err(eacces!("Mknod request is not allowed in blobfs")) - } - - fn mkdir( - &self, - _ctx: &Context, - _parent: Inode, - _name: &CStr, - _mode: u32, - _umask: u32, - ) -> io::Result { - Err(eacces!("Mkdir request is not allowed in blobfs")) - } - - fn unlink(&self, _ctx: &Context, _parent: Inode, _name: &CStr) -> io::Result<()> { - Err(eacces!("Unlink request is not allowed in blobfs")) - } - - fn rmdir(&self, _ctx: &Context, _parent: Inode, _name: &CStr) 
-> io::Result<()> { - Err(eacces!("Rmdir request is not allowed in blobfs")) - } - - fn rename( - &self, - _ctx: &Context, - _olddir: Inode, - _oldname: &CStr, - _newdir: Inode, - _newname: &CStr, - _flags: u32, - ) -> io::Result<()> { - Err(eacces!("Rename request is not allowed in blobfs")) - } - - fn link( - &self, - _ctx: &Context, - _inode: Inode, - _newparent: Inode, - _newname: &CStr, - ) -> io::Result { - Err(eacces!("Link request is not allowed in blobfs")) - } - - fn open( - &self, - _ctx: &Context, - inode: Inode, - flags: u32, - _fuse_flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - self.pfs.open(_ctx, inode, flags, _fuse_flags) - } - - fn create( - &self, - _ctx: &Context, - _parent: Inode, - _name: &CStr, - _args: CreateIn, - ) -> io::Result<(Entry, Option, OpenOptions)> { - Err(eacces!("Create request is not allowed in blobfs")) - } - - fn read( - &self, - ctx: &Context, - inode: Inode, - handle: Handle, - w: &mut dyn ZeroCopyWriter, - size: u32, - offset: u64, - lock_owner: Option, - flags: u32, - ) -> io::Result { - self.load_chunks_on_demand(inode, offset, size as u64)?; - self.pfs - .read(ctx, inode, handle, w, size, offset, lock_owner, flags) - } - - fn write( - &self, - _ctx: &Context, - _inode: Inode, - _handle: Handle, - _r: &mut dyn ZeroCopyReader, - _size: u32, - _offset: u64, - _lock_owner: Option, - _delayed_write: bool, - _flags: u32, - _fuse_flags: u32, - ) -> io::Result { - Err(eacces!("Write request is not allowed in blobfs")) - } - - fn flush( - &self, - _ctx: &Context, - inode: Inode, - handle: Handle, - _lock_owner: u64, - ) -> io::Result<()> { - self.pfs.flush(_ctx, inode, handle, _lock_owner) - } - - fn fsync( - &self, - _ctx: &Context, - inode: Inode, - datasync: bool, - handle: Handle, - ) -> io::Result<()> { - self.pfs.fsync(_ctx, inode, datasync, handle) - } - - fn fallocate( - &self, - _ctx: &Context, - _inode: Inode, - _handle: Handle, - _mode: u32, - _offset: u64, - _length: u64, - ) -> io::Result<()> { - Err(eacces!("Fallocate request is not allowed in blobfs")) - } - - fn release( - &self, - _ctx: &Context, - inode: Inode, - _flags: u32, - handle: Handle, - _flush: bool, - _flock_release: bool, - _lock_owner: Option, - ) -> io::Result<()> { - self.pfs.release( - _ctx, - inode, - _flags, - handle, - _flush, - _flock_release, - _lock_owner, - ) - } - - fn statfs(&self, _ctx: &Context, inode: Inode) -> io::Result { - self.pfs.statfs(_ctx, inode) - } - - fn setxattr( - &self, - _ctx: &Context, - _inode: Inode, - _name: &CStr, - _value: &[u8], - _flags: u32, - ) -> io::Result<()> { - Err(eacces!("Setxattr request is not allowed in blobfs")) - } - - fn getxattr( - &self, - _ctx: &Context, - inode: Inode, - name: &CStr, - size: u32, - ) -> io::Result { - self.pfs.getxattr(_ctx, inode, name, size) - } - - fn listxattr(&self, _ctx: &Context, inode: Inode, size: u32) -> io::Result { - self.pfs.listxattr(_ctx, inode, size) - } - - fn removexattr(&self, _ctx: &Context, _inode: Inode, _name: &CStr) -> io::Result<()> { - Err(eacces!("Removexattr request is not allowed in blobfs")) - } - - fn opendir( - &self, - _ctx: &Context, - inode: Inode, - flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - self.pfs.opendir(_ctx, inode, flags) - } - - fn readdir( - &self, - _ctx: &Context, - inode: Inode, - handle: Handle, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry) -> io::Result, - ) -> io::Result<()> { - self.pfs - .readdir(_ctx, inode, handle, size, offset, add_entry) - } - - fn readdirplus( - &self, - _ctx: &Context, - inode: Inode, 
- handle: Handle, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry, Entry) -> io::Result, - ) -> io::Result<()> { - self.pfs - .readdirplus(_ctx, inode, handle, size, offset, add_entry) - } - - fn fsyncdir( - &self, - ctx: &Context, - inode: Inode, - datasync: bool, - handle: Handle, - ) -> io::Result<()> { - self.pfs.fsyncdir(ctx, inode, datasync, handle) - } - - fn releasedir( - &self, - _ctx: &Context, - inode: Inode, - _flags: u32, - handle: Handle, - ) -> io::Result<()> { - self.pfs.releasedir(_ctx, inode, _flags, handle) - } - - fn setupmapping( - &self, - _ctx: &Context, - inode: Inode, - _handle: Handle, - foffset: u64, - len: u64, - flags: u64, - moffset: u64, - vu_req: &mut dyn FsCacheReqHandler, - ) -> io::Result<()> { - if (flags & virtio_fs::SetupmappingFlags::WRITE.bits()) != 0 { - return Err(eacces!("blob file cannot write in dax")); - } - if foffset.checked_add(len).is_none() || foffset + len > u64::MAX - MAPPING_UNIT_SIZE { - return Err(einval!(format!( - "blobfs: invalid offset 0x{:x} and len 0x{:x}", - foffset, len - ))); - } - - let end = round_up(foffset + len, MAPPING_UNIT_SIZE); - let offset = round_down(foffset, MAPPING_UNIT_SIZE); - let len = end - offset; - self.load_chunks_on_demand(inode, offset, len)?; - - self.pfs - .setupmapping(_ctx, inode, _handle, foffset, len, flags, moffset, vu_req) - } - - fn removemapping( - &self, - _ctx: &Context, - _inode: Inode, - requests: Vec, - vu_req: &mut dyn FsCacheReqHandler, - ) -> io::Result<()> { - self.pfs.removemapping(_ctx, _inode, requests, vu_req) - } - - fn access(&self, ctx: &Context, inode: Inode, mask: u32) -> io::Result<()> { - self.pfs.access(ctx, inode, mask) - } - - fn lseek( - &self, - _ctx: &Context, - inode: Inode, - handle: Handle, - offset: u64, - whence: u32, - ) -> io::Result { - self.pfs.lseek(_ctx, inode, handle, offset, whence) - } -} +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// Copyright 2019 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. + +use std::ffi::CStr; +use std::io; +use std::time::Duration; + +use fuse_backend_rs::abi::fuse_abi::{CreateIn, FsOptions, OpenOptions, SetattrValid}; +use fuse_backend_rs::abi::virtio_fs; +use fuse_backend_rs::api::filesystem::{ + Context, DirEntry, Entry, FileSystem, GetxattrReply, ListxattrReply, ZeroCopyReader, + ZeroCopyWriter, +}; +use fuse_backend_rs::transport::FsCacheReqHandler; +use nydus_api::eacces; +use nydus_utils::{round_down, round_up}; + +use super::*; +use crate::fs::Handle; +use crate::metadata::Inode; + +const MAPPING_UNIT_SIZE: u64 = 0x200000; + +impl BlobfsState { + fn fetch_range_sync(&self, prefetches: &[BlobPrefetchRequest]) -> io::Result<()> { + let rafs_handle = self.rafs_handle.read().unwrap(); + match rafs_handle.rafs.as_ref() { + Some(rafs) => rafs.fetch_range_synchronous(prefetches), + None => Err(einval!("blobfs: failed to initialize RAFS filesystem.")), + } + } +} + +impl BlobFs { + // prepare BlobPrefetchRequest and call device.prefetch(). + // Make sure prefetch doesn't use delay_persist as we need the data immediately. 
+ fn load_chunks_on_demand(&self, inode: Inode, offset: u64, len: u64) -> io::Result<()> { + let (blob_id, size) = self.get_blob_id_and_size(inode)?; + if size <= offset || offset.checked_add(len).is_none() { + return Err(einval!(format!( + "blobfs: blob_id {:?}, offset {:?} is larger than size {:?}", + blob_id, offset, size + ))); + } + + let end = std::cmp::min(offset + len, size); + let len = end - offset; + let req = BlobPrefetchRequest { + blob_id, + offset, + len, + }; + + self.state.fetch_range_sync(&[req]).map_err(|e| { + warn!("blobfs: failed to load data, {:?}", e); + e + }) + } +} + +impl FileSystem for BlobFs { + type Inode = Inode; + type Handle = Handle; + + fn init(&self, capable: FsOptions) -> io::Result { + self.state.get_rafs_handle()?; + self.pfs.init(capable) + } + + fn destroy(&self) { + self.pfs.destroy() + } + + fn lookup(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result { + self.pfs.lookup(_ctx, parent, name) + } + + fn forget(&self, _ctx: &Context, inode: Inode, count: u64) { + self.pfs.forget(_ctx, inode, count) + } + + fn batch_forget(&self, _ctx: &Context, requests: Vec<(Inode, u64)>) { + self.pfs.batch_forget(_ctx, requests) + } + + fn getattr( + &self, + _ctx: &Context, + inode: Inode, + _handle: Option, + ) -> io::Result<(libc::stat64, Duration)> { + self.pfs.getattr(_ctx, inode, _handle) + } + + fn setattr( + &self, + _ctx: &Context, + _inode: Inode, + _attr: libc::stat64, + _handle: Option, + _valid: SetattrValid, + ) -> io::Result<(libc::stat64, Duration)> { + Err(eacces!("Setattr request is not allowed in blobfs")) + } + + fn readlink(&self, _ctx: &Context, inode: Inode) -> io::Result> { + self.pfs.readlink(_ctx, inode) + } + + fn symlink( + &self, + _ctx: &Context, + _linkname: &CStr, + _parent: Inode, + _name: &CStr, + ) -> io::Result { + Err(eacces!("Symlink request is not allowed in blobfs")) + } + + fn mknod( + &self, + _ctx: &Context, + _parent: Inode, + _name: &CStr, + _mode: u32, + _rdev: u32, + _umask: u32, + ) -> io::Result { + Err(eacces!("Mknod request is not allowed in blobfs")) + } + + fn mkdir( + &self, + _ctx: &Context, + _parent: Inode, + _name: &CStr, + _mode: u32, + _umask: u32, + ) -> io::Result { + Err(eacces!("Mkdir request is not allowed in blobfs")) + } + + fn unlink(&self, _ctx: &Context, _parent: Inode, _name: &CStr) -> io::Result<()> { + Err(eacces!("Unlink request is not allowed in blobfs")) + } + + fn rmdir(&self, _ctx: &Context, _parent: Inode, _name: &CStr) -> io::Result<()> { + Err(eacces!("Rmdir request is not allowed in blobfs")) + } + + fn rename( + &self, + _ctx: &Context, + _olddir: Inode, + _oldname: &CStr, + _newdir: Inode, + _newname: &CStr, + _flags: u32, + ) -> io::Result<()> { + Err(eacces!("Rename request is not allowed in blobfs")) + } + + fn link( + &self, + _ctx: &Context, + _inode: Inode, + _newparent: Inode, + _newname: &CStr, + ) -> io::Result { + Err(eacces!("Link request is not allowed in blobfs")) + } + + fn open( + &self, + _ctx: &Context, + inode: Inode, + flags: u32, + _fuse_flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.pfs.open(_ctx, inode, flags, _fuse_flags) + } + + fn create( + &self, + _ctx: &Context, + _parent: Inode, + _name: &CStr, + _args: CreateIn, + ) -> io::Result<(Entry, Option, OpenOptions)> { + Err(eacces!("Create request is not allowed in blobfs")) + } + + fn read( + &self, + ctx: &Context, + inode: Inode, + handle: Handle, + w: &mut dyn ZeroCopyWriter, + size: u32, + offset: u64, + lock_owner: Option, + flags: u32, + ) -> io::Result { + 
self.load_chunks_on_demand(inode, offset, size as u64)?; + self.pfs + .read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + + fn write( + &self, + _ctx: &Context, + _inode: Inode, + _handle: Handle, + _r: &mut dyn ZeroCopyReader, + _size: u32, + _offset: u64, + _lock_owner: Option, + _delayed_write: bool, + _flags: u32, + _fuse_flags: u32, + ) -> io::Result { + Err(eacces!("Write request is not allowed in blobfs")) + } + + fn flush( + &self, + _ctx: &Context, + inode: Inode, + handle: Handle, + _lock_owner: u64, + ) -> io::Result<()> { + self.pfs.flush(_ctx, inode, handle, _lock_owner) + } + + fn fsync( + &self, + _ctx: &Context, + inode: Inode, + datasync: bool, + handle: Handle, + ) -> io::Result<()> { + self.pfs.fsync(_ctx, inode, datasync, handle) + } + + fn fallocate( + &self, + _ctx: &Context, + _inode: Inode, + _handle: Handle, + _mode: u32, + _offset: u64, + _length: u64, + ) -> io::Result<()> { + Err(eacces!("Fallocate request is not allowed in blobfs")) + } + + fn release( + &self, + _ctx: &Context, + inode: Inode, + _flags: u32, + handle: Handle, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> io::Result<()> { + self.pfs.release( + _ctx, + inode, + _flags, + handle, + _flush, + _flock_release, + _lock_owner, + ) + } + + fn statfs(&self, _ctx: &Context, inode: Inode) -> io::Result { + self.pfs.statfs(_ctx, inode) + } + + fn setxattr( + &self, + _ctx: &Context, + _inode: Inode, + _name: &CStr, + _value: &[u8], + _flags: u32, + ) -> io::Result<()> { + Err(eacces!("Setxattr request is not allowed in blobfs")) + } + + fn getxattr( + &self, + _ctx: &Context, + inode: Inode, + name: &CStr, + size: u32, + ) -> io::Result { + self.pfs.getxattr(_ctx, inode, name, size) + } + + fn listxattr(&self, _ctx: &Context, inode: Inode, size: u32) -> io::Result { + self.pfs.listxattr(_ctx, inode, size) + } + + fn removexattr(&self, _ctx: &Context, _inode: Inode, _name: &CStr) -> io::Result<()> { + Err(eacces!("Removexattr request is not allowed in blobfs")) + } + + fn opendir( + &self, + _ctx: &Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.pfs.opendir(_ctx, inode, flags) + } + + fn readdir( + &self, + _ctx: &Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry) -> io::Result, + ) -> io::Result<()> { + self.pfs + .readdir(_ctx, inode, handle, size, offset, add_entry) + } + + fn readdirplus( + &self, + _ctx: &Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry, Entry) -> io::Result, + ) -> io::Result<()> { + self.pfs + .readdirplus(_ctx, inode, handle, size, offset, add_entry) + } + + fn fsyncdir( + &self, + ctx: &Context, + inode: Inode, + datasync: bool, + handle: Handle, + ) -> io::Result<()> { + self.pfs.fsyncdir(ctx, inode, datasync, handle) + } + + fn releasedir( + &self, + _ctx: &Context, + inode: Inode, + _flags: u32, + handle: Handle, + ) -> io::Result<()> { + self.pfs.releasedir(_ctx, inode, _flags, handle) + } + + fn setupmapping( + &self, + _ctx: &Context, + inode: Inode, + _handle: Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + vu_req: &mut dyn FsCacheReqHandler, + ) -> io::Result<()> { + if (flags & virtio_fs::SetupmappingFlags::WRITE.bits()) != 0 { + return Err(eacces!("blob file cannot write in dax")); + } + if foffset.checked_add(len).is_none() || foffset + len > u64::MAX - MAPPING_UNIT_SIZE { + return Err(einval!(format!( + "blobfs: invalid offset 0x{:x} and len 0x{:x}", + 
foffset, len + ))); + } + + let end = round_up(foffset + len, MAPPING_UNIT_SIZE); + let offset = round_down(foffset, MAPPING_UNIT_SIZE); + let len = end - offset; + self.load_chunks_on_demand(inode, offset, len)?; + + self.pfs + .setupmapping(_ctx, inode, _handle, foffset, len, flags, moffset, vu_req) + } + + fn removemapping( + &self, + _ctx: &Context, + _inode: Inode, + requests: Vec, + vu_req: &mut dyn FsCacheReqHandler, + ) -> io::Result<()> { + self.pfs.removemapping(_ctx, _inode, requests, vu_req) + } + + fn access(&self, ctx: &Context, inode: Inode, mask: u32) -> io::Result<()> { + self.pfs.access(ctx, inode, mask) + } + + fn lseek( + &self, + _ctx: &Context, + inode: Inode, + handle: Handle, + offset: u64, + whence: u32, + ) -> io::Result { + self.pfs.lseek(_ctx, inode, handle, offset, whence) + } +} diff --git a/rafs/src/fs.rs b/rafs/src/fs.rs index 280b946516b..0f33b135b15 100644 --- a/rafs/src/fs.rs +++ b/rafs/src/fs.rs @@ -1,1074 +1,1074 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 -// -// A container image Registry Acceleration File System. - -//! The Rafs API layer to glue fuse, storage backend and filesystem metadata together. -//! -//! This module is core to glue fuse, filesystem format and storage backend. The main API provided -//! by this module is the [Rafs](struct.Rafs.html) structures, which implements the -//! `fuse_backend_rs::FileSystem` trait, so an instance of [Rafs] could be registered to a fuse -//! backend server. A [Rafs] instance receives fuse requests from a fuse backend server, parsing -//! the request and filesystem metadata, and eventually ask the storage backend to fetch requested -//! data. There are also [FsPrefetchControl](struct.FsPrefetchControl.html) and -//! [RafsConfig](struct.RafsConfig.html) to configure an [Rafs] instance. - -use std::any::Any; -use std::cmp; -use std::ffi::{CStr, OsStr, OsString}; -use std::io::Result; -use std::ops::Deref; -use std::os::unix::ffi::OsStrExt; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; - -use fuse_backend_rs::abi::fuse_abi::Attr; -use fuse_backend_rs::abi::fuse_abi::{stat64, statvfs64}; -use fuse_backend_rs::api::filesystem::*; -use fuse_backend_rs::api::BackendFileSystem; -use nix::unistd::{getegid, geteuid}; - -use nydus_api::ConfigV2; -use nydus_storage::device::{BlobDevice, BlobIoVec, BlobPrefetchRequest}; -use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; -use nydus_utils::{ - div_round_up, - metrics::{self, FopRecorder, StatsFop::*}, -}; - -use crate::metadata::{ - Inode, RafsInode, RafsInodeWalkAction, RafsSuper, RafsSuperMeta, DOT, DOTDOT, -}; -use crate::{RafsError, RafsIoReader, RafsResult}; - -/// Type of RAFS fuse handle. -pub type Handle = u64; - -/// Rafs default attribute timeout value. -pub const RAFS_DEFAULT_ATTR_TIMEOUT: u64 = 1 << 32; -/// Rafs default entry timeout value. -pub const RAFS_DEFAULT_ENTRY_TIMEOUT: u64 = RAFS_DEFAULT_ATTR_TIMEOUT; - -/// Struct to glue fuse, storage backend and filesystem metadata together. -/// -/// The [Rafs](struct.Rafs.html) structure implements the `fuse_backend_rs::FileSystem` trait, -/// so an instance of [Rafs] could be registered to a fuse backend server. A [Rafs] instance -/// receives fuse requests from a fuse backend server, parsing the request and filesystem metadata, -/// and eventually ask the storage backend to fetch requested data. 
-pub struct Rafs { - id: String, - device: BlobDevice, - ios: Arc, - sb: Arc, - - initialized: bool, - digest_validate: bool, - fs_prefetch: bool, - prefetch_all: bool, - xattr_enabled: bool, - user_io_batch_size: u32, - - // static inode attributes - i_uid: u32, - i_gid: u32, - i_time: u64, -} - -impl Rafs { - /// Create a new instance of `Rafs`. - pub fn new(cfg: &Arc, id: &str, path: &Path) -> RafsResult<(Self, RafsIoReader)> { - // Assume all meta/data blobs are accessible, otherwise it will always cause IO errors. - cfg.internal.set_blob_accessible(true); - - let cache_cfg = cfg.get_cache_config().map_err(RafsError::LoadConfig)?; - let rafs_cfg = cfg.get_rafs_config().map_err(RafsError::LoadConfig)?; - let (sb, reader) = RafsSuper::load_from_file(path, cfg.clone(), false) - .map_err(RafsError::FillSuperBlock)?; - let blob_infos = sb.superblock.get_blob_infos(); - let device = BlobDevice::new(cfg, &blob_infos).map_err(RafsError::CreateDevice)?; - - if cfg.is_chunk_validation_enabled() && sb.meta.has_inlined_chunk_digest() { - sb.superblock.set_blob_device(device.clone()); - } - - let rafs = Rafs { - id: id.to_string(), - device, - ios: metrics::FsIoStats::new(id), - sb: Arc::new(sb), - - initialized: false, - digest_validate: rafs_cfg.validate, - fs_prefetch: rafs_cfg.prefetch.enable, - user_io_batch_size: rafs_cfg.user_io_batch_size as u32, - prefetch_all: rafs_cfg.prefetch.prefetch_all, - xattr_enabled: rafs_cfg.enable_xattr, - - i_uid: geteuid().into(), - i_gid: getegid().into(), - i_time: SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(), - }; - - // Rafs v6 does must store chunk info into local file cache. So blob cache is required - if rafs.metadata().is_v6() { - if cache_cfg.cache_type != "blobcache" && cache_cfg.cache_type != "filecache" { - return Err(RafsError::Configure( - "Rafs v6 must have local blobcache configured".to_string(), - )); - } - - if rafs_cfg.validate { - return Err(RafsError::Configure( - "Rafs v6 doesn't support integrity validation yet".to_string(), - )); - } - } - - rafs.ios.toggle_files_recording(rafs_cfg.iostats_files); - rafs.ios.toggle_access_pattern(rafs_cfg.access_pattern); - rafs.ios - .toggle_latest_read_files_recording(rafs_cfg.latest_read_files); - - Ok((rafs, reader)) - } - - /// Update storage backend for blobs. - pub fn update(&self, r: &mut RafsIoReader, conf: &Arc) -> RafsResult<()> { - info!("update"); - if !self.initialized { - warn!("Rafs is not yet initialized"); - return Err(RafsError::Uninitialized); - } - - // TODO: seems no need to do self.sb.update() - // step 1: update sb. - // No lock is needed thanks to ArcSwap. - self.sb.update(r).map_err(|e| { - error!("update failed due to {:?}", e); - e - })?; - info!("update sb is successful"); - - // step 2: update device (only localfs is supported) - let blob_infos = self.sb.superblock.get_blob_infos(); - self.device - .update(conf, &blob_infos, self.fs_prefetch) - .map_err(RafsError::SwapBackend)?; - info!("update device is successful"); - - Ok(()) - } - - /// Import an rafs bootstrap to initialize the filesystem instance. - pub fn import( - &mut self, - r: RafsIoReader, - prefetch_files: Option>, - ) -> RafsResult<()> { - if self.initialized { - return Err(RafsError::AlreadyMounted); - } - if self.fs_prefetch { - // Device should be ready before any prefetch. - self.device.start_prefetch(); - self.prefetch(r, prefetch_files); - } - self.initialized = true; - - Ok(()) - } - - /// Umount a mounted Rafs Fuse filesystem. 
- pub fn destroy(&mut self) -> Result<()> { - info! {"Destroy rafs"} - - if self.initialized { - Arc::get_mut(&mut self.sb) - .expect("Superblock is no longer used") - .destroy(); - if self.fs_prefetch { - self.device.stop_prefetch(); - } - self.device.close()?; - self.initialized = false; - } - - Ok(()) - } - - /// Get id of the filesystem instance. - pub fn id(&self) -> &str { - &self.id - } - - /// Get the cached file system super block metadata. - pub fn metadata(&self) -> &RafsSuperMeta { - &self.sb.meta - } - - fn xattr_supported(&self) -> bool { - self.xattr_enabled || self.sb.meta.has_xattr() - } - - fn do_readdir( - &self, - ino: Inode, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry) -> Result, - ) -> Result<()> { - if size == 0 { - return Ok(()); - } - - let parent = self.sb.get_inode(ino, self.digest_validate)?; - if !parent.is_dir() { - return Err(enotdir!()); - } - - let mut handler = |_inode, name: OsString, ino, offset| { - match add_entry(DirEntry { - ino, - offset, - type_: 0, - name: name.as_os_str().as_bytes(), - }) { - Ok(0) => { - self.ios.new_file_counter(ino); - Ok(RafsInodeWalkAction::Break) - } - Ok(_) => { - self.ios.new_file_counter(ino); - Ok(RafsInodeWalkAction::Continue) - } // TODO: should we check `size` here? - Err(e) => Err(e), - } - }; - - parent.walk_children_inodes(offset, &mut handler)?; - - Ok(()) - } - - fn negative_entry(&self) -> Entry { - Entry { - attr: Attr { - ..Default::default() - } - .into(), - inode: 0, - generation: 0, - attr_flags: 0, - attr_timeout: self.sb.meta.attr_timeout, - entry_timeout: self.sb.meta.entry_timeout, - } - } - - fn get_inode_attr(&self, ino: u64) -> Result { - let inode = self.sb.get_inode(ino, false)?; - let mut attr = inode.get_attr(); - - // override uid/gid if there is no explicit inode uid/gid - if !self.sb.meta.explicit_uidgid() { - attr.uid = self.i_uid; - attr.gid = self.i_gid; - } - - // Older rafs image or the root inode doesn't include mtime, in such cases - // we use runtime timestamp. - if attr.mtime == 0 { - attr.atime = self.i_time; - attr.ctime = self.i_time; - attr.mtime = self.i_time; - } - - // Only touch permissions bits. This trick is some sort of workaround - // since nydusify gives root directory permission of 0o750 and fuse mount - // options `rootmode=` does not affect root directory's permission bits, ending - // up with preventing other users from accessing the container rootfs. - let root_ino = self.root_ino(); - if attr.ino == root_ino { - attr.mode = attr.mode & !0o777 | 0o755; - } - - Ok(attr) - } - - fn get_inode_entry>(&self, inode: I) -> Entry { - let mut entry = inode.get_entry(); - - // override uid/gid if there is no explicit inode uid/gid - if !self.sb.meta.explicit_uidgid() { - entry.attr.st_uid = self.i_uid; - entry.attr.st_gid = self.i_gid; - } - - // Older rafs image doesn't include mtime, in such case we use runtime timestamp. - if entry.attr.st_mtime == 0 { - entry.attr.st_atime = self.i_time as i64; - entry.attr.st_ctime = self.i_time as i64; - entry.attr.st_mtime = self.i_time as i64; - } - - // Only touch permissions bits. This trick is some sort of workaround - // since nydusify gives root directory permission of 0o750 and fuse mount - // options `rootmode=` does not affect root directory's permission bits, ending - // up with preventing other users from accessing the container rootfs. 
- if entry.inode == ROOT_ID { - entry.attr.st_mode = entry.attr.st_mode & !0o777 | 0o755; - } - - entry - } -} - -impl Rafs { - fn prefetch(&self, reader: RafsIoReader, prefetch_files: Option>) { - let sb = self.sb.clone(); - let device = self.device.clone(); - let prefetch_all = self.prefetch_all; - let root_ino = self.root_ino(); - - let _ = std::thread::spawn(move || { - Self::do_prefetch(root_ino, reader, prefetch_files, prefetch_all, sb, device); - }); - } - - /// for blobfs - pub fn fetch_range_synchronous(&self, prefetches: &[BlobPrefetchRequest]) -> Result<()> { - self.device.fetch_range_synchronous(prefetches) - } - - fn root_ino(&self) -> u64 { - self.sb.superblock.root_ino() - } - - fn do_prefetch( - root_ino: u64, - mut reader: RafsIoReader, - prefetch_files: Option>, - prefetch_all: bool, - sb: Arc, - device: BlobDevice, - ) { - let blob_infos = sb.superblock.get_blob_infos(); - - // First do range based prefetch for rafs v6. - if sb.meta.is_v6() { - let mut prefetches = Vec::new(); - - for blob in &blob_infos { - let sz = blob.prefetch_size(); - if sz > 0 { - let mut offset = 0; - while offset < sz { - let len = cmp::min(sz - offset, RAFS_DEFAULT_CHUNK_SIZE); - prefetches.push(BlobPrefetchRequest { - blob_id: blob.blob_id().to_owned(), - offset, - len, - }); - offset += len; - } - } - } - if !prefetches.is_empty() { - device.prefetch(&[], &prefetches).unwrap_or_else(|e| { - warn!("Prefetch error, {:?}", e); - }); - } - } - - let fetcher = |desc: &mut BlobIoVec, last: bool| { - if desc.size() as u64 > RAFS_MAX_CHUNK_SIZE - || desc.len() > 1024 - || (last && desc.size() > 0) - { - trace!( - "fs prefetch: 0x{:x} bytes for {} descriptors", - desc.size(), - desc.len() - ); - device.prefetch(&[desc], &[]).unwrap_or_else(|e| { - warn!("Prefetch error, {:?}", e); - }); - desc.reset(); - } - }; - - // Bootstrap has non-empty prefetch table indicating a full prefetch - let inlay_prefetch_all = sb - .is_inlay_prefetch_all(&mut reader) - .map_err(|e| error!("Detect prefetch table error {}", e)) - .unwrap_or_default(); - - // Nydusd has a CLI option indicating a full prefetch - let startup_prefetch_all = prefetch_files - .as_ref() - .map(|f| f.len() == 1 && f[0].as_os_str() == "/") - .unwrap_or(false); - - let mut ignore_prefetch_all = false; - - // User specified prefetch files have high priority to be prefetched. - // Moreover, user specified prefetch files list will override those on-disk prefetch table. - if !startup_prefetch_all && !inlay_prefetch_all { - // Then do file based prefetch based on: - // - prefetch listed passed in by user - // - or file prefetch list in metadata - let inodes = prefetch_files.map(|files| Self::convert_file_list(&files, &sb)); - let res = sb.prefetch_files(&device, &mut reader, root_ino, inodes, &fetcher); - match res { - Ok(true) => { - ignore_prefetch_all = true; - info!("Root inode was found, but it should not prefetch all files!") - } - Ok(false) => {} - Err(e) => info!("No file to be prefetched {:?}", e), - } - } - - // Perform different policy for v5 format and v6 format as rafs v6's blobs are capable to - // download chunks and decompress them all by themselves. 
For rafs v6, directly perform - // chunk based full prefetch - if !ignore_prefetch_all && (inlay_prefetch_all || prefetch_all || startup_prefetch_all) { - if sb.meta.is_v6() { - // The larger batch size, the fewer requests to registry - let batch_size = 1024 * 1024 * 2; - - for blob in &blob_infos { - let blob_size = blob.compressed_data_size(); - let count = div_round_up(blob_size, batch_size); - - let mut pre_offset = 0u64; - - for _i in 0..count { - let req = BlobPrefetchRequest { - blob_id: blob.blob_id().to_owned(), - offset: pre_offset, - len: cmp::min(batch_size, blob_size - pre_offset), - }; - device - .prefetch(&[], &[req]) - .map_err(|e| warn!("failed to prefetch blob data, {}", e)) - .unwrap_or_default(); - pre_offset += batch_size; - if pre_offset > blob_size { - break; - } - } - } - } else { - let root = vec![root_ino]; - let res = sb.prefetch_files(&device, &mut reader, root_ino, Some(root), &fetcher); - if let Err(e) = res { - info!("No file to be prefetched {:?}", e); - } - } - } - } - - fn convert_file_list(files: &[PathBuf], sb: &Arc) -> Vec { - let mut inodes = Vec::::with_capacity(files.len()); - - for f in files { - if let Ok(inode) = sb.ino_from_path(f.as_path()) { - inodes.push(inode); - } - } - - inodes - } -} - -impl BackendFileSystem for Rafs { - fn mount(&self) -> Result<(Entry, u64)> { - let root_inode = self.sb.get_inode(self.root_ino(), self.digest_validate)?; - self.ios.new_file_counter(root_inode.ino()); - let e = self.get_inode_entry(root_inode); - Ok((e, self.sb.get_max_ino())) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl FileSystem for Rafs { - type Inode = Inode; - type Handle = Handle; - - #[cfg(target_os = "macos")] - fn init(&self, _opts: FsOptions) -> Result { - Ok( - // These fuse features are supported by rafs by default. - FsOptions::ASYNC_READ | FsOptions::BIG_WRITES | FsOptions::ATOMIC_O_TRUNC, - ) - } - - #[cfg(target_os = "linux")] - fn init(&self, _opts: FsOptions) -> Result { - Ok( - // These fuse features are supported by rafs by default. 
- FsOptions::ASYNC_READ - | FsOptions::PARALLEL_DIROPS - | FsOptions::BIG_WRITES - | FsOptions::HANDLE_KILLPRIV - | FsOptions::ASYNC_DIO - | FsOptions::HAS_IOCTL_DIR - | FsOptions::WRITEBACK_CACHE - | FsOptions::ZERO_MESSAGE_OPEN - | FsOptions::ATOMIC_O_TRUNC - | FsOptions::CACHE_SYMLINKS - | FsOptions::ZERO_MESSAGE_OPENDIR, - ) - } - - fn destroy(&self) {} - - fn lookup(&self, _ctx: &Context, ino: u64, name: &CStr) -> Result { - let mut rec = FopRecorder::settle(Lookup, ino, &self.ios); - let target = OsStr::from_bytes(name.to_bytes()); - let parent = self.sb.get_inode(ino, self.digest_validate)?; - if !parent.is_dir() { - return Err(enotdir!()); - } - - rec.mark_success(0); - if target == DOT || (ino == ROOT_ID && target == DOTDOT) { - let mut entry = self.get_inode_entry(parent); - entry.inode = ino; - Ok(entry) - } else if target == DOTDOT { - let parent = self.sb.get_extended_inode(parent.ino(), false)?; - Ok(self - .sb - .get_inode(parent.parent(), self.digest_validate) - .map(|i| self.get_inode_entry(i)) - .unwrap_or_else(|_| self.negative_entry())) - } else { - Ok(parent - .get_child_by_name(target) - .map(|i| { - self.ios.new_file_counter(i.ino()); - self.get_inode_entry(i.as_inode()) - }) - .unwrap_or_else(|_| self.negative_entry())) - } - } - - fn forget(&self, _ctx: &Context, _inode: u64, _count: u64) {} - - fn batch_forget(&self, ctx: &Context, requests: Vec<(u64, u64)>) { - for (inode, count) in requests { - self.forget(ctx, inode, count) - } - } - - fn getattr( - &self, - _ctx: &Context, - ino: u64, - _handle: Option, - ) -> Result<(stat64, Duration)> { - let mut recorder = FopRecorder::settle(Getattr, ino, &self.ios); - - let attr = self.get_inode_attr(ino).map(|r| { - recorder.mark_success(0); - r - })?; - - Ok((attr.into(), self.sb.meta.attr_timeout)) - } - - fn readlink(&self, _ctx: &Context, ino: u64) -> Result> { - let mut rec = FopRecorder::settle(Readlink, ino, &self.ios); - let inode = self.sb.get_inode(ino, self.digest_validate)?; - - Ok(inode - .get_symlink() - .map(|r| { - rec.mark_success(0); - r - })? - .as_bytes() - .to_vec()) - } - - #[allow(clippy::too_many_arguments)] - fn read( - &self, - _ctx: &Context, - ino: u64, - _handle: u64, - w: &mut dyn ZeroCopyWriter, - size: u32, - offset: u64, - _lock_owner: Option, - _flags: u32, - ) -> Result { - if offset.checked_add(size as u64).is_none() { - return Err(einval!("offset + size wraps around.")); - } - - let inode = self.sb.get_inode(ino, false)?; - let inode_size = inode.size(); - let mut recorder = FopRecorder::settle(Read, ino, &self.ios); - // Check for zero size read. - if size == 0 || offset >= inode_size { - recorder.mark_success(0); - return Ok(0); - } - - let real_size = cmp::min(size as u64, inode_size - offset); - let mut result = 0; - let mut io_vecs = inode.alloc_bio_vecs(&self.device, offset, real_size as usize, true)?; - assert!(!io_vecs.is_empty() && !io_vecs[0].is_empty()); - - // Try to amplify user io for Rafs v5, to improve performance. 
- let user_io_batch_size = - cmp::min(self.user_io_batch_size as usize, w.available_bytes()) as u32; - if self.sb.meta.is_v5() && size < user_io_batch_size { - let all_chunks_ready = self.device.all_chunks_ready(&io_vecs); - if !all_chunks_ready { - let chunk_mask = self.metadata().chunk_size as u64 - 1; - let next_chunk_base = (offset + (size as u64) + chunk_mask) & !chunk_mask; - let window_base = cmp::min(next_chunk_base, inode_size); - let actual_size = window_base - (offset & !chunk_mask); - if actual_size < user_io_batch_size as u64 { - let window_size = user_io_batch_size as u64 - actual_size; - let orig_cnt = io_vecs.iter().fold(0, |s, d| s + d.len()); - self.sb.amplify_user_io( - &self.device, - user_io_batch_size, - &mut io_vecs, - &inode, - window_base, - window_size, - )?; - let new_cnt = io_vecs.iter().fold(0, |s, d| s + d.len()); - trace!( - "amplify RAFS v5 read from {} to {} chunks", - orig_cnt, - new_cnt - ); - } - } - } - - let start = self.ios.latency_start(); - for io_vec in io_vecs.iter_mut() { - assert!(!io_vec.is_empty()); - assert_ne!(io_vec.size(), 0); - - // Avoid copying `desc` - let r = self.device.read_to(w, io_vec)?; - result += r; - recorder.mark_success(r); - if r as u64 != io_vec.size() { - break; - } - } - self.ios.latency_end(&start, Read); - - Ok(result) - } - - fn open( - &self, - _ctx: &Context, - _inode: Self::Inode, - _flags: u32, - _fuse_flags: u32, - ) -> Result<(Option, OpenOptions, Option)> { - // Keep cache since we are readonly - Ok((None, OpenOptions::KEEP_CACHE, None)) - } - - fn release( - &self, - _ctx: &Context, - _inode: u64, - _flags: u32, - _handle: u64, - _flush: bool, - _flock_release: bool, - _lock_owner: Option, - ) -> Result<()> { - Ok(()) - } - - fn statfs(&self, _ctx: &Context, _inode: u64) -> Result { - // Safe because we are zero-initializing a struct with only POD fields. - let mut st: statvfs64 = unsafe { std::mem::zeroed() }; - - // This matches the behavior of libfuse as it returns these values if the - // filesystem doesn't implement this method. - st.f_namemax = 255; - st.f_bsize = 512; - st.f_fsid = self.sb.meta.magic as u64; - #[cfg(target_os = "macos")] - { - st.f_files = self.sb.meta.inodes_count as u32; - } - - #[cfg(target_os = "linux")] - { - st.f_files = self.sb.meta.inodes_count; - } - - Ok(st) - } - - fn getxattr( - &self, - _ctx: &Context, - inode: u64, - name: &CStr, - size: u32, - ) -> Result { - let mut recorder = FopRecorder::settle(Getxattr, inode, &self.ios); - - if !self.xattr_supported() { - return Err(std::io::Error::from_raw_os_error(libc::ENOSYS)); - } - - let name = OsStr::from_bytes(name.to_bytes()); - let inode = self.sb.get_inode(inode, false)?; - let value = inode.get_xattr(name)?; - let r = match value { - Some(value) => match size { - 0 => Ok(GetxattrReply::Count((value.len() + 1) as u32)), - x if x < value.len() as u32 => Err(std::io::Error::from_raw_os_error(libc::ERANGE)), - _ => Ok(GetxattrReply::Value(value)), - }, - None => { - // TODO: Hopefully, we can have a 'decorator' procedure macro in - // the future to wrap this method thus to handle different reasonable - // errors in a clean way. 
- recorder.mark_success(0); - Err(std::io::Error::from_raw_os_error(libc::ENODATA)) - } - }; - - r.map(|v| { - recorder.mark_success(0); - v - }) - } - - fn listxattr(&self, _ctx: &Context, inode: u64, size: u32) -> Result { - let mut rec = FopRecorder::settle(Listxattr, inode, &self.ios); - if !self.xattr_supported() { - return Err(std::io::Error::from_raw_os_error(libc::ENOSYS)); - } - - let inode = self.sb.get_inode(inode, false)?; - let mut count = 0; - let mut buf = Vec::new(); - for mut name in inode.get_xattrs()? { - count += name.len() + 1; - if size != 0 { - buf.append(&mut name); - buf.append(&mut vec![0u8; 1]); - } - } - - rec.mark_success(0); - - match size { - 0 => Ok(ListxattrReply::Count(count as u32)), - x if x < count as u32 => Err(std::io::Error::from_raw_os_error(libc::ERANGE)), - _ => Ok(ListxattrReply::Names(buf)), - } - } - - fn readdir( - &self, - _ctx: &Context, - inode: u64, - _handle: u64, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry) -> Result, - ) -> Result<()> { - let mut rec = FopRecorder::settle(Readdir, inode, &self.ios); - - self.do_readdir(inode, size, offset, add_entry).map(|r| { - rec.mark_success(0); - r - }) - } - - fn readdirplus( - &self, - _ctx: &Context, - ino: u64, - _handle: u64, - size: u32, - offset: u64, - add_entry: &mut dyn FnMut(DirEntry, Entry) -> Result, - ) -> Result<()> { - let mut rec = FopRecorder::settle(Readdirplus, ino, &self.ios); - - self.do_readdir(ino, size, offset, &mut |dir_entry| { - let inode = self.sb.get_inode(dir_entry.ino, self.digest_validate)?; - add_entry(dir_entry, self.get_inode_entry(inode)) - }) - .map(|r| { - rec.mark_success(0); - r - }) - } - - fn opendir( - &self, - _ctx: &Context, - _inode: Self::Inode, - _flags: u32, - ) -> Result<(Option, OpenOptions)> { - // Cache dir since we are readonly - #[cfg(target_os = "macos")] - return Ok((None, OpenOptions::KEEP_CACHE)); - #[cfg(target_os = "linux")] - return Ok((None, OpenOptions::CACHE_DIR | OpenOptions::KEEP_CACHE)); - } - - fn releasedir(&self, _ctx: &Context, _inode: u64, _flags: u32, _handle: u64) -> Result<()> { - Ok(()) - } - - fn access(&self, ctx: &Context, ino: u64, mask: u32) -> Result<()> { - let mut rec = FopRecorder::settle(Access, ino, &self.ios); - let st = self.get_inode_attr(ino)?; - let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); - - if mode == libc::F_OK { - rec.mark_success(0); - return Ok(()); - } - - if (mode & libc::R_OK) != 0 - && ctx.uid != 0 - && (st.uid != ctx.uid || st.mode & 0o400 == 0) - && (st.gid != ctx.gid || st.mode & 0o040 == 0) - && st.mode & 0o004 == 0 - { - return Err(eacces!("permission denied")); - } - - if (mode & libc::W_OK) != 0 - && ctx.uid != 0 - && (st.uid != ctx.uid || st.mode & 0o200 == 0) - && (st.gid != ctx.gid || st.mode & 0o020 == 0) - && st.mode & 0o002 == 0 - { - return Err(eacces!("permission denied")); - } - - // root can only execute something if it is executable by one of the owner, the group, or - // everyone. - if (mode & libc::X_OK) != 0 - && (ctx.uid != 0 || st.mode & 0o111 == 0) - && (st.uid != ctx.uid || st.mode & 0o100 == 0) - && (st.gid != ctx.gid || st.mode & 0o010 == 0) - && st.mode & 0o001 == 0 - { - return Err(eacces!("permission denied")); - } - - rec.mark_success(0); - Ok(()) - } -} - -#[cfg(target_os = "linux")] -// Let Rafs works as an OverlayFs layer. 
-impl Layer for Rafs { - fn root_inode(&self) -> Self::Inode { - self.root_ino() - } -} - -#[cfg(all(test, feature = "backend-oss"))] -pub(crate) mod tests { - use super::*; - use std::str::FromStr; - - pub fn new_rafs_backend() -> Box { - let config = r#" - version = 2 - id = "test" - [backend] - type = "oss" - [backend.oss] - endpoint = "test" - access_key_id = "test" - access_key_secret = "test" - bucket_name = "antsys-nydus" - object_prefix = "nydus_v2/" - scheme = "http" - [cache] - type = "filecache" - [cache.filecache] - work_dir = "." - [rafs] - mode = "direct" - validate = false - enable_xattr = true - [rafs.prefetch] - enable = true - threads = 10 - batch_size = 131072 - bandwidth_limit = 10485760 - "#; - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); - let mountpoint = "/mnt"; - let config = Arc::new(ConfigV2::from_str(config).unwrap()); - let bootstrapfile = source_path.to_str().unwrap(); - let (mut rafs, reader) = Rafs::new(&config, mountpoint, Path::new(bootstrapfile)).unwrap(); - rafs.import(reader, Some(vec![std::path::PathBuf::new()])) - .unwrap(); - Box::new(rafs) - } - - #[test] - fn it_should_create_new_rafs_fs() { - let rafs = new_rafs_backend(); - let attr = rafs.get_inode_attr(1).unwrap(); - assert_eq!(attr.ino, 1); - assert_eq!(attr.blocks, 8); - assert_eq!(attr.uid, 0); - // Root inode mode must be 0755 - assert_eq!(attr.mode & 0o777, 0o755); - } - - #[test] - fn it_should_access() { - let rafs = new_rafs_backend(); - let ctx = &Context { - gid: 0, - pid: 1, - uid: 0, - }; - if rafs.access(ctx, 1, 0).is_err() { - panic!("failed to access inode 1"); - } - } - - #[test] - fn it_should_listxattr() { - let rafs = new_rafs_backend(); - let ctx = &Context { - gid: 0, - pid: 1, - uid: 0, - }; - match rafs.listxattr(ctx, 1, 0) { - Ok(reply) => match reply { - ListxattrReply::Count(c) => assert_eq!(c, 0), - _ => panic!(), - }, - Err(_) => panic!("failed to access inode 1"), - } - } - - #[test] - fn it_should_get_statfs() { - let rafs = new_rafs_backend(); - let ctx = &Context { - gid: 0, - pid: 1, - uid: 0, - }; - match rafs.statfs(ctx, 1) { - Ok(statfs) => { - assert_eq!(statfs.f_files, 43082); - assert_eq!(statfs.f_bsize, 512); - assert_eq!(statfs.f_namemax, 255); - assert_eq!(statfs.f_fsid, 1380009555); - assert_eq!(statfs.f_ffree, 0); - } - Err(_) => panic!("failed to statfs"), - } - } - - #[test] - fn it_should_enable_xattr() { - let rafs = new_rafs_backend(); - assert!(rafs.xattr_enabled); - assert!(rafs.xattr_supported()); - } - - #[test] - fn it_should_lookup_entry() { - let rafs = new_rafs_backend(); - let ctx = &Context { - gid: 0, - pid: 1, - uid: 0, - }; - match rafs.lookup(ctx, 1, &std::ffi::CString::new("/etc").unwrap()) { - Err(_e) => { - panic!("failed to lookup /etc from ino 1"); - } - Ok(e) => { - assert_eq!(e.inode, 0); - } - } - } -} - -#[cfg(test)] -mod tests { - use nydus_utils::metrics::FsIoStats; - - use super::*; - #[test] - fn test_rafs() { - let rafs = Rafs { - id: "foo".into(), - device: BlobDevice::default(), - ios: FsIoStats::default().into(), - sb: Arc::new(RafsSuper::default()), - initialized: false, - digest_validate: false, - fs_prefetch: false, - prefetch_all: false, - xattr_enabled: false, - user_io_batch_size: 0, - i_uid: 0, - i_gid: 0, - i_time: 0, - }; - assert_eq!(rafs.id(), "foo"); - assert!(!rafs.xattr_supported()); - let ent = rafs.negative_entry(); - assert_eq!(ent.inode, 0); - 
assert_eq!(ent.generation, 0); - assert_eq!(ent.attr_flags, 0); - #[cfg(target_os = "linux")] - rafs.init(FsOptions::ASYNC_DIO).unwrap(); - rafs.open(&Context::default(), Inode::default(), 0, 0) - .unwrap(); - rafs.release( - &Context::default(), - Inode::default(), - 0, - Handle::default(), - false, - false, - Some(0), - ) - .unwrap(); - rafs.statfs(&Context::default(), Inode::default()).unwrap(); - rafs.destroy(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 +// +// A container image Registry Acceleration File System. + +//! The Rafs API layer to glue fuse, storage backend and filesystem metadata together. +//! +//! This module is core to glue fuse, filesystem format and storage backend. The main API provided +//! by this module is the [Rafs](struct.Rafs.html) structures, which implements the +//! `fuse_backend_rs::FileSystem` trait, so an instance of [Rafs] could be registered to a fuse +//! backend server. A [Rafs] instance receives fuse requests from a fuse backend server, parsing +//! the request and filesystem metadata, and eventually ask the storage backend to fetch requested +//! data. There are also [FsPrefetchControl](struct.FsPrefetchControl.html) and +//! [RafsConfig](struct.RafsConfig.html) to configure an [Rafs] instance. + +use std::any::Any; +use std::cmp; +use std::ffi::{CStr, OsStr, OsString}; +use std::io::Result; +use std::ops::Deref; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; + +use fuse_backend_rs::abi::fuse_abi::Attr; +use fuse_backend_rs::abi::fuse_abi::{stat64, statvfs64}; +use fuse_backend_rs::api::filesystem::*; +use fuse_backend_rs::api::BackendFileSystem; +use nix::unistd::{getegid, geteuid}; + +use nydus_api::ConfigV2; +use nydus_storage::device::{BlobDevice, BlobIoVec, BlobPrefetchRequest}; +use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; +use nydus_utils::{ + div_round_up, + metrics::{self, FopRecorder, StatsFop::*}, +}; + +use crate::metadata::{ + Inode, RafsInode, RafsInodeWalkAction, RafsSuper, RafsSuperMeta, DOT, DOTDOT, +}; +use crate::{RafsError, RafsIoReader, RafsResult}; + +/// Type of RAFS fuse handle. +pub type Handle = u64; + +/// Rafs default attribute timeout value. +pub const RAFS_DEFAULT_ATTR_TIMEOUT: u64 = 1 << 32; +/// Rafs default entry timeout value. +pub const RAFS_DEFAULT_ENTRY_TIMEOUT: u64 = RAFS_DEFAULT_ATTR_TIMEOUT; + +/// Struct to glue fuse, storage backend and filesystem metadata together. +/// +/// The [Rafs](struct.Rafs.html) structure implements the `fuse_backend_rs::FileSystem` trait, +/// so an instance of [Rafs] could be registered to a fuse backend server. A [Rafs] instance +/// receives fuse requests from a fuse backend server, parsing the request and filesystem metadata, +/// and eventually ask the storage backend to fetch requested data. +pub struct Rafs { + id: String, + device: BlobDevice, + ios: Arc, + sb: Arc, + + initialized: bool, + digest_validate: bool, + fs_prefetch: bool, + prefetch_all: bool, + xattr_enabled: bool, + user_io_batch_size: u32, + + // static inode attributes + i_uid: u32, + i_gid: u32, + i_time: u64, +} + +impl Rafs { + /// Create a new instance of `Rafs`. + pub fn new(cfg: &Arc, id: &str, path: &Path) -> RafsResult<(Self, RafsIoReader)> { + // Assume all meta/data blobs are accessible, otherwise it will always cause IO errors. 
+ cfg.internal.set_blob_accessible(true); + + let cache_cfg = cfg.get_cache_config().map_err(RafsError::LoadConfig)?; + let rafs_cfg = cfg.get_rafs_config().map_err(RafsError::LoadConfig)?; + let (sb, reader) = RafsSuper::load_from_file(path, cfg.clone(), false) + .map_err(RafsError::FillSuperBlock)?; + let blob_infos = sb.superblock.get_blob_infos(); + let device = BlobDevice::new(cfg, &blob_infos).map_err(RafsError::CreateDevice)?; + + if cfg.is_chunk_validation_enabled() && sb.meta.has_inlined_chunk_digest() { + sb.superblock.set_blob_device(device.clone()); + } + + let rafs = Rafs { + id: id.to_string(), + device, + ios: metrics::FsIoStats::new(id), + sb: Arc::new(sb), + + initialized: false, + digest_validate: rafs_cfg.validate, + fs_prefetch: rafs_cfg.prefetch.enable, + user_io_batch_size: rafs_cfg.user_io_batch_size as u32, + prefetch_all: rafs_cfg.prefetch.prefetch_all, + xattr_enabled: rafs_cfg.enable_xattr, + + i_uid: geteuid().into(), + i_gid: getegid().into(), + i_time: SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(), + }; + + // Rafs v6 does must store chunk info into local file cache. So blob cache is required + if rafs.metadata().is_v6() { + if cache_cfg.cache_type != "blobcache" && cache_cfg.cache_type != "filecache" { + return Err(RafsError::Configure( + "Rafs v6 must have local blobcache configured".to_string(), + )); + } + + if rafs_cfg.validate { + return Err(RafsError::Configure( + "Rafs v6 doesn't support integrity validation yet".to_string(), + )); + } + } + + rafs.ios.toggle_files_recording(rafs_cfg.iostats_files); + rafs.ios.toggle_access_pattern(rafs_cfg.access_pattern); + rafs.ios + .toggle_latest_read_files_recording(rafs_cfg.latest_read_files); + + Ok((rafs, reader)) + } + + /// Update storage backend for blobs. + pub fn update(&self, r: &mut RafsIoReader, conf: &Arc) -> RafsResult<()> { + info!("update"); + if !self.initialized { + warn!("Rafs is not yet initialized"); + return Err(RafsError::Uninitialized); + } + + // TODO: seems no need to do self.sb.update() + // step 1: update sb. + // No lock is needed thanks to ArcSwap. + self.sb.update(r).map_err(|e| { + error!("update failed due to {:?}", e); + e + })?; + info!("update sb is successful"); + + // step 2: update device (only localfs is supported) + let blob_infos = self.sb.superblock.get_blob_infos(); + self.device + .update(conf, &blob_infos, self.fs_prefetch) + .map_err(RafsError::SwapBackend)?; + info!("update device is successful"); + + Ok(()) + } + + /// Import an rafs bootstrap to initialize the filesystem instance. + pub fn import( + &mut self, + r: RafsIoReader, + prefetch_files: Option>, + ) -> RafsResult<()> { + if self.initialized { + return Err(RafsError::AlreadyMounted); + } + if self.fs_prefetch { + // Device should be ready before any prefetch. + self.device.start_prefetch(); + self.prefetch(r, prefetch_files); + } + self.initialized = true; + + Ok(()) + } + + /// Umount a mounted Rafs Fuse filesystem. + pub fn destroy(&mut self) -> Result<()> { + info! {"Destroy rafs"} + + if self.initialized { + Arc::get_mut(&mut self.sb) + .expect("Superblock is no longer used") + .destroy(); + if self.fs_prefetch { + self.device.stop_prefetch(); + } + self.device.close()?; + self.initialized = false; + } + + Ok(()) + } + + /// Get id of the filesystem instance. + pub fn id(&self) -> &str { + &self.id + } + + /// Get the cached file system super block metadata. 
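+ ///
+ /// The returned reference borrows from the superblock held by this `Rafs` instance, so it needs no locking and stays valid for the lifetime of the borrow.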
+ pub fn metadata(&self) -> &RafsSuperMeta { + &self.sb.meta + } + + fn xattr_supported(&self) -> bool { + self.xattr_enabled || self.sb.meta.has_xattr() + } + + fn do_readdir( + &self, + ino: Inode, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry) -> Result, + ) -> Result<()> { + if size == 0 { + return Ok(()); + } + + let parent = self.sb.get_inode(ino, self.digest_validate)?; + if !parent.is_dir() { + return Err(enotdir!()); + } + + let mut handler = |_inode, name: OsString, ino, offset| { + match add_entry(DirEntry { + ino, + offset, + type_: 0, + name: name.as_os_str().as_bytes(), + }) { + Ok(0) => { + self.ios.new_file_counter(ino); + Ok(RafsInodeWalkAction::Break) + } + Ok(_) => { + self.ios.new_file_counter(ino); + Ok(RafsInodeWalkAction::Continue) + } // TODO: should we check `size` here? + Err(e) => Err(e), + } + }; + + parent.walk_children_inodes(offset, &mut handler)?; + + Ok(()) + } + + fn negative_entry(&self) -> Entry { + Entry { + attr: Attr { + ..Default::default() + } + .into(), + inode: 0, + generation: 0, + attr_flags: 0, + attr_timeout: self.sb.meta.attr_timeout, + entry_timeout: self.sb.meta.entry_timeout, + } + } + + fn get_inode_attr(&self, ino: u64) -> Result { + let inode = self.sb.get_inode(ino, false)?; + let mut attr = inode.get_attr(); + + // override uid/gid if there is no explicit inode uid/gid + if !self.sb.meta.explicit_uidgid() { + attr.uid = self.i_uid; + attr.gid = self.i_gid; + } + + // Older rafs image or the root inode doesn't include mtime, in such cases + // we use runtime timestamp. + if attr.mtime == 0 { + attr.atime = self.i_time; + attr.ctime = self.i_time; + attr.mtime = self.i_time; + } + + // Only touch permissions bits. This trick is some sort of workaround + // since nydusify gives root directory permission of 0o750 and fuse mount + // options `rootmode=` does not affect root directory's permission bits, ending + // up with preventing other users from accessing the container rootfs. + let root_ino = self.root_ino(); + if attr.ino == root_ino { + attr.mode = attr.mode & !0o777 | 0o755; + } + + Ok(attr) + } + + fn get_inode_entry>(&self, inode: I) -> Entry { + let mut entry = inode.get_entry(); + + // override uid/gid if there is no explicit inode uid/gid + if !self.sb.meta.explicit_uidgid() { + entry.attr.st_uid = self.i_uid; + entry.attr.st_gid = self.i_gid; + } + + // Older rafs image doesn't include mtime, in such case we use runtime timestamp. + if entry.attr.st_mtime == 0 { + entry.attr.st_atime = self.i_time as i64; + entry.attr.st_ctime = self.i_time as i64; + entry.attr.st_mtime = self.i_time as i64; + } + + // Only touch permissions bits. This trick is some sort of workaround + // since nydusify gives root directory permission of 0o750 and fuse mount + // options `rootmode=` does not affect root directory's permission bits, ending + // up with preventing other users from accessing the container rootfs. 
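+ // Masking with `!0o777` first rewrites only the permission bits; the file type and any suid/sgid/sticky bits are preserved.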
+ if entry.inode == ROOT_ID { + entry.attr.st_mode = entry.attr.st_mode & !0o777 | 0o755; + } + + entry + } +} + +impl Rafs { + fn prefetch(&self, reader: RafsIoReader, prefetch_files: Option>) { + let sb = self.sb.clone(); + let device = self.device.clone(); + let prefetch_all = self.prefetch_all; + let root_ino = self.root_ino(); + + let _ = std::thread::spawn(move || { + Self::do_prefetch(root_ino, reader, prefetch_files, prefetch_all, sb, device); + }); + } + + /// for blobfs + pub fn fetch_range_synchronous(&self, prefetches: &[BlobPrefetchRequest]) -> Result<()> { + self.device.fetch_range_synchronous(prefetches) + } + + fn root_ino(&self) -> u64 { + self.sb.superblock.root_ino() + } + + fn do_prefetch( + root_ino: u64, + mut reader: RafsIoReader, + prefetch_files: Option>, + prefetch_all: bool, + sb: Arc, + device: BlobDevice, + ) { + let blob_infos = sb.superblock.get_blob_infos(); + + // First do range based prefetch for rafs v6. + if sb.meta.is_v6() { + let mut prefetches = Vec::new(); + + for blob in &blob_infos { + let sz = blob.prefetch_size(); + if sz > 0 { + let mut offset = 0; + while offset < sz { + let len = cmp::min(sz - offset, RAFS_DEFAULT_CHUNK_SIZE); + prefetches.push(BlobPrefetchRequest { + blob_id: blob.blob_id().to_owned(), + offset, + len, + }); + offset += len; + } + } + } + if !prefetches.is_empty() { + device.prefetch(&[], &prefetches).unwrap_or_else(|e| { + warn!("Prefetch error, {:?}", e); + }); + } + } + + let fetcher = |desc: &mut BlobIoVec, last: bool| { + if desc.size() as u64 > RAFS_MAX_CHUNK_SIZE + || desc.len() > 1024 + || (last && desc.size() > 0) + { + trace!( + "fs prefetch: 0x{:x} bytes for {} descriptors", + desc.size(), + desc.len() + ); + device.prefetch(&[desc], &[]).unwrap_or_else(|e| { + warn!("Prefetch error, {:?}", e); + }); + desc.reset(); + } + }; + + // Bootstrap has non-empty prefetch table indicating a full prefetch + let inlay_prefetch_all = sb + .is_inlay_prefetch_all(&mut reader) + .map_err(|e| error!("Detect prefetch table error {}", e)) + .unwrap_or_default(); + + // Nydusd has a CLI option indicating a full prefetch + let startup_prefetch_all = prefetch_files + .as_ref() + .map(|f| f.len() == 1 && f[0].as_os_str() == "/") + .unwrap_or(false); + + let mut ignore_prefetch_all = false; + + // User specified prefetch files have high priority to be prefetched. + // Moreover, user specified prefetch files list will override those on-disk prefetch table. + if !startup_prefetch_all && !inlay_prefetch_all { + // Then do file based prefetch based on: + // - prefetch listed passed in by user + // - or file prefetch list in metadata + let inodes = prefetch_files.map(|files| Self::convert_file_list(&files, &sb)); + let res = sb.prefetch_files(&device, &mut reader, root_ino, inodes, &fetcher); + match res { + Ok(true) => { + ignore_prefetch_all = true; + info!("Root inode was found, but it should not prefetch all files!") + } + Ok(false) => {} + Err(e) => info!("No file to be prefetched {:?}", e), + } + } + + // Perform different policy for v5 format and v6 format as rafs v6's blobs are capable to + // download chunks and decompress them all by themselves. 
For rafs v6, directly perform + // chunk based full prefetch + if !ignore_prefetch_all && (inlay_prefetch_all || prefetch_all || startup_prefetch_all) { + if sb.meta.is_v6() { + // The larger batch size, the fewer requests to registry + let batch_size = 1024 * 1024 * 2; + + for blob in &blob_infos { + let blob_size = blob.compressed_data_size(); + let count = div_round_up(blob_size, batch_size); + + let mut pre_offset = 0u64; + + for _i in 0..count { + let req = BlobPrefetchRequest { + blob_id: blob.blob_id().to_owned(), + offset: pre_offset, + len: cmp::min(batch_size, blob_size - pre_offset), + }; + device + .prefetch(&[], &[req]) + .map_err(|e| warn!("failed to prefetch blob data, {}", e)) + .unwrap_or_default(); + pre_offset += batch_size; + if pre_offset > blob_size { + break; + } + } + } + } else { + let root = vec![root_ino]; + let res = sb.prefetch_files(&device, &mut reader, root_ino, Some(root), &fetcher); + if let Err(e) = res { + info!("No file to be prefetched {:?}", e); + } + } + } + } + + fn convert_file_list(files: &[PathBuf], sb: &Arc) -> Vec { + let mut inodes = Vec::::with_capacity(files.len()); + + for f in files { + if let Ok(inode) = sb.ino_from_path(f.as_path()) { + inodes.push(inode); + } + } + + inodes + } +} + +impl BackendFileSystem for Rafs { + fn mount(&self) -> Result<(Entry, u64)> { + let root_inode = self.sb.get_inode(self.root_ino(), self.digest_validate)?; + self.ios.new_file_counter(root_inode.ino()); + let e = self.get_inode_entry(root_inode); + Ok((e, self.sb.get_max_ino())) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl FileSystem for Rafs { + type Inode = Inode; + type Handle = Handle; + + #[cfg(target_os = "macos")] + fn init(&self, _opts: FsOptions) -> Result { + Ok( + // These fuse features are supported by rafs by default. + FsOptions::ASYNC_READ | FsOptions::BIG_WRITES | FsOptions::ATOMIC_O_TRUNC, + ) + } + + #[cfg(target_os = "linux")] + fn init(&self, _opts: FsOptions) -> Result { + Ok( + // These fuse features are supported by rafs by default. 
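+ // ZERO_MESSAGE_OPEN/ZERO_MESSAGE_OPENDIR additionally let open/opendir succeed without a round trip to the daemon, which fits this read-only filesystem.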
+ FsOptions::ASYNC_READ + | FsOptions::PARALLEL_DIROPS + | FsOptions::BIG_WRITES + | FsOptions::HANDLE_KILLPRIV + | FsOptions::ASYNC_DIO + | FsOptions::HAS_IOCTL_DIR + | FsOptions::WRITEBACK_CACHE + | FsOptions::ZERO_MESSAGE_OPEN + | FsOptions::ATOMIC_O_TRUNC + | FsOptions::CACHE_SYMLINKS + | FsOptions::ZERO_MESSAGE_OPENDIR, + ) + } + + fn destroy(&self) {} + + fn lookup(&self, _ctx: &Context, ino: u64, name: &CStr) -> Result { + let mut rec = FopRecorder::settle(Lookup, ino, &self.ios); + let target = OsStr::from_bytes(name.to_bytes()); + let parent = self.sb.get_inode(ino, self.digest_validate)?; + if !parent.is_dir() { + return Err(enotdir!()); + } + + rec.mark_success(0); + if target == DOT || (ino == ROOT_ID && target == DOTDOT) { + let mut entry = self.get_inode_entry(parent); + entry.inode = ino; + Ok(entry) + } else if target == DOTDOT { + let parent = self.sb.get_extended_inode(parent.ino(), false)?; + Ok(self + .sb + .get_inode(parent.parent(), self.digest_validate) + .map(|i| self.get_inode_entry(i)) + .unwrap_or_else(|_| self.negative_entry())) + } else { + Ok(parent + .get_child_by_name(target) + .map(|i| { + self.ios.new_file_counter(i.ino()); + self.get_inode_entry(i.as_inode()) + }) + .unwrap_or_else(|_| self.negative_entry())) + } + } + + fn forget(&self, _ctx: &Context, _inode: u64, _count: u64) {} + + fn batch_forget(&self, ctx: &Context, requests: Vec<(u64, u64)>) { + for (inode, count) in requests { + self.forget(ctx, inode, count) + } + } + + fn getattr( + &self, + _ctx: &Context, + ino: u64, + _handle: Option, + ) -> Result<(stat64, Duration)> { + let mut recorder = FopRecorder::settle(Getattr, ino, &self.ios); + + let attr = self.get_inode_attr(ino).map(|r| { + recorder.mark_success(0); + r + })?; + + Ok((attr.into(), self.sb.meta.attr_timeout)) + } + + fn readlink(&self, _ctx: &Context, ino: u64) -> Result> { + let mut rec = FopRecorder::settle(Readlink, ino, &self.ios); + let inode = self.sb.get_inode(ino, self.digest_validate)?; + + Ok(inode + .get_symlink() + .map(|r| { + rec.mark_success(0); + r + })? + .as_bytes() + .to_vec()) + } + + #[allow(clippy::too_many_arguments)] + fn read( + &self, + _ctx: &Context, + ino: u64, + _handle: u64, + w: &mut dyn ZeroCopyWriter, + size: u32, + offset: u64, + _lock_owner: Option, + _flags: u32, + ) -> Result { + if offset.checked_add(size as u64).is_none() { + return Err(einval!("offset + size wraps around.")); + } + + let inode = self.sb.get_inode(ino, false)?; + let inode_size = inode.size(); + let mut recorder = FopRecorder::settle(Read, ino, &self.ios); + // Check for zero size read. + if size == 0 || offset >= inode_size { + recorder.mark_success(0); + return Ok(0); + } + + let real_size = cmp::min(size as u64, inode_size - offset); + let mut result = 0; + let mut io_vecs = inode.alloc_bio_vecs(&self.device, offset, real_size as usize, true)?; + assert!(!io_vecs.is_empty() && !io_vecs[0].is_empty()); + + // Try to amplify user io for Rafs v5, to improve performance. 
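+ // Amplification only kicks in when the request is smaller than the effective batch size and some of the requested chunks are not yet ready in the local cache.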
+ let user_io_batch_size = + cmp::min(self.user_io_batch_size as usize, w.available_bytes()) as u32; + if self.sb.meta.is_v5() && size < user_io_batch_size { + let all_chunks_ready = self.device.all_chunks_ready(&io_vecs); + if !all_chunks_ready { + let chunk_mask = self.metadata().chunk_size as u64 - 1; + let next_chunk_base = (offset + (size as u64) + chunk_mask) & !chunk_mask; + let window_base = cmp::min(next_chunk_base, inode_size); + let actual_size = window_base - (offset & !chunk_mask); + if actual_size < user_io_batch_size as u64 { + let window_size = user_io_batch_size as u64 - actual_size; + let orig_cnt = io_vecs.iter().fold(0, |s, d| s + d.len()); + self.sb.amplify_user_io( + &self.device, + user_io_batch_size, + &mut io_vecs, + &inode, + window_base, + window_size, + )?; + let new_cnt = io_vecs.iter().fold(0, |s, d| s + d.len()); + trace!( + "amplify RAFS v5 read from {} to {} chunks", + orig_cnt, + new_cnt + ); + } + } + } + + let start = self.ios.latency_start(); + for io_vec in io_vecs.iter_mut() { + assert!(!io_vec.is_empty()); + assert_ne!(io_vec.size(), 0); + + // Avoid copying `desc` + let r = self.device.read_to(w, io_vec)?; + result += r; + recorder.mark_success(r); + if r as u64 != io_vec.size() { + break; + } + } + self.ios.latency_end(&start, Read); + + Ok(result) + } + + fn open( + &self, + _ctx: &Context, + _inode: Self::Inode, + _flags: u32, + _fuse_flags: u32, + ) -> Result<(Option, OpenOptions, Option)> { + // Keep cache since we are readonly + Ok((None, OpenOptions::KEEP_CACHE, None)) + } + + fn release( + &self, + _ctx: &Context, + _inode: u64, + _flags: u32, + _handle: u64, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> Result<()> { + Ok(()) + } + + fn statfs(&self, _ctx: &Context, _inode: u64) -> Result { + // Safe because we are zero-initializing a struct with only POD fields. + let mut st: statvfs64 = unsafe { std::mem::zeroed() }; + + // This matches the behavior of libfuse as it returns these values if the + // filesystem doesn't implement this method. + st.f_namemax = 255; + st.f_bsize = 512; + st.f_fsid = self.sb.meta.magic as u64; + #[cfg(target_os = "macos")] + { + st.f_files = self.sb.meta.inodes_count as u32; + } + + #[cfg(target_os = "linux")] + { + st.f_files = self.sb.meta.inodes_count; + } + + Ok(st) + } + + fn getxattr( + &self, + _ctx: &Context, + inode: u64, + name: &CStr, + size: u32, + ) -> Result { + let mut recorder = FopRecorder::settle(Getxattr, inode, &self.ios); + + if !self.xattr_supported() { + return Err(std::io::Error::from_raw_os_error(libc::ENOSYS)); + } + + let name = OsStr::from_bytes(name.to_bytes()); + let inode = self.sb.get_inode(inode, false)?; + let value = inode.get_xattr(name)?; + let r = match value { + Some(value) => match size { + 0 => Ok(GetxattrReply::Count((value.len() + 1) as u32)), + x if x < value.len() as u32 => Err(std::io::Error::from_raw_os_error(libc::ERANGE)), + _ => Ok(GetxattrReply::Value(value)), + }, + None => { + // TODO: Hopefully, we can have a 'decorator' procedure macro in + // the future to wrap this method thus to handle different reasonable + // errors in a clean way. 
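+ // A missing attribute still counts as a successful fop for the metrics before ENODATA is returned to the caller.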
+ recorder.mark_success(0); + Err(std::io::Error::from_raw_os_error(libc::ENODATA)) + } + }; + + r.map(|v| { + recorder.mark_success(0); + v + }) + } + + fn listxattr(&self, _ctx: &Context, inode: u64, size: u32) -> Result { + let mut rec = FopRecorder::settle(Listxattr, inode, &self.ios); + if !self.xattr_supported() { + return Err(std::io::Error::from_raw_os_error(libc::ENOSYS)); + } + + let inode = self.sb.get_inode(inode, false)?; + let mut count = 0; + let mut buf = Vec::new(); + for mut name in inode.get_xattrs()? { + count += name.len() + 1; + if size != 0 { + buf.append(&mut name); + buf.append(&mut vec![0u8; 1]); + } + } + + rec.mark_success(0); + + match size { + 0 => Ok(ListxattrReply::Count(count as u32)), + x if x < count as u32 => Err(std::io::Error::from_raw_os_error(libc::ERANGE)), + _ => Ok(ListxattrReply::Names(buf)), + } + } + + fn readdir( + &self, + _ctx: &Context, + inode: u64, + _handle: u64, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry) -> Result, + ) -> Result<()> { + let mut rec = FopRecorder::settle(Readdir, inode, &self.ios); + + self.do_readdir(inode, size, offset, add_entry).map(|r| { + rec.mark_success(0); + r + }) + } + + fn readdirplus( + &self, + _ctx: &Context, + ino: u64, + _handle: u64, + size: u32, + offset: u64, + add_entry: &mut dyn FnMut(DirEntry, Entry) -> Result, + ) -> Result<()> { + let mut rec = FopRecorder::settle(Readdirplus, ino, &self.ios); + + self.do_readdir(ino, size, offset, &mut |dir_entry| { + let inode = self.sb.get_inode(dir_entry.ino, self.digest_validate)?; + add_entry(dir_entry, self.get_inode_entry(inode)) + }) + .map(|r| { + rec.mark_success(0); + r + }) + } + + fn opendir( + &self, + _ctx: &Context, + _inode: Self::Inode, + _flags: u32, + ) -> Result<(Option, OpenOptions)> { + // Cache dir since we are readonly + #[cfg(target_os = "macos")] + return Ok((None, OpenOptions::KEEP_CACHE)); + #[cfg(target_os = "linux")] + return Ok((None, OpenOptions::CACHE_DIR | OpenOptions::KEEP_CACHE)); + } + + fn releasedir(&self, _ctx: &Context, _inode: u64, _flags: u32, _handle: u64) -> Result<()> { + Ok(()) + } + + fn access(&self, ctx: &Context, ino: u64, mask: u32) -> Result<()> { + let mut rec = FopRecorder::settle(Access, ino, &self.ios); + let st = self.get_inode_attr(ino)?; + let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); + + if mode == libc::F_OK { + rec.mark_success(0); + return Ok(()); + } + + if (mode & libc::R_OK) != 0 + && ctx.uid != 0 + && (st.uid != ctx.uid || st.mode & 0o400 == 0) + && (st.gid != ctx.gid || st.mode & 0o040 == 0) + && st.mode & 0o004 == 0 + { + return Err(eacces!("permission denied")); + } + + if (mode & libc::W_OK) != 0 + && ctx.uid != 0 + && (st.uid != ctx.uid || st.mode & 0o200 == 0) + && (st.gid != ctx.gid || st.mode & 0o020 == 0) + && st.mode & 0o002 == 0 + { + return Err(eacces!("permission denied")); + } + + // root can only execute something if it is executable by one of the owner, the group, or + // everyone. + if (mode & libc::X_OK) != 0 + && (ctx.uid != 0 || st.mode & 0o111 == 0) + && (st.uid != ctx.uid || st.mode & 0o100 == 0) + && (st.gid != ctx.gid || st.mode & 0o010 == 0) + && st.mode & 0o001 == 0 + { + return Err(eacces!("permission denied")); + } + + rec.mark_success(0); + Ok(()) + } +} + +#[cfg(target_os = "linux")] +// Let Rafs works as an OverlayFs layer. 
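+// Only the root inode needs to be exposed here; everything else is served by the `FileSystem` implementation above.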
+impl Layer for Rafs { + fn root_inode(&self) -> Self::Inode { + self.root_ino() + } +} + +#[cfg(all(test, feature = "backend-oss"))] +pub(crate) mod tests { + use super::*; + use std::str::FromStr; + + pub fn new_rafs_backend() -> Box { + let config = r#" + version = 2 + id = "test" + [backend] + type = "oss" + [backend.oss] + endpoint = "test" + access_key_id = "test" + access_key_secret = "test" + bucket_name = "antsys-nydus" + object_prefix = "nydus_v2/" + scheme = "http" + [cache] + type = "filecache" + [cache.filecache] + work_dir = "." + [rafs] + mode = "direct" + validate = false + enable_xattr = true + [rafs.prefetch] + enable = true + threads = 10 + batch_size = 131072 + bandwidth_limit = 10485760 + "#; + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v5.boot"); + let mountpoint = "/mnt"; + let config = Arc::new(ConfigV2::from_str(config).unwrap()); + let bootstrapfile = source_path.to_str().unwrap(); + let (mut rafs, reader) = Rafs::new(&config, mountpoint, Path::new(bootstrapfile)).unwrap(); + rafs.import(reader, Some(vec![std::path::PathBuf::new()])) + .unwrap(); + Box::new(rafs) + } + + #[test] + fn it_should_create_new_rafs_fs() { + let rafs = new_rafs_backend(); + let attr = rafs.get_inode_attr(1).unwrap(); + assert_eq!(attr.ino, 1); + assert_eq!(attr.blocks, 8); + assert_eq!(attr.uid, 0); + // Root inode mode must be 0755 + assert_eq!(attr.mode & 0o777, 0o755); + } + + #[test] + fn it_should_access() { + let rafs = new_rafs_backend(); + let ctx = &Context { + gid: 0, + pid: 1, + uid: 0, + }; + if rafs.access(ctx, 1, 0).is_err() { + panic!("failed to access inode 1"); + } + } + + #[test] + fn it_should_listxattr() { + let rafs = new_rafs_backend(); + let ctx = &Context { + gid: 0, + pid: 1, + uid: 0, + }; + match rafs.listxattr(ctx, 1, 0) { + Ok(reply) => match reply { + ListxattrReply::Count(c) => assert_eq!(c, 0), + _ => panic!(), + }, + Err(_) => panic!("failed to access inode 1"), + } + } + + #[test] + fn it_should_get_statfs() { + let rafs = new_rafs_backend(); + let ctx = &Context { + gid: 0, + pid: 1, + uid: 0, + }; + match rafs.statfs(ctx, 1) { + Ok(statfs) => { + assert_eq!(statfs.f_files, 43082); + assert_eq!(statfs.f_bsize, 512); + assert_eq!(statfs.f_namemax, 255); + assert_eq!(statfs.f_fsid, 1380009555); + assert_eq!(statfs.f_ffree, 0); + } + Err(_) => panic!("failed to statfs"), + } + } + + #[test] + fn it_should_enable_xattr() { + let rafs = new_rafs_backend(); + assert!(rafs.xattr_enabled); + assert!(rafs.xattr_supported()); + } + + #[test] + fn it_should_lookup_entry() { + let rafs = new_rafs_backend(); + let ctx = &Context { + gid: 0, + pid: 1, + uid: 0, + }; + match rafs.lookup(ctx, 1, &std::ffi::CString::new("/etc").unwrap()) { + Err(_e) => { + panic!("failed to lookup /etc from ino 1"); + } + Ok(e) => { + assert_eq!(e.inode, 0); + } + } + } +} + +#[cfg(test)] +mod tests { + use nydus_utils::metrics::FsIoStats; + + use super::*; + #[test] + fn test_rafs() { + let rafs = Rafs { + id: "foo".into(), + device: BlobDevice::default(), + ios: FsIoStats::default().into(), + sb: Arc::new(RafsSuper::default()), + initialized: false, + digest_validate: false, + fs_prefetch: false, + prefetch_all: false, + xattr_enabled: false, + user_io_batch_size: 0, + i_uid: 0, + i_gid: 0, + i_time: 0, + }; + assert_eq!(rafs.id(), "foo"); + assert!(!rafs.xattr_supported()); + let ent = rafs.negative_entry(); + assert_eq!(ent.inode, 0); + 
assert_eq!(ent.generation, 0); + assert_eq!(ent.attr_flags, 0); + #[cfg(target_os = "linux")] + rafs.init(FsOptions::ASYNC_DIO).unwrap(); + rafs.open(&Context::default(), Inode::default(), 0, 0) + .unwrap(); + rafs.release( + &Context::default(), + Inode::default(), + 0, + Handle::default(), + false, + false, + Some(0), + ) + .unwrap(); + rafs.statfs(&Context::default(), Inode::default()).unwrap(); + rafs.destroy(); + } +} diff --git a/rafs/src/lib.rs b/rafs/src/lib.rs index 4ef03c548ab..0dbaa4ae58d 100644 --- a/rafs/src/lib.rs +++ b/rafs/src/lib.rs @@ -1,358 +1,358 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! RAFS: a chunk dedup, on-demand loading, readonly fuse filesystem. -//! -//! The Rafs filesystem is blob based readonly filesystem with chunk deduplication. A Rafs -//! filesystem is composed up of a metadata blob and zero or more data blobs. A blob is just a -//! plain object containing data chunks. Data chunks may be compressed, encrypted and deduplicated -//! by chunk content digest value. When Rafs file is used for container images, Rafs metadata blob -//! contains all filesystem metadatas, such as directory, file name, permission etc. Actually file -//! contents are divided into chunks and stored into data blobs. Rafs may built one data blob for -//! each container image layer or build a single data blob for the whole image, according to -//! building options. -//! -//! There are several versions of Rafs filesystem defined: -//! - V4: the original Rafs filesystem format -//! - V5: an optimized version based on V4 with metadata direct mapping, data prefetching etc. -//! - V6: a redesigned version to reduce metadata blob size and inter-operable with in kernel erofs, -//! better support of virtio-fs. -//! -//! The nydus-rafs crate depends on the nydus-storage crate to access metadata and data blobs and -//! improve performance by caching data on local storage. The nydus-rafs itself includes two main -//! sub modules: -//! - [fs](fs/index.html): the Rafs core to glue fuse, storage backend and filesystem metadata. -//! - [metadata](metadata/index.html): defines and accesses Rafs filesystem metadata. -//! -//! For more information, please refer to -//! [Dragonfly Image Service](https://github.com/dragonflyoss/nydus) - -#[macro_use] -extern crate log; -#[macro_use] -extern crate bitflags; -#[macro_use] -extern crate nydus_api; -#[macro_use] -extern crate nydus_storage as storage; - -use std::any::Any; -use std::borrow::Cow; -use std::fmt::Debug; -use std::fs::File; -use std::io::{BufWriter, Error, Read, Result, Seek, SeekFrom, Write}; -use std::os::unix::io::AsRawFd; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use crate::metadata::{RafsInodeExt, RafsSuper}; - -#[cfg(feature = "virtio-fs")] -pub mod blobfs; -pub mod fs; -pub mod metadata; -#[cfg(test)] -pub mod mock; - -/// Error codes for rafs related operations. 
-#[derive(thiserror::Error, Debug)] -pub enum RafsError { - #[error("Operation is not supported.")] - Unsupported, - #[error("Rafs is not initialized.")] - Uninitialized, - #[error("Rafs is already mounted.")] - AlreadyMounted, - #[error("Failed to read metadata: {0}`")] - ReadMetadata(Error, String), - #[error("Failed to load config: {0}`")] - LoadConfig(Error), - #[error("Failed to parse config: {0}`")] - ParseConfig(#[source] serde_json::Error), - #[error("Failed to create swap backend: {0}`")] - SwapBackend(Error), - #[error("Failed to fill superBlock: {0}`")] - FillSuperBlock(Error), - #[error("Failed to create device: {0}`")] - CreateDevice(Error), - #[error("Failed to prefetch data: {0}`")] - Prefetch(String), - #[error("Failed to configure device: {0}`")] - Configure(String), - #[error("Incompatible RAFS version: `{0}`")] - Incompatible(u16), - #[error("Illegal meta struct, type is `{0:?}` and content is `{1}`")] - IllegalMetaStruct(MetaType, String), - #[error("Invalid image data")] - InvalidImageData, -} - -#[derive(Debug)] -pub enum MetaType { - Regular, - Dir, - Symlink, -} - -/// Specialized version of std::result::Result<> for Rafs. -pub type RafsResult = std::result::Result; - -/// Handler to read file system bootstrap. -pub type RafsIoReader = Box; - -/// A helper trait for RafsIoReader. -pub trait RafsIoRead: Read + AsRawFd + Seek + Send {} - -impl RafsIoRead for File {} - -/// Handler to write file system bootstrap. -pub type RafsIoWriter = Box; - -/// A helper trait for RafsIoWriter. -pub trait RafsIoWrite: Write + Seek + 'static { - fn as_any(&self) -> &dyn Any; - - fn validate_alignment(&mut self, size: usize, alignment: usize) -> Result { - if alignment != 0 { - let cur = self.stream_position()?; - - if (size & (alignment - 1) != 0) || (cur & (alignment as u64 - 1) != 0) { - return Err(einval!("unaligned data")); - } - } - - Ok(size) - } - - /// write padding to align to RAFS_ALIGNMENT. - fn write_padding(&mut self, size: usize) -> Result<()> { - if size > WRITE_PADDING_DATA.len() { - return Err(einval!("invalid padding size")); - } - self.write_all(&WRITE_PADDING_DATA[0..size]) - } - - /// Seek the writer to the end. - fn seek_to_end(&mut self) -> Result { - self.seek(SeekFrom::End(0)).map_err(|e| { - error!("Seeking to end fails, {}", e); - e - }) - } - - /// Seek the writer to the `offset`. - fn seek_offset(&mut self, offset: u64) -> Result { - self.seek(SeekFrom::Start(offset)).map_err(|e| { - error!("Seeking to offset {} from start fails, {}", offset, e); - e - }) - } - - /// Seek the writer to current position plus the specified offset. - fn seek_current(&mut self, offset: i64) -> Result { - self.seek(SeekFrom::Current(offset)) - } - - /// Do some finalization works. - fn finalize(&mut self, _name: Option) -> anyhow::Result<()> { - Ok(()) - } - - /// Return a slice to get all data written. - /// - /// No more data should be written after calling as_bytes(). - fn as_bytes(&mut self) -> std::io::Result> { - unimplemented!() - } -} - -impl RafsIoWrite for File { - fn as_any(&self) -> &dyn Any { - self - } -} - -// Rust file I/O is un-buffered by default. If we have many small write calls -// to a file, should use BufWriter. BufWriter maintains an in-memory buffer -// for writing, minimizing the number of system calls required. -impl RafsIoWrite for BufWriter { - fn as_any(&self) -> &dyn Any { - self - } -} - -const WRITE_PADDING_DATA: [u8; 64] = [0u8; 64]; - -impl dyn RafsIoRead { - /// Seek the reader to next aligned position. 
- pub fn seek_to_next_aligned(&mut self, last_read_len: usize, alignment: usize) -> Result { - let suffix = last_read_len & (alignment - 1); - let offset = if suffix == 0 { 0 } else { alignment - suffix }; - - self.seek(SeekFrom::Current(offset as i64)).map_err(|e| { - error!("Seeking to offset {} from current fails, {}", offset, e); - e - }) - } - - /// Move the reader current position forward with `plus_offset` bytes. - pub fn seek_plus_offset(&mut self, plus_offset: i64) -> Result { - // Seek should not fail otherwise rafs goes insane. - self.seek(SeekFrom::Current(plus_offset)).map_err(|e| { - error!( - "Seeking to offset {} from current fails, {}", - plus_offset, e - ); - e - }) - } - - /// Seek the reader to the `offset`. - pub fn seek_to_offset(&mut self, offset: u64) -> Result { - self.seek(SeekFrom::Start(offset)).map_err(|e| { - error!("Seeking to offset {} from start fails, {}", offset, e); - e - }) - } - - /// Seek the reader to the end. - pub fn seek_to_end(&mut self, offset: i64) -> Result { - self.seek(SeekFrom::End(offset)).map_err(|e| { - error!("Seeking to end fails, {}", e); - e - }) - } - - /// Create a reader from a file path. - pub fn from_file(path: impl AsRef) -> RafsResult { - let f = File::open(&path).map_err(|e| { - RafsError::ReadMetadata(e, path.as_ref().to_string_lossy().into_owned()) - })?; - - Ok(Box::new(f)) - } -} - -/// Iterator to walk all inodes of a Rafs filesystem. -pub struct RafsIterator<'a> { - _rs: &'a RafsSuper, - cursor_stack: Vec<(Arc, PathBuf)>, -} - -impl<'a> RafsIterator<'a> { - /// Create a new iterator to visit a Rafs filesystem. - pub fn new(rs: &'a RafsSuper) -> Self { - let cursor_stack = match rs.get_extended_inode(rs.superblock.root_ino(), false) { - Ok(node) => { - let path = PathBuf::from("/"); - vec![(node, path)] - } - Err(e) => { - error!( - "failed to get root inode from the bootstrap {}, damaged or malicious file?", - e - ); - vec![] - } - }; - - RafsIterator { - _rs: rs, - cursor_stack, - } - } -} - -impl<'a> Iterator for RafsIterator<'a> { - type Item = (Arc, PathBuf); - - fn next(&mut self) -> Option { - let (node, path) = self.cursor_stack.pop()?; - if node.is_dir() { - let children = 0..node.get_child_count(); - for idx in children.rev() { - if let Ok(child) = node.get_child_by_index(idx) { - let child_path = path.join(child.name()); - self.cursor_stack.push((child, child_path)); - } else { - error!( - "failed to get child inode from the bootstrap, damaged or malicious file?" 
- ); - } - } - } - Some((node, path)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::metadata::RafsMode; - use std::fs::OpenOptions; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_rafs_io_writer() { - let mut file = TempFile::new().unwrap().into_file(); - - assert!(file.validate_alignment(2, 8).is_err()); - assert!(file.validate_alignment(7, 8).is_err()); - assert!(file.validate_alignment(9, 8).is_err()); - assert!(file.validate_alignment(8, 8).is_ok()); - - file.write_all(&[0x0u8; 7]).unwrap(); - assert!(file.validate_alignment(8, 8).is_err()); - { - let obj: &mut dyn RafsIoWrite = &mut file; - obj.write_padding(1).unwrap(); - } - assert!(file.validate_alignment(8, 8).is_ok()); - file.write_all(&[0x0u8; 1]).unwrap(); - assert!(file.validate_alignment(8, 8).is_err()); - - let obj: &mut dyn RafsIoRead = &mut file; - assert_eq!(obj.seek_to_offset(0).unwrap(), 0); - assert_eq!(obj.seek_plus_offset(7).unwrap(), 7); - assert_eq!(obj.seek_to_next_aligned(7, 8).unwrap(), 8); - assert_eq!(obj.seek_plus_offset(7).unwrap(), 15); - } - - #[test] - fn test_rafs_iterator() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); - let bootstrap = OpenOptions::new() - .read(true) - .write(false) - .open(path) - .unwrap(); - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: false, - ..Default::default() - }; - rs.load(&mut (Box::new(bootstrap) as RafsIoReader)).unwrap(); - let iter = RafsIterator::new(&rs); - - let mut last = false; - for (idx, (_node, path)) in iter.enumerate() { - assert!(!last); - if idx == 1 { - assert_eq!(path, PathBuf::from("/bin")); - } else if idx == 2 { - assert_eq!(path, PathBuf::from("/boot")); - } else if idx == 3 { - assert_eq!(path, PathBuf::from("/dev")); - } else if idx == 10 { - assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.256color")); - } else if idx == 11 { - assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.lightbgcolor")); - } else if path == PathBuf::from("/var/yp") { - last = true; - } - } - assert!(last); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! RAFS: a chunk dedup, on-demand loading, readonly fuse filesystem. +//! +//! The Rafs filesystem is blob based readonly filesystem with chunk deduplication. A Rafs +//! filesystem is composed up of a metadata blob and zero or more data blobs. A blob is just a +//! plain object containing data chunks. Data chunks may be compressed, encrypted and deduplicated +//! by chunk content digest value. When Rafs file is used for container images, Rafs metadata blob +//! contains all filesystem metadatas, such as directory, file name, permission etc. Actually file +//! contents are divided into chunks and stored into data blobs. Rafs may built one data blob for +//! each container image layer or build a single data blob for the whole image, according to +//! building options. +//! +//! There are several versions of Rafs filesystem defined: +//! - V4: the original Rafs filesystem format +//! - V5: an optimized version based on V4 with metadata direct mapping, data prefetching etc. +//! - V6: a redesigned version to reduce metadata blob size and inter-operable with in kernel erofs, +//! better support of virtio-fs. +//! +//! The nydus-rafs crate depends on the nydus-storage crate to access metadata and data blobs and +//! improve performance by caching data on local storage. 
The nydus-rafs itself includes two main +//! sub modules: +//! - [fs](fs/index.html): the Rafs core to glue fuse, storage backend and filesystem metadata. +//! - [metadata](metadata/index.html): defines and accesses Rafs filesystem metadata. +//! +//! For more information, please refer to +//! [Dragonfly Image Service](https://github.com/dragonflyoss/nydus) + +#[macro_use] +extern crate log; +#[macro_use] +extern crate bitflags; +#[macro_use] +extern crate nydus_api; +#[macro_use] +extern crate nydus_storage as storage; + +use std::any::Any; +use std::borrow::Cow; +use std::fmt::Debug; +use std::fs::File; +use std::io::{BufWriter, Error, Read, Result, Seek, SeekFrom, Write}; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use crate::metadata::{RafsInodeExt, RafsSuper}; + +#[cfg(feature = "virtio-fs")] +pub mod blobfs; +pub mod fs; +pub mod metadata; +#[cfg(test)] +pub mod mock; + +/// Error codes for rafs related operations. +#[derive(thiserror::Error, Debug)] +pub enum RafsError { + #[error("Operation is not supported.")] + Unsupported, + #[error("Rafs is not initialized.")] + Uninitialized, + #[error("Rafs is already mounted.")] + AlreadyMounted, + #[error("Failed to read metadata: {0}`")] + ReadMetadata(Error, String), + #[error("Failed to load config: {0}`")] + LoadConfig(Error), + #[error("Failed to parse config: {0}`")] + ParseConfig(#[source] serde_json::Error), + #[error("Failed to create swap backend: {0}`")] + SwapBackend(Error), + #[error("Failed to fill superBlock: {0}`")] + FillSuperBlock(Error), + #[error("Failed to create device: {0}`")] + CreateDevice(Error), + #[error("Failed to prefetch data: {0}`")] + Prefetch(String), + #[error("Failed to configure device: {0}`")] + Configure(String), + #[error("Incompatible RAFS version: `{0}`")] + Incompatible(u16), + #[error("Illegal meta struct, type is `{0:?}` and content is `{1}`")] + IllegalMetaStruct(MetaType, String), + #[error("Invalid image data")] + InvalidImageData, +} + +#[derive(Debug)] +pub enum MetaType { + Regular, + Dir, + Symlink, +} + +/// Specialized version of std::result::Result<> for Rafs. +pub type RafsResult = std::result::Result; + +/// Handler to read file system bootstrap. +pub type RafsIoReader = Box; + +/// A helper trait for RafsIoReader. +pub trait RafsIoRead: Read + AsRawFd + Seek + Send {} + +impl RafsIoRead for File {} + +/// Handler to write file system bootstrap. +pub type RafsIoWriter = Box; + +/// A helper trait for RafsIoWriter. +pub trait RafsIoWrite: Write + Seek + 'static { + fn as_any(&self) -> &dyn Any; + + fn validate_alignment(&mut self, size: usize, alignment: usize) -> Result { + if alignment != 0 { + let cur = self.stream_position()?; + + if (size & (alignment - 1) != 0) || (cur & (alignment as u64 - 1) != 0) { + return Err(einval!("unaligned data")); + } + } + + Ok(size) + } + + /// write padding to align to RAFS_ALIGNMENT. + fn write_padding(&mut self, size: usize) -> Result<()> { + if size > WRITE_PADDING_DATA.len() { + return Err(einval!("invalid padding size")); + } + self.write_all(&WRITE_PADDING_DATA[0..size]) + } + + /// Seek the writer to the end. + fn seek_to_end(&mut self) -> Result { + self.seek(SeekFrom::End(0)).map_err(|e| { + error!("Seeking to end fails, {}", e); + e + }) + } + + /// Seek the writer to the `offset`. 
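+ ///
+ /// Returns the new absolute position on success.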
+ fn seek_offset(&mut self, offset: u64) -> Result { + self.seek(SeekFrom::Start(offset)).map_err(|e| { + error!("Seeking to offset {} from start fails, {}", offset, e); + e + }) + } + + /// Seek the writer to current position plus the specified offset. + fn seek_current(&mut self, offset: i64) -> Result { + self.seek(SeekFrom::Current(offset)) + } + + /// Do some finalization works. + fn finalize(&mut self, _name: Option) -> anyhow::Result<()> { + Ok(()) + } + + /// Return a slice to get all data written. + /// + /// No more data should be written after calling as_bytes(). + fn as_bytes(&mut self) -> std::io::Result> { + unimplemented!() + } +} + +impl RafsIoWrite for File { + fn as_any(&self) -> &dyn Any { + self + } +} + +// Rust file I/O is un-buffered by default. If we have many small write calls +// to a file, should use BufWriter. BufWriter maintains an in-memory buffer +// for writing, minimizing the number of system calls required. +impl RafsIoWrite for BufWriter { + fn as_any(&self) -> &dyn Any { + self + } +} + +const WRITE_PADDING_DATA: [u8; 64] = [0u8; 64]; + +impl dyn RafsIoRead { + /// Seek the reader to next aligned position. + pub fn seek_to_next_aligned(&mut self, last_read_len: usize, alignment: usize) -> Result { + let suffix = last_read_len & (alignment - 1); + let offset = if suffix == 0 { 0 } else { alignment - suffix }; + + self.seek(SeekFrom::Current(offset as i64)).map_err(|e| { + error!("Seeking to offset {} from current fails, {}", offset, e); + e + }) + } + + /// Move the reader current position forward with `plus_offset` bytes. + pub fn seek_plus_offset(&mut self, plus_offset: i64) -> Result { + // Seek should not fail otherwise rafs goes insane. + self.seek(SeekFrom::Current(plus_offset)).map_err(|e| { + error!( + "Seeking to offset {} from current fails, {}", + plus_offset, e + ); + e + }) + } + + /// Seek the reader to the `offset`. + pub fn seek_to_offset(&mut self, offset: u64) -> Result { + self.seek(SeekFrom::Start(offset)).map_err(|e| { + error!("Seeking to offset {} from start fails, {}", offset, e); + e + }) + } + + /// Seek the reader to the end. + pub fn seek_to_end(&mut self, offset: i64) -> Result { + self.seek(SeekFrom::End(offset)).map_err(|e| { + error!("Seeking to end fails, {}", e); + e + }) + } + + /// Create a reader from a file path. + pub fn from_file(path: impl AsRef) -> RafsResult { + let f = File::open(&path).map_err(|e| { + RafsError::ReadMetadata(e, path.as_ref().to_string_lossy().into_owned()) + })?; + + Ok(Box::new(f)) + } +} + +/// Iterator to walk all inodes of a Rafs filesystem. +pub struct RafsIterator<'a> { + _rs: &'a RafsSuper, + cursor_stack: Vec<(Arc, PathBuf)>, +} + +impl<'a> RafsIterator<'a> { + /// Create a new iterator to visit a Rafs filesystem. 
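+ ///
+ /// The iterator walks the tree depth-first in pre-order, yielding a directory before any of its children.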
+ pub fn new(rs: &'a RafsSuper) -> Self { + let cursor_stack = match rs.get_extended_inode(rs.superblock.root_ino(), false) { + Ok(node) => { + let path = PathBuf::from("/"); + vec![(node, path)] + } + Err(e) => { + error!( + "failed to get root inode from the bootstrap {}, damaged or malicious file?", + e + ); + vec![] + } + }; + + RafsIterator { + _rs: rs, + cursor_stack, + } + } +} + +impl<'a> Iterator for RafsIterator<'a> { + type Item = (Arc, PathBuf); + + fn next(&mut self) -> Option { + let (node, path) = self.cursor_stack.pop()?; + if node.is_dir() { + let children = 0..node.get_child_count(); + for idx in children.rev() { + if let Ok(child) = node.get_child_by_index(idx) { + let child_path = path.join(child.name()); + self.cursor_stack.push((child, child_path)); + } else { + error!( + "failed to get child inode from the bootstrap, damaged or malicious file?" + ); + } + } + } + Some((node, path)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::metadata::RafsMode; + use std::fs::OpenOptions; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_rafs_io_writer() { + let mut file = TempFile::new().unwrap().into_file(); + + assert!(file.validate_alignment(2, 8).is_err()); + assert!(file.validate_alignment(7, 8).is_err()); + assert!(file.validate_alignment(9, 8).is_err()); + assert!(file.validate_alignment(8, 8).is_ok()); + + file.write_all(&[0x0u8; 7]).unwrap(); + assert!(file.validate_alignment(8, 8).is_err()); + { + let obj: &mut dyn RafsIoWrite = &mut file; + obj.write_padding(1).unwrap(); + } + assert!(file.validate_alignment(8, 8).is_ok()); + file.write_all(&[0x0u8; 1]).unwrap(); + assert!(file.validate_alignment(8, 8).is_err()); + + let obj: &mut dyn RafsIoRead = &mut file; + assert_eq!(obj.seek_to_offset(0).unwrap(), 0); + assert_eq!(obj.seek_plus_offset(7).unwrap(), 7); + assert_eq!(obj.seek_to_next_aligned(7, 8).unwrap(), 8); + assert_eq!(obj.seek_plus_offset(7).unwrap(), 15); + } + + #[test] + fn test_rafs_iterator() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); + let bootstrap = OpenOptions::new() + .read(true) + .write(false) + .open(path) + .unwrap(); + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: false, + ..Default::default() + }; + rs.load(&mut (Box::new(bootstrap) as RafsIoReader)).unwrap(); + let iter = RafsIterator::new(&rs); + + let mut last = false; + for (idx, (_node, path)) in iter.enumerate() { + assert!(!last); + if idx == 1 { + assert_eq!(path, PathBuf::from("/bin")); + } else if idx == 2 { + assert_eq!(path, PathBuf::from("/boot")); + } else if idx == 3 { + assert_eq!(path, PathBuf::from("/dev")); + } else if idx == 10 { + assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.256color")); + } else if idx == 11 { + assert_eq!(path, PathBuf::from("/etc/DIR_COLORS.lightbgcolor")); + } else if path == PathBuf::from("/var/yp") { + last = true; + } + } + assert!(last); + } +} diff --git a/rafs/src/metadata/cached_v5.rs b/rafs/src/metadata/cached_v5.rs index 61e4dd1d1b4..6a2244a4cb0 100644 --- a/rafs/src/metadata/cached_v5.rs +++ b/rafs/src/metadata/cached_v5.rs @@ -1,1185 +1,1185 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A RAFS metadata manager to cache all file system metadata into memory. -//! -//! 
All filesystem metadata will be loaded, validated and cached into memory when loading the -//! file system. And currently the cache layer only supports readonly file systems. - -use std::any::Any; -use std::collections::{BTreeMap, HashMap}; -use std::ffi::{OsStr, OsString}; -use std::io::SeekFrom; -use std::io::{ErrorKind, Read, Result}; -use std::mem::size_of; -use std::ops::Deref; -use std::os::unix::ffi::OsStrExt; -use std::str::FromStr; -use std::sync::Arc; - -use fuse_backend_rs::abi::fuse_abi; -use fuse_backend_rs::api::filesystem::Entry; -use nydus_storage::device::v5::BlobV5ChunkInfo; -use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo}; -use nydus_utils::digest::RafsDigest; -use nydus_utils::ByteSize; - -use crate::metadata::inode::RafsInodeFlags; -use crate::metadata::layout::v5::{ - rafsv5_alloc_bio_vecs, rafsv5_validate_inode, RafsV5BlobTable, RafsV5ChunkInfo, RafsV5Inode, - RafsV5InodeChunkOps, RafsV5InodeOps, RafsV5XAttrsTable, RAFSV5_ALIGNMENT, -}; -use crate::metadata::layout::{bytes_to_os_str, parse_xattr, RAFS_V5_ROOT_INODE}; -use crate::metadata::{ - BlobIoVec, Inode, RafsError, RafsInode, RafsInodeExt, RafsInodeWalkAction, - RafsInodeWalkHandler, RafsResult, RafsSuperBlock, RafsSuperInodes, RafsSuperMeta, XattrName, - XattrValue, DOT, DOTDOT, RAFS_ATTR_BLOCK_SIZE, RAFS_MAX_NAME, -}; -use crate::RafsIoReader; - -/// Cached Rafs v5 super block. -pub struct CachedSuperBlockV5 { - s_blob: Arc, - s_meta: Arc, - s_inodes: BTreeMap>, - max_inode: Inode, - validate_inode: bool, -} - -impl CachedSuperBlockV5 { - /// Create a new instance of `CachedSuperBlockV5`. - pub fn new(meta: RafsSuperMeta, validate_inode: bool) -> Self { - CachedSuperBlockV5 { - s_blob: Arc::new(RafsV5BlobTable::new()), - s_meta: Arc::new(meta), - s_inodes: BTreeMap::new(), - max_inode: RAFS_V5_ROOT_INODE, - validate_inode, - } - } - - /// Load all inodes into memory. - /// - /// Rafs v5 layout is based on BFS, which means parents always are in front of children. - fn load_all_inodes(&mut self, r: &mut RafsIoReader) -> Result<()> { - let mut dir_ino_set = Vec::with_capacity(self.s_meta.inode_table_entries as usize); - - for _idx in 0..self.s_meta.inode_table_entries { - let mut inode = CachedInodeV5::new(self.s_blob.clone(), self.s_meta.clone()); - match inode.load(&self.s_meta, r) { - Ok(_) => { - trace!( - "got inode ino {} parent {} size {} child_idx {} child_cnt {}", - inode.ino(), - inode.parent(), - inode.size(), - inode.i_child_idx, - inode.i_child_cnt, - ); - } - Err(ref e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => { - error!("error when loading CachedInode {:?}", e); - return Err(e); - } - } - - let child_inode = self.hash_inode(Arc::new(inode))?; - if child_inode.is_dir() { - // Delay associating dir inode to its parent because that will take - // a cloned inode object, which preventing us from using `Arc::get_mut`. - // Without `Arc::get_mut` during Cached meta setup(loading all inodes), - // we have to lock inode everywhere for mutability. It really hurts. - dir_ino_set.push(child_inode.i_ino); - } else { - self.add_into_parent(child_inode); - } - } - - // Add directories to its parent in reverse order. 
- for ino in dir_ino_set.iter().rev() { - self.add_into_parent(self.get_node(*ino)?); - } - debug!("all {} inodes loaded", self.s_inodes.len()); - - Ok(()) - } - - fn get_node(&self, ino: Inode) -> Result> { - Ok(self.s_inodes.get(&ino).ok_or_else(|| enoent!())?.clone()) - } - - fn get_node_mut(&mut self, ino: Inode) -> Result<&mut Arc> { - self.s_inodes.get_mut(&ino).ok_or_else(|| enoent!()) - } - - fn hash_inode(&mut self, inode: Arc) -> Result> { - if self.max_inode < inode.ino() { - self.max_inode = inode.ino(); - } - - if inode.is_hardlink() { - if let Some(i) = self.s_inodes.get(&inode.i_ino) { - // Keep it as is, directory digest algorithm has dependency on it. - if !i.i_data.is_empty() { - return Ok(inode); - } - } - } - self.s_inodes.insert(inode.ino(), inode.clone()); - - Ok(inode) - } - - fn add_into_parent(&mut self, child_inode: Arc) { - if let Ok(parent_inode) = self.get_node_mut(child_inode.parent()) { - Arc::get_mut(parent_inode).unwrap().add_child(child_inode); - } - } -} - -impl RafsSuperInodes for CachedSuperBlockV5 { - fn get_max_ino(&self) -> u64 { - self.max_inode - } - - fn get_inode(&self, ino: Inode, _validate_digest: bool) -> Result> { - self.s_inodes - .get(&ino) - .map_or(Err(enoent!()), |i| Ok(i.clone())) - } - - fn get_extended_inode( - &self, - ino: Inode, - _validate_digest: bool, - ) -> Result> { - self.s_inodes - .get(&ino) - .map_or(Err(enoent!()), |i| Ok(i.clone())) - } -} - -impl RafsSuperBlock for CachedSuperBlockV5 { - fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - let meta = &self.s_meta; - - // FIXME: add validator for all load operations. - - // Now the seek offset points to inode table, so we can easily find first inode offset. - r.seek(SeekFrom::Start(meta.inode_table_offset))?; - let mut offset = [0u8; size_of::()]; - r.read_exact(&mut offset)?; - // The offset is aligned with 8 bytes to make it easier to validate RafsV5Inode. - let inode_offset = u32::from_le_bytes(offset) << 3; - - // Load blob table and extended blob table if there is one. - let mut blob_table = RafsV5BlobTable::new(); - if meta.extended_blob_table_offset > 0 { - r.seek(SeekFrom::Start(meta.extended_blob_table_offset))?; - blob_table - .extended - .load(r, meta.extended_blob_table_entries as usize)?; - } - r.seek(SeekFrom::Start(meta.blob_table_offset))?; - blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; - self.s_blob = Arc::new(blob_table); - - // Load all inodes started from first inode offset. - r.seek(SeekFrom::Start(inode_offset as u64))?; - self.load_all_inodes(r)?; - - // Validate inode digest tree - let digester = self.s_meta.get_digester(); - let inode = self.get_extended_inode(RAFS_V5_ROOT_INODE, false)?; - if self.validate_inode && !rafsv5_validate_inode(inode.deref(), true, digester)? { - return Err(einval!("invalid inode digest")); - } - - Ok(()) - } - - fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { - Err(RafsError::Unsupported) - } - - fn destroy(&mut self) { - self.s_inodes.clear(); - } - - fn get_blob_infos(&self) -> Vec> { - self.s_blob.entries.clone() - } - - fn root_ino(&self) -> u64 { - RAFS_V5_ROOT_INODE - } - - fn get_chunk_info(&self, _idx: usize) -> Result> { - unimplemented!("used by RAFS v6 only") - } - - fn set_blob_device(&self, _blob_device: BlobDevice) { - unimplemented!("used by RAFS v6 only") - } -} - -/// Cached RAFS v5 inode object. 
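One detail of the superblock `load` implementation above is easy to miss: the first entry of the inode table is a little-endian `u32` counted in 8-byte units, hence the left shift by 3 before seeking to the first inode. A minimal sketch of that decoding using only std I/O (the helper name is made up):

```rust
use std::io::{Cursor, Read, Result};

// Decode the byte offset of the first inode from an inode-table entry:
// entries are little-endian u32 values expressed in 8-byte units.
fn first_inode_offset(r: &mut impl Read) -> Result<u64> {
    let mut entry = [0u8; 4];
    r.read_exact(&mut entry)?;
    Ok((u32::from_le_bytes(entry) as u64) << 3)
}

fn main() -> Result<()> {
    // A table entry of 0x20 (32) points at byte offset 0x100 (256).
    let mut table = Cursor::new(vec![0x20, 0x00, 0x00, 0x00]);
    assert_eq!(first_inode_offset(&mut table)?, 0x100);
    Ok(())
}
```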
-#[derive(Default, Clone, Debug)] -pub struct CachedInodeV5 { - i_ino: Inode, - i_name: OsString, - i_digest: RafsDigest, - i_parent: u64, - i_mode: u32, - i_projid: u32, - i_uid: u32, - i_gid: u32, - i_flags: RafsInodeFlags, - i_size: u64, - i_blocks: u64, - i_nlink: u32, - i_child_idx: u32, - i_child_cnt: u32, - // extra info need cache - i_chunksize: u32, - i_rdev: u32, - i_mtime_nsec: u32, - i_mtime: u64, - i_target: OsString, // for symbol link - i_xattr: HashMap>, - i_data: Vec>, - i_child: Vec>, - i_blob_table: Arc, - i_meta: Arc, -} - -impl CachedInodeV5 { - /// Create a new instance of `CachedInodeV5`. - pub fn new(blob_table: Arc, meta: Arc) -> Self { - CachedInodeV5 { - i_blob_table: blob_table, - i_meta: meta, - ..Default::default() - } - } - - fn load_name(&mut self, name_size: usize, r: &mut RafsIoReader) -> Result<()> { - if name_size > 0 { - let mut name_buf = vec![0u8; name_size]; - r.read_exact(name_buf.as_mut_slice())?; - r.seek_to_next_aligned(name_size, RAFSV5_ALIGNMENT)?; - self.i_name = bytes_to_os_str(&name_buf).to_os_string(); - } - - Ok(()) - } - - fn load_symlink(&mut self, symlink_size: usize, r: &mut RafsIoReader) -> Result<()> { - if self.is_symlink() && symlink_size > 0 { - let mut symbol_buf = vec![0u8; symlink_size]; - r.read_exact(symbol_buf.as_mut_slice())?; - r.seek_to_next_aligned(symlink_size, RAFSV5_ALIGNMENT)?; - self.i_target = bytes_to_os_str(&symbol_buf).to_os_string(); - } - - Ok(()) - } - - fn load_xattr(&mut self, r: &mut RafsIoReader) -> Result<()> { - if self.has_xattr() { - let mut xattrs = RafsV5XAttrsTable::new(); - r.read_exact(xattrs.as_mut())?; - xattrs.size = u64::from_le(xattrs.size); - - let mut xattr_buf = vec![0u8; xattrs.aligned_size()]; - r.read_exact(xattr_buf.as_mut_slice())?; - parse_xattr(&xattr_buf, xattrs.size(), |name, value| { - self.i_xattr.insert(name.to_os_string(), value); - true - })?; - } - - Ok(()) - } - - fn load_chunk_info(&mut self, r: &mut RafsIoReader) -> Result<()> { - if self.is_reg() && self.i_child_cnt > 0 { - let mut chunk = RafsV5ChunkInfo::new(); - for _ in 0..self.i_child_cnt { - chunk.load(r)?; - self.i_data.push(Arc::new(CachedChunkInfoV5::from(&chunk))); - } - } - - Ok(()) - } - - /// Load an inode metadata from a reader. 
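The `validate` method a little further below cross-checks `i_size` against the number of cached chunks and against `i_blocks`, which is counted in 512-byte units as with stat(2). Both expected values are plain ceiling divisions; the sketch below reuses the numbers from `test_alloc_bio_desc` later in this module and is illustrative only.

```rust
// Expected chunk count for a regular file: ceil(size / chunk_size).
fn expected_chunks(file_size: u64, chunk_size: u64) -> u64 {
    (file_size + chunk_size - 1) / chunk_size
}

// Expected block count in 512-byte units: ceil(size / 512).
fn expected_blocks(file_size: u64) -> u64 {
    (file_size + 511) / 512
}

fn main() {
    // A 3 MiB + 8 KiB file split into 1 MiB chunks, as in test_alloc_bio_desc.
    let size = 3 * 1024 * 1024 + 8192;
    assert_eq!(expected_chunks(size, 1024 * 1024), 4);
    assert_eq!(expected_blocks(size), 6160);
}
```

The real check is more forgiving: it skips the chunk-count comparison for files with holes and accepts `i_blocks == 0`, which old stargz builders emit.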
- pub fn load(&mut self, sb: &RafsSuperMeta, r: &mut RafsIoReader) -> Result<()> { - // RafsV5Inode...name...symbol link...xattrs...chunks - let mut inode = RafsV5Inode::new(); - - // parse ondisk inode: RafsV5Inode|name|symbol|xattr|chunks - r.read_exact(inode.as_mut())?; - self.copy_from_ondisk(&inode); - self.load_name(inode.i_name_size as usize, r)?; - self.load_symlink(inode.i_symlink_size as usize, r)?; - self.load_xattr(r)?; - self.load_chunk_info(r)?; - self.i_chunksize = sb.chunk_size; - self.validate(sb.inodes_count, self.i_chunksize as u64)?; - - Ok(()) - } - - fn copy_from_ondisk(&mut self, inode: &RafsV5Inode) { - self.i_ino = inode.i_ino; - self.i_digest = inode.i_digest; - self.i_parent = inode.i_parent; - self.i_mode = inode.i_mode; - self.i_projid = inode.i_projid; - self.i_uid = inode.i_uid; - self.i_gid = inode.i_gid; - self.i_flags = inode.i_flags; - self.i_size = inode.i_size; - self.i_nlink = inode.i_nlink; - self.i_blocks = inode.i_blocks; - self.i_child_idx = inode.i_child_index; - self.i_child_cnt = inode.i_child_count; - self.i_rdev = inode.i_rdev; - self.i_mtime = inode.i_mtime; - self.i_mtime_nsec = inode.i_mtime_nsec; - } - - fn add_child(&mut self, child: Arc) { - self.i_child.push(child); - if self.i_child.len() == (self.i_child_cnt as usize) { - // all children are ready, do sort - self.i_child.sort_by(|c1, c2| c1.i_name.cmp(&c2.i_name)); - } - } -} - -impl RafsInode for CachedInodeV5 { - // Somehow we got invalid `inode_count` from superblock. - fn validate(&self, _inode_count: u64, chunk_size: u64) -> Result<()> { - if self.i_ino == 0 - // || self.i_ino > inode_count - || self.i_nlink == 0 - || (self.i_ino != RAFS_V5_ROOT_INODE && self.i_parent == 0) - || self.i_name.len() > RAFS_MAX_NAME - || self.i_name.is_empty() - { - return Err(einval!("invalid inode")); - } - if !self.is_hardlink() && self.i_parent >= self.i_ino { - return Err(einval!("invalid parent inode")); - } - if self.is_reg() { - let chunks = (self.i_size + chunk_size - 1) / chunk_size; - if !self.has_hole() && chunks != self.i_data.len() as u64 { - return Err(einval!("invalid chunk count")); - } - let blocks = (self.i_size + 511) / 512; - // Old stargz builder generates inode with 0 blocks - if blocks != self.i_blocks && self.i_blocks != 0 { - return Err(einval!("invalid block count")); - } - } else if self.is_dir() { - if self.i_child_cnt != 0 && (self.i_child_idx as Inode) <= self.i_ino { - return Err(einval!("invalid directory")); - } - } else if self.is_symlink() && self.i_target.is_empty() { - return Err(einval!("invalid symlink target")); - } - - Ok(()) - } - - fn alloc_bio_vecs( - &self, - _device: &BlobDevice, - offset: u64, - size: usize, - user_io: bool, - ) -> Result> { - rafsv5_alloc_bio_vecs(self, offset, size, user_io) - } - - fn collect_descendants_inodes( - &self, - descendants: &mut Vec>, - ) -> Result { - if !self.is_dir() { - return Err(enotdir!()); - } - - let mut child_dirs: Vec> = Vec::new(); - - for child_inode in &self.i_child { - if child_inode.is_dir() { - child_dirs.push(child_inode.clone()); - } else if !child_inode.is_empty_size() { - descendants.push(child_inode.clone()); - } - } - - for d in child_dirs { - d.collect_descendants_inodes(descendants)?; - } - - Ok(0) - } - - #[inline] - fn get_entry(&self) -> Entry { - Entry { - attr: self.get_attr().into(), - inode: self.i_ino, - generation: 0, - attr_flags: 0, - attr_timeout: self.i_meta.attr_timeout, - entry_timeout: self.i_meta.entry_timeout, - } - } - - #[inline] - fn get_attr(&self) -> fuse_abi::Attr { - 
fuse_abi::Attr { - ino: self.i_ino, - size: self.i_size, - blocks: self.i_blocks, - mode: self.i_mode, - nlink: self.i_nlink as u32, - blksize: RAFS_ATTR_BLOCK_SIZE, - rdev: self.i_rdev, - ..Default::default() - } - } - - #[inline] - fn is_blkdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 - } - - #[inline] - fn is_chrdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 - } - - #[inline] - fn is_sock(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 - } - - #[inline] - fn is_fifo(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 - } - - #[inline] - fn is_dir(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 - } - - #[inline] - fn is_symlink(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 - } - - #[inline] - fn is_reg(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 - } - - #[inline] - fn is_hardlink(&self) -> bool { - !self.is_dir() && self.i_nlink > 1 - } - - #[inline] - fn has_xattr(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::XATTR) - } - - #[inline] - fn get_xattr(&self, name: &OsStr) -> Result> { - Ok(self.i_xattr.get(name).cloned()) - } - - fn get_xattrs(&self) -> Result> { - Ok(self - .i_xattr - .keys() - .map(|k| k.as_bytes().to_vec()) - .collect::>()) - } - - #[inline] - fn get_symlink(&self) -> Result { - if !self.is_symlink() { - Err(einval!("inode is not a symlink")) - } else { - Ok(self.i_target.clone()) - } - } - - #[inline] - fn get_symlink_size(&self) -> u16 { - if self.is_symlink() { - self.i_target.byte_size() as u16 - } else { - 0 - } - } - - fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { - // offset 0 and 1 is for "." and ".." respectively. - let mut cur_offset = entry_offset; - - if cur_offset == 0 { - cur_offset += 1; - // Safe to unwrap since conversion from DOT to os string can't fail. - match handler( - None, - OsString::from_str(DOT).unwrap(), - self.ino(), - cur_offset, - ) { - Ok(RafsInodeWalkAction::Continue) => {} - Ok(RafsInodeWalkAction::Break) => return Ok(()), - Err(e) => return Err(e), - } - } - - if cur_offset == 1 { - let parent = if self.ino() == 1 { 1 } else { self.parent() }; - cur_offset += 1; - // Safe to unwrap since conversion from DOTDOT to os string can't fail. 
- match handler( - None, - OsString::from_str(DOTDOT).unwrap(), - parent, - cur_offset, - ) { - Ok(RafsInodeWalkAction::Continue) => {} - Ok(RafsInodeWalkAction::Break) => return Ok(()), - Err(e) => return Err(e), - }; - } - - let mut idx = cur_offset - 2; - while idx < self.get_child_count() as u64 { - assert!(idx <= u32::MAX as u64); - let child = self.get_child_by_index(idx as u32)?; - cur_offset += 1; - match handler(None, child.name(), child.ino(), cur_offset) { - Ok(RafsInodeWalkAction::Continue) => idx += 1, - Ok(RafsInodeWalkAction::Break) => break, - Err(e) => return Err(e), - } - } - - Ok(()) - } - - fn get_child_by_name(&self, name: &OsStr) -> Result> { - let idx = self - .i_child - .binary_search_by(|c| c.i_name.as_os_str().cmp(name)) - .map_err(|_| enoent!())?; - Ok(self.i_child[idx].clone()) - } - - #[inline] - fn get_child_by_index(&self, index: u32) -> Result> { - if (index as usize) < self.i_child.len() { - Ok(self.i_child[index as usize].clone()) - } else { - Err(einval!("invalid child index")) - } - } - - #[inline] - fn get_child_count(&self) -> u32 { - self.i_child_cnt - } - - #[inline] - fn get_child_index(&self) -> Result { - Ok(self.i_child_idx) - } - - #[inline] - fn get_chunk_count(&self) -> u32 { - self.get_child_count() - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(ino, i_ino, u64); - impl_getter!(size, i_size, u64); - impl_getter!(rdev, i_rdev, u32); - impl_getter!(projid, i_projid, u32); -} - -impl RafsInodeExt for CachedInodeV5 { - fn as_inode(&self) -> &dyn RafsInode { - self - } - - #[inline] - fn name(&self) -> OsString { - self.i_name.clone() - } - - #[inline] - fn get_name_size(&self) -> u16 { - self.i_name.byte_size() as u16 - } - - #[inline] - fn flags(&self) -> u64 { - self.i_flags.bits() - } - - #[inline] - fn get_digest(&self) -> RafsDigest { - self.i_digest - } - - #[inline] - fn get_chunk_info(&self, idx: u32) -> Result> { - if (idx as usize) < self.i_data.len() { - Ok(self.i_data[idx as usize].clone()) - } else { - Err(einval!("invalid chunk index")) - } - } - - impl_getter!(parent, i_parent, u64); -} - -impl RafsV5InodeChunkOps for CachedInodeV5 { - fn get_chunk_info_v5(&self, idx: u32) -> Result> { - if (idx as usize) < self.i_data.len() { - Ok(self.i_data[idx as usize].clone() as Arc) - } else { - Err(einval!("invalid chunk index")) - } - } -} - -impl RafsV5InodeOps for CachedInodeV5 { - fn get_blob_by_index(&self, idx: u32) -> Result> { - self.i_blob_table.get(idx) - } - - fn get_chunk_size(&self) -> u32 { - self.i_chunksize - } - - fn has_hole(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::HAS_HOLE) - } -} - -/// Cached information about an Rafs Data Chunk. -#[derive(Clone, Default, Debug)] -pub struct CachedChunkInfoV5 { - // block hash - block_id: Arc, - // blob containing the block - blob_index: u32, - // chunk index in blob - index: u32, - // position of the block within the file - file_offset: u64, - // offset of the block within the blob - compressed_offset: u64, - uncompressed_offset: u64, - // size of the block, compressed - compressed_size: u32, - uncompressed_size: u32, - flags: BlobChunkFlags, -} - -impl CachedChunkInfoV5 { - /// Create a new instance of `CachedChunkInfoV5`. - pub fn new() -> Self { - CachedChunkInfoV5 { - ..Default::default() - } - } - - /// Load a chunk metadata from a reader. 
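To make the cursor arithmetic in `walk_children_inodes` above easier to follow: offsets 0 and 1 are reserved for `.` and `..`, so child index `idx` is reported at offset `idx + 2`, and a resume offset maps back to an index via `offset - 2`. A standalone sketch of that convention (the helper is hypothetical):

```rust
// Map a readdir-style offset to an entry name: 0 -> ".", 1 -> "..",
// anything else indexes the sorted child list at offset - 2.
fn entry_at<'a>(children: &[&'a str], offset: u64) -> Option<&'a str> {
    match offset {
        0 => Some("."),
        1 => Some(".."),
        _ => children.get(offset as usize - 2).copied(),
    }
}

fn main() {
    let children = ["bar", "baz"];
    assert_eq!(entry_at(&children, 0), Some("."));
    assert_eq!(entry_at(&children, 1), Some(".."));
    assert_eq!(entry_at(&children, 2), Some("bar"));
    assert_eq!(entry_at(&children, 4), None); // past the last child
}
```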
- pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - let mut chunk = RafsV5ChunkInfo::new(); - - r.read_exact(chunk.as_mut())?; - self.copy_from_ondisk(&chunk); - - Ok(()) - } - - fn copy_from_ondisk(&mut self, chunk: &RafsV5ChunkInfo) { - self.block_id = Arc::new(chunk.block_id); - self.blob_index = chunk.blob_index; - self.index = chunk.index; - self.compressed_offset = chunk.compressed_offset; - self.uncompressed_offset = chunk.uncompressed_offset; - self.uncompressed_size = chunk.uncompressed_size; - self.file_offset = chunk.file_offset; - self.compressed_size = chunk.compressed_size; - self.flags = chunk.flags; - } -} - -impl BlobChunkInfo for CachedChunkInfoV5 { - fn chunk_id(&self) -> &RafsDigest { - &self.block_id - } - - fn id(&self) -> u32 { - self.index() - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, blob_index, u32); - impl_getter!(compressed_offset, compressed_offset, u64); - impl_getter!(compressed_size, compressed_size, u32); - impl_getter!(uncompressed_offset, uncompressed_offset, u64); - impl_getter!(uncompressed_size, uncompressed_size, u32); -} - -impl BlobV5ChunkInfo for CachedChunkInfoV5 { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_getter!(index, index, u32); - impl_getter!(file_offset, file_offset, u64); - impl_getter!(flags, flags, BlobChunkFlags); -} - -impl From<&RafsV5ChunkInfo> for CachedChunkInfoV5 { - fn from(info: &RafsV5ChunkInfo) -> Self { - let mut chunk = CachedChunkInfoV5::new(); - chunk.copy_from_ondisk(info); - chunk - } -} - -#[cfg(test)] -mod cached_tests { - use std::cmp; - use std::ffi::{OsStr, OsString}; - use std::fs::OpenOptions; - use std::io::Seek; - use std::io::SeekFrom::Start; - use std::os::unix::ffi::OsStrExt; - use std::sync::Arc; - - use nydus_storage::device::{BlobDevice, BlobFeatures}; - use nydus_utils::digest::{Algorithm, RafsDigest}; - use nydus_utils::ByteSize; - use storage::device::v5::BlobV5ChunkInfo; - use storage::device::{BlobChunkFlags, BlobChunkInfo}; - - use crate::metadata::cached_v5::{CachedInodeV5, CachedSuperBlockV5}; - use crate::metadata::inode::RafsInodeFlags; - use crate::metadata::layout::v5::{ - rafsv5_align, RafsV5BlobTable, RafsV5ChunkInfo, RafsV5Inode, RafsV5InodeWrapper, - }; - use crate::metadata::layout::{RafsXAttrs, RAFS_V5_ROOT_INODE}; - use crate::metadata::{ - RafsInode, RafsInodeWalkAction, RafsStore, RafsSuperBlock, RafsSuperInodes, RafsSuperMeta, - }; - use crate::{BufWriter, RafsInodeExt, RafsIoRead, RafsIoReader}; - use vmm_sys_util::tempfile::TempFile; - - use super::CachedChunkInfoV5; - - #[test] - fn test_load_inode() { - let mut f = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .read(true) - .open("/tmp/buf_1") - .unwrap(); - let mut writer = BufWriter::new(f.try_clone().unwrap()); - let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; - - let mut ondisk_inode = RafsV5Inode::new(); - let file_name = OsString::from("c_inode_1"); - let mut xattr = RafsXAttrs::default(); - xattr - .add(OsString::from("user.k1"), vec![1u8, 2u8, 3u8, 4u8]) - .unwrap(); - xattr - .add(OsString::from("user.k2"), vec![10u8, 11u8, 12u8]) - .unwrap(); - ondisk_inode.i_name_size = file_name.byte_size() as u16; - ondisk_inode.i_child_count = 1; - ondisk_inode.i_ino = 3; - ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; - ondisk_inode.i_size = 
8192; - ondisk_inode.i_mode = libc::S_IFREG as u32; - ondisk_inode.i_nlink = 1; - ondisk_inode.i_blocks = 16; - let mut chunk = RafsV5ChunkInfo::new(); - chunk.uncompressed_size = 8192; - chunk.uncompressed_offset = 0; - chunk.compressed_offset = 0; - chunk.compressed_size = 4096; - let inode = RafsV5InodeWrapper { - name: file_name.as_os_str(), - symlink: None, - inode: &ondisk_inode, - }; - inode.store(&mut writer).unwrap(); - chunk.store(&mut writer).unwrap(); - xattr.store_v5(&mut writer).unwrap(); - - f.seek(Start(0)).unwrap(); - let md = RafsSuperMeta { - inodes_count: 100, - chunk_size: 1024 * 1024, - ..Default::default() - }; - let meta = Arc::new(md); - let blob_table = Arc::new(RafsV5BlobTable::new()); - let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); - cached_inode.load(&meta, &mut reader).unwrap(); - // check data - assert_eq!(cached_inode.i_name, file_name.to_str().unwrap()); - assert_eq!(cached_inode.i_child_cnt, 1); - let attr = cached_inode.get_attr(); - assert_eq!(attr.ino, 3); - assert_eq!(attr.size, 8192); - let cached_chunk = cached_inode.get_chunk_info(0).unwrap(); - assert_eq!(cached_chunk.compressed_size(), 4096); - assert_eq!(cached_chunk.uncompressed_size(), 8192); - assert_eq!(cached_chunk.compressed_offset(), 0); - assert_eq!(cached_chunk.uncompressed_offset(), 0); - let c_xattr = cached_inode.get_xattrs().unwrap(); - for k in c_xattr.iter() { - let k = OsStr::from_bytes(k); - let v = cached_inode.get_xattr(k).unwrap(); - assert_eq!(xattr.get(k).cloned().unwrap(), v.unwrap()); - } - - // close file - drop(f); - std::fs::remove_file("/tmp/buf_1").unwrap(); - } - - #[test] - fn test_load_symlink() { - let mut f = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .read(true) - .open("/tmp/buf_2") - .unwrap(); - let mut writer = BufWriter::new(f.try_clone().unwrap()); - let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; - let file_name = OsString::from("c_inode_2"); - let symlink_name = OsString::from("c_inode_1"); - let mut ondisk_inode = RafsV5Inode::new(); - ondisk_inode.i_name_size = file_name.byte_size() as u16; - ondisk_inode.i_ino = 3; - ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; - ondisk_inode.i_nlink = 1; - ondisk_inode.i_symlink_size = symlink_name.byte_size() as u16; - ondisk_inode.i_mode = libc::S_IFLNK as u32; - - let inode = RafsV5InodeWrapper { - name: file_name.as_os_str(), - symlink: Some(symlink_name.as_os_str()), - inode: &ondisk_inode, - }; - inode.store(&mut writer).unwrap(); - - f.seek(Start(0)).unwrap(); - let mut meta = Arc::new(RafsSuperMeta::default()); - Arc::get_mut(&mut meta).unwrap().chunk_size = 1024 * 1024; - Arc::get_mut(&mut meta).unwrap().inodes_count = 4; - let blob_table = Arc::new(RafsV5BlobTable::new()); - let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); - cached_inode.load(&meta, &mut reader).unwrap(); - - assert_eq!(cached_inode.i_name, "c_inode_2"); - assert_eq!(cached_inode.get_symlink().unwrap(), symlink_name); - - drop(f); - std::fs::remove_file("/tmp/buf_2").unwrap(); - } - - #[test] - fn test_alloc_bio_desc() { - let mut f = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .read(true) - .open("/tmp/buf_3") - .unwrap(); - let mut writer = BufWriter::new(f.try_clone().unwrap()); - let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; - let file_name = OsString::from("c_inode_3"); - let mut ondisk_inode = RafsV5Inode::new(); - ondisk_inode.i_name_size = rafsv5_align(file_name.len()) as u16; - ondisk_inode.i_ino = 
3; - ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; - ondisk_inode.i_nlink = 1; - ondisk_inode.i_child_count = 4; - ondisk_inode.i_mode = libc::S_IFREG as u32; - ondisk_inode.i_size = 1024 * 1024 * 3 + 8192; - ondisk_inode.i_blocks = 6160; - - let inode = RafsV5InodeWrapper { - name: file_name.as_os_str(), - symlink: None, - inode: &ondisk_inode, - }; - inode.store(&mut writer).unwrap(); - - let mut size = ondisk_inode.i_size; - for i in 0..ondisk_inode.i_child_count { - let mut chunk = RafsV5ChunkInfo::new(); - chunk.uncompressed_size = cmp::min(1024 * 1024, size as u32); - chunk.uncompressed_offset = (i * 1024 * 1024) as u64; - chunk.compressed_size = chunk.uncompressed_size / 2; - chunk.compressed_offset = ((i * 1024 * 1024) / 2) as u64; - chunk.file_offset = chunk.uncompressed_offset; - chunk.store(&mut writer).unwrap(); - size -= chunk.uncompressed_size as u64; - } - f.seek(Start(0)).unwrap(); - let mut meta = Arc::new(RafsSuperMeta::default()); - Arc::get_mut(&mut meta).unwrap().chunk_size = 1024 * 1024; - Arc::get_mut(&mut meta).unwrap().inodes_count = 4; - let mut blob_table = Arc::new(RafsV5BlobTable::new()); - Arc::get_mut(&mut blob_table).unwrap().add( - String::from("123333"), - 0, - 0, - 0, - 0, - 0, - 0, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - meta.flags, - false, - ); - let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); - cached_inode.load(&meta, &mut reader).unwrap(); - let device = BlobDevice::default(); - let descs = cached_inode.alloc_bio_vecs(&device, 0, 100, true).unwrap(); - let desc1 = &descs[0]; - assert_eq!(desc1.size(), 100); - assert_eq!(desc1.len(), 1); - assert_eq!(desc1.blob_io_desc(0).unwrap().offset, 0); - assert_eq!(desc1.blob_io_desc(0).unwrap().blob.blob_id(), "123333"); - - let descs = cached_inode - .alloc_bio_vecs(&device, 1024 * 1024 - 100, 200, true) - .unwrap(); - let desc2 = &descs[0]; - assert_eq!(desc2.size(), 200); - assert_eq!(desc2.len(), 2); - assert_eq!(desc2.blob_io_desc(0).unwrap().offset, 1024 * 1024 - 100); - assert_eq!(desc2.blob_io_desc(0).unwrap().size, 100); - assert_eq!(desc2.blob_io_desc(1).unwrap().offset, 0); - assert_eq!(desc2.blob_io_desc(1).unwrap().size, 100); - - let descs = cached_inode - .alloc_bio_vecs(&device, 1024 * 1024 + 8192, 1024 * 1024 * 4, true) - .unwrap(); - let desc3 = &descs[0]; - assert_eq!(desc3.size(), 1024 * 1024 * 2); - assert_eq!(desc3.len(), 3); - assert_eq!(desc3.blob_io_desc(2).unwrap().size, 8192); - - drop(f); - std::fs::remove_file("/tmp/buf_3").unwrap(); - } - - #[test] - fn test_rafsv5_superblock() { - let md = RafsSuperMeta::default(); - let mut sb = CachedSuperBlockV5::new(md, true); - - assert_eq!(sb.max_inode, RAFS_V5_ROOT_INODE); - assert_eq!(sb.s_inodes.len(), 0); - assert!(sb.validate_inode); - - let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); - inode.i_ino = 1; - inode.i_nlink = 1; - inode.i_child_idx = 2; - inode.i_child_cnt = 3; - inode.i_mode = libc::S_IFDIR as u32; - sb.hash_inode(Arc::new(inode)).unwrap(); - assert_eq!(sb.max_inode, 1); - assert_eq!(sb.s_inodes.len(), 1); - - let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); - inode.i_ino = 2; - inode.i_mode = libc::S_IFDIR as u32; - inode.i_nlink = 2; - inode.i_parent = RAFS_V5_ROOT_INODE; - sb.hash_inode(Arc::new(inode)).unwrap(); - assert_eq!(sb.max_inode, 2); - assert_eq!(sb.s_inodes.len(), 2); - - let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); - inode.i_ino = 2; - inode.i_mode = libc::S_IFDIR as u32; - inode.i_nlink = 2; - inode.i_parent 
= RAFS_V5_ROOT_INODE; - sb.hash_inode(Arc::new(inode)).unwrap(); - assert_eq!(sb.max_inode, 2); - assert_eq!(sb.s_inodes.len(), 2); - - let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); - inode.i_ino = 4; - inode.i_mode = libc::S_IFDIR as u32; - inode.i_nlink = 1; - inode.i_parent = RAFS_V5_ROOT_INODE; - sb.hash_inode(Arc::new(inode)).unwrap(); - assert_eq!(sb.max_inode, 4); - assert_eq!(sb.s_inodes.len(), 3); - } - - fn get_streams() -> (Box, BufWriter) { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let writer: BufWriter = BufWriter::new(w); - let reader: Box = Box::new(r); - (reader, writer) - } - - #[test] - fn test_cached_super_block_v5() { - let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); - let meta = RafsSuperMeta::default(); - let mut node = CachedInodeV5 { - i_ino: 0, - ..CachedInodeV5::default() - }; - node.i_mode |= libc::S_IFDIR as u32; - node.i_child_idx = 2; - node.i_flags = RafsInodeFlags::SYMLINK; - node.i_name = OsStr::new("foo").into(); - node.i_digest = digest; - let mut child_node = CachedInodeV5::default(); - child_node.i_mode |= libc::S_IFDIR as u32; - child_node.i_ino = 1; - child_node.i_name = OsStr::new("bar").into(); - let mut blk = CachedSuperBlockV5::new(meta, false); - let (r, _w) = get_streams(); - let mut r = r as RafsIoReader; - assert!(blk.load_all_inodes(&mut r).is_ok()); - assert_eq!(blk.get_max_ino(), RAFS_V5_ROOT_INODE); - assert!(blk.get_inode(0, false).is_err()); - assert!(blk.get_extended_inode(0, false).is_err()); - - blk.s_inodes.insert(0, Arc::new(node.clone())); - assert!(blk.get_inode(0, false).is_ok()); - assert!(blk.get_extended_inode(0, false).is_ok()); - - blk.destroy(); - assert!(blk.s_inodes.is_empty()); - let blobs = blk.get_blob_extra_infos(); - assert!(blobs.unwrap().is_empty()); - assert_eq!(blk.root_ino(), RAFS_V5_ROOT_INODE); - - node.add_child(Arc::new(child_node)); - assert_eq!(node.i_child.len(), 1); - - let mut descendants = Vec::>::new(); - node.collect_descendants_inodes(&mut descendants).unwrap(); - assert!(node.collect_descendants_inodes(&mut descendants).is_ok()); - assert_eq!(node.get_entry().inode, node.ino()); - assert_eq!(node.get_xattr(OsStr::new("foobar")).unwrap(), None); - assert!(!node.is_blkdev()); - assert!(!node.is_chrdev()); - assert!(!node.is_sock()); - assert!(!node.is_fifo()); - assert_eq!(node.get_symlink_size(), 0); - - node.i_child_cnt = 1; - let mut found = false; - node.walk_children_inodes(0, &mut |_node, _child_name, child_ino, _offset| { - if child_ino == 1 { - found = true; - Ok(RafsInodeWalkAction::Break) - } else { - Ok(RafsInodeWalkAction::Continue) - } - }) - .unwrap(); - assert!(found); - let rafsinode = node.as_inode(); - assert!(rafsinode.get_child_by_name(OsStr::new("bar")).is_ok()); - assert!(rafsinode.get_child_by_index(0).is_ok()); - assert!(rafsinode.get_child_by_index(1).is_err()); - assert_eq!(rafsinode.get_child_index().unwrap(), 2); - - assert_eq!(node.name(), "foo"); - assert_eq!(node.get_name_size(), "foo".len() as u16); - assert_eq!(node.flags(), RafsInodeFlags::SYMLINK.bits()); - assert_eq!(node.get_digest(), digest); - } - - #[test] - fn test_cached_chunk_info_v5() { - let mut info = CachedChunkInfoV5::new(); - info.index = 1024; - info.blob_index = 1; - info.flags = BlobChunkFlags::COMPRESSED; - - assert_eq!(info.index(), 1024 as u32); - 
assert!(info.is_compressed()); - assert!(!info.is_encrypted()); - let info = info.as_base(); - - assert_eq!(info.blob_index(), 1 as u32); - assert!(info.is_compressed()); - assert!(!info.is_encrypted()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A RAFS metadata manager to cache all file system metadata into memory. +//! +//! All filesystem metadata will be loaded, validated and cached into memory when loading the +//! file system. And currently the cache layer only supports readonly file systems. + +use std::any::Any; +use std::collections::{BTreeMap, HashMap}; +use std::ffi::{OsStr, OsString}; +use std::io::SeekFrom; +use std::io::{ErrorKind, Read, Result}; +use std::mem::size_of; +use std::ops::Deref; +use std::os::unix::ffi::OsStrExt; +use std::str::FromStr; +use std::sync::Arc; + +use fuse_backend_rs::abi::fuse_abi; +use fuse_backend_rs::api::filesystem::Entry; +use nydus_storage::device::v5::BlobV5ChunkInfo; +use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo}; +use nydus_utils::digest::RafsDigest; +use nydus_utils::ByteSize; + +use crate::metadata::inode::RafsInodeFlags; +use crate::metadata::layout::v5::{ + rafsv5_alloc_bio_vecs, rafsv5_validate_inode, RafsV5BlobTable, RafsV5ChunkInfo, RafsV5Inode, + RafsV5InodeChunkOps, RafsV5InodeOps, RafsV5XAttrsTable, RAFSV5_ALIGNMENT, +}; +use crate::metadata::layout::{bytes_to_os_str, parse_xattr, RAFS_V5_ROOT_INODE}; +use crate::metadata::{ + BlobIoVec, Inode, RafsError, RafsInode, RafsInodeExt, RafsInodeWalkAction, + RafsInodeWalkHandler, RafsResult, RafsSuperBlock, RafsSuperInodes, RafsSuperMeta, XattrName, + XattrValue, DOT, DOTDOT, RAFS_ATTR_BLOCK_SIZE, RAFS_MAX_NAME, +}; +use crate::RafsIoReader; + +/// Cached Rafs v5 super block. +pub struct CachedSuperBlockV5 { + s_blob: Arc, + s_meta: Arc, + s_inodes: BTreeMap>, + max_inode: Inode, + validate_inode: bool, +} + +impl CachedSuperBlockV5 { + /// Create a new instance of `CachedSuperBlockV5`. + pub fn new(meta: RafsSuperMeta, validate_inode: bool) -> Self { + CachedSuperBlockV5 { + s_blob: Arc::new(RafsV5BlobTable::new()), + s_meta: Arc::new(meta), + s_inodes: BTreeMap::new(), + max_inode: RAFS_V5_ROOT_INODE, + validate_inode, + } + } + + /// Load all inodes into memory. + /// + /// Rafs v5 layout is based on BFS, which means parents always are in front of children. + fn load_all_inodes(&mut self, r: &mut RafsIoReader) -> Result<()> { + let mut dir_ino_set = Vec::with_capacity(self.s_meta.inode_table_entries as usize); + + for _idx in 0..self.s_meta.inode_table_entries { + let mut inode = CachedInodeV5::new(self.s_blob.clone(), self.s_meta.clone()); + match inode.load(&self.s_meta, r) { + Ok(_) => { + trace!( + "got inode ino {} parent {} size {} child_idx {} child_cnt {}", + inode.ino(), + inode.parent(), + inode.size(), + inode.i_child_idx, + inode.i_child_cnt, + ); + } + Err(ref e) if e.kind() == ErrorKind::UnexpectedEof => break, + Err(e) => { + error!("error when loading CachedInode {:?}", e); + return Err(e); + } + } + + let child_inode = self.hash_inode(Arc::new(inode))?; + if child_inode.is_dir() { + // Delay associating dir inode to its parent because that will take + // a cloned inode object, which preventing us from using `Arc::get_mut`. + // Without `Arc::get_mut` during Cached meta setup(loading all inodes), + // we have to lock inode everywhere for mutability. It really hurts. 
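The comment above hinges on a std-library rule worth spelling out: `Arc::get_mut` only returns a mutable reference while the strong count is exactly one, which is why directory inodes are wired to their parents only after all inodes are loaded and no stray clones are held. A quick demonstration of that behavior:

```rust
use std::sync::Arc;

fn main() {
    let mut inode = Arc::new(String::from("parent"));

    // Sole owner: Arc::get_mut grants mutable access.
    assert!(Arc::get_mut(&mut inode).is_some());

    // A second strong reference exists, so get_mut refuses.
    let alias = Arc::clone(&inode);
    assert!(Arc::get_mut(&mut inode).is_none());

    // Dropping the clone restores unique ownership.
    drop(alias);
    Arc::get_mut(&mut inode).unwrap().push_str("-dir");
    assert_eq!(inode.as_str(), "parent-dir");
}
```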
+ dir_ino_set.push(child_inode.i_ino); + } else { + self.add_into_parent(child_inode); + } + } + + // Add directories to its parent in reverse order. + for ino in dir_ino_set.iter().rev() { + self.add_into_parent(self.get_node(*ino)?); + } + debug!("all {} inodes loaded", self.s_inodes.len()); + + Ok(()) + } + + fn get_node(&self, ino: Inode) -> Result> { + Ok(self.s_inodes.get(&ino).ok_or_else(|| enoent!())?.clone()) + } + + fn get_node_mut(&mut self, ino: Inode) -> Result<&mut Arc> { + self.s_inodes.get_mut(&ino).ok_or_else(|| enoent!()) + } + + fn hash_inode(&mut self, inode: Arc) -> Result> { + if self.max_inode < inode.ino() { + self.max_inode = inode.ino(); + } + + if inode.is_hardlink() { + if let Some(i) = self.s_inodes.get(&inode.i_ino) { + // Keep it as is, directory digest algorithm has dependency on it. + if !i.i_data.is_empty() { + return Ok(inode); + } + } + } + self.s_inodes.insert(inode.ino(), inode.clone()); + + Ok(inode) + } + + fn add_into_parent(&mut self, child_inode: Arc) { + if let Ok(parent_inode) = self.get_node_mut(child_inode.parent()) { + Arc::get_mut(parent_inode).unwrap().add_child(child_inode); + } + } +} + +impl RafsSuperInodes for CachedSuperBlockV5 { + fn get_max_ino(&self) -> u64 { + self.max_inode + } + + fn get_inode(&self, ino: Inode, _validate_digest: bool) -> Result> { + self.s_inodes + .get(&ino) + .map_or(Err(enoent!()), |i| Ok(i.clone())) + } + + fn get_extended_inode( + &self, + ino: Inode, + _validate_digest: bool, + ) -> Result> { + self.s_inodes + .get(&ino) + .map_or(Err(enoent!()), |i| Ok(i.clone())) + } +} + +impl RafsSuperBlock for CachedSuperBlockV5 { + fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + let meta = &self.s_meta; + + // FIXME: add validator for all load operations. + + // Now the seek offset points to inode table, so we can easily find first inode offset. + r.seek(SeekFrom::Start(meta.inode_table_offset))?; + let mut offset = [0u8; size_of::()]; + r.read_exact(&mut offset)?; + // The offset is aligned with 8 bytes to make it easier to validate RafsV5Inode. + let inode_offset = u32::from_le_bytes(offset) << 3; + + // Load blob table and extended blob table if there is one. + let mut blob_table = RafsV5BlobTable::new(); + if meta.extended_blob_table_offset > 0 { + r.seek(SeekFrom::Start(meta.extended_blob_table_offset))?; + blob_table + .extended + .load(r, meta.extended_blob_table_entries as usize)?; + } + r.seek(SeekFrom::Start(meta.blob_table_offset))?; + blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; + self.s_blob = Arc::new(blob_table); + + // Load all inodes started from first inode offset. + r.seek(SeekFrom::Start(inode_offset as u64))?; + self.load_all_inodes(r)?; + + // Validate inode digest tree + let digester = self.s_meta.get_digester(); + let inode = self.get_extended_inode(RAFS_V5_ROOT_INODE, false)?; + if self.validate_inode && !rafsv5_validate_inode(inode.deref(), true, digester)? { + return Err(einval!("invalid inode digest")); + } + + Ok(()) + } + + fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { + Err(RafsError::Unsupported) + } + + fn destroy(&mut self) { + self.s_inodes.clear(); + } + + fn get_blob_infos(&self) -> Vec> { + self.s_blob.entries.clone() + } + + fn root_ino(&self) -> u64 { + RAFS_V5_ROOT_INODE + } + + fn get_chunk_info(&self, _idx: usize) -> Result> { + unimplemented!("used by RAFS v6 only") + } + + fn set_blob_device(&self, _blob_device: BlobDevice) { + unimplemented!("used by RAFS v6 only") + } +} + +/// Cached RAFS v5 inode object. 
+#[derive(Default, Clone, Debug)] +pub struct CachedInodeV5 { + i_ino: Inode, + i_name: OsString, + i_digest: RafsDigest, + i_parent: u64, + i_mode: u32, + i_projid: u32, + i_uid: u32, + i_gid: u32, + i_flags: RafsInodeFlags, + i_size: u64, + i_blocks: u64, + i_nlink: u32, + i_child_idx: u32, + i_child_cnt: u32, + // extra info need cache + i_chunksize: u32, + i_rdev: u32, + i_mtime_nsec: u32, + i_mtime: u64, + i_target: OsString, // for symbol link + i_xattr: HashMap>, + i_data: Vec>, + i_child: Vec>, + i_blob_table: Arc, + i_meta: Arc, +} + +impl CachedInodeV5 { + /// Create a new instance of `CachedInodeV5`. + pub fn new(blob_table: Arc, meta: Arc) -> Self { + CachedInodeV5 { + i_blob_table: blob_table, + i_meta: meta, + ..Default::default() + } + } + + fn load_name(&mut self, name_size: usize, r: &mut RafsIoReader) -> Result<()> { + if name_size > 0 { + let mut name_buf = vec![0u8; name_size]; + r.read_exact(name_buf.as_mut_slice())?; + r.seek_to_next_aligned(name_size, RAFSV5_ALIGNMENT)?; + self.i_name = bytes_to_os_str(&name_buf).to_os_string(); + } + + Ok(()) + } + + fn load_symlink(&mut self, symlink_size: usize, r: &mut RafsIoReader) -> Result<()> { + if self.is_symlink() && symlink_size > 0 { + let mut symbol_buf = vec![0u8; symlink_size]; + r.read_exact(symbol_buf.as_mut_slice())?; + r.seek_to_next_aligned(symlink_size, RAFSV5_ALIGNMENT)?; + self.i_target = bytes_to_os_str(&symbol_buf).to_os_string(); + } + + Ok(()) + } + + fn load_xattr(&mut self, r: &mut RafsIoReader) -> Result<()> { + if self.has_xattr() { + let mut xattrs = RafsV5XAttrsTable::new(); + r.read_exact(xattrs.as_mut())?; + xattrs.size = u64::from_le(xattrs.size); + + let mut xattr_buf = vec![0u8; xattrs.aligned_size()]; + r.read_exact(xattr_buf.as_mut_slice())?; + parse_xattr(&xattr_buf, xattrs.size(), |name, value| { + self.i_xattr.insert(name.to_os_string(), value); + true + })?; + } + + Ok(()) + } + + fn load_chunk_info(&mut self, r: &mut RafsIoReader) -> Result<()> { + if self.is_reg() && self.i_child_cnt > 0 { + let mut chunk = RafsV5ChunkInfo::new(); + for _ in 0..self.i_child_cnt { + chunk.load(r)?; + self.i_data.push(Arc::new(CachedChunkInfoV5::from(&chunk))); + } + } + + Ok(()) + } + + /// Load an inode metadata from a reader. 
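As `load_name` and `load_symlink` above show, each variable-length field of the on-disk inode (name, symlink target) is followed by padding so the next field starts on an 8-byte (`RAFSV5_ALIGNMENT`) boundary. A self-contained sketch of that read-then-realign step using only std I/O (the helper name is made up):

```rust
use std::io::{Cursor, Read, Result, Seek, SeekFrom};

// Read a `size`-byte field, then skip padding so the stream lands on the next
// `align`-byte boundary. `align` must be a power of two.
fn read_aligned_field<R: Read + Seek>(r: &mut R, size: usize, align: usize) -> Result<Vec<u8>> {
    let mut buf = vec![0u8; size];
    r.read_exact(&mut buf)?;
    let suffix = size & (align - 1);
    if suffix != 0 {
        r.seek(SeekFrom::Current((align - suffix) as i64))?;
    }
    Ok(buf)
}

fn main() -> Result<()> {
    // A 3-byte name "etc" padded to 8 bytes, followed by the next field.
    let mut r = Cursor::new(b"etc\0\0\0\0\0NEXT".to_vec());
    assert_eq!(read_aligned_field(&mut r, 3, 8)?, b"etc".to_vec());
    assert_eq!(r.position(), 8); // reader now sits on the next aligned field
    Ok(())
}
```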
+ pub fn load(&mut self, sb: &RafsSuperMeta, r: &mut RafsIoReader) -> Result<()> { + // RafsV5Inode...name...symbol link...xattrs...chunks + let mut inode = RafsV5Inode::new(); + + // parse ondisk inode: RafsV5Inode|name|symbol|xattr|chunks + r.read_exact(inode.as_mut())?; + self.copy_from_ondisk(&inode); + self.load_name(inode.i_name_size as usize, r)?; + self.load_symlink(inode.i_symlink_size as usize, r)?; + self.load_xattr(r)?; + self.load_chunk_info(r)?; + self.i_chunksize = sb.chunk_size; + self.validate(sb.inodes_count, self.i_chunksize as u64)?; + + Ok(()) + } + + fn copy_from_ondisk(&mut self, inode: &RafsV5Inode) { + self.i_ino = inode.i_ino; + self.i_digest = inode.i_digest; + self.i_parent = inode.i_parent; + self.i_mode = inode.i_mode; + self.i_projid = inode.i_projid; + self.i_uid = inode.i_uid; + self.i_gid = inode.i_gid; + self.i_flags = inode.i_flags; + self.i_size = inode.i_size; + self.i_nlink = inode.i_nlink; + self.i_blocks = inode.i_blocks; + self.i_child_idx = inode.i_child_index; + self.i_child_cnt = inode.i_child_count; + self.i_rdev = inode.i_rdev; + self.i_mtime = inode.i_mtime; + self.i_mtime_nsec = inode.i_mtime_nsec; + } + + fn add_child(&mut self, child: Arc) { + self.i_child.push(child); + if self.i_child.len() == (self.i_child_cnt as usize) { + // all children are ready, do sort + self.i_child.sort_by(|c1, c2| c1.i_name.cmp(&c2.i_name)); + } + } +} + +impl RafsInode for CachedInodeV5 { + // Somehow we got invalid `inode_count` from superblock. + fn validate(&self, _inode_count: u64, chunk_size: u64) -> Result<()> { + if self.i_ino == 0 + // || self.i_ino > inode_count + || self.i_nlink == 0 + || (self.i_ino != RAFS_V5_ROOT_INODE && self.i_parent == 0) + || self.i_name.len() > RAFS_MAX_NAME + || self.i_name.is_empty() + { + return Err(einval!("invalid inode")); + } + if !self.is_hardlink() && self.i_parent >= self.i_ino { + return Err(einval!("invalid parent inode")); + } + if self.is_reg() { + let chunks = (self.i_size + chunk_size - 1) / chunk_size; + if !self.has_hole() && chunks != self.i_data.len() as u64 { + return Err(einval!("invalid chunk count")); + } + let blocks = (self.i_size + 511) / 512; + // Old stargz builder generates inode with 0 blocks + if blocks != self.i_blocks && self.i_blocks != 0 { + return Err(einval!("invalid block count")); + } + } else if self.is_dir() { + if self.i_child_cnt != 0 && (self.i_child_idx as Inode) <= self.i_ino { + return Err(einval!("invalid directory")); + } + } else if self.is_symlink() && self.i_target.is_empty() { + return Err(einval!("invalid symlink target")); + } + + Ok(()) + } + + fn alloc_bio_vecs( + &self, + _device: &BlobDevice, + offset: u64, + size: usize, + user_io: bool, + ) -> Result> { + rafsv5_alloc_bio_vecs(self, offset, size, user_io) + } + + fn collect_descendants_inodes( + &self, + descendants: &mut Vec>, + ) -> Result { + if !self.is_dir() { + return Err(enotdir!()); + } + + let mut child_dirs: Vec> = Vec::new(); + + for child_inode in &self.i_child { + if child_inode.is_dir() { + child_dirs.push(child_inode.clone()); + } else if !child_inode.is_empty_size() { + descendants.push(child_inode.clone()); + } + } + + for d in child_dirs { + d.collect_descendants_inodes(descendants)?; + } + + Ok(0) + } + + #[inline] + fn get_entry(&self) -> Entry { + Entry { + attr: self.get_attr().into(), + inode: self.i_ino, + generation: 0, + attr_flags: 0, + attr_timeout: self.i_meta.attr_timeout, + entry_timeout: self.i_meta.entry_timeout, + } + } + + #[inline] + fn get_attr(&self) -> fuse_abi::Attr { + 
fuse_abi::Attr { + ino: self.i_ino, + size: self.i_size, + blocks: self.i_blocks, + mode: self.i_mode, + nlink: self.i_nlink as u32, + blksize: RAFS_ATTR_BLOCK_SIZE, + rdev: self.i_rdev, + ..Default::default() + } + } + + #[inline] + fn is_blkdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 + } + + #[inline] + fn is_chrdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 + } + + #[inline] + fn is_sock(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 + } + + #[inline] + fn is_fifo(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 + } + + #[inline] + fn is_dir(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 + } + + #[inline] + fn is_symlink(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 + } + + #[inline] + fn is_reg(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 + } + + #[inline] + fn is_hardlink(&self) -> bool { + !self.is_dir() && self.i_nlink > 1 + } + + #[inline] + fn has_xattr(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::XATTR) + } + + #[inline] + fn get_xattr(&self, name: &OsStr) -> Result> { + Ok(self.i_xattr.get(name).cloned()) + } + + fn get_xattrs(&self) -> Result> { + Ok(self + .i_xattr + .keys() + .map(|k| k.as_bytes().to_vec()) + .collect::>()) + } + + #[inline] + fn get_symlink(&self) -> Result { + if !self.is_symlink() { + Err(einval!("inode is not a symlink")) + } else { + Ok(self.i_target.clone()) + } + } + + #[inline] + fn get_symlink_size(&self) -> u16 { + if self.is_symlink() { + self.i_target.byte_size() as u16 + } else { + 0 + } + } + + fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { + // offset 0 and 1 is for "." and ".." respectively. + let mut cur_offset = entry_offset; + + if cur_offset == 0 { + cur_offset += 1; + // Safe to unwrap since conversion from DOT to os string can't fail. + match handler( + None, + OsString::from_str(DOT).unwrap(), + self.ino(), + cur_offset, + ) { + Ok(RafsInodeWalkAction::Continue) => {} + Ok(RafsInodeWalkAction::Break) => return Ok(()), + Err(e) => return Err(e), + } + } + + if cur_offset == 1 { + let parent = if self.ino() == 1 { 1 } else { self.parent() }; + cur_offset += 1; + // Safe to unwrap since conversion from DOTDOT to os string can't fail. 
+ match handler( + None, + OsString::from_str(DOTDOT).unwrap(), + parent, + cur_offset, + ) { + Ok(RafsInodeWalkAction::Continue) => {} + Ok(RafsInodeWalkAction::Break) => return Ok(()), + Err(e) => return Err(e), + }; + } + + let mut idx = cur_offset - 2; + while idx < self.get_child_count() as u64 { + assert!(idx <= u32::MAX as u64); + let child = self.get_child_by_index(idx as u32)?; + cur_offset += 1; + match handler(None, child.name(), child.ino(), cur_offset) { + Ok(RafsInodeWalkAction::Continue) => idx += 1, + Ok(RafsInodeWalkAction::Break) => break, + Err(e) => return Err(e), + } + } + + Ok(()) + } + + fn get_child_by_name(&self, name: &OsStr) -> Result> { + let idx = self + .i_child + .binary_search_by(|c| c.i_name.as_os_str().cmp(name)) + .map_err(|_| enoent!())?; + Ok(self.i_child[idx].clone()) + } + + #[inline] + fn get_child_by_index(&self, index: u32) -> Result> { + if (index as usize) < self.i_child.len() { + Ok(self.i_child[index as usize].clone()) + } else { + Err(einval!("invalid child index")) + } + } + + #[inline] + fn get_child_count(&self) -> u32 { + self.i_child_cnt + } + + #[inline] + fn get_child_index(&self) -> Result { + Ok(self.i_child_idx) + } + + #[inline] + fn get_chunk_count(&self) -> u32 { + self.get_child_count() + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(ino, i_ino, u64); + impl_getter!(size, i_size, u64); + impl_getter!(rdev, i_rdev, u32); + impl_getter!(projid, i_projid, u32); +} + +impl RafsInodeExt for CachedInodeV5 { + fn as_inode(&self) -> &dyn RafsInode { + self + } + + #[inline] + fn name(&self) -> OsString { + self.i_name.clone() + } + + #[inline] + fn get_name_size(&self) -> u16 { + self.i_name.byte_size() as u16 + } + + #[inline] + fn flags(&self) -> u64 { + self.i_flags.bits() + } + + #[inline] + fn get_digest(&self) -> RafsDigest { + self.i_digest + } + + #[inline] + fn get_chunk_info(&self, idx: u32) -> Result> { + if (idx as usize) < self.i_data.len() { + Ok(self.i_data[idx as usize].clone()) + } else { + Err(einval!("invalid chunk index")) + } + } + + impl_getter!(parent, i_parent, u64); +} + +impl RafsV5InodeChunkOps for CachedInodeV5 { + fn get_chunk_info_v5(&self, idx: u32) -> Result> { + if (idx as usize) < self.i_data.len() { + Ok(self.i_data[idx as usize].clone() as Arc) + } else { + Err(einval!("invalid chunk index")) + } + } +} + +impl RafsV5InodeOps for CachedInodeV5 { + fn get_blob_by_index(&self, idx: u32) -> Result> { + self.i_blob_table.get(idx) + } + + fn get_chunk_size(&self) -> u32 { + self.i_chunksize + } + + fn has_hole(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::HAS_HOLE) + } +} + +/// Cached information about an Rafs Data Chunk. +#[derive(Clone, Default, Debug)] +pub struct CachedChunkInfoV5 { + // block hash + block_id: Arc, + // blob containing the block + blob_index: u32, + // chunk index in blob + index: u32, + // position of the block within the file + file_offset: u64, + // offset of the block within the blob + compressed_offset: u64, + uncompressed_offset: u64, + // size of the block, compressed + compressed_size: u32, + uncompressed_size: u32, + flags: BlobChunkFlags, +} + +impl CachedChunkInfoV5 { + /// Create a new instance of `CachedChunkInfoV5`. + pub fn new() -> Self { + CachedChunkInfoV5 { + ..Default::default() + } + } + + /// Load a chunk metadata from a reader. 
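A brief aside on the file-type predicates above (`is_dir`, `is_reg`, `is_symlink`, and friends): the type lives in the `S_IFMT` bits of `i_mode`, so each check masks and compares, and a hardlink is simply a non-directory whose link count exceeds one. A minimal sketch with the same `libc` constants this module already uses:

```rust
// Mask out the file-type bits and compare against a specific type constant.
fn is_dir(mode: u32) -> bool {
    mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32
}

fn is_reg(mode: u32) -> bool {
    mode & libc::S_IFMT as u32 == libc::S_IFREG as u32
}

// A non-directory with more than one link is treated as a hardlink.
fn is_hardlink(mode: u32, nlink: u32) -> bool {
    !is_dir(mode) && nlink > 1
}

fn main() {
    let file_mode = libc::S_IFREG as u32 | 0o644;
    assert!(is_reg(file_mode));
    assert!(!is_dir(file_mode));
    assert!(is_hardlink(file_mode, 2));
}
```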
+ pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + let mut chunk = RafsV5ChunkInfo::new(); + + r.read_exact(chunk.as_mut())?; + self.copy_from_ondisk(&chunk); + + Ok(()) + } + + fn copy_from_ondisk(&mut self, chunk: &RafsV5ChunkInfo) { + self.block_id = Arc::new(chunk.block_id); + self.blob_index = chunk.blob_index; + self.index = chunk.index; + self.compressed_offset = chunk.compressed_offset; + self.uncompressed_offset = chunk.uncompressed_offset; + self.uncompressed_size = chunk.uncompressed_size; + self.file_offset = chunk.file_offset; + self.compressed_size = chunk.compressed_size; + self.flags = chunk.flags; + } +} + +impl BlobChunkInfo for CachedChunkInfoV5 { + fn chunk_id(&self) -> &RafsDigest { + &self.block_id + } + + fn id(&self) -> u32 { + self.index() + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, blob_index, u32); + impl_getter!(compressed_offset, compressed_offset, u64); + impl_getter!(compressed_size, compressed_size, u32); + impl_getter!(uncompressed_offset, uncompressed_offset, u64); + impl_getter!(uncompressed_size, uncompressed_size, u32); +} + +impl BlobV5ChunkInfo for CachedChunkInfoV5 { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_getter!(index, index, u32); + impl_getter!(file_offset, file_offset, u64); + impl_getter!(flags, flags, BlobChunkFlags); +} + +impl From<&RafsV5ChunkInfo> for CachedChunkInfoV5 { + fn from(info: &RafsV5ChunkInfo) -> Self { + let mut chunk = CachedChunkInfoV5::new(); + chunk.copy_from_ondisk(info); + chunk + } +} + +#[cfg(test)] +mod cached_tests { + use std::cmp; + use std::ffi::{OsStr, OsString}; + use std::fs::OpenOptions; + use std::io::Seek; + use std::io::SeekFrom::Start; + use std::os::unix::ffi::OsStrExt; + use std::sync::Arc; + + use nydus_storage::device::{BlobDevice, BlobFeatures}; + use nydus_utils::digest::{Algorithm, RafsDigest}; + use nydus_utils::ByteSize; + use storage::device::v5::BlobV5ChunkInfo; + use storage::device::{BlobChunkFlags, BlobChunkInfo}; + + use crate::metadata::cached_v5::{CachedInodeV5, CachedSuperBlockV5}; + use crate::metadata::inode::RafsInodeFlags; + use crate::metadata::layout::v5::{ + rafsv5_align, RafsV5BlobTable, RafsV5ChunkInfo, RafsV5Inode, RafsV5InodeWrapper, + }; + use crate::metadata::layout::{RafsXAttrs, RAFS_V5_ROOT_INODE}; + use crate::metadata::{ + RafsInode, RafsInodeWalkAction, RafsStore, RafsSuperBlock, RafsSuperInodes, RafsSuperMeta, + }; + use crate::{BufWriter, RafsInodeExt, RafsIoRead, RafsIoReader}; + use vmm_sys_util::tempfile::TempFile; + + use super::CachedChunkInfoV5; + + #[test] + fn test_load_inode() { + let mut f = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .read(true) + .open("/tmp/buf_1") + .unwrap(); + let mut writer = BufWriter::new(f.try_clone().unwrap()); + let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; + + let mut ondisk_inode = RafsV5Inode::new(); + let file_name = OsString::from("c_inode_1"); + let mut xattr = RafsXAttrs::default(); + xattr + .add(OsString::from("user.k1"), vec![1u8, 2u8, 3u8, 4u8]) + .unwrap(); + xattr + .add(OsString::from("user.k2"), vec![10u8, 11u8, 12u8]) + .unwrap(); + ondisk_inode.i_name_size = file_name.byte_size() as u16; + ondisk_inode.i_child_count = 1; + ondisk_inode.i_ino = 3; + ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; + ondisk_inode.i_size = 
8192; + ondisk_inode.i_mode = libc::S_IFREG as u32; + ondisk_inode.i_nlink = 1; + ondisk_inode.i_blocks = 16; + let mut chunk = RafsV5ChunkInfo::new(); + chunk.uncompressed_size = 8192; + chunk.uncompressed_offset = 0; + chunk.compressed_offset = 0; + chunk.compressed_size = 4096; + let inode = RafsV5InodeWrapper { + name: file_name.as_os_str(), + symlink: None, + inode: &ondisk_inode, + }; + inode.store(&mut writer).unwrap(); + chunk.store(&mut writer).unwrap(); + xattr.store_v5(&mut writer).unwrap(); + + f.seek(Start(0)).unwrap(); + let md = RafsSuperMeta { + inodes_count: 100, + chunk_size: 1024 * 1024, + ..Default::default() + }; + let meta = Arc::new(md); + let blob_table = Arc::new(RafsV5BlobTable::new()); + let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); + cached_inode.load(&meta, &mut reader).unwrap(); + // check data + assert_eq!(cached_inode.i_name, file_name.to_str().unwrap()); + assert_eq!(cached_inode.i_child_cnt, 1); + let attr = cached_inode.get_attr(); + assert_eq!(attr.ino, 3); + assert_eq!(attr.size, 8192); + let cached_chunk = cached_inode.get_chunk_info(0).unwrap(); + assert_eq!(cached_chunk.compressed_size(), 4096); + assert_eq!(cached_chunk.uncompressed_size(), 8192); + assert_eq!(cached_chunk.compressed_offset(), 0); + assert_eq!(cached_chunk.uncompressed_offset(), 0); + let c_xattr = cached_inode.get_xattrs().unwrap(); + for k in c_xattr.iter() { + let k = OsStr::from_bytes(k); + let v = cached_inode.get_xattr(k).unwrap(); + assert_eq!(xattr.get(k).cloned().unwrap(), v.unwrap()); + } + + // close file + drop(f); + std::fs::remove_file("/tmp/buf_1").unwrap(); + } + + #[test] + fn test_load_symlink() { + let mut f = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .read(true) + .open("/tmp/buf_2") + .unwrap(); + let mut writer = BufWriter::new(f.try_clone().unwrap()); + let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; + let file_name = OsString::from("c_inode_2"); + let symlink_name = OsString::from("c_inode_1"); + let mut ondisk_inode = RafsV5Inode::new(); + ondisk_inode.i_name_size = file_name.byte_size() as u16; + ondisk_inode.i_ino = 3; + ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; + ondisk_inode.i_nlink = 1; + ondisk_inode.i_symlink_size = symlink_name.byte_size() as u16; + ondisk_inode.i_mode = libc::S_IFLNK as u32; + + let inode = RafsV5InodeWrapper { + name: file_name.as_os_str(), + symlink: Some(symlink_name.as_os_str()), + inode: &ondisk_inode, + }; + inode.store(&mut writer).unwrap(); + + f.seek(Start(0)).unwrap(); + let mut meta = Arc::new(RafsSuperMeta::default()); + Arc::get_mut(&mut meta).unwrap().chunk_size = 1024 * 1024; + Arc::get_mut(&mut meta).unwrap().inodes_count = 4; + let blob_table = Arc::new(RafsV5BlobTable::new()); + let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); + cached_inode.load(&meta, &mut reader).unwrap(); + + assert_eq!(cached_inode.i_name, "c_inode_2"); + assert_eq!(cached_inode.get_symlink().unwrap(), symlink_name); + + drop(f); + std::fs::remove_file("/tmp/buf_2").unwrap(); + } + + #[test] + fn test_alloc_bio_desc() { + let mut f = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .read(true) + .open("/tmp/buf_3") + .unwrap(); + let mut writer = BufWriter::new(f.try_clone().unwrap()); + let mut reader = Box::new(f.try_clone().unwrap()) as RafsIoReader; + let file_name = OsString::from("c_inode_3"); + let mut ondisk_inode = RafsV5Inode::new(); + ondisk_inode.i_name_size = rafsv5_align(file_name.len()) as u16; + ondisk_inode.i_ino = 
3; + ondisk_inode.i_parent = RAFS_V5_ROOT_INODE; + ondisk_inode.i_nlink = 1; + ondisk_inode.i_child_count = 4; + ondisk_inode.i_mode = libc::S_IFREG as u32; + ondisk_inode.i_size = 1024 * 1024 * 3 + 8192; + ondisk_inode.i_blocks = 6160; + + let inode = RafsV5InodeWrapper { + name: file_name.as_os_str(), + symlink: None, + inode: &ondisk_inode, + }; + inode.store(&mut writer).unwrap(); + + let mut size = ondisk_inode.i_size; + for i in 0..ondisk_inode.i_child_count { + let mut chunk = RafsV5ChunkInfo::new(); + chunk.uncompressed_size = cmp::min(1024 * 1024, size as u32); + chunk.uncompressed_offset = (i * 1024 * 1024) as u64; + chunk.compressed_size = chunk.uncompressed_size / 2; + chunk.compressed_offset = ((i * 1024 * 1024) / 2) as u64; + chunk.file_offset = chunk.uncompressed_offset; + chunk.store(&mut writer).unwrap(); + size -= chunk.uncompressed_size as u64; + } + f.seek(Start(0)).unwrap(); + let mut meta = Arc::new(RafsSuperMeta::default()); + Arc::get_mut(&mut meta).unwrap().chunk_size = 1024 * 1024; + Arc::get_mut(&mut meta).unwrap().inodes_count = 4; + let mut blob_table = Arc::new(RafsV5BlobTable::new()); + Arc::get_mut(&mut blob_table).unwrap().add( + String::from("123333"), + 0, + 0, + 0, + 0, + 0, + 0, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + meta.flags, + false, + ); + let mut cached_inode = CachedInodeV5::new(blob_table, meta.clone()); + cached_inode.load(&meta, &mut reader).unwrap(); + let device = BlobDevice::default(); + let descs = cached_inode.alloc_bio_vecs(&device, 0, 100, true).unwrap(); + let desc1 = &descs[0]; + assert_eq!(desc1.size(), 100); + assert_eq!(desc1.len(), 1); + assert_eq!(desc1.blob_io_desc(0).unwrap().offset, 0); + assert_eq!(desc1.blob_io_desc(0).unwrap().blob.blob_id(), "123333"); + + let descs = cached_inode + .alloc_bio_vecs(&device, 1024 * 1024 - 100, 200, true) + .unwrap(); + let desc2 = &descs[0]; + assert_eq!(desc2.size(), 200); + assert_eq!(desc2.len(), 2); + assert_eq!(desc2.blob_io_desc(0).unwrap().offset, 1024 * 1024 - 100); + assert_eq!(desc2.blob_io_desc(0).unwrap().size, 100); + assert_eq!(desc2.blob_io_desc(1).unwrap().offset, 0); + assert_eq!(desc2.blob_io_desc(1).unwrap().size, 100); + + let descs = cached_inode + .alloc_bio_vecs(&device, 1024 * 1024 + 8192, 1024 * 1024 * 4, true) + .unwrap(); + let desc3 = &descs[0]; + assert_eq!(desc3.size(), 1024 * 1024 * 2); + assert_eq!(desc3.len(), 3); + assert_eq!(desc3.blob_io_desc(2).unwrap().size, 8192); + + drop(f); + std::fs::remove_file("/tmp/buf_3").unwrap(); + } + + #[test] + fn test_rafsv5_superblock() { + let md = RafsSuperMeta::default(); + let mut sb = CachedSuperBlockV5::new(md, true); + + assert_eq!(sb.max_inode, RAFS_V5_ROOT_INODE); + assert_eq!(sb.s_inodes.len(), 0); + assert!(sb.validate_inode); + + let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); + inode.i_ino = 1; + inode.i_nlink = 1; + inode.i_child_idx = 2; + inode.i_child_cnt = 3; + inode.i_mode = libc::S_IFDIR as u32; + sb.hash_inode(Arc::new(inode)).unwrap(); + assert_eq!(sb.max_inode, 1); + assert_eq!(sb.s_inodes.len(), 1); + + let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); + inode.i_ino = 2; + inode.i_mode = libc::S_IFDIR as u32; + inode.i_nlink = 2; + inode.i_parent = RAFS_V5_ROOT_INODE; + sb.hash_inode(Arc::new(inode)).unwrap(); + assert_eq!(sb.max_inode, 2); + assert_eq!(sb.s_inodes.len(), 2); + + let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); + inode.i_ino = 2; + inode.i_mode = libc::S_IFDIR as u32; + inode.i_nlink = 2; + inode.i_parent 
= RAFS_V5_ROOT_INODE; + sb.hash_inode(Arc::new(inode)).unwrap(); + assert_eq!(sb.max_inode, 2); + assert_eq!(sb.s_inodes.len(), 2); + + let mut inode = CachedInodeV5::new(sb.s_blob.clone(), sb.s_meta.clone()); + inode.i_ino = 4; + inode.i_mode = libc::S_IFDIR as u32; + inode.i_nlink = 1; + inode.i_parent = RAFS_V5_ROOT_INODE; + sb.hash_inode(Arc::new(inode)).unwrap(); + assert_eq!(sb.max_inode, 4); + assert_eq!(sb.s_inodes.len(), 3); + } + + fn get_streams() -> (Box, BufWriter) { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let writer: BufWriter = BufWriter::new(w); + let reader: Box = Box::new(r); + (reader, writer) + } + + #[test] + fn test_cached_super_block_v5() { + let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); + let meta = RafsSuperMeta::default(); + let mut node = CachedInodeV5 { + i_ino: 0, + ..CachedInodeV5::default() + }; + node.i_mode |= libc::S_IFDIR as u32; + node.i_child_idx = 2; + node.i_flags = RafsInodeFlags::SYMLINK; + node.i_name = OsStr::new("foo").into(); + node.i_digest = digest; + let mut child_node = CachedInodeV5::default(); + child_node.i_mode |= libc::S_IFDIR as u32; + child_node.i_ino = 1; + child_node.i_name = OsStr::new("bar").into(); + let mut blk = CachedSuperBlockV5::new(meta, false); + let (r, _w) = get_streams(); + let mut r = r as RafsIoReader; + assert!(blk.load_all_inodes(&mut r).is_ok()); + assert_eq!(blk.get_max_ino(), RAFS_V5_ROOT_INODE); + assert!(blk.get_inode(0, false).is_err()); + assert!(blk.get_extended_inode(0, false).is_err()); + + blk.s_inodes.insert(0, Arc::new(node.clone())); + assert!(blk.get_inode(0, false).is_ok()); + assert!(blk.get_extended_inode(0, false).is_ok()); + + blk.destroy(); + assert!(blk.s_inodes.is_empty()); + let blobs = blk.get_blob_extra_infos(); + assert!(blobs.unwrap().is_empty()); + assert_eq!(blk.root_ino(), RAFS_V5_ROOT_INODE); + + node.add_child(Arc::new(child_node)); + assert_eq!(node.i_child.len(), 1); + + let mut descendants = Vec::>::new(); + node.collect_descendants_inodes(&mut descendants).unwrap(); + assert!(node.collect_descendants_inodes(&mut descendants).is_ok()); + assert_eq!(node.get_entry().inode, node.ino()); + assert_eq!(node.get_xattr(OsStr::new("foobar")).unwrap(), None); + assert!(!node.is_blkdev()); + assert!(!node.is_chrdev()); + assert!(!node.is_sock()); + assert!(!node.is_fifo()); + assert_eq!(node.get_symlink_size(), 0); + + node.i_child_cnt = 1; + let mut found = false; + node.walk_children_inodes(0, &mut |_node, _child_name, child_ino, _offset| { + if child_ino == 1 { + found = true; + Ok(RafsInodeWalkAction::Break) + } else { + Ok(RafsInodeWalkAction::Continue) + } + }) + .unwrap(); + assert!(found); + let rafsinode = node.as_inode(); + assert!(rafsinode.get_child_by_name(OsStr::new("bar")).is_ok()); + assert!(rafsinode.get_child_by_index(0).is_ok()); + assert!(rafsinode.get_child_by_index(1).is_err()); + assert_eq!(rafsinode.get_child_index().unwrap(), 2); + + assert_eq!(node.name(), "foo"); + assert_eq!(node.get_name_size(), "foo".len() as u16); + assert_eq!(node.flags(), RafsInodeFlags::SYMLINK.bits()); + assert_eq!(node.get_digest(), digest); + } + + #[test] + fn test_cached_chunk_info_v5() { + let mut info = CachedChunkInfoV5::new(); + info.index = 1024; + info.blob_index = 1; + info.flags = BlobChunkFlags::COMPRESSED; + + assert_eq!(info.index(), 1024 as u32); + 
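This test reads the chunk both through the v5-specific accessors and, via `as_base()`, through the generic `BlobChunkInfo` trait object. As a minimal sketch (not part of this patch) of why that indirection is useful, a helper can consume any `&dyn BlobChunkInfo` regardless of RAFS version; only trait methods already used in this file (`compressed_size()`, `uncompressed_size()`) are assumed:

```rust
// Illustrative sketch, not part of this patch: `as_base()` exposes the generic
// `BlobChunkInfo` view, so helpers can stay agnostic of the RAFS version.
// Assumes `use nydus_storage::device::BlobChunkInfo;` as in this module.
fn compression_ratio(chunk: &dyn BlobChunkInfo) -> f64 {
    if chunk.uncompressed_size() == 0 {
        return 0.0;
    }
    chunk.compressed_size() as f64 / chunk.uncompressed_size() as f64
}
```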
assert!(info.is_compressed()); + assert!(!info.is_encrypted()); + let info = info.as_base(); + + assert_eq!(info.blob_index(), 1 as u32); + assert!(info.is_compressed()); + assert!(!info.is_encrypted()); + } +} diff --git a/rafs/src/metadata/chunk.rs b/rafs/src/metadata/chunk.rs index 9321fb2bfda..4c345a5cbc0 100644 --- a/rafs/src/metadata/chunk.rs +++ b/rafs/src/metadata/chunk.rs @@ -1,638 +1,638 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::{self, Debug, Display, Formatter}; -use std::ops::Deref; -use std::sync::Arc; - -use anyhow::{Context, Result}; -use nydus_storage::device::v5::BlobV5ChunkInfo; -use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo}; -use nydus_storage::meta::BlobMetaChunk; -use nydus_utils::digest::RafsDigest; - -use crate::metadata::cached_v5::CachedChunkInfoV5; -use crate::metadata::direct_v5::DirectChunkInfoV5; -use crate::metadata::direct_v6::{DirectChunkInfoV6, TarfsChunkInfoV6}; -use crate::metadata::layout::v5::RafsV5ChunkInfo; -use crate::metadata::{RafsStore, RafsVersion}; -use crate::RafsIoWrite; - -/// A wrapper to encapsulate different versions of chunk information objects. -#[derive(Clone)] -pub enum ChunkWrapper { - /// Chunk info for RAFS v5. - V5(RafsV5ChunkInfo), - /// Chunk info RAFS v6, reuse `RafsV5ChunkInfo` as IR for v6. - V6(RafsV5ChunkInfo), - /// Reference to a `BlobChunkInfo` object. - Ref(Arc), -} - -impl Debug for ChunkWrapper { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Self::V5(c) => write!(f, "{:?}", c), - Self::V6(c) => write!(f, "{:?}", c), - Self::Ref(c) => { - let chunk = to_rafs_v5_chunk_info(as_blob_v5_chunk_info(c.deref())); - write!(f, "{:?}", chunk) - } - } - } -} - -impl Display for ChunkWrapper { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "id {}, index {}, blob_index {}, file_offset {}, compressed {}/{}, uncompressed {}/{}", - self.id(), - self.index(), - self.blob_index(), - self.file_offset(), - self.compressed_offset(), - self.compressed_size(), - self.uncompressed_offset(), - self.uncompressed_size(), - ) - } -} - -impl ChunkWrapper { - /// Create a new `ChunkWrapper` object with default value. - pub fn new(version: RafsVersion) -> Self { - match version { - RafsVersion::V5 => ChunkWrapper::V5(RafsV5ChunkInfo::default()), - RafsVersion::V6 => ChunkWrapper::V6(RafsV5ChunkInfo::default()), - } - } - - /// Create a `ChunkWrapper` object from a `BlobChunkInfo` trait object. - pub fn from_chunk_info(cki: Arc) -> Self { - Self::Ref(cki) - } - - /// Get digest of chunk data, which is also used as chunk ID. - pub fn id(&self) -> &RafsDigest { - match self { - ChunkWrapper::V5(c) => &c.block_id, - ChunkWrapper::V6(c) => &c.block_id, - ChunkWrapper::Ref(c) => c.chunk_id(), - } - } - - /// Set digest of chunk data, which is also used as chunk ID. - pub fn set_id(&mut self, id: RafsDigest) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.block_id = id, - ChunkWrapper::V6(c) => c.block_id = id, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get index of the data blob associated with the chunk. - pub fn blob_index(&self) -> u32 { - match self { - ChunkWrapper::V5(c) => c.blob_index, - ChunkWrapper::V6(c) => c.blob_index, - ChunkWrapper::Ref(c) => c.blob_index(), - } - } - - /// Set index of the data blob associated with the chunk. 
- pub fn set_blob_index(&mut self, index: u32) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.blob_index = index, - ChunkWrapper::V6(c) => c.blob_index = index, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get offset into the compressed data blob to fetch chunk data. - pub fn compressed_offset(&self) -> u64 { - match self { - ChunkWrapper::V5(c) => c.compressed_offset, - ChunkWrapper::V6(c) => c.compressed_offset, - ChunkWrapper::Ref(c) => c.compressed_offset(), - } - } - - /// Set offset into the compressed data blob to fetch chunk data. - pub fn set_compressed_offset(&mut self, offset: u64) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.compressed_offset = offset, - ChunkWrapper::V6(c) => c.compressed_offset = offset, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get size of compressed chunk data. - pub fn compressed_size(&self) -> u32 { - match self { - ChunkWrapper::V5(c) => c.compressed_size, - ChunkWrapper::V6(c) => c.compressed_size, - ChunkWrapper::Ref(c) => c.compressed_size(), - } - } - - /// Set size of compressed chunk data. - pub fn set_compressed_size(&mut self, size: u32) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.compressed_size = size, - ChunkWrapper::V6(c) => c.compressed_size = size, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get offset into the uncompressed data blob file to get chunk data. - pub fn uncompressed_offset(&self) -> u64 { - match self { - ChunkWrapper::V5(c) => c.uncompressed_offset, - ChunkWrapper::V6(c) => c.uncompressed_offset, - ChunkWrapper::Ref(c) => c.uncompressed_offset(), - } - } - - /// Set offset into the uncompressed data blob file to get chunk data. - pub fn set_uncompressed_offset(&mut self, offset: u64) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.uncompressed_offset = offset, - ChunkWrapper::V6(c) => c.uncompressed_offset = offset, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get size of uncompressed chunk data. - pub fn uncompressed_size(&self) -> u32 { - match self { - ChunkWrapper::V5(c) => c.uncompressed_size, - ChunkWrapper::V6(c) => c.uncompressed_size, - ChunkWrapper::Ref(c) => c.uncompressed_size(), - } - } - - /// Set size of uncompressed chunk data. - pub fn set_uncompressed_size(&mut self, size: u32) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.uncompressed_size = size, - ChunkWrapper::V6(c) => c.uncompressed_size = size, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get chunk index into the RAFS chunk information array, used by RAFS v5. - pub fn index(&self) -> u32 { - match self { - ChunkWrapper::V5(c) => c.index, - ChunkWrapper::V6(c) => c.index, - ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()).index(), - } - } - - /// Set chunk index into the RAFS chunk information array, used by RAFS v5. - pub fn set_index(&mut self, index: u32) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.index = index, - ChunkWrapper::V6(c) => c.index = index, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Get chunk offset in the file it belongs to, RAFS v5. - pub fn file_offset(&self) -> u64 { - match self { - ChunkWrapper::V5(c) => c.file_offset, - ChunkWrapper::V6(c) => c.file_offset, - ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()).file_offset(), - } - } - - /// Set chunk offset in the file it belongs to, RAFS v5. 
- pub fn set_file_offset(&mut self, offset: u64) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.file_offset = offset, - ChunkWrapper::V6(c) => c.file_offset = offset, - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Check whether the chunk is compressed or not. - pub fn is_compressed(&self) -> bool { - match self { - ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::COMPRESSED), - ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::COMPRESSED), - ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) - .flags() - .contains(BlobChunkFlags::COMPRESSED), - } - } - - /// Set flag for whether chunk is compressed. - pub fn set_compressed(&mut self, compressed: bool) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::COMPRESSED, compressed), - ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::COMPRESSED, compressed), - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Check whether the chunk is encrypted or not. - pub fn is_encrypted(&self) -> bool { - match self { - ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::ENCYPTED), - ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::ENCYPTED), - ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) - .flags() - .contains(BlobChunkFlags::ENCYPTED), - } - } - - /// Set flag for whether chunk is encrypted. - pub fn set_encrypted(&mut self, encrypted: bool) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::ENCYPTED, encrypted), - ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::ENCYPTED, encrypted), - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Set flag for whether chunk is batch chunk. - pub fn set_batch(&mut self, batch: bool) { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::BATCH, batch), - ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::BATCH, batch), - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - } - - /// Check whether the chunk is batch chunk or not. - pub fn is_batch(&self) -> bool { - match self { - ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::BATCH), - ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::BATCH), - ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) - .flags() - .contains(BlobChunkFlags::BATCH), - } - } - - #[allow(clippy::too_many_arguments)] - /// Set a group of chunk information fields. 
- pub fn set_chunk_info( - &mut self, - blob_index: u32, - chunk_index: u32, - file_offset: u64, - uncompressed_offset: u64, - uncompressed_size: u32, - compressed_offset: u64, - compressed_size: u32, - is_compressed: bool, - is_encrypted: bool, - ) -> Result<()> { - self.ensure_owned(); - match self { - ChunkWrapper::V5(c) => { - c.index = chunk_index; - c.blob_index = blob_index; - c.file_offset = file_offset; - c.compressed_offset = compressed_offset; - c.compressed_size = compressed_size; - c.uncompressed_offset = uncompressed_offset; - c.uncompressed_size = uncompressed_size; - if is_compressed { - c.flags |= BlobChunkFlags::COMPRESSED; - } - } - ChunkWrapper::V6(c) => { - c.index = chunk_index; - c.blob_index = blob_index; - c.file_offset = file_offset; - c.compressed_offset = compressed_offset; - c.compressed_size = compressed_size; - c.uncompressed_offset = uncompressed_offset; - c.uncompressed_size = uncompressed_size; - if is_compressed { - c.flags |= BlobChunkFlags::COMPRESSED; - } - if is_encrypted { - c.flags |= BlobChunkFlags::ENCYPTED; - } - } - ChunkWrapper::Ref(_c) => panic!("unexpected"), - } - - Ok(()) - } - - /// Copy chunk information from another `ChunkWrapper` object. - pub fn copy_from(&mut self, other: &Self) { - self.ensure_owned(); - match (self, other) { - (ChunkWrapper::V5(s), ChunkWrapper::V5(o)) => s.clone_from(o), - (ChunkWrapper::V6(s), ChunkWrapper::V6(o)) => s.clone_from(o), - (ChunkWrapper::V5(s), ChunkWrapper::V6(o)) => s.clone_from(o), - (ChunkWrapper::V6(s), ChunkWrapper::V5(o)) => s.clone_from(o), - (ChunkWrapper::V5(s), ChunkWrapper::Ref(o)) => { - s.clone_from(&to_rafs_v5_chunk_info(as_blob_v5_chunk_info(o.deref()))) - } - (ChunkWrapper::V6(s), ChunkWrapper::Ref(o)) => { - s.clone_from(&to_rafs_v5_chunk_info(as_blob_v5_chunk_info(o.deref()))) - } - (ChunkWrapper::Ref(_s), ChunkWrapper::V5(_o)) => panic!("unexpected"), - (ChunkWrapper::Ref(_s), ChunkWrapper::V6(_o)) => panic!("unexpected"), - (ChunkWrapper::Ref(_s), ChunkWrapper::Ref(_o)) => panic!("unexpected"), - } - } - - /// Store the chunk information object into RAFS metadata blob. 
- pub fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - match self { - ChunkWrapper::V5(c) => c.store(w).context("failed to store rafs v5 chunk"), - ChunkWrapper::V6(c) => c.store(w).context("failed to store rafs v6 chunk"), - ChunkWrapper::Ref(c) => { - let chunk = to_rafs_v5_chunk_info(as_blob_v5_chunk_info(c.deref())); - chunk.store(w).context("failed to store rafs v6 chunk") - } - } - } - - fn ensure_owned(&mut self) { - if let Self::Ref(cki) = self { - if let Some(cki_v6) = cki.as_any().downcast_ref::() { - *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); - } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { - *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); - } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { - *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); - } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { - *self = Self::V5(to_rafs_v5_chunk_info(cki_v5)); - } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { - *self = Self::V5(to_rafs_v5_chunk_info(cki_v5)); - } else { - panic!("unknown chunk information struct"); - } - } - } -} - -fn as_blob_v5_chunk_info(cki: &dyn BlobChunkInfo) -> &dyn BlobV5ChunkInfo { - if let Some(cki_v6) = cki.as_any().downcast_ref::() { - cki_v6 - } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { - cki_v6 - } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { - cki_v6 - } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { - cki_v5 - } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { - cki_v5 - } else { - panic!("unknown chunk information struct"); - } -} - -/// Construct a `RafsV5ChunkInfo` object from a `dyn BlobChunkInfo` object. -fn to_rafs_v5_chunk_info(cki: &dyn BlobV5ChunkInfo) -> RafsV5ChunkInfo { - RafsV5ChunkInfo { - block_id: *cki.chunk_id(), - blob_index: cki.blob_index(), - flags: cki.flags(), - compressed_size: cki.compressed_size(), - uncompressed_size: cki.uncompressed_size(), - compressed_offset: cki.compressed_offset(), - uncompressed_offset: cki.uncompressed_offset(), - file_offset: cki.file_offset(), - index: cki.index(), - reserved: 0u32, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::mock::MockChunkInfo; - use nydus_utils::digest; - - fn test_chunk_wrapper(mut wrapper: ChunkWrapper) { - let dig = RafsDigest::from_buf([0xc; 32].as_slice(), digest::Algorithm::Blake3); - wrapper.set_id(dig); - assert_eq!(wrapper.id().to_owned(), dig); - wrapper.set_blob_index(1024); - assert_eq!(wrapper.blob_index(), 1024); - wrapper.set_compressed_offset(1024); - assert_eq!(wrapper.compressed_offset(), 1024); - wrapper.set_compressed_size(1024); - assert_eq!(wrapper.compressed_size(), 1024); - wrapper.set_uncompressed_offset(1024); - assert_eq!(wrapper.uncompressed_offset(), 1024); - wrapper.set_uncompressed_size(1024); - assert_eq!(wrapper.uncompressed_size(), 1024); - wrapper.set_index(1024); - assert_eq!(wrapper.index(), 1024); - wrapper.set_file_offset(1024); - assert_eq!(wrapper.file_offset(), 1024); - wrapper.set_compressed(true); - assert!(wrapper.is_compressed()); - wrapper.set_batch(true); - assert!(wrapper.is_batch()); - wrapper - .set_chunk_info(2048, 2048, 2048, 2048, 2048, 2048, 2048, true, true) - .unwrap(); - assert_eq!(wrapper.blob_index(), 2048); - assert_eq!(wrapper.compressed_offset(), 2048); - assert_eq!(wrapper.compressed_size(), 2048); - assert_eq!(wrapper.uncompressed_offset(), 2048); - assert_eq!(wrapper.uncompressed_size(), 2048); - assert_eq!(wrapper.file_offset(), 2048); - assert!(wrapper.is_compressed()); - } - - #[test] - fn 
test_chunk_wrapper_v5() { - let wrapper = ChunkWrapper::new(RafsVersion::V5); - test_chunk_wrapper(wrapper); - let wrapper = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); - test_chunk_wrapper(wrapper); - } - - #[test] - fn test_chunk_wrapper_v6() { - let wrapper = ChunkWrapper::new(RafsVersion::V6); - test_chunk_wrapper(wrapper); - let wrapper = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); - test_chunk_wrapper(wrapper); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref() { - let wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - assert_eq!(wrapper.id().to_owned(), RafsDigest::default()); - assert_eq!(wrapper.blob_index(), 0); - assert_eq!(wrapper.compressed_offset(), 0); - assert_eq!(wrapper.compressed_size(), 0); - assert_eq!(wrapper.uncompressed_offset(), 0); - assert_eq!(wrapper.uncompressed_size(), 0); - assert_eq!(wrapper.index(), 0); - assert_eq!(wrapper.file_offset(), 0); - assert!(!wrapper.is_compressed()); - assert!(!wrapper.is_batch()); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_id() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - let dig = RafsDigest::from_buf([0xc; 32].as_slice(), digest::Algorithm::Blake3); - wrapper.set_id(dig); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_blob_index() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_blob_index(1024); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_compressed_offset() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_compressed_offset(2048); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_uncompressed_size() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_uncompressed_size(1024); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_uncompressed_offset() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_uncompressed_offset(1024); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_compressed_size() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_compressed_size(2048); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_index() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_index(2048); - } - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_file_offset() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_file_offset(1024); - } - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_compressed() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_compressed(true); - } - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_batch() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.set_batch(true); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_set_chunk_info() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper - .set_chunk_info(2048, 2048, 2048, 2048, 2048, 2048, 2048, true, true) - .unwrap(); - } - - #[test] - #[should_panic] - fn test_chunk_wrapper_ref_ensure_owned() { - let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - wrapper.ensure_owned(); - } - - fn test_copy_from(mut w1: ChunkWrapper, w2: ChunkWrapper) { - w1.copy_from(&w2); - assert_eq!(w1.blob_index(), 
w2.blob_index()); - assert_eq!(w1.compressed_offset(), w2.compressed_offset()); - assert_eq!(w1.compressed_size(), w2.compressed_size()); - assert_eq!(w1.uncompressed_offset(), w2.uncompressed_offset()); - assert_eq!(w1.uncompressed_size(), w2.uncompressed_size()); - } - - #[test] - fn test_chunk_wrapper_copy_from() { - let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 1, 128, 256))); - let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::new())); - test_copy_from(wrapper_v5.clone(), wrapper_v5.clone()); - test_copy_from(wrapper_v5.clone(), wrapper_v6.clone()); - test_copy_from(wrapper_v6.clone(), wrapper_v5); - test_copy_from(wrapper_v6.clone(), wrapper_v6); - } - - #[test] - #[should_panic] - fn test_ref_copy1() { - let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - test_copy_from(wrapper_ref.clone(), wrapper_ref); - } - - #[test] - #[should_panic] - fn test_ref_copy2() { - let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); - test_copy_from(wrapper_ref, wrapper_v5); - } - - #[test] - #[should_panic] - fn test_ref_copy3() { - let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); - test_copy_from(wrapper_ref, wrapper_v6); - } - - #[test] - #[should_panic] - fn test_ref_copy4() { - let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); - test_copy_from(wrapper_v6, wrapper_ref); - } - - #[test] - #[should_panic] - fn test_ref_copy5() { - let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); - let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); - test_copy_from(wrapper_v5, wrapper_ref); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Deref; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use nydus_storage::device::v5::BlobV5ChunkInfo; +use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo}; +use nydus_storage::meta::BlobMetaChunk; +use nydus_utils::digest::RafsDigest; + +use crate::metadata::cached_v5::CachedChunkInfoV5; +use crate::metadata::direct_v5::DirectChunkInfoV5; +use crate::metadata::direct_v6::{DirectChunkInfoV6, TarfsChunkInfoV6}; +use crate::metadata::layout::v5::RafsV5ChunkInfo; +use crate::metadata::{RafsStore, RafsVersion}; +use crate::RafsIoWrite; + +/// A wrapper to encapsulate different versions of chunk information objects. +#[derive(Clone)] +pub enum ChunkWrapper { + /// Chunk info for RAFS v5. + V5(RafsV5ChunkInfo), + /// Chunk info RAFS v6, reuse `RafsV5ChunkInfo` as IR for v6. + V6(RafsV5ChunkInfo), + /// Reference to a `BlobChunkInfo` object. 
+ Ref(Arc), +} + +impl Debug for ChunkWrapper { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::V5(c) => write!(f, "{:?}", c), + Self::V6(c) => write!(f, "{:?}", c), + Self::Ref(c) => { + let chunk = to_rafs_v5_chunk_info(as_blob_v5_chunk_info(c.deref())); + write!(f, "{:?}", chunk) + } + } + } +} + +impl Display for ChunkWrapper { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "id {}, index {}, blob_index {}, file_offset {}, compressed {}/{}, uncompressed {}/{}", + self.id(), + self.index(), + self.blob_index(), + self.file_offset(), + self.compressed_offset(), + self.compressed_size(), + self.uncompressed_offset(), + self.uncompressed_size(), + ) + } +} + +impl ChunkWrapper { + /// Create a new `ChunkWrapper` object with default value. + pub fn new(version: RafsVersion) -> Self { + match version { + RafsVersion::V5 => ChunkWrapper::V5(RafsV5ChunkInfo::default()), + RafsVersion::V6 => ChunkWrapper::V6(RafsV5ChunkInfo::default()), + } + } + + /// Create a `ChunkWrapper` object from a `BlobChunkInfo` trait object. + pub fn from_chunk_info(cki: Arc) -> Self { + Self::Ref(cki) + } + + /// Get digest of chunk data, which is also used as chunk ID. + pub fn id(&self) -> &RafsDigest { + match self { + ChunkWrapper::V5(c) => &c.block_id, + ChunkWrapper::V6(c) => &c.block_id, + ChunkWrapper::Ref(c) => c.chunk_id(), + } + } + + /// Set digest of chunk data, which is also used as chunk ID. + pub fn set_id(&mut self, id: RafsDigest) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.block_id = id, + ChunkWrapper::V6(c) => c.block_id = id, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get index of the data blob associated with the chunk. + pub fn blob_index(&self) -> u32 { + match self { + ChunkWrapper::V5(c) => c.blob_index, + ChunkWrapper::V6(c) => c.blob_index, + ChunkWrapper::Ref(c) => c.blob_index(), + } + } + + /// Set index of the data blob associated with the chunk. + pub fn set_blob_index(&mut self, index: u32) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.blob_index = index, + ChunkWrapper::V6(c) => c.blob_index = index, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get offset into the compressed data blob to fetch chunk data. + pub fn compressed_offset(&self) -> u64 { + match self { + ChunkWrapper::V5(c) => c.compressed_offset, + ChunkWrapper::V6(c) => c.compressed_offset, + ChunkWrapper::Ref(c) => c.compressed_offset(), + } + } + + /// Set offset into the compressed data blob to fetch chunk data. + pub fn set_compressed_offset(&mut self, offset: u64) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.compressed_offset = offset, + ChunkWrapper::V6(c) => c.compressed_offset = offset, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get size of compressed chunk data. + pub fn compressed_size(&self) -> u32 { + match self { + ChunkWrapper::V5(c) => c.compressed_size, + ChunkWrapper::V6(c) => c.compressed_size, + ChunkWrapper::Ref(c) => c.compressed_size(), + } + } + + /// Set size of compressed chunk data. + pub fn set_compressed_size(&mut self, size: u32) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.compressed_size = size, + ChunkWrapper::V6(c) => c.compressed_size = size, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get offset into the uncompressed data blob file to get chunk data. 
+ pub fn uncompressed_offset(&self) -> u64 { + match self { + ChunkWrapper::V5(c) => c.uncompressed_offset, + ChunkWrapper::V6(c) => c.uncompressed_offset, + ChunkWrapper::Ref(c) => c.uncompressed_offset(), + } + } + + /// Set offset into the uncompressed data blob file to get chunk data. + pub fn set_uncompressed_offset(&mut self, offset: u64) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.uncompressed_offset = offset, + ChunkWrapper::V6(c) => c.uncompressed_offset = offset, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get size of uncompressed chunk data. + pub fn uncompressed_size(&self) -> u32 { + match self { + ChunkWrapper::V5(c) => c.uncompressed_size, + ChunkWrapper::V6(c) => c.uncompressed_size, + ChunkWrapper::Ref(c) => c.uncompressed_size(), + } + } + + /// Set size of uncompressed chunk data. + pub fn set_uncompressed_size(&mut self, size: u32) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.uncompressed_size = size, + ChunkWrapper::V6(c) => c.uncompressed_size = size, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get chunk index into the RAFS chunk information array, used by RAFS v5. + pub fn index(&self) -> u32 { + match self { + ChunkWrapper::V5(c) => c.index, + ChunkWrapper::V6(c) => c.index, + ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()).index(), + } + } + + /// Set chunk index into the RAFS chunk information array, used by RAFS v5. + pub fn set_index(&mut self, index: u32) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.index = index, + ChunkWrapper::V6(c) => c.index = index, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Get chunk offset in the file it belongs to, RAFS v5. + pub fn file_offset(&self) -> u64 { + match self { + ChunkWrapper::V5(c) => c.file_offset, + ChunkWrapper::V6(c) => c.file_offset, + ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()).file_offset(), + } + } + + /// Set chunk offset in the file it belongs to, RAFS v5. + pub fn set_file_offset(&mut self, offset: u64) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.file_offset = offset, + ChunkWrapper::V6(c) => c.file_offset = offset, + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Check whether the chunk is compressed or not. + pub fn is_compressed(&self) -> bool { + match self { + ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::COMPRESSED), + ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::COMPRESSED), + ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) + .flags() + .contains(BlobChunkFlags::COMPRESSED), + } + } + + /// Set flag for whether chunk is compressed. + pub fn set_compressed(&mut self, compressed: bool) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::COMPRESSED, compressed), + ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::COMPRESSED, compressed), + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Check whether the chunk is encrypted or not. + pub fn is_encrypted(&self) -> bool { + match self { + ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::ENCYPTED), + ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::ENCYPTED), + ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) + .flags() + .contains(BlobChunkFlags::ENCYPTED), + } + } + + /// Set flag for whether chunk is encrypted. 
+ pub fn set_encrypted(&mut self, encrypted: bool) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::ENCYPTED, encrypted), + ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::ENCYPTED, encrypted), + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Set flag for whether chunk is batch chunk. + pub fn set_batch(&mut self, batch: bool) { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => c.flags.set(BlobChunkFlags::BATCH, batch), + ChunkWrapper::V6(c) => c.flags.set(BlobChunkFlags::BATCH, batch), + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + } + + /// Check whether the chunk is batch chunk or not. + pub fn is_batch(&self) -> bool { + match self { + ChunkWrapper::V5(c) => c.flags.contains(BlobChunkFlags::BATCH), + ChunkWrapper::V6(c) => c.flags.contains(BlobChunkFlags::BATCH), + ChunkWrapper::Ref(c) => as_blob_v5_chunk_info(c.deref()) + .flags() + .contains(BlobChunkFlags::BATCH), + } + } + + #[allow(clippy::too_many_arguments)] + /// Set a group of chunk information fields. + pub fn set_chunk_info( + &mut self, + blob_index: u32, + chunk_index: u32, + file_offset: u64, + uncompressed_offset: u64, + uncompressed_size: u32, + compressed_offset: u64, + compressed_size: u32, + is_compressed: bool, + is_encrypted: bool, + ) -> Result<()> { + self.ensure_owned(); + match self { + ChunkWrapper::V5(c) => { + c.index = chunk_index; + c.blob_index = blob_index; + c.file_offset = file_offset; + c.compressed_offset = compressed_offset; + c.compressed_size = compressed_size; + c.uncompressed_offset = uncompressed_offset; + c.uncompressed_size = uncompressed_size; + if is_compressed { + c.flags |= BlobChunkFlags::COMPRESSED; + } + } + ChunkWrapper::V6(c) => { + c.index = chunk_index; + c.blob_index = blob_index; + c.file_offset = file_offset; + c.compressed_offset = compressed_offset; + c.compressed_size = compressed_size; + c.uncompressed_offset = uncompressed_offset; + c.uncompressed_size = uncompressed_size; + if is_compressed { + c.flags |= BlobChunkFlags::COMPRESSED; + } + if is_encrypted { + c.flags |= BlobChunkFlags::ENCYPTED; + } + } + ChunkWrapper::Ref(_c) => panic!("unexpected"), + } + + Ok(()) + } + + /// Copy chunk information from another `ChunkWrapper` object. + pub fn copy_from(&mut self, other: &Self) { + self.ensure_owned(); + match (self, other) { + (ChunkWrapper::V5(s), ChunkWrapper::V5(o)) => s.clone_from(o), + (ChunkWrapper::V6(s), ChunkWrapper::V6(o)) => s.clone_from(o), + (ChunkWrapper::V5(s), ChunkWrapper::V6(o)) => s.clone_from(o), + (ChunkWrapper::V6(s), ChunkWrapper::V5(o)) => s.clone_from(o), + (ChunkWrapper::V5(s), ChunkWrapper::Ref(o)) => { + s.clone_from(&to_rafs_v5_chunk_info(as_blob_v5_chunk_info(o.deref()))) + } + (ChunkWrapper::V6(s), ChunkWrapper::Ref(o)) => { + s.clone_from(&to_rafs_v5_chunk_info(as_blob_v5_chunk_info(o.deref()))) + } + (ChunkWrapper::Ref(_s), ChunkWrapper::V5(_o)) => panic!("unexpected"), + (ChunkWrapper::Ref(_s), ChunkWrapper::V6(_o)) => panic!("unexpected"), + (ChunkWrapper::Ref(_s), ChunkWrapper::Ref(_o)) => panic!("unexpected"), + } + } + + /// Store the chunk information object into RAFS metadata blob. 
+ pub fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + match self { + ChunkWrapper::V5(c) => c.store(w).context("failed to store rafs v5 chunk"), + ChunkWrapper::V6(c) => c.store(w).context("failed to store rafs v6 chunk"), + ChunkWrapper::Ref(c) => { + let chunk = to_rafs_v5_chunk_info(as_blob_v5_chunk_info(c.deref())); + chunk.store(w).context("failed to store rafs v6 chunk") + } + } + } + + fn ensure_owned(&mut self) { + if let Self::Ref(cki) = self { + if let Some(cki_v6) = cki.as_any().downcast_ref::() { + *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); + } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { + *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); + } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { + *self = Self::V6(to_rafs_v5_chunk_info(cki_v6)); + } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { + *self = Self::V5(to_rafs_v5_chunk_info(cki_v5)); + } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { + *self = Self::V5(to_rafs_v5_chunk_info(cki_v5)); + } else { + panic!("unknown chunk information struct"); + } + } + } +} + +fn as_blob_v5_chunk_info(cki: &dyn BlobChunkInfo) -> &dyn BlobV5ChunkInfo { + if let Some(cki_v6) = cki.as_any().downcast_ref::() { + cki_v6 + } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { + cki_v6 + } else if let Some(cki_v6) = cki.as_any().downcast_ref::() { + cki_v6 + } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { + cki_v5 + } else if let Some(cki_v5) = cki.as_any().downcast_ref::() { + cki_v5 + } else { + panic!("unknown chunk information struct"); + } +} + +/// Construct a `RafsV5ChunkInfo` object from a `dyn BlobChunkInfo` object. +fn to_rafs_v5_chunk_info(cki: &dyn BlobV5ChunkInfo) -> RafsV5ChunkInfo { + RafsV5ChunkInfo { + block_id: *cki.chunk_id(), + blob_index: cki.blob_index(), + flags: cki.flags(), + compressed_size: cki.compressed_size(), + uncompressed_size: cki.uncompressed_size(), + compressed_offset: cki.compressed_offset(), + uncompressed_offset: cki.uncompressed_offset(), + file_offset: cki.file_offset(), + index: cki.index(), + reserved: 0u32, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mock::MockChunkInfo; + use nydus_utils::digest; + + fn test_chunk_wrapper(mut wrapper: ChunkWrapper) { + let dig = RafsDigest::from_buf([0xc; 32].as_slice(), digest::Algorithm::Blake3); + wrapper.set_id(dig); + assert_eq!(wrapper.id().to_owned(), dig); + wrapper.set_blob_index(1024); + assert_eq!(wrapper.blob_index(), 1024); + wrapper.set_compressed_offset(1024); + assert_eq!(wrapper.compressed_offset(), 1024); + wrapper.set_compressed_size(1024); + assert_eq!(wrapper.compressed_size(), 1024); + wrapper.set_uncompressed_offset(1024); + assert_eq!(wrapper.uncompressed_offset(), 1024); + wrapper.set_uncompressed_size(1024); + assert_eq!(wrapper.uncompressed_size(), 1024); + wrapper.set_index(1024); + assert_eq!(wrapper.index(), 1024); + wrapper.set_file_offset(1024); + assert_eq!(wrapper.file_offset(), 1024); + wrapper.set_compressed(true); + assert!(wrapper.is_compressed()); + wrapper.set_batch(true); + assert!(wrapper.is_batch()); + wrapper + .set_chunk_info(2048, 2048, 2048, 2048, 2048, 2048, 2048, true, true) + .unwrap(); + assert_eq!(wrapper.blob_index(), 2048); + assert_eq!(wrapper.compressed_offset(), 2048); + assert_eq!(wrapper.compressed_size(), 2048); + assert_eq!(wrapper.uncompressed_offset(), 2048); + assert_eq!(wrapper.uncompressed_size(), 2048); + assert_eq!(wrapper.file_offset(), 2048); + assert!(wrapper.is_compressed()); + } + + #[test] + fn 
test_chunk_wrapper_v5() { + let wrapper = ChunkWrapper::new(RafsVersion::V5); + test_chunk_wrapper(wrapper); + let wrapper = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); + test_chunk_wrapper(wrapper); + } + + #[test] + fn test_chunk_wrapper_v6() { + let wrapper = ChunkWrapper::new(RafsVersion::V6); + test_chunk_wrapper(wrapper); + let wrapper = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); + test_chunk_wrapper(wrapper); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref() { + let wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + assert_eq!(wrapper.id().to_owned(), RafsDigest::default()); + assert_eq!(wrapper.blob_index(), 0); + assert_eq!(wrapper.compressed_offset(), 0); + assert_eq!(wrapper.compressed_size(), 0); + assert_eq!(wrapper.uncompressed_offset(), 0); + assert_eq!(wrapper.uncompressed_size(), 0); + assert_eq!(wrapper.index(), 0); + assert_eq!(wrapper.file_offset(), 0); + assert!(!wrapper.is_compressed()); + assert!(!wrapper.is_batch()); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_id() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + let dig = RafsDigest::from_buf([0xc; 32].as_slice(), digest::Algorithm::Blake3); + wrapper.set_id(dig); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_blob_index() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_blob_index(1024); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_compressed_offset() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_compressed_offset(2048); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_uncompressed_size() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_uncompressed_size(1024); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_uncompressed_offset() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_uncompressed_offset(1024); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_compressed_size() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_compressed_size(2048); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_index() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_index(2048); + } + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_file_offset() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_file_offset(1024); + } + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_compressed() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_compressed(true); + } + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_batch() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.set_batch(true); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_set_chunk_info() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper + .set_chunk_info(2048, 2048, 2048, 2048, 2048, 2048, 2048, true, true) + .unwrap(); + } + + #[test] + #[should_panic] + fn test_chunk_wrapper_ref_ensure_owned() { + let mut wrapper = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + wrapper.ensure_owned(); + } + + fn test_copy_from(mut w1: ChunkWrapper, w2: ChunkWrapper) { + w1.copy_from(&w2); + assert_eq!(w1.blob_index(), 
w2.blob_index()); + assert_eq!(w1.compressed_offset(), w2.compressed_offset()); + assert_eq!(w1.compressed_size(), w2.compressed_size()); + assert_eq!(w1.uncompressed_offset(), w2.uncompressed_offset()); + assert_eq!(w1.uncompressed_size(), w2.uncompressed_size()); + } + + #[test] + fn test_chunk_wrapper_copy_from() { + let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 1, 128, 256))); + let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::new())); + test_copy_from(wrapper_v5.clone(), wrapper_v5.clone()); + test_copy_from(wrapper_v5.clone(), wrapper_v6.clone()); + test_copy_from(wrapper_v6.clone(), wrapper_v5); + test_copy_from(wrapper_v6.clone(), wrapper_v6); + } + + #[test] + #[should_panic] + fn test_ref_copy1() { + let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + test_copy_from(wrapper_ref.clone(), wrapper_ref); + } + + #[test] + #[should_panic] + fn test_ref_copy2() { + let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); + test_copy_from(wrapper_ref, wrapper_v5); + } + + #[test] + #[should_panic] + fn test_ref_copy3() { + let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); + test_copy_from(wrapper_ref, wrapper_v6); + } + + #[test] + #[should_panic] + fn test_ref_copy4() { + let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + let wrapper_v6 = ChunkWrapper::Ref(Arc::new(TarfsChunkInfoV6::new(0, 0, 0, 0))); + test_copy_from(wrapper_v6, wrapper_ref); + } + + #[test] + #[should_panic] + fn test_ref_copy5() { + let wrapper_ref = ChunkWrapper::Ref(Arc::new(MockChunkInfo::default())); + let wrapper_v5 = ChunkWrapper::Ref(Arc::new(CachedChunkInfoV5::default())); + test_copy_from(wrapper_v5, wrapper_ref); + } +} diff --git a/rafs/src/metadata/direct_v5.rs b/rafs/src/metadata/direct_v5.rs index 682cbfc10a8..cd889a4e0fc 100644 --- a/rafs/src/metadata/direct_v5.rs +++ b/rafs/src/metadata/direct_v5.rs @@ -1,876 +1,876 @@ -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -/// A bootstrap driver to directly use on disk bootstrap as runtime in-memory bootstrap. -/// -/// To reduce memory footprint and speed up filesystem initialization, the V5 on disk bootstrap -/// layout has been designed to support directly mapping as runtime bootstrap. So we don't need to -/// define another set of runtime data structures to cache on-disk bootstrap in memory. -/// -/// To support modification to the runtime bootstrap, several technologies have been adopted: -/// * - arc-swap is used to support RCU-like update instead of Mutex/RwLock. -/// * - `offset` instead of `pointer` is used to record data structure position. -/// * - reference count to the referenced resources/objects. -/// -/// # Security -/// The bootstrap file may be provided by untrusted parties, so we must ensure strong validations -/// before making use of any bootstrap, especially we are using them in memory-mapped mode. The -/// rule is to call validate() after creating any data structure from the on-disk bootstrap. 
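The RCU-like update scheme described in the comment above can be sketched with `arc-swap` alone: readers take a cheap guard on the current state, while an update builds a complete new state object and publishes it atomically. The names below (`State`, `Mapping`) are illustrative placeholders, not part of this file; the real code swaps a full `DirectMappingState` inside `update_state()`.

```rust
// Minimal, self-contained sketch of the arc-swap based RCU-like update described
// above (requires the `arc-swap` crate); type and field names are illustrative.
use std::sync::Arc;

use arc_swap::ArcSwap;

struct State {
    generation: u64,
}

struct Mapping {
    state: Arc<ArcSwap<State>>,
}

impl Mapping {
    fn new() -> Self {
        Mapping {
            state: Arc::new(ArcSwap::new(Arc::new(State { generation: 0 }))),
        }
    }

    /// Readers only take a short-lived guard; no lock is held across updates.
    fn generation(&self) -> u64 {
        self.state.load().generation
    }

    /// Build a brand-new state and publish it atomically; the old state is
    /// dropped once the last reader guard referencing it goes away.
    fn update(&self) {
        let old = self.state.load();
        self.state.store(Arc::new(State {
            generation: old.generation + 1,
        }));
    }
}

fn main() {
    let m = Mapping::new();
    assert_eq!(m.generation(), 0);
    m.update();
    assert_eq!(m.generation(), 1);
}
```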
-use std::any::Any; -use std::ffi::{OsStr, OsString}; -use std::io::Result; -use std::io::SeekFrom; -use std::mem::{size_of, ManuallyDrop}; -use std::ops::Deref; -use std::os::unix::io::AsRawFd; -use std::sync::Arc; - -use arc_swap::{ArcSwap, Guard}; -use nydus_storage::device::v5::BlobV5ChunkInfo; -use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoVec}; -use nydus_storage::utils::readahead; -use nydus_utils::digest::RafsDigest; -use nydus_utils::filemap::{clone_file, FileMapState}; - -use crate::metadata::layout::v5::{ - rafsv5_align, rafsv5_alloc_bio_vecs, rafsv5_validate_inode, RafsV5BlobTable, RafsV5ChunkInfo, - RafsV5Inode, RafsV5InodeChunkOps, RafsV5InodeOps, RafsV5InodeTable, RafsV5XAttrsTable, - RAFSV5_ALIGNMENT, RAFSV5_EXT_BLOB_ENTRY_SIZE, RAFSV5_SUPERBLOCK_SIZE, -}; -use crate::metadata::layout::{ - bytes_to_os_str, parse_xattr_names, parse_xattr_value, MetaRange, XattrName, XattrValue, - RAFS_V5_ROOT_INODE, -}; -use crate::metadata::{ - Attr, Entry, Inode, RafsInode, RafsInodeWalkAction, RafsInodeWalkHandler, RafsSuperBlock, - RafsSuperInodes, RafsSuperMeta, DOT, DOTDOT, RAFS_ATTR_BLOCK_SIZE, RAFS_MAX_METADATA_SIZE, - RAFS_MAX_NAME, -}; -use crate::{RafsError, RafsInodeExt, RafsIoReader, RafsResult}; - -/// Impl get accessor for inode object. -macro_rules! impl_inode_getter { - ($G: ident, $F: ident, $U: ty) => { - #[inline] - fn $G(&self) -> $U { - let state = self.state(); - let inode = self.inode(state.deref()); - - inode.$F - } - }; -} - -/// Impl get accessor for inode object. -macro_rules! impl_inode_wrapper { - ($G: ident, $U: ty) => { - #[inline] - fn $G(&self) -> $U { - let state = self.state(); - let inode = self.inode(state.deref()); - - inode.$G() - } - }; -} - -/// Impl get accessor for chunkinfo object. -macro_rules! impl_chunkinfo_getter { - ($G: ident, $U: ty) => { - #[inline] - fn $G(&self) -> $U { - let state = self.state(); - - self.chunk(state.deref()).$G - } - }; -} - -/// The underlying struct to maintain memory mapped bootstrap for a file system. -/// -/// Only the DirectMappingState may store raw pointers. -/// Other data structures should not store raw pointers, instead they should hold a reference to -/// the DirectMappingState object and store an offset, so a `pointer` could be reconstruct by -/// `DirectMappingState.base + offset`. -struct DirectMappingState { - meta: RafsSuperMeta, - inode_table: ManuallyDrop, - blob_table: RafsV5BlobTable, - file_map: FileMapState, - mmapped_inode_table: bool, - validate_inode: bool, -} - -impl DirectMappingState { - fn new(meta: &RafsSuperMeta, validate_inode: bool) -> Self { - DirectMappingState { - meta: *meta, - inode_table: ManuallyDrop::new(RafsV5InodeTable::default()), - blob_table: RafsV5BlobTable::default(), - file_map: FileMapState::default(), - mmapped_inode_table: false, - validate_inode, - } - } -} - -impl Drop for DirectMappingState { - fn drop(&mut self) { - if !self.mmapped_inode_table { - // Safe because it's a allocated vector. - unsafe { ManuallyDrop::drop(&mut self.inode_table) }; - } - } -} - -/// Direct-mapped Rafs v5 super block. -#[derive(Clone)] -pub struct DirectSuperBlockV5 { - state: Arc>, -} - -impl DirectSuperBlockV5 { - /// Create a new instance of `DirectSuperBlockV5`. 
- pub fn new(meta: &RafsSuperMeta, validate_inode: bool) -> Self { - let state = DirectMappingState::new(meta, validate_inode); - - Self { - state: Arc::new(ArcSwap::new(Arc::new(state))), - } - } - - #[inline] - fn get_inode_wrapper( - &self, - ino: Inode, - state: &DirectMappingState, - validate_inode: bool, - ) -> Result { - let offset = state.inode_table.get(ino)? as usize; - let _inode = state.file_map.get_ref::(offset)?; - let wrapper = OndiskInodeWrapper { - mapping: self.clone(), - offset, - }; - - if let Err(e) = wrapper.validate(state.meta.inodes_count, state.meta.chunk_size as u64) { - if e.raw_os_error().unwrap_or(0) != libc::EOPNOTSUPP { - return Err(e); - } - // ignore unsupported err - } - - if validate_inode { - let digester = state.meta.get_digester(); - if !rafsv5_validate_inode(&wrapper, false, digester)? { - return Err(einval!("invalid inode digest")); - } - } - - Ok(wrapper) - } - - fn update_state(&self, r: &mut RafsIoReader) -> Result<()> { - let old_state = self.state(); - - // Validate file size - let file = clone_file(r.as_raw_fd())?; - let md = file.metadata()?; - let len = md.len(); - let size = len as usize; - if len < RAFSV5_SUPERBLOCK_SIZE as u64 - || len > RAFS_MAX_METADATA_SIZE as u64 - || len & (RAFSV5_ALIGNMENT as u64 - 1) != 0 - { - return Err(ebadf!("invalid bootstrap file")); - } - let md_range = MetaRange::new( - RAFSV5_SUPERBLOCK_SIZE as u64, - len - RAFSV5_SUPERBLOCK_SIZE as u64, - true, - )?; - - // Validate inode table layout - let inode_table_start = old_state.meta.inode_table_offset; - let inode_table_size = old_state.meta.inode_table_entries as u64 * size_of::() as u64; - let inode_table_range = MetaRange::new(inode_table_start, inode_table_size, false)?; - if !inode_table_range.is_subrange_of(&md_range) { - return Err(ebadf!("invalid inode table")); - } - - // Validate blob table layout - let blob_table_start = old_state.meta.blob_table_offset; - let blob_table_size = old_state.meta.blob_table_size as u64; - let blob_table_range = MetaRange::new(blob_table_start, blob_table_size, false)?; - if !blob_table_range.is_subrange_of(&md_range) - || blob_table_range.intersect_with(&inode_table_range) - { - return Err(ebadf!("invalid blob table")); - } - - // Validate extended blob table layout - let extended_blob_table_offset = old_state.meta.extended_blob_table_offset; - let extended_blob_table_size = - old_state.meta.extended_blob_table_entries as u64 * RAFSV5_EXT_BLOB_ENTRY_SIZE as u64; - let extended_blob_table_range = - MetaRange::new(extended_blob_table_offset, extended_blob_table_size, true)?; - if extended_blob_table_offset > 0 - && extended_blob_table_size > 0 - && (!extended_blob_table_range.is_subrange_of(&md_range) - || extended_blob_table_range.intersect_with(&inode_table_range) - || extended_blob_table_range.intersect_with(&blob_table_range)) - { - return Err(ebadf!("invalid extended blob table")); - } - - // Prefetch the bootstrap file - readahead(file.as_raw_fd(), 0, len); - - // Mmap the bootstrap file into current process for direct access - let file_map = FileMapState::new(file, 0, size, false)?; - - // Load blob table. Safe because we have validated the blob table layout. - let mut blob_table = RafsV5BlobTable::new(); - let meta = &old_state.meta; - - // Load extended blob table if the bootstrap including extended blob table. 
- if extended_blob_table_offset > 0 && extended_blob_table_size > 0 { - r.seek(SeekFrom::Start(extended_blob_table_offset))?; - blob_table - .extended - .load(r, meta.extended_blob_table_entries as usize)?; - } - r.seek(SeekFrom::Start(meta.blob_table_offset))?; - blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; - - // Load(Map) inode table. Safe because we have validated the inode table layout. - // Though we have passed *mut u32 to Vec::from_raw_parts(), it will trigger invalid memory - // access if the underlying memory is written to. - let inode_table = unsafe { - RafsV5InodeTable { - data: Vec::from_raw_parts( - file_map.offset(inode_table_start as usize) as *const u32 as *mut u32, - old_state.meta.inode_table_entries as usize, - old_state.meta.inode_table_entries as usize, - ), - } - }; - - let validate_inode = old_state.validate_inode; - - let state = DirectMappingState { - meta: old_state.meta, - inode_table: ManuallyDrop::new(inode_table), - blob_table, - file_map, - mmapped_inode_table: true, - validate_inode, - }; - - // Swap new and old DirectMappingState object, the old object will be destroyed when the - // reference count reaches zero. - self.state.store(Arc::new(state)); - - Ok(()) - } - - #[inline] - fn state(&self) -> Guard> { - self.state.load() - } -} - -impl RafsSuperInodes for DirectSuperBlockV5 { - fn get_max_ino(&self) -> Inode { - self.state().inode_table.len() as u64 - } - - /// Find inode offset by ino from inode table and mmap to OndiskInode. - fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result> { - let state = self.state(); - let wrapper = self.get_inode_wrapper(ino, state.deref(), validate_inode)?; - Ok(Arc::new(wrapper)) - } - - fn get_extended_inode( - &self, - ino: Inode, - validate_inode: bool, - ) -> Result> { - let state = self.state(); - let wrapper = self.get_inode_wrapper(ino, state.deref(), validate_inode)?; - Ok(Arc::new(wrapper)) - } -} - -impl RafsSuperBlock for DirectSuperBlockV5 { - fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - self.update_state(r) - } - - fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { - self.update_state(r).map_err(RafsError::SwapBackend) - } - - fn destroy(&mut self) { - let state = DirectMappingState::new(&RafsSuperMeta::default(), false); - - self.state.store(Arc::new(state)); - } - - fn get_blob_infos(&self) -> Vec> { - self.state().blob_table.entries.clone() - } - - fn root_ino(&self) -> u64 { - RAFS_V5_ROOT_INODE - } - - fn get_chunk_info(&self, _idx: usize) -> Result> { - unimplemented!("used by RAFS v6 only") - } - - fn set_blob_device(&self, _blob_device: BlobDevice) { - unimplemented!("used by RAFS v6 only") - } -} - -/// Direct-mapped RAFS v5 inode object. -pub struct OndiskInodeWrapper { - pub mapping: DirectSuperBlockV5, - pub offset: usize, -} - -impl OndiskInodeWrapper { - #[inline] - fn state(&self) -> Guard> { - self.mapping.state() - } - - /// Convert `OndiskInodeWrapper` to an `RafsV5Inode` object. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - #[inline] - fn inode<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5Inode { - state.file_map.get_ref::(self.offset).unwrap() - } - - /// Get an reference to the file name string. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. 
- fn name_ref<'a>(&self, state: &'a DirectMappingState) -> &'a OsStr { - let offset = self.offset + size_of::(); - let size = self.inode(state).i_name_size as usize; - let name = state.file_map.get_slice(offset, size).unwrap(); - bytes_to_os_str(name) - } - - fn get_xattr_data<'a>( - &self, - state: &'a Guard>, - ) -> Result<(&'a [u8], usize)> { - let inode = self.inode(state.deref()); - - if !inode.has_xattr() { - return Ok((&[], 0)); - } - - let offset = self.offset + inode.size(); - let xattrs = state.file_map.get_ref::(offset)?; - let xattr_size = xattrs.size(); - let xattr_data = state - .file_map - .get_slice(offset + size_of::(), xattr_size)?; - - Ok((xattr_data, xattr_size)) - } - - fn _get_chunk_info(&self, idx: u32) -> Result> { - let state = self.state(); - let inode = self.inode(state.deref()); - - if !inode.is_reg() || inode.i_child_count == 0 || idx >= inode.i_child_count { - return Err(enoent!("invalid chunk info")); - } - - let mut offset = self.offset + inode.size(); - if inode.has_xattr() { - let xattrs = state.file_map.get_ref::(offset)?; - offset += size_of::() + xattrs.aligned_size(); - } - offset += size_of::() * idx as usize; - - let chunk = state.file_map.get_ref::(offset)?; - let wrapper = DirectChunkInfoV5::new(&state, chunk, self.mapping.clone(), offset)?; - - Ok(Arc::new(wrapper)) - } -} - -impl RafsInode for OndiskInodeWrapper { - // Somehow we got invalid `inode_count` from superblock. - fn validate(&self, _inode_count: u64, chunk_size: u64) -> Result<()> { - let state = self.state(); - let inode = state.file_map.get_ref::(self.offset)?; - let max_inode = state.inode_table.len() as u64; - let xattr_size = if inode.has_xattr() { - let offset = self.offset + inode.size(); - let xattrs = state.file_map.get_ref::(offset)?; - size_of::() + xattrs.aligned_size() - } else { - 0 - }; - - // * - parent inode number must be less than child inode number unless child is a hardlink. - // * - inode link count must not be zero. - // * - name_size must be less than 255. Due to alignment, the check is not so strict. - // * - name_size and symlink_size must be correctly aligned. - // Should we store raw size instead of aligned size for name and symlink? - if inode.i_ino == 0 - || inode.i_ino > max_inode - // || inode.i_ino > _inode_count - || (inode.i_ino != RAFS_V5_ROOT_INODE && inode.i_parent == 0) - || inode.i_nlink == 0 - || inode.i_name_size as usize > (RAFS_MAX_NAME + 1) - || inode.i_name_size == 0 - { - return Err(ebadf!(format!( - "inode validation failure, inode {:?}", - inode - ))); - } - if !inode.is_hardlink() && inode.i_parent >= inode.i_ino { - return Err(einval!("invalid parent inode")); - } - - let chunk_count = 0; - if inode.is_reg() { - if self.state().meta.is_chunk_dict() { - // chunk-dict doesn't support chunk_count check - return Err(std::io::Error::from_raw_os_error(libc::EOPNOTSUPP)); - } - let chunks = (inode.i_size + chunk_size - 1) / chunk_size; - if !inode.has_hole() && chunks != inode.i_child_count as u64 { - return Err(einval!(format!( - "invalid chunk count, ino {}, expected {}, actual {}", - inode.i_ino, chunks, inode.i_child_count, - ))); - } - let size = inode.size() - + xattr_size - + inode.i_child_count as usize * size_of::(); - state.file_map.validate_range(self.offset, size)?; - } else if inode.is_dir() { - // Only valid i_child_index, i_child_count when we have children. 
- if inode.i_child_count > 0 - && ((inode.i_child_index as Inode) <= inode.i_ino - || inode.i_child_count as u64 >= max_inode - || inode.i_child_count as u64 + inode.i_child_index as u64 - 1 > max_inode) - { - return Err(einval!("invalid directory")); - } - let size = inode.size() + xattr_size; - state.file_map.validate_range(self.offset, size)?; - } else if inode.is_symlink() && inode.i_symlink_size == 0 { - return Err(einval!("invalid symlink target")); - } - if !inode.is_hardlink() && inode.i_parent >= inode.i_ino { - return Err(einval!("invalid parent inode")); - } - - let size = inode.size() + xattr_size + chunk_count * size_of::(); - state.file_map.validate_range(self.offset, size)?; - - Ok(()) - } - - fn alloc_bio_vecs( - &self, - _device: &BlobDevice, - offset: u64, - size: usize, - user_io: bool, - ) -> Result> { - rafsv5_alloc_bio_vecs(self, offset, size, user_io) - } - - fn collect_descendants_inodes( - &self, - descendants: &mut Vec>, - ) -> Result { - if !self.is_dir() { - return Err(enotdir!()); - } - - let state = self.state(); - let inode = self.inode(state.deref()); - let child_count = inode.i_child_count as u64; - let child_index = inode.i_child_index as u64; - let mut child_dirs: Vec> = Vec::new(); - - for idx in child_index..(child_index + child_count) { - let child_inode = self.mapping.get_inode(idx, false)?; - if child_inode.is_dir() { - child_dirs.push(child_inode); - } else if !child_inode.is_empty_size() { - descendants.push(child_inode); - } - } - - for d in child_dirs { - d.collect_descendants_inodes(descendants)?; - } - - Ok(0) - } - - fn get_entry(&self) -> Entry { - let state = self.state(); - let inode = self.inode(state.deref()); - - Entry { - attr: self.get_attr().into(), - inode: inode.i_ino, - generation: 0, - attr_flags: 0, - attr_timeout: state.meta.attr_timeout, - entry_timeout: state.meta.entry_timeout, - } - } - - fn get_attr(&self) -> Attr { - let state = self.state(); - let inode = self.inode(state.deref()); - - Attr { - ino: inode.i_ino, - size: inode.i_size, - blocks: inode.i_blocks, - mode: inode.i_mode, - nlink: inode.i_nlink as u32, - uid: inode.i_uid, - gid: inode.i_gid, - mtime: inode.i_mtime, - mtimensec: inode.i_mtime_nsec, - blksize: RAFS_ATTR_BLOCK_SIZE, - rdev: inode.i_rdev, - ..Default::default() - } - } - - fn get_xattr(&self, name: &OsStr) -> Result> { - let state = self.state(); - let (xattr_data, xattr_size) = self.get_xattr_data(&state)?; - parse_xattr_value(xattr_data, xattr_size, name) - } - - fn get_xattrs(&self) -> Result> { - let state = self.state(); - let (xattr_data, xattr_size) = self.get_xattr_data(&state)?; - parse_xattr_names(xattr_data, xattr_size) - } - - /// Get symlink target of the inode. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn get_symlink(&self) -> Result { - let state = self.state(); - let inode = self.inode(state.deref()); - let offset = - self.offset + size_of::() + rafsv5_align(inode.i_name_size as usize); - let size = inode.i_symlink_size as usize; - let symlink = state.file_map.get_slice(offset, size).unwrap(); - Ok(bytes_to_os_str(symlink).to_os_string()) - } - - fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { - // offset 0 and 1 is for "." and ".." respectively. - let mut cur_offset = entry_offset; - - if cur_offset == 0 { - cur_offset += 1; - // Safe to unwrap since conversion from DOT to os string can't fail. 
- match handler(None, OsString::from(DOT), self.ino(), cur_offset) { - Ok(RafsInodeWalkAction::Continue) => {} - Ok(RafsInodeWalkAction::Break) => return Ok(()), - Err(e) => return Err(e), - } - } - - if cur_offset == 1 { - let parent = if self.ino() == 1 { 1 } else { self.parent() }; - cur_offset += 1; - // Safe to unwrap since conversion from DOTDOT to os string can't fail. - match handler(None, OsString::from(DOTDOT), parent, cur_offset) { - Ok(RafsInodeWalkAction::Continue) => {} - Ok(RafsInodeWalkAction::Break) => return Ok(()), - Err(e) => return Err(e), - }; - } - - let mut idx = cur_offset - 2; - while idx < self.get_child_count() as u64 { - assert!(idx <= u32::MAX as u64); - let child = self.get_child_by_index(idx as u32)?; - cur_offset += 1; - match handler(None, child.name(), child.ino(), cur_offset) { - Ok(RafsInodeWalkAction::Continue) => idx += 1, - Ok(RafsInodeWalkAction::Break) => break, - Err(e) => return Err(e), - } - } - - Ok(()) - } - - /// Get the child with the specified name. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn get_child_by_name(&self, name: &OsStr) -> Result> { - let state = self.state(); - let inode = self.inode(state.deref()); - - if !inode.is_dir() { - return Err(einval!("inode is not a directory")); - } else if inode.i_child_count == 0 { - return Err(enoent!()); - } - - let mut first = 0i32; - let mut last = (inode.i_child_count - 1) as i32; - - // Binary search by child name. - // This implementation is more convenient and slightly outperforms than slice::binary_search. - while first <= last { - let pivot = first + ((last - first) >> 1); - let wrapper = self.mapping.get_inode_wrapper( - (inode.i_child_index as i32 + pivot) as u64, - state.deref(), - state.validate_inode, - )?; - let target = wrapper.name_ref(state.deref()); - if target == name { - return Ok(Arc::new(wrapper)); - } - if target > name { - last = pivot - 1; - } else { - first = pivot + 1; - } - } - - Err(enoent!()) - } - - /// Get the child with the specified index. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. 
- fn get_child_by_index(&self, idx: u32) -> Result> { - let state = self.state(); - let inode = self.inode(state.deref()); - let child_count = inode.i_child_count; - let child_index = inode.i_child_index; - - if !inode.is_dir() { - return Err(einval!("inode is not a directory")); - } else if idx >= child_count { - return Err(enoent!("invalid child index")); - } - - let wrapper = self.mapping.get_inode_wrapper( - (idx + child_index) as Inode, - state.deref(), - state.validate_inode, - )?; - Ok(Arc::new(wrapper)) - } - - #[inline] - fn get_child_count(&self) -> u32 { - let state = self.state(); - let inode = self.inode(state.deref()); - inode.i_child_count - } - - #[inline] - fn get_child_index(&self) -> Result { - let state = self.state(); - let inode = self.inode(state.deref()); - Ok(inode.i_child_index) - } - - #[inline] - fn get_chunk_count(&self) -> u32 { - self.get_child_count() - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_inode_wrapper!(is_blkdev, bool); - impl_inode_wrapper!(is_chrdev, bool); - impl_inode_wrapper!(is_sock, bool); - impl_inode_wrapper!(is_fifo, bool); - impl_inode_wrapper!(is_dir, bool); - impl_inode_wrapper!(is_reg, bool); - impl_inode_wrapper!(is_symlink, bool); - impl_inode_wrapper!(is_hardlink, bool); - impl_inode_wrapper!(has_xattr, bool); - impl_inode_getter!(ino, i_ino, u64); - impl_inode_getter!(size, i_size, u64); - impl_inode_getter!(rdev, i_rdev, u32); - impl_inode_getter!(projid, i_projid, u32); - impl_inode_getter!(get_symlink_size, i_symlink_size, u16); -} - -impl RafsInodeExt for OndiskInodeWrapper { - fn as_inode(&self) -> &dyn RafsInode { - self - } - - /// Get name of the inode. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn name(&self) -> OsString { - let state = self.state(); - self.name_ref(state.deref()).to_owned() - } - - fn flags(&self) -> u64 { - let state = self.state(); - let inode = self.inode(state.deref()); - - inode.i_flags.bits() - } - - fn get_digest(&self) -> RafsDigest { - let state = self.state(); - let inode = self.inode(state.deref()); - inode.i_digest - } - - /// Get chunk information with index `idx` - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn get_chunk_info(&self, idx: u32) -> Result> { - self._get_chunk_info(idx) - .map(|v| v as Arc) - } - - impl_inode_getter!(get_name_size, i_name_size, u16); - impl_inode_getter!(parent, i_parent, u64); -} - -impl RafsV5InodeChunkOps for OndiskInodeWrapper { - fn get_chunk_info_v5(&self, idx: u32) -> Result> { - self._get_chunk_info(idx) - .map(|v| v as Arc) - } -} - -impl RafsV5InodeOps for OndiskInodeWrapper { - fn get_blob_by_index(&self, idx: u32) -> Result> { - self.state().blob_table.get(idx) - } - - fn get_chunk_size(&self) -> u32 { - self.mapping.state().meta.chunk_size - } - - impl_inode_wrapper!(has_hole, bool); -} - -pub struct DirectChunkInfoV5 { - mapping: DirectSuperBlockV5, - offset: usize, - digest: RafsDigest, -} - -// This is *direct* metadata mode in-memory chunk info object. -impl DirectChunkInfoV5 { - #[inline] - fn new( - state: &DirectMappingState, - chunk: &RafsV5ChunkInfo, - mapping: DirectSuperBlockV5, - offset: usize, - ) -> Result { - state.file_map.get_ref::(offset)?; - Ok(Self { - mapping, - offset, - digest: chunk.block_id, - }) - } - - #[inline] - fn state(&self) -> Guard> { - self.mapping.state() - } - - /// Dereference the underlying OndiskChunkInfo object. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. 
- /// The OndiskChunkInfoWrapper could only be constructed from a valid OndiskChunkInfo pointer, - /// so it's safe to dereference the underlying OndiskChunkInfo object. - fn chunk<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5ChunkInfo { - state - .file_map - .get_ref::(self.offset) - .unwrap() - } -} - -impl BlobChunkInfo for DirectChunkInfoV5 { - fn chunk_id(&self) -> &RafsDigest { - &self.digest - } - - fn id(&self) -> u32 { - self.index() - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.chunk(self.state().deref()) - .flags - .contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_chunkinfo_getter!(blob_index, u32); - impl_chunkinfo_getter!(compressed_offset, u64); - impl_chunkinfo_getter!(compressed_size, u32); - impl_chunkinfo_getter!(uncompressed_offset, u64); - impl_chunkinfo_getter!(uncompressed_size, u32); -} - -impl BlobV5ChunkInfo for DirectChunkInfoV5 { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_chunkinfo_getter!(index, u32); - impl_chunkinfo_getter!(file_offset, u64); - impl_chunkinfo_getter!(flags, BlobChunkFlags); -} +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +/// A bootstrap driver to directly use on disk bootstrap as runtime in-memory bootstrap. +/// +/// To reduce memory footprint and speed up filesystem initialization, the V5 on disk bootstrap +/// layout has been designed to support directly mapping as runtime bootstrap. So we don't need to +/// define another set of runtime data structures to cache on-disk bootstrap in memory. +/// +/// To support modification to the runtime bootstrap, several technologies have been adopted: +/// * - arc-swap is used to support RCU-like update instead of Mutex/RwLock. +/// * - `offset` instead of `pointer` is used to record data structure position. +/// * - reference count to the referenced resources/objects. +/// +/// # Security +/// The bootstrap file may be provided by untrusted parties, so we must ensure strong validations +/// before making use of any bootstrap, especially we are using them in memory-mapped mode. The +/// rule is to call validate() after creating any data structure from the on-disk bootstrap. 
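// ----------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the module comment directly above
// notes that arc-swap provides RCU-like updates instead of Mutex/RwLock. The
// standalone example below shows that pattern in isolation, using the same
// arc_swap calls this file relies on (ArcSwap::new, load, store). The
// FsState/SuperBlock/remount names are hypothetical and exist only for
// illustration.

use std::sync::Arc;

use arc_swap::ArcSwap;

// A stand-in for DirectMappingState: one immutable snapshot of filesystem state.
struct FsState {
    generation: u64,
}

// A stand-in for DirectSuperBlockV5: shares the current snapshot behind an ArcSwap.
struct SuperBlock {
    state: Arc<ArcSwap<FsState>>,
}

impl SuperBlock {
    // Readers take a cheap snapshot; it stays valid even if a writer publishes
    // a new state while the snapshot is still in use.
    fn generation(&self) -> u64 {
        self.state.load().generation
    }

    // Writers build a complete new state and publish it atomically; the old
    // snapshot is dropped once the last reader releases its guard.
    fn remount(&self, generation: u64) {
        self.state.store(Arc::new(FsState { generation }));
    }
}

fn main() {
    let sb = SuperBlock {
        state: Arc::new(ArcSwap::new(Arc::new(FsState { generation: 1 }))),
    };
    sb.remount(2);
    assert_eq!(sb.generation(), 2);
}
// ----------------------------------------------------------------------------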
+use std::any::Any; +use std::ffi::{OsStr, OsString}; +use std::io::Result; +use std::io::SeekFrom; +use std::mem::{size_of, ManuallyDrop}; +use std::ops::Deref; +use std::os::unix::io::AsRawFd; +use std::sync::Arc; + +use arc_swap::{ArcSwap, Guard}; +use nydus_storage::device::v5::BlobV5ChunkInfo; +use nydus_storage::device::{BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoVec}; +use nydus_storage::utils::readahead; +use nydus_utils::digest::RafsDigest; +use nydus_utils::filemap::{clone_file, FileMapState}; + +use crate::metadata::layout::v5::{ + rafsv5_align, rafsv5_alloc_bio_vecs, rafsv5_validate_inode, RafsV5BlobTable, RafsV5ChunkInfo, + RafsV5Inode, RafsV5InodeChunkOps, RafsV5InodeOps, RafsV5InodeTable, RafsV5XAttrsTable, + RAFSV5_ALIGNMENT, RAFSV5_EXT_BLOB_ENTRY_SIZE, RAFSV5_SUPERBLOCK_SIZE, +}; +use crate::metadata::layout::{ + bytes_to_os_str, parse_xattr_names, parse_xattr_value, MetaRange, XattrName, XattrValue, + RAFS_V5_ROOT_INODE, +}; +use crate::metadata::{ + Attr, Entry, Inode, RafsInode, RafsInodeWalkAction, RafsInodeWalkHandler, RafsSuperBlock, + RafsSuperInodes, RafsSuperMeta, DOT, DOTDOT, RAFS_ATTR_BLOCK_SIZE, RAFS_MAX_METADATA_SIZE, + RAFS_MAX_NAME, +}; +use crate::{RafsError, RafsInodeExt, RafsIoReader, RafsResult}; + +/// Impl get accessor for inode object. +macro_rules! impl_inode_getter { + ($G: ident, $F: ident, $U: ty) => { + #[inline] + fn $G(&self) -> $U { + let state = self.state(); + let inode = self.inode(state.deref()); + + inode.$F + } + }; +} + +/// Impl get accessor for inode object. +macro_rules! impl_inode_wrapper { + ($G: ident, $U: ty) => { + #[inline] + fn $G(&self) -> $U { + let state = self.state(); + let inode = self.inode(state.deref()); + + inode.$G() + } + }; +} + +/// Impl get accessor for chunkinfo object. +macro_rules! impl_chunkinfo_getter { + ($G: ident, $U: ty) => { + #[inline] + fn $G(&self) -> $U { + let state = self.state(); + + self.chunk(state.deref()).$G + } + }; +} + +/// The underlying struct to maintain memory mapped bootstrap for a file system. +/// +/// Only the DirectMappingState may store raw pointers. +/// Other data structures should not store raw pointers, instead they should hold a reference to +/// the DirectMappingState object and store an offset, so a `pointer` could be reconstruct by +/// `DirectMappingState.base + offset`. +struct DirectMappingState { + meta: RafsSuperMeta, + inode_table: ManuallyDrop, + blob_table: RafsV5BlobTable, + file_map: FileMapState, + mmapped_inode_table: bool, + validate_inode: bool, +} + +impl DirectMappingState { + fn new(meta: &RafsSuperMeta, validate_inode: bool) -> Self { + DirectMappingState { + meta: *meta, + inode_table: ManuallyDrop::new(RafsV5InodeTable::default()), + blob_table: RafsV5BlobTable::default(), + file_map: FileMapState::default(), + mmapped_inode_table: false, + validate_inode, + } + } +} + +impl Drop for DirectMappingState { + fn drop(&mut self) { + if !self.mmapped_inode_table { + // Safe because it's a allocated vector. + unsafe { ManuallyDrop::drop(&mut self.inode_table) }; + } + } +} + +/// Direct-mapped Rafs v5 super block. +#[derive(Clone)] +pub struct DirectSuperBlockV5 { + state: Arc>, +} + +impl DirectSuperBlockV5 { + /// Create a new instance of `DirectSuperBlockV5`. 
+ pub fn new(meta: &RafsSuperMeta, validate_inode: bool) -> Self { + let state = DirectMappingState::new(meta, validate_inode); + + Self { + state: Arc::new(ArcSwap::new(Arc::new(state))), + } + } + + #[inline] + fn get_inode_wrapper( + &self, + ino: Inode, + state: &DirectMappingState, + validate_inode: bool, + ) -> Result { + let offset = state.inode_table.get(ino)? as usize; + let _inode = state.file_map.get_ref::(offset)?; + let wrapper = OndiskInodeWrapper { + mapping: self.clone(), + offset, + }; + + if let Err(e) = wrapper.validate(state.meta.inodes_count, state.meta.chunk_size as u64) { + if e.raw_os_error().unwrap_or(0) != libc::EOPNOTSUPP { + return Err(e); + } + // ignore unsupported err + } + + if validate_inode { + let digester = state.meta.get_digester(); + if !rafsv5_validate_inode(&wrapper, false, digester)? { + return Err(einval!("invalid inode digest")); + } + } + + Ok(wrapper) + } + + fn update_state(&self, r: &mut RafsIoReader) -> Result<()> { + let old_state = self.state(); + + // Validate file size + let file = clone_file(r.as_raw_fd())?; + let md = file.metadata()?; + let len = md.len(); + let size = len as usize; + if len < RAFSV5_SUPERBLOCK_SIZE as u64 + || len > RAFS_MAX_METADATA_SIZE as u64 + || len & (RAFSV5_ALIGNMENT as u64 - 1) != 0 + { + return Err(ebadf!("invalid bootstrap file")); + } + let md_range = MetaRange::new( + RAFSV5_SUPERBLOCK_SIZE as u64, + len - RAFSV5_SUPERBLOCK_SIZE as u64, + true, + )?; + + // Validate inode table layout + let inode_table_start = old_state.meta.inode_table_offset; + let inode_table_size = old_state.meta.inode_table_entries as u64 * size_of::() as u64; + let inode_table_range = MetaRange::new(inode_table_start, inode_table_size, false)?; + if !inode_table_range.is_subrange_of(&md_range) { + return Err(ebadf!("invalid inode table")); + } + + // Validate blob table layout + let blob_table_start = old_state.meta.blob_table_offset; + let blob_table_size = old_state.meta.blob_table_size as u64; + let blob_table_range = MetaRange::new(blob_table_start, blob_table_size, false)?; + if !blob_table_range.is_subrange_of(&md_range) + || blob_table_range.intersect_with(&inode_table_range) + { + return Err(ebadf!("invalid blob table")); + } + + // Validate extended blob table layout + let extended_blob_table_offset = old_state.meta.extended_blob_table_offset; + let extended_blob_table_size = + old_state.meta.extended_blob_table_entries as u64 * RAFSV5_EXT_BLOB_ENTRY_SIZE as u64; + let extended_blob_table_range = + MetaRange::new(extended_blob_table_offset, extended_blob_table_size, true)?; + if extended_blob_table_offset > 0 + && extended_blob_table_size > 0 + && (!extended_blob_table_range.is_subrange_of(&md_range) + || extended_blob_table_range.intersect_with(&inode_table_range) + || extended_blob_table_range.intersect_with(&blob_table_range)) + { + return Err(ebadf!("invalid extended blob table")); + } + + // Prefetch the bootstrap file + readahead(file.as_raw_fd(), 0, len); + + // Mmap the bootstrap file into current process for direct access + let file_map = FileMapState::new(file, 0, size, false)?; + + // Load blob table. Safe because we have validated the blob table layout. + let mut blob_table = RafsV5BlobTable::new(); + let meta = &old_state.meta; + + // Load extended blob table if the bootstrap including extended blob table. 
+ if extended_blob_table_offset > 0 && extended_blob_table_size > 0 { + r.seek(SeekFrom::Start(extended_blob_table_offset))?; + blob_table + .extended + .load(r, meta.extended_blob_table_entries as usize)?; + } + r.seek(SeekFrom::Start(meta.blob_table_offset))?; + blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; + + // Load(Map) inode table. Safe because we have validated the inode table layout. + // Though we have passed *mut u32 to Vec::from_raw_parts(), it will trigger invalid memory + // access if the underlying memory is written to. + let inode_table = unsafe { + RafsV5InodeTable { + data: Vec::from_raw_parts( + file_map.offset(inode_table_start as usize) as *const u32 as *mut u32, + old_state.meta.inode_table_entries as usize, + old_state.meta.inode_table_entries as usize, + ), + } + }; + + let validate_inode = old_state.validate_inode; + + let state = DirectMappingState { + meta: old_state.meta, + inode_table: ManuallyDrop::new(inode_table), + blob_table, + file_map, + mmapped_inode_table: true, + validate_inode, + }; + + // Swap new and old DirectMappingState object, the old object will be destroyed when the + // reference count reaches zero. + self.state.store(Arc::new(state)); + + Ok(()) + } + + #[inline] + fn state(&self) -> Guard> { + self.state.load() + } +} + +impl RafsSuperInodes for DirectSuperBlockV5 { + fn get_max_ino(&self) -> Inode { + self.state().inode_table.len() as u64 + } + + /// Find inode offset by ino from inode table and mmap to OndiskInode. + fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result> { + let state = self.state(); + let wrapper = self.get_inode_wrapper(ino, state.deref(), validate_inode)?; + Ok(Arc::new(wrapper)) + } + + fn get_extended_inode( + &self, + ino: Inode, + validate_inode: bool, + ) -> Result> { + let state = self.state(); + let wrapper = self.get_inode_wrapper(ino, state.deref(), validate_inode)?; + Ok(Arc::new(wrapper)) + } +} + +impl RafsSuperBlock for DirectSuperBlockV5 { + fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + self.update_state(r) + } + + fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { + self.update_state(r).map_err(RafsError::SwapBackend) + } + + fn destroy(&mut self) { + let state = DirectMappingState::new(&RafsSuperMeta::default(), false); + + self.state.store(Arc::new(state)); + } + + fn get_blob_infos(&self) -> Vec> { + self.state().blob_table.entries.clone() + } + + fn root_ino(&self) -> u64 { + RAFS_V5_ROOT_INODE + } + + fn get_chunk_info(&self, _idx: usize) -> Result> { + unimplemented!("used by RAFS v6 only") + } + + fn set_blob_device(&self, _blob_device: BlobDevice) { + unimplemented!("used by RAFS v6 only") + } +} + +/// Direct-mapped RAFS v5 inode object. +pub struct OndiskInodeWrapper { + pub mapping: DirectSuperBlockV5, + pub offset: usize, +} + +impl OndiskInodeWrapper { + #[inline] + fn state(&self) -> Guard> { + self.mapping.state() + } + + /// Convert `OndiskInodeWrapper` to an `RafsV5Inode` object. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + #[inline] + fn inode<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5Inode { + state.file_map.get_ref::(self.offset).unwrap() + } + + /// Get an reference to the file name string. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. 
+ fn name_ref<'a>(&self, state: &'a DirectMappingState) -> &'a OsStr { + let offset = self.offset + size_of::(); + let size = self.inode(state).i_name_size as usize; + let name = state.file_map.get_slice(offset, size).unwrap(); + bytes_to_os_str(name) + } + + fn get_xattr_data<'a>( + &self, + state: &'a Guard>, + ) -> Result<(&'a [u8], usize)> { + let inode = self.inode(state.deref()); + + if !inode.has_xattr() { + return Ok((&[], 0)); + } + + let offset = self.offset + inode.size(); + let xattrs = state.file_map.get_ref::(offset)?; + let xattr_size = xattrs.size(); + let xattr_data = state + .file_map + .get_slice(offset + size_of::(), xattr_size)?; + + Ok((xattr_data, xattr_size)) + } + + fn _get_chunk_info(&self, idx: u32) -> Result> { + let state = self.state(); + let inode = self.inode(state.deref()); + + if !inode.is_reg() || inode.i_child_count == 0 || idx >= inode.i_child_count { + return Err(enoent!("invalid chunk info")); + } + + let mut offset = self.offset + inode.size(); + if inode.has_xattr() { + let xattrs = state.file_map.get_ref::(offset)?; + offset += size_of::() + xattrs.aligned_size(); + } + offset += size_of::() * idx as usize; + + let chunk = state.file_map.get_ref::(offset)?; + let wrapper = DirectChunkInfoV5::new(&state, chunk, self.mapping.clone(), offset)?; + + Ok(Arc::new(wrapper)) + } +} + +impl RafsInode for OndiskInodeWrapper { + // Somehow we got invalid `inode_count` from superblock. + fn validate(&self, _inode_count: u64, chunk_size: u64) -> Result<()> { + let state = self.state(); + let inode = state.file_map.get_ref::(self.offset)?; + let max_inode = state.inode_table.len() as u64; + let xattr_size = if inode.has_xattr() { + let offset = self.offset + inode.size(); + let xattrs = state.file_map.get_ref::(offset)?; + size_of::() + xattrs.aligned_size() + } else { + 0 + }; + + // * - parent inode number must be less than child inode number unless child is a hardlink. + // * - inode link count must not be zero. + // * - name_size must be less than 255. Due to alignment, the check is not so strict. + // * - name_size and symlink_size must be correctly aligned. + // Should we store raw size instead of aligned size for name and symlink? + if inode.i_ino == 0 + || inode.i_ino > max_inode + // || inode.i_ino > _inode_count + || (inode.i_ino != RAFS_V5_ROOT_INODE && inode.i_parent == 0) + || inode.i_nlink == 0 + || inode.i_name_size as usize > (RAFS_MAX_NAME + 1) + || inode.i_name_size == 0 + { + return Err(ebadf!(format!( + "inode validation failure, inode {:?}", + inode + ))); + } + if !inode.is_hardlink() && inode.i_parent >= inode.i_ino { + return Err(einval!("invalid parent inode")); + } + + let chunk_count = 0; + if inode.is_reg() { + if self.state().meta.is_chunk_dict() { + // chunk-dict doesn't support chunk_count check + return Err(std::io::Error::from_raw_os_error(libc::EOPNOTSUPP)); + } + let chunks = (inode.i_size + chunk_size - 1) / chunk_size; + if !inode.has_hole() && chunks != inode.i_child_count as u64 { + return Err(einval!(format!( + "invalid chunk count, ino {}, expected {}, actual {}", + inode.i_ino, chunks, inode.i_child_count, + ))); + } + let size = inode.size() + + xattr_size + + inode.i_child_count as usize * size_of::(); + state.file_map.validate_range(self.offset, size)?; + } else if inode.is_dir() { + // Only valid i_child_index, i_child_count when we have children. 
+ if inode.i_child_count > 0 + && ((inode.i_child_index as Inode) <= inode.i_ino + || inode.i_child_count as u64 >= max_inode + || inode.i_child_count as u64 + inode.i_child_index as u64 - 1 > max_inode) + { + return Err(einval!("invalid directory")); + } + let size = inode.size() + xattr_size; + state.file_map.validate_range(self.offset, size)?; + } else if inode.is_symlink() && inode.i_symlink_size == 0 { + return Err(einval!("invalid symlink target")); + } + if !inode.is_hardlink() && inode.i_parent >= inode.i_ino { + return Err(einval!("invalid parent inode")); + } + + let size = inode.size() + xattr_size + chunk_count * size_of::(); + state.file_map.validate_range(self.offset, size)?; + + Ok(()) + } + + fn alloc_bio_vecs( + &self, + _device: &BlobDevice, + offset: u64, + size: usize, + user_io: bool, + ) -> Result> { + rafsv5_alloc_bio_vecs(self, offset, size, user_io) + } + + fn collect_descendants_inodes( + &self, + descendants: &mut Vec>, + ) -> Result { + if !self.is_dir() { + return Err(enotdir!()); + } + + let state = self.state(); + let inode = self.inode(state.deref()); + let child_count = inode.i_child_count as u64; + let child_index = inode.i_child_index as u64; + let mut child_dirs: Vec> = Vec::new(); + + for idx in child_index..(child_index + child_count) { + let child_inode = self.mapping.get_inode(idx, false)?; + if child_inode.is_dir() { + child_dirs.push(child_inode); + } else if !child_inode.is_empty_size() { + descendants.push(child_inode); + } + } + + for d in child_dirs { + d.collect_descendants_inodes(descendants)?; + } + + Ok(0) + } + + fn get_entry(&self) -> Entry { + let state = self.state(); + let inode = self.inode(state.deref()); + + Entry { + attr: self.get_attr().into(), + inode: inode.i_ino, + generation: 0, + attr_flags: 0, + attr_timeout: state.meta.attr_timeout, + entry_timeout: state.meta.entry_timeout, + } + } + + fn get_attr(&self) -> Attr { + let state = self.state(); + let inode = self.inode(state.deref()); + + Attr { + ino: inode.i_ino, + size: inode.i_size, + blocks: inode.i_blocks, + mode: inode.i_mode, + nlink: inode.i_nlink as u32, + uid: inode.i_uid, + gid: inode.i_gid, + mtime: inode.i_mtime, + mtimensec: inode.i_mtime_nsec, + blksize: RAFS_ATTR_BLOCK_SIZE, + rdev: inode.i_rdev, + ..Default::default() + } + } + + fn get_xattr(&self, name: &OsStr) -> Result> { + let state = self.state(); + let (xattr_data, xattr_size) = self.get_xattr_data(&state)?; + parse_xattr_value(xattr_data, xattr_size, name) + } + + fn get_xattrs(&self) -> Result> { + let state = self.state(); + let (xattr_data, xattr_size) = self.get_xattr_data(&state)?; + parse_xattr_names(xattr_data, xattr_size) + } + + /// Get symlink target of the inode. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn get_symlink(&self) -> Result { + let state = self.state(); + let inode = self.inode(state.deref()); + let offset = + self.offset + size_of::() + rafsv5_align(inode.i_name_size as usize); + let size = inode.i_symlink_size as usize; + let symlink = state.file_map.get_slice(offset, size).unwrap(); + Ok(bytes_to_os_str(symlink).to_os_string()) + } + + fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { + // offset 0 and 1 is for "." and ".." respectively. + let mut cur_offset = entry_offset; + + if cur_offset == 0 { + cur_offset += 1; + // Safe to unwrap since conversion from DOT to os string can't fail. 
+ match handler(None, OsString::from(DOT), self.ino(), cur_offset) { + Ok(RafsInodeWalkAction::Continue) => {} + Ok(RafsInodeWalkAction::Break) => return Ok(()), + Err(e) => return Err(e), + } + } + + if cur_offset == 1 { + let parent = if self.ino() == 1 { 1 } else { self.parent() }; + cur_offset += 1; + // Safe to unwrap since conversion from DOTDOT to os string can't fail. + match handler(None, OsString::from(DOTDOT), parent, cur_offset) { + Ok(RafsInodeWalkAction::Continue) => {} + Ok(RafsInodeWalkAction::Break) => return Ok(()), + Err(e) => return Err(e), + }; + } + + let mut idx = cur_offset - 2; + while idx < self.get_child_count() as u64 { + assert!(idx <= u32::MAX as u64); + let child = self.get_child_by_index(idx as u32)?; + cur_offset += 1; + match handler(None, child.name(), child.ino(), cur_offset) { + Ok(RafsInodeWalkAction::Continue) => idx += 1, + Ok(RafsInodeWalkAction::Break) => break, + Err(e) => return Err(e), + } + } + + Ok(()) + } + + /// Get the child with the specified name. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn get_child_by_name(&self, name: &OsStr) -> Result> { + let state = self.state(); + let inode = self.inode(state.deref()); + + if !inode.is_dir() { + return Err(einval!("inode is not a directory")); + } else if inode.i_child_count == 0 { + return Err(enoent!()); + } + + let mut first = 0i32; + let mut last = (inode.i_child_count - 1) as i32; + + // Binary search by child name. + // This implementation is more convenient and slightly outperforms than slice::binary_search. + while first <= last { + let pivot = first + ((last - first) >> 1); + let wrapper = self.mapping.get_inode_wrapper( + (inode.i_child_index as i32 + pivot) as u64, + state.deref(), + state.validate_inode, + )?; + let target = wrapper.name_ref(state.deref()); + if target == name { + return Ok(Arc::new(wrapper)); + } + if target > name { + last = pivot - 1; + } else { + first = pivot + 1; + } + } + + Err(enoent!()) + } + + /// Get the child with the specified index. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. 
+ fn get_child_by_index(&self, idx: u32) -> Result> { + let state = self.state(); + let inode = self.inode(state.deref()); + let child_count = inode.i_child_count; + let child_index = inode.i_child_index; + + if !inode.is_dir() { + return Err(einval!("inode is not a directory")); + } else if idx >= child_count { + return Err(enoent!("invalid child index")); + } + + let wrapper = self.mapping.get_inode_wrapper( + (idx + child_index) as Inode, + state.deref(), + state.validate_inode, + )?; + Ok(Arc::new(wrapper)) + } + + #[inline] + fn get_child_count(&self) -> u32 { + let state = self.state(); + let inode = self.inode(state.deref()); + inode.i_child_count + } + + #[inline] + fn get_child_index(&self) -> Result { + let state = self.state(); + let inode = self.inode(state.deref()); + Ok(inode.i_child_index) + } + + #[inline] + fn get_chunk_count(&self) -> u32 { + self.get_child_count() + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_inode_wrapper!(is_blkdev, bool); + impl_inode_wrapper!(is_chrdev, bool); + impl_inode_wrapper!(is_sock, bool); + impl_inode_wrapper!(is_fifo, bool); + impl_inode_wrapper!(is_dir, bool); + impl_inode_wrapper!(is_reg, bool); + impl_inode_wrapper!(is_symlink, bool); + impl_inode_wrapper!(is_hardlink, bool); + impl_inode_wrapper!(has_xattr, bool); + impl_inode_getter!(ino, i_ino, u64); + impl_inode_getter!(size, i_size, u64); + impl_inode_getter!(rdev, i_rdev, u32); + impl_inode_getter!(projid, i_projid, u32); + impl_inode_getter!(get_symlink_size, i_symlink_size, u16); +} + +impl RafsInodeExt for OndiskInodeWrapper { + fn as_inode(&self) -> &dyn RafsInode { + self + } + + /// Get name of the inode. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn name(&self) -> OsString { + let state = self.state(); + self.name_ref(state.deref()).to_owned() + } + + fn flags(&self) -> u64 { + let state = self.state(); + let inode = self.inode(state.deref()); + + inode.i_flags.bits() + } + + fn get_digest(&self) -> RafsDigest { + let state = self.state(); + let inode = self.inode(state.deref()); + inode.i_digest + } + + /// Get chunk information with index `idx` + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn get_chunk_info(&self, idx: u32) -> Result> { + self._get_chunk_info(idx) + .map(|v| v as Arc) + } + + impl_inode_getter!(get_name_size, i_name_size, u16); + impl_inode_getter!(parent, i_parent, u64); +} + +impl RafsV5InodeChunkOps for OndiskInodeWrapper { + fn get_chunk_info_v5(&self, idx: u32) -> Result> { + self._get_chunk_info(idx) + .map(|v| v as Arc) + } +} + +impl RafsV5InodeOps for OndiskInodeWrapper { + fn get_blob_by_index(&self, idx: u32) -> Result> { + self.state().blob_table.get(idx) + } + + fn get_chunk_size(&self) -> u32 { + self.mapping.state().meta.chunk_size + } + + impl_inode_wrapper!(has_hole, bool); +} + +pub struct DirectChunkInfoV5 { + mapping: DirectSuperBlockV5, + offset: usize, + digest: RafsDigest, +} + +// This is *direct* metadata mode in-memory chunk info object. +impl DirectChunkInfoV5 { + #[inline] + fn new( + state: &DirectMappingState, + chunk: &RafsV5ChunkInfo, + mapping: DirectSuperBlockV5, + offset: usize, + ) -> Result { + state.file_map.get_ref::(offset)?; + Ok(Self { + mapping, + offset, + digest: chunk.block_id, + }) + } + + #[inline] + fn state(&self) -> Guard> { + self.mapping.state() + } + + /// Dereference the underlying OndiskChunkInfo object. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. 
+ /// The OndiskChunkInfoWrapper could only be constructed from a valid OndiskChunkInfo pointer, + /// so it's safe to dereference the underlying OndiskChunkInfo object. + fn chunk<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5ChunkInfo { + state + .file_map + .get_ref::(self.offset) + .unwrap() + } +} + +impl BlobChunkInfo for DirectChunkInfoV5 { + fn chunk_id(&self) -> &RafsDigest { + &self.digest + } + + fn id(&self) -> u32 { + self.index() + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.chunk(self.state().deref()) + .flags + .contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_chunkinfo_getter!(blob_index, u32); + impl_chunkinfo_getter!(compressed_offset, u64); + impl_chunkinfo_getter!(compressed_size, u32); + impl_chunkinfo_getter!(uncompressed_offset, u64); + impl_chunkinfo_getter!(uncompressed_size, u32); +} + +impl BlobV5ChunkInfo for DirectChunkInfoV5 { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_chunkinfo_getter!(index, u32); + impl_chunkinfo_getter!(file_offset, u64); + impl_chunkinfo_getter!(flags, BlobChunkFlags); +} diff --git a/rafs/src/metadata/direct_v6.rs b/rafs/src/metadata/direct_v6.rs index 3330aea9451..9eae9c49b8a 100644 --- a/rafs/src/metadata/direct_v6.rs +++ b/rafs/src/metadata/direct_v6.rs @@ -1,1613 +1,1613 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -/// A bootstrap driver to directly use on disk bootstrap as runtime in-memory bootstrap. -/// -/// To reduce memory footprint and speed up filesystem initialization, the V5 on disk bootstrap -/// layout has been designed to support directly mapping as runtime bootstrap. So we don't need to -/// define another set of runtime data structures to cache on-disk bootstrap in memory. -/// -/// To support modification to the runtime bootstrap, several technologies have been adopted: -/// * - arc-swap is used to support RCU-like update instead of Mutex/RwLock. -/// * - `offset` instead of `pointer` is used to record data structure position. -/// * - reference count to the referenced resources/objects. -/// -/// # Security -/// The bootstrap file may be provided by untrusted parties, so we must ensure strong validations -/// before making use of any bootstrap, especially we are using them in memory-mapped mode. The -/// rule is to call validate() after creating any data structure from the on-disk bootstrap. 
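// ----------------------------------------------------------------------------
// Editor's sketch (not part of this patch): both direct_v5.rs and direct_v6.rs
// follow the "store an offset, not a pointer" rule described in the module
// comment above, rebuilding typed references as base + offset and validating
// before use. The simplified example below shows that idea over a plain byte
// buffer; MapState and get_ref are hypothetical stand-ins for the real
// FileMapState, which maps the bootstrap file instead of owning a Vec.

use std::mem::{align_of, size_of};

struct MapState {
    base: Vec<u8>, // stands in for the memory-mapped bootstrap
}

impl MapState {
    // Rebuild a typed reference as `base + offset` after checking bounds and
    // alignment, so callers never hold a raw pointer across state updates.
    fn get_ref<T>(&self, offset: usize) -> Option<&T> {
        let end = offset.checked_add(size_of::<T>())?;
        let addr = self.base.as_ptr() as usize + offset;
        if end > self.base.len() || addr % align_of::<T>() != 0 {
            return None;
        }
        // Safety: bounds and alignment were checked above; T is assumed to be
        // a plain-old-data on-disk structure with no invalid bit patterns.
        Some(unsafe { &*(self.base.as_ptr().add(offset) as *const T) })
    }
}

fn main() {
    let map = MapState { base: vec![0u8; 64] };
    let v: &u32 = map.get_ref(8).expect("offset in range and aligned");
    assert_eq!(*v, 0);
}
// ----------------------------------------------------------------------------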
-use std::any::Any; -use std::cmp::Ordering; -use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; -use std::io::{Result, SeekFrom}; -use std::mem::size_of; -use std::os::unix::ffi::{OsStrExt, OsStringExt}; -use std::os::unix::io::AsRawFd; -use std::sync::{Arc, Mutex}; -use std::time::Duration; - -use arc_swap::{ArcSwap, Guard}; -use nydus_storage::device::{ - v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoDesc, BlobIoVec, -}; -use nydus_storage::utils::readahead; -use nydus_utils::filemap::{clone_file, FileMapState}; -use nydus_utils::{digest::RafsDigest, div_round_up, round_up}; - -use crate::metadata::layout::v5::RafsV5ChunkInfo; -use crate::metadata::layout::v6::{ - rafsv6_load_blob_extra_info, recover_namespace, RafsV6BlobTable, RafsV6Dirent, - RafsV6InodeChunkAddr, RafsV6InodeCompact, RafsV6InodeExtended, RafsV6OndiskInode, - RafsV6XattrEntry, RafsV6XattrIbodyHeader, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, - EROFS_BLOCK_SIZE_512, EROFS_INODE_CHUNK_BASED, EROFS_INODE_FLAT_INLINE, EROFS_INODE_FLAT_PLAIN, - EROFS_INODE_SLOT_SIZE, EROFS_I_DATALAYOUT_BITS, EROFS_I_VERSION_BIT, EROFS_I_VERSION_BITS, -}; -use crate::metadata::layout::{bytes_to_os_str, MetaRange, XattrName, XattrValue}; -use crate::metadata::{ - Attr, Entry, Inode, RafsBlobExtraInfo, RafsInode, RafsInodeWalkAction, RafsInodeWalkHandler, - RafsSuperBlock, RafsSuperFlags, RafsSuperInodes, RafsSuperMeta, RAFS_ATTR_BLOCK_SIZE, - RAFS_MAX_NAME, -}; -use crate::{MetaType, RafsError, RafsInodeExt, RafsIoReader, RafsResult}; - -fn err_invalidate_data(rafs_err: RafsError) -> std::io::Error { - std::io::Error::new(std::io::ErrorKind::InvalidData, rafs_err) -} - -/// The underlying struct to maintain memory mapped bootstrap for a file system. -/// -/// Only the DirectMappingState may store raw pointers. -/// Other data structures should not store raw pointers, instead they should hold a reference to -/// the DirectMappingState object and store an offset, so a `pointer` could be reconstruct by -/// `DirectMappingState.base + offset`. -struct DirectMappingState { - meta: Arc, - blob_table: RafsV6BlobTable, - blob_extra_infos: HashMap, - map: FileMapState, -} - -impl DirectMappingState { - fn new(meta: &RafsSuperMeta) -> Self { - DirectMappingState { - meta: Arc::new(*meta), - blob_table: RafsV6BlobTable::default(), - blob_extra_infos: HashMap::new(), - map: FileMapState::default(), - } - } - - fn is_tarfs(&self) -> bool { - self.meta.flags.contains(RafsSuperFlags::TARTFS_MODE) - } - - fn block_size(&self) -> u64 { - if self.is_tarfs() { - EROFS_BLOCK_SIZE_512 - } else { - EROFS_BLOCK_SIZE_4096 - } - } -} - -struct DirectCachedInfo { - meta_offset: usize, - root_ino: Inode, - chunk_size: u32, - chunk_map: Mutex>>, - attr_timeout: Duration, - entry_timeout: Duration, -} - -/// Direct-mapped Rafs v6 super block. -#[derive(Clone)] -pub struct DirectSuperBlockV6 { - info: Arc, - state: Arc>, - device: Arc>, -} - -impl DirectSuperBlockV6 { - /// Create a new instance of `DirectSuperBlockV6`. 
- pub fn new(meta: &RafsSuperMeta) -> Self { - let state = DirectMappingState::new(meta); - let block_size = state.block_size(); - let meta_offset = meta.meta_blkaddr as usize * block_size as usize; - let info = DirectCachedInfo { - meta_offset, - root_ino: meta.root_nid as Inode, - chunk_size: meta.chunk_size, - chunk_map: Mutex::new(None), - attr_timeout: meta.attr_timeout, - entry_timeout: meta.entry_timeout, - }; - - Self { - info: Arc::new(info), - state: Arc::new(ArcSwap::new(Arc::new(state))), - device: Arc::new(Mutex::new(BlobDevice::default())), - } - } - - fn disk_inode( - state: &Guard>, - offset: usize, - ) -> Result<&dyn RafsV6OndiskInode> { - let i: &RafsV6InodeCompact = state.map.get_ref(offset)?; - if i.format() & EROFS_I_VERSION_BITS == 0 { - Ok(i) - } else { - let i = state.map.get_ref::(offset)?; - Ok(i) - } - } - - fn inode_wrapper( - &self, - state: &Guard>, - nid: u64, - ) -> Result { - if nid >= (usize::MAX / EROFS_INODE_SLOT_SIZE) as u64 { - Err(einval!(format!("v6: inode number 0x{:x} is too big", nid))) - } else if let Some(offset) = self - .info - .meta_offset - .checked_add(nid as usize * EROFS_INODE_SLOT_SIZE) - { - OndiskInodeWrapper::new(state, self.clone(), offset) - } else { - Err(einval!(format!("v6: invalid inode number 0x{:x}", nid))) - } - } - - // For RafsV6, we can't get the parent info of a non-dir file with its on-disk inode, - // so we need to pass corresponding parent info when constructing the child inode. - fn inode_wrapper_with_info( - &self, - state: &Guard>, - nid: u64, - parent_inode: Inode, - name: OsString, - ) -> Result { - self.inode_wrapper(state, nid).map(|inode| { - let mut inode = inode; - inode.parent_inode = Some(parent_inode); - inode.name = Some(name); - inode - }) - } - - fn update_state(&self, r: &mut RafsIoReader) -> Result<()> { - // Validate file size - let file = clone_file(r.as_raw_fd())?; - let md = file.metadata()?; - let len = md.len(); - let md_range = MetaRange::new( - EROFS_BLOCK_SIZE_4096 as u64, - len - EROFS_BLOCK_SIZE_4096 as u64, - true, - )?; - - // Validate blob table layout as blob_table_start and blob_table_offset is read from bootstrap. - let old_state = self.state.load(); - let blob_table_size = old_state.meta.blob_table_size as u64; - let blob_table_start = old_state.meta.blob_table_offset; - let blob_table_range = MetaRange::new(blob_table_start, blob_table_size, false)?; - if !blob_table_range.is_subrange_of(&md_range) { - return Err(ebadf!("invalid blob table")); - } - - // Prefetch the bootstrap file - readahead(file.as_raw_fd(), 0, len); - - // Load extended blob table if the bootstrap including extended blob table. - let mut blob_table = RafsV6BlobTable::new(); - let meta = &old_state.meta; - r.seek(SeekFrom::Start(meta.blob_table_offset))?; - blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; - let blob_extra_infos = rafsv6_load_blob_extra_info(meta, r)?; - - let file_map = FileMapState::new(file, 0, len as usize, false)?; - let state = DirectMappingState { - meta: old_state.meta.clone(), - blob_table, - blob_extra_infos, - map: file_map, - }; - - // Swap new and old DirectMappingState object, - // the old object will be destroyed when the reference count reaches zero. 
- self.state.store(Arc::new(state)); - - Ok(()) - } - - // For RafsV6, inode doesn't store detailed chunk info, only a simple RafsV6InodeChunkAddr - // so we need to use the chunk table at the end of the bootstrap to restore the chunk info of an inode - fn load_chunk_map(&self) -> Result> { - let mut chunk_map = HashMap::default(); - let state = self.state.load(); - let size = state.meta.chunk_table_size as usize; - if size == 0 { - return Ok(chunk_map); - } - - let block_size = state.block_size(); - let unit_size = size_of::(); - if size % unit_size != 0 { - return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); - } - - for idx in 0..(size / unit_size) { - let chunk = DirectChunkInfoV6::new(&state, self.clone(), idx)?; - let mut v6_chunk = RafsV6InodeChunkAddr::new(); - v6_chunk.set_blob_index(chunk.blob_index()); - v6_chunk.set_blob_ci_index(chunk.id()); - v6_chunk.set_block_addr((chunk.uncompressed_offset() / block_size) as u32); - chunk_map.insert(v6_chunk, idx); - } - - Ok(chunk_map) - } -} - -impl RafsSuperInodes for DirectSuperBlockV6 { - fn get_max_ino(&self) -> Inode { - let state = self.state.load(); - // The maximum inode number supported by RAFSv6 is smaller than limit of fuse-backend-rs. - (0xffff_ffffu64) * state.block_size() / EROFS_INODE_SLOT_SIZE as u64 - } - - /// Find inode offset by ino from inode table and mmap to OndiskInode. - fn get_inode(&self, ino: Inode, _validate_digest: bool) -> Result> { - let state = self.state.load(); - Ok(Arc::new(self.inode_wrapper(&state, ino)?)) - } - - fn get_extended_inode( - &self, - ino: Inode, - _validate_digest: bool, - ) -> Result> { - let state = self.state.load(); - if ino == state.meta.root_nid as u64 { - let inode = self.inode_wrapper_with_info(&state, ino, ino, OsString::from("/"))?; - return Ok(Arc::new(inode)); - } - let mut inode = self.inode_wrapper(&state, ino)?; - if inode.is_dir() { - inode.get_parent()?; - inode.get_name(&state)?; - return Ok(Arc::new(inode)); - } - Err(enoent!(format!( - "can't get extended inode for {}, root nid {} {:?}", - ino, state.meta.root_nid, inode.name - ))) - } -} - -impl RafsSuperBlock for DirectSuperBlockV6 { - fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - self.update_state(r) - } - - fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { - self.update_state(r).map_err(RafsError::SwapBackend) - } - - fn destroy(&mut self) { - let state = DirectMappingState::new(&RafsSuperMeta::default()); - self.state.store(Arc::new(state)); - } - - fn get_blob_infos(&self) -> Vec> { - self.state.load().blob_table.get_all() - } - - fn get_blob_extra_infos(&self) -> Result> { - Ok(self.state.load().blob_extra_infos.clone()) - } - - fn root_ino(&self) -> u64 { - self.info.root_ino - } - - fn get_chunk_info(&self, idx: usize) -> Result> { - let state = self.state.load(); - let chunk = DirectChunkInfoV6::new(&state, self.clone(), idx)?; - Ok(Arc::new(chunk)) - } - - fn set_blob_device(&self, blob_device: BlobDevice) { - *self.device.lock().unwrap() = blob_device; - } -} - -/// Direct-mapped RAFS v6 inode object. 
-pub struct OndiskInodeWrapper { - mapping: DirectSuperBlockV6, - offset: usize, - blocks_count: u64, - parent_inode: Option, - name: Option, -} - -impl OndiskInodeWrapper { - fn new( - state: &Guard>, - mapping: DirectSuperBlockV6, - offset: usize, - ) -> Result { - let inode = DirectSuperBlockV6::disk_inode(state, offset)?; - let blocks_count = div_round_up(inode.size(), state.block_size()); - - Ok(OndiskInodeWrapper { - mapping, - offset, - blocks_count, - parent_inode: None, - name: None, - }) - } - - fn state(&self) -> Guard> { - self.mapping.state.load() - } - - fn blocks_count(&self) -> u64 { - self.blocks_count - } - - fn disk_inode<'a>( - &self, - state: &'a Guard>, - ) -> &'a dyn RafsV6OndiskInode { - // Safe to unwrap() because `self.offset` has been validated in new(). - DirectSuperBlockV6::disk_inode(state, self.offset).unwrap() - } - - fn get_entry<'a>( - &self, - state: &'a Guard>, - inode: &dyn RafsV6OndiskInode, - block_index: usize, - index: usize, - ) -> RafsResult<&'a RafsV6Dirent> { - let offset = self.data_block_offset(state, inode, block_index)?; - if size_of::() * (index + 1) >= state.block_size() as usize { - Err(RafsError::InvalidImageData) - } else if let Some(offset) = offset.checked_add(size_of::() * index) { - state - .map - .get_ref(offset) - .map_err(|_e| RafsError::InvalidImageData) - } else { - Err(RafsError::InvalidImageData) - } - } - - // `max_entries` indicates the quantity of entries residing in a single block including tail packing. - // Both `block_index` and `index` start from 0. - fn entry_name<'a>( - &self, - state: &'a Guard>, - inode: &dyn RafsV6OndiskInode, - block_index: usize, - index: usize, - max_entries: usize, - ) -> RafsResult<&'a OsStr> { - assert!(max_entries > 0); - let block_size = state.block_size(); - let offset = self.data_block_offset(state, inode, block_index)?; - let de = self.get_entry(state, inode, block_index, index)?; - let buf: &[u8] = match index.cmp(&(max_entries - 1)) { - Ordering::Less => { - let next_de = self.get_entry(state, inode, block_index, index + 1)?; - if next_de.e_nameoff as u64 >= block_size { - return Err(RafsError::InvalidImageData); - } - let len = next_de.e_nameoff.checked_sub(de.e_nameoff).ok_or_else(|| { - error!( - "nid {} entry index {} block index {} next dir entry {:?} current dir entry {:?}", - self.ino(), index, block_index, next_de, de - ); - RafsError::IllegalMetaStruct( - MetaType::Dir, - format!("cur {} next {}", next_de.e_nameoff, de.e_nameoff), - ) - })?; - - state - .map - .get_slice(offset + de.e_nameoff as usize, len as usize) - .map_err(|_e| RafsError::InvalidImageData)? - } - Ordering::Equal => { - let base = de.e_nameoff as u64; - if base >= block_size { - return Err(RafsError::InvalidImageData); - } - - // The possible maximum len of the last dirent's file name should be calculated - // differently depends on whether the dirent is at the last block of the dir file. - // Because the other blocks should be fully used, while the last may not. 
- let block_count = self.blocks_count() as usize; - let len = match block_count.cmp(&(block_index + 1)) { - Ordering::Greater => (block_size - base) as usize, - Ordering::Equal => { - if self.size() % block_size == 0 { - (block_size - base) as usize - } else { - (self.size() % block_size - base) as usize - } - } - Ordering::Less => return Err(RafsError::InvalidImageData), - }; - - let buf: &[u8] = state - .map - .get_slice(offset + base as usize, len) - .map_err(|_e| RafsError::InvalidImageData)?; - // Use this trick to temporarily decide entry name's length. Improve this? - let mut l: usize = 0; - for i in buf { - if *i != 0 { - l += 1; - if len == l { - break; - } - } else { - break; - } - } - &buf[..l] - } - Ordering::Greater => return Err(RafsError::InvalidImageData), - }; - - Ok(bytes_to_os_str(buf)) - } - - // COPIED from kernel code: - // erofs inode data layout (i_format in on-disk inode): - // 0 - inode plain without inline data A: inode, [xattrs], ... | ... | no-holed data - // 1 - inode VLE compression B (legacy): inode, [xattrs], extents ... | ... - // 2 - inode plain with inline data C: inode, [xattrs], last_inline_data, ... | ... | no-holed data - // 3 - inode compression D: inode, [xattrs], map_header, extents ... | ... - // 4 - inode chunk-based E: inode, [xattrs], chunk indexes ... | ... - // 5~7 - reserved - fn data_block_offset( - &self, - state: &Guard>, - inode: &dyn RafsV6OndiskInode, - index: usize, - ) -> RafsResult { - const VALID_MODE_BITS: u16 = ((1 << EROFS_I_DATALAYOUT_BITS) - 1) << EROFS_I_VERSION_BITS - | ((1 << EROFS_I_VERSION_BITS) - 1); - if inode.format() & !VALID_MODE_BITS != 0 || index > u32::MAX as usize { - return Err(RafsError::Incompatible(inode.format())); - } - - let layout = inode.format() >> EROFS_I_VERSION_BITS; - match layout { - EROFS_INODE_FLAT_PLAIN => Self::flat_data_block_offset(state, inode, index), - EROFS_INODE_FLAT_INLINE => match self.blocks_count().cmp(&(index as u64 + 1)) { - Ordering::Greater => Self::flat_data_block_offset(state, inode, index), - Ordering::Equal => { - Ok(self.offset as usize + Self::inode_size(inode) + Self::xattr_size(inode)) - } - Ordering::Less => Err(RafsError::InvalidImageData), - }, - _ => Err(RafsError::InvalidImageData), - } - } - - fn flat_data_block_offset( - state: &Guard>, - inode: &dyn RafsV6OndiskInode, - index: usize, - ) -> RafsResult { - // `i_u` points to the Nth block - let base = inode.union() as usize; - if base.checked_add(index).is_none() || base + index > u32::MAX as usize { - Err(RafsError::InvalidImageData) - } else { - Ok((base + index) * state.block_size() as usize) - } - } - - fn mode_format_bits(&self) -> u32 { - let state = self.state(); - let i = self.disk_inode(&state); - i.mode() as u32 & libc::S_IFMT as u32 - } - - #[allow(clippy::too_many_arguments)] - fn make_chunk_io( - &self, - state: &Guard>, - device: &BlobDevice, - chunk_addr: &RafsV6InodeChunkAddr, - content_offset: u32, - content_len: u32, - user_io: bool, - is_tarfs_mode: bool, - is_tail: bool, - ) -> Option { - let blob_index = match chunk_addr.blob_index() { - Err(e) => { - warn!( - "failed to get blob index for chunk address {:?}, {}", - chunk_addr, e - ); - return None; - } - Ok(v) => v, - }; - - match state.blob_table.get(blob_index) { - Err(e) => { - warn!( - "failed to get blob with index {} for chunk address {:?}, {}", - blob_index, chunk_addr, e - ); - None - } - Ok(blob) => { - if is_tarfs_mode { - let size = if is_tail { - (self.size() % self.chunk_size() as u64) as u32 - } else { - self.chunk_size() - }; - 
let chunk = TarfsChunkInfoV6::from_chunk_addr(chunk_addr, size).ok()?; - let chunk = Arc::new(chunk) as Arc; - Some(BlobIoDesc::new( - blob, - chunk.into(), - content_offset, - content_len, - user_io, - )) - } else { - let chunk_index = chunk_addr.blob_ci_index(); - device - .create_io_chunk(blob.blob_index(), chunk_index) - .map(|v| BlobIoDesc::new(blob, v, content_offset, content_len, user_io)) - } - } - } - } - - fn chunk_size(&self) -> u32 { - self.mapping.info.chunk_size - } - - fn inode_size(inode: &dyn RafsV6OndiskInode) -> usize { - if (inode.format() & 1 << EROFS_I_VERSION_BIT) != 0 { - size_of::() - } else { - size_of::() - } - } - - fn xattr_size(inode: &dyn RafsV6OndiskInode) -> usize { - // Rafs v6 only supports EROFS inline xattr. - if inode.xattr_inline_count() > 0 { - (inode.xattr_inline_count() as usize - 1) * size_of::() - + size_of::() - } else { - 0 - } - } - - // Get sum of inode and xattr size aligned to RafsV6InodeChunkAddr. - fn inode_xattr_size(inode: &dyn RafsV6OndiskInode) -> usize { - let sz = Self::inode_size(inode) as u64 + Self::xattr_size(inode) as u64; - round_up(sz, size_of::() as u64) as usize - } - - fn chunk_addresses<'a>( - &self, - state: &'a Guard>, - base_index: u32, - ) -> RafsResult<&'a [RafsV6InodeChunkAddr]> { - let total_chunks = div_round_up(self.size(), self.chunk_size() as u64); - if total_chunks > u32::MAX as u64 || total_chunks <= base_index as u64 { - return Err(RafsError::InvalidImageData); - } - - let inode = self.disk_inode(state); - assert_eq!( - inode.format() >> EROFS_I_VERSION_BITS, - EROFS_INODE_CHUNK_BASED - ); - - let base_index = base_index as usize; - let base = Self::inode_xattr_size(inode) + base_index * size_of::(); - if let Some(offset) = base.checked_add(self.offset) { - let count = total_chunks as usize - base_index; - state - .map - .get_slice(offset, count) - .map_err(|_e| RafsError::InvalidImageData) - } else { - Err(RafsError::InvalidImageData) - } - } - - fn find_target_block( - &self, - state: &Guard>, - inode: &dyn RafsV6OndiskInode, - name: &OsStr, - ) -> Result> { - if inode.size() == 0 || !self.is_dir() { - return Ok(None); - } - - let blocks_count = self.blocks_count(); - if blocks_count > u32::MAX as u64 { - return Err(einval!("v6: invalid block count in directory entry")); - } - - let mut first = 0; - let mut last = (blocks_count - 1) as i64; - while first <= last { - let pivot = first + ((last - first) >> 1); - let entries_count = self.get_entry_count(&state, inode, pivot as usize)?; - let h_name = self - .entry_name(state, inode, pivot as usize, 0, entries_count) - .map_err(err_invalidate_data)?; - let t_name = self - .entry_name( - state, - inode, - pivot as usize, - entries_count - 1, - entries_count, - ) - .map_err(err_invalidate_data)?; - if h_name <= name && t_name >= name { - return Ok(Some(pivot as usize)); - } else if h_name > name { - if pivot == 0 { - break; - } - last = pivot - 1; - } else { - first = pivot + 1; - } - } - - Ok(None) - } - - fn get_parent(&mut self) -> Result<()> { - assert!(self.is_dir()); - let parent = self.get_child_by_name(OsStr::new(".."))?; - self.parent_inode = Some(parent.ino()); - Ok(()) - } - - fn get_name(&mut self, state: &Guard>) -> Result<()> { - assert!(self.is_dir()); - let cur_ino = self.ino(); - if cur_ino == self.mapping.info.root_ino { - self.name = Some(OsString::from("")); - } else { - let parent = self.mapping.inode_wrapper(state, self.parent())?; - parent.walk_children_inodes( - 0, - &mut |_inode: Option>, name: OsString, ino, _offset| { - if cur_ino == 
ino { - self.name = Some(name); - return Ok(RafsInodeWalkAction::Break); - } - Ok(RafsInodeWalkAction::Continue) - }, - )?; - if self.name.is_none() { - return Err(einval!(format!( - "v6: failed to get parent for directory with inode 0x{:x}", - cur_ino - ))); - } - } - - Ok(()) - } - - fn get_entry_count( - &self, - state: &Guard>, - inode: &dyn RafsV6OndiskInode, - block_index: usize, - ) -> Result { - let head_entry = self - .get_entry(&state, inode, block_index, 0) - .map_err(err_invalidate_data)?; - let name_offset = head_entry.e_nameoff as usize; - if name_offset as u64 >= EROFS_BLOCK_SIZE_4096 - || name_offset % size_of::() != 0 - { - Err(enoent!(format!( - "v6: invalid e_nameoff {} from directory entry", - name_offset - ))) - } else { - Ok(name_offset / size_of::()) - } - } -} - -impl RafsInode for OndiskInodeWrapper { - fn validate(&self, _inode_count: u64, _chunk_size: u64) -> Result<()> { - let state = self.state(); - let inode = self.disk_inode(&state); - let max_inode = self.mapping.get_max_ino(); - - if self.ino() > max_inode - || self.offset > (u32::MAX as usize) * EROFS_BLOCK_SIZE_4096 as usize - || inode.nlink() == 0 - || self.get_name_size() as usize > (RAFS_MAX_NAME + 1) - { - return Err(ebadf!(format!( - "inode validation failure, inode {:?}", - inode - ))); - } - - if self.is_reg() { - if state.meta.is_chunk_dict() { - // chunk-dict doesn't support chunk_count check - return Err(std::io::Error::from_raw_os_error(libc::EOPNOTSUPP)); - } - let chunks = div_round_up(self.size(), self.chunk_size() as u64) as usize; - let chunk_size = chunks * size_of::(); - let size = OndiskInodeWrapper::inode_xattr_size(inode) - .checked_add(chunk_size) - .ok_or_else(|| einval!("v6: invalid inode size"))?; - state.map.validate_range(self.offset, size)?; - } else if self.is_dir() { - if self.get_child_count() as u64 >= max_inode { - return Err(einval!("invalid directory")); - } - let xattr_size = Self::xattr_size(inode) as usize; - let size = Self::inode_size(inode) + xattr_size; - state.map.validate_range(self.offset, size)?; - } else if self.is_symlink() && self.size() == 0 { - return Err(einval!("invalid symlink target")); - } - Ok(()) - } - - fn alloc_bio_vecs( - &self, - device: &BlobDevice, - offset: u64, - size: usize, - user_io: bool, - ) -> Result> { - let state = self.state(); - let chunk_size = self.chunk_size(); - let head_chunk_index = offset / chunk_size as u64; - if head_chunk_index > u32::MAX as u64 { - return Err(einval!( - "v6: invalid offset or chunk size when calculate chunk index" - )); - } - let mut vec: Vec = Vec::new(); - let chunks = self - .chunk_addresses(&state, head_chunk_index as u32) - .map_err(err_invalidate_data)?; - if chunks.is_empty() { - return Ok(vec); - } - - let mut curr_chunk_index = head_chunk_index as u32; - let tail_chunk_index = self.get_chunk_count() - 1; - let is_tarfs_mode = state.is_tarfs(); - let content_offset = (offset % chunk_size as u64) as u32; - let mut left = std::cmp::min(self.size() - offset, size as u64) as u32; - let mut content_len = std::cmp::min(chunk_size - content_offset, left); - let desc = self - .make_chunk_io( - &state, - device, - &chunks[0], - content_offset, - content_len, - user_io, - is_tarfs_mode, - curr_chunk_index == tail_chunk_index, - ) - .ok_or_else(|| einval!("failed to get chunk information"))?; - - let mut descs = BlobIoVec::new(desc.blob.clone()); - descs.push(desc); - left -= content_len; - if left != 0 { - // Handle the rest of chunks since they shares the same content length = 0. 
- for c in chunks.iter().skip(1) { - curr_chunk_index += 1; - content_len = std::cmp::min(chunk_size, left); - let desc = self - .make_chunk_io( - &state, - device, - c, - 0, - content_len, - user_io, - is_tarfs_mode, - curr_chunk_index == tail_chunk_index, - ) - .ok_or_else(|| einval!("failed to get chunk information"))?; - if desc.blob.blob_index() != descs.blob_index() { - vec.push(descs); - descs = BlobIoVec::new(desc.blob.clone()); - } - descs.push(desc); - left -= content_len; - if left == 0 { - break; - } - } - } - if !descs.is_empty() { - vec.push(descs) - } - assert_eq!(left, 0); - - Ok(vec) - } - - fn collect_descendants_inodes( - &self, - descendants: &mut Vec>, - ) -> Result { - if !self.is_dir() { - return Err(enotdir!()); - } - - let mut child_dirs: Vec> = Vec::new(); - let callback = &mut |inode: Option>, name: OsString, _ino, _offset| { - if let Some(child_inode) = inode { - if child_inode.is_dir() { - // EROFS packs dot and dotdot, so skip them two. - if name != "." && name != ".." { - child_dirs.push(child_inode); - } - } else if !child_inode.is_empty_size() && child_inode.is_reg() { - descendants.push(child_inode); - } - Ok(RafsInodeWalkAction::Continue) - } else { - Ok(RafsInodeWalkAction::Continue) - } - }; - - self.walk_children_inodes(0, callback)?; - for d in child_dirs { - d.collect_descendants_inodes(descendants)?; - } - - Ok(0) - } - - fn get_entry(&self) -> Entry { - Entry { - attr: self.get_attr().into(), - inode: self.ino(), - generation: 0, - attr_timeout: self.mapping.info.attr_timeout, - entry_timeout: self.mapping.info.entry_timeout, - ..Default::default() - } - } - - fn get_attr(&self) -> Attr { - let state = self.state(); - let inode = self.disk_inode(&state); - - Attr { - ino: self.ino(), - size: inode.size(), - mode: inode.mode() as u32, - nlink: inode.nlink(), - blocks: div_round_up(inode.size(), 512), - uid: inode.ugid().0, - gid: inode.ugid().1, - mtime: inode.mtime_s_ns().0, - mtimensec: inode.mtime_s_ns().1, - blksize: RAFS_ATTR_BLOCK_SIZE, - rdev: inode.rdev(), - ..Default::default() - } - } - - fn ino(&self) -> u64 { - assert!(self.offset > self.mapping.info.meta_offset); - (self.offset - self.mapping.info.meta_offset) as u64 / EROFS_INODE_SLOT_SIZE as u64 - } - - /// Get real device number of the inode. - fn rdev(&self) -> u32 { - let state = self.state(); - self.disk_inode(&state).union() - } - - /// Get project id associated with the inode. - fn projid(&self) -> u32 { - 0 - } - - #[inline] - fn is_blkdev(&self) -> bool { - self.mode_format_bits() == libc::S_IFBLK as u32 - } - - #[inline] - fn is_chrdev(&self) -> bool { - self.mode_format_bits() == libc::S_IFCHR as u32 - } - - #[inline] - fn is_sock(&self) -> bool { - self.mode_format_bits() == libc::S_IFSOCK as u32 - } - - #[inline] - fn is_fifo(&self) -> bool { - self.mode_format_bits() == libc::S_IFIFO as u32 - } - - fn is_dir(&self) -> bool { - self.mode_format_bits() == libc::S_IFDIR as u32 - } - - /// Check whether the inode is a symlink. - fn is_symlink(&self) -> bool { - self.mode_format_bits() == libc::S_IFLNK as u32 - } - - /// Check whether the inode is a regular file. - fn is_reg(&self) -> bool { - self.mode_format_bits() == libc::S_IFREG as u32 - } - - /// Check whether the inode is a hardlink. - fn is_hardlink(&self) -> bool { - let state = self.state(); - let inode = self.disk_inode(&state); - inode.nlink() > 1 && self.is_reg() - } - - /// Check whether the inode has extended attributes. 
- fn has_xattr(&self) -> bool { - let state = self.state(); - self.disk_inode(&state).xattr_inline_count() > 0 - } - - fn get_xattr(&self, name: &OsStr) -> Result> { - let state = self.state(); - let inode = self.disk_inode(&state); - let total = inode.xattr_inline_count(); - if total == 0 { - return Ok(None); - } - - let mut offset = - self.offset + Self::inode_size(inode) + size_of::(); - let mut remaining = (total - 1) as usize * size_of::(); - while remaining > 0 { - let e: &RafsV6XattrEntry = state.map.get_ref(offset)?; - if e.name_len() as usize + e.value_size() as usize > remaining { - return Err(einval!(format!( - "v6: invalid xattr name size {}", - e.name_len() - ))); - } - let mut xa_name = recover_namespace(e.name_index())?; - let suffix: &[u8] = state.map.get_slice( - offset + size_of::(), - e.name_len() as usize, - )?; - xa_name.push(OsStr::from_bytes(suffix)); - if xa_name == name { - let data: &[u8] = state.map.get_slice( - offset + size_of::() + e.name_len() as usize, - e.value_size() as usize, - )?; - return Ok(Some(data.to_vec())); - } - - let mut s = e.name_len() + e.value_size() + size_of::() as u32; - s = round_up(s as u64, size_of::() as u64) as u32; - if s as usize >= remaining { - break; - } - remaining -= s as usize; - offset += s as usize; - } - - Ok(None) - } - - fn get_xattrs(&self) -> Result> { - let state = self.state(); - let inode = self.disk_inode(&state); - let mut xattrs = Vec::new(); - let total = inode.xattr_inline_count(); - if total == 0 { - return Ok(xattrs); - } - - let mut offset = - self.offset + Self::inode_size(inode) + size_of::(); - let mut remaining = (total - 1) as usize * size_of::(); - while remaining > 0 { - let e: &RafsV6XattrEntry = state.map.get_ref(offset)?; - if e.name_len() as usize + e.value_size() as usize > remaining { - return Err(einval!(format!( - "v6: invalid xattr name size {}", - e.name_len() - ))); - } - let name: &[u8] = state.map.get_slice( - offset + size_of::(), - e.name_len() as usize, - )?; - let ns = recover_namespace(e.name_index())?; - let mut xa = ns.into_vec(); - xa.extend_from_slice(name); - xattrs.push(xa); - - let mut s = e.name_len() + e.value_size() + size_of::() as u32; - s = round_up(s as u64, size_of::() as u64) as u32; - if s as usize >= remaining { - break; - } - offset += s as usize; - remaining -= s as usize; - } - - Ok(xattrs) - } - - /// Get symlink target of the inode. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. 
- fn get_symlink(&self) -> Result { - let state = self.state(); - let inode = self.disk_inode(&state); - if inode.size() > EROFS_BLOCK_SIZE_4096 { - return Err(einval!(format!( - "v6: invalid symlink size {}", - inode.size() - ))); - } - let offset = self - .data_block_offset(&state, inode, 0) - .map_err(err_invalidate_data)?; - let buf: &[u8] = state.map.get_slice(offset, inode.size() as usize)?; - Ok(bytes_to_os_str(buf).to_os_string()) - } - - fn get_symlink_size(&self) -> u16 { - let state = self.state(); - let inode = self.disk_inode(&state); - inode.size() as u16 - } - - fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { - let state = self.state(); - let inode = self.disk_inode(&state); - if inode.size() == 0 { - return Err(enoent!()); - } - - let blocks_count = self.blocks_count(); - let mut cur_offset = entry_offset; - let mut skipped = entry_offset; - trace!( - "Total blocks count {} skipped {} current offset {} nid {} inode {:?}", - blocks_count, - skipped, - cur_offset, - self.ino(), - inode, - ); - - for i in 0..blocks_count as usize { - let entries_count = self.get_entry_count(&state, inode, i)?; - for j in 0..entries_count { - // Skip specified offset - if skipped != 0 { - skipped -= 1; - continue; - } - - let de = self - .get_entry(&state, inode, i, j) - .map_err(err_invalidate_data)?; - let name = self - .entry_name(&state, inode, i, j, entries_count) - .map_err(err_invalidate_data)?; - let nid = de.e_nid; - let inode = Arc::new(self.mapping.inode_wrapper_with_info( - &state, - nid, - self.ino(), - OsString::from(name), - )?) as Arc; - cur_offset += 1; - match handler(Some(inode), name.to_os_string(), nid, cur_offset) { - // Break returned by handler indicates that there is not enough buffer of readdir for entries inreaddir, - // such that it has to return. because this is a nested loop, - // using break can only jump out of the internal loop, there is no way to jump out of the whole loop. - Ok(RafsInodeWalkAction::Break) => return Ok(()), - Ok(RafsInodeWalkAction::Continue) => continue, - Err(e) => return Err(e), - }; - } - } - - Ok(()) - } - - /// Get the child with the specified name. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn get_child_by_name(&self, name: &OsStr) -> Result> { - let state = self.state(); - let inode = self.disk_inode(&state); - if let Some(target_block) = self.find_target_block(&state, inode, name)? { - let entries_count = self.get_entry_count(&state, inode, target_block)?; - let mut first = 0; - let mut last = (entries_count - 1) as i64; - while first <= last { - let pivot = first + ((last - first) >> 1); - let de = self - .get_entry(&state, inode, target_block, pivot as usize) - .map_err(err_invalidate_data)?; - let d_name = self - .entry_name(&state, inode, target_block, pivot as usize, entries_count) - .map_err(err_invalidate_data)?; - match d_name.cmp(name) { - Ordering::Equal => { - let inode = self.mapping.inode_wrapper_with_info( - &state, - de.e_nid, - self.ino(), - OsString::from(name), - )?; - return Ok(Arc::new(inode)); - } - Ordering::Less => first = pivot + 1, - Ordering::Greater => last = pivot - 1, - } - } - } - Err(enoent!()) - } - - /// Get the child with the specified index. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - /// `idx` is the number of child files in line. So we can keep the term `idx` - /// in super crate and keep it consistent with layout v5. 
- fn get_child_by_index(&self, idx: u32) -> Result> { - let state = self.state(); - let inode = self.disk_inode(&state); - if !self.is_dir() { - return Err(einval!("inode is not a directory")); - } - - let blocks_count = self.blocks_count(); - let mut cur_idx = 0u32; - for i in 0..blocks_count as usize { - let entries_count = self.get_entry_count(&state, inode, i)?; - for j in 0..entries_count { - let de = self - .get_entry(&state, inode, i, j) - .map_err(err_invalidate_data)?; - let name = self - .entry_name(&state, inode, i, j, entries_count) - .map_err(err_invalidate_data)?; - if name == "." || name == ".." { - continue; - } - if cur_idx == idx { - let inode = self.mapping.inode_wrapper_with_info( - &state, - de.e_nid, - self.ino(), - OsString::from(name), - )?; - return Ok(Arc::new(inode)); - } - cur_idx += 1; - } - } - - Err(enoent!("invalid child index")) - } - - fn get_child_count(&self) -> u32 { - // For regular file, return chunk info count. - if !self.is_dir() { - return div_round_up(self.size(), self.chunk_size() as u64) as u32; - } - - let mut child_cnt = 0; - let state = self.state(); - let inode = self.disk_inode(&state); - let blocks_count = self.blocks_count(); - for i in 0..blocks_count as usize { - let entries_count = self.get_entry_count(&state, inode, i).unwrap_or(0); - child_cnt += entries_count; - } - - if child_cnt >= 2 && child_cnt <= u32::MAX as usize { - // Skip DOT and DOTDOT - child_cnt as u32 - 2 - } else { - 0 - } - } - - fn get_child_index(&self) -> Result { - Ok(0) - } - - /// Get data size of the inode. - fn size(&self) -> u64 { - let state = self.state(); - let i = self.disk_inode(&state); - i.size() - } - - #[inline] - fn get_chunk_count(&self) -> u32 { - self.get_child_count() - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl RafsInodeExt for OndiskInodeWrapper { - fn as_inode(&self) -> &dyn RafsInode { - self - } - - /// Get inode number of the parent directory. - fn parent(&self) -> u64 { - self.parent_inode.unwrap() - } - - /// Get name of the inode. - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. - fn name(&self) -> OsString { - assert!(self.name.is_some()); - self.name.clone().unwrap_or_default() - } - - /// Get file name size of the inode. - fn get_name_size(&self) -> u16 { - self.name().len() as u16 - } - - // RafsV5 flags, not used by v6, return 0 - fn flags(&self) -> u64 { - 0 - } - - fn get_digest(&self) -> RafsDigest { - RafsDigest::default() - } - - /// Get chunk information with index `idx` - /// - /// # Safety - /// It depends on Self::validate() to ensure valid memory layout. 
- fn get_chunk_info(&self, idx: u32) -> Result> { - let state = self.state(); - let inode = self.disk_inode(&state); - if !self.is_reg() || idx >= self.get_chunk_count() { - return Err(enoent!("invalid chunk info")); - } - - let base = OndiskInodeWrapper::inode_xattr_size(inode) - + (idx as usize * size_of::()); - let offset = base - .checked_add(self.offset as usize) - .ok_or_else(|| einval!("v6: invalid offset or index to calculate chunk address"))?; - let chunk_addr = state.map.get_ref::(offset)?; - let has_device = self.mapping.device.lock().unwrap().has_device(); - - if state.meta.has_inlined_chunk_digest() && has_device { - let blob_index = chunk_addr.blob_index()?; - let chunk_index = chunk_addr.blob_ci_index(); - let device = self.mapping.device.lock().unwrap(); - device - .get_chunk_info(blob_index, chunk_index) - .ok_or_else(|| { - enoent!(format!( - "no chunk information object for blob {} chunk {}", - blob_index, chunk_index - )) - }) - } else if state.is_tarfs() { - let size = if idx == self.get_chunk_count() - 1 { - (self.size() % self.chunk_size() as u64) as u32 - } else { - self.chunk_size() - }; - let chunk_info = TarfsChunkInfoV6::from_chunk_addr(chunk_addr, size)?; - Ok(Arc::new(chunk_info)) - } else { - let mut chunk_map = self.mapping.info.chunk_map.lock().unwrap(); - if chunk_map.is_none() { - *chunk_map = Some(self.mapping.load_chunk_map()?); - } - match chunk_map.as_ref().unwrap().get(chunk_addr) { - None => Err(enoent!(format!( - "failed to get chunk info for chunk {}/{}/{}", - chunk_addr.blob_index().unwrap_or_default(), - chunk_addr.blob_ci_index(), - chunk_addr.block_addr() - ))), - Some(idx) => DirectChunkInfoV6::new(&state, self.mapping.clone(), *idx) - .map(|v| Arc::new(v) as Arc), - } - } - } -} - -/// Impl get accessor for chunkinfo object. -macro_rules! impl_chunkinfo_getter { - ($G: ident, $U: ty) => { - #[inline] - fn $G(&self) -> $U { - let state = self.state(); - - self.v5_chunk(&state).$G - } - }; -} - -/// RAFS v6 chunk information object. -pub(crate) struct DirectChunkInfoV6 { - mapping: DirectSuperBlockV6, - offset: usize, - digest: RafsDigest, -} - -// This is *direct* metadata mode in-memory chunk info object. -impl DirectChunkInfoV6 { - fn new(state: &DirectMappingState, mapping: DirectSuperBlockV6, idx: usize) -> Result { - let unit_size = size_of::(); - let offset = state.meta.chunk_table_offset as usize + idx * unit_size; - let chunk_tbl_end = state.meta.chunk_table_offset + state.meta.chunk_table_size; - if (offset as u64) < state.meta.chunk_table_offset - || (offset + unit_size) as u64 > chunk_tbl_end - { - return Err(einval!(format!( - "invalid chunk offset {} chunk table {} {}", - offset, state.meta.chunk_table_offset, state.meta.chunk_table_size - ))); - } - let chunk = state.map.get_ref::(offset)?; - Ok(Self { - mapping, - offset, - digest: chunk.block_id, - }) - } - - #[inline] - fn state(&self) -> Guard> { - self.mapping.state.load() - } - - /// Dereference the underlying OndiskChunkInfo object. - /// - /// # Safety - /// The OndiskChunkInfoWrapper could only be constructed from a valid OndiskChunkInfo pointer, - /// so it's safe to dereference the underlying OndiskChunkInfo object. - fn v5_chunk<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5ChunkInfo { - // Safe to unwrap() because we have validated the offset in DirectChunkInfoV6::new(). 
- state.map.get_ref::(self.offset).unwrap() - } -} - -impl BlobChunkInfo for DirectChunkInfoV6 { - fn chunk_id(&self) -> &RafsDigest { - &self.digest - } - - fn id(&self) -> u32 { - self.index() - } - - fn is_batch(&self) -> bool { - let state = self.state(); - self.v5_chunk(&state).flags.contains(BlobChunkFlags::BATCH) - } - - fn is_compressed(&self) -> bool { - let state = self.state(); - self.v5_chunk(&state) - .flags - .contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - let state = self.state(); - self.v5_chunk(&state) - .flags - .contains(BlobChunkFlags::ENCYPTED) - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_chunkinfo_getter!(blob_index, u32); - impl_chunkinfo_getter!(compressed_offset, u64); - impl_chunkinfo_getter!(compressed_size, u32); - impl_chunkinfo_getter!(uncompressed_offset, u64); - impl_chunkinfo_getter!(uncompressed_size, u32); -} - -impl BlobV5ChunkInfo for DirectChunkInfoV6 { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_chunkinfo_getter!(index, u32); - impl_chunkinfo_getter!(file_offset, u64); - impl_chunkinfo_getter!(flags, BlobChunkFlags); -} - -/// Rafs v6 fake ChunkInfo for Tarfs. -pub(crate) struct TarfsChunkInfoV6 { - blob_index: u32, - chunk_index: u32, - offset: u64, - size: u32, -} - -impl TarfsChunkInfoV6 { - /// Create a new instance of [TarfsChunkInfoV6]. - pub fn new(blob_index: u32, chunk_index: u32, offset: u64, size: u32) -> Self { - TarfsChunkInfoV6 { - blob_index, - chunk_index, - offset, - size, - } - } - - fn from_chunk_addr(chunk_addr: &RafsV6InodeChunkAddr, size: u32) -> Result { - let blob_index = chunk_addr.blob_index()?; - let chunk_index = chunk_addr.blob_ci_index(); - let offset = (chunk_addr.block_addr() as u64) << EROFS_BLOCK_BITS_9; - Ok(TarfsChunkInfoV6::new(blob_index, chunk_index, offset, size)) - } -} - -const TARFS_DIGEST: RafsDigest = RafsDigest { data: [0u8; 32] }; - -impl BlobChunkInfo for TarfsChunkInfoV6 { - fn chunk_id(&self) -> &RafsDigest { - &TARFS_DIGEST - } - - fn id(&self) -> u32 { - self.chunk_index - } - - fn blob_index(&self) -> u32 { - self.blob_index - } - - fn compressed_offset(&self) -> u64 { - self.offset - } - - fn compressed_size(&self) -> u32 { - self.size - } - - fn uncompressed_offset(&self) -> u64 { - self.offset - } - - fn uncompressed_size(&self) -> u32 { - self.size - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - false - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl BlobV5ChunkInfo for TarfsChunkInfoV6 { - fn index(&self) -> u32 { - self.chunk_index - } - - fn file_offset(&self) -> u64 { - 0 - } - - fn flags(&self) -> BlobChunkFlags { - BlobChunkFlags::empty() - } - - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_direct_mapping_state() { - let mut meta = RafsSuperMeta::default(); - meta.flags |= RafsSuperFlags::TARTFS_MODE; - let state = DirectMappingState::new(&meta); - assert!(state.is_tarfs()); - assert_eq!(state.block_size(), EROFS_BLOCK_SIZE_512); - - meta.flags &= !RafsSuperFlags::TARTFS_MODE; - let state = DirectMappingState::new(&meta); - assert!(!state.is_tarfs()); - assert_eq!(state.block_size(), EROFS_BLOCK_SIZE_4096); - } - - #[test] - fn test_tarfs_chunk_info_v6() { - let info1 = TarfsChunkInfoV6::new(0x0000_0001, 0x0000_0002, 0x0000_0004, 0x0000_0008); - let _info2 = TarfsChunkInfoV6::from_chunk_addr(&RafsV6InodeChunkAddr::default(), 0); - 
assert_eq!(info1.chunk_id().to_owned(), TARFS_DIGEST); - assert_eq!(info1.id(), 0x0000_0002); - assert_eq!(info1.blob_index(), 0x0000_0001); - assert_eq!(info1.compressed_offset(), 0x0000_0004); - assert_eq!(info1.uncompressed_offset(), 0x0000_0004); - assert_eq!(info1.compressed_size(), 0x0000_0008); - assert_eq!(info1.uncompressed_size(), 0x0000_0008); - assert!(!info1.is_compressed()); - assert!(!info1.is_encrypted()); - assert_eq!(info1.index(), 0x0000_0002); - assert_eq!(info1.file_offset(), 0x0000_0000); - assert_eq!(info1.flags(), BlobChunkFlags::empty()); - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +/// A bootstrap driver to directly use on disk bootstrap as runtime in-memory bootstrap. +/// +/// To reduce memory footprint and speed up filesystem initialization, the V5 on disk bootstrap +/// layout has been designed to support directly mapping as runtime bootstrap. So we don't need to +/// define another set of runtime data structures to cache on-disk bootstrap in memory. +/// +/// To support modification to the runtime bootstrap, several technologies have been adopted: +/// * - arc-swap is used to support RCU-like update instead of Mutex/RwLock. +/// * - `offset` instead of `pointer` is used to record data structure position. +/// * - reference count to the referenced resources/objects. +/// +/// # Security +/// The bootstrap file may be provided by untrusted parties, so we must ensure strong validations +/// before making use of any bootstrap, especially we are using them in memory-mapped mode. The +/// rule is to call validate() after creating any data structure from the on-disk bootstrap. +use std::any::Any; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::io::{Result, SeekFrom}; +use std::mem::size_of; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::os::unix::io::AsRawFd; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use arc_swap::{ArcSwap, Guard}; +use nydus_storage::device::{ + v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo, BlobDevice, BlobInfo, BlobIoDesc, BlobIoVec, +}; +use nydus_storage::utils::readahead; +use nydus_utils::filemap::{clone_file, FileMapState}; +use nydus_utils::{digest::RafsDigest, div_round_up, round_up}; + +use crate::metadata::layout::v5::RafsV5ChunkInfo; +use crate::metadata::layout::v6::{ + rafsv6_load_blob_extra_info, recover_namespace, RafsV6BlobTable, RafsV6Dirent, + RafsV6InodeChunkAddr, RafsV6InodeCompact, RafsV6InodeExtended, RafsV6OndiskInode, + RafsV6XattrEntry, RafsV6XattrIbodyHeader, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, + EROFS_BLOCK_SIZE_512, EROFS_INODE_CHUNK_BASED, EROFS_INODE_FLAT_INLINE, EROFS_INODE_FLAT_PLAIN, + EROFS_INODE_SLOT_SIZE, EROFS_I_DATALAYOUT_BITS, EROFS_I_VERSION_BIT, EROFS_I_VERSION_BITS, +}; +use crate::metadata::layout::{bytes_to_os_str, MetaRange, XattrName, XattrValue}; +use crate::metadata::{ + Attr, Entry, Inode, RafsBlobExtraInfo, RafsInode, RafsInodeWalkAction, RafsInodeWalkHandler, + RafsSuperBlock, RafsSuperFlags, RafsSuperInodes, RafsSuperMeta, RAFS_ATTR_BLOCK_SIZE, + RAFS_MAX_NAME, +}; +use crate::{MetaType, RafsError, RafsInodeExt, RafsIoReader, RafsResult}; + +fn err_invalidate_data(rafs_err: RafsError) -> std::io::Error { + std::io::Error::new(std::io::ErrorKind::InvalidData, rafs_err) +} + +/// The underlying struct to maintain memory mapped bootstrap for a file system. +/// +/// Only the DirectMappingState may store raw pointers. 
+/// Other data structures should not store raw pointers, instead they should hold a reference to +/// the DirectMappingState object and store an offset, so a `pointer` could be reconstruct by +/// `DirectMappingState.base + offset`. +struct DirectMappingState { + meta: Arc, + blob_table: RafsV6BlobTable, + blob_extra_infos: HashMap, + map: FileMapState, +} + +impl DirectMappingState { + fn new(meta: &RafsSuperMeta) -> Self { + DirectMappingState { + meta: Arc::new(*meta), + blob_table: RafsV6BlobTable::default(), + blob_extra_infos: HashMap::new(), + map: FileMapState::default(), + } + } + + fn is_tarfs(&self) -> bool { + self.meta.flags.contains(RafsSuperFlags::TARTFS_MODE) + } + + fn block_size(&self) -> u64 { + if self.is_tarfs() { + EROFS_BLOCK_SIZE_512 + } else { + EROFS_BLOCK_SIZE_4096 + } + } +} + +struct DirectCachedInfo { + meta_offset: usize, + root_ino: Inode, + chunk_size: u32, + chunk_map: Mutex>>, + attr_timeout: Duration, + entry_timeout: Duration, +} + +/// Direct-mapped Rafs v6 super block. +#[derive(Clone)] +pub struct DirectSuperBlockV6 { + info: Arc, + state: Arc>, + device: Arc>, +} + +impl DirectSuperBlockV6 { + /// Create a new instance of `DirectSuperBlockV6`. + pub fn new(meta: &RafsSuperMeta) -> Self { + let state = DirectMappingState::new(meta); + let block_size = state.block_size(); + let meta_offset = meta.meta_blkaddr as usize * block_size as usize; + let info = DirectCachedInfo { + meta_offset, + root_ino: meta.root_nid as Inode, + chunk_size: meta.chunk_size, + chunk_map: Mutex::new(None), + attr_timeout: meta.attr_timeout, + entry_timeout: meta.entry_timeout, + }; + + Self { + info: Arc::new(info), + state: Arc::new(ArcSwap::new(Arc::new(state))), + device: Arc::new(Mutex::new(BlobDevice::default())), + } + } + + fn disk_inode( + state: &Guard>, + offset: usize, + ) -> Result<&dyn RafsV6OndiskInode> { + let i: &RafsV6InodeCompact = state.map.get_ref(offset)?; + if i.format() & EROFS_I_VERSION_BITS == 0 { + Ok(i) + } else { + let i = state.map.get_ref::(offset)?; + Ok(i) + } + } + + fn inode_wrapper( + &self, + state: &Guard>, + nid: u64, + ) -> Result { + if nid >= (usize::MAX / EROFS_INODE_SLOT_SIZE) as u64 { + Err(einval!(format!("v6: inode number 0x{:x} is too big", nid))) + } else if let Some(offset) = self + .info + .meta_offset + .checked_add(nid as usize * EROFS_INODE_SLOT_SIZE) + { + OndiskInodeWrapper::new(state, self.clone(), offset) + } else { + Err(einval!(format!("v6: invalid inode number 0x{:x}", nid))) + } + } + + // For RafsV6, we can't get the parent info of a non-dir file with its on-disk inode, + // so we need to pass corresponding parent info when constructing the child inode. + fn inode_wrapper_with_info( + &self, + state: &Guard>, + nid: u64, + parent_inode: Inode, + name: OsString, + ) -> Result { + self.inode_wrapper(state, nid).map(|inode| { + let mut inode = inode; + inode.parent_inode = Some(parent_inode); + inode.name = Some(name); + inode + }) + } + + fn update_state(&self, r: &mut RafsIoReader) -> Result<()> { + // Validate file size + let file = clone_file(r.as_raw_fd())?; + let md = file.metadata()?; + let len = md.len(); + let md_range = MetaRange::new( + EROFS_BLOCK_SIZE_4096 as u64, + len - EROFS_BLOCK_SIZE_4096 as u64, + true, + )?; + + // Validate blob table layout as blob_table_start and blob_table_offset is read from bootstrap. 
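+        // Both values come from the untrusted bootstrap, so the bootstrap is rejected early
+        // unless the whole blob table lies inside the mapped metadata range computed above.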
+ let old_state = self.state.load(); + let blob_table_size = old_state.meta.blob_table_size as u64; + let blob_table_start = old_state.meta.blob_table_offset; + let blob_table_range = MetaRange::new(blob_table_start, blob_table_size, false)?; + if !blob_table_range.is_subrange_of(&md_range) { + return Err(ebadf!("invalid blob table")); + } + + // Prefetch the bootstrap file + readahead(file.as_raw_fd(), 0, len); + + // Load extended blob table if the bootstrap including extended blob table. + let mut blob_table = RafsV6BlobTable::new(); + let meta = &old_state.meta; + r.seek(SeekFrom::Start(meta.blob_table_offset))?; + blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?; + let blob_extra_infos = rafsv6_load_blob_extra_info(meta, r)?; + + let file_map = FileMapState::new(file, 0, len as usize, false)?; + let state = DirectMappingState { + meta: old_state.meta.clone(), + blob_table, + blob_extra_infos, + map: file_map, + }; + + // Swap new and old DirectMappingState object, + // the old object will be destroyed when the reference count reaches zero. + self.state.store(Arc::new(state)); + + Ok(()) + } + + // For RafsV6, inode doesn't store detailed chunk info, only a simple RafsV6InodeChunkAddr + // so we need to use the chunk table at the end of the bootstrap to restore the chunk info of an inode + fn load_chunk_map(&self) -> Result> { + let mut chunk_map = HashMap::default(); + let state = self.state.load(); + let size = state.meta.chunk_table_size as usize; + if size == 0 { + return Ok(chunk_map); + } + + let block_size = state.block_size(); + let unit_size = size_of::(); + if size % unit_size != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + for idx in 0..(size / unit_size) { + let chunk = DirectChunkInfoV6::new(&state, self.clone(), idx)?; + let mut v6_chunk = RafsV6InodeChunkAddr::new(); + v6_chunk.set_blob_index(chunk.blob_index()); + v6_chunk.set_blob_ci_index(chunk.id()); + v6_chunk.set_block_addr((chunk.uncompressed_offset() / block_size) as u32); + chunk_map.insert(v6_chunk, idx); + } + + Ok(chunk_map) + } +} + +impl RafsSuperInodes for DirectSuperBlockV6 { + fn get_max_ino(&self) -> Inode { + let state = self.state.load(); + // The maximum inode number supported by RAFSv6 is smaller than limit of fuse-backend-rs. + (0xffff_ffffu64) * state.block_size() / EROFS_INODE_SLOT_SIZE as u64 + } + + /// Find inode offset by ino from inode table and mmap to OndiskInode. 
+ fn get_inode(&self, ino: Inode, _validate_digest: bool) -> Result> { + let state = self.state.load(); + Ok(Arc::new(self.inode_wrapper(&state, ino)?)) + } + + fn get_extended_inode( + &self, + ino: Inode, + _validate_digest: bool, + ) -> Result> { + let state = self.state.load(); + if ino == state.meta.root_nid as u64 { + let inode = self.inode_wrapper_with_info(&state, ino, ino, OsString::from("/"))?; + return Ok(Arc::new(inode)); + } + let mut inode = self.inode_wrapper(&state, ino)?; + if inode.is_dir() { + inode.get_parent()?; + inode.get_name(&state)?; + return Ok(Arc::new(inode)); + } + Err(enoent!(format!( + "can't get extended inode for {}, root nid {} {:?}", + ino, state.meta.root_nid, inode.name + ))) + } +} + +impl RafsSuperBlock for DirectSuperBlockV6 { + fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + self.update_state(r) + } + + fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { + self.update_state(r).map_err(RafsError::SwapBackend) + } + + fn destroy(&mut self) { + let state = DirectMappingState::new(&RafsSuperMeta::default()); + self.state.store(Arc::new(state)); + } + + fn get_blob_infos(&self) -> Vec> { + self.state.load().blob_table.get_all() + } + + fn get_blob_extra_infos(&self) -> Result> { + Ok(self.state.load().blob_extra_infos.clone()) + } + + fn root_ino(&self) -> u64 { + self.info.root_ino + } + + fn get_chunk_info(&self, idx: usize) -> Result> { + let state = self.state.load(); + let chunk = DirectChunkInfoV6::new(&state, self.clone(), idx)?; + Ok(Arc::new(chunk)) + } + + fn set_blob_device(&self, blob_device: BlobDevice) { + *self.device.lock().unwrap() = blob_device; + } +} + +/// Direct-mapped RAFS v6 inode object. +pub struct OndiskInodeWrapper { + mapping: DirectSuperBlockV6, + offset: usize, + blocks_count: u64, + parent_inode: Option, + name: Option, +} + +impl OndiskInodeWrapper { + fn new( + state: &Guard>, + mapping: DirectSuperBlockV6, + offset: usize, + ) -> Result { + let inode = DirectSuperBlockV6::disk_inode(state, offset)?; + let blocks_count = div_round_up(inode.size(), state.block_size()); + + Ok(OndiskInodeWrapper { + mapping, + offset, + blocks_count, + parent_inode: None, + name: None, + }) + } + + fn state(&self) -> Guard> { + self.mapping.state.load() + } + + fn blocks_count(&self) -> u64 { + self.blocks_count + } + + fn disk_inode<'a>( + &self, + state: &'a Guard>, + ) -> &'a dyn RafsV6OndiskInode { + // Safe to unwrap() because `self.offset` has been validated in new(). + DirectSuperBlockV6::disk_inode(state, self.offset).unwrap() + } + + fn get_entry<'a>( + &self, + state: &'a Guard>, + inode: &dyn RafsV6OndiskInode, + block_index: usize, + index: usize, + ) -> RafsResult<&'a RafsV6Dirent> { + let offset = self.data_block_offset(state, inode, block_index)?; + if size_of::() * (index + 1) >= state.block_size() as usize { + Err(RafsError::InvalidImageData) + } else if let Some(offset) = offset.checked_add(size_of::() * index) { + state + .map + .get_ref(offset) + .map_err(|_e| RafsError::InvalidImageData) + } else { + Err(RafsError::InvalidImageData) + } + } + + // `max_entries` indicates the quantity of entries residing in a single block including tail packing. + // Both `block_index` and `index` start from 0. 
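+    // EROFS dirents do not record a name length; names are packed back to back after the
+    // dirent array. For every entry except the last one in a block the length is therefore
+    // `next.e_nameoff - cur.e_nameoff` (for example, with hypothetical offsets 96 and 101 the
+    // first name is 5 bytes long). The last name runs up to the end of the possibly partial
+    // block and may be NUL padded, which is why the tail case scans for the first zero byte.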
+ fn entry_name<'a>( + &self, + state: &'a Guard>, + inode: &dyn RafsV6OndiskInode, + block_index: usize, + index: usize, + max_entries: usize, + ) -> RafsResult<&'a OsStr> { + assert!(max_entries > 0); + let block_size = state.block_size(); + let offset = self.data_block_offset(state, inode, block_index)?; + let de = self.get_entry(state, inode, block_index, index)?; + let buf: &[u8] = match index.cmp(&(max_entries - 1)) { + Ordering::Less => { + let next_de = self.get_entry(state, inode, block_index, index + 1)?; + if next_de.e_nameoff as u64 >= block_size { + return Err(RafsError::InvalidImageData); + } + let len = next_de.e_nameoff.checked_sub(de.e_nameoff).ok_or_else(|| { + error!( + "nid {} entry index {} block index {} next dir entry {:?} current dir entry {:?}", + self.ino(), index, block_index, next_de, de + ); + RafsError::IllegalMetaStruct( + MetaType::Dir, + format!("cur {} next {}", next_de.e_nameoff, de.e_nameoff), + ) + })?; + + state + .map + .get_slice(offset + de.e_nameoff as usize, len as usize) + .map_err(|_e| RafsError::InvalidImageData)? + } + Ordering::Equal => { + let base = de.e_nameoff as u64; + if base >= block_size { + return Err(RafsError::InvalidImageData); + } + + // The possible maximum len of the last dirent's file name should be calculated + // differently depends on whether the dirent is at the last block of the dir file. + // Because the other blocks should be fully used, while the last may not. + let block_count = self.blocks_count() as usize; + let len = match block_count.cmp(&(block_index + 1)) { + Ordering::Greater => (block_size - base) as usize, + Ordering::Equal => { + if self.size() % block_size == 0 { + (block_size - base) as usize + } else { + (self.size() % block_size - base) as usize + } + } + Ordering::Less => return Err(RafsError::InvalidImageData), + }; + + let buf: &[u8] = state + .map + .get_slice(offset + base as usize, len) + .map_err(|_e| RafsError::InvalidImageData)?; + // Use this trick to temporarily decide entry name's length. Improve this? + let mut l: usize = 0; + for i in buf { + if *i != 0 { + l += 1; + if len == l { + break; + } + } else { + break; + } + } + &buf[..l] + } + Ordering::Greater => return Err(RafsError::InvalidImageData), + }; + + Ok(bytes_to_os_str(buf)) + } + + // COPIED from kernel code: + // erofs inode data layout (i_format in on-disk inode): + // 0 - inode plain without inline data A: inode, [xattrs], ... | ... | no-holed data + // 1 - inode VLE compression B (legacy): inode, [xattrs], extents ... | ... + // 2 - inode plain with inline data C: inode, [xattrs], last_inline_data, ... | ... | no-holed data + // 3 - inode compression D: inode, [xattrs], map_header, extents ... | ... + // 4 - inode chunk-based E: inode, [xattrs], chunk indexes ... | ... 
+ // 5~7 - reserved + fn data_block_offset( + &self, + state: &Guard>, + inode: &dyn RafsV6OndiskInode, + index: usize, + ) -> RafsResult { + const VALID_MODE_BITS: u16 = ((1 << EROFS_I_DATALAYOUT_BITS) - 1) << EROFS_I_VERSION_BITS + | ((1 << EROFS_I_VERSION_BITS) - 1); + if inode.format() & !VALID_MODE_BITS != 0 || index > u32::MAX as usize { + return Err(RafsError::Incompatible(inode.format())); + } + + let layout = inode.format() >> EROFS_I_VERSION_BITS; + match layout { + EROFS_INODE_FLAT_PLAIN => Self::flat_data_block_offset(state, inode, index), + EROFS_INODE_FLAT_INLINE => match self.blocks_count().cmp(&(index as u64 + 1)) { + Ordering::Greater => Self::flat_data_block_offset(state, inode, index), + Ordering::Equal => { + Ok(self.offset as usize + Self::inode_size(inode) + Self::xattr_size(inode)) + } + Ordering::Less => Err(RafsError::InvalidImageData), + }, + _ => Err(RafsError::InvalidImageData), + } + } + + fn flat_data_block_offset( + state: &Guard>, + inode: &dyn RafsV6OndiskInode, + index: usize, + ) -> RafsResult { + // `i_u` points to the Nth block + let base = inode.union() as usize; + if base.checked_add(index).is_none() || base + index > u32::MAX as usize { + Err(RafsError::InvalidImageData) + } else { + Ok((base + index) * state.block_size() as usize) + } + } + + fn mode_format_bits(&self) -> u32 { + let state = self.state(); + let i = self.disk_inode(&state); + i.mode() as u32 & libc::S_IFMT as u32 + } + + #[allow(clippy::too_many_arguments)] + fn make_chunk_io( + &self, + state: &Guard>, + device: &BlobDevice, + chunk_addr: &RafsV6InodeChunkAddr, + content_offset: u32, + content_len: u32, + user_io: bool, + is_tarfs_mode: bool, + is_tail: bool, + ) -> Option { + let blob_index = match chunk_addr.blob_index() { + Err(e) => { + warn!( + "failed to get blob index for chunk address {:?}, {}", + chunk_addr, e + ); + return None; + } + Ok(v) => v, + }; + + match state.blob_table.get(blob_index) { + Err(e) => { + warn!( + "failed to get blob with index {} for chunk address {:?}, {}", + blob_index, chunk_addr, e + ); + None + } + Ok(blob) => { + if is_tarfs_mode { + let size = if is_tail { + (self.size() % self.chunk_size() as u64) as u32 + } else { + self.chunk_size() + }; + let chunk = TarfsChunkInfoV6::from_chunk_addr(chunk_addr, size).ok()?; + let chunk = Arc::new(chunk) as Arc; + Some(BlobIoDesc::new( + blob, + chunk.into(), + content_offset, + content_len, + user_io, + )) + } else { + let chunk_index = chunk_addr.blob_ci_index(); + device + .create_io_chunk(blob.blob_index(), chunk_index) + .map(|v| BlobIoDesc::new(blob, v, content_offset, content_len, user_io)) + } + } + } + } + + fn chunk_size(&self) -> u32 { + self.mapping.info.chunk_size + } + + fn inode_size(inode: &dyn RafsV6OndiskInode) -> usize { + if (inode.format() & 1 << EROFS_I_VERSION_BIT) != 0 { + size_of::() + } else { + size_of::() + } + } + + fn xattr_size(inode: &dyn RafsV6OndiskInode) -> usize { + // Rafs v6 only supports EROFS inline xattr. + if inode.xattr_inline_count() > 0 { + (inode.xattr_inline_count() as usize - 1) * size_of::() + + size_of::() + } else { + 0 + } + } + + // Get sum of inode and xattr size aligned to RafsV6InodeChunkAddr. 
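+    // The chunk address array is stored right after the inode and its inline xattrs, so the
+    // combined size is rounded up to the size of one `RafsV6InodeChunkAddr` entry to keep that
+    // array naturally aligned. Rough sketch of the assumed on-disk layout:
+    //
+    //   inode (compact or extended) | inline xattrs | padding | chunk address 0 | chunk address 1 | ...
+    //
+    // For example (hypothetical sizes), an inode of 32 bytes with 24 bytes of inline xattrs
+    // would occupy round_up(56, size_of::<RafsV6InodeChunkAddr>()) bytes before its first
+    // chunk address entry.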
+ fn inode_xattr_size(inode: &dyn RafsV6OndiskInode) -> usize { + let sz = Self::inode_size(inode) as u64 + Self::xattr_size(inode) as u64; + round_up(sz, size_of::() as u64) as usize + } + + fn chunk_addresses<'a>( + &self, + state: &'a Guard>, + base_index: u32, + ) -> RafsResult<&'a [RafsV6InodeChunkAddr]> { + let total_chunks = div_round_up(self.size(), self.chunk_size() as u64); + if total_chunks > u32::MAX as u64 || total_chunks <= base_index as u64 { + return Err(RafsError::InvalidImageData); + } + + let inode = self.disk_inode(state); + assert_eq!( + inode.format() >> EROFS_I_VERSION_BITS, + EROFS_INODE_CHUNK_BASED + ); + + let base_index = base_index as usize; + let base = Self::inode_xattr_size(inode) + base_index * size_of::(); + if let Some(offset) = base.checked_add(self.offset) { + let count = total_chunks as usize - base_index; + state + .map + .get_slice(offset, count) + .map_err(|_e| RafsError::InvalidImageData) + } else { + Err(RafsError::InvalidImageData) + } + } + + fn find_target_block( + &self, + state: &Guard>, + inode: &dyn RafsV6OndiskInode, + name: &OsStr, + ) -> Result> { + if inode.size() == 0 || !self.is_dir() { + return Ok(None); + } + + let blocks_count = self.blocks_count(); + if blocks_count > u32::MAX as u64 { + return Err(einval!("v6: invalid block count in directory entry")); + } + + let mut first = 0; + let mut last = (blocks_count - 1) as i64; + while first <= last { + let pivot = first + ((last - first) >> 1); + let entries_count = self.get_entry_count(&state, inode, pivot as usize)?; + let h_name = self + .entry_name(state, inode, pivot as usize, 0, entries_count) + .map_err(err_invalidate_data)?; + let t_name = self + .entry_name( + state, + inode, + pivot as usize, + entries_count - 1, + entries_count, + ) + .map_err(err_invalidate_data)?; + if h_name <= name && t_name >= name { + return Ok(Some(pivot as usize)); + } else if h_name > name { + if pivot == 0 { + break; + } + last = pivot - 1; + } else { + first = pivot + 1; + } + } + + Ok(None) + } + + fn get_parent(&mut self) -> Result<()> { + assert!(self.is_dir()); + let parent = self.get_child_by_name(OsStr::new(".."))?; + self.parent_inode = Some(parent.ino()); + Ok(()) + } + + fn get_name(&mut self, state: &Guard>) -> Result<()> { + assert!(self.is_dir()); + let cur_ino = self.ino(); + if cur_ino == self.mapping.info.root_ino { + self.name = Some(OsString::from("")); + } else { + let parent = self.mapping.inode_wrapper(state, self.parent())?; + parent.walk_children_inodes( + 0, + &mut |_inode: Option>, name: OsString, ino, _offset| { + if cur_ino == ino { + self.name = Some(name); + return Ok(RafsInodeWalkAction::Break); + } + Ok(RafsInodeWalkAction::Continue) + }, + )?; + if self.name.is_none() { + return Err(einval!(format!( + "v6: failed to get parent for directory with inode 0x{:x}", + cur_ino + ))); + } + } + + Ok(()) + } + + fn get_entry_count( + &self, + state: &Guard>, + inode: &dyn RafsV6OndiskInode, + block_index: usize, + ) -> Result { + let head_entry = self + .get_entry(&state, inode, block_index, 0) + .map_err(err_invalidate_data)?; + let name_offset = head_entry.e_nameoff as usize; + if name_offset as u64 >= EROFS_BLOCK_SIZE_4096 + || name_offset % size_of::() != 0 + { + Err(enoent!(format!( + "v6: invalid e_nameoff {} from directory entry", + name_offset + ))) + } else { + Ok(name_offset / size_of::()) + } + } +} + +impl RafsInode for OndiskInodeWrapper { + fn validate(&self, _inode_count: u64, _chunk_size: u64) -> Result<()> { + let state = self.state(); + let inode = 
self.disk_inode(&state); + let max_inode = self.mapping.get_max_ino(); + + if self.ino() > max_inode + || self.offset > (u32::MAX as usize) * EROFS_BLOCK_SIZE_4096 as usize + || inode.nlink() == 0 + || self.get_name_size() as usize > (RAFS_MAX_NAME + 1) + { + return Err(ebadf!(format!( + "inode validation failure, inode {:?}", + inode + ))); + } + + if self.is_reg() { + if state.meta.is_chunk_dict() { + // chunk-dict doesn't support chunk_count check + return Err(std::io::Error::from_raw_os_error(libc::EOPNOTSUPP)); + } + let chunks = div_round_up(self.size(), self.chunk_size() as u64) as usize; + let chunk_size = chunks * size_of::(); + let size = OndiskInodeWrapper::inode_xattr_size(inode) + .checked_add(chunk_size) + .ok_or_else(|| einval!("v6: invalid inode size"))?; + state.map.validate_range(self.offset, size)?; + } else if self.is_dir() { + if self.get_child_count() as u64 >= max_inode { + return Err(einval!("invalid directory")); + } + let xattr_size = Self::xattr_size(inode) as usize; + let size = Self::inode_size(inode) + xattr_size; + state.map.validate_range(self.offset, size)?; + } else if self.is_symlink() && self.size() == 0 { + return Err(einval!("invalid symlink target")); + } + Ok(()) + } + + fn alloc_bio_vecs( + &self, + device: &BlobDevice, + offset: u64, + size: usize, + user_io: bool, + ) -> Result> { + let state = self.state(); + let chunk_size = self.chunk_size(); + let head_chunk_index = offset / chunk_size as u64; + if head_chunk_index > u32::MAX as u64 { + return Err(einval!( + "v6: invalid offset or chunk size when calculate chunk index" + )); + } + let mut vec: Vec = Vec::new(); + let chunks = self + .chunk_addresses(&state, head_chunk_index as u32) + .map_err(err_invalidate_data)?; + if chunks.is_empty() { + return Ok(vec); + } + + let mut curr_chunk_index = head_chunk_index as u32; + let tail_chunk_index = self.get_chunk_count() - 1; + let is_tarfs_mode = state.is_tarfs(); + let content_offset = (offset % chunk_size as u64) as u32; + let mut left = std::cmp::min(self.size() - offset, size as u64) as u32; + let mut content_len = std::cmp::min(chunk_size - content_offset, left); + let desc = self + .make_chunk_io( + &state, + device, + &chunks[0], + content_offset, + content_len, + user_io, + is_tarfs_mode, + curr_chunk_index == tail_chunk_index, + ) + .ok_or_else(|| einval!("failed to get chunk information"))?; + + let mut descs = BlobIoVec::new(desc.blob.clone()); + descs.push(desc); + left -= content_len; + if left != 0 { + // Handle the rest of chunks since they shares the same content length = 0. 
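+            // Every chunk after the first one is read starting at offset 0 within the chunk;
+            // only the length is capped by whatever is still left of the requested range.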
+ for c in chunks.iter().skip(1) { + curr_chunk_index += 1; + content_len = std::cmp::min(chunk_size, left); + let desc = self + .make_chunk_io( + &state, + device, + c, + 0, + content_len, + user_io, + is_tarfs_mode, + curr_chunk_index == tail_chunk_index, + ) + .ok_or_else(|| einval!("failed to get chunk information"))?; + if desc.blob.blob_index() != descs.blob_index() { + vec.push(descs); + descs = BlobIoVec::new(desc.blob.clone()); + } + descs.push(desc); + left -= content_len; + if left == 0 { + break; + } + } + } + if !descs.is_empty() { + vec.push(descs) + } + assert_eq!(left, 0); + + Ok(vec) + } + + fn collect_descendants_inodes( + &self, + descendants: &mut Vec>, + ) -> Result { + if !self.is_dir() { + return Err(enotdir!()); + } + + let mut child_dirs: Vec> = Vec::new(); + let callback = &mut |inode: Option>, name: OsString, _ino, _offset| { + if let Some(child_inode) = inode { + if child_inode.is_dir() { + // EROFS packs dot and dotdot, so skip them two. + if name != "." && name != ".." { + child_dirs.push(child_inode); + } + } else if !child_inode.is_empty_size() && child_inode.is_reg() { + descendants.push(child_inode); + } + Ok(RafsInodeWalkAction::Continue) + } else { + Ok(RafsInodeWalkAction::Continue) + } + }; + + self.walk_children_inodes(0, callback)?; + for d in child_dirs { + d.collect_descendants_inodes(descendants)?; + } + + Ok(0) + } + + fn get_entry(&self) -> Entry { + Entry { + attr: self.get_attr().into(), + inode: self.ino(), + generation: 0, + attr_timeout: self.mapping.info.attr_timeout, + entry_timeout: self.mapping.info.entry_timeout, + ..Default::default() + } + } + + fn get_attr(&self) -> Attr { + let state = self.state(); + let inode = self.disk_inode(&state); + + Attr { + ino: self.ino(), + size: inode.size(), + mode: inode.mode() as u32, + nlink: inode.nlink(), + blocks: div_round_up(inode.size(), 512), + uid: inode.ugid().0, + gid: inode.ugid().1, + mtime: inode.mtime_s_ns().0, + mtimensec: inode.mtime_s_ns().1, + blksize: RAFS_ATTR_BLOCK_SIZE, + rdev: inode.rdev(), + ..Default::default() + } + } + + fn ino(&self) -> u64 { + assert!(self.offset > self.mapping.info.meta_offset); + (self.offset - self.mapping.info.meta_offset) as u64 / EROFS_INODE_SLOT_SIZE as u64 + } + + /// Get real device number of the inode. + fn rdev(&self) -> u32 { + let state = self.state(); + self.disk_inode(&state).union() + } + + /// Get project id associated with the inode. + fn projid(&self) -> u32 { + 0 + } + + #[inline] + fn is_blkdev(&self) -> bool { + self.mode_format_bits() == libc::S_IFBLK as u32 + } + + #[inline] + fn is_chrdev(&self) -> bool { + self.mode_format_bits() == libc::S_IFCHR as u32 + } + + #[inline] + fn is_sock(&self) -> bool { + self.mode_format_bits() == libc::S_IFSOCK as u32 + } + + #[inline] + fn is_fifo(&self) -> bool { + self.mode_format_bits() == libc::S_IFIFO as u32 + } + + fn is_dir(&self) -> bool { + self.mode_format_bits() == libc::S_IFDIR as u32 + } + + /// Check whether the inode is a symlink. + fn is_symlink(&self) -> bool { + self.mode_format_bits() == libc::S_IFLNK as u32 + } + + /// Check whether the inode is a regular file. + fn is_reg(&self) -> bool { + self.mode_format_bits() == libc::S_IFREG as u32 + } + + /// Check whether the inode is a hardlink. + fn is_hardlink(&self) -> bool { + let state = self.state(); + let inode = self.disk_inode(&state); + inode.nlink() > 1 && self.is_reg() + } + + /// Check whether the inode has extended attributes. 
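+    ///
+    /// RAFS v6 only records EROFS inline xattrs, so a non-zero inline xattr count is sufficient.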
+ fn has_xattr(&self) -> bool { + let state = self.state(); + self.disk_inode(&state).xattr_inline_count() > 0 + } + + fn get_xattr(&self, name: &OsStr) -> Result> { + let state = self.state(); + let inode = self.disk_inode(&state); + let total = inode.xattr_inline_count(); + if total == 0 { + return Ok(None); + } + + let mut offset = + self.offset + Self::inode_size(inode) + size_of::(); + let mut remaining = (total - 1) as usize * size_of::(); + while remaining > 0 { + let e: &RafsV6XattrEntry = state.map.get_ref(offset)?; + if e.name_len() as usize + e.value_size() as usize > remaining { + return Err(einval!(format!( + "v6: invalid xattr name size {}", + e.name_len() + ))); + } + let mut xa_name = recover_namespace(e.name_index())?; + let suffix: &[u8] = state.map.get_slice( + offset + size_of::(), + e.name_len() as usize, + )?; + xa_name.push(OsStr::from_bytes(suffix)); + if xa_name == name { + let data: &[u8] = state.map.get_slice( + offset + size_of::() + e.name_len() as usize, + e.value_size() as usize, + )?; + return Ok(Some(data.to_vec())); + } + + let mut s = e.name_len() + e.value_size() + size_of::() as u32; + s = round_up(s as u64, size_of::() as u64) as u32; + if s as usize >= remaining { + break; + } + remaining -= s as usize; + offset += s as usize; + } + + Ok(None) + } + + fn get_xattrs(&self) -> Result> { + let state = self.state(); + let inode = self.disk_inode(&state); + let mut xattrs = Vec::new(); + let total = inode.xattr_inline_count(); + if total == 0 { + return Ok(xattrs); + } + + let mut offset = + self.offset + Self::inode_size(inode) + size_of::(); + let mut remaining = (total - 1) as usize * size_of::(); + while remaining > 0 { + let e: &RafsV6XattrEntry = state.map.get_ref(offset)?; + if e.name_len() as usize + e.value_size() as usize > remaining { + return Err(einval!(format!( + "v6: invalid xattr name size {}", + e.name_len() + ))); + } + let name: &[u8] = state.map.get_slice( + offset + size_of::(), + e.name_len() as usize, + )?; + let ns = recover_namespace(e.name_index())?; + let mut xa = ns.into_vec(); + xa.extend_from_slice(name); + xattrs.push(xa); + + let mut s = e.name_len() + e.value_size() + size_of::() as u32; + s = round_up(s as u64, size_of::() as u64) as u32; + if s as usize >= remaining { + break; + } + offset += s as usize; + remaining -= s as usize; + } + + Ok(xattrs) + } + + /// Get symlink target of the inode. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. 
+ fn get_symlink(&self) -> Result { + let state = self.state(); + let inode = self.disk_inode(&state); + if inode.size() > EROFS_BLOCK_SIZE_4096 { + return Err(einval!(format!( + "v6: invalid symlink size {}", + inode.size() + ))); + } + let offset = self + .data_block_offset(&state, inode, 0) + .map_err(err_invalidate_data)?; + let buf: &[u8] = state.map.get_slice(offset, inode.size() as usize)?; + Ok(bytes_to_os_str(buf).to_os_string()) + } + + fn get_symlink_size(&self) -> u16 { + let state = self.state(); + let inode = self.disk_inode(&state); + inode.size() as u16 + } + + fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()> { + let state = self.state(); + let inode = self.disk_inode(&state); + if inode.size() == 0 { + return Err(enoent!()); + } + + let blocks_count = self.blocks_count(); + let mut cur_offset = entry_offset; + let mut skipped = entry_offset; + trace!( + "Total blocks count {} skipped {} current offset {} nid {} inode {:?}", + blocks_count, + skipped, + cur_offset, + self.ino(), + inode, + ); + + for i in 0..blocks_count as usize { + let entries_count = self.get_entry_count(&state, inode, i)?; + for j in 0..entries_count { + // Skip specified offset + if skipped != 0 { + skipped -= 1; + continue; + } + + let de = self + .get_entry(&state, inode, i, j) + .map_err(err_invalidate_data)?; + let name = self + .entry_name(&state, inode, i, j, entries_count) + .map_err(err_invalidate_data)?; + let nid = de.e_nid; + let inode = Arc::new(self.mapping.inode_wrapper_with_info( + &state, + nid, + self.ino(), + OsString::from(name), + )?) as Arc; + cur_offset += 1; + match handler(Some(inode), name.to_os_string(), nid, cur_offset) { + // Break returned by handler indicates that there is not enough buffer of readdir for entries inreaddir, + // such that it has to return. because this is a nested loop, + // using break can only jump out of the internal loop, there is no way to jump out of the whole loop. + Ok(RafsInodeWalkAction::Break) => return Ok(()), + Ok(RafsInodeWalkAction::Continue) => continue, + Err(e) => return Err(e), + }; + } + } + + Ok(()) + } + + /// Get the child with the specified name. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn get_child_by_name(&self, name: &OsStr) -> Result> { + let state = self.state(); + let inode = self.disk_inode(&state); + if let Some(target_block) = self.find_target_block(&state, inode, name)? { + let entries_count = self.get_entry_count(&state, inode, target_block)?; + let mut first = 0; + let mut last = (entries_count - 1) as i64; + while first <= last { + let pivot = first + ((last - first) >> 1); + let de = self + .get_entry(&state, inode, target_block, pivot as usize) + .map_err(err_invalidate_data)?; + let d_name = self + .entry_name(&state, inode, target_block, pivot as usize, entries_count) + .map_err(err_invalidate_data)?; + match d_name.cmp(name) { + Ordering::Equal => { + let inode = self.mapping.inode_wrapper_with_info( + &state, + de.e_nid, + self.ino(), + OsString::from(name), + )?; + return Ok(Arc::new(inode)); + } + Ordering::Less => first = pivot + 1, + Ordering::Greater => last = pivot - 1, + } + } + } + Err(enoent!()) + } + + /// Get the child with the specified index. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + /// `idx` is the number of child files in line. So we can keep the term `idx` + /// in super crate and keep it consistent with layout v5. 
+ fn get_child_by_index(&self, idx: u32) -> Result> { + let state = self.state(); + let inode = self.disk_inode(&state); + if !self.is_dir() { + return Err(einval!("inode is not a directory")); + } + + let blocks_count = self.blocks_count(); + let mut cur_idx = 0u32; + for i in 0..blocks_count as usize { + let entries_count = self.get_entry_count(&state, inode, i)?; + for j in 0..entries_count { + let de = self + .get_entry(&state, inode, i, j) + .map_err(err_invalidate_data)?; + let name = self + .entry_name(&state, inode, i, j, entries_count) + .map_err(err_invalidate_data)?; + if name == "." || name == ".." { + continue; + } + if cur_idx == idx { + let inode = self.mapping.inode_wrapper_with_info( + &state, + de.e_nid, + self.ino(), + OsString::from(name), + )?; + return Ok(Arc::new(inode)); + } + cur_idx += 1; + } + } + + Err(enoent!("invalid child index")) + } + + fn get_child_count(&self) -> u32 { + // For regular file, return chunk info count. + if !self.is_dir() { + return div_round_up(self.size(), self.chunk_size() as u64) as u32; + } + + let mut child_cnt = 0; + let state = self.state(); + let inode = self.disk_inode(&state); + let blocks_count = self.blocks_count(); + for i in 0..blocks_count as usize { + let entries_count = self.get_entry_count(&state, inode, i).unwrap_or(0); + child_cnt += entries_count; + } + + if child_cnt >= 2 && child_cnt <= u32::MAX as usize { + // Skip DOT and DOTDOT + child_cnt as u32 - 2 + } else { + 0 + } + } + + fn get_child_index(&self) -> Result { + Ok(0) + } + + /// Get data size of the inode. + fn size(&self) -> u64 { + let state = self.state(); + let i = self.disk_inode(&state); + i.size() + } + + #[inline] + fn get_chunk_count(&self) -> u32 { + self.get_child_count() + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl RafsInodeExt for OndiskInodeWrapper { + fn as_inode(&self) -> &dyn RafsInode { + self + } + + /// Get inode number of the parent directory. + fn parent(&self) -> u64 { + self.parent_inode.unwrap() + } + + /// Get name of the inode. + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. + fn name(&self) -> OsString { + assert!(self.name.is_some()); + self.name.clone().unwrap_or_default() + } + + /// Get file name size of the inode. + fn get_name_size(&self) -> u16 { + self.name().len() as u16 + } + + // RafsV5 flags, not used by v6, return 0 + fn flags(&self) -> u64 { + 0 + } + + fn get_digest(&self) -> RafsDigest { + RafsDigest::default() + } + + /// Get chunk information with index `idx` + /// + /// # Safety + /// It depends on Self::validate() to ensure valid memory layout. 
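+    ///
+    /// The chunk address stored in the inode is resolved in one of three ways: through the
+    /// blob device when chunk digests are inlined into the blob meta, synthesized on the fly
+    /// in tarfs mode, or looked up in the lazily built map over the bootstrap chunk table.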
+ fn get_chunk_info(&self, idx: u32) -> Result> { + let state = self.state(); + let inode = self.disk_inode(&state); + if !self.is_reg() || idx >= self.get_chunk_count() { + return Err(enoent!("invalid chunk info")); + } + + let base = OndiskInodeWrapper::inode_xattr_size(inode) + + (idx as usize * size_of::()); + let offset = base + .checked_add(self.offset as usize) + .ok_or_else(|| einval!("v6: invalid offset or index to calculate chunk address"))?; + let chunk_addr = state.map.get_ref::(offset)?; + let has_device = self.mapping.device.lock().unwrap().has_device(); + + if state.meta.has_inlined_chunk_digest() && has_device { + let blob_index = chunk_addr.blob_index()?; + let chunk_index = chunk_addr.blob_ci_index(); + let device = self.mapping.device.lock().unwrap(); + device + .get_chunk_info(blob_index, chunk_index) + .ok_or_else(|| { + enoent!(format!( + "no chunk information object for blob {} chunk {}", + blob_index, chunk_index + )) + }) + } else if state.is_tarfs() { + let size = if idx == self.get_chunk_count() - 1 { + (self.size() % self.chunk_size() as u64) as u32 + } else { + self.chunk_size() + }; + let chunk_info = TarfsChunkInfoV6::from_chunk_addr(chunk_addr, size)?; + Ok(Arc::new(chunk_info)) + } else { + let mut chunk_map = self.mapping.info.chunk_map.lock().unwrap(); + if chunk_map.is_none() { + *chunk_map = Some(self.mapping.load_chunk_map()?); + } + match chunk_map.as_ref().unwrap().get(chunk_addr) { + None => Err(enoent!(format!( + "failed to get chunk info for chunk {}/{}/{}", + chunk_addr.blob_index().unwrap_or_default(), + chunk_addr.blob_ci_index(), + chunk_addr.block_addr() + ))), + Some(idx) => DirectChunkInfoV6::new(&state, self.mapping.clone(), *idx) + .map(|v| Arc::new(v) as Arc), + } + } + } +} + +/// Impl get accessor for chunkinfo object. +macro_rules! impl_chunkinfo_getter { + ($G: ident, $U: ty) => { + #[inline] + fn $G(&self) -> $U { + let state = self.state(); + + self.v5_chunk(&state).$G + } + }; +} + +/// RAFS v6 chunk information object. +pub(crate) struct DirectChunkInfoV6 { + mapping: DirectSuperBlockV6, + offset: usize, + digest: RafsDigest, +} + +// This is *direct* metadata mode in-memory chunk info object. +impl DirectChunkInfoV6 { + fn new(state: &DirectMappingState, mapping: DirectSuperBlockV6, idx: usize) -> Result { + let unit_size = size_of::(); + let offset = state.meta.chunk_table_offset as usize + idx * unit_size; + let chunk_tbl_end = state.meta.chunk_table_offset + state.meta.chunk_table_size; + if (offset as u64) < state.meta.chunk_table_offset + || (offset + unit_size) as u64 > chunk_tbl_end + { + return Err(einval!(format!( + "invalid chunk offset {} chunk table {} {}", + offset, state.meta.chunk_table_offset, state.meta.chunk_table_size + ))); + } + let chunk = state.map.get_ref::(offset)?; + Ok(Self { + mapping, + offset, + digest: chunk.block_id, + }) + } + + #[inline] + fn state(&self) -> Guard> { + self.mapping.state.load() + } + + /// Dereference the underlying OndiskChunkInfo object. + /// + /// # Safety + /// The OndiskChunkInfoWrapper could only be constructed from a valid OndiskChunkInfo pointer, + /// so it's safe to dereference the underlying OndiskChunkInfo object. + fn v5_chunk<'a>(&self, state: &'a DirectMappingState) -> &'a RafsV5ChunkInfo { + // Safe to unwrap() because we have validated the offset in DirectChunkInfoV6::new(). 
+ state.map.get_ref::(self.offset).unwrap() + } +} + +impl BlobChunkInfo for DirectChunkInfoV6 { + fn chunk_id(&self) -> &RafsDigest { + &self.digest + } + + fn id(&self) -> u32 { + self.index() + } + + fn is_batch(&self) -> bool { + let state = self.state(); + self.v5_chunk(&state).flags.contains(BlobChunkFlags::BATCH) + } + + fn is_compressed(&self) -> bool { + let state = self.state(); + self.v5_chunk(&state) + .flags + .contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + let state = self.state(); + self.v5_chunk(&state) + .flags + .contains(BlobChunkFlags::ENCYPTED) + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_chunkinfo_getter!(blob_index, u32); + impl_chunkinfo_getter!(compressed_offset, u64); + impl_chunkinfo_getter!(compressed_size, u32); + impl_chunkinfo_getter!(uncompressed_offset, u64); + impl_chunkinfo_getter!(uncompressed_size, u32); +} + +impl BlobV5ChunkInfo for DirectChunkInfoV6 { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_chunkinfo_getter!(index, u32); + impl_chunkinfo_getter!(file_offset, u64); + impl_chunkinfo_getter!(flags, BlobChunkFlags); +} + +/// Rafs v6 fake ChunkInfo for Tarfs. +pub(crate) struct TarfsChunkInfoV6 { + blob_index: u32, + chunk_index: u32, + offset: u64, + size: u32, +} + +impl TarfsChunkInfoV6 { + /// Create a new instance of [TarfsChunkInfoV6]. + pub fn new(blob_index: u32, chunk_index: u32, offset: u64, size: u32) -> Self { + TarfsChunkInfoV6 { + blob_index, + chunk_index, + offset, + size, + } + } + + fn from_chunk_addr(chunk_addr: &RafsV6InodeChunkAddr, size: u32) -> Result { + let blob_index = chunk_addr.blob_index()?; + let chunk_index = chunk_addr.blob_ci_index(); + let offset = (chunk_addr.block_addr() as u64) << EROFS_BLOCK_BITS_9; + Ok(TarfsChunkInfoV6::new(blob_index, chunk_index, offset, size)) + } +} + +const TARFS_DIGEST: RafsDigest = RafsDigest { data: [0u8; 32] }; + +impl BlobChunkInfo for TarfsChunkInfoV6 { + fn chunk_id(&self) -> &RafsDigest { + &TARFS_DIGEST + } + + fn id(&self) -> u32 { + self.chunk_index + } + + fn blob_index(&self) -> u32 { + self.blob_index + } + + fn compressed_offset(&self) -> u64 { + self.offset + } + + fn compressed_size(&self) -> u32 { + self.size + } + + fn uncompressed_offset(&self) -> u64 { + self.offset + } + + fn uncompressed_size(&self) -> u32 { + self.size + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + false + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl BlobV5ChunkInfo for TarfsChunkInfoV6 { + fn index(&self) -> u32 { + self.chunk_index + } + + fn file_offset(&self) -> u64 { + 0 + } + + fn flags(&self) -> BlobChunkFlags { + BlobChunkFlags::empty() + } + + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_direct_mapping_state() { + let mut meta = RafsSuperMeta::default(); + meta.flags |= RafsSuperFlags::TARTFS_MODE; + let state = DirectMappingState::new(&meta); + assert!(state.is_tarfs()); + assert_eq!(state.block_size(), EROFS_BLOCK_SIZE_512); + + meta.flags &= !RafsSuperFlags::TARTFS_MODE; + let state = DirectMappingState::new(&meta); + assert!(!state.is_tarfs()); + assert_eq!(state.block_size(), EROFS_BLOCK_SIZE_4096); + } + + #[test] + fn test_tarfs_chunk_info_v6() { + let info1 = TarfsChunkInfoV6::new(0x0000_0001, 0x0000_0002, 0x0000_0004, 0x0000_0008); + let _info2 = TarfsChunkInfoV6::from_chunk_addr(&RafsV6InodeChunkAddr::default(), 0); + 
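`TarfsChunkInfoV6::from_chunk_addr` above turns a 512-byte block address into an absolute byte offset with a single shift by `EROFS_BLOCK_BITS_9`. A small standalone sketch of that conversion; the constant value 9 is assumed from the `EROFS_BLOCK_SIZE_512` used elsewhere in this module.

```rust
/// 512-byte block shift assumed for the tarfs layout (2^9 = 512).
const EROFS_BLOCK_BITS_9: u64 = 9;

/// Convert a 512-byte block address into an absolute byte offset,
/// as `TarfsChunkInfoV6::from_chunk_addr` does for tarfs chunks.
fn block_addr_to_offset(block_addr: u32) -> u64 {
    (block_addr as u64) << EROFS_BLOCK_BITS_9
}

/// The reverse mapping, handy when inspecting tarfs images by hand.
fn offset_to_block_addr(offset: u64) -> u32 {
    (offset >> EROFS_BLOCK_BITS_9) as u32
}

fn main() {
    let offset = block_addr_to_offset(0x10); // block 16 -> byte 8192
    assert_eq!(offset, 16 * 512);
    assert_eq!(offset_to_block_addr(offset), 0x10);
}
```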
assert_eq!(info1.chunk_id().to_owned(), TARFS_DIGEST); + assert_eq!(info1.id(), 0x0000_0002); + assert_eq!(info1.blob_index(), 0x0000_0001); + assert_eq!(info1.compressed_offset(), 0x0000_0004); + assert_eq!(info1.uncompressed_offset(), 0x0000_0004); + assert_eq!(info1.compressed_size(), 0x0000_0008); + assert_eq!(info1.uncompressed_size(), 0x0000_0008); + assert!(!info1.is_compressed()); + assert!(!info1.is_encrypted()); + assert_eq!(info1.index(), 0x0000_0002); + assert_eq!(info1.file_offset(), 0x0000_0000); + assert_eq!(info1.flags(), BlobChunkFlags::empty()); + } +} diff --git a/rafs/src/metadata/inode.rs b/rafs/src/metadata/inode.rs index 938b058ae6d..77502f92881 100644 --- a/rafs/src/metadata/inode.rs +++ b/rafs/src/metadata/inode.rs @@ -1,1126 +1,1126 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::{Debug, Formatter}; -use std::mem::size_of; -use std::ops::Deref; -use std::sync::Arc; - -use nydus_utils::digest::RafsDigest; - -use crate::metadata::cached_v5::CachedInodeV5; -use crate::metadata::chunk::ChunkWrapper; -use crate::metadata::direct_v5::OndiskInodeWrapper as OndiskInodeWrapperV5; -use crate::metadata::direct_v6::OndiskInodeWrapper as OndiskInodeWrapperV6; -use crate::metadata::layout::v5::{RafsV5ChunkInfo, RafsV5Inode}; -use crate::metadata::layout::v6::{RafsV6InodeCompact, RafsV6InodeExtended}; -use crate::metadata::layout::RafsXAttrs; -use crate::metadata::{Inode, RafsVersion}; -use crate::RafsInodeExt; - -/// An inode object wrapper for different RAFS versions. -#[derive(Clone)] -pub enum InodeWrapper { - /// Inode info structure for RAFS v5. - V5(RafsV5Inode), - /// Inode info structure for RAFS v6, reuse `RafsV5Inode` as IR for v6. - V6(RafsV6Inode), - /// A reference to a `RafsInodeExt` object. - Ref(Arc), -} - -impl Debug for InodeWrapper { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::V5(i) => write!(f, "{:?}", i), - Self::V6(i) => write!(f, "{:?}", i), - Self::Ref(i) => { - let i = RafsV5Inode::from(i.deref()); - write!(f, "{:?}", i) - } - } - } -} - -impl InodeWrapper { - /// Create a new instance of `InodeWrapper` with default value. - pub fn new(version: RafsVersion) -> Self { - match version { - RafsVersion::V5 => InodeWrapper::V5(RafsV5Inode::new()), - RafsVersion::V6 => InodeWrapper::V6(RafsV6Inode::new()), - } - } - - /// Create an `InodeWrapper` object from a `RafsInodeExt` trait object. - pub fn from_inode_info(inode: Arc) -> Self { - Self::Ref(inode) - } - - /// Check whether is a RAFS V5 inode. - pub fn is_v5(&self) -> bool { - match self { - InodeWrapper::V5(_i) => true, - InodeWrapper::V6(_i) => false, - InodeWrapper::Ref(inode) => { - if let Some(_inode) = inode.as_any().downcast_ref::() { - true - } else { - inode - .as_any() - .downcast_ref::() - .is_some() - } - } - } - } - - /// Check whether is a RAFS V6 inode. - pub fn is_v6(&self) -> bool { - match self { - InodeWrapper::V5(_i) => false, - InodeWrapper::V6(_i) => true, - InodeWrapper::Ref(inode) => inode - .as_any() - .downcast_ref::() - .is_some(), - } - } - - /// Get file content size of the inode. - pub fn inode_size(&self) -> usize { - match self { - InodeWrapper::V5(i) => i.size(), - _ => panic!("should only be called for RAFS v5 inode"), - } - } - - /// Get access permission/mode for the inode. 
- pub fn mode(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.mode(), - InodeWrapper::V6(i) => i.mode(), - InodeWrapper::Ref(i) => i.get_attr().mode, - } - } - - /// Set access permission/mode for the inode. - pub fn set_mode(&mut self, mode: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_mode = mode, - InodeWrapper::V6(i) => i.i_mode = mode, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Check whether the inode is a directory. - pub fn is_dir(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_dir(), - InodeWrapper::V6(i) => i.is_dir(), - InodeWrapper::Ref(i) => i.is_dir(), - } - } - - /// Check whether the inode is a regular file. - pub fn is_reg(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_reg(), - InodeWrapper::V6(i) => i.is_reg(), - InodeWrapper::Ref(i) => i.is_reg(), - } - } - - /// Check whether the inode is a hardlink. - pub fn is_hardlink(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_hardlink(), - InodeWrapper::V6(i) => i.is_hardlink(), - InodeWrapper::Ref(i) => i.is_hardlink(), - } - } - - /// Check whether the inode is a symlink. - pub fn is_symlink(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_symlink(), - InodeWrapper::V6(i) => i.is_symlink(), - InodeWrapper::Ref(i) => i.is_symlink(), - } - } - - /// Check whether the inode is a char device node. - pub fn is_chrdev(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_chrdev(), - InodeWrapper::V6(i) => i.is_chrdev(), - InodeWrapper::Ref(i) => i.as_inode().is_chrdev(), - } - } - - /// Check whether the inode is a block device node. - pub fn is_blkdev(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_blkdev(), - InodeWrapper::V6(i) => i.is_blkdev(), - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Check whether the inode is a FIFO. - pub fn is_fifo(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_fifo(), - InodeWrapper::V6(i) => i.is_fifo(), - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Check whether the inode is a socket. - pub fn is_sock(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.is_sock(), - InodeWrapper::V6(i) => i.is_sock(), - InodeWrapper::Ref(i) => i.as_inode().is_dir(), - } - } - - /// Check whether the inode is a special file, such chardev, blkdev, FIFO and socket. - pub fn is_special(&self) -> bool { - self.is_chrdev() || self.is_blkdev() || self.is_fifo() || self.is_sock() - } - - /// Get inode flags. - pub fn has_hardlink(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.has_hardlink(), - InodeWrapper::V6(i) => i.has_hardlink(), - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Set whether the inode has HARDLINK flag set. - pub fn set_has_hardlink(&mut self, enable: bool) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => { - if enable { - i.i_flags |= RafsInodeFlags::HARDLINK; - } else { - i.i_flags &= !RafsInodeFlags::HARDLINK; - } - } - InodeWrapper::V6(i) => { - if enable { - i.i_flags |= RafsInodeFlags::HARDLINK; - } else { - i.i_flags &= !RafsInodeFlags::HARDLINK; - } - } - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Check whether the inode has associated xattrs. - pub fn has_xattr(&self) -> bool { - match self { - InodeWrapper::V5(i) => i.has_xattr(), - InodeWrapper::V6(i) => i.has_xattr(), - InodeWrapper::Ref(i) => i.has_xattr(), - } - } - - /// Set whether the inode has associated xattrs. 
- pub fn set_has_xattr(&mut self, enable: bool) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => { - if enable { - i.i_flags |= RafsInodeFlags::XATTR; - } else { - i.i_flags &= !RafsInodeFlags::XATTR; - } - } - InodeWrapper::V6(i) => { - if enable { - i.i_flags |= RafsInodeFlags::XATTR; - } else { - i.i_flags &= !RafsInodeFlags::XATTR; - } - } - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Get inode number. - pub fn ino(&self) -> Inode { - match self { - InodeWrapper::V5(i) => i.i_ino, - InodeWrapper::V6(i) => i.i_ino, - InodeWrapper::Ref(i) => i.ino(), - } - } - - /// Set inode number. - pub fn set_ino(&mut self, ino: Inode) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_ino = ino, - InodeWrapper::V6(i) => i.i_ino = ino, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get parent inode number, only for RAFS v5. - pub fn parent(&self) -> Inode { - match self { - InodeWrapper::V5(i) => i.i_parent, - InodeWrapper::V6(_i) => unimplemented!(), - InodeWrapper::Ref(i) => { - if self.is_v5() { - i.parent() - } else { - unimplemented!() - } - } - } - } - - /// Set parent inode number, only for RAFS v5. - pub fn set_parent(&mut self, parent: Inode) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_parent = parent, - InodeWrapper::V6(_i) => unimplemented!(), - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get inode content size of regular file, directory and symlink. - pub fn size(&self) -> u64 { - match self { - InodeWrapper::V5(i) => i.i_size, - InodeWrapper::V6(i) => i.i_size, - InodeWrapper::Ref(i) => i.size(), - } - } - - /// Set inode content size. - pub fn set_size(&mut self, size: u64) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_size = size, - InodeWrapper::V6(i) => i.i_size = size, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get user id associated with the inode. - pub fn uid(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_uid, - InodeWrapper::V6(i) => i.i_uid, - InodeWrapper::Ref(i) => i.as_inode().get_attr().uid, - } - } - - /// Set user id associated with the inode. - pub fn set_uid(&mut self, uid: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_uid = uid, - InodeWrapper::V6(i) => i.i_uid = uid, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get group id associated with the inode. - pub fn gid(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_gid, - InodeWrapper::V6(i) => i.i_gid, - InodeWrapper::Ref(i) => i.as_inode().get_attr().gid, - } - } - - /// Set group id associated with the inode. - pub fn set_gid(&mut self, gid: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_gid = gid, - InodeWrapper::V6(i) => i.i_gid = gid, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get modified time. - pub fn mtime(&self) -> u64 { - match self { - InodeWrapper::V5(i) => i.i_mtime, - InodeWrapper::V6(i) => i.i_mtime, - InodeWrapper::Ref(i) => i.get_attr().mtime, - } - } - - /// Set modified time. - pub fn set_mtime(&mut self, mtime: u64) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_mtime = mtime, - InodeWrapper::V6(i) => i.i_mtime = mtime, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get nsec part of modified time. 
- pub fn mtime_nsec(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_mtime_nsec, - InodeWrapper::V6(i) => i.i_mtime_nsec, - InodeWrapper::Ref(i) => i.get_attr().mtimensec, - } - } - - /// Set nsec part of modified time. - pub fn set_mtime_nsec(&mut self, mtime_nsec: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_mtime_nsec = mtime_nsec, - InodeWrapper::V6(i) => i.i_mtime_nsec = mtime_nsec, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get data blocks of file content, in unit of 512 bytes. - pub fn blocks(&self) -> u64 { - match self { - InodeWrapper::V5(i) => i.i_blocks, - InodeWrapper::V6(i) => i.i_blocks, - InodeWrapper::Ref(i) => i.get_attr().blocks, - } - } - - /// Set data blocks of file content, in unit of 512 bytes. - pub fn set_blocks(&mut self, blocks: u64) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_blocks = blocks, - InodeWrapper::V6(i) => i.i_blocks = blocks, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get real device id associated with the inode. - pub fn rdev(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_rdev, - InodeWrapper::V6(i) => i.i_rdev, - InodeWrapper::Ref(i) => i.rdev(), - } - } - - /// Set real device id associated with the inode. - pub fn set_rdev(&mut self, rdev: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_rdev = rdev, - InodeWrapper::V6(i) => i.i_rdev = rdev, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Set project ID associated with the inode. - pub fn set_projid(&mut self, projid: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_projid = projid, - InodeWrapper::V6(i) => i.i_projid = projid, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get number of hardlinks. - pub fn nlink(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_nlink, - InodeWrapper::V6(i) => i.i_nlink, - InodeWrapper::Ref(i) => i.get_attr().nlink, - } - } - - /// Set number of hardlinks. - pub fn set_nlink(&mut self, nlink: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_nlink = nlink, - InodeWrapper::V6(i) => i.i_nlink = nlink, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get digest of inode metadata, RAFS v5 only. - pub fn digest(&self) -> &RafsDigest { - if let InodeWrapper::V5(i) = self { - &i.i_digest - } else { - unimplemented!() - } - } - - /// Set digest of inode metadata, RAFS v5 only. - pub fn set_digest(&mut self, digest: RafsDigest) { - self.ensure_owned(); - if let InodeWrapper::V5(i) = self { - i.i_digest = digest; - } - } - - /// Get size of inode name. - pub fn name_size(&self) -> u16 { - match self { - InodeWrapper::V5(i) => i.i_name_size, - InodeWrapper::V6(i) => i.i_name_size, - InodeWrapper::Ref(i) => i.get_name_size(), - } - } - - /// Set size of inode name. - pub fn set_name_size(&mut self, size: usize) { - debug_assert!(size < u16::MAX as usize); - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_name_size = size as u16, - InodeWrapper::V6(i) => i.i_name_size = size as u16, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get size of symlink. - pub fn symlink_size(&self) -> u16 { - match self { - InodeWrapper::V5(i) => i.i_symlink_size, - InodeWrapper::V6(i) => i.i_symlink_size, - InodeWrapper::Ref(i) => i.get_symlink_size(), - } - } - - /// Set size of symlink. 
- pub fn set_symlink_size(&mut self, size: usize) { - debug_assert!(size <= u16::MAX as usize); - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => { - i.i_flags |= RafsInodeFlags::SYMLINK; - i.i_symlink_size = size as u16; - } - InodeWrapper::V6(i) => { - i.i_flags |= RafsInodeFlags::SYMLINK; - i.i_symlink_size = size as u16; - } - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Get child inode index, only valid for RAFS v5. - pub fn child_index(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_child_index, - InodeWrapper::V6(_i) => u32::MAX, - InodeWrapper::Ref(i) => i.get_child_index().unwrap_or(u32::MAX), - } - } - - /// Set child inode index, only fro RAFS v5. - pub fn set_child_index(&mut self, index: u32) { - self.ensure_owned(); - if let InodeWrapper::V5(i) = self { - i.i_child_index = index; - } - } - - /// Get child/chunk count. - pub fn child_count(&self) -> u32 { - match self { - InodeWrapper::V5(i) => i.i_child_count, - InodeWrapper::V6(i) => i.i_child_count, - InodeWrapper::Ref(i) => i.get_child_count(), - } - } - - /// Set child/chunk count. - pub fn set_child_count(&mut self, count: u32) { - self.ensure_owned(); - match self { - InodeWrapper::V5(i) => i.i_child_count = count, - InodeWrapper::V6(i) => i.i_child_count = count, - InodeWrapper::Ref(_i) => panic!("unexpected"), - } - } - - /// Create a `ChunkWrapper` object to be associated with the inode. - pub fn create_chunk(&self) -> ChunkWrapper { - match self { - InodeWrapper::V5(_) => ChunkWrapper::V5(RafsV5ChunkInfo::new()), - InodeWrapper::V6(_) => ChunkWrapper::V6(RafsV5ChunkInfo::new()), - InodeWrapper::Ref(_i) => unimplemented!(), - } - } - - /// Get memory/disk space occupied by the inode structure, including xattrs. - pub fn get_inode_size_with_xattr(&self, xattrs: &RafsXAttrs, v6_compact: bool) -> usize { - assert!(matches!(self, InodeWrapper::V6(_))); - let inode_size = if v6_compact { - size_of::() - } else { - size_of::() - }; - inode_size + xattrs.aligned_size_v6() - } - - fn ensure_owned(&mut self) { - if let Self::Ref(i) = self { - let i = i.clone(); - if self.is_v6() { - *self = Self::V6(RafsV6Inode::from(i.deref())); - } else { - assert!(self.is_v5()); - *self = Self::V5(RafsV5Inode::from(i.deref())); - } - } - } -} - -#[derive(Clone, Copy, Default, Debug)] -pub struct RafsV6Inode { - /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. - pub i_ino: u64, - pub i_uid: u32, - pub i_gid: u32, - pub i_projid: u32, - pub i_mode: u32, // 64 - pub i_size: u64, - pub i_blocks: u64, - pub i_flags: RafsInodeFlags, - pub i_nlink: u32, - /// for dir, means child count. - /// for regular file, means chunk info count. - pub i_child_count: u32, - /// file name size, [char; i_name_size] - pub i_name_size: u16, - /// symlink path size, [char; i_symlink_size] - pub i_symlink_size: u16, // 104 - // inode device block number, ignored for non-special files - pub i_rdev: u32, - // for alignment reason, we put nsec first - pub i_mtime_nsec: u32, - pub i_mtime: u64, // 120 -} - -impl RafsV6Inode { - /// Create a new instance of `RafsV5Inode`. - pub fn new() -> Self { - Self::default() - } - - /// Set size of the file name. - #[inline] - pub fn set_name_size(&mut self, name_len: usize) { - self.i_name_size = name_len as u16; - } - - /// Mark the inode as a symlink. - #[inline] - pub fn set_symlink_size(&mut self, symlink_len: usize) { - self.i_symlink_size = symlink_len as u16; - } - - /// Get the uid and the gid of the inode. 
- #[inline] - pub fn uidgid(&self) -> (u32, u32) { - (self.i_uid, self.i_gid) - } - - /// Get the uid and the gid of the inode. - #[inline] - pub fn mtime(&self) -> (u64, u32) { - (self.i_mtime, self.i_mtime_nsec) - } - - /// Get the mode of the inode. - #[inline] - pub fn mode(&self) -> u32 { - self.i_mode - } - - /// Check whether the inode is a directory. - #[inline] - pub fn is_dir(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 - } - - /// Check whether the inode is a symlink. - #[inline] - pub fn is_symlink(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 - } - - /// Check whether the inode is a regular file. - #[inline] - pub fn is_reg(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 - } - - /// Check whether the inode is a char device node. - pub fn is_chrdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 - } - - /// Check whether the inode is a block device node. - pub fn is_blkdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 - } - - /// Check whether the inode is a FIFO. - pub fn is_fifo(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 - } - - /// Check whether the inode is a socket. - pub fn is_sock(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 - } - - /// Check whether the inode is a hardlink. - #[inline] - pub fn is_hardlink(&self) -> bool { - self.is_reg() && self.i_nlink > 1 - } - - /// Get inode flags - pub fn has_hardlink(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::HARDLINK) - } - - /// Mark the inode as having extended attributes. - #[inline] - pub fn has_xattr(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::XATTR) - } - - /// Mark the inode as having hole chunks. - #[inline] - pub fn has_hole(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::HAS_HOLE) - } -} - -impl From<&dyn RafsInodeExt> for RafsV6Inode { - fn from(inode: &dyn RafsInodeExt) -> Self { - let attr = inode.get_attr(); - RafsV6Inode { - i_ino: attr.ino, - i_uid: attr.uid, - i_gid: attr.gid, - i_projid: inode.projid(), - i_mode: attr.mode, - i_size: attr.size, - i_blocks: attr.blocks, - i_flags: RafsInodeFlags::from_bits_truncate(inode.flags()), - i_nlink: attr.nlink, - i_child_count: inode.get_child_count(), - i_name_size: inode.get_name_size(), - i_symlink_size: inode.get_symlink_size(), - i_rdev: attr.rdev, - i_mtime_nsec: attr.mtimensec, - i_mtime: attr.mtime, - } - } -} - -bitflags! { - /// Rafs v5 inode flags. - pub struct RafsInodeFlags: u64 { - /// Inode is a symlink. - const SYMLINK = 0x0000_0001; - /// Inode has hardlinks. - const HARDLINK = 0x0000_0002; - /// Inode has extended attributes. - const XATTR = 0x0000_0004; - /// Inode chunks has holes. 
- const HAS_HOLE = 0x0000_0008; - } -} - -impl Default for RafsInodeFlags { - fn default() -> Self { - RafsInodeFlags::empty() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - metadata::{direct_v5::DirectSuperBlockV5, RafsSuperMeta}, - mock::MockInode, - }; - - #[test] - fn test_inode_wrapper() { - let mut wrapper_v5 = InodeWrapper::new(RafsVersion::V5); - let mut wrapper_v6 = InodeWrapper::new(RafsVersion::V6); - let mut wrapper_cache_v5 = - InodeWrapper::from_inode_info(Arc::new(CachedInodeV5::default())); - let wrapper_ondisk_v5 = InodeWrapper::from_inode_info(Arc::new(OndiskInodeWrapperV5 { - mapping: DirectSuperBlockV5::new(&RafsSuperMeta::default(), false), - offset: 0, - })); - - assert!(wrapper_v5.is_v5()); - assert!(!wrapper_v6.is_v5()); - assert!(wrapper_cache_v5.is_v5()); - assert!(wrapper_ondisk_v5.is_v5()); - assert!(!wrapper_v5.is_v6()); - assert!(wrapper_v6.is_v6()); - assert!(!wrapper_cache_v5.is_v6()); - assert!(!wrapper_ondisk_v5.is_v6()); - assert_eq!(wrapper_v5.inode_size(), 128); - - wrapper_v5.set_mode(0x0000_0001); - wrapper_v6.set_mode(0x0000_0002); - assert_eq!(wrapper_v5.mode(), 0x0000_0001); - assert_eq!(wrapper_v6.mode(), 0x0000_0002); - - assert!(!wrapper_v5.is_hardlink()); - assert!(!wrapper_v6.is_hardlink()); - assert!(!wrapper_cache_v5.is_hardlink()); - assert!(!wrapper_v5.is_symlink()); - assert!(!wrapper_v6.is_symlink()); - assert!(!wrapper_cache_v5.is_symlink()); - assert!(!wrapper_v5.is_chrdev()); - assert!(!wrapper_v6.is_chrdev()); - assert!(!wrapper_cache_v5.is_chrdev()); - assert!(!wrapper_v5.is_blkdev()); - assert!(!wrapper_v6.is_blkdev()); - assert!(!wrapper_v5.is_fifo()); - assert!(!wrapper_v6.is_fifo()); - assert!(!wrapper_v5.is_sock()); - assert!(!wrapper_v6.is_sock()); - assert!(!wrapper_cache_v5.is_sock()); - assert!(!wrapper_v5.has_hardlink()); - assert!(!wrapper_v6.has_hardlink()); - wrapper_v5.set_has_hardlink(true); - wrapper_v6.set_has_hardlink(true); - assert!(wrapper_v5.has_hardlink()); - assert!(wrapper_v6.has_hardlink()); - wrapper_v5.set_has_hardlink(false); - wrapper_v6.set_has_hardlink(false); - assert!(!wrapper_v5.has_hardlink()); - assert!(!wrapper_v6.has_hardlink()); - assert!(!wrapper_v5.has_xattr()); - assert!(!wrapper_v6.has_xattr()); - assert!(!wrapper_cache_v5.has_xattr()); - wrapper_v5.set_has_xattr(true); - wrapper_v6.set_has_xattr(true); - assert!(wrapper_v5.has_xattr()); - assert!(wrapper_v6.has_xattr()); - wrapper_v5.set_has_xattr(false); - wrapper_v6.set_has_xattr(false); - assert!(!wrapper_v5.has_xattr()); - assert!(!wrapper_v6.has_xattr()); - wrapper_v5.set_ino(0x0000_0001); - wrapper_v6.set_ino(0x0000_0002); - assert_eq!(wrapper_v5.ino(), 0x0000_0001); - assert_eq!(wrapper_v6.ino(), 0x0000_0002); - wrapper_v5.set_parent(0x0000_0004); - assert_eq!(wrapper_v5.parent(), 0x0000_0004); - assert_eq!(wrapper_cache_v5.size(), 0); - wrapper_v5.set_uid(0x0000_0001); - wrapper_v6.set_uid(0x0000_0002); - assert_eq!(wrapper_v5.uid(), 0x0000_0001); - assert_eq!(wrapper_v6.uid(), 0x0000_0002); - wrapper_v5.set_gid(0x0000_0001); - wrapper_v6.set_gid(0x0000_0002); - assert_eq!(wrapper_v5.gid(), 0x0000_0001); - assert_eq!(wrapper_v6.gid(), 0x0000_0002); - wrapper_v5.set_mtime(0x0000_0004); - wrapper_v6.set_mtime(0x0000_0008); - assert_eq!(wrapper_v5.mtime(), 0x0000_0004); - assert_eq!(wrapper_v6.mtime(), 0x0000_0008); - assert_eq!(wrapper_cache_v5.mtime(), 0x0000_0000); - wrapper_v5.set_mtime_nsec(0x0000_0004); - wrapper_v6.set_mtime_nsec(0x0000_0008); - assert_eq!(wrapper_v5.mtime_nsec(), 0x0000_0004); - 
assert_eq!(wrapper_v6.mtime_nsec(), 0x0000_0008); - assert_eq!(wrapper_cache_v5.mtime_nsec(), 0x0000_0000); - wrapper_v5.set_blocks(0x0000_0010); - wrapper_v6.set_blocks(0x0000_0020); - assert_eq!(wrapper_v5.blocks(), 0x0000_0010); - assert_eq!(wrapper_v6.blocks(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.blocks(), 0x0000_0000); - wrapper_v5.set_rdev(0x0000_0010); - wrapper_v6.set_rdev(0x0000_0020); - assert_eq!(wrapper_v5.rdev(), 0x0000_0010); - assert_eq!(wrapper_v6.rdev(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.rdev(), 0x0000_0000); - wrapper_v5.set_projid(0x0000_0100); - wrapper_v6.set_projid(0x0000_0200); - wrapper_v5.set_nlink(0x0000_0010); - wrapper_v6.set_nlink(0x0000_0020); - assert_eq!(wrapper_v5.nlink(), 0x0000_0010); - assert_eq!(wrapper_v6.nlink(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.nlink(), 0x0000_0000); - wrapper_v5.set_name_size(0x0000_0010); - wrapper_v6.set_name_size(0x0000_0020); - assert_eq!(wrapper_v5.name_size(), 0x0000_0010); - assert_eq!(wrapper_v6.name_size(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.name_size(), 0x0000_0000); - wrapper_v5.set_symlink_size(0x0000_0010); - wrapper_v6.set_symlink_size(0x0000_0020); - assert_eq!(wrapper_v5.symlink_size(), 0x0000_0010); - assert_eq!(wrapper_v6.symlink_size(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.symlink_size(), 0x0000_0000); - wrapper_v5.set_child_index(0x0000_0010); - wrapper_v6.set_child_index(0x0000_0020); - wrapper_cache_v5.set_child_index(0x0000_0008); - assert_eq!(wrapper_v5.child_index(), 0x0000_0010); - assert_eq!(wrapper_v6.child_index(), u32::MAX); - assert_eq!(wrapper_cache_v5.child_index(), 0x0000_0008); - wrapper_v5.set_child_count(0x0000_0010); - wrapper_v6.set_child_count(0x0000_0020); - assert_eq!(wrapper_v5.child_count(), 0x0000_0010); - assert_eq!(wrapper_v6.child_count(), 0x0000_0020); - assert_eq!(wrapper_cache_v5.child_count(), 0x0000_0000); - wrapper_v5.create_chunk(); - wrapper_v6.create_chunk(); - } - - #[test] - #[should_panic] - fn test_inode_size_v6() { - let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); - wrapper_v6.inode_size(); - } - - #[test] - #[should_panic] - fn test_inode_size_ref() { - let wrapper_cache_v5 = InodeWrapper::from_inode_info(Arc::new(CachedInodeV5::default())); - wrapper_cache_v5.inode_size(); - } - - #[test] - #[should_panic] - fn test_set_mode_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_mode(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_is_blk_dev_ref() { - let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.is_blkdev(); - } - - #[test] - #[should_panic] - fn test_is_fifo_ref() { - let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.is_fifo(); - } - - #[test] - #[should_panic] - fn test_has_hardlink_ref() { - let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.has_hardlink(); - } - - #[test] - #[should_panic] - fn test_set_has_hardlink_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_has_hardlink(true); - } - - #[test] - #[should_panic] - fn test_set_has_xattr_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_has_xattr(true); - } - - #[test] - #[should_panic] - fn test_set_ino_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - 
wrapper_mock.set_ino(Inode::default()); - } - - #[test] - #[should_panic] - fn test_set_parent_v6() { - let mut wrapper_v6 = InodeWrapper::new(RafsVersion::V6); - wrapper_v6.set_parent(Inode::default()); - } - - #[test] - #[should_panic] - fn test_set_parent_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_parent(Inode::default()); - } - - #[test] - #[should_panic] - fn test_get_parent_v6() { - let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); - wrapper_v6.parent(); - } - - #[test] - #[should_panic] - fn test_get_parent_ref() { - let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.parent(); - } - - #[test] - #[should_panic] - fn test_set_size_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_size(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_uid_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_uid(0x0000_0000_0001); - } - - #[test] - #[should_panic] - fn test_set_gid_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_gid(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_mtime_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_mtime(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_mtime_nsec_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_mtime_nsec(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_blocks_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_blocks(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_rdev_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_rdev(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_projid_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_projid(0x0000_0001); - } - - #[test] - #[should_panic] - fn test_set_digest_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_digest(RafsDigest::default()); - } - - #[test] - #[should_panic] - fn test_get_digest_v6() { - let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); - wrapper_v6.digest(); - } - - #[test] - #[should_panic] - fn test_set_namesize_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_name_size(0x0000_0000); - } - - #[test] - #[should_panic] - fn test_set_symlink_size_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_symlink_size(0x0000_0000); - } - - #[test] - #[should_panic] - fn test_set_child_count_ref() { - let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.set_child_count(0x0000_0000); - } - - #[test] - #[should_panic] - fn test_create_chunk_ref() { - let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); - wrapper_mock.create_chunk(); - } - - #[test] - fn test_rafs_v6_inode() { - let mut inode = RafsV6Inode { - i_ino: 0x0000_0000, - i_uid: 0x0000_0001, - i_gid: 0x0000_0002, - i_projid: 0x0000_0003, - i_mode: 0x0000_0000, - i_size: 
0x0000_0005, - i_blocks: 0x0000_0006, - i_flags: RafsInodeFlags::default(), - i_nlink: 0x0000_0007, - i_child_count: 0x0000_0008, - i_name_size: 0x0000_0010, - i_symlink_size: 0x0000_0011, - i_rdev: 0x0000_0012, - i_mtime_nsec: 0x0000_0013, - i_mtime: 0x0000_0014, - }; - - inode.set_name_size(0x0000_0001); - inode.set_symlink_size(0x0000_0002); - - assert_eq!(inode.i_name_size, 0x0000_0001); - assert_eq!(inode.i_symlink_size, 0x0000_0002); - assert_eq!(inode.uidgid(), (0x0000_0001, 0x0000_0002)); - assert_eq!(inode.mtime(), (0x0000_0014 as u64, 0x0000_0013)); - assert_eq!(inode.mode(), 0x0000_0000); - assert!(!inode.is_chrdev()); - assert!(!inode.is_blkdev()); - assert!(!inode.is_fifo()); - assert!(!inode.is_sock()); - assert!(!inode.is_hardlink()); - assert!(!inode.has_hardlink()); - assert!(!inode.has_xattr()); - assert!(!inode.has_hole()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::{Debug, Formatter}; +use std::mem::size_of; +use std::ops::Deref; +use std::sync::Arc; + +use nydus_utils::digest::RafsDigest; + +use crate::metadata::cached_v5::CachedInodeV5; +use crate::metadata::chunk::ChunkWrapper; +use crate::metadata::direct_v5::OndiskInodeWrapper as OndiskInodeWrapperV5; +use crate::metadata::direct_v6::OndiskInodeWrapper as OndiskInodeWrapperV6; +use crate::metadata::layout::v5::{RafsV5ChunkInfo, RafsV5Inode}; +use crate::metadata::layout::v6::{RafsV6InodeCompact, RafsV6InodeExtended}; +use crate::metadata::layout::RafsXAttrs; +use crate::metadata::{Inode, RafsVersion}; +use crate::RafsInodeExt; + +/// An inode object wrapper for different RAFS versions. +#[derive(Clone)] +pub enum InodeWrapper { + /// Inode info structure for RAFS v5. + V5(RafsV5Inode), + /// Inode info structure for RAFS v6, reuse `RafsV5Inode` as IR for v6. + V6(RafsV6Inode), + /// A reference to a `RafsInodeExt` object. + Ref(Arc), +} + +impl Debug for InodeWrapper { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::V5(i) => write!(f, "{:?}", i), + Self::V6(i) => write!(f, "{:?}", i), + Self::Ref(i) => { + let i = RafsV5Inode::from(i.deref()); + write!(f, "{:?}", i) + } + } + } +} + +impl InodeWrapper { + /// Create a new instance of `InodeWrapper` with default value. + pub fn new(version: RafsVersion) -> Self { + match version { + RafsVersion::V5 => InodeWrapper::V5(RafsV5Inode::new()), + RafsVersion::V6 => InodeWrapper::V6(RafsV6Inode::new()), + } + } + + /// Create an `InodeWrapper` object from a `RafsInodeExt` trait object. + pub fn from_inode_info(inode: Arc) -> Self { + Self::Ref(inode) + } + + /// Check whether is a RAFS V5 inode. + pub fn is_v5(&self) -> bool { + match self { + InodeWrapper::V5(_i) => true, + InodeWrapper::V6(_i) => false, + InodeWrapper::Ref(inode) => { + if let Some(_inode) = inode.as_any().downcast_ref::() { + true + } else { + inode + .as_any() + .downcast_ref::() + .is_some() + } + } + } + } + + /// Check whether is a RAFS V6 inode. + pub fn is_v6(&self) -> bool { + match self { + InodeWrapper::V5(_i) => false, + InodeWrapper::V6(_i) => true, + InodeWrapper::Ref(inode) => inode + .as_any() + .downcast_ref::() + .is_some(), + } + } + + /// Get file content size of the inode. + pub fn inode_size(&self) -> usize { + match self { + InodeWrapper::V5(i) => i.size(), + _ => panic!("should only be called for RAFS v5 inode"), + } + } + + /// Get access permission/mode for the inode. 
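`is_v5`/`is_v6` above recover the concrete inode type behind a `RafsInodeExt` trait object with the `as_any` + `downcast_ref` idiom. A self-contained sketch of that idiom follows; `Shape`, `Circle` and `Square` are hypothetical types, only the pattern matches the code above.

```rust
use std::any::Any;

/// A trait object that exposes itself as `&dyn Any` so callers can
/// probe for the concrete type, like `RafsInode::as_any` does.
trait Shape {
    fn as_any(&self) -> &dyn Any;
}

struct Circle;
struct Square;

impl Shape for Circle {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

impl Shape for Square {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

/// Version/type detection via downcast, mirroring `InodeWrapper::is_v5`.
fn is_circle(shape: &dyn Shape) -> bool {
    shape.as_any().downcast_ref::<Circle>().is_some()
}

fn main() {
    assert!(is_circle(&Circle));
    assert!(!is_circle(&Square));
}
```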
+ pub fn mode(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.mode(), + InodeWrapper::V6(i) => i.mode(), + InodeWrapper::Ref(i) => i.get_attr().mode, + } + } + + /// Set access permission/mode for the inode. + pub fn set_mode(&mut self, mode: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_mode = mode, + InodeWrapper::V6(i) => i.i_mode = mode, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Check whether the inode is a directory. + pub fn is_dir(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_dir(), + InodeWrapper::V6(i) => i.is_dir(), + InodeWrapper::Ref(i) => i.is_dir(), + } + } + + /// Check whether the inode is a regular file. + pub fn is_reg(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_reg(), + InodeWrapper::V6(i) => i.is_reg(), + InodeWrapper::Ref(i) => i.is_reg(), + } + } + + /// Check whether the inode is a hardlink. + pub fn is_hardlink(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_hardlink(), + InodeWrapper::V6(i) => i.is_hardlink(), + InodeWrapper::Ref(i) => i.is_hardlink(), + } + } + + /// Check whether the inode is a symlink. + pub fn is_symlink(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_symlink(), + InodeWrapper::V6(i) => i.is_symlink(), + InodeWrapper::Ref(i) => i.is_symlink(), + } + } + + /// Check whether the inode is a char device node. + pub fn is_chrdev(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_chrdev(), + InodeWrapper::V6(i) => i.is_chrdev(), + InodeWrapper::Ref(i) => i.as_inode().is_chrdev(), + } + } + + /// Check whether the inode is a block device node. + pub fn is_blkdev(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_blkdev(), + InodeWrapper::V6(i) => i.is_blkdev(), + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Check whether the inode is a FIFO. + pub fn is_fifo(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_fifo(), + InodeWrapper::V6(i) => i.is_fifo(), + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Check whether the inode is a socket. + pub fn is_sock(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.is_sock(), + InodeWrapper::V6(i) => i.is_sock(), + InodeWrapper::Ref(i) => i.as_inode().is_dir(), + } + } + + /// Check whether the inode is a special file, such chardev, blkdev, FIFO and socket. + pub fn is_special(&self) -> bool { + self.is_chrdev() || self.is_blkdev() || self.is_fifo() || self.is_sock() + } + + /// Get inode flags. + pub fn has_hardlink(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.has_hardlink(), + InodeWrapper::V6(i) => i.has_hardlink(), + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Set whether the inode has HARDLINK flag set. + pub fn set_has_hardlink(&mut self, enable: bool) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => { + if enable { + i.i_flags |= RafsInodeFlags::HARDLINK; + } else { + i.i_flags &= !RafsInodeFlags::HARDLINK; + } + } + InodeWrapper::V6(i) => { + if enable { + i.i_flags |= RafsInodeFlags::HARDLINK; + } else { + i.i_flags &= !RafsInodeFlags::HARDLINK; + } + } + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Check whether the inode has associated xattrs. + pub fn has_xattr(&self) -> bool { + match self { + InodeWrapper::V5(i) => i.has_xattr(), + InodeWrapper::V6(i) => i.has_xattr(), + InodeWrapper::Ref(i) => i.has_xattr(), + } + } + + /// Set whether the inode has associated xattrs. 
+ pub fn set_has_xattr(&mut self, enable: bool) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => { + if enable { + i.i_flags |= RafsInodeFlags::XATTR; + } else { + i.i_flags &= !RafsInodeFlags::XATTR; + } + } + InodeWrapper::V6(i) => { + if enable { + i.i_flags |= RafsInodeFlags::XATTR; + } else { + i.i_flags &= !RafsInodeFlags::XATTR; + } + } + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Get inode number. + pub fn ino(&self) -> Inode { + match self { + InodeWrapper::V5(i) => i.i_ino, + InodeWrapper::V6(i) => i.i_ino, + InodeWrapper::Ref(i) => i.ino(), + } + } + + /// Set inode number. + pub fn set_ino(&mut self, ino: Inode) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_ino = ino, + InodeWrapper::V6(i) => i.i_ino = ino, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get parent inode number, only for RAFS v5. + pub fn parent(&self) -> Inode { + match self { + InodeWrapper::V5(i) => i.i_parent, + InodeWrapper::V6(_i) => unimplemented!(), + InodeWrapper::Ref(i) => { + if self.is_v5() { + i.parent() + } else { + unimplemented!() + } + } + } + } + + /// Set parent inode number, only for RAFS v5. + pub fn set_parent(&mut self, parent: Inode) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_parent = parent, + InodeWrapper::V6(_i) => unimplemented!(), + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get inode content size of regular file, directory and symlink. + pub fn size(&self) -> u64 { + match self { + InodeWrapper::V5(i) => i.i_size, + InodeWrapper::V6(i) => i.i_size, + InodeWrapper::Ref(i) => i.size(), + } + } + + /// Set inode content size. + pub fn set_size(&mut self, size: u64) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_size = size, + InodeWrapper::V6(i) => i.i_size = size, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get user id associated with the inode. + pub fn uid(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_uid, + InodeWrapper::V6(i) => i.i_uid, + InodeWrapper::Ref(i) => i.as_inode().get_attr().uid, + } + } + + /// Set user id associated with the inode. + pub fn set_uid(&mut self, uid: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_uid = uid, + InodeWrapper::V6(i) => i.i_uid = uid, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get group id associated with the inode. + pub fn gid(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_gid, + InodeWrapper::V6(i) => i.i_gid, + InodeWrapper::Ref(i) => i.as_inode().get_attr().gid, + } + } + + /// Set group id associated with the inode. + pub fn set_gid(&mut self, gid: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_gid = gid, + InodeWrapper::V6(i) => i.i_gid = gid, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get modified time. + pub fn mtime(&self) -> u64 { + match self { + InodeWrapper::V5(i) => i.i_mtime, + InodeWrapper::V6(i) => i.i_mtime, + InodeWrapper::Ref(i) => i.get_attr().mtime, + } + } + + /// Set modified time. + pub fn set_mtime(&mut self, mtime: u64) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_mtime = mtime, + InodeWrapper::V6(i) => i.i_mtime = mtime, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get nsec part of modified time. 
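`set_has_hardlink` and `set_has_xattr` above toggle a single bit in the inode flags depending on a boolean. The sketch below shows the same set/clear pattern on a plain `u64`; the constants reuse two of the `RafsInodeFlags` bit values defined later in this file, but the `bitflags!`-generated type itself is not pulled in.

```rust
/// Plain-u64 stand-ins for two of the `RafsInodeFlags` bits
/// defined later in this file.
const FLAG_SYMLINK: u64 = 0x0000_0001;
const FLAG_XATTR: u64 = 0x0000_0004;

/// Set or clear one flag bit depending on `enable`, the same pattern
/// used by `set_has_hardlink`/`set_has_xattr` above.
fn toggle_flag(flags: &mut u64, flag: u64, enable: bool) {
    if enable {
        *flags |= flag;
    } else {
        *flags &= !flag;
    }
}

fn main() {
    let mut flags = FLAG_SYMLINK;
    toggle_flag(&mut flags, FLAG_XATTR, true);
    assert_eq!(flags, FLAG_SYMLINK | FLAG_XATTR);
    toggle_flag(&mut flags, FLAG_XATTR, false);
    assert_eq!(flags, FLAG_SYMLINK);
}
```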
+ pub fn mtime_nsec(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_mtime_nsec, + InodeWrapper::V6(i) => i.i_mtime_nsec, + InodeWrapper::Ref(i) => i.get_attr().mtimensec, + } + } + + /// Set nsec part of modified time. + pub fn set_mtime_nsec(&mut self, mtime_nsec: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_mtime_nsec = mtime_nsec, + InodeWrapper::V6(i) => i.i_mtime_nsec = mtime_nsec, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get data blocks of file content, in unit of 512 bytes. + pub fn blocks(&self) -> u64 { + match self { + InodeWrapper::V5(i) => i.i_blocks, + InodeWrapper::V6(i) => i.i_blocks, + InodeWrapper::Ref(i) => i.get_attr().blocks, + } + } + + /// Set data blocks of file content, in unit of 512 bytes. + pub fn set_blocks(&mut self, blocks: u64) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_blocks = blocks, + InodeWrapper::V6(i) => i.i_blocks = blocks, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get real device id associated with the inode. + pub fn rdev(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_rdev, + InodeWrapper::V6(i) => i.i_rdev, + InodeWrapper::Ref(i) => i.rdev(), + } + } + + /// Set real device id associated with the inode. + pub fn set_rdev(&mut self, rdev: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_rdev = rdev, + InodeWrapper::V6(i) => i.i_rdev = rdev, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Set project ID associated with the inode. + pub fn set_projid(&mut self, projid: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_projid = projid, + InodeWrapper::V6(i) => i.i_projid = projid, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get number of hardlinks. + pub fn nlink(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_nlink, + InodeWrapper::V6(i) => i.i_nlink, + InodeWrapper::Ref(i) => i.get_attr().nlink, + } + } + + /// Set number of hardlinks. + pub fn set_nlink(&mut self, nlink: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_nlink = nlink, + InodeWrapper::V6(i) => i.i_nlink = nlink, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get digest of inode metadata, RAFS v5 only. + pub fn digest(&self) -> &RafsDigest { + if let InodeWrapper::V5(i) = self { + &i.i_digest + } else { + unimplemented!() + } + } + + /// Set digest of inode metadata, RAFS v5 only. + pub fn set_digest(&mut self, digest: RafsDigest) { + self.ensure_owned(); + if let InodeWrapper::V5(i) = self { + i.i_digest = digest; + } + } + + /// Get size of inode name. + pub fn name_size(&self) -> u16 { + match self { + InodeWrapper::V5(i) => i.i_name_size, + InodeWrapper::V6(i) => i.i_name_size, + InodeWrapper::Ref(i) => i.get_name_size(), + } + } + + /// Set size of inode name. + pub fn set_name_size(&mut self, size: usize) { + debug_assert!(size < u16::MAX as usize); + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_name_size = size as u16, + InodeWrapper::V6(i) => i.i_name_size = size as u16, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get size of symlink. + pub fn symlink_size(&self) -> u16 { + match self { + InodeWrapper::V5(i) => i.i_symlink_size, + InodeWrapper::V6(i) => i.i_symlink_size, + InodeWrapper::Ref(i) => i.get_symlink_size(), + } + } + + /// Set size of symlink. 
+ pub fn set_symlink_size(&mut self, size: usize) { + debug_assert!(size <= u16::MAX as usize); + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => { + i.i_flags |= RafsInodeFlags::SYMLINK; + i.i_symlink_size = size as u16; + } + InodeWrapper::V6(i) => { + i.i_flags |= RafsInodeFlags::SYMLINK; + i.i_symlink_size = size as u16; + } + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Get child inode index, only valid for RAFS v5. + pub fn child_index(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_child_index, + InodeWrapper::V6(_i) => u32::MAX, + InodeWrapper::Ref(i) => i.get_child_index().unwrap_or(u32::MAX), + } + } + + /// Set child inode index, only fro RAFS v5. + pub fn set_child_index(&mut self, index: u32) { + self.ensure_owned(); + if let InodeWrapper::V5(i) = self { + i.i_child_index = index; + } + } + + /// Get child/chunk count. + pub fn child_count(&self) -> u32 { + match self { + InodeWrapper::V5(i) => i.i_child_count, + InodeWrapper::V6(i) => i.i_child_count, + InodeWrapper::Ref(i) => i.get_child_count(), + } + } + + /// Set child/chunk count. + pub fn set_child_count(&mut self, count: u32) { + self.ensure_owned(); + match self { + InodeWrapper::V5(i) => i.i_child_count = count, + InodeWrapper::V6(i) => i.i_child_count = count, + InodeWrapper::Ref(_i) => panic!("unexpected"), + } + } + + /// Create a `ChunkWrapper` object to be associated with the inode. + pub fn create_chunk(&self) -> ChunkWrapper { + match self { + InodeWrapper::V5(_) => ChunkWrapper::V5(RafsV5ChunkInfo::new()), + InodeWrapper::V6(_) => ChunkWrapper::V6(RafsV5ChunkInfo::new()), + InodeWrapper::Ref(_i) => unimplemented!(), + } + } + + /// Get memory/disk space occupied by the inode structure, including xattrs. + pub fn get_inode_size_with_xattr(&self, xattrs: &RafsXAttrs, v6_compact: bool) -> usize { + assert!(matches!(self, InodeWrapper::V6(_))); + let inode_size = if v6_compact { + size_of::() + } else { + size_of::() + }; + inode_size + xattrs.aligned_size_v6() + } + + fn ensure_owned(&mut self) { + if let Self::Ref(i) = self { + let i = i.clone(); + if self.is_v6() { + *self = Self::V6(RafsV6Inode::from(i.deref())); + } else { + assert!(self.is_v5()); + *self = Self::V5(RafsV5Inode::from(i.deref())); + } + } + } +} + +#[derive(Clone, Copy, Default, Debug)] +pub struct RafsV6Inode { + /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. + pub i_ino: u64, + pub i_uid: u32, + pub i_gid: u32, + pub i_projid: u32, + pub i_mode: u32, // 64 + pub i_size: u64, + pub i_blocks: u64, + pub i_flags: RafsInodeFlags, + pub i_nlink: u32, + /// for dir, means child count. + /// for regular file, means chunk info count. + pub i_child_count: u32, + /// file name size, [char; i_name_size] + pub i_name_size: u16, + /// symlink path size, [char; i_symlink_size] + pub i_symlink_size: u16, // 104 + // inode device block number, ignored for non-special files + pub i_rdev: u32, + // for alignment reason, we put nsec first + pub i_mtime_nsec: u32, + pub i_mtime: u64, // 120 +} + +impl RafsV6Inode { + /// Create a new instance of `RafsV5Inode`. + pub fn new() -> Self { + Self::default() + } + + /// Set size of the file name. + #[inline] + pub fn set_name_size(&mut self, name_len: usize) { + self.i_name_size = name_len as u16; + } + + /// Mark the inode as a symlink. + #[inline] + pub fn set_symlink_size(&mut self, symlink_len: usize) { + self.i_symlink_size = symlink_len as u16; + } + + /// Get the uid and the gid of the inode. 
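`ensure_owned` above implements a copy-on-write promotion: before any setter mutates the wrapper, the shared `Ref` variant is converted into an owned `V5`/`V6` value. A minimal sketch of that pattern with a hypothetical two-variant `Wrapper` enum (not the nydus type):

```rust
use std::sync::Arc;

/// Hypothetical two-variant wrapper illustrating the copy-on-write
/// promotion performed by `InodeWrapper::ensure_owned`.
enum Wrapper {
    Owned(String),
    Shared(Arc<String>),
}

impl Wrapper {
    /// Convert the shared, read-only variant into an owned value so
    /// that setters can mutate it in place.
    fn ensure_owned(&mut self) {
        if let Wrapper::Shared(s) = self {
            let owned = s.as_ref().clone();
            *self = Wrapper::Owned(owned);
        }
    }

    fn set(&mut self, value: &str) {
        self.ensure_owned();
        if let Wrapper::Owned(s) = self {
            *s = value.to_string();
        }
    }
}

fn main() {
    let mut w = Wrapper::Shared(Arc::new("read-only".to_string()));
    w.set("mutated"); // promotes the shared value to an owned one first
    assert!(matches!(w, Wrapper::Owned(ref s) if s.as_str() == "mutated"));
}
```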
+ #[inline] + pub fn uidgid(&self) -> (u32, u32) { + (self.i_uid, self.i_gid) + } + + /// Get the uid and the gid of the inode. + #[inline] + pub fn mtime(&self) -> (u64, u32) { + (self.i_mtime, self.i_mtime_nsec) + } + + /// Get the mode of the inode. + #[inline] + pub fn mode(&self) -> u32 { + self.i_mode + } + + /// Check whether the inode is a directory. + #[inline] + pub fn is_dir(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 + } + + /// Check whether the inode is a symlink. + #[inline] + pub fn is_symlink(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 + } + + /// Check whether the inode is a regular file. + #[inline] + pub fn is_reg(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 + } + + /// Check whether the inode is a char device node. + pub fn is_chrdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 + } + + /// Check whether the inode is a block device node. + pub fn is_blkdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 + } + + /// Check whether the inode is a FIFO. + pub fn is_fifo(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 + } + + /// Check whether the inode is a socket. + pub fn is_sock(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 + } + + /// Check whether the inode is a hardlink. + #[inline] + pub fn is_hardlink(&self) -> bool { + self.is_reg() && self.i_nlink > 1 + } + + /// Get inode flags + pub fn has_hardlink(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::HARDLINK) + } + + /// Mark the inode as having extended attributes. + #[inline] + pub fn has_xattr(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::XATTR) + } + + /// Mark the inode as having hole chunks. + #[inline] + pub fn has_hole(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::HAS_HOLE) + } +} + +impl From<&dyn RafsInodeExt> for RafsV6Inode { + fn from(inode: &dyn RafsInodeExt) -> Self { + let attr = inode.get_attr(); + RafsV6Inode { + i_ino: attr.ino, + i_uid: attr.uid, + i_gid: attr.gid, + i_projid: inode.projid(), + i_mode: attr.mode, + i_size: attr.size, + i_blocks: attr.blocks, + i_flags: RafsInodeFlags::from_bits_truncate(inode.flags()), + i_nlink: attr.nlink, + i_child_count: inode.get_child_count(), + i_name_size: inode.get_name_size(), + i_symlink_size: inode.get_symlink_size(), + i_rdev: attr.rdev, + i_mtime_nsec: attr.mtimensec, + i_mtime: attr.mtime, + } + } +} + +bitflags! { + /// Rafs v5 inode flags. + pub struct RafsInodeFlags: u64 { + /// Inode is a symlink. + const SYMLINK = 0x0000_0001; + /// Inode has hardlinks. + const HARDLINK = 0x0000_0002; + /// Inode has extended attributes. + const XATTR = 0x0000_0004; + /// Inode chunks has holes. 
+ const HAS_HOLE = 0x0000_0008; + } +} + +impl Default for RafsInodeFlags { + fn default() -> Self { + RafsInodeFlags::empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + metadata::{direct_v5::DirectSuperBlockV5, RafsSuperMeta}, + mock::MockInode, + }; + + #[test] + fn test_inode_wrapper() { + let mut wrapper_v5 = InodeWrapper::new(RafsVersion::V5); + let mut wrapper_v6 = InodeWrapper::new(RafsVersion::V6); + let mut wrapper_cache_v5 = + InodeWrapper::from_inode_info(Arc::new(CachedInodeV5::default())); + let wrapper_ondisk_v5 = InodeWrapper::from_inode_info(Arc::new(OndiskInodeWrapperV5 { + mapping: DirectSuperBlockV5::new(&RafsSuperMeta::default(), false), + offset: 0, + })); + + assert!(wrapper_v5.is_v5()); + assert!(!wrapper_v6.is_v5()); + assert!(wrapper_cache_v5.is_v5()); + assert!(wrapper_ondisk_v5.is_v5()); + assert!(!wrapper_v5.is_v6()); + assert!(wrapper_v6.is_v6()); + assert!(!wrapper_cache_v5.is_v6()); + assert!(!wrapper_ondisk_v5.is_v6()); + assert_eq!(wrapper_v5.inode_size(), 128); + + wrapper_v5.set_mode(0x0000_0001); + wrapper_v6.set_mode(0x0000_0002); + assert_eq!(wrapper_v5.mode(), 0x0000_0001); + assert_eq!(wrapper_v6.mode(), 0x0000_0002); + + assert!(!wrapper_v5.is_hardlink()); + assert!(!wrapper_v6.is_hardlink()); + assert!(!wrapper_cache_v5.is_hardlink()); + assert!(!wrapper_v5.is_symlink()); + assert!(!wrapper_v6.is_symlink()); + assert!(!wrapper_cache_v5.is_symlink()); + assert!(!wrapper_v5.is_chrdev()); + assert!(!wrapper_v6.is_chrdev()); + assert!(!wrapper_cache_v5.is_chrdev()); + assert!(!wrapper_v5.is_blkdev()); + assert!(!wrapper_v6.is_blkdev()); + assert!(!wrapper_v5.is_fifo()); + assert!(!wrapper_v6.is_fifo()); + assert!(!wrapper_v5.is_sock()); + assert!(!wrapper_v6.is_sock()); + assert!(!wrapper_cache_v5.is_sock()); + assert!(!wrapper_v5.has_hardlink()); + assert!(!wrapper_v6.has_hardlink()); + wrapper_v5.set_has_hardlink(true); + wrapper_v6.set_has_hardlink(true); + assert!(wrapper_v5.has_hardlink()); + assert!(wrapper_v6.has_hardlink()); + wrapper_v5.set_has_hardlink(false); + wrapper_v6.set_has_hardlink(false); + assert!(!wrapper_v5.has_hardlink()); + assert!(!wrapper_v6.has_hardlink()); + assert!(!wrapper_v5.has_xattr()); + assert!(!wrapper_v6.has_xattr()); + assert!(!wrapper_cache_v5.has_xattr()); + wrapper_v5.set_has_xattr(true); + wrapper_v6.set_has_xattr(true); + assert!(wrapper_v5.has_xattr()); + assert!(wrapper_v6.has_xattr()); + wrapper_v5.set_has_xattr(false); + wrapper_v6.set_has_xattr(false); + assert!(!wrapper_v5.has_xattr()); + assert!(!wrapper_v6.has_xattr()); + wrapper_v5.set_ino(0x0000_0001); + wrapper_v6.set_ino(0x0000_0002); + assert_eq!(wrapper_v5.ino(), 0x0000_0001); + assert_eq!(wrapper_v6.ino(), 0x0000_0002); + wrapper_v5.set_parent(0x0000_0004); + assert_eq!(wrapper_v5.parent(), 0x0000_0004); + assert_eq!(wrapper_cache_v5.size(), 0); + wrapper_v5.set_uid(0x0000_0001); + wrapper_v6.set_uid(0x0000_0002); + assert_eq!(wrapper_v5.uid(), 0x0000_0001); + assert_eq!(wrapper_v6.uid(), 0x0000_0002); + wrapper_v5.set_gid(0x0000_0001); + wrapper_v6.set_gid(0x0000_0002); + assert_eq!(wrapper_v5.gid(), 0x0000_0001); + assert_eq!(wrapper_v6.gid(), 0x0000_0002); + wrapper_v5.set_mtime(0x0000_0004); + wrapper_v6.set_mtime(0x0000_0008); + assert_eq!(wrapper_v5.mtime(), 0x0000_0004); + assert_eq!(wrapper_v6.mtime(), 0x0000_0008); + assert_eq!(wrapper_cache_v5.mtime(), 0x0000_0000); + wrapper_v5.set_mtime_nsec(0x0000_0004); + wrapper_v6.set_mtime_nsec(0x0000_0008); + assert_eq!(wrapper_v5.mtime_nsec(), 0x0000_0004); + 
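`RafsInodeFlags` packs SYMLINK/HARDLINK/XATTR/HAS_HOLE into a u64 bit field. The sketch below shows the set/test/clear operations that the wrapper setters perform; plain constants are used instead of the `bitflags!` macro to keep it dependency-free, and the bit values simply mirror the ones defined above.

```rust
const SYMLINK: u64 = 0x0000_0001;
const HARDLINK: u64 = 0x0000_0002;
const XATTR: u64 = 0x0000_0004;
const HAS_HOLE: u64 = 0x0000_0008;

fn main() {
    let mut flags = 0u64;
    // Mark an inode as a hardlink that also carries xattrs.
    flags |= HARDLINK | XATTR;
    assert!(flags & HARDLINK != 0);
    assert!(flags & XATTR != 0);
    assert!(flags & (SYMLINK | HAS_HOLE) == 0);
    // Clearing a flag only drops that bit; other flags are preserved.
    flags &= !XATTR;
    assert!(flags & XATTR == 0 && flags & HARDLINK != 0);
}
```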
assert_eq!(wrapper_v6.mtime_nsec(), 0x0000_0008); + assert_eq!(wrapper_cache_v5.mtime_nsec(), 0x0000_0000); + wrapper_v5.set_blocks(0x0000_0010); + wrapper_v6.set_blocks(0x0000_0020); + assert_eq!(wrapper_v5.blocks(), 0x0000_0010); + assert_eq!(wrapper_v6.blocks(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.blocks(), 0x0000_0000); + wrapper_v5.set_rdev(0x0000_0010); + wrapper_v6.set_rdev(0x0000_0020); + assert_eq!(wrapper_v5.rdev(), 0x0000_0010); + assert_eq!(wrapper_v6.rdev(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.rdev(), 0x0000_0000); + wrapper_v5.set_projid(0x0000_0100); + wrapper_v6.set_projid(0x0000_0200); + wrapper_v5.set_nlink(0x0000_0010); + wrapper_v6.set_nlink(0x0000_0020); + assert_eq!(wrapper_v5.nlink(), 0x0000_0010); + assert_eq!(wrapper_v6.nlink(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.nlink(), 0x0000_0000); + wrapper_v5.set_name_size(0x0000_0010); + wrapper_v6.set_name_size(0x0000_0020); + assert_eq!(wrapper_v5.name_size(), 0x0000_0010); + assert_eq!(wrapper_v6.name_size(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.name_size(), 0x0000_0000); + wrapper_v5.set_symlink_size(0x0000_0010); + wrapper_v6.set_symlink_size(0x0000_0020); + assert_eq!(wrapper_v5.symlink_size(), 0x0000_0010); + assert_eq!(wrapper_v6.symlink_size(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.symlink_size(), 0x0000_0000); + wrapper_v5.set_child_index(0x0000_0010); + wrapper_v6.set_child_index(0x0000_0020); + wrapper_cache_v5.set_child_index(0x0000_0008); + assert_eq!(wrapper_v5.child_index(), 0x0000_0010); + assert_eq!(wrapper_v6.child_index(), u32::MAX); + assert_eq!(wrapper_cache_v5.child_index(), 0x0000_0008); + wrapper_v5.set_child_count(0x0000_0010); + wrapper_v6.set_child_count(0x0000_0020); + assert_eq!(wrapper_v5.child_count(), 0x0000_0010); + assert_eq!(wrapper_v6.child_count(), 0x0000_0020); + assert_eq!(wrapper_cache_v5.child_count(), 0x0000_0000); + wrapper_v5.create_chunk(); + wrapper_v6.create_chunk(); + } + + #[test] + #[should_panic] + fn test_inode_size_v6() { + let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); + wrapper_v6.inode_size(); + } + + #[test] + #[should_panic] + fn test_inode_size_ref() { + let wrapper_cache_v5 = InodeWrapper::from_inode_info(Arc::new(CachedInodeV5::default())); + wrapper_cache_v5.inode_size(); + } + + #[test] + #[should_panic] + fn test_set_mode_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_mode(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_is_blk_dev_ref() { + let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.is_blkdev(); + } + + #[test] + #[should_panic] + fn test_is_fifo_ref() { + let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.is_fifo(); + } + + #[test] + #[should_panic] + fn test_has_hardlink_ref() { + let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.has_hardlink(); + } + + #[test] + #[should_panic] + fn test_set_has_hardlink_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_has_hardlink(true); + } + + #[test] + #[should_panic] + fn test_set_has_xattr_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_has_xattr(true); + } + + #[test] + #[should_panic] + fn test_set_ino_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + 
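The `#[should_panic]` tests above pin down that mutating a read-only, `Ref`-backed wrapper panics instead of silently succeeding. A minimal sketch of that testing pattern, with a hypothetical `ReadOnly` type standing in for the wrapper:

```rust
struct ReadOnly(u32);

impl ReadOnly {
    fn set(&mut self, _v: u32) {
        // Mirrors the wrapper's behavior: read-only data cannot be mutated.
        panic!("unexpected: read-only inode cannot be mutated");
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[should_panic]
    fn set_on_read_only_panics() {
        let mut r = ReadOnly(0);
        r.set(1);
    }
}

fn main() {}
```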
wrapper_mock.set_ino(Inode::default()); + } + + #[test] + #[should_panic] + fn test_set_parent_v6() { + let mut wrapper_v6 = InodeWrapper::new(RafsVersion::V6); + wrapper_v6.set_parent(Inode::default()); + } + + #[test] + #[should_panic] + fn test_set_parent_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_parent(Inode::default()); + } + + #[test] + #[should_panic] + fn test_get_parent_v6() { + let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); + wrapper_v6.parent(); + } + + #[test] + #[should_panic] + fn test_get_parent_ref() { + let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.parent(); + } + + #[test] + #[should_panic] + fn test_set_size_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_size(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_uid_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_uid(0x0000_0000_0001); + } + + #[test] + #[should_panic] + fn test_set_gid_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_gid(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_mtime_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_mtime(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_mtime_nsec_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_mtime_nsec(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_blocks_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_blocks(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_rdev_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_rdev(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_projid_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_projid(0x0000_0001); + } + + #[test] + #[should_panic] + fn test_set_digest_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_digest(RafsDigest::default()); + } + + #[test] + #[should_panic] + fn test_get_digest_v6() { + let wrapper_v6 = InodeWrapper::new(RafsVersion::V6); + wrapper_v6.digest(); + } + + #[test] + #[should_panic] + fn test_set_namesize_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_name_size(0x0000_0000); + } + + #[test] + #[should_panic] + fn test_set_symlink_size_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_symlink_size(0x0000_0000); + } + + #[test] + #[should_panic] + fn test_set_child_count_ref() { + let mut wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.set_child_count(0x0000_0000); + } + + #[test] + #[should_panic] + fn test_create_chunk_ref() { + let wrapper_mock = InodeWrapper::from_inode_info(Arc::new(MockInode::default())); + wrapper_mock.create_chunk(); + } + + #[test] + fn test_rafs_v6_inode() { + let mut inode = RafsV6Inode { + i_ino: 0x0000_0000, + i_uid: 0x0000_0001, + i_gid: 0x0000_0002, + i_projid: 0x0000_0003, + i_mode: 0x0000_0000, + i_size: 
0x0000_0005, + i_blocks: 0x0000_0006, + i_flags: RafsInodeFlags::default(), + i_nlink: 0x0000_0007, + i_child_count: 0x0000_0008, + i_name_size: 0x0000_0010, + i_symlink_size: 0x0000_0011, + i_rdev: 0x0000_0012, + i_mtime_nsec: 0x0000_0013, + i_mtime: 0x0000_0014, + }; + + inode.set_name_size(0x0000_0001); + inode.set_symlink_size(0x0000_0002); + + assert_eq!(inode.i_name_size, 0x0000_0001); + assert_eq!(inode.i_symlink_size, 0x0000_0002); + assert_eq!(inode.uidgid(), (0x0000_0001, 0x0000_0002)); + assert_eq!(inode.mtime(), (0x0000_0014 as u64, 0x0000_0013)); + assert_eq!(inode.mode(), 0x0000_0000); + assert!(!inode.is_chrdev()); + assert!(!inode.is_blkdev()); + assert!(!inode.is_fifo()); + assert!(!inode.is_sock()); + assert!(!inode.is_hardlink()); + assert!(!inode.has_hardlink()); + assert!(!inode.has_xattr()); + assert!(!inode.has_hole()); + } +} diff --git a/rafs/src/metadata/layout/mod.rs b/rafs/src/metadata/layout/mod.rs index 8581e89578b..dfc67dbbf7e 100644 --- a/rafs/src/metadata/layout/mod.rs +++ b/rafs/src/metadata/layout/mod.rs @@ -1,438 +1,438 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Rafs filesystem metadata layout and data structures. - -use std::collections::HashMap; -use std::convert::TryInto; -use std::ffi::{OsStr, OsString}; -use std::fmt::{Debug, Formatter}; -use std::io::Result; -use std::mem::size_of; -use std::os::unix::ffi::OsStrExt; - -use fuse_backend_rs::abi::fuse_abi::ROOT_ID; -use nydus_utils::ByteSize; - -use crate::metadata::layout::v5::RAFSV5_ALIGNMENT; - -/// Version number for Rafs v4. -pub const RAFS_SUPER_VERSION_V4: u32 = 0x400; -/// Version number for Rafs v5. -pub const RAFS_SUPER_VERSION_V5: u32 = 0x500; -/// Version number for Rafs v6. -pub const RAFS_SUPER_VERSION_V6: u32 = 0x600; -/// Minimal version of Rafs supported. -pub const RAFS_SUPER_MIN_VERSION: u32 = RAFS_SUPER_VERSION_V4; - -/// Inode number for Rafs root inode. -pub const RAFS_V5_ROOT_INODE: u64 = ROOT_ID; - -/// Type for filesystem xattr attribute key. -pub type XattrName = Vec; -/// Type for filesystem xattr attribute value. -pub type XattrValue = Vec; - -pub mod v5; -pub mod v6; - -pub enum RafsBlobTable { - V5(v5::RafsV5BlobTable), - V6(v6::RafsV6BlobTable), -} - -#[doc(hidden)] -#[macro_export] -macro_rules! 
impl_bootstrap_converter { - ($T: ty) => { - impl TryFrom<&[u8]> for &$T { - type Error = std::io::Error; - - fn try_from(buf: &[u8]) -> std::result::Result { - let ptr = buf.as_ptr() as *const u8; - if buf.len() != size_of::<$T>() - || ptr as usize & (std::mem::align_of::<$T>() - 1) != 0 - { - return Err(einval!("convert failed")); - } - - Ok(unsafe { &*(ptr as *const $T) }) - } - } - - impl TryFrom<&mut [u8]> for &mut $T { - type Error = std::io::Error; - - fn try_from(buf: &mut [u8]) -> std::result::Result { - let ptr = buf.as_ptr() as *mut u8 as *const u8; - if buf.len() != size_of::<$T>() - || ptr as usize & (std::mem::align_of::<$T>() - 1) != 0 - { - return Err(einval!("convert failed")); - } - - Ok(unsafe { &mut *(ptr as *const $T as *mut $T) }) - } - } - - impl AsRef<[u8]> for $T { - #[inline] - fn as_ref(&self) -> &[u8] { - let ptr = self as *const $T as *const u8; - unsafe { std::slice::from_raw_parts(ptr, size_of::<$T>()) } - } - } - - impl AsMut<[u8]> for $T { - #[inline] - fn as_mut(&mut self) -> &mut [u8] { - let ptr = self as *mut $T as *mut u8; - unsafe { std::slice::from_raw_parts_mut(ptr, size_of::<$T>()) } - } - } - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! impl_pub_getter_setter { - ($G: ident, $S: ident, $F: ident, $U: ty) => { - #[inline] - pub fn $G(&self) -> $U { - <$U>::from_le(self.$F) - } - - #[inline] - pub fn $S(&mut self, $F: $U) { - self.$F = <$U>::to_le($F); - } - }; -} - -/// Parse a utf8 byte slice into two strings. -pub fn parse_string(buf: &[u8]) -> Result<(&str, &str)> { - std::str::from_utf8(buf) - .map(|origin| { - if let Some(pos) = origin.find('\0') { - let (a, b) = origin.split_at(pos); - (a, &b[1..]) - } else { - (origin, "") - } - }) - .map_err(|e| einval!(format!("failed in parsing string, {:?}", e))) -} - -/// Convert a byte slice into OsStr. -pub fn bytes_to_os_str(buf: &[u8]) -> &OsStr { - OsStr::from_bytes(buf) -} - -/// Parse a byte slice into xattr pairs and invoke the callback for each xattr pair. -/// -/// The iteration breaks if the callback returns false. -pub fn parse_xattr(data: &[u8], size: usize, mut cb: F) -> Result<()> -where - F: FnMut(&OsStr, XattrValue) -> bool, -{ - if data.len() < size { - return Err(einval!("invalid xattr content size")); - } - - let mut rest_data = &data[0..size]; - let mut i: usize = 0; - - while i < size { - if rest_data.len() < size_of::() { - return Err(einval!( - "invalid xattr content, no enough data for xattr pair size" - )); - } - - let (pair_size, rest) = rest_data.split_at(size_of::()); - let pair_size = u32::from_le_bytes( - pair_size - .try_into() - .map_err(|_| einval!("failed to parse xattr pair size"))?, - ) as usize; - i += size_of::(); - - if rest.len() < pair_size { - return Err(einval!( - "inconsistent xattr (size, data) pair, size is too big" - )); - } - - let (pair, rest) = rest.split_at(pair_size); - if let Some(pos) = pair.iter().position(|&c| c == 0) { - let (name, value) = pair.split_at(pos); - let name = OsStr::from_bytes(name); - let value = value[1..].to_vec(); - if !cb(name, value) { - break; - } - } - - i += pair_size; - rest_data = rest; - } - - Ok(()) -} - -/// Parse a byte slice into xattr name list. -pub fn parse_xattr_names(data: &[u8], size: usize) -> Result> { - let mut result = Vec::new(); - - parse_xattr(data, size, |name, _| { - result.push(name.as_bytes().to_vec()); - true - })?; - - Ok(result) -} - -/// Parse a 'buf' to xattr value by xattr name. 
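`parse_xattr()` walks a buffer of `(u32 size, name\0value)` pairs. The sketch below builds one such pair and decodes it with a simplified parser; it assumes little-endian size prefixes, matching `u32::from_le_bytes` in the code above, and `parse_pairs` is an illustrative helper rather than the library API.

```rust
fn parse_pairs(data: &[u8]) -> Vec<(String, Vec<u8>)> {
    let mut out = Vec::new();
    let mut rest = data;
    while rest.len() >= 4 {
        // Little-endian u32 length prefix for the following pair.
        let pair_size =
            u32::from_le_bytes([rest[0], rest[1], rest[2], rest[3]]) as usize;
        rest = &rest[4..];
        if rest.len() < pair_size {
            break;
        }
        let (pair, tail) = rest.split_at(pair_size);
        // The pair itself is `name\0value`.
        if let Some(pos) = pair.iter().position(|&c| c == 0) {
            let name = String::from_utf8_lossy(&pair[..pos]).into_owned();
            out.push((name, pair[pos + 1..].to_vec()));
        }
        rest = tail;
    }
    out
}

fn main() {
    // Encode "user.demo" -> b"1" as one (size, name\0value) pair.
    let pair = b"user.demo\x001".to_vec();
    let mut buf = Vec::new();
    buf.extend_from_slice(&(pair.len() as u32).to_le_bytes());
    buf.extend_from_slice(&pair);

    let pairs = parse_pairs(&buf);
    assert_eq!(pairs[0].0, "user.demo");
    assert_eq!(pairs[0].1, b"1".to_vec());
}
```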
-pub fn parse_xattr_value(data: &[u8], size: usize, name: &OsStr) -> Result> { - let mut value = None; - - parse_xattr(data, size, |_name, _value| { - if _name == name { - value = Some(_value); - // stop the iteration if we found the xattr name. - return false; - } - true - })?; - - Ok(value) -} - -/// Valid prefixes of extended attributes -/// -/// Please keep in consistence with `RAFSV6_XATTR_TYPES`. -pub const RAFS_XATTR_PREFIXES: [&str; 5] = [ - "user.", - "security.", - "trusted.", - "system.posix_acl_access", - "system.posix_acl_default", -]; - -/// Rafs inode extended attributes. -/// -/// An extended attribute is a (String, String) pair associated with a inode. -#[derive(Clone, Default)] -pub struct RafsXAttrs { - pairs: HashMap, -} - -impl Debug for RafsXAttrs { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "extended attributes[...]") - } -} - -impl RafsXAttrs { - /// Create a new instance of `RafsXattrs`. - pub fn new() -> Self { - Self { - pairs: HashMap::new(), - } - } - - /// Get size needed to store the extended attributes. - pub fn size(&self) -> usize { - let mut size: usize = 0; - - for (key, value) in self.pairs.iter() { - size += size_of::(); - size += key.byte_size() + 1 + value.len(); - } - - size - } - - /// Get extended attribute with key `name`. - pub fn get(&self, name: &OsStr) -> Option<&XattrValue> { - self.pairs.get(name) - } - - /// Add or update an extended attribute. - pub fn add(&mut self, name: OsString, value: XattrValue) -> Result<()> { - let buf = name.as_bytes(); - if buf.len() > 255 || value.len() > 0x10000 { - return Err(einval!("xattr key/value is too big")); - } - for p in RAFS_XATTR_PREFIXES { - if buf.len() >= p.as_bytes().len() && &buf[..p.as_bytes().len()] == p.as_bytes() { - self.pairs.insert(name, value); - return Ok(()); - } - } - Err(einval!("invalid xattr key")) - } - - /// Remove an extended attribute - pub fn remove(&mut self, name: &OsStr) { - self.pairs.remove(name); - } - - /// Check whether there's any extended attribute. 
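`RafsXAttrs::add()` rejects any key whose namespace is not listed in `RAFS_XATTR_PREFIXES` and caps the key length at 255 bytes. A small sketch of that validation (the helper name is illustrative):

```rust
const PREFIXES: [&str; 5] = [
    "user.",
    "security.",
    "trusted.",
    "system.posix_acl_access",
    "system.posix_acl_default",
];

fn is_valid_xattr_key(key: &str) -> bool {
    // Same constraints as RafsXAttrs::add(): bounded length and a known prefix.
    key.len() <= 255 && PREFIXES.iter().any(|p| key.starts_with(p))
}

fn main() {
    assert!(is_valid_xattr_key("user.nydus.demo"));
    assert!(is_valid_xattr_key("security.capability"));
    assert!(!is_valid_xattr_key("myns.custom"));
}
```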
- pub fn is_empty(&self) -> bool { - self.pairs.is_empty() - } -} - -pub(crate) struct MetaRange { - start: u64, - size: u64, -} - -impl MetaRange { - pub fn new(start: u64, size: u64, aligned_size: bool) -> std::io::Result { - let mask = RAFSV5_ALIGNMENT as u64 - 1; - if start & mask == 0 - && (!aligned_size || size & mask == 0) - && start.checked_add(size).is_some() - { - Ok(MetaRange { start, size }) - } else { - Err(einval!(format!( - "invalid metadata range {}:{}", - start, size - ))) - } - } - - #[allow(dead_code)] - pub fn start(&self) -> u64 { - self.start - } - - #[allow(dead_code)] - pub fn size(&self) -> u64 { - self.size - } - - pub fn end(&self) -> u64 { - self.start + self.size - } - - pub fn is_subrange_of(&self, other: &MetaRange) -> bool { - self.start >= other.start && self.end() <= other.end() - } - - pub fn intersect_with(&self, other: &MetaRange) -> bool { - self.start < other.end() && self.end() > other.start - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::convert::TryFrom; - use std::ffi::OsString; - use vm_memory::ByteValued; - - #[repr(transparent)] - struct MockU32 { - v: u32, - } - - impl_bootstrap_converter!(MockU32); - - #[test] - fn test_bootstrap_convert() { - let mut value = 0x504030201u64; - let buf = value.as_mut_slice(); - - let v: std::io::Result<&MockU32> = (&buf[1..5]).try_into(); - assert!(v.is_err()); - - let v: std::io::Result<&MockU32> = (&buf[0..3]).try_into(); - assert!(v.is_err()); - - let v: std::io::Result<&mut MockU32> = (&mut buf[0..4]).try_into(); - let v = v.unwrap(); - assert_eq!(v.v, 0x4030201); - assert_eq!(v.as_mut().len(), 4); - assert_eq!(v.as_ref(), &[0x1u8, 0x2u8, 0x3u8, 0x4u8]); - } - - #[test] - fn test_parse_string() { - let (str1, str2) = parse_string(&[b'a']).unwrap(); - assert_eq!(str1, "a"); - assert_eq!(str2, ""); - - let (str1, str2) = parse_string(&[b'a', 0]).unwrap(); - assert_eq!(str1, "a"); - assert_eq!(str2, ""); - - let (str1, str2) = parse_string(&[b'a', 0, b'b']).unwrap(); - assert_eq!(str1, "a"); - assert_eq!(str2, "b"); - - let (str1, str2) = parse_string(&[b'a', 0, b'b', 0]).unwrap(); - assert_eq!(str1, "a"); - assert_eq!(str2, "b\0"); - - parse_string(&[0xffu8, 0xffu8, 0xffu8, 0xffu8, 0xffu8]).unwrap_err(); - } - - #[test] - fn test_parse_xattrs() { - let buf = [0x4u8, 0x0, 0x0, 0x0, b'a', 0, b'b']; - parse_xattr_names(&buf, 3).unwrap_err(); - parse_xattr_names(&buf, 8).unwrap_err(); - parse_xattr_names(&buf, 7).unwrap_err(); - - let buf = [0x3u8, 0x0, 0x0, 0x0, b'a', 0, b'b']; - let names = parse_xattr_names(&buf, 7).unwrap(); - assert_eq!(names.len(), 1); - assert_eq!(names[0], &[b'a']); - - let value = parse_xattr_value(&buf, 7, &OsString::from("a")).unwrap(); - assert_eq!(value, Some(vec![b'b'])); - } - - #[test] - fn test_meta_range() { - assert!(MetaRange::new(u64::MAX, 1, true).is_err()); - assert!(MetaRange::new(u64::MAX, 1, true).is_err()); - assert!(MetaRange::new(1, 1, true).is_err()); - assert!(MetaRange::new(8, 0, true).is_ok()); - assert!(MetaRange::new(8, 1, true).is_err()); - assert_eq!(MetaRange::new(8, 8, true).unwrap().start(), 8); - assert_eq!(MetaRange::new(8, 8, true).unwrap().size(), 8); - assert_eq!(MetaRange::new(8, 8, true).unwrap().end(), 16); - - let range = MetaRange::new(16, 16, true).unwrap(); - - assert!(!MetaRange::new(0, 8, true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(0, 16, true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(8, 8, true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(8, 16, 
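`MetaRange` is used to prove that each metadata table lies inside the overall metadata area and that tables do not overlap. A self-contained sketch of the two checks, with made-up offsets:

```rust
#[derive(Clone, Copy)]
struct Range {
    start: u64,
    size: u64,
}

impl Range {
    fn end(&self) -> u64 {
        self.start + self.size
    }

    // [start, end) fully contained in `other`.
    fn is_subrange_of(&self, other: &Range) -> bool {
        self.start >= other.start && self.end() <= other.end()
    }

    // [start, end) overlaps `other` by at least one byte.
    fn intersect_with(&self, other: &Range) -> bool {
        self.start < other.end() && self.end() > other.start
    }
}

fn main() {
    let meta = Range { start: 16, size: 16 };
    let inode_table = Range { start: 16, size: 8 };
    let blob_table = Range { start: 24, size: 8 };
    assert!(inode_table.is_subrange_of(&meta));
    assert!(blob_table.is_subrange_of(&meta));
    // The two tables themselves must not overlap.
    assert!(!inode_table.intersect_with(&blob_table));
}
```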
true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(8, 24, true).unwrap().is_subrange_of(&range)); - assert!(MetaRange::new(16, 8, true).unwrap().is_subrange_of(&range)); - assert!(MetaRange::new(16, 16, true).unwrap().is_subrange_of(&range)); - assert!(MetaRange::new(24, 8, true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(24, 16, true).unwrap().is_subrange_of(&range)); - assert!(!MetaRange::new(32, 8, true).unwrap().is_subrange_of(&range)); - - assert!(!MetaRange::new(0, 8, true).unwrap().intersect_with(&range)); - assert!(!MetaRange::new(0, 16, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(0, 24, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(8, 16, true).unwrap().intersect_with(&range)); - assert!(!MetaRange::new(8, 8, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(16, 8, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(16, 16, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(16, 24, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(24, 8, true).unwrap().intersect_with(&range)); - assert!(MetaRange::new(24, 16, true).unwrap().intersect_with(&range)); - assert!(!MetaRange::new(32, 8, true).unwrap().intersect_with(&range)); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Rafs filesystem metadata layout and data structures. + +use std::collections::HashMap; +use std::convert::TryInto; +use std::ffi::{OsStr, OsString}; +use std::fmt::{Debug, Formatter}; +use std::io::Result; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; + +use fuse_backend_rs::abi::fuse_abi::ROOT_ID; +use nydus_utils::ByteSize; + +use crate::metadata::layout::v5::RAFSV5_ALIGNMENT; + +/// Version number for Rafs v4. +pub const RAFS_SUPER_VERSION_V4: u32 = 0x400; +/// Version number for Rafs v5. +pub const RAFS_SUPER_VERSION_V5: u32 = 0x500; +/// Version number for Rafs v6. +pub const RAFS_SUPER_VERSION_V6: u32 = 0x600; +/// Minimal version of Rafs supported. +pub const RAFS_SUPER_MIN_VERSION: u32 = RAFS_SUPER_VERSION_V4; + +/// Inode number for Rafs root inode. +pub const RAFS_V5_ROOT_INODE: u64 = ROOT_ID; + +/// Type for filesystem xattr attribute key. +pub type XattrName = Vec; +/// Type for filesystem xattr attribute value. +pub type XattrValue = Vec; + +pub mod v5; +pub mod v6; + +pub enum RafsBlobTable { + V5(v5::RafsV5BlobTable), + V6(v6::RafsV6BlobTable), +} + +#[doc(hidden)] +#[macro_export] +macro_rules! 
impl_bootstrap_converter { + ($T: ty) => { + impl TryFrom<&[u8]> for &$T { + type Error = std::io::Error; + + fn try_from(buf: &[u8]) -> std::result::Result { + let ptr = buf.as_ptr() as *const u8; + if buf.len() != size_of::<$T>() + || ptr as usize & (std::mem::align_of::<$T>() - 1) != 0 + { + return Err(einval!("convert failed")); + } + + Ok(unsafe { &*(ptr as *const $T) }) + } + } + + impl TryFrom<&mut [u8]> for &mut $T { + type Error = std::io::Error; + + fn try_from(buf: &mut [u8]) -> std::result::Result { + let ptr = buf.as_ptr() as *mut u8 as *const u8; + if buf.len() != size_of::<$T>() + || ptr as usize & (std::mem::align_of::<$T>() - 1) != 0 + { + return Err(einval!("convert failed")); + } + + Ok(unsafe { &mut *(ptr as *const $T as *mut $T) }) + } + } + + impl AsRef<[u8]> for $T { + #[inline] + fn as_ref(&self) -> &[u8] { + let ptr = self as *const $T as *const u8; + unsafe { std::slice::from_raw_parts(ptr, size_of::<$T>()) } + } + } + + impl AsMut<[u8]> for $T { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + let ptr = self as *mut $T as *mut u8; + unsafe { std::slice::from_raw_parts_mut(ptr, size_of::<$T>()) } + } + } + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! impl_pub_getter_setter { + ($G: ident, $S: ident, $F: ident, $U: ty) => { + #[inline] + pub fn $G(&self) -> $U { + <$U>::from_le(self.$F) + } + + #[inline] + pub fn $S(&mut self, $F: $U) { + self.$F = <$U>::to_le($F); + } + }; +} + +/// Parse a utf8 byte slice into two strings. +pub fn parse_string(buf: &[u8]) -> Result<(&str, &str)> { + std::str::from_utf8(buf) + .map(|origin| { + if let Some(pos) = origin.find('\0') { + let (a, b) = origin.split_at(pos); + (a, &b[1..]) + } else { + (origin, "") + } + }) + .map_err(|e| einval!(format!("failed in parsing string, {:?}", e))) +} + +/// Convert a byte slice into OsStr. +pub fn bytes_to_os_str(buf: &[u8]) -> &OsStr { + OsStr::from_bytes(buf) +} + +/// Parse a byte slice into xattr pairs and invoke the callback for each xattr pair. +/// +/// The iteration breaks if the callback returns false. +pub fn parse_xattr(data: &[u8], size: usize, mut cb: F) -> Result<()> +where + F: FnMut(&OsStr, XattrValue) -> bool, +{ + if data.len() < size { + return Err(einval!("invalid xattr content size")); + } + + let mut rest_data = &data[0..size]; + let mut i: usize = 0; + + while i < size { + if rest_data.len() < size_of::() { + return Err(einval!( + "invalid xattr content, no enough data for xattr pair size" + )); + } + + let (pair_size, rest) = rest_data.split_at(size_of::()); + let pair_size = u32::from_le_bytes( + pair_size + .try_into() + .map_err(|_| einval!("failed to parse xattr pair size"))?, + ) as usize; + i += size_of::(); + + if rest.len() < pair_size { + return Err(einval!( + "inconsistent xattr (size, data) pair, size is too big" + )); + } + + let (pair, rest) = rest.split_at(pair_size); + if let Some(pos) = pair.iter().position(|&c| c == 0) { + let (name, value) = pair.split_at(pos); + let name = OsStr::from_bytes(name); + let value = value[1..].to_vec(); + if !cb(name, value) { + break; + } + } + + i += pair_size; + rest_data = rest; + } + + Ok(()) +} + +/// Parse a byte slice into xattr name list. +pub fn parse_xattr_names(data: &[u8], size: usize) -> Result> { + let mut result = Vec::new(); + + parse_xattr(data, size, |name, _| { + result.push(name.as_bytes().to_vec()); + true + })?; + + Ok(result) +} + +/// Parse a 'buf' to xattr value by xattr name. 
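`impl_bootstrap_converter!` only reinterprets a byte buffer as an on-disk struct after verifying that the buffer length matches the struct size and that the pointer is properly aligned. The sketch below reproduces that guard for a hypothetical `DiskHeader`; it illustrates the validation, it is not the generated code itself.

```rust
use std::mem::{align_of, size_of};

#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
struct DiskHeader {
    magic: u32,
    version: u32,
}

fn cast_header(buf: &[u8]) -> Result<&DiskHeader, String> {
    let ptr = buf.as_ptr();
    if buf.len() != size_of::<DiskHeader>()
        || ptr as usize & (align_of::<DiskHeader>() - 1) != 0
    {
        return Err("convert failed".to_string());
    }
    // Safe only because size and alignment were just validated and
    // DiskHeader is a plain #[repr(C)] struct of integers.
    Ok(unsafe { &*(ptr as *const DiskHeader) })
}

fn main() {
    let header = DiskHeader { magic: 0x5241_4653, version: 0x500 };
    let bytes = unsafe {
        std::slice::from_raw_parts(
            &header as *const DiskHeader as *const u8,
            size_of::<DiskHeader>(),
        )
    };
    assert_eq!(cast_header(bytes).unwrap(), &header);
    assert!(cast_header(&bytes[1..]).is_err()); // wrong size is rejected
}
```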
+pub fn parse_xattr_value(data: &[u8], size: usize, name: &OsStr) -> Result> { + let mut value = None; + + parse_xattr(data, size, |_name, _value| { + if _name == name { + value = Some(_value); + // stop the iteration if we found the xattr name. + return false; + } + true + })?; + + Ok(value) +} + +/// Valid prefixes of extended attributes +/// +/// Please keep in consistence with `RAFSV6_XATTR_TYPES`. +pub const RAFS_XATTR_PREFIXES: [&str; 5] = [ + "user.", + "security.", + "trusted.", + "system.posix_acl_access", + "system.posix_acl_default", +]; + +/// Rafs inode extended attributes. +/// +/// An extended attribute is a (String, String) pair associated with a inode. +#[derive(Clone, Default)] +pub struct RafsXAttrs { + pairs: HashMap, +} + +impl Debug for RafsXAttrs { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "extended attributes[...]") + } +} + +impl RafsXAttrs { + /// Create a new instance of `RafsXattrs`. + pub fn new() -> Self { + Self { + pairs: HashMap::new(), + } + } + + /// Get size needed to store the extended attributes. + pub fn size(&self) -> usize { + let mut size: usize = 0; + + for (key, value) in self.pairs.iter() { + size += size_of::(); + size += key.byte_size() + 1 + value.len(); + } + + size + } + + /// Get extended attribute with key `name`. + pub fn get(&self, name: &OsStr) -> Option<&XattrValue> { + self.pairs.get(name) + } + + /// Add or update an extended attribute. + pub fn add(&mut self, name: OsString, value: XattrValue) -> Result<()> { + let buf = name.as_bytes(); + if buf.len() > 255 || value.len() > 0x10000 { + return Err(einval!("xattr key/value is too big")); + } + for p in RAFS_XATTR_PREFIXES { + if buf.len() >= p.as_bytes().len() && &buf[..p.as_bytes().len()] == p.as_bytes() { + self.pairs.insert(name, value); + return Ok(()); + } + } + Err(einval!("invalid xattr key")) + } + + /// Remove an extended attribute + pub fn remove(&mut self, name: &OsStr) { + self.pairs.remove(name); + } + + /// Check whether there's any extended attribute. 
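`RafsXAttrs::size()` charges each pair a `u32` length prefix plus the `key\0value` bytes. A tiny worked example of that accounting (types and names are illustrative):

```rust
use std::collections::HashMap;

fn xattrs_size(pairs: &HashMap<String, Vec<u8>>) -> usize {
    pairs
        .iter()
        .map(|(k, v)| std::mem::size_of::<u32>() + k.len() + 1 + v.len())
        .sum()
}

fn main() {
    let mut pairs = HashMap::new();
    pairs.insert("user.demo".to_string(), b"1".to_vec());
    // 4 (u32 size) + 9 ("user.demo") + 1 (NUL) + 1 (value) = 15 bytes.
    assert_eq!(xattrs_size(&pairs), 15);
}
```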
+ pub fn is_empty(&self) -> bool { + self.pairs.is_empty() + } +} + +pub(crate) struct MetaRange { + start: u64, + size: u64, +} + +impl MetaRange { + pub fn new(start: u64, size: u64, aligned_size: bool) -> std::io::Result { + let mask = RAFSV5_ALIGNMENT as u64 - 1; + if start & mask == 0 + && (!aligned_size || size & mask == 0) + && start.checked_add(size).is_some() + { + Ok(MetaRange { start, size }) + } else { + Err(einval!(format!( + "invalid metadata range {}:{}", + start, size + ))) + } + } + + #[allow(dead_code)] + pub fn start(&self) -> u64 { + self.start + } + + #[allow(dead_code)] + pub fn size(&self) -> u64 { + self.size + } + + pub fn end(&self) -> u64 { + self.start + self.size + } + + pub fn is_subrange_of(&self, other: &MetaRange) -> bool { + self.start >= other.start && self.end() <= other.end() + } + + pub fn intersect_with(&self, other: &MetaRange) -> bool { + self.start < other.end() && self.end() > other.start + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::convert::TryFrom; + use std::ffi::OsString; + use vm_memory::ByteValued; + + #[repr(transparent)] + struct MockU32 { + v: u32, + } + + impl_bootstrap_converter!(MockU32); + + #[test] + fn test_bootstrap_convert() { + let mut value = 0x504030201u64; + let buf = value.as_mut_slice(); + + let v: std::io::Result<&MockU32> = (&buf[1..5]).try_into(); + assert!(v.is_err()); + + let v: std::io::Result<&MockU32> = (&buf[0..3]).try_into(); + assert!(v.is_err()); + + let v: std::io::Result<&mut MockU32> = (&mut buf[0..4]).try_into(); + let v = v.unwrap(); + assert_eq!(v.v, 0x4030201); + assert_eq!(v.as_mut().len(), 4); + assert_eq!(v.as_ref(), &[0x1u8, 0x2u8, 0x3u8, 0x4u8]); + } + + #[test] + fn test_parse_string() { + let (str1, str2) = parse_string(&[b'a']).unwrap(); + assert_eq!(str1, "a"); + assert_eq!(str2, ""); + + let (str1, str2) = parse_string(&[b'a', 0]).unwrap(); + assert_eq!(str1, "a"); + assert_eq!(str2, ""); + + let (str1, str2) = parse_string(&[b'a', 0, b'b']).unwrap(); + assert_eq!(str1, "a"); + assert_eq!(str2, "b"); + + let (str1, str2) = parse_string(&[b'a', 0, b'b', 0]).unwrap(); + assert_eq!(str1, "a"); + assert_eq!(str2, "b\0"); + + parse_string(&[0xffu8, 0xffu8, 0xffu8, 0xffu8, 0xffu8]).unwrap_err(); + } + + #[test] + fn test_parse_xattrs() { + let buf = [0x4u8, 0x0, 0x0, 0x0, b'a', 0, b'b']; + parse_xattr_names(&buf, 3).unwrap_err(); + parse_xattr_names(&buf, 8).unwrap_err(); + parse_xattr_names(&buf, 7).unwrap_err(); + + let buf = [0x3u8, 0x0, 0x0, 0x0, b'a', 0, b'b']; + let names = parse_xattr_names(&buf, 7).unwrap(); + assert_eq!(names.len(), 1); + assert_eq!(names[0], &[b'a']); + + let value = parse_xattr_value(&buf, 7, &OsString::from("a")).unwrap(); + assert_eq!(value, Some(vec![b'b'])); + } + + #[test] + fn test_meta_range() { + assert!(MetaRange::new(u64::MAX, 1, true).is_err()); + assert!(MetaRange::new(u64::MAX, 1, true).is_err()); + assert!(MetaRange::new(1, 1, true).is_err()); + assert!(MetaRange::new(8, 0, true).is_ok()); + assert!(MetaRange::new(8, 1, true).is_err()); + assert_eq!(MetaRange::new(8, 8, true).unwrap().start(), 8); + assert_eq!(MetaRange::new(8, 8, true).unwrap().size(), 8); + assert_eq!(MetaRange::new(8, 8, true).unwrap().end(), 16); + + let range = MetaRange::new(16, 16, true).unwrap(); + + assert!(!MetaRange::new(0, 8, true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(0, 16, true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(8, 8, true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(8, 16, 
true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(8, 24, true).unwrap().is_subrange_of(&range)); + assert!(MetaRange::new(16, 8, true).unwrap().is_subrange_of(&range)); + assert!(MetaRange::new(16, 16, true).unwrap().is_subrange_of(&range)); + assert!(MetaRange::new(24, 8, true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(24, 16, true).unwrap().is_subrange_of(&range)); + assert!(!MetaRange::new(32, 8, true).unwrap().is_subrange_of(&range)); + + assert!(!MetaRange::new(0, 8, true).unwrap().intersect_with(&range)); + assert!(!MetaRange::new(0, 16, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(0, 24, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(8, 16, true).unwrap().intersect_with(&range)); + assert!(!MetaRange::new(8, 8, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(16, 8, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(16, 16, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(16, 24, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(24, 8, true).unwrap().intersect_with(&range)); + assert!(MetaRange::new(24, 16, true).unwrap().intersect_with(&range)); + assert!(!MetaRange::new(32, 8, true).unwrap().intersect_with(&range)); + } +} diff --git a/rafs/src/metadata/layout/v5.rs b/rafs/src/metadata/layout/v5.rs index 52a4c21a358..dd1de87b73a 100644 --- a/rafs/src/metadata/layout/v5.rs +++ b/rafs/src/metadata/layout/v5.rs @@ -1,1968 +1,1968 @@ -// Copyright 2020-2021 Ant Group. All rights reserved. -// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! RAFS v5 on disk layout structures. -//! -//! # RAFS File System Meta Data Format Version 5 -//! Previously RAFS has different formats for on disk meta data and runtime meta data. So when -//! initializing an RAFS instance, it will sequentially read and parse the on disk meta data, -//! build a copy of in memory runtime meta data. This may cause slow startup and cost too much -//! memory to build in memory meta data. -//! -//! The RAFS File System Meta Data Format Version 5 (aka V5) is defined to support directly mapping -//! RAFS meta data into process as runtime meta data, so we could parse RAFS on disk meta data on -//! demand. The V5 meta data format has following changes: -//! 1) file system version number been bumped to 0x500. -//! 2) Directory inodes will sequentially assign globally unique `child index` to it's child inodes. -//! Two fields, "child_index" and "child_count", have been added to the OndiskInode struct. -//! 3) For inodes with hard link count as 1, the `child index` equals to its assigned inode number. -//! 4) For inodes with hard link count bigger than 1, the `child index` may be different from the -//! assigned inode number. Among those child entries linking to the same inode, there's will be -//! one and only one child entry having the inode number as its assigned `child index'. -//! 5) A child index mapping table is introduced, which is used to map `child index` into offset -//! from the base of the super block. The formula to calculate the inode offset is: -//! `inode_offset_from_sb = inode_table[child_index] << 3` -//! 6) The child index mapping table follows the super block by default. -//! -//! Giving above definition, we could get the inode object for an inode number or child index as: -//! inode_ptr = sb_base_ptr + inode_offset_from_sb(inode_number) -//! inode_ptr = sb_base_ptr + inode_offset_from_sb(child_index) -//! -//! 
On the other hand, Rafs v4 is compatible with Rafs v5, so Rafs v5 implementation supports -//! both v4 and v5 metadata. - -use std::cmp; -use std::convert::TryFrom; -use std::ffi::{OsStr, OsString}; -use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; -use std::io::{Read, Result}; -use std::mem::size_of; -use std::ops::Deref; -use std::os::unix::ffi::OsStrExt; -use std::sync::Arc; - -use nydus_utils::digest::{self, DigestHasher, RafsDigest}; -use nydus_utils::{compress, ByteSize}; -use vm_memory::VolatileMemory; -// With Rafs v5, the storage manager needs to access file system metadata to decompress the -// compressed blob file. To avoid circular dependency, the following Rafs v5 metadata structures -// have been moved into the storage manager. -use nydus_storage::device::v5::BlobV5ChunkInfo; -use nydus_storage::device::{ - BlobChunkFlags, BlobChunkInfo, BlobFeatures, BlobInfo, BlobIoDesc, BlobIoVec, -}; - -use crate::metadata::inode::RafsInodeFlags; -use crate::metadata::layout::{bytes_to_os_str, MetaRange, RafsXAttrs, RAFS_SUPER_VERSION_V5}; -use crate::metadata::md_v5::V5IoChunk; -use crate::metadata::{ - Inode, RafsInode, RafsStore, RafsSuperFlags, RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE, -}; -use crate::{ - impl_bootstrap_converter, impl_pub_getter_setter, RafsInodeExt, RafsIoReader, RafsIoWrite, -}; - -pub(crate) const RAFSV5_ALIGNMENT: usize = 8; -pub(crate) const RAFSV5_SUPERBLOCK_SIZE: usize = 8192; -pub(crate) const RAFSV5_EXT_BLOB_ENTRY_SIZE: usize = 64; - -const RAFSV5_SUPER_MAGIC: u32 = 0x5241_4653; -const RAFSV5_SUPERBLOCK_RESERVED_SIZE: usize = RAFSV5_SUPERBLOCK_SIZE - 80; -const RAFSV5_EXT_BLOB_RESERVED_SIZE: usize = RAFSV5_EXT_BLOB_ENTRY_SIZE - 24; - -/// Trait to get information about a Rafs v5 inode. -pub(crate) trait RafsV5InodeOps { - /// Get the `BlobInfo` object corresponding to the `blob_index`. - fn get_blob_by_index(&self, blob_index: u32) -> Result>; - - /// Get chunk size for the inode. - fn get_chunk_size(&self) -> u32; - - /// Check whether the inode has hole chunk. - fn has_hole(&self) -> bool; -} - -pub(crate) trait RafsV5InodeChunkOps { - /// Get chunk info object for a chunk. - fn get_chunk_info_v5(&self, idx: u32) -> Result>; -} - -/// Rafs v5 superblock on disk metadata, 8192 bytes. -#[repr(C)] -#[derive(Clone, Copy)] -pub struct RafsV5SuperBlock { - /// RAFS super magic - s_magic: u32, - /// RAFS version - s_fs_version: u32, - /// superblock on disk size - s_sb_size: u32, - /// block size - s_block_size: u32, - /// superblock flags - s_flags: u64, - /// V5: Number of unique inodes(hard link counts as 1). - s_inodes_count: u64, - /// V5: Offset of inode table - s_inode_table_offset: u64, - /// Those inodes which need to prefetch will have there indexes put into this table. - /// Then Rafs has a hint to prefetch inodes and doesn't have to load all inodes to page cache - /// under *direct* metadata mode. It helps save memory usage. - /// [idx1:u32, idx2:u32, idx3:u32 ...] 
- s_prefetch_table_offset: u64, - /// V5: Offset of blob table - s_blob_table_offset: u64, - /// V5: Size of inode table - s_inode_table_entries: u32, - s_prefetch_table_entries: u32, // 64 bytes - /// V5: Entries of blob table - s_blob_table_size: u32, - s_extended_blob_table_entries: u32, // 72 bytes - /// Extended Blob Table - s_extended_blob_table_offset: u64, // 80 bytes --- reduce me from `RAFS_SUPERBLOCK_RESERVED_SIZE` - /// Unused area - s_reserved: [u8; RAFSV5_SUPERBLOCK_RESERVED_SIZE], -} - -impl RafsV5SuperBlock { - /// Create a new instance of `RafsV5SuperBlock`. - pub fn new() -> Self { - Self::default() - } - - /// Check whether it's a valid Rafs v5 super block. - pub fn detect(&self) -> bool { - self.is_rafs_v5() - } - - /// Check whether it's super block for Rafs v4/v5. - pub fn is_rafs_v5(&self) -> bool { - self.magic() == RAFSV5_SUPER_MAGIC && self.version() == RAFS_SUPER_VERSION_V5 - } - - /// Validate the Rafs v5 super block. - pub fn validate(&self, meta_size: u64) -> Result<()> { - if !self.is_rafs_v5() { - return Err(einval!("invalid super block version number")); - } else if self.sb_size() as usize != RAFSV5_SUPERBLOCK_SIZE - || meta_size <= RAFSV5_SUPERBLOCK_SIZE as u64 - { - return Err(einval!("invalid super block blob size")); - } else if !self.block_size().is_power_of_two() - || self.block_size() < 0x1000 - || (self.block_size() as u64 > RAFS_MAX_CHUNK_SIZE && self.block_size() != 4 << 20) - { - // Stargz has a special chunk size of 4MB. - return Err(einval!("invalid block size")); - } else if RafsSuperFlags::from_bits(self.flags()).is_none() { - return Err(einval!("invalid super block flags")); - } - - let meta_range = MetaRange::new( - RAFSV5_SUPERBLOCK_SIZE as u64, - meta_size - RAFSV5_SUPERBLOCK_SIZE as u64, - true, - )?; - - let inodes_count = self.inodes_count(); - let inode_table_offset = self.inode_table_offset(); - let inode_table_entries = self.inode_table_entries() as u64; - let inode_table_size = inode_table_entries * size_of::() as u64; - let inode_table_range = MetaRange::new(inode_table_offset, inode_table_size, false)?; - if inodes_count > inode_table_entries || !inode_table_range.is_subrange_of(&meta_range) { - return Err(einval!("invalid inode table count, offset or entries.")); - } - - let blob_table_offset = self.blob_table_offset(); - let blob_table_size = self.blob_table_size() as u64; - let blob_table_range = MetaRange::new(blob_table_offset, blob_table_size, false)?; - if !blob_table_range.is_subrange_of(&meta_range) - || blob_table_range.intersect_with(&inode_table_range) - { - return Err(einval!("invalid blob table offset or size.")); - } - - let ext_blob_table_offset = self.extended_blob_table_offset(); - let ext_blob_table_size = - self.extended_blob_table_entries() as u64 * RAFSV5_EXT_BLOB_ENTRY_SIZE as u64; - let ext_blob_table_range = - MetaRange::new(ext_blob_table_offset, ext_blob_table_size, true)?; - if ext_blob_table_size != 0 - && (!ext_blob_table_range.is_subrange_of(&meta_range) - || ext_blob_table_range.intersect_with(&inode_table_range) - || ext_blob_table_range.intersect_with(&blob_table_range)) - { - return Err(einval!("invalid extended blob table offset or size.")); - } - - let prefetch_table_offset = self.prefetch_table_offset(); - let prefetch_table_size = self.prefetch_table_entries() as u64 * size_of::() as u64; - let prefetch_table_range = - MetaRange::new(prefetch_table_offset, prefetch_table_size, false)?; - if prefetch_table_size != 0 - && (!prefetch_table_range.is_subrange_of(&meta_range) - || 
prefetch_table_range.intersect_with(&inode_table_range) - || prefetch_table_range.intersect_with(&blob_table_range) - || (ext_blob_table_size != 0 - && prefetch_table_range.intersect_with(&ext_blob_table_range))) - { - return Err(einval!("invalid prefetch table offset or size.")); - } - - Ok(()) - } - - /// Set chunk size. - pub fn set_chunk_size(&mut self, chunk_size: u32) { - debug_assert!(chunk_size.is_power_of_two()); - self.s_block_size = chunk_size; - } - - /// Set compression algorithm to handle chunk of the Rafs filesystem. - pub fn set_compressor(&mut self, compressor: compress::Algorithm) { - let c: RafsSuperFlags = compressor.into(); - - self.s_flags &= !RafsSuperFlags::COMPRESSION_NONE.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_LZ4.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_GZIP.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_ZSTD.bits(); - self.s_flags |= c.bits(); - } - - /// Set message digest algorithm to handle chunk of the Rafs filesystem. - pub fn set_digester(&mut self, digester: digest::Algorithm) { - let c: RafsSuperFlags = digester.into(); - - self.s_flags &= !RafsSuperFlags::HASH_BLAKE3.bits(); - self.s_flags &= !RafsSuperFlags::HASH_SHA256.bits(); - self.s_flags |= c.bits(); - } - - /// Enable explicit Uid/Gid feature. - pub fn set_explicit_uidgid(&mut self) { - self.s_flags |= RafsSuperFlags::EXPLICIT_UID_GID.bits(); - } - - /// Enable support of filesystem xattr. - pub fn set_has_xattr(&mut self) { - self.s_flags |= RafsSuperFlags::HAS_XATTR.bits(); - } - - impl_pub_getter_setter!(magic, set_magic, s_magic, u32); - impl_pub_getter_setter!(version, set_version, s_fs_version, u32); - impl_pub_getter_setter!(sb_size, set_sb_size, s_sb_size, u32); - impl_pub_getter_setter!(block_size, set_block_size, s_block_size, u32); - impl_pub_getter_setter!(flags, set_flags, s_flags, u64); - impl_pub_getter_setter!(inodes_count, set_inodes_count, s_inodes_count, u64); - impl_pub_getter_setter!( - inode_table_entries, - set_inode_table_entries, - s_inode_table_entries, - u32 - ); - impl_pub_getter_setter!( - inode_table_offset, - set_inode_table_offset, - s_inode_table_offset, - u64 - ); - impl_pub_getter_setter!(blob_table_size, set_blob_table_size, s_blob_table_size, u32); - impl_pub_getter_setter!( - blob_table_offset, - set_blob_table_offset, - s_blob_table_offset, - u64 - ); - impl_pub_getter_setter!( - prefetch_table_offset, - set_prefetch_table_offset, - s_prefetch_table_offset, - u64 - ); - impl_pub_getter_setter!( - prefetch_table_entries, - set_prefetch_table_entries, - s_prefetch_table_entries, - u32 - ); - impl_pub_getter_setter!( - extended_blob_table_offset, - set_extended_blob_table_offset, - s_extended_blob_table_offset, - u64 - ); - impl_pub_getter_setter!( - extended_blob_table_entries, - set_extended_blob_table_entries, - s_extended_blob_table_entries, - u32 - ); - - /// Load a super block from a `RafsIoReader` object. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } - - /// Read Rafs v5 super block from a reader. 
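`set_compressor()` and `set_digester()` follow a clear-then-set pattern on the superblock flags: every bit of the algorithm group is cleared before the selected one is ORed in, so unrelated flags survive. The sketch below shows the pattern with illustrative bit values; the real `RafsSuperFlags` constants may differ.

```rust
// Bit values are illustrative only, not the actual RafsSuperFlags layout.
const COMPRESSION_NONE: u64 = 0x0010;
const COMPRESSION_LZ4: u64 = 0x0020;
const COMPRESSION_GZIP: u64 = 0x0040;
const COMPRESSION_ZSTD: u64 = 0x0080;

fn set_compressor(flags: &mut u64, new: u64) {
    // Clear the whole compression group, then set exactly one algorithm.
    *flags &= !(COMPRESSION_NONE | COMPRESSION_LZ4 | COMPRESSION_GZIP | COMPRESSION_ZSTD);
    *flags |= new;
}

fn main() {
    let mut flags = COMPRESSION_LZ4 | 0x1; // an unrelated flag stays intact
    set_compressor(&mut flags, COMPRESSION_ZSTD);
    assert_eq!(flags & COMPRESSION_LZ4, 0);
    assert_ne!(flags & COMPRESSION_ZSTD, 0);
    assert_ne!(flags & 0x1, 0);
}
```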
- pub fn read(r: &mut RafsIoReader) -> Result { - let mut sb = RafsV5SuperBlock::new(); - - r.read_exact(sb.as_mut())?; - - Ok(sb) - } -} - -impl RafsStore for RafsV5SuperBlock { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - w.validate_alignment(self.as_ref().len(), RAFSV5_ALIGNMENT) - } -} - -impl_bootstrap_converter!(RafsV5SuperBlock); - -impl Default for RafsV5SuperBlock { - fn default() -> Self { - Self { - s_magic: u32::to_le(RAFSV5_SUPER_MAGIC as u32), - s_fs_version: u32::to_le(RAFS_SUPER_VERSION_V5), - s_sb_size: u32::to_le(RAFSV5_SUPERBLOCK_SIZE as u32), - s_block_size: u32::to_le(RAFS_DEFAULT_CHUNK_SIZE as u32), - s_flags: u64::to_le(0), - s_inodes_count: u64::to_le(0), - s_inode_table_entries: u32::to_le(0), - s_inode_table_offset: u64::to_le(0), - s_prefetch_table_offset: u64::to_le(0), - s_prefetch_table_entries: u32::to_le(0), - s_blob_table_size: u32::to_le(0), - s_blob_table_offset: u64::to_le(0), - s_extended_blob_table_offset: u64::to_le(0), - s_extended_blob_table_entries: u32::to_le(0), - s_reserved: [0u8; RAFSV5_SUPERBLOCK_RESERVED_SIZE], - } - } -} - -impl Display for RafsV5SuperBlock { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - write!(f, "superblock: magic {:x}, version {:x}, sb_size {:x}, block_size {:x}, flags {:x}, inode_count {}", - self.magic(), self.version(), self.sb_size(), self.block_size(), - self.flags(), self.s_inodes_count) - } -} - -/// Rafs v5 on disk inode offset table. -#[derive(Clone, Default)] -pub struct RafsV5InodeTable { - /// Inode offset array. - pub data: Vec, -} - -impl RafsV5InodeTable { - /// Create a new instance of `RafsV5InodeTable`. - pub fn new(entries: usize) -> Self { - let table_size = rafsv5_align(entries * size_of::()) / size_of::(); - RafsV5InodeTable { - data: vec![0; table_size], - } - } - - /// Get size in bytes of the Rafs v5 inode table. - #[inline] - pub fn size(&self) -> usize { - rafsv5_align(self.data.len() * size_of::()) - } - - /// Get number of inodes in the table. - #[inline] - pub fn len(&self) -> usize { - self.data.len() - } - - /// Check whether the table is empty or not. - #[inline] - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Set inode offset in the metadata blob for an inode. - pub fn set(&mut self, ino: Inode, offset: u32) -> Result<()> { - if ino == 0 || ino > self.data.len() as u64 { - return Err(einval!(format!( - "invalid inode number {}, max {}", - ino, - self.data.len() - ))); - } else if offset as usize <= RAFSV5_SUPERBLOCK_SIZE || offset & 0x7 != 0 { - return Err(einval!(format!("invalid inode offset 0x{:x}", offset))); - } - - // The offset is aligned with 8 bytes to make it easier to validate RafsV5Inode. - let offset = offset >> 3; - self.data[(ino - 1) as usize] = u32::to_le(offset as u32); - - Ok(()) - } - - /// Get inode offset in the metadata blob of an inode. - pub fn get(&self, ino: Inode) -> Result { - if ino == 0 || ino > self.data.len() as u64 { - return Err(enoent!()); - } - - let offset = u32::from_le(self.data[(ino - 1) as usize]) as usize; - if offset <= (RAFSV5_SUPERBLOCK_SIZE >> 3) || offset >= (1usize << 29) { - return Err(einval!(format!( - "invalid offset 0x{:x} for inode {}", - offset, ino - ))); - } - - Ok((offset << 3) as u32) - } - - /// Load inode offset table for a `RafsIoReader` object. 
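`RafsV5InodeTable::set()`/`get()` exploit the 8-byte alignment of inode offsets: offsets are stored right-shifted by 3 bits and expanded again on lookup, matching the documented formula `inode_offset_from_sb = inode_table[child_index] << 3`. A minimal sketch of that packing:

```rust
fn pack_offset(offset: u32) -> u32 {
    // Offsets must be 8-byte aligned, so the low 3 bits carry no information.
    assert_eq!(offset & 0x7, 0, "offset must be 8-byte aligned");
    offset >> 3
}

fn unpack_offset(stored: u32) -> u32 {
    stored << 3
}

fn main() {
    let offset = 8192 + 128; // an aligned offset past the 8KB superblock
    let stored = pack_offset(offset);
    assert_eq!(unpack_offset(stored), offset);
}
```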
- pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - let (_, data, _) = unsafe { self.data.align_to_mut::() }; - r.read_exact(data)?; - Ok(()) - } -} - -impl RafsStore for RafsV5InodeTable { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - let (_, data, _) = unsafe { self.data.align_to::() }; - - w.write_all(data)?; - w.validate_alignment(data.len(), RAFSV5_ALIGNMENT) - } -} - -/// Rafs v5 on disk inode prefetch table. -/// -/// From super block disk structure, its start offset can be told. -/// In order not to load every meta/inode to page cache under rafs Direct -/// mode, which aims at saving physical memory. This prefetch table is -/// introduce. Regular files or directories which are specified during image -/// building will have their inode index persist in this disk table. -/// For a single directory, only its inode index will be put into the table. -/// But all of its descendants files(recursively) will be prefetch(by hint) -/// when rafs is mounted at the very beginning. -#[derive(Clone, Default)] -pub struct RafsV5PrefetchTable { - /// List of inode numbers for prefetch. - /// Note: It's not inode index of inodes table being stored here. - pub inodes: Vec, -} - -impl RafsV5PrefetchTable { - /// Create a new instance of `RafsV5PrefetchTable`. - pub fn new() -> RafsV5PrefetchTable { - RafsV5PrefetchTable { inodes: vec![] } - } - - /// Get content size of the inode prefetch table. - pub fn size(&self) -> usize { - rafsv5_align(self.len() * size_of::()) - } - - /// Get number of entries in the prefetch table. - pub fn len(&self) -> usize { - self.inodes.len() - } - - /// Check whether the inode prefetch table is empty. - pub fn is_empty(&self) -> bool { - self.inodes.is_empty() - } - - /// Add an inode into the inode prefetch table. - pub fn add_entry(&mut self, ino: u32) { - self.inodes.push(ino); - } - - /// Store the inode prefetch table to a writer. - pub fn store(&mut self, w: &mut dyn RafsIoWrite) -> Result { - let (_, data, _) = unsafe { self.inodes.align_to::() }; - w.write_all(data.as_ref())?; - - // OK. Let's see if we have to align... :-( - let cur_len = self.inodes.len() * size_of::(); - let padding_bytes = rafsv5_align(cur_len) - cur_len; - w.write_padding(padding_bytes)?; - - Ok(data.len() + padding_bytes) - } - - /// Load a inode prefetch table from a reader. - /// - /// Note: Generally, prefetch happens after loading bootstrap, so with methods operating - /// files with changing their offset won't bring errors. But we still use `pread` now so as - /// to make this method more stable and robust. Even dup(2) can't give us a separated file struct. - pub fn load_prefetch_table_from( - &mut self, - r: &mut RafsIoReader, - offset: u64, - entries: usize, - ) -> Result { - self.inodes = vec![0u32; entries]; - - let (_, data, _) = unsafe { self.inodes.align_to_mut::() }; - r.seek_to_offset(offset)?; - r.read_exact(data)?; - - Ok(data.len()) - } -} - -/// Rafs v5 blob description table. -#[derive(Clone, Debug, Default)] -pub struct RafsV5BlobTable { - /// Base blob information array. - pub entries: Vec>, - /// Extended blob information array. - pub extended: RafsV5ExtBlobTable, -} - -impl RafsV5BlobTable { - /// Create a new instance of `RafsV5BlobTable`. 
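The v5 tables pad their payload up to `RAFSV5_ALIGNMENT` (8 bytes) before being written; for example, `RafsV5PrefetchTable::store()` writes `rafsv5_align(cur_len) - cur_len` padding bytes. A sketch of that round-up helper, assuming the usual power-of-two alignment trick:

```rust
const ALIGNMENT: usize = 8;

fn rafsv5_align(size: usize) -> usize {
    // Round `size` up to the next multiple of ALIGNMENT (a power of two).
    (size + ALIGNMENT - 1) & !(ALIGNMENT - 1)
}

fn main() {
    // A prefetch table with 3 u32 entries has 12 payload bytes and
    // therefore needs 4 padding bytes.
    let payload = 3 * std::mem::size_of::<u32>();
    assert_eq!(rafsv5_align(payload), 16);
    assert_eq!(rafsv5_align(16), 16);
}
```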
- pub fn new() -> Self { - RafsV5BlobTable { - entries: Vec::new(), - extended: RafsV5ExtBlobTable::new(), - } - } - - /// Get blob table size, aligned with RAFS_ALIGNMENT bytes - pub fn size(&self) -> usize { - if self.entries.is_empty() { - return 0; - } - // Blob entry split with '\0' - rafsv5_align( - self.entries.iter().fold(0usize, |size, entry| { - let entry_size = size_of::() * 2 + entry.blob_id().len(); - size + entry_size + 1 - }) - 1, - ) - } - - /// Add information for new blob into the blob information table. - #[allow(clippy::too_many_arguments)] - pub fn add( - &mut self, - blob_id: String, - prefetch_offset: u32, - prefetch_size: u32, - chunk_size: u32, - chunk_count: u32, - uncompressed_size: u64, - compressed_size: u64, - blob_features: BlobFeatures, - flags: RafsSuperFlags, - is_chunkdict: bool, - ) -> u32 { - let blob_index = self.entries.len() as u32; - let mut blob_info = BlobInfo::new( - blob_index, - blob_id, - uncompressed_size, - compressed_size, - chunk_size, - chunk_count, - blob_features, - ); - - blob_info.set_compressor(flags.into()); - blob_info.set_digester(flags.into()); - blob_info.set_prefetch_info(prefetch_offset as u64, prefetch_size as u64); - if is_chunkdict { - blob_info.set_chunkdict_generated(true); - } - - self.entries.push(Arc::new(blob_info)); - self.extended.add( - chunk_count, - uncompressed_size, - compressed_size, - blob_features.bits(), - ); - - blob_index - } - - /// Get base information for a blob. - #[inline] - pub fn get(&self, blob_index: u32) -> Result> { - if blob_index >= self.entries.len() as u32 { - return Err(enoent!("blob not found")); - } - Ok(self.entries[blob_index as usize].clone()) - } - - /// Load blob information table from a reader. - pub fn load( - &mut self, - r: &mut RafsIoReader, - blob_table_size: u32, - chunk_size: u32, - flags: RafsSuperFlags, - ) -> Result<()> { - if blob_table_size == 0 { - return Ok(()); - } - - debug!("RAFS v5 blob table size {}", blob_table_size); - let mut data = vec![0u8; blob_table_size as usize]; - r.read_exact(&mut data)?; - - // Each entry frame looks like: - // u32 | u32 | string | trailing '\0' , except that the last entry has no trailing '\0' - let mut buf = data.as_mut_slice(); - while buf.len() > 2 * size_of::() { - let readahead_offset = - unsafe { std::ptr::read_unaligned::(buf[0..4].as_ptr() as *const u32) }; - let readahead_size = - unsafe { std::ptr::read_unaligned::(buf[4..8].as_ptr() as *const u32) }; - - let mut pos = 8; - while pos < buf.len() && buf[pos] != 0 { - pos += 1; - } - let blob_id = std::str::from_utf8(&buf[8..pos]) - .map(|v| v.to_owned()) - .map_err(|e| einval!(e))?; - if pos == buf.len() { - buf = &mut buf[pos..]; - } else { - buf = &mut buf[pos + 1..]; - } - debug!("blob {} {:?}", self.entries.len(), blob_id); - - let index = self.entries.len(); - let (chunk_count, uncompressed_size, compressed_size, blob_features) = - // For compatibility, blob table might not be associated with extended blob table. 
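`RafsV5BlobTable::load()` parses entry frames of the form `u32 prefetch_offset | u32 prefetch_size | blob_id`, separated by a trailing NUL except for the last entry. The sketch below decodes that framing with a simplified parser; `parse_blob_entries` is an illustrative helper and assumes little-endian integers.

```rust
fn parse_blob_entries(mut buf: &[u8]) -> Vec<(u32, u32, String)> {
    let mut entries = Vec::new();
    while buf.len() > 8 {
        let offset = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
        let size = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
        // The blob id runs until the next NUL or the end of the buffer.
        let mut pos = 8;
        while pos < buf.len() && buf[pos] != 0 {
            pos += 1;
        }
        let blob_id = String::from_utf8_lossy(&buf[8..pos]).into_owned();
        entries.push((offset, size, blob_id));
        buf = if pos == buf.len() { &buf[pos..] } else { &buf[pos + 1..] };
    }
    entries
}

fn main() {
    let mut buf = Vec::new();
    buf.extend_from_slice(&0u32.to_le_bytes());
    buf.extend_from_slice(&4096u32.to_le_bytes());
    buf.extend_from_slice(b"blob-a");
    buf.push(0); // separator before the next entry
    buf.extend_from_slice(&0u32.to_le_bytes());
    buf.extend_from_slice(&8192u32.to_le_bytes());
    buf.extend_from_slice(b"blob-b"); // last entry: no trailing NUL

    let entries = parse_blob_entries(&buf);
    assert_eq!(entries.len(), 2);
    assert_eq!(entries[1].2, "blob-b");
}
```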
- if !self.extended.entries.is_empty() { - let ext_len = self.extended.entries.len(); - if index >= ext_len { - error!( "Extended blob table({}) is shorter than blob table", ext_len); - return Err(einval!()); - } - let entry = &self.extended.entries[index]; - let blob_features = BlobFeatures::from_bits(entry.features).ok_or_else(|| einval!("invalid blob feature flags"))?; - (entry.chunk_count, entry.uncompressed_size, entry.compressed_size, blob_features) - } else { - (0, 0, 0, BlobFeatures::_V5_NO_EXT_BLOB_TABLE) - }; - - let mut blob_info = BlobInfo::new( - index as u32, - blob_id, - uncompressed_size, - compressed_size, - chunk_size, - chunk_count, - blob_features, - ); - - blob_info.set_compressor(flags.into()); - blob_info.set_digester(flags.into()); - blob_info.set_prefetch_info(readahead_offset as u64, readahead_size as u64); - - self.entries.push(Arc::new(blob_info)); - } - - Ok(()) - } - - /// Get the base blob information array. - pub fn get_all(&self) -> Vec> { - self.entries.clone() - } - - /// Store the extended blob information array. - pub fn store_extended(&self, w: &mut dyn RafsIoWrite) -> Result { - self.extended.store(w) - } -} - -impl RafsStore for RafsV5BlobTable { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - let mut size = 0; - self.entries - .iter() - .enumerate() - .try_for_each::<_, Result<()>>(|(idx, entry)| { - w.write_all(&u32::to_le_bytes(entry.prefetch_offset() as u32))?; - w.write_all(&u32::to_le_bytes(entry.prefetch_size() as u32))?; - w.write_all(entry.blob_id().as_bytes())?; - if idx != self.entries.len() - 1 { - size += size_of::() * 2 + entry.blob_id().len() + 1; - w.write_all(&[b'\0'])?; - } else { - size += size_of::() * 2 + entry.blob_id().len(); - } - Ok(()) - })?; - - let padding = rafsv5_align(size) - size; - w.write_padding(padding)?; - size += padding; - - w.validate_alignment(size, RAFSV5_ALIGNMENT) - } -} - -/// Rafs v5 extended blob information on disk metadata. -/// -/// RafsV5ExtDBlobEntry is appended to the tail of bootstrap, -/// can be used as an extended table for the original blob table. -// This disk structure is well defined and rafs aligned. -#[repr(C)] -#[derive(Clone)] -pub struct RafsV5ExtBlobEntry { - /// Number of chunks in a blob file. - pub chunk_count: u32, - pub features: u32, - pub uncompressed_size: u64, // -- 16 Bytes - pub compressed_size: u64, // -- 24 Bytes - pub reserved2: [u8; RAFSV5_EXT_BLOB_RESERVED_SIZE], -} - -// Implement Debug trait ourselves, as rust prior to 1.47 doesn't impl Debug for array with size -// larger than 32 -impl Debug for RafsV5ExtBlobEntry { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - f.debug_struct("ExtendedBlobTableEntry") - .field("chunk_count", &self.chunk_count) - .field("blob_cache_size", &self.uncompressed_size) - .field("compressed_blob_size", &self.compressed_size) - .field("features", &self.features) - .finish() - } -} - -impl Default for RafsV5ExtBlobEntry { - fn default() -> Self { - RafsV5ExtBlobEntry { - chunk_count: 0, - features: 0, - uncompressed_size: 0, - compressed_size: 0, - reserved2: [0; RAFSV5_EXT_BLOB_RESERVED_SIZE], - } - } -} - -impl RafsV5ExtBlobEntry { - pub fn new( - chunk_count: u32, - blob_cache_size: u64, - compressed_blob_size: u64, - features: u32, - ) -> Self { - Self { - chunk_count, - uncompressed_size: blob_cache_size, - compressed_size: compressed_blob_size, - features, - ..Default::default() - } - } -} - -/// Rafs v5 on disk extended blob information table. 
-#[derive(Clone, Debug, Default)] -pub struct RafsV5ExtBlobTable { - /// The vector index means blob index, every entry represents - /// extended information of a blob. - pub entries: Vec>, -} - -impl RafsV5ExtBlobTable { - /// Create a new instance of `RafsV5ExtBlobTable`. - pub fn new() -> Self { - Self { - entries: Vec::new(), - } - } - - /// Get content size of the extended blob information table. - pub fn size(&self) -> usize { - // `ExtendedBlobTableEntry` is already a well defined disk structure and rafs-aligned - // So directly use its `size_of()` is reliable. - rafsv5_align(size_of::() * self.entries.len()) - } - - /// Get number of entries in the extended blob information table. - pub fn entries(&self) -> usize { - self.entries.len() - } - - /// Add a new entry into the extended blob information table. - pub fn add( - &mut self, - chunk_count: u32, - blob_cache_size: u64, - compressed_blob_size: u64, - features: u32, - ) { - self.entries.push(Arc::new(RafsV5ExtBlobEntry::new( - chunk_count, - blob_cache_size, - compressed_blob_size, - features, - ))); - } - - /// Get extended information about a blob. - pub fn get(&self, blob_index: u32) -> Option> { - let len = self.entries.len(); - - if len == 0 || blob_index as usize >= len { - None - } else { - Some(self.entries[blob_index as usize].clone()) - } - } - - /// Load extended blob information table from a reader. - pub fn load(&mut self, r: &mut RafsIoReader, count: usize) -> Result<()> { - let mut entries = Vec::::with_capacity(count); - // Safe because it is already reserved enough space - let (_, data, _) = unsafe { - entries.set_len(count); - (&mut entries).align_to_mut::() - }; - - r.read_exact(data)?; - self.entries = entries.iter().cloned().map(Arc::new).collect(); - - Ok(()) - } -} - -impl RafsStore for RafsV5ExtBlobTable { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - let mut size = 0; - - // Store the list of entries - self.entries - .iter() - .enumerate() - .try_for_each::<_, Result<()>>(|(_idx, entry)| { - w.write_all(&u32::to_le_bytes(entry.chunk_count))?; - w.write_all(&u32::to_le_bytes(entry.features))?; - w.write_all(&u64::to_le_bytes(entry.uncompressed_size))?; - w.write_all(&u64::to_le_bytes(entry.compressed_size))?; - w.write_all(&entry.reserved2)?; - size += RAFSV5_EXT_BLOB_ENTRY_SIZE; - Ok(()) - })?; - - // Append padding for RAFS alignment - let padding = rafsv5_align(size) - size; - w.write_padding(padding)?; - size += padding; - - w.validate_alignment(size, RAFSV5_ALIGNMENT) - } -} - -/// Rafs v5 inode on disk metadata. -#[repr(C)] -#[derive(Clone, Copy, Default, Debug)] -pub struct RafsV5Inode { - /// sha256(sha256(chunk) + ...), [char; RAFS_SHA256_LENGTH] - pub i_digest: RafsDigest, // 32 - /// parent inode number - pub i_parent: u64, - /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. - pub i_ino: u64, - pub i_uid: u32, - pub i_gid: u32, - pub i_projid: u32, - pub i_mode: u32, // 64 - pub i_size: u64, - pub i_blocks: u64, - pub i_flags: RafsInodeFlags, - pub i_nlink: u32, - /// for dir, child start index - pub i_child_index: u32, // 96 - /// for dir, means child count. - /// for regular file, means chunk info count. 
- pub i_child_count: u32, - /// file name size, [char; i_name_size] - pub i_name_size: u16, - /// symlink path size, [char; i_symlink_size] - pub i_symlink_size: u16, // 104 - // inode device block number, ignored for non-special files - pub i_rdev: u32, - // for alignment reason, we put nsec first - pub i_mtime_nsec: u32, - pub i_mtime: u64, // 120 - pub i_reserved: [u8; 8], // 128 -} - -impl RafsV5Inode { - /// Create a new instance of `RafsV5Inode`. - pub fn new() -> Self { - Self::default() - } - - /// Set size of the file name. - #[inline] - pub fn set_name_size(&mut self, name_len: usize) { - self.i_name_size = name_len as u16; - } - - /// Mark the inode as a symlink. - #[inline] - pub fn set_symlink_size(&mut self, symlink_len: usize) { - self.i_symlink_size = symlink_len as u16; - } - - /// Get on disk size of the inode content. - #[inline] - pub fn size(&self) -> usize { - size_of::() - + (rafsv5_align(self.i_name_size as usize) + rafsv5_align(self.i_symlink_size as usize)) - as usize - } - - /// Get the uid and the gid of the inode. - #[inline] - pub fn uidgid(&self) -> (u32, u32) { - (self.i_uid, self.i_gid) - } - - /// Get the uid and the gid of the inode. - #[inline] - pub fn mtime(&self) -> (u64, u32) { - (self.i_mtime, self.i_mtime_nsec) - } - - /// Get the mode of the inode. - #[inline] - pub fn mode(&self) -> u32 { - self.i_mode - } - - /// Check whether the inode is a directory. - #[inline] - pub fn is_dir(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 - } - - /// Check whether the inode is a symlink. - #[inline] - pub fn is_symlink(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 - } - - /// Check whether the inode is a regular file. - #[inline] - pub fn is_reg(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 - } - - /// Check whether the inode is a char device node. - pub fn is_chrdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 - } - - /// Check whether the inode is a block device node. - pub fn is_blkdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 - } - - /// Check whether the inode is a FIFO. - pub fn is_fifo(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 - } - - /// Check whether the inode is a socket. - pub fn is_sock(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 - } - - /// Check whether the inode is a hardlink. - #[inline] - pub fn is_hardlink(&self) -> bool { - self.is_reg() && self.i_nlink > 1 - } - - /// Get inode flags - pub fn has_hardlink(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::HARDLINK) - } - - /// Mark the inode as having extended attributes. - #[inline] - pub fn has_xattr(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::XATTR) - } - - /// Mark the inode as having hole chunks. - #[inline] - pub fn has_hole(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::HAS_HOLE) - } - - /// Load an inode from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } - - /// Set filename for the inode. 
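For reference, the on-disk footprint of one inode record follows directly from the struct above: a fixed 128-byte header (the running-offset comments on the fields track this), plus the file name and, for symlinks, the target path, each padded out to the 8-byte RAFS alignment. A small standalone sketch of that arithmetic; the helper is a local restatement of `rafsv5_align`, not the crate's implementation:

```rust
const RAFSV5_ALIGNMENT: usize = 8;
const RAFSV5_INODE_SIZE: usize = 128; // fixed header size, per the offset comments above

fn align(size: usize) -> usize {
    (size + RAFSV5_ALIGNMENT - 1) & !(RAFSV5_ALIGNMENT - 1)
}

/// On-disk size of one inode record: header + padded name + padded symlink target.
fn inode_record_size(name_len: usize, symlink_len: usize) -> usize {
    RAFSV5_INODE_SIZE + align(name_len) + align(symlink_len)
}

fn main() {
    // A 3-byte name and no symlink: 128 + 8 + 0 = 136, matching the
    // `test_new_inode` assertion later in this file.
    assert_eq!(inode_record_size(3, 0), 136);
    // A 4-byte name plus a 7-byte symlink target padded to 8: 128 + 8 + 8 = 144.
    assert_eq!(inode_record_size(4, 7), 144);
}
```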
- pub fn load_file_name(&self, r: &mut RafsIoReader) -> Result { - let mut name_buf = vec![0u8; self.i_name_size as usize]; - r.read_exact(name_buf.as_mut_slice())?; - r.seek_to_next_aligned(name_buf.len(), RAFSV5_ALIGNMENT)?; - Ok(bytes_to_os_str(&name_buf).to_os_string()) - } -} - -impl_bootstrap_converter!(RafsV5Inode); - -impl From<&dyn RafsInodeExt> for RafsV5Inode { - fn from(inode: &dyn RafsInodeExt) -> Self { - let attr = inode.get_attr(); - - RafsV5Inode { - i_digest: inode.get_digest(), - i_parent: inode.parent(), - i_ino: attr.ino, - i_uid: attr.uid, - i_gid: attr.gid, - i_projid: inode.projid(), - i_mode: attr.mode, - i_size: attr.size, - i_blocks: attr.blocks, - i_flags: RafsInodeFlags::from_bits_truncate(inode.flags()), - i_nlink: attr.nlink, - i_child_index: inode.get_child_index().unwrap_or(0), - i_child_count: inode.get_child_count(), - i_name_size: inode.get_name_size(), - i_symlink_size: inode.get_symlink_size(), - i_rdev: attr.rdev, - i_mtime_nsec: attr.mtimensec, - i_mtime: attr.mtime, - i_reserved: [0u8; 8], - } - } -} - -/// A in-memory wrapper of a Rafs v5 inode. -pub struct RafsV5InodeWrapper<'a> { - pub name: &'a OsStr, - pub symlink: Option<&'a OsStr>, - pub inode: &'a RafsV5Inode, -} - -impl<'a> RafsStore for RafsV5InodeWrapper<'a> { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - let mut size: usize = 0; - - let inode_data = self.inode.as_ref(); - w.write_all(inode_data)?; - size += inode_data.len(); - - let name = self.name.as_bytes(); - w.write_all(name)?; - size += name.len(); - let padding = rafsv5_align(self.inode.i_name_size as usize) - name.len(); - w.write_padding(padding)?; - size += padding; - - if let Some(symlink) = self.symlink { - let symlink_path = symlink.as_bytes(); - w.write_all(symlink_path)?; - size += symlink_path.len(); - let padding = rafsv5_align(self.inode.i_symlink_size as usize) - symlink_path.len(); - w.write_padding(padding)?; - size += padding; - } - - w.validate_alignment(size, RAFSV5_ALIGNMENT) - } -} - -/// Rafs v5 chunk on disk metadata. -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct RafsV5ChunkInfo { - /// sha256(chunk), [char; RAFS_SHA256_LENGTH] - pub block_id: RafsDigest, // 32 - /// blob index. - pub blob_index: u32, - /// chunk flags - pub flags: BlobChunkFlags, // 40 - /// compressed size in blob - pub compressed_size: u32, - /// uncompressed size in blob - pub uncompressed_size: u32, // 48 - /// compressed offset in blob - pub compressed_offset: u64, // 56 - /// uncompressed offset in blob - pub uncompressed_offset: u64, // 64 - /// offset in file - pub file_offset: u64, // 72 - /// chunk index, it's allocated sequentially and starting from 0 for one blob. - pub index: u32, - /// reserved - pub reserved: u32, //80 -} - -impl RafsV5ChunkInfo { - /// Create a new instance of `RafsV5ChunkInfo`. - pub fn new() -> Self { - RafsV5ChunkInfo::default() - } - - /// Load a Rafs v5 indoe from a reader. 
- pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -impl RafsStore for RafsV5ChunkInfo { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - w.validate_alignment(self.as_ref().len(), RAFSV5_ALIGNMENT) - } -} - -impl_bootstrap_converter!(RafsV5ChunkInfo); - -impl Display for RafsV5ChunkInfo { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - write!( - f, - "file_offset {}, compress_offset {}, compress_size {}, uncompress_offset {}, uncompress_size {}, blob_index {}, block_id {}, index {}, is_compressed {}", - self.file_offset, - self.compressed_offset, - self.compressed_size, - self.uncompressed_offset, - self.uncompressed_size, - self.blob_index, - self.block_id, - self.index, - self.flags.contains(BlobChunkFlags::COMPRESSED), - ) - } -} - -/// Rafs v5 on disk extended attribute table. -/// -/// A on disk Rafs v5 extended attribute table contains an u64 content size, followed by extended -/// attribute pairs. -#[repr(C)] -#[derive(Copy, Clone, Default, Debug)] -pub struct RafsV5XAttrsTable { - pub size: u64, -} - -impl RafsV5XAttrsTable { - /// Create a new instance of `RafsV5XAttrsTable`. - pub fn new() -> Self { - RafsV5XAttrsTable { - ..Default::default() - } - } - - /// Get content size of the extended attribute table. - #[inline] - pub fn size(self) -> usize { - self.size as usize - } - - /// Get aligned content size of the extended attribute table. - #[inline] - pub fn aligned_size(self) -> usize { - rafsv5_align(self.size()) - } -} - -impl_bootstrap_converter!(RafsV5XAttrsTable); - -impl RafsXAttrs { - /// Get aligned content size of the extended attribute table. - #[inline] - pub fn aligned_size_v5(&self) -> usize { - rafsv5_align(self.size()) - } - - pub fn store_v5(&self, w: &mut dyn RafsIoWrite) -> Result { - let mut size = 0; - - if !self.pairs.is_empty() { - let size_data = (self.size() as u64).to_le_bytes(); - w.write_all(&size_data)?; - size += size_data.len(); - - for (key, value) in self.pairs.iter() { - let pair_size = key.byte_size() + 1 + value.len(); - let pair_size_data = (pair_size as u32).to_le_bytes(); - w.write_all(&pair_size_data)?; - size += pair_size_data.len(); - - let key_data = key.as_bytes(); - w.write_all(key_data)?; - w.write_all(&[0u8])?; - size += key_data.len() + 1; - - w.write_all(value)?; - size += value.len(); - } - } - - let padding = rafsv5_align(size) - size; - w.write_padding(padding)?; - size += padding; - - w.validate_alignment(size, RAFSV5_ALIGNMENT) - } -} - -/// Allocate a group of `BlobIoVec` to handle blob io to range `offset..(offset+size)`. -/// -/// The range `offset..(offset+size)` may be backed by multiple blobs, so a group of `BlobIoVec` will -/// be returned on success, each one covers a continuous range on a single blob. 
-pub(crate) fn rafsv5_alloc_bio_vecs( - inode: &I, - offset: u64, - size: usize, - user_io: bool, -) -> Result> { - let end = offset - .checked_add(size as u64) - .ok_or_else(|| einval!("invalid read size"))?; - let (index_start, index_end) = calculate_bio_chunk_index( - offset, - end, - inode.get_chunk_size() as u64, - inode.get_child_count(), - inode.has_hole(), - ); - trace!( - "alloc bio desc offset {} size {} i_size {} index_start {} index_end {} i_child_count {}", - offset, - size, - inode.size(), - index_start, - index_end, - inode.get_child_count() - ); - if size == 0 || index_start >= inode.get_chunk_count() { - return Ok(vec![]); - } - - let chunk = inode.get_chunk_info_v5(index_start)?; - let blob = inode.get_blob_by_index(chunk.blob_index())?; - let mut desc = BlobIoVec::new(blob.clone()); - if !add_chunk_to_bio_desc(&mut desc, offset, end, chunk, blob, user_io) { - return Err(einval!("failed to create blob io vector")); - } - - let mut descs = Vec::with_capacity(4); - for idx in index_start + 1..index_end { - let chunk = inode.get_chunk_info_v5(idx)?; - let blob = inode.get_blob_by_index(chunk.blob_index())?; - if blob.blob_index() != desc.blob_index() { - descs.push(desc); - desc = BlobIoVec::new(blob.clone()); - } - if !add_chunk_to_bio_desc(&mut desc, offset, end, chunk, blob, user_io) { - return Err(einval!("failed to create blob io vector")); - } - } - descs.push(desc); - - Ok(descs) -} - -/// Add a new bio covering the IO range into the provided bio desc. -/// -/// Returns true if caller should continue checking more chunks. -/// -/// # Parameters -/// - desc: the targeting bio desc. -/// - offset: IO offset to the file start, inclusive. -/// - end: IO end to the file start, exclusive. -/// - chunk: a data chunk overlapping with the IO range. -/// - chunk_size: chunk size. -/// - blob: the blob which the chunk data belongs to. -fn add_chunk_to_bio_desc( - desc: &mut BlobIoVec, - offset: u64, - end: u64, - chunk: Arc, - blob: Arc, - user_io: bool, -) -> bool { - // The chunk is ahead of the start of the range. - if offset >= (chunk.file_offset() + chunk.uncompressed_size() as u64) { - return true; - } - // The chunk is passing the end of the range. - if end <= chunk.file_offset() { - return false; - } - - let chunk_start = if offset > chunk.file_offset() { - offset - chunk.file_offset() - } else { - 0 - }; - let chunk_end = if end < (chunk.file_offset() + chunk.uncompressed_size() as u64) { - end - chunk.file_offset() - } else { - chunk.uncompressed_size() as u64 - }; - - let io_chunk = Arc::new(V5IoChunk { - // TODO: try to make `chunk_id` return Arc to get rid of potential memory copy - block_id: Arc::new(*chunk.chunk_id()), - blob_index: chunk.blob_index(), - index: chunk.index(), - compressed_offset: chunk.compressed_offset(), - uncompressed_offset: chunk.uncompressed_offset(), - compressed_size: chunk.compressed_size(), - uncompressed_size: chunk.uncompressed_size(), - flags: chunk.flags(), - }) as Arc; - let bio = BlobIoDesc::new( - blob, - io_chunk.into(), - chunk_start as u32, - (chunk_end - chunk_start) as u32, - user_io, - ); - desc.push(bio); - - true -} - -/// Calculate bio chunk indices that overlaps with the provided IO range. -/// -/// # Parameters -/// - offset: IO offset to the file start, inclusive. -/// - end: IO end to the file start, exclusive. -/// - chunk_size: chunk size. -/// - chunk_cnt: maximum number of chunks -/// - has_hole: whether a file has holes in it. 
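Before the helper that follows, it is worth spelling out the arithmetic it performs for files without holes: the first overlapping chunk is `offset / chunk_size`, and the end index is `(end - 1) / chunk_size + 1` clamped to the chunk count, so the returned range is half-open. A standalone sketch with worked values taken from the `test_calculate_bio_chunk_index` cases in this file:

```rust
use std::cmp;

/// Half-open range [start, end) of chunk indices overlapping the byte range
/// [offset, end) of a file, assuming the file has no holes.
fn chunk_index_range(offset: u64, end: u64, chunk_size: u64, chunk_cnt: u32) -> (u32, u32) {
    debug_assert!(offset < end);
    let start = (offset / chunk_size) as u32;
    let stop = cmp::min(((end - 1) / chunk_size) as u32 + 1, chunk_cnt);
    (start, stop)
}

fn main() {
    let (chunk_size, chunk_cnt) = (1024, 4);
    // A 1-byte read at the very end of chunk 0 touches only chunk 0.
    assert_eq!(chunk_index_range(1023, 1024, chunk_size, chunk_cnt), (0, 1));
    // A 2-byte read straddling the 1024 boundary touches chunks 0 and 1.
    assert_eq!(chunk_index_range(1023, 1025, chunk_size, chunk_cnt), (0, 2));
    // A read running past the last chunk is clamped to the chunk count.
    assert_eq!(chunk_index_range(0, 1024 * 4 + 1, chunk_size, chunk_cnt), (0, 4));
}
```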
-fn calculate_bio_chunk_index( - offset: u64, - end: u64, - chunk_size: u64, - chunk_cnt: u32, - has_hole: bool, -) -> (u32, u32) { - debug_assert!(offset < end); - - let index_start = if !has_hole { - (offset / chunk_size) as u32 - } else { - 0 - }; - let index_end = if !has_hole { - cmp::min(((end - 1) / chunk_size) as u32 + 1, chunk_cnt) - } else { - chunk_cnt - }; - - (index_start, index_end) -} - -pub(crate) fn rafsv5_align(size: usize) -> usize { - if size & (RAFSV5_ALIGNMENT - 1) == 0 { - size - } else { - size + (RAFSV5_ALIGNMENT - (size & (RAFSV5_ALIGNMENT - 1))) - } -} - -/// Validate inode metadata, include children, chunks and symblink etc. -/// -/// The default implementation is for rafs v5. The chunk data is not validated here, which will -/// be validate on fs read. -pub(crate) fn rafsv5_validate_inode( - inode: &dyn RafsInodeExt, - recursive: bool, - digester: digest::Algorithm, -) -> Result { - let child_count = inode.get_child_count(); - let expected_digest = inode.get_digest(); - let mut hasher = RafsDigest::hasher(digester); - - if inode.is_symlink() { - hasher.digest_update(inode.get_symlink()?.as_bytes()); - } else if inode.is_reg() { - for idx in 0..child_count { - let chunk = inode.get_chunk_info(idx)?; - let chunk_digest = chunk.chunk_id(); - - hasher.digest_update(chunk_digest.as_ref()); - } - } else if inode.is_dir() { - for idx in 0..child_count { - let child = inode.get_child_by_index(idx)?; - if (child.is_reg() || child.is_symlink() || (recursive && child.is_dir())) - && !rafsv5_validate_inode(child.deref(), recursive, digester)? - { - return Ok(false); - } - let child_digest = child.get_digest(); - let child_digest = child_digest.as_ref(); - - hasher.digest_update(child_digest); - } - } - - let digest = hasher.digest_finalize(); - let result = expected_digest == digest; - if !result { - error!( - "invalid inode digest {}, expected {}, ino: {} name: {:?}", - digest, - expected_digest, - inode.ino(), - inode.name() - ); - } - - Ok(result) -} - -#[cfg(test)] -pub mod tests { - use std::fs::OpenOptions; - use std::io::BufWriter; - use std::io::{SeekFrom, Write}; - - use storage::device::BlobChunkInfo; - use vmm_sys_util::tempfile::TempFile; - - use super::*; - use crate::metadata::RafsStore; - use crate::{RafsIoRead, RafsIoReader}; - use std::any::Any; - use std::str::FromStr; - - struct Entry { - foo: u32, - bar: u32, - } - - unsafe fn any_as_u8_slice(p: &T) -> &[u8] { - ::std::slice::from_raw_parts((p as *const T) as *const u8, ::std::mem::size_of::()) - } - - #[test] - fn test_load_blob_table() { - let mut buffer = Vec::new(); - let first = Entry { foo: 1, bar: 2 }; - let second = Entry { foo: 3, bar: 4 }; - let third = Entry { foo: 5, bar: 6 }; - - let first_id = "355d403e35d7120cbd6a145874a2705e6842ce9974985013ebdc1fa5199a0184"; - let second_id = "19ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e26023ad0ecd61d4b130019ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e26023ad0ecd61d4"; - let third_id = "19ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e"; - - let first_slice = unsafe { any_as_u8_slice(&first) }; - let second_slice = unsafe { any_as_u8_slice(&second) }; - let third_slice = unsafe { any_as_u8_slice(&third) }; - - buffer.extend_from_slice(first_slice); - buffer.extend_from_slice(first_id.as_bytes()); - buffer.push(b'\0'); - buffer.extend_from_slice(second_slice); - buffer.extend_from_slice(second_id.as_bytes()); - buffer.push(b'\0'); - buffer.extend_from_slice(third_slice); - buffer.extend_from_slice(third_id.as_bytes()); - // buffer.push(b'\0'); - - let tmp_file = 
TempFile::new().unwrap(); - - // Store extended blob table - let mut tmp_file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - tmp_file.write_all(&buffer).unwrap(); - tmp_file.flush().unwrap(); - - let mut file: RafsIoReader = Box::new(tmp_file); - let mut blob_table = RafsV5BlobTable::new(); - - file.seek(SeekFrom::Start(0)).unwrap(); - blob_table - .load( - &mut file, - buffer.len() as u32, - RAFS_DEFAULT_CHUNK_SIZE as u32, - RafsSuperFlags::empty(), - ) - .unwrap(); - for b in &blob_table.entries { - let _c = b.clone(); - trace!("{:?}", _c); - } - - assert_eq!(first.bar, first.foo + 1); - assert_eq!(blob_table.size(), rafsv5_align(buffer.len())); - assert_eq!(blob_table.get(0).unwrap().blob_id(), first_id); - assert_eq!(blob_table.get(1).unwrap().blob_id(), second_id); - assert_eq!(blob_table.get(2).unwrap().blob_id(), third_id); - assert!(blob_table.get(3).is_err()); - assert_eq!(blob_table.get_all().len(), 3); - - blob_table.entries.truncate(0); - file.seek(SeekFrom::Start(0)).unwrap(); - blob_table - .load( - &mut file, - 0, - RAFS_DEFAULT_CHUNK_SIZE as u32, - RafsSuperFlags::empty(), - ) - .unwrap(); - assert_eq!(blob_table.size(), 0); - assert_eq!(blob_table.entries.len(), 0); - assert!(blob_table.get(0).is_err()); - - blob_table.entries.truncate(0); - file.seek(SeekFrom::Start(0)).unwrap(); - blob_table - .load( - &mut file, - (buffer.len() - 100) as u32, - RAFS_DEFAULT_CHUNK_SIZE as u32, - RafsSuperFlags::empty(), - ) - .unwrap(); - assert_eq!(blob_table.entries[0].blob_id(), first_id); - assert_eq!(blob_table.get_all().len(), 2); - } - - #[test] - fn test_extended_blob_table() { - let tmp_file = TempFile::new().unwrap(); - - // Create extended blob table - let mut table = RafsV5ExtBlobTable::new(); - for i in 0..5 { - table.add(i * 3, 100, 100, 0); - } - - // Store extended blob table - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut writer = BufWriter::new(file); - table.store(&mut writer).unwrap(); - - // Load extended blob table - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut reader = Box::new(file) as Box; - let mut table = RafsV5ExtBlobTable::new(); - table.load(&mut reader, 5).unwrap(); - - assert_eq!(table.size(), 5 * RAFSV5_EXT_BLOB_ENTRY_SIZE); - assert_eq!(table.entries(), 5); - assert!(table.get(0).is_some()); - assert!(table.get(4).is_some()); - assert!(table.get(5).is_none()); - - // Check expected blob table - for i in 0..5 { - assert_eq!(table.get(i).unwrap().chunk_count, i * 3); - assert_eq!(table.get(i).unwrap().features, 0); - assert_eq!(table.get(i).unwrap().uncompressed_size, 100); - assert_eq!( - table.get(i).unwrap().reserved2, - [0u8; RAFSV5_EXT_BLOB_RESERVED_SIZE] - ); - } - } - - #[derive(Default, Copy, Clone)] - struct MockChunkInfo { - pub block_id: RafsDigest, - pub blob_index: u32, - pub flags: BlobChunkFlags, - pub compress_size: u32, - pub uncompress_size: u32, - pub compress_offset: u64, - pub uncompress_offset: u64, - pub file_offset: u64, - pub index: u32, - #[allow(unused)] - pub reserved: u32, - } - - impl MockChunkInfo { - fn new() -> Self { - MockChunkInfo::default() - } - } - - impl BlobChunkInfo for MockChunkInfo { - fn chunk_id(&self) -> &RafsDigest { - &self.block_id - } - - fn id(&self) -> u32 { - self.index - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn 
is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, blob_index, u32); - impl_getter!(compressed_offset, compress_offset, u64); - impl_getter!(compressed_size, compress_size, u32); - impl_getter!(uncompressed_offset, uncompress_offset, u64); - impl_getter!(uncompressed_size, uncompress_size, u32); - } - - impl BlobV5ChunkInfo for MockChunkInfo { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_getter!(index, index, u32); - impl_getter!(file_offset, file_offset, u64); - impl_getter!(flags, flags, BlobChunkFlags); - } - - #[test] - fn test_add_chunk_to_bio_desc() { - let mut chunk = MockChunkInfo::new(); - let offset = 4096; - let size: u64 = 1024; - // [offset, offset + size) - chunk.file_offset = offset; - chunk.uncompress_size = size as u32; - - // (offset, end, expected_chunk_start, expected_size) - let data = vec![ - // Non-overlapping IO - (0, 0, 0, 0, false), - (0, offset, 0, 0, false), - (offset + size, 0, 0, 0, true), - (offset + size + 1, 0, 0, 0, true), - // Overlapping IO - (0, offset + 1, 0, 1, true), - (0, offset + size, 0, size, true), - (0, offset + size + 1, 0, size, true), - (0, offset + size - 1, 0, size - 1, true), - (offset, offset + 1, 0, 1, true), - (offset, offset + size, 0, size, true), - (offset, offset + size - 1, 0, size - 1, true), - (offset, offset + size + 1, 0, size, true), - (offset + 1, offset + 2, 1, 1, true), - (offset + 1, offset + size, 1, size - 1, true), - (offset + 1, offset + size - 1, 1, size - 2, true), - (offset + 1, offset + size + 1, 1, size - 1, true), - ]; - - for (offset, end, expected_chunk_start, expected_size, result) in data.iter() { - let blob = Arc::new(BlobInfo::new( - 0, - String::from("blobid"), - 0, - 0, - 0, - 0, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - )); - let mut desc = BlobIoVec::new(blob.clone()); - let res = add_chunk_to_bio_desc(&mut desc, *offset, *end, Arc::new(chunk), blob, true); - assert_eq!(*result, res); - if !desc.is_empty() { - assert_eq!(desc.len(), 1); - let bio = &desc.blob_io_desc(0).unwrap(); - assert_eq!(*expected_chunk_start, bio.offset); - assert_eq!(*expected_size as u32, bio.size); - } - } - } - - #[test] - fn test_calculate_bio_chunk_index() { - let (blksize, chunk_cnt) = (1024, 4); - - let io_range: Vec<(u64, u64, u32, u64)> = vec![ - (0, 1, 0, 1), - (0, blksize - 1, 0, 1), - (0, blksize, 0, 1), - (0, blksize + 1, 0, 2), - (0, blksize * chunk_cnt, 0, chunk_cnt), - (0, blksize * chunk_cnt + 1, 0, chunk_cnt), - (0, blksize * chunk_cnt - 1, 0, chunk_cnt), - (blksize - 1, 1, 0, 1), - (blksize - 1, 2, 0, 2), - (blksize - 1, 3, 0, 2), - (blksize - 1, blksize - 1, 0, 2), - (blksize - 1, blksize, 0, 2), - (blksize - 1, blksize + 1, 0, 2), - (blksize - 1, blksize * chunk_cnt, 0, chunk_cnt), - (blksize, 1, 1, 2), - (blksize, 2, 1, 2), - (blksize, blksize - 1, 1, 2), - (blksize, blksize + 1, 1, 3), - (blksize, blksize + 2, 1, 3), - (blksize, blksize * chunk_cnt, 1, chunk_cnt), - (blksize + 1, 1, 1, 2), - (blksize + 1, blksize - 2, 1, 2), - (blksize + 1, blksize - 1, 1, 2), - (blksize + 1, blksize, 1, 3), - (blksize + 1, blksize * chunk_cnt, 1, chunk_cnt), - ]; - - for (io_start, io_size, expected_start, expected_end) in io_range.iter() { - let (start, end) = calculate_bio_chunk_index( - *io_start, - *io_start + *io_size, - blksize, - chunk_cnt as u32, - false, - ); - - assert_eq!(start, *expected_start); - assert_eq!(end, *expected_end as u32); - } - } - - #[test] - fn test_rafsv5_align() { - assert_eq!(rafsv5_align(0), 0); - 
assert_eq!(rafsv5_align(1), 8); - assert_eq!(rafsv5_align(7), 8); - assert_eq!(rafsv5_align(8), 8); - assert_eq!(rafsv5_align(9), 16); - } - - #[test] - fn test_rafsv5_superflags() { - assert_eq!( - RafsSuperFlags::from(digest::Algorithm::Blake3), - RafsSuperFlags::HASH_BLAKE3 - ); - assert_eq!( - RafsSuperFlags::from(digest::Algorithm::Sha256), - RafsSuperFlags::HASH_SHA256 - ); - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::HASH_BLAKE3), - digest::Algorithm::Blake3 - ); - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::HASH_SHA256), - digest::Algorithm::Sha256 - ); - - assert_eq!( - RafsSuperFlags::from(compress::Algorithm::Zstd), - RafsSuperFlags::COMPRESSION_ZSTD - ); - assert_eq!( - RafsSuperFlags::from(compress::Algorithm::GZip), - RafsSuperFlags::COMPRESSION_GZIP - ); - assert_eq!( - RafsSuperFlags::from(compress::Algorithm::Lz4Block), - RafsSuperFlags::COMPRESSION_LZ4 - ); - assert_eq!( - RafsSuperFlags::from(compress::Algorithm::None), - RafsSuperFlags::COMPRESSION_NONE - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_ZSTD), - compress::Algorithm::Zstd - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_GZIP), - compress::Algorithm::GZip - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_LZ4), - compress::Algorithm::Lz4Block - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_NONE), - compress::Algorithm::None - ); - } - - #[test] - fn test_rafsv5_inode_table() { - let mut table = RafsV5InodeTable::new(1); - assert_eq!(table.size(), 8); - assert_eq!(table.len(), 2); - - assert!(table.set(0, 0x2000).is_err()); - assert!(table.set(2, 0x2000).is_err()); - assert!(table.set(1, 0x1000).is_err()); - assert!(table.set(1, 0x2001).is_err()); - - assert!(table.get(0).is_err()); - assert!(table.get(2).is_err()); - assert!(table.get(1).is_err()); - table.data[1] = 0x1000; - assert!(table.get(1).is_err()); - table.data[1] = 0x1 << 30; - assert!(table.get(1).is_err()); - assert!(table.set(1, 0x2008).is_ok()); - assert_eq!(table.get(1).unwrap(), 0x2008); - } - - #[test] - fn test_rafsv5_prefetch_table() { - let mut table = RafsV5PrefetchTable::new(); - - assert_eq!(table.size(), 0); - assert_eq!(table.len(), 0); - assert!(table.is_empty()); - table.add_entry(0x1); - assert_eq!(table.size(), 8); - assert_eq!(table.len(), 1); - assert!(!table.is_empty()); - - let tmp_file = TempFile::new().unwrap(); - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut writer = BufWriter::new(file); - writer.write_all(&[0u8; 8]).unwrap(); - assert_eq!(table.store(&mut writer).unwrap(), 8); - writer.flush().unwrap(); - - // Load extended blob table - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut reader = Box::new(file) as Box; - let mut table = RafsV5PrefetchTable::new(); - table.load_prefetch_table_from(&mut reader, 8, 2).unwrap(); - assert_eq!(table.size(), 8); - assert_eq!(table.len(), 2); - assert!(!table.is_empty()); - assert_eq!(table.inodes[0], 0x1); - assert_eq!(table.inodes[1], 0x0); - } - - #[test] - fn test_new_inode() { - let mut inode = RafsV5Inode::new(); - inode.set_name_size(3); - assert_eq!(inode.size(), 136); - assert!(!inode.is_symlink()); - assert!(!inode.is_hardlink()); - assert!(!inode.is_dir()); - assert!(!inode.is_reg()); - assert!(!inode.has_hole()); - assert!(!inode.has_xattr()); - - let mut inode = RafsV5Inode::new(); - inode.set_symlink_size(3); - 
assert_eq!(inode.size(), 136); - } - - #[test] - fn test_inode_load_store() { - let mut inode = RafsV5Inode::new(); - inode.i_size = 0x1000; - inode.i_blocks = 1; - inode.i_child_count = 10; - inode.i_child_index = 20; - inode.set_name_size(4); - inode.set_symlink_size(6); - inode.i_flags = RafsInodeFlags::SYMLINK; - - let name = OsString::from_str("test").unwrap(); - let symlink = OsString::from_str("/test12").unwrap(); - let inode_wrapper = RafsV5InodeWrapper { - name: &name, - symlink: Some(&symlink), - inode: &inode, - }; - - let tmp_file = TempFile::new().unwrap(); - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut writer = BufWriter::new(file); - assert_eq!(inode_wrapper.store(&mut writer).unwrap(), 144); - writer.flush().unwrap(); - - // Load inode - let file = OpenOptions::new() - .read(true) - .write(true) - .open(tmp_file.as_path()) - .unwrap(); - let mut reader = Box::new(file) as Box; - let mut inode2 = RafsV5Inode::new(); - inode2.load(&mut reader).unwrap(); - assert_eq!(inode2.i_name_size, 4); - assert_eq!(inode2.i_symlink_size, 6); - assert_eq!(inode2.i_size, 0x1000); - assert_eq!(inode2.i_blocks, 1); - assert_eq!(inode2.i_child_count, 10); - assert_eq!(inode2.i_child_index, 20); - - let filename = inode2.load_file_name(&mut reader).unwrap(); - assert_eq!(filename, OsString::from_str("test").unwrap()); - } - - #[test] - fn test_rafsv5_new_xattrs() { - let mut xattrs = RafsXAttrs::new(); - assert_eq!(xattrs.size(), 0); - - xattrs - .add(OsString::from("user.key1"), vec![0x1u8, 0x2, 0x3, 0x4]) - .unwrap(); - assert_eq!(xattrs.size(), 18); - xattrs - .add(OsString::from("user.key21"), vec![0x1u8, 0x2, 0x3, 0x4]) - .unwrap(); - assert_eq!(xattrs.size(), 37); - xattrs.remove(&OsString::from("user.key1")); - assert_eq!(xattrs.size(), 19); - } -} +// Copyright 2020-2021 Ant Group. All rights reserved. +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! RAFS v5 on disk layout structures. +//! +//! # RAFS File System Meta Data Format Version 5 +//! Previously RAFS has different formats for on disk meta data and runtime meta data. So when +//! initializing an RAFS instance, it will sequentially read and parse the on disk meta data, +//! build a copy of in memory runtime meta data. This may cause slow startup and cost too much +//! memory to build in memory meta data. +//! +//! The RAFS File System Meta Data Format Version 5 (aka V5) is defined to support directly mapping +//! RAFS meta data into process as runtime meta data, so we could parse RAFS on disk meta data on +//! demand. The V5 meta data format has following changes: +//! 1) file system version number been bumped to 0x500. +//! 2) Directory inodes will sequentially assign globally unique `child index` to it's child inodes. +//! Two fields, "child_index" and "child_count", have been added to the OndiskInode struct. +//! 3) For inodes with hard link count as 1, the `child index` equals to its assigned inode number. +//! 4) For inodes with hard link count bigger than 1, the `child index` may be different from the +//! assigned inode number. Among those child entries linking to the same inode, there's will be +//! one and only one child entry having the inode number as its assigned `child index'. +//! 5) A child index mapping table is introduced, which is used to map `child index` into offset +//! from the base of the super block. The formula to calculate the inode offset is: +//! 
`inode_offset_from_sb = inode_table[child_index] << 3` +//! 6) The child index mapping table follows the super block by default. +//! +//! Giving above definition, we could get the inode object for an inode number or child index as: +//! inode_ptr = sb_base_ptr + inode_offset_from_sb(inode_number) +//! inode_ptr = sb_base_ptr + inode_offset_from_sb(child_index) +//! +//! On the other hand, Rafs v4 is compatible with Rafs v5, so Rafs v5 implementation supports +//! both v4 and v5 metadata. + +use std::cmp; +use std::convert::TryFrom; +use std::ffi::{OsStr, OsString}; +use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; +use std::io::{Read, Result}; +use std::mem::size_of; +use std::ops::Deref; +use std::os::unix::ffi::OsStrExt; +use std::sync::Arc; + +use nydus_utils::digest::{self, DigestHasher, RafsDigest}; +use nydus_utils::{compress, ByteSize}; +use vm_memory::VolatileMemory; +// With Rafs v5, the storage manager needs to access file system metadata to decompress the +// compressed blob file. To avoid circular dependency, the following Rafs v5 metadata structures +// have been moved into the storage manager. +use nydus_storage::device::v5::BlobV5ChunkInfo; +use nydus_storage::device::{ + BlobChunkFlags, BlobChunkInfo, BlobFeatures, BlobInfo, BlobIoDesc, BlobIoVec, +}; + +use crate::metadata::inode::RafsInodeFlags; +use crate::metadata::layout::{bytes_to_os_str, MetaRange, RafsXAttrs, RAFS_SUPER_VERSION_V5}; +use crate::metadata::md_v5::V5IoChunk; +use crate::metadata::{ + Inode, RafsInode, RafsStore, RafsSuperFlags, RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE, +}; +use crate::{ + impl_bootstrap_converter, impl_pub_getter_setter, RafsInodeExt, RafsIoReader, RafsIoWrite, +}; + +pub(crate) const RAFSV5_ALIGNMENT: usize = 8; +pub(crate) const RAFSV5_SUPERBLOCK_SIZE: usize = 8192; +pub(crate) const RAFSV5_EXT_BLOB_ENTRY_SIZE: usize = 64; + +const RAFSV5_SUPER_MAGIC: u32 = 0x5241_4653; +const RAFSV5_SUPERBLOCK_RESERVED_SIZE: usize = RAFSV5_SUPERBLOCK_SIZE - 80; +const RAFSV5_EXT_BLOB_RESERVED_SIZE: usize = RAFSV5_EXT_BLOB_ENTRY_SIZE - 24; + +/// Trait to get information about a Rafs v5 inode. +pub(crate) trait RafsV5InodeOps { + /// Get the `BlobInfo` object corresponding to the `blob_index`. + fn get_blob_by_index(&self, blob_index: u32) -> Result>; + + /// Get chunk size for the inode. + fn get_chunk_size(&self) -> u32; + + /// Check whether the inode has hole chunk. + fn has_hole(&self) -> bool; +} + +pub(crate) trait RafsV5InodeChunkOps { + /// Get chunk info object for a chunk. + fn get_chunk_info_v5(&self, idx: u32) -> Result>; +} + +/// Rafs v5 superblock on disk metadata, 8192 bytes. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct RafsV5SuperBlock { + /// RAFS super magic + s_magic: u32, + /// RAFS version + s_fs_version: u32, + /// superblock on disk size + s_sb_size: u32, + /// block size + s_block_size: u32, + /// superblock flags + s_flags: u64, + /// V5: Number of unique inodes(hard link counts as 1). + s_inodes_count: u64, + /// V5: Offset of inode table + s_inode_table_offset: u64, + /// Those inodes which need to prefetch will have there indexes put into this table. + /// Then Rafs has a hint to prefetch inodes and doesn't have to load all inodes to page cache + /// under *direct* metadata mode. It helps save memory usage. + /// [idx1:u32, idx2:u32, idx3:u32 ...] 
+ s_prefetch_table_offset: u64, + /// V5: Offset of blob table + s_blob_table_offset: u64, + /// V5: Size of inode table + s_inode_table_entries: u32, + s_prefetch_table_entries: u32, // 64 bytes + /// V5: Entries of blob table + s_blob_table_size: u32, + s_extended_blob_table_entries: u32, // 72 bytes + /// Extended Blob Table + s_extended_blob_table_offset: u64, // 80 bytes --- reduce me from `RAFS_SUPERBLOCK_RESERVED_SIZE` + /// Unused area + s_reserved: [u8; RAFSV5_SUPERBLOCK_RESERVED_SIZE], +} + +impl RafsV5SuperBlock { + /// Create a new instance of `RafsV5SuperBlock`. + pub fn new() -> Self { + Self::default() + } + + /// Check whether it's a valid Rafs v5 super block. + pub fn detect(&self) -> bool { + self.is_rafs_v5() + } + + /// Check whether it's super block for Rafs v4/v5. + pub fn is_rafs_v5(&self) -> bool { + self.magic() == RAFSV5_SUPER_MAGIC && self.version() == RAFS_SUPER_VERSION_V5 + } + + /// Validate the Rafs v5 super block. + pub fn validate(&self, meta_size: u64) -> Result<()> { + if !self.is_rafs_v5() { + return Err(einval!("invalid super block version number")); + } else if self.sb_size() as usize != RAFSV5_SUPERBLOCK_SIZE + || meta_size <= RAFSV5_SUPERBLOCK_SIZE as u64 + { + return Err(einval!("invalid super block blob size")); + } else if !self.block_size().is_power_of_two() + || self.block_size() < 0x1000 + || (self.block_size() as u64 > RAFS_MAX_CHUNK_SIZE && self.block_size() != 4 << 20) + { + // Stargz has a special chunk size of 4MB. + return Err(einval!("invalid block size")); + } else if RafsSuperFlags::from_bits(self.flags()).is_none() { + return Err(einval!("invalid super block flags")); + } + + let meta_range = MetaRange::new( + RAFSV5_SUPERBLOCK_SIZE as u64, + meta_size - RAFSV5_SUPERBLOCK_SIZE as u64, + true, + )?; + + let inodes_count = self.inodes_count(); + let inode_table_offset = self.inode_table_offset(); + let inode_table_entries = self.inode_table_entries() as u64; + let inode_table_size = inode_table_entries * size_of::() as u64; + let inode_table_range = MetaRange::new(inode_table_offset, inode_table_size, false)?; + if inodes_count > inode_table_entries || !inode_table_range.is_subrange_of(&meta_range) { + return Err(einval!("invalid inode table count, offset or entries.")); + } + + let blob_table_offset = self.blob_table_offset(); + let blob_table_size = self.blob_table_size() as u64; + let blob_table_range = MetaRange::new(blob_table_offset, blob_table_size, false)?; + if !blob_table_range.is_subrange_of(&meta_range) + || blob_table_range.intersect_with(&inode_table_range) + { + return Err(einval!("invalid blob table offset or size.")); + } + + let ext_blob_table_offset = self.extended_blob_table_offset(); + let ext_blob_table_size = + self.extended_blob_table_entries() as u64 * RAFSV5_EXT_BLOB_ENTRY_SIZE as u64; + let ext_blob_table_range = + MetaRange::new(ext_blob_table_offset, ext_blob_table_size, true)?; + if ext_blob_table_size != 0 + && (!ext_blob_table_range.is_subrange_of(&meta_range) + || ext_blob_table_range.intersect_with(&inode_table_range) + || ext_blob_table_range.intersect_with(&blob_table_range)) + { + return Err(einval!("invalid extended blob table offset or size.")); + } + + let prefetch_table_offset = self.prefetch_table_offset(); + let prefetch_table_size = self.prefetch_table_entries() as u64 * size_of::() as u64; + let prefetch_table_range = + MetaRange::new(prefetch_table_offset, prefetch_table_size, false)?; + if prefetch_table_size != 0 + && (!prefetch_table_range.is_subrange_of(&meta_range) + || 
prefetch_table_range.intersect_with(&inode_table_range) + || prefetch_table_range.intersect_with(&blob_table_range) + || (ext_blob_table_size != 0 + && prefetch_table_range.intersect_with(&ext_blob_table_range))) + { + return Err(einval!("invalid prefetch table offset or size.")); + } + + Ok(()) + } + + /// Set chunk size. + pub fn set_chunk_size(&mut self, chunk_size: u32) { + debug_assert!(chunk_size.is_power_of_two()); + self.s_block_size = chunk_size; + } + + /// Set compression algorithm to handle chunk of the Rafs filesystem. + pub fn set_compressor(&mut self, compressor: compress::Algorithm) { + let c: RafsSuperFlags = compressor.into(); + + self.s_flags &= !RafsSuperFlags::COMPRESSION_NONE.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_LZ4.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_GZIP.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_ZSTD.bits(); + self.s_flags |= c.bits(); + } + + /// Set message digest algorithm to handle chunk of the Rafs filesystem. + pub fn set_digester(&mut self, digester: digest::Algorithm) { + let c: RafsSuperFlags = digester.into(); + + self.s_flags &= !RafsSuperFlags::HASH_BLAKE3.bits(); + self.s_flags &= !RafsSuperFlags::HASH_SHA256.bits(); + self.s_flags |= c.bits(); + } + + /// Enable explicit Uid/Gid feature. + pub fn set_explicit_uidgid(&mut self) { + self.s_flags |= RafsSuperFlags::EXPLICIT_UID_GID.bits(); + } + + /// Enable support of filesystem xattr. + pub fn set_has_xattr(&mut self) { + self.s_flags |= RafsSuperFlags::HAS_XATTR.bits(); + } + + impl_pub_getter_setter!(magic, set_magic, s_magic, u32); + impl_pub_getter_setter!(version, set_version, s_fs_version, u32); + impl_pub_getter_setter!(sb_size, set_sb_size, s_sb_size, u32); + impl_pub_getter_setter!(block_size, set_block_size, s_block_size, u32); + impl_pub_getter_setter!(flags, set_flags, s_flags, u64); + impl_pub_getter_setter!(inodes_count, set_inodes_count, s_inodes_count, u64); + impl_pub_getter_setter!( + inode_table_entries, + set_inode_table_entries, + s_inode_table_entries, + u32 + ); + impl_pub_getter_setter!( + inode_table_offset, + set_inode_table_offset, + s_inode_table_offset, + u64 + ); + impl_pub_getter_setter!(blob_table_size, set_blob_table_size, s_blob_table_size, u32); + impl_pub_getter_setter!( + blob_table_offset, + set_blob_table_offset, + s_blob_table_offset, + u64 + ); + impl_pub_getter_setter!( + prefetch_table_offset, + set_prefetch_table_offset, + s_prefetch_table_offset, + u64 + ); + impl_pub_getter_setter!( + prefetch_table_entries, + set_prefetch_table_entries, + s_prefetch_table_entries, + u32 + ); + impl_pub_getter_setter!( + extended_blob_table_offset, + set_extended_blob_table_offset, + s_extended_blob_table_offset, + u64 + ); + impl_pub_getter_setter!( + extended_blob_table_entries, + set_extended_blob_table_entries, + s_extended_blob_table_entries, + u32 + ); + + /// Load a super block from a `RafsIoReader` object. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } + + /// Read Rafs v5 super block from a reader. 
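The superblock defined above is padded to a fixed 8192 bytes: the live fields occupy the first 80 bytes (the running-offset comments in the struct track this), and the remainder is the reserved area of `RAFSV5_SUPERBLOCK_SIZE - 80` bytes. A standalone stand-in for that layout, useful for double-checking the arithmetic; the struct here is an illustration, not the crate's type:

```rust
use std::mem::size_of;

const RAFSV5_SUPERBLOCK_SIZE: usize = 8192;
const RAFSV5_SUPERBLOCK_RESERVED_SIZE: usize = RAFSV5_SUPERBLOCK_SIZE - 80;

// Field-for-field stand-in for the on-disk super block: 80 bytes of fields
// followed by the reserved area, in the same order as the struct above.
#[repr(C)]
struct SuperBlockLayout {
    magic: u32,
    fs_version: u32,
    sb_size: u32,
    block_size: u32, // 16 bytes
    flags: u64,
    inodes_count: u64,
    inode_table_offset: u64,
    prefetch_table_offset: u64,
    blob_table_offset: u64, // 56 bytes
    inode_table_entries: u32,
    prefetch_table_entries: u32, // 64 bytes
    blob_table_size: u32,
    extended_blob_table_entries: u32, // 72 bytes
    extended_blob_table_offset: u64,  // 80 bytes
    reserved: [u8; RAFSV5_SUPERBLOCK_RESERVED_SIZE],
}

fn main() {
    assert_eq!(size_of::<SuperBlockLayout>(), RAFSV5_SUPERBLOCK_SIZE);
}
```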
+ pub fn read(r: &mut RafsIoReader) -> Result { + let mut sb = RafsV5SuperBlock::new(); + + r.read_exact(sb.as_mut())?; + + Ok(sb) + } +} + +impl RafsStore for RafsV5SuperBlock { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + w.validate_alignment(self.as_ref().len(), RAFSV5_ALIGNMENT) + } +} + +impl_bootstrap_converter!(RafsV5SuperBlock); + +impl Default for RafsV5SuperBlock { + fn default() -> Self { + Self { + s_magic: u32::to_le(RAFSV5_SUPER_MAGIC as u32), + s_fs_version: u32::to_le(RAFS_SUPER_VERSION_V5), + s_sb_size: u32::to_le(RAFSV5_SUPERBLOCK_SIZE as u32), + s_block_size: u32::to_le(RAFS_DEFAULT_CHUNK_SIZE as u32), + s_flags: u64::to_le(0), + s_inodes_count: u64::to_le(0), + s_inode_table_entries: u32::to_le(0), + s_inode_table_offset: u64::to_le(0), + s_prefetch_table_offset: u64::to_le(0), + s_prefetch_table_entries: u32::to_le(0), + s_blob_table_size: u32::to_le(0), + s_blob_table_offset: u64::to_le(0), + s_extended_blob_table_offset: u64::to_le(0), + s_extended_blob_table_entries: u32::to_le(0), + s_reserved: [0u8; RAFSV5_SUPERBLOCK_RESERVED_SIZE], + } + } +} + +impl Display for RafsV5SuperBlock { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "superblock: magic {:x}, version {:x}, sb_size {:x}, block_size {:x}, flags {:x}, inode_count {}", + self.magic(), self.version(), self.sb_size(), self.block_size(), + self.flags(), self.s_inodes_count) + } +} + +/// Rafs v5 on disk inode offset table. +#[derive(Clone, Default)] +pub struct RafsV5InodeTable { + /// Inode offset array. + pub data: Vec, +} + +impl RafsV5InodeTable { + /// Create a new instance of `RafsV5InodeTable`. + pub fn new(entries: usize) -> Self { + let table_size = rafsv5_align(entries * size_of::()) / size_of::(); + RafsV5InodeTable { + data: vec![0; table_size], + } + } + + /// Get size in bytes of the Rafs v5 inode table. + #[inline] + pub fn size(&self) -> usize { + rafsv5_align(self.data.len() * size_of::()) + } + + /// Get number of inodes in the table. + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// Check whether the table is empty or not. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Set inode offset in the metadata blob for an inode. + pub fn set(&mut self, ino: Inode, offset: u32) -> Result<()> { + if ino == 0 || ino > self.data.len() as u64 { + return Err(einval!(format!( + "invalid inode number {}, max {}", + ino, + self.data.len() + ))); + } else if offset as usize <= RAFSV5_SUPERBLOCK_SIZE || offset & 0x7 != 0 { + return Err(einval!(format!("invalid inode offset 0x{:x}", offset))); + } + + // The offset is aligned with 8 bytes to make it easier to validate RafsV5Inode. + let offset = offset >> 3; + self.data[(ino - 1) as usize] = u32::to_le(offset as u32); + + Ok(()) + } + + /// Get inode offset in the metadata blob of an inode. + pub fn get(&self, ino: Inode) -> Result { + if ino == 0 || ino > self.data.len() as u64 { + return Err(enoent!()); + } + + let offset = u32::from_le(self.data[(ino - 1) as usize]) as usize; + if offset <= (RAFSV5_SUPERBLOCK_SIZE >> 3) || offset >= (1usize << 29) { + return Err(einval!(format!( + "invalid offset 0x{:x} for inode {}", + offset, ino + ))); + } + + Ok((offset << 3) as u32) + } + + /// Load inode offset table for a `RafsIoReader` object. 
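The inode table above stores each inode's offset from the superblock base divided by 8 (offsets are 8-byte aligned), which is also why the module-level documentation gives `inode_offset_from_sb = inode_table[child_index] << 3`. A minimal sketch of the same encode/decode round trip over a plain `Vec<u32>`; the validity checks and error handling of the real `set`/`get` are omitted:

```rust
/// Encode an 8-byte-aligned offset for inode `ino` (1-based) into the table.
fn set(table: &mut [u32], ino: u64, offset: u32) {
    debug_assert!(offset & 0x7 == 0, "offsets must be 8-byte aligned");
    table[(ino - 1) as usize] = u32::to_le(offset >> 3);
}

/// Decode the offset back: shift left by 3 to restore the byte offset.
fn get(table: &[u32], ino: u64) -> u32 {
    u32::from_le(table[(ino - 1) as usize]) << 3
}

fn main() {
    let mut table = vec![0u32; 2];
    // The same value exercised by this file's `test_rafsv5_inode_table`:
    // 0x2008 survives the >>3 / <<3 round trip because it is 8-byte aligned.
    set(&mut table, 1, 0x2008);
    assert_eq!(get(&table, 1), 0x2008);
    // The raw slot holds the offset divided by 8.
    assert_eq!(u32::from_le(table[0]), 0x2008 >> 3);
}
```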
+ pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + let (_, data, _) = unsafe { self.data.align_to_mut::() }; + r.read_exact(data)?; + Ok(()) + } +} + +impl RafsStore for RafsV5InodeTable { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + let (_, data, _) = unsafe { self.data.align_to::() }; + + w.write_all(data)?; + w.validate_alignment(data.len(), RAFSV5_ALIGNMENT) + } +} + +/// Rafs v5 on disk inode prefetch table. +/// +/// From super block disk structure, its start offset can be told. +/// In order not to load every meta/inode to page cache under rafs Direct +/// mode, which aims at saving physical memory. This prefetch table is +/// introduce. Regular files or directories which are specified during image +/// building will have their inode index persist in this disk table. +/// For a single directory, only its inode index will be put into the table. +/// But all of its descendants files(recursively) will be prefetch(by hint) +/// when rafs is mounted at the very beginning. +#[derive(Clone, Default)] +pub struct RafsV5PrefetchTable { + /// List of inode numbers for prefetch. + /// Note: It's not inode index of inodes table being stored here. + pub inodes: Vec, +} + +impl RafsV5PrefetchTable { + /// Create a new instance of `RafsV5PrefetchTable`. + pub fn new() -> RafsV5PrefetchTable { + RafsV5PrefetchTable { inodes: vec![] } + } + + /// Get content size of the inode prefetch table. + pub fn size(&self) -> usize { + rafsv5_align(self.len() * size_of::()) + } + + /// Get number of entries in the prefetch table. + pub fn len(&self) -> usize { + self.inodes.len() + } + + /// Check whether the inode prefetch table is empty. + pub fn is_empty(&self) -> bool { + self.inodes.is_empty() + } + + /// Add an inode into the inode prefetch table. + pub fn add_entry(&mut self, ino: u32) { + self.inodes.push(ino); + } + + /// Store the inode prefetch table to a writer. + pub fn store(&mut self, w: &mut dyn RafsIoWrite) -> Result { + let (_, data, _) = unsafe { self.inodes.align_to::() }; + w.write_all(data.as_ref())?; + + // OK. Let's see if we have to align... :-( + let cur_len = self.inodes.len() * size_of::(); + let padding_bytes = rafsv5_align(cur_len) - cur_len; + w.write_padding(padding_bytes)?; + + Ok(data.len() + padding_bytes) + } + + /// Load a inode prefetch table from a reader. + /// + /// Note: Generally, prefetch happens after loading bootstrap, so with methods operating + /// files with changing their offset won't bring errors. But we still use `pread` now so as + /// to make this method more stable and robust. Even dup(2) can't give us a separated file struct. + pub fn load_prefetch_table_from( + &mut self, + r: &mut RafsIoReader, + offset: u64, + entries: usize, + ) -> Result { + self.inodes = vec![0u32; entries]; + + let (_, data, _) = unsafe { self.inodes.align_to_mut::() }; + r.seek_to_offset(offset)?; + r.read_exact(data)?; + + Ok(data.len()) + } +} + +/// Rafs v5 blob description table. +#[derive(Clone, Debug, Default)] +pub struct RafsV5BlobTable { + /// Base blob information array. + pub entries: Vec>, + /// Extended blob information array. + pub extended: RafsV5ExtBlobTable, +} + +impl RafsV5BlobTable { + /// Create a new instance of `RafsV5BlobTable`. 
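The `load` implementation that follows parses the blob table as a sequence of frames: a `u32` prefetch (readahead) offset, a `u32` prefetch size, then the blob ID string, with a `\0` separator between entries and none after the last. A standalone parser sketch over a plain byte slice that mirrors this framing; the real code additionally consults the extended table and builds `BlobInfo` objects, which is omitted here, and the IDs in the example are placeholders:

```rust
use std::convert::TryInto;

/// One parsed blob table frame.
#[derive(Debug, PartialEq)]
struct BlobFrame {
    readahead_offset: u32,
    readahead_size: u32,
    blob_id: String,
}

/// Parse `u32 | u32 | blob_id` frames separated by '\0' (no trailing '\0').
fn parse_blob_table(mut buf: &[u8]) -> Vec<BlobFrame> {
    let mut frames = Vec::new();
    while buf.len() > 8 {
        let readahead_offset = u32::from_le_bytes(buf[0..4].try_into().unwrap());
        let readahead_size = u32::from_le_bytes(buf[4..8].try_into().unwrap());
        // Scan for the '\0' separator (or the end of the buffer for the last entry).
        let mut pos = 8;
        while pos < buf.len() && buf[pos] != 0 {
            pos += 1;
        }
        let blob_id = String::from_utf8_lossy(&buf[8..pos]).into_owned();
        frames.push(BlobFrame { readahead_offset, readahead_size, blob_id });
        buf = if pos == buf.len() { &buf[pos..] } else { &buf[pos + 1..] };
    }
    frames
}

fn main() {
    // Two hypothetical entries; "blob-a"/"blob-b" are placeholders, not real digests.
    let mut buf = Vec::new();
    buf.extend_from_slice(&1u32.to_le_bytes());
    buf.extend_from_slice(&2u32.to_le_bytes());
    buf.extend_from_slice(b"blob-a");
    buf.push(0);
    buf.extend_from_slice(&3u32.to_le_bytes());
    buf.extend_from_slice(&4u32.to_le_bytes());
    buf.extend_from_slice(b"blob-b");

    let frames = parse_blob_table(&buf);
    assert_eq!(frames.len(), 2);
    assert_eq!(frames[0].readahead_offset, 1);
    assert_eq!(frames[1].blob_id, "blob-b");
}
```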
+ pub fn new() -> Self { + RafsV5BlobTable { + entries: Vec::new(), + extended: RafsV5ExtBlobTable::new(), + } + } + + /// Get blob table size, aligned with RAFS_ALIGNMENT bytes + pub fn size(&self) -> usize { + if self.entries.is_empty() { + return 0; + } + // Blob entry split with '\0' + rafsv5_align( + self.entries.iter().fold(0usize, |size, entry| { + let entry_size = size_of::() * 2 + entry.blob_id().len(); + size + entry_size + 1 + }) - 1, + ) + } + + /// Add information for new blob into the blob information table. + #[allow(clippy::too_many_arguments)] + pub fn add( + &mut self, + blob_id: String, + prefetch_offset: u32, + prefetch_size: u32, + chunk_size: u32, + chunk_count: u32, + uncompressed_size: u64, + compressed_size: u64, + blob_features: BlobFeatures, + flags: RafsSuperFlags, + is_chunkdict: bool, + ) -> u32 { + let blob_index = self.entries.len() as u32; + let mut blob_info = BlobInfo::new( + blob_index, + blob_id, + uncompressed_size, + compressed_size, + chunk_size, + chunk_count, + blob_features, + ); + + blob_info.set_compressor(flags.into()); + blob_info.set_digester(flags.into()); + blob_info.set_prefetch_info(prefetch_offset as u64, prefetch_size as u64); + if is_chunkdict { + blob_info.set_chunkdict_generated(true); + } + + self.entries.push(Arc::new(blob_info)); + self.extended.add( + chunk_count, + uncompressed_size, + compressed_size, + blob_features.bits(), + ); + + blob_index + } + + /// Get base information for a blob. + #[inline] + pub fn get(&self, blob_index: u32) -> Result> { + if blob_index >= self.entries.len() as u32 { + return Err(enoent!("blob not found")); + } + Ok(self.entries[blob_index as usize].clone()) + } + + /// Load blob information table from a reader. + pub fn load( + &mut self, + r: &mut RafsIoReader, + blob_table_size: u32, + chunk_size: u32, + flags: RafsSuperFlags, + ) -> Result<()> { + if blob_table_size == 0 { + return Ok(()); + } + + debug!("RAFS v5 blob table size {}", blob_table_size); + let mut data = vec![0u8; blob_table_size as usize]; + r.read_exact(&mut data)?; + + // Each entry frame looks like: + // u32 | u32 | string | trailing '\0' , except that the last entry has no trailing '\0' + let mut buf = data.as_mut_slice(); + while buf.len() > 2 * size_of::() { + let readahead_offset = + unsafe { std::ptr::read_unaligned::(buf[0..4].as_ptr() as *const u32) }; + let readahead_size = + unsafe { std::ptr::read_unaligned::(buf[4..8].as_ptr() as *const u32) }; + + let mut pos = 8; + while pos < buf.len() && buf[pos] != 0 { + pos += 1; + } + let blob_id = std::str::from_utf8(&buf[8..pos]) + .map(|v| v.to_owned()) + .map_err(|e| einval!(e))?; + if pos == buf.len() { + buf = &mut buf[pos..]; + } else { + buf = &mut buf[pos + 1..]; + } + debug!("blob {} {:?}", self.entries.len(), blob_id); + + let index = self.entries.len(); + let (chunk_count, uncompressed_size, compressed_size, blob_features) = + // For compatibility, blob table might not be associated with extended blob table. 
+ if !self.extended.entries.is_empty() { + let ext_len = self.extended.entries.len(); + if index >= ext_len { + error!( "Extended blob table({}) is shorter than blob table", ext_len); + return Err(einval!()); + } + let entry = &self.extended.entries[index]; + let blob_features = BlobFeatures::from_bits(entry.features).ok_or_else(|| einval!("invalid blob feature flags"))?; + (entry.chunk_count, entry.uncompressed_size, entry.compressed_size, blob_features) + } else { + (0, 0, 0, BlobFeatures::_V5_NO_EXT_BLOB_TABLE) + }; + + let mut blob_info = BlobInfo::new( + index as u32, + blob_id, + uncompressed_size, + compressed_size, + chunk_size, + chunk_count, + blob_features, + ); + + blob_info.set_compressor(flags.into()); + blob_info.set_digester(flags.into()); + blob_info.set_prefetch_info(readahead_offset as u64, readahead_size as u64); + + self.entries.push(Arc::new(blob_info)); + } + + Ok(()) + } + + /// Get the base blob information array. + pub fn get_all(&self) -> Vec> { + self.entries.clone() + } + + /// Store the extended blob information array. + pub fn store_extended(&self, w: &mut dyn RafsIoWrite) -> Result { + self.extended.store(w) + } +} + +impl RafsStore for RafsV5BlobTable { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + let mut size = 0; + self.entries + .iter() + .enumerate() + .try_for_each::<_, Result<()>>(|(idx, entry)| { + w.write_all(&u32::to_le_bytes(entry.prefetch_offset() as u32))?; + w.write_all(&u32::to_le_bytes(entry.prefetch_size() as u32))?; + w.write_all(entry.blob_id().as_bytes())?; + if idx != self.entries.len() - 1 { + size += size_of::() * 2 + entry.blob_id().len() + 1; + w.write_all(&[b'\0'])?; + } else { + size += size_of::() * 2 + entry.blob_id().len(); + } + Ok(()) + })?; + + let padding = rafsv5_align(size) - size; + w.write_padding(padding)?; + size += padding; + + w.validate_alignment(size, RAFSV5_ALIGNMENT) + } +} + +/// Rafs v5 extended blob information on disk metadata. +/// +/// RafsV5ExtDBlobEntry is appended to the tail of bootstrap, +/// can be used as an extended table for the original blob table. +// This disk structure is well defined and rafs aligned. +#[repr(C)] +#[derive(Clone)] +pub struct RafsV5ExtBlobEntry { + /// Number of chunks in a blob file. + pub chunk_count: u32, + pub features: u32, + pub uncompressed_size: u64, // -- 16 Bytes + pub compressed_size: u64, // -- 24 Bytes + pub reserved2: [u8; RAFSV5_EXT_BLOB_RESERVED_SIZE], +} + +// Implement Debug trait ourselves, as rust prior to 1.47 doesn't impl Debug for array with size +// larger than 32 +impl Debug for RafsV5ExtBlobEntry { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + f.debug_struct("ExtendedBlobTableEntry") + .field("chunk_count", &self.chunk_count) + .field("blob_cache_size", &self.uncompressed_size) + .field("compressed_blob_size", &self.compressed_size) + .field("features", &self.features) + .finish() + } +} + +impl Default for RafsV5ExtBlobEntry { + fn default() -> Self { + RafsV5ExtBlobEntry { + chunk_count: 0, + features: 0, + uncompressed_size: 0, + compressed_size: 0, + reserved2: [0; RAFSV5_EXT_BLOB_RESERVED_SIZE], + } + } +} + +impl RafsV5ExtBlobEntry { + pub fn new( + chunk_count: u32, + blob_cache_size: u64, + compressed_blob_size: u64, + features: u32, + ) -> Self { + Self { + chunk_count, + uncompressed_size: blob_cache_size, + compressed_size: compressed_blob_size, + features, + ..Default::default() + } + } +} + +/// Rafs v5 on disk extended blob information table. 
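The `size()` and `store()` methods above imply a simple closed form for the on-disk blob table size: each entry contributes 8 bytes of prefetch fields plus its blob ID, consecutive entries are joined by a single `\0`, and the total is rounded up to the 8-byte alignment. A small sketch of that formula; the blob IDs used in the example are placeholders:

```rust
const RAFSV5_ALIGNMENT: usize = 8;

fn align(size: usize) -> usize {
    (size + RAFSV5_ALIGNMENT - 1) & !(RAFSV5_ALIGNMENT - 1)
}

/// On-disk blob table size: per entry, 4 + 4 bytes of prefetch info plus the ID,
/// one '\0' between consecutive entries, the total rounded up to 8 bytes.
fn blob_table_size(blob_ids: &[&str]) -> usize {
    if blob_ids.is_empty() {
        return 0;
    }
    let raw: usize = blob_ids.iter().map(|id| 8 + id.len() + 1).sum::<usize>() - 1;
    align(raw)
}

fn main() {
    // Two placeholder IDs: (8 + 6 + 1) + (8 + 4 + 1) - 1 = 27 bytes, aligned up to 32.
    assert_eq!(blob_table_size(&["blob-a", "sha1"]), 32);
    assert_eq!(blob_table_size(&[]), 0);
}
```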
+#[derive(Clone, Debug, Default)] +pub struct RafsV5ExtBlobTable { + /// The vector index means blob index, every entry represents + /// extended information of a blob. + pub entries: Vec>, +} + +impl RafsV5ExtBlobTable { + /// Create a new instance of `RafsV5ExtBlobTable`. + pub fn new() -> Self { + Self { + entries: Vec::new(), + } + } + + /// Get content size of the extended blob information table. + pub fn size(&self) -> usize { + // `ExtendedBlobTableEntry` is already a well defined disk structure and rafs-aligned + // So directly use its `size_of()` is reliable. + rafsv5_align(size_of::() * self.entries.len()) + } + + /// Get number of entries in the extended blob information table. + pub fn entries(&self) -> usize { + self.entries.len() + } + + /// Add a new entry into the extended blob information table. + pub fn add( + &mut self, + chunk_count: u32, + blob_cache_size: u64, + compressed_blob_size: u64, + features: u32, + ) { + self.entries.push(Arc::new(RafsV5ExtBlobEntry::new( + chunk_count, + blob_cache_size, + compressed_blob_size, + features, + ))); + } + + /// Get extended information about a blob. + pub fn get(&self, blob_index: u32) -> Option> { + let len = self.entries.len(); + + if len == 0 || blob_index as usize >= len { + None + } else { + Some(self.entries[blob_index as usize].clone()) + } + } + + /// Load extended blob information table from a reader. + pub fn load(&mut self, r: &mut RafsIoReader, count: usize) -> Result<()> { + let mut entries = Vec::::with_capacity(count); + // Safe because it is already reserved enough space + let (_, data, _) = unsafe { + entries.set_len(count); + (&mut entries).align_to_mut::() + }; + + r.read_exact(data)?; + self.entries = entries.iter().cloned().map(Arc::new).collect(); + + Ok(()) + } +} + +impl RafsStore for RafsV5ExtBlobTable { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + let mut size = 0; + + // Store the list of entries + self.entries + .iter() + .enumerate() + .try_for_each::<_, Result<()>>(|(_idx, entry)| { + w.write_all(&u32::to_le_bytes(entry.chunk_count))?; + w.write_all(&u32::to_le_bytes(entry.features))?; + w.write_all(&u64::to_le_bytes(entry.uncompressed_size))?; + w.write_all(&u64::to_le_bytes(entry.compressed_size))?; + w.write_all(&entry.reserved2)?; + size += RAFSV5_EXT_BLOB_ENTRY_SIZE; + Ok(()) + })?; + + // Append padding for RAFS alignment + let padding = rafsv5_align(size) - size; + w.write_padding(padding)?; + size += padding; + + w.validate_alignment(size, RAFSV5_ALIGNMENT) + } +} + +/// Rafs v5 inode on disk metadata. +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +pub struct RafsV5Inode { + /// sha256(sha256(chunk) + ...), [char; RAFS_SHA256_LENGTH] + pub i_digest: RafsDigest, // 32 + /// parent inode number + pub i_parent: u64, + /// Artifact inode number set by the nydus image builder. Start from RAFS_ROOT_INODE = 1. + pub i_ino: u64, + pub i_uid: u32, + pub i_gid: u32, + pub i_projid: u32, + pub i_mode: u32, // 64 + pub i_size: u64, + pub i_blocks: u64, + pub i_flags: RafsInodeFlags, + pub i_nlink: u32, + /// for dir, child start index + pub i_child_index: u32, // 96 + /// for dir, means child count. + /// for regular file, means chunk info count. 
+    pub i_child_count: u32,
+    /// file name size, [char; i_name_size]
+    pub i_name_size: u16,
+    /// symlink path size, [char; i_symlink_size]
+    pub i_symlink_size: u16, // 104
+    // inode device block number, ignored for non-special files
+    pub i_rdev: u32,
+    // for alignment reason, we put nsec first
+    pub i_mtime_nsec: u32,
+    pub i_mtime: u64, // 120
+    pub i_reserved: [u8; 8], // 128
+}
+
+impl RafsV5Inode {
+    /// Create a new instance of `RafsV5Inode`.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set size of the file name.
+    #[inline]
+    pub fn set_name_size(&mut self, name_len: usize) {
+        self.i_name_size = name_len as u16;
+    }
+
+    /// Set size of the symlink path.
+    #[inline]
+    pub fn set_symlink_size(&mut self, symlink_len: usize) {
+        self.i_symlink_size = symlink_len as u16;
+    }
+
+    /// Get on disk size of the inode content.
+    #[inline]
+    pub fn size(&self) -> usize {
+        size_of::<Self>()
+            + (rafsv5_align(self.i_name_size as usize) + rafsv5_align(self.i_symlink_size as usize))
+                as usize
+    }
+
+    /// Get the uid and the gid of the inode.
+    #[inline]
+    pub fn uidgid(&self) -> (u32, u32) {
+        (self.i_uid, self.i_gid)
+    }
+
+    /// Get the modification time of the inode.
+    #[inline]
+    pub fn mtime(&self) -> (u64, u32) {
+        (self.i_mtime, self.i_mtime_nsec)
+    }
+
+    /// Get the mode of the inode.
+    #[inline]
+    pub fn mode(&self) -> u32 {
+        self.i_mode
+    }
+
+    /// Check whether the inode is a directory.
+    #[inline]
+    pub fn is_dir(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32
+    }
+
+    /// Check whether the inode is a symlink.
+    #[inline]
+    pub fn is_symlink(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32
+    }
+
+    /// Check whether the inode is a regular file.
+    #[inline]
+    pub fn is_reg(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32
+    }
+
+    /// Check whether the inode is a char device node.
+    pub fn is_chrdev(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32
+    }
+
+    /// Check whether the inode is a block device node.
+    pub fn is_blkdev(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32
+    }
+
+    /// Check whether the inode is a FIFO.
+    pub fn is_fifo(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32
+    }
+
+    /// Check whether the inode is a socket.
+    pub fn is_sock(&self) -> bool {
+        self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32
+    }
+
+    /// Check whether the inode is a hardlink.
+    #[inline]
+    pub fn is_hardlink(&self) -> bool {
+        self.is_reg() && self.i_nlink > 1
+    }
+
+    /// Check whether the inode has the hardlink flag set.
+    pub fn has_hardlink(&self) -> bool {
+        self.i_flags.contains(RafsInodeFlags::HARDLINK)
+    }
+
+    /// Check whether the inode has extended attributes.
+    #[inline]
+    pub fn has_xattr(&self) -> bool {
+        self.i_flags.contains(RafsInodeFlags::XATTR)
+    }
+
+    /// Check whether the inode has hole chunks.
+    #[inline]
+    pub fn has_hole(&self) -> bool {
+        self.i_flags.contains(RafsInodeFlags::HAS_HOLE)
+    }
+
+    /// Load an inode from a reader.
+    pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> {
+        r.read_exact(self.as_mut())
+    }
+
+    /// Load the file name of the inode from a reader.
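Before the `load_file_name` implementation that follows, a quick worked check of the size arithmetic in `size()` above. This is a std-only sketch, not nydus code: the fixed part of `RafsV5Inode` is 128 bytes per the offset comments in the struct, and the trailing name and symlink strings are each padded to the 8-byte RAFS alignment, which is why `test_new_inode` later in this file expects 136 bytes for a 3-byte name.

```rust
// Minimal sketch of the on-disk size of a RAFS v5 inode record.
fn align8(size: usize) -> usize {
    (size + 7) & !7
}

fn v5_inode_disk_size(name_len: usize, symlink_len: usize) -> usize {
    const FIXED_PART: usize = 128; // size_of::<RafsV5Inode>(), per the `// 128` comment above
    FIXED_PART + align8(name_len) + align8(symlink_len)
}

fn main() {
    // 3-byte name, no symlink: 128 + 8 + 0 = 136, as asserted by test_new_inode below.
    assert_eq!(v5_inode_disk_size(3, 0), 136);
    // 4-byte name, 6-byte symlink: 128 + 8 + 8 = 144, matching test_inode_load_store below.
    assert_eq!(v5_inode_disk_size(4, 6), 144);
}
```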
+ pub fn load_file_name(&self, r: &mut RafsIoReader) -> Result { + let mut name_buf = vec![0u8; self.i_name_size as usize]; + r.read_exact(name_buf.as_mut_slice())?; + r.seek_to_next_aligned(name_buf.len(), RAFSV5_ALIGNMENT)?; + Ok(bytes_to_os_str(&name_buf).to_os_string()) + } +} + +impl_bootstrap_converter!(RafsV5Inode); + +impl From<&dyn RafsInodeExt> for RafsV5Inode { + fn from(inode: &dyn RafsInodeExt) -> Self { + let attr = inode.get_attr(); + + RafsV5Inode { + i_digest: inode.get_digest(), + i_parent: inode.parent(), + i_ino: attr.ino, + i_uid: attr.uid, + i_gid: attr.gid, + i_projid: inode.projid(), + i_mode: attr.mode, + i_size: attr.size, + i_blocks: attr.blocks, + i_flags: RafsInodeFlags::from_bits_truncate(inode.flags()), + i_nlink: attr.nlink, + i_child_index: inode.get_child_index().unwrap_or(0), + i_child_count: inode.get_child_count(), + i_name_size: inode.get_name_size(), + i_symlink_size: inode.get_symlink_size(), + i_rdev: attr.rdev, + i_mtime_nsec: attr.mtimensec, + i_mtime: attr.mtime, + i_reserved: [0u8; 8], + } + } +} + +/// A in-memory wrapper of a Rafs v5 inode. +pub struct RafsV5InodeWrapper<'a> { + pub name: &'a OsStr, + pub symlink: Option<&'a OsStr>, + pub inode: &'a RafsV5Inode, +} + +impl<'a> RafsStore for RafsV5InodeWrapper<'a> { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + let mut size: usize = 0; + + let inode_data = self.inode.as_ref(); + w.write_all(inode_data)?; + size += inode_data.len(); + + let name = self.name.as_bytes(); + w.write_all(name)?; + size += name.len(); + let padding = rafsv5_align(self.inode.i_name_size as usize) - name.len(); + w.write_padding(padding)?; + size += padding; + + if let Some(symlink) = self.symlink { + let symlink_path = symlink.as_bytes(); + w.write_all(symlink_path)?; + size += symlink_path.len(); + let padding = rafsv5_align(self.inode.i_symlink_size as usize) - symlink_path.len(); + w.write_padding(padding)?; + size += padding; + } + + w.validate_alignment(size, RAFSV5_ALIGNMENT) + } +} + +/// Rafs v5 chunk on disk metadata. +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct RafsV5ChunkInfo { + /// sha256(chunk), [char; RAFS_SHA256_LENGTH] + pub block_id: RafsDigest, // 32 + /// blob index. + pub blob_index: u32, + /// chunk flags + pub flags: BlobChunkFlags, // 40 + /// compressed size in blob + pub compressed_size: u32, + /// uncompressed size in blob + pub uncompressed_size: u32, // 48 + /// compressed offset in blob + pub compressed_offset: u64, // 56 + /// uncompressed offset in blob + pub uncompressed_offset: u64, // 64 + /// offset in file + pub file_offset: u64, // 72 + /// chunk index, it's allocated sequentially and starting from 0 for one blob. + pub index: u32, + /// reserved + pub reserved: u32, //80 +} + +impl RafsV5ChunkInfo { + /// Create a new instance of `RafsV5ChunkInfo`. + pub fn new() -> Self { + RafsV5ChunkInfo::default() + } + + /// Load a Rafs v5 indoe from a reader. 
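The offset comments in `RafsV5ChunkInfo` above pin the record at 80 bytes. Before its `load` implementation below, a minimal std-only arithmetic check (illustration only, not nydus code):

```rust
fn main() {
    let digest = 32; // block_id: SHA-256 digest
    let u32_fields = 6 * 4; // blob_index, flags, compressed/uncompressed size, index, reserved
    let u64_fields = 3 * 8; // compressed offset, uncompressed offset, file offset
    assert_eq!(digest + u32_fields + u64_fields, 80); // matches the `//80` comment above
}
```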
+ pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +impl RafsStore for RafsV5ChunkInfo { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + w.validate_alignment(self.as_ref().len(), RAFSV5_ALIGNMENT) + } +} + +impl_bootstrap_converter!(RafsV5ChunkInfo); + +impl Display for RafsV5ChunkInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!( + f, + "file_offset {}, compress_offset {}, compress_size {}, uncompress_offset {}, uncompress_size {}, blob_index {}, block_id {}, index {}, is_compressed {}", + self.file_offset, + self.compressed_offset, + self.compressed_size, + self.uncompressed_offset, + self.uncompressed_size, + self.blob_index, + self.block_id, + self.index, + self.flags.contains(BlobChunkFlags::COMPRESSED), + ) + } +} + +/// Rafs v5 on disk extended attribute table. +/// +/// A on disk Rafs v5 extended attribute table contains an u64 content size, followed by extended +/// attribute pairs. +#[repr(C)] +#[derive(Copy, Clone, Default, Debug)] +pub struct RafsV5XAttrsTable { + pub size: u64, +} + +impl RafsV5XAttrsTable { + /// Create a new instance of `RafsV5XAttrsTable`. + pub fn new() -> Self { + RafsV5XAttrsTable { + ..Default::default() + } + } + + /// Get content size of the extended attribute table. + #[inline] + pub fn size(self) -> usize { + self.size as usize + } + + /// Get aligned content size of the extended attribute table. + #[inline] + pub fn aligned_size(self) -> usize { + rafsv5_align(self.size()) + } +} + +impl_bootstrap_converter!(RafsV5XAttrsTable); + +impl RafsXAttrs { + /// Get aligned content size of the extended attribute table. + #[inline] + pub fn aligned_size_v5(&self) -> usize { + rafsv5_align(self.size()) + } + + pub fn store_v5(&self, w: &mut dyn RafsIoWrite) -> Result { + let mut size = 0; + + if !self.pairs.is_empty() { + let size_data = (self.size() as u64).to_le_bytes(); + w.write_all(&size_data)?; + size += size_data.len(); + + for (key, value) in self.pairs.iter() { + let pair_size = key.byte_size() + 1 + value.len(); + let pair_size_data = (pair_size as u32).to_le_bytes(); + w.write_all(&pair_size_data)?; + size += pair_size_data.len(); + + let key_data = key.as_bytes(); + w.write_all(key_data)?; + w.write_all(&[0u8])?; + size += key_data.len() + 1; + + w.write_all(value)?; + size += value.len(); + } + } + + let padding = rafsv5_align(size) - size; + w.write_padding(padding)?; + size += padding; + + w.validate_alignment(size, RAFSV5_ALIGNMENT) + } +} + +/// Allocate a group of `BlobIoVec` to handle blob io to range `offset..(offset+size)`. +/// +/// The range `offset..(offset+size)` may be backed by multiple blobs, so a group of `BlobIoVec` will +/// be returned on success, each one covers a continuous range on a single blob. 
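Before `rafsv5_alloc_bio_vecs` below, a standalone sketch of the xattr region that `RafsXAttrs::store_v5` above lays out. This is std-only illustration code, not the nydus API; the accounting is inferred from the code above plus the 18 and 37 byte values in `test_rafsv5_new_xattrs` later in this file: a little-endian `u64` total size, then for each pair a `u32` record length, the NUL-terminated key and the raw value, with the whole region padded to 8 bytes.

```rust
// Sketch of the RAFS v5 xattr region layout (assumed helper names, std only).
fn align8(size: usize) -> usize {
    (size + 7) & !7
}

fn serialize_xattrs(pairs: &[(&str, &[u8])]) -> Vec<u8> {
    // Total size of all pair records, excluding the leading u64 itself.
    let body: usize = pairs.iter().map(|(k, v)| 4 + k.len() + 1 + v.len()).sum();
    let mut buf = Vec::new();
    buf.extend_from_slice(&(body as u64).to_le_bytes());
    for (key, value) in pairs {
        let pair_size = (key.len() + 1 + value.len()) as u32;
        buf.extend_from_slice(&pair_size.to_le_bytes());
        buf.extend_from_slice(key.as_bytes());
        buf.push(0); // keys are NUL terminated on disk
        buf.extend_from_slice(value);
    }
    buf.resize(align8(buf.len()), 0);
    buf
}

fn main() {
    // "user.key1" (9 bytes) plus a 4-byte value: 4 + 9 + 1 + 4 = 18, as in the test below.
    let value: &[u8] = &[1u8, 2, 3, 4];
    let region = serialize_xattrs(&[("user.key1", value)]);
    assert_eq!(u64::from_le_bytes(region[..8].try_into().unwrap()), 18);
    assert_eq!(region.len() % 8, 0);
}
```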
+pub(crate) fn rafsv5_alloc_bio_vecs( + inode: &I, + offset: u64, + size: usize, + user_io: bool, +) -> Result> { + let end = offset + .checked_add(size as u64) + .ok_or_else(|| einval!("invalid read size"))?; + let (index_start, index_end) = calculate_bio_chunk_index( + offset, + end, + inode.get_chunk_size() as u64, + inode.get_child_count(), + inode.has_hole(), + ); + trace!( + "alloc bio desc offset {} size {} i_size {} index_start {} index_end {} i_child_count {}", + offset, + size, + inode.size(), + index_start, + index_end, + inode.get_child_count() + ); + if size == 0 || index_start >= inode.get_chunk_count() { + return Ok(vec![]); + } + + let chunk = inode.get_chunk_info_v5(index_start)?; + let blob = inode.get_blob_by_index(chunk.blob_index())?; + let mut desc = BlobIoVec::new(blob.clone()); + if !add_chunk_to_bio_desc(&mut desc, offset, end, chunk, blob, user_io) { + return Err(einval!("failed to create blob io vector")); + } + + let mut descs = Vec::with_capacity(4); + for idx in index_start + 1..index_end { + let chunk = inode.get_chunk_info_v5(idx)?; + let blob = inode.get_blob_by_index(chunk.blob_index())?; + if blob.blob_index() != desc.blob_index() { + descs.push(desc); + desc = BlobIoVec::new(blob.clone()); + } + if !add_chunk_to_bio_desc(&mut desc, offset, end, chunk, blob, user_io) { + return Err(einval!("failed to create blob io vector")); + } + } + descs.push(desc); + + Ok(descs) +} + +/// Add a new bio covering the IO range into the provided bio desc. +/// +/// Returns true if caller should continue checking more chunks. +/// +/// # Parameters +/// - desc: the targeting bio desc. +/// - offset: IO offset to the file start, inclusive. +/// - end: IO end to the file start, exclusive. +/// - chunk: a data chunk overlapping with the IO range. +/// - chunk_size: chunk size. +/// - blob: the blob which the chunk data belongs to. +fn add_chunk_to_bio_desc( + desc: &mut BlobIoVec, + offset: u64, + end: u64, + chunk: Arc, + blob: Arc, + user_io: bool, +) -> bool { + // The chunk is ahead of the start of the range. + if offset >= (chunk.file_offset() + chunk.uncompressed_size() as u64) { + return true; + } + // The chunk is passing the end of the range. + if end <= chunk.file_offset() { + return false; + } + + let chunk_start = if offset > chunk.file_offset() { + offset - chunk.file_offset() + } else { + 0 + }; + let chunk_end = if end < (chunk.file_offset() + chunk.uncompressed_size() as u64) { + end - chunk.file_offset() + } else { + chunk.uncompressed_size() as u64 + }; + + let io_chunk = Arc::new(V5IoChunk { + // TODO: try to make `chunk_id` return Arc to get rid of potential memory copy + block_id: Arc::new(*chunk.chunk_id()), + blob_index: chunk.blob_index(), + index: chunk.index(), + compressed_offset: chunk.compressed_offset(), + uncompressed_offset: chunk.uncompressed_offset(), + compressed_size: chunk.compressed_size(), + uncompressed_size: chunk.uncompressed_size(), + flags: chunk.flags(), + }) as Arc; + let bio = BlobIoDesc::new( + blob, + io_chunk.into(), + chunk_start as u32, + (chunk_end - chunk_start) as u32, + user_io, + ); + desc.push(bio); + + true +} + +/// Calculate bio chunk indices that overlaps with the provided IO range. +/// +/// # Parameters +/// - offset: IO offset to the file start, inclusive. +/// - end: IO end to the file start, exclusive. +/// - chunk_size: chunk size. +/// - chunk_cnt: maximum number of chunks +/// - has_hole: whether a file has holes in it. 
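A standalone sketch of the index arithmetic that `calculate_bio_chunk_index`, defined immediately below, performs for the common no-hole case (std only, not nydus code; files with holes fall back to scanning every chunk):

```rust
fn chunk_index_range(offset: u64, end: u64, chunk_size: u64, chunk_cnt: u32) -> (u32, u32) {
    assert!(offset < end);
    let start = (offset / chunk_size) as u32;
    // The end index is exclusive and clamped to the chunk count.
    let stop = (((end - 1) / chunk_size) as u32 + 1).min(chunk_cnt);
    (start, stop)
}

fn main() {
    // A 2-byte read straddling the first chunk boundary touches chunks 0 and 1,
    // matching the (blksize - 1, 2, 0, 2) row of test_calculate_bio_chunk_index below.
    assert_eq!(chunk_index_range(1023, 1023 + 2, 1024, 4), (0, 2));
    // A 1-byte read fully inside chunk 1 touches only chunk 1.
    assert_eq!(chunk_index_range(1024, 1024 + 1, 1024, 4), (1, 2));
}
```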
+fn calculate_bio_chunk_index( + offset: u64, + end: u64, + chunk_size: u64, + chunk_cnt: u32, + has_hole: bool, +) -> (u32, u32) { + debug_assert!(offset < end); + + let index_start = if !has_hole { + (offset / chunk_size) as u32 + } else { + 0 + }; + let index_end = if !has_hole { + cmp::min(((end - 1) / chunk_size) as u32 + 1, chunk_cnt) + } else { + chunk_cnt + }; + + (index_start, index_end) +} + +pub(crate) fn rafsv5_align(size: usize) -> usize { + if size & (RAFSV5_ALIGNMENT - 1) == 0 { + size + } else { + size + (RAFSV5_ALIGNMENT - (size & (RAFSV5_ALIGNMENT - 1))) + } +} + +/// Validate inode metadata, include children, chunks and symblink etc. +/// +/// The default implementation is for rafs v5. The chunk data is not validated here, which will +/// be validate on fs read. +pub(crate) fn rafsv5_validate_inode( + inode: &dyn RafsInodeExt, + recursive: bool, + digester: digest::Algorithm, +) -> Result { + let child_count = inode.get_child_count(); + let expected_digest = inode.get_digest(); + let mut hasher = RafsDigest::hasher(digester); + + if inode.is_symlink() { + hasher.digest_update(inode.get_symlink()?.as_bytes()); + } else if inode.is_reg() { + for idx in 0..child_count { + let chunk = inode.get_chunk_info(idx)?; + let chunk_digest = chunk.chunk_id(); + + hasher.digest_update(chunk_digest.as_ref()); + } + } else if inode.is_dir() { + for idx in 0..child_count { + let child = inode.get_child_by_index(idx)?; + if (child.is_reg() || child.is_symlink() || (recursive && child.is_dir())) + && !rafsv5_validate_inode(child.deref(), recursive, digester)? + { + return Ok(false); + } + let child_digest = child.get_digest(); + let child_digest = child_digest.as_ref(); + + hasher.digest_update(child_digest); + } + } + + let digest = hasher.digest_finalize(); + let result = expected_digest == digest; + if !result { + error!( + "invalid inode digest {}, expected {}, ino: {} name: {:?}", + digest, + expected_digest, + inode.ino(), + inode.name() + ); + } + + Ok(result) +} + +#[cfg(test)] +pub mod tests { + use std::fs::OpenOptions; + use std::io::BufWriter; + use std::io::{SeekFrom, Write}; + + use storage::device::BlobChunkInfo; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::metadata::RafsStore; + use crate::{RafsIoRead, RafsIoReader}; + use std::any::Any; + use std::str::FromStr; + + struct Entry { + foo: u32, + bar: u32, + } + + unsafe fn any_as_u8_slice(p: &T) -> &[u8] { + ::std::slice::from_raw_parts((p as *const T) as *const u8, ::std::mem::size_of::()) + } + + #[test] + fn test_load_blob_table() { + let mut buffer = Vec::new(); + let first = Entry { foo: 1, bar: 2 }; + let second = Entry { foo: 3, bar: 4 }; + let third = Entry { foo: 5, bar: 6 }; + + let first_id = "355d403e35d7120cbd6a145874a2705e6842ce9974985013ebdc1fa5199a0184"; + let second_id = "19ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e26023ad0ecd61d4b130019ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e26023ad0ecd61d4"; + let third_id = "19ebb6e9bdcbbce3f24d694fe20e0e552ae705ce079e"; + + let first_slice = unsafe { any_as_u8_slice(&first) }; + let second_slice = unsafe { any_as_u8_slice(&second) }; + let third_slice = unsafe { any_as_u8_slice(&third) }; + + buffer.extend_from_slice(first_slice); + buffer.extend_from_slice(first_id.as_bytes()); + buffer.push(b'\0'); + buffer.extend_from_slice(second_slice); + buffer.extend_from_slice(second_id.as_bytes()); + buffer.push(b'\0'); + buffer.extend_from_slice(third_slice); + buffer.extend_from_slice(third_id.as_bytes()); + // buffer.push(b'\0'); + + let tmp_file = 
TempFile::new().unwrap(); + + // Store extended blob table + let mut tmp_file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + tmp_file.write_all(&buffer).unwrap(); + tmp_file.flush().unwrap(); + + let mut file: RafsIoReader = Box::new(tmp_file); + let mut blob_table = RafsV5BlobTable::new(); + + file.seek(SeekFrom::Start(0)).unwrap(); + blob_table + .load( + &mut file, + buffer.len() as u32, + RAFS_DEFAULT_CHUNK_SIZE as u32, + RafsSuperFlags::empty(), + ) + .unwrap(); + for b in &blob_table.entries { + let _c = b.clone(); + trace!("{:?}", _c); + } + + assert_eq!(first.bar, first.foo + 1); + assert_eq!(blob_table.size(), rafsv5_align(buffer.len())); + assert_eq!(blob_table.get(0).unwrap().blob_id(), first_id); + assert_eq!(blob_table.get(1).unwrap().blob_id(), second_id); + assert_eq!(blob_table.get(2).unwrap().blob_id(), third_id); + assert!(blob_table.get(3).is_err()); + assert_eq!(blob_table.get_all().len(), 3); + + blob_table.entries.truncate(0); + file.seek(SeekFrom::Start(0)).unwrap(); + blob_table + .load( + &mut file, + 0, + RAFS_DEFAULT_CHUNK_SIZE as u32, + RafsSuperFlags::empty(), + ) + .unwrap(); + assert_eq!(blob_table.size(), 0); + assert_eq!(blob_table.entries.len(), 0); + assert!(blob_table.get(0).is_err()); + + blob_table.entries.truncate(0); + file.seek(SeekFrom::Start(0)).unwrap(); + blob_table + .load( + &mut file, + (buffer.len() - 100) as u32, + RAFS_DEFAULT_CHUNK_SIZE as u32, + RafsSuperFlags::empty(), + ) + .unwrap(); + assert_eq!(blob_table.entries[0].blob_id(), first_id); + assert_eq!(blob_table.get_all().len(), 2); + } + + #[test] + fn test_extended_blob_table() { + let tmp_file = TempFile::new().unwrap(); + + // Create extended blob table + let mut table = RafsV5ExtBlobTable::new(); + for i in 0..5 { + table.add(i * 3, 100, 100, 0); + } + + // Store extended blob table + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut writer = BufWriter::new(file); + table.store(&mut writer).unwrap(); + + // Load extended blob table + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut reader = Box::new(file) as Box; + let mut table = RafsV5ExtBlobTable::new(); + table.load(&mut reader, 5).unwrap(); + + assert_eq!(table.size(), 5 * RAFSV5_EXT_BLOB_ENTRY_SIZE); + assert_eq!(table.entries(), 5); + assert!(table.get(0).is_some()); + assert!(table.get(4).is_some()); + assert!(table.get(5).is_none()); + + // Check expected blob table + for i in 0..5 { + assert_eq!(table.get(i).unwrap().chunk_count, i * 3); + assert_eq!(table.get(i).unwrap().features, 0); + assert_eq!(table.get(i).unwrap().uncompressed_size, 100); + assert_eq!( + table.get(i).unwrap().reserved2, + [0u8; RAFSV5_EXT_BLOB_RESERVED_SIZE] + ); + } + } + + #[derive(Default, Copy, Clone)] + struct MockChunkInfo { + pub block_id: RafsDigest, + pub blob_index: u32, + pub flags: BlobChunkFlags, + pub compress_size: u32, + pub uncompress_size: u32, + pub compress_offset: u64, + pub uncompress_offset: u64, + pub file_offset: u64, + pub index: u32, + #[allow(unused)] + pub reserved: u32, + } + + impl MockChunkInfo { + fn new() -> Self { + MockChunkInfo::default() + } + } + + impl BlobChunkInfo for MockChunkInfo { + fn chunk_id(&self) -> &RafsDigest { + &self.block_id + } + + fn id(&self) -> u32 { + self.index + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn 
is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, blob_index, u32); + impl_getter!(compressed_offset, compress_offset, u64); + impl_getter!(compressed_size, compress_size, u32); + impl_getter!(uncompressed_offset, uncompress_offset, u64); + impl_getter!(uncompressed_size, uncompress_size, u32); + } + + impl BlobV5ChunkInfo for MockChunkInfo { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_getter!(index, index, u32); + impl_getter!(file_offset, file_offset, u64); + impl_getter!(flags, flags, BlobChunkFlags); + } + + #[test] + fn test_add_chunk_to_bio_desc() { + let mut chunk = MockChunkInfo::new(); + let offset = 4096; + let size: u64 = 1024; + // [offset, offset + size) + chunk.file_offset = offset; + chunk.uncompress_size = size as u32; + + // (offset, end, expected_chunk_start, expected_size) + let data = vec![ + // Non-overlapping IO + (0, 0, 0, 0, false), + (0, offset, 0, 0, false), + (offset + size, 0, 0, 0, true), + (offset + size + 1, 0, 0, 0, true), + // Overlapping IO + (0, offset + 1, 0, 1, true), + (0, offset + size, 0, size, true), + (0, offset + size + 1, 0, size, true), + (0, offset + size - 1, 0, size - 1, true), + (offset, offset + 1, 0, 1, true), + (offset, offset + size, 0, size, true), + (offset, offset + size - 1, 0, size - 1, true), + (offset, offset + size + 1, 0, size, true), + (offset + 1, offset + 2, 1, 1, true), + (offset + 1, offset + size, 1, size - 1, true), + (offset + 1, offset + size - 1, 1, size - 2, true), + (offset + 1, offset + size + 1, 1, size - 1, true), + ]; + + for (offset, end, expected_chunk_start, expected_size, result) in data.iter() { + let blob = Arc::new(BlobInfo::new( + 0, + String::from("blobid"), + 0, + 0, + 0, + 0, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + )); + let mut desc = BlobIoVec::new(blob.clone()); + let res = add_chunk_to_bio_desc(&mut desc, *offset, *end, Arc::new(chunk), blob, true); + assert_eq!(*result, res); + if !desc.is_empty() { + assert_eq!(desc.len(), 1); + let bio = &desc.blob_io_desc(0).unwrap(); + assert_eq!(*expected_chunk_start, bio.offset); + assert_eq!(*expected_size as u32, bio.size); + } + } + } + + #[test] + fn test_calculate_bio_chunk_index() { + let (blksize, chunk_cnt) = (1024, 4); + + let io_range: Vec<(u64, u64, u32, u64)> = vec![ + (0, 1, 0, 1), + (0, blksize - 1, 0, 1), + (0, blksize, 0, 1), + (0, blksize + 1, 0, 2), + (0, blksize * chunk_cnt, 0, chunk_cnt), + (0, blksize * chunk_cnt + 1, 0, chunk_cnt), + (0, blksize * chunk_cnt - 1, 0, chunk_cnt), + (blksize - 1, 1, 0, 1), + (blksize - 1, 2, 0, 2), + (blksize - 1, 3, 0, 2), + (blksize - 1, blksize - 1, 0, 2), + (blksize - 1, blksize, 0, 2), + (blksize - 1, blksize + 1, 0, 2), + (blksize - 1, blksize * chunk_cnt, 0, chunk_cnt), + (blksize, 1, 1, 2), + (blksize, 2, 1, 2), + (blksize, blksize - 1, 1, 2), + (blksize, blksize + 1, 1, 3), + (blksize, blksize + 2, 1, 3), + (blksize, blksize * chunk_cnt, 1, chunk_cnt), + (blksize + 1, 1, 1, 2), + (blksize + 1, blksize - 2, 1, 2), + (blksize + 1, blksize - 1, 1, 2), + (blksize + 1, blksize, 1, 3), + (blksize + 1, blksize * chunk_cnt, 1, chunk_cnt), + ]; + + for (io_start, io_size, expected_start, expected_end) in io_range.iter() { + let (start, end) = calculate_bio_chunk_index( + *io_start, + *io_start + *io_size, + blksize, + chunk_cnt as u32, + false, + ); + + assert_eq!(start, *expected_start); + assert_eq!(end, *expected_end as u32); + } + } + + #[test] + fn test_rafsv5_align() { + assert_eq!(rafsv5_align(0), 0); + 
assert_eq!(rafsv5_align(1), 8); + assert_eq!(rafsv5_align(7), 8); + assert_eq!(rafsv5_align(8), 8); + assert_eq!(rafsv5_align(9), 16); + } + + #[test] + fn test_rafsv5_superflags() { + assert_eq!( + RafsSuperFlags::from(digest::Algorithm::Blake3), + RafsSuperFlags::HASH_BLAKE3 + ); + assert_eq!( + RafsSuperFlags::from(digest::Algorithm::Sha256), + RafsSuperFlags::HASH_SHA256 + ); + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::HASH_BLAKE3), + digest::Algorithm::Blake3 + ); + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::HASH_SHA256), + digest::Algorithm::Sha256 + ); + + assert_eq!( + RafsSuperFlags::from(compress::Algorithm::Zstd), + RafsSuperFlags::COMPRESSION_ZSTD + ); + assert_eq!( + RafsSuperFlags::from(compress::Algorithm::GZip), + RafsSuperFlags::COMPRESSION_GZIP + ); + assert_eq!( + RafsSuperFlags::from(compress::Algorithm::Lz4Block), + RafsSuperFlags::COMPRESSION_LZ4 + ); + assert_eq!( + RafsSuperFlags::from(compress::Algorithm::None), + RafsSuperFlags::COMPRESSION_NONE + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_ZSTD), + compress::Algorithm::Zstd + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_GZIP), + compress::Algorithm::GZip + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_LZ4), + compress::Algorithm::Lz4Block + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_NONE), + compress::Algorithm::None + ); + } + + #[test] + fn test_rafsv5_inode_table() { + let mut table = RafsV5InodeTable::new(1); + assert_eq!(table.size(), 8); + assert_eq!(table.len(), 2); + + assert!(table.set(0, 0x2000).is_err()); + assert!(table.set(2, 0x2000).is_err()); + assert!(table.set(1, 0x1000).is_err()); + assert!(table.set(1, 0x2001).is_err()); + + assert!(table.get(0).is_err()); + assert!(table.get(2).is_err()); + assert!(table.get(1).is_err()); + table.data[1] = 0x1000; + assert!(table.get(1).is_err()); + table.data[1] = 0x1 << 30; + assert!(table.get(1).is_err()); + assert!(table.set(1, 0x2008).is_ok()); + assert_eq!(table.get(1).unwrap(), 0x2008); + } + + #[test] + fn test_rafsv5_prefetch_table() { + let mut table = RafsV5PrefetchTable::new(); + + assert_eq!(table.size(), 0); + assert_eq!(table.len(), 0); + assert!(table.is_empty()); + table.add_entry(0x1); + assert_eq!(table.size(), 8); + assert_eq!(table.len(), 1); + assert!(!table.is_empty()); + + let tmp_file = TempFile::new().unwrap(); + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut writer = BufWriter::new(file); + writer.write_all(&[0u8; 8]).unwrap(); + assert_eq!(table.store(&mut writer).unwrap(), 8); + writer.flush().unwrap(); + + // Load extended blob table + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut reader = Box::new(file) as Box; + let mut table = RafsV5PrefetchTable::new(); + table.load_prefetch_table_from(&mut reader, 8, 2).unwrap(); + assert_eq!(table.size(), 8); + assert_eq!(table.len(), 2); + assert!(!table.is_empty()); + assert_eq!(table.inodes[0], 0x1); + assert_eq!(table.inodes[1], 0x0); + } + + #[test] + fn test_new_inode() { + let mut inode = RafsV5Inode::new(); + inode.set_name_size(3); + assert_eq!(inode.size(), 136); + assert!(!inode.is_symlink()); + assert!(!inode.is_hardlink()); + assert!(!inode.is_dir()); + assert!(!inode.is_reg()); + assert!(!inode.has_hole()); + assert!(!inode.has_xattr()); + + let mut inode = RafsV5Inode::new(); + inode.set_symlink_size(3); + 
assert_eq!(inode.size(), 136); + } + + #[test] + fn test_inode_load_store() { + let mut inode = RafsV5Inode::new(); + inode.i_size = 0x1000; + inode.i_blocks = 1; + inode.i_child_count = 10; + inode.i_child_index = 20; + inode.set_name_size(4); + inode.set_symlink_size(6); + inode.i_flags = RafsInodeFlags::SYMLINK; + + let name = OsString::from_str("test").unwrap(); + let symlink = OsString::from_str("/test12").unwrap(); + let inode_wrapper = RafsV5InodeWrapper { + name: &name, + symlink: Some(&symlink), + inode: &inode, + }; + + let tmp_file = TempFile::new().unwrap(); + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut writer = BufWriter::new(file); + assert_eq!(inode_wrapper.store(&mut writer).unwrap(), 144); + writer.flush().unwrap(); + + // Load inode + let file = OpenOptions::new() + .read(true) + .write(true) + .open(tmp_file.as_path()) + .unwrap(); + let mut reader = Box::new(file) as Box; + let mut inode2 = RafsV5Inode::new(); + inode2.load(&mut reader).unwrap(); + assert_eq!(inode2.i_name_size, 4); + assert_eq!(inode2.i_symlink_size, 6); + assert_eq!(inode2.i_size, 0x1000); + assert_eq!(inode2.i_blocks, 1); + assert_eq!(inode2.i_child_count, 10); + assert_eq!(inode2.i_child_index, 20); + + let filename = inode2.load_file_name(&mut reader).unwrap(); + assert_eq!(filename, OsString::from_str("test").unwrap()); + } + + #[test] + fn test_rafsv5_new_xattrs() { + let mut xattrs = RafsXAttrs::new(); + assert_eq!(xattrs.size(), 0); + + xattrs + .add(OsString::from("user.key1"), vec![0x1u8, 0x2, 0x3, 0x4]) + .unwrap(); + assert_eq!(xattrs.size(), 18); + xattrs + .add(OsString::from("user.key21"), vec![0x1u8, 0x2, 0x3, 0x4]) + .unwrap(); + assert_eq!(xattrs.size(), 37); + xattrs.remove(&OsString::from("user.key1")); + assert_eq!(xattrs.size(), 19); + } +} diff --git a/rafs/src/metadata/layout/v6.rs b/rafs/src/metadata/layout/v6.rs index 6a64607fb07..24e0f934a74 100644 --- a/rafs/src/metadata/layout/v6.rs +++ b/rafs/src/metadata/layout/v6.rs @@ -1,2813 +1,2813 @@ -// Copyright 2020-2021 Ant Group. All rights reserved. -// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::convert::{TryFrom, TryInto}; -use std::ffi::{OsStr, OsString}; -use std::fmt::Debug; -use std::io::{Read, Result}; -use std::mem::size_of; -use std::os::unix::ffi::OsStrExt; -use std::str::FromStr; -use std::sync::Arc; - -use lazy_static::lazy_static; -use nydus_storage::device::{BlobFeatures, BlobInfo}; -use nydus_storage::meta::{ - BlobChunkInfoV1Ondisk, BlobChunkInfoV2Ondisk, BlobCompressionContextHeader, -}; -use nydus_storage::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; -use nydus_utils::crypt::{self, Cipher, CipherContext}; -use nydus_utils::{compress, digest, round_up, ByteSize}; - -use crate::metadata::inode::InodeWrapper; -use crate::metadata::layout::v5::RafsV5ChunkInfo; -use crate::metadata::layout::{MetaRange, RafsXAttrs}; -use crate::metadata::{Inode, RafsBlobExtraInfo, RafsStore, RafsSuperFlags, RafsSuperMeta}; -use crate::{impl_bootstrap_converter, impl_pub_getter_setter, RafsIoReader, RafsIoWrite}; - -/// EROFS metadata slot size. -pub const EROFS_INODE_SLOT_SIZE: usize = 1 << EROFS_INODE_SLOT_BITS; -/// Bits of EROFS logical block size. -pub const EROFS_BLOCK_BITS_12: u8 = 12; -/// EROFS logical block size. -pub const EROFS_BLOCK_SIZE_4096: u64 = 1u64 << EROFS_BLOCK_BITS_12; -pub const EROFS_BLOCK_BITS_9: u8 = 9; -/// EROFS logical block size. 
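Ahead of the remaining EROFS constants and v6 structures below, one piece of arithmetic worth spelling out: a `nid` is a 32-byte metadata slot number (`EROFS_INODE_SLOT_SIZE = 1 << EROFS_INODE_SLOT_BITS` with 5 slot bits, per the constants above), and the comments on `RafsV6SuperBlock::s_root_nid` and `RafsV6Dirent::e_nid` below both state `inode offset = s_meta_blkaddr * 4096 + nid * 32`. A std-only sketch of that mapping, not the nydus API:

```rust
const EROFS_INODE_SLOT_SIZE: u64 = 32; // 1 << EROFS_INODE_SLOT_BITS, with 5 slot bits

fn nid_to_offset(meta_blkaddr: u32, block_size: u64, nid: u64) -> u64 {
    meta_blkaddr as u64 * block_size + nid * EROFS_INODE_SLOT_SIZE
}

fn main() {
    // With a 4KiB block size, metadata starting at block 1 and root nid 2,
    // the root inode record lives at byte 4096 + 64 = 4160.
    assert_eq!(nid_to_offset(1, 4096, 2), 4160);
}
```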
-pub const EROFS_BLOCK_SIZE_512: u64 = 1u64 << EROFS_BLOCK_BITS_9; - -/// Offset of EROFS super block. -pub const EROFS_SUPER_OFFSET: u16 = 1024; -/// Size of EROFS super block. -pub const EROFS_SUPER_BLOCK_SIZE: u16 = 128; -/// Size of extended super block, used for rafs v6 specific fields -pub const EROFS_EXT_SUPER_BLOCK_SIZE: u16 = 256; -/// EROFS device table offset. -pub const EROFS_DEVTABLE_OFFSET: u16 = - EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE + EROFS_EXT_SUPER_BLOCK_SIZE; - -/// Offseet for inode format flags: compact or extended. -pub const EROFS_I_VERSION_BIT: u16 = 0; -/// Number of bits for inode format flags. -pub const EROFS_I_VERSION_BITS: u16 = 1; -/// 32-byte on-disk inode -pub const EROFS_INODE_LAYOUT_COMPACT: u16 = 0; -/// 64-byte on-disk inode -pub const EROFS_INODE_LAYOUT_EXTENDED: u16 = 1; -/// Number of bits for inode data layout. -pub const EROFS_I_DATALAYOUT_BITS: u16 = 3; -/// EROFS plain inode. -pub const EROFS_INODE_FLAT_PLAIN: u16 = 0; -/// EROFS inline inode. -pub const EROFS_INODE_FLAT_INLINE: u16 = 2; -/// EROFS chunked inode. -pub const EROFS_INODE_CHUNK_BASED: u16 = 4; - -// Magic number for EROFS super block. -const EROFS_SUPER_MAGIC_V1: u32 = 0xE0F5_E1E2; -// Bits of EROFS metadata slot size. -const EROFS_INODE_SLOT_BITS: u8 = 5; -// Bit flag indicating whether the inode is chunked or not. -const EROFS_CHUNK_FORMAT_INDEXES_FLAG: u16 = 0x0020; -// Encoded chunk size (log2(chunk_size) - EROFS_BLOCK_BITS). -const EROFS_CHUNK_FORMAT_SIZE_MASK: u16 = 0x001F; - -/// Checksum of superblock, compatible with EROFS versions prior to Linux kernel 5.5. -#[allow(dead_code)] -const EROFS_FEATURE_COMPAT_SB_CHKSUM: u32 = 0x0000_0001; -/// Rafs v6 specific metadata, compatible with EROFS versions since Linux kernel 5.16. -const EROFS_FEATURE_COMPAT_RAFS_V6: u32 = 0x4000_0000; -/// Chunked inode, incompatible with EROFS versions prior to Linux kernel 5.15. -const EROFS_FEATURE_INCOMPAT_CHUNKED_FILE: u32 = 0x0000_0004; -/// Multi-devices, incompatible with EROFS versions prior to Linux kernel 5.16. -const EROFS_FEATURE_INCOMPAT_DEVICE_TABLE: u32 = 0x0000_0008; - -/// Size of SHA256 digest string. -const BLOB_SHA256_LEN: usize = 64; -const BLOB_MAX_SIZE_UNCOMPRESSED: u64 = 1u64 << 44; -const BLOB_MAX_SIZE_COMPRESSED: u64 = 1u64 << 40; - -/// RAFS v6 superblock on-disk format, 128 bytes. -/// -/// The structure is designed to be compatible with EROFS superblock, so the in kernel EROFS file -/// system driver could be used to mount a RAFS v6 image. -#[repr(C)] -#[derive(Clone, Copy)] -pub struct RafsV6SuperBlock { - /// File system magic number - s_magic: u32, - /// Crc32 checksum of the superblock, ignored by Rafs v6. - s_checksum: u32, - /// Compatible filesystem features. - s_feature_compat: u32, - /// Bits of block size, 4K or 512 bytes. - s_blkszbits: u8, - /// Number of extended superblock slots, ignored by Rafs v6. - /// `superblock size = 128(size of RafsV6SuperBlock) + s_extslots * 16`. - s_extslots: u8, - /// Nid of the root directory. - /// `root inode offset = s_meta_blkaddr * 4096 + s_root_nid * 32`. - s_root_nid: u16, - /// Total valid ino # - s_inos: u64, - /// Timestamp of filesystem creation. - s_build_time: u64, - /// Timestamp of filesystem creation. - s_build_time_nsec: u32, - /// Total size of file system in blocks, used for statfs - s_blocks: u32, - /// Start block address of the metadata area. - s_meta_blkaddr: u32, - /// Start block address of the shared xattr area. 
- s_xattr_blkaddr: u32, - /// 128-bit uuid for volume - s_uuid: [u8; 16], - /// Volume name. - s_volume_name: [u8; 16], - /// Incompatible filesystem feature flags. - s_feature_incompat: u32, - /// A union of `u16` for miscellaneous usage. - s_u: u16, - /// # of devices besides the primary device. - s_extra_devices: u16, - /// Offset of the device table, `startoff = s_devt_slotoff * 128`. - s_devt_slotoff: u16, - /// Padding. - s_reserved: [u8; 38], -} - -impl_bootstrap_converter!(RafsV6SuperBlock); - -impl RafsV6SuperBlock { - /// Create a new instance of `RafsV6SuperBlock`. - pub fn new() -> Self { - Self::default() - } - - /// Load a `RafsV6SuperBlock` from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - let mut buf1 = [0u8; EROFS_SUPER_OFFSET as usize]; - - r.read_exact(&mut buf1)?; - r.read_exact(self.as_mut()) - } - - /// Validate the Rafs v6 super block. - pub fn validate(&self, meta_size: u64) -> Result<()> { - if meta_size < EROFS_BLOCK_SIZE_4096 { - return Err(einval!(format!( - "invalid Rafs v6 metadata size: {}", - meta_size - ))); - } - let block_size = if self.s_blkszbits == EROFS_BLOCK_BITS_9 { - EROFS_BLOCK_SIZE_512 - } else { - EROFS_BLOCK_SIZE_4096 - }; - if meta_size & (block_size - 1) != 0 { - return Err(einval!(format!( - "invalid Rafs v6 metadata size: bootstrap size {} is not aligned", - meta_size - ))); - } - let meta_addr = u32::from_le(self.s_meta_blkaddr) as u64 * block_size; - if meta_addr > meta_size { - return Err(einval!(format!( - "invalid Rafs v6 meta block address 0x{:x}, meta file size 0x{:x}", - meta_addr, meta_size - ))); - } - - if u32::from_le(self.s_magic) != EROFS_SUPER_MAGIC_V1 { - return Err(einval!(format!( - "invalid EROFS magic number 0x{:x} in Rafsv6 superblock", - u32::from_le(self.s_magic) - ))); - } - - if self.s_checksum != 0 { - return Err(einval!(format!( - "invalid checksum {} in Rafsv6 superblock", - u32::from_le(self.s_checksum) - ))); - } - - if self.s_blkszbits != EROFS_BLOCK_BITS_12 && self.s_blkszbits != EROFS_BLOCK_BITS_9 { - return Err(einval!(format!( - "invalid block size bits {} in Rafsv6 superblock", - self.s_blkszbits - ))); - } - - if self.s_extslots != 0 { - return Err(einval!("invalid extended slots in Rafsv6 superblock")); - } - - if self.s_inos == 0 { - return Err(einval!("invalid inode number in Rafsv6 superblock")); - } - - if self.s_u != 0 { - return Err(einval!("invalid union field in Rafsv6 superblock")); - } - - if self.s_xattr_blkaddr != 0 { - return Err(einval!( - "unsupported shared extended attribute namespace in Rafsv6 superblock" - )); - } - - // There's a bug in old RAFS v6 images, which has set s_blocks to a fixed value 4096. - if self.s_extra_devices == 0 && self.s_blocks != 0 && u32::from_le(self.s_blocks) != 4096 { - warn!( - "rafs v6 extra devices {}, blocks {}", - self.s_extra_devices, self.s_blocks - ); - return Err(einval!("invalid extra device count in Rafsv6 superblock")); - } - - let devtable_off = - u16::from_le(self.s_devt_slotoff) as u64 * size_of::() as u64; - if devtable_off != EROFS_DEVTABLE_OFFSET as u64 { - return Err(einval!(format!( - "invalid device table slot offset {} in Rafsv6 superblock", - u16::from_le(self.s_devt_slotoff) - ))); - } - let devtable_end = devtable_off + u16::from_le(self.s_extra_devices) as u64; - if devtable_end > meta_size { - return Err(einval!(format!( - "invalid device table slot count {} in Rafsv6 superblock", - u16::from_le(self.s_extra_devices) - ))); - } - - // s_build_time may be used as compact_inode's timestamp in the future. 
- // if u64::from_le(self.s_build_time) != 0 || u32::from_le(self.s_build_time_nsec) != 0 { - // return Err(einval!("invalid build time in Rafsv6 superblock")); - // } - - if u32::from_le(self.s_feature_incompat) - != EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE - { - return Err(einval!( - "invalid incompatible feature bits in Rafsv6 superblock" - )); - } - - if u32::from_le(self.s_feature_compat) & EROFS_FEATURE_COMPAT_RAFS_V6 - != EROFS_FEATURE_COMPAT_RAFS_V6 - { - return Err(einval!( - "invalid compatible feature bits in Rafsv6 superblock" - )); - } - - Ok(()) - } - - /// Check whether it's super block for Rafs v6. - pub fn is_rafs_v6(&self) -> bool { - self.magic() == EROFS_SUPER_MAGIC_V1 - } - - /// Set number of inodes. - pub fn set_inos(&mut self, inos: u64) { - self.s_inos = inos.to_le(); - } - - /// Get total inodes count of this Rafs - pub fn inodes_count(&self) -> u64 { - u64::from_le(self.s_inos) - } - - /// Set number of logical blocks. - pub fn set_blocks(&mut self, blocks: u32) { - self.s_blocks = blocks.to_le(); - } - - /// Get root nid. - pub fn root_nid(&self) -> u16 { - u16::from_le(self.s_root_nid) - } - - /// Set EROFS root nid. - pub fn set_root_nid(&mut self, nid: u16) { - self.s_root_nid = nid.to_le(); - } - - /// Get meta block address. - pub fn meta_addr(&self) -> u32 { - u32::from_le(self.s_meta_blkaddr) - } - - /// Set EROFS meta block address. - pub fn set_meta_addr(&mut self, meta_addr: u64) { - if self.s_blkszbits == EROFS_BLOCK_BITS_12 { - assert!((meta_addr / EROFS_BLOCK_SIZE_4096) <= u32::MAX as u64); - self.s_meta_blkaddr = u32::to_le((meta_addr / EROFS_BLOCK_SIZE_4096) as u32); - } else if self.s_blkszbits == EROFS_BLOCK_BITS_9 { - assert!((meta_addr / EROFS_BLOCK_SIZE_512) <= u32::MAX as u64); - self.s_meta_blkaddr = u32::to_le((meta_addr / EROFS_BLOCK_SIZE_512) as u32); - } else { - error!("v6: unsupported block bits {}", self.s_blkszbits); - } - } - - /// Get device table offset. - pub fn device_table_offset(&self) -> u64 { - u16::from_le(self.s_devt_slotoff) as u64 * size_of::() as u64 - } - - /// Set bits of block size. - pub fn set_block_bits(&mut self, block_bits: u8) { - assert!(block_bits == EROFS_BLOCK_BITS_12 || block_bits == EROFS_BLOCK_BITS_9); - self.s_blkszbits = block_bits; - } - - impl_pub_getter_setter!(magic, set_magic, s_magic, u32); - impl_pub_getter_setter!(extra_devices, set_extra_devices, s_extra_devices, u16); -} - -impl RafsStore for RafsV6SuperBlock { - // This method must be called before RafsV6SuperBlockExt::store(), otherwise data written by - // RafsV6SuperBlockExt::store() will be overwritten. 
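The comment just before the `store()` implementation below hints at a fixed layout for the first metadata block. The offsets come straight from the EROFS_* constants earlier in this file; the following std-only sketch simply spells out the arithmetic:

```rust
const EROFS_SUPER_OFFSET: u64 = 1024;
const EROFS_SUPER_BLOCK_SIZE: u64 = 128;
const EROFS_EXT_SUPER_BLOCK_SIZE: u64 = 256;
const EROFS_BLOCK_SIZE_4096: u64 = 4096;

fn main() {
    // Byte offsets inside the first 4KiB metadata block:
    let sb_start = EROFS_SUPER_OFFSET; // 1024: EROFS superblock
    let ext_sb_start = sb_start + EROFS_SUPER_BLOCK_SIZE; // 1152: RAFS v6 extended superblock
    let devtable_start = ext_sb_start + EROFS_EXT_SUPER_BLOCK_SIZE; // 1408: device table
    assert_eq!(devtable_start, 1408); // EROFS_DEVTABLE_OFFSET defined above
    assert!(devtable_start < EROFS_BLOCK_SIZE_4096);
    println!(
        "sb @ {}, ext sb @ {}, device table @ {}",
        sb_start, ext_sb_start, devtable_start
    );
}
```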
- fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - debug_assert!( - ((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64) < EROFS_BLOCK_SIZE_4096 - ); - w.write_all(&[0u8; EROFS_SUPER_OFFSET as usize])?; - w.write_all(self.as_ref())?; - w.write_all( - &[0u8; (EROFS_BLOCK_SIZE_4096 as usize - - (EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as usize)], - )?; - - Ok(EROFS_BLOCK_SIZE_4096 as usize) - } -} - -impl Default for RafsV6SuperBlock { - fn default() -> Self { - debug_assert!(size_of::() == 128); - Self { - s_magic: u32::to_le(EROFS_SUPER_MAGIC_V1), - s_checksum: 0, - s_feature_compat: u32::to_le(EROFS_FEATURE_COMPAT_RAFS_V6), - s_blkszbits: EROFS_BLOCK_BITS_12, - s_extslots: 0u8, - s_root_nid: 0, - s_inos: 0, - s_build_time: 0, - s_build_time_nsec: 0, - s_blocks: u32::to_le(1), - s_meta_blkaddr: 0, - s_xattr_blkaddr: 0, - s_uuid: [0u8; 16], - s_volume_name: [0u8; 16], - s_feature_incompat: u32::to_le( - EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, - ), - s_u: 0, - s_extra_devices: 0, - s_devt_slotoff: u16::to_le(EROFS_DEVTABLE_OFFSET / size_of::() as u16), - s_reserved: [0u8; 38], - } - } -} - -/// Extended superblock for RAFS v6, 256 bytes -#[repr(C)] -#[derive(Clone, Copy)] -pub struct RafsV6SuperBlockExt { - /// superblock flags - s_flags: u64, - /// offset of blob table - s_blob_table_offset: u64, - /// size of blob table - s_blob_table_size: u32, - /// chunk size - s_chunk_size: u32, - /// offset of chunk table - s_chunk_table_offset: u64, - /// size of chunk table - s_chunk_table_size: u64, - s_prefetch_table_offset: u64, - s_prefetch_table_size: u32, - s_padding: u32, - /// Reserved - s_reserved: [u8; 200], -} - -impl_bootstrap_converter!(RafsV6SuperBlockExt); - -impl RafsV6SuperBlockExt { - /// Create a new instance `RafsV6SuperBlockExt`. - pub fn new() -> Self { - debug_assert!(size_of::() == 256); - Self::default() - } - - /// Load an `RafsV6SuperBlockExt` from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.seek_to_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64)?; - r.read_exact(self.as_mut())?; - r.seek_to_offset(EROFS_BLOCK_SIZE_4096 as u64)?; - - Ok(()) - } - - /// Validate the Rafs v6 super block. 
- pub fn validate(&self, meta_size: u64, meta: &RafsSuperMeta) -> Result<()> { - let mut flags = self.flags(); - flags &= RafsSuperFlags::COMPRESSION_NONE.bits() - | RafsSuperFlags::COMPRESSION_LZ4.bits() - | RafsSuperFlags::COMPRESSION_GZIP.bits() - | RafsSuperFlags::COMPRESSION_ZSTD.bits(); - if flags.count_ones() != 1 { - return Err(einval!(format!( - "invalid flags {:#x} related to compression algorithm in Rafs v6 extended superblock", - flags - ))); - } - - let mut flags = self.flags(); - flags &= RafsSuperFlags::HASH_BLAKE3.bits() | RafsSuperFlags::HASH_SHA256.bits(); - if flags.count_ones() != 1 { - return Err(einval!(format!( - "invalid flags {:#x} related to digest algorithm in Rafs v6 extended superblock", - flags - ))); - } - - let chunk_size = u32::from_le(self.s_chunk_size) as u64; - if !chunk_size.is_power_of_two() - || !(EROFS_BLOCK_SIZE_4096..=RAFS_MAX_CHUNK_SIZE).contains(&chunk_size) - { - return Err(einval!("invalid chunk size in Rafs v6 extended superblock")); - } - - let devslot_end = meta.blob_device_table_offset + meta.blob_table_size as u64; - - let blob_offset = self.blob_table_offset(); - let blob_size = self.blob_table_size() as u64; - if blob_offset & (EROFS_BLOCK_SIZE_4096 - 1) != 0 - || blob_offset < EROFS_BLOCK_SIZE_4096 - || blob_offset < devslot_end - || blob_size % size_of::() as u64 != 0 - || blob_offset.checked_add(blob_size).is_none() - || blob_offset + blob_size > meta_size - { - return Err(einval!(format!( - "invalid blob table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", - blob_offset, blob_size - ))); - } - let blob_range = MetaRange::new(blob_offset, blob_size, true)?; - - let mut chunk_info_tbl_range = None; - if self.chunk_table_size() > 0 { - let chunk_tbl_offset = self.chunk_table_offset(); - let chunk_tbl_size = self.chunk_table_size(); - if chunk_tbl_offset < EROFS_BLOCK_SIZE_4096 - || chunk_tbl_offset % EROFS_BLOCK_SIZE_4096 != 0 - || chunk_tbl_offset < devslot_end - || chunk_tbl_size % size_of::() as u64 != 0 - || chunk_tbl_offset.checked_add(chunk_tbl_size).is_none() - || chunk_tbl_offset + chunk_tbl_size > meta_size - { - return Err(einval!(format!( - "invalid chunk table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", - chunk_tbl_offset, chunk_tbl_size - ))); - } - let chunk_range = MetaRange::new(chunk_tbl_offset, chunk_tbl_size, true)?; - if blob_range.intersect_with(&chunk_range) { - return Err(einval!(format!( - "blob table intersects with chunk table in Rafs v6 extended superblock", - ))); - } - chunk_info_tbl_range = Some(chunk_range); - } - - // Legacy RAFS may have zero prefetch table offset but non-zero prefetch table size for - // empty filesystems. 
- if self.prefetch_table_size() > 0 && self.prefetch_table_offset() != 0 { - let tbl_offset = self.prefetch_table_offset(); - let tbl_size = self.prefetch_table_size() as u64; - if tbl_offset < EROFS_BLOCK_SIZE_4096 - || tbl_size % size_of::() as u64 != 0 - || tbl_offset < devslot_end - || tbl_offset.checked_add(tbl_size).is_none() - || tbl_offset + tbl_size > meta_size - { - return Err(einval!(format!( - "invalid prefetch table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", - tbl_offset, tbl_size - ))); - } - let prefetch_range = MetaRange::new(tbl_offset, tbl_size, false)?; - if blob_range.intersect_with(&prefetch_range) { - return Err(einval!(format!( - "blob table intersects with prefetch table in Rafs v6 extended superblock", - ))); - } - if let Some(chunk_range) = chunk_info_tbl_range.as_ref() { - if chunk_range.intersect_with(&prefetch_range) { - return Err(einval!(format!( - "chunk information table intersects with prefetch table in Rafs v6 extended superblock", - ))); - } - } - } - - Ok(()) - } - - /// Set compression algorithm to handle chunk of the Rafs filesystem. - pub fn set_compressor(&mut self, compressor: compress::Algorithm) { - let c: RafsSuperFlags = compressor.into(); - - self.s_flags &= !RafsSuperFlags::COMPRESSION_NONE.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_LZ4.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_GZIP.bits(); - self.s_flags &= !RafsSuperFlags::COMPRESSION_ZSTD.bits(); - self.s_flags |= c.bits(); - } - - /// Set the `has_xattr` flag for the RAFS filesystem. - pub fn set_has_xattr(&mut self) { - self.s_flags |= RafsSuperFlags::HAS_XATTR.bits(); - } - - /// Enable explicit Uid/Gid feature. - pub fn set_explicit_uidgid(&mut self) { - self.s_flags |= RafsSuperFlags::EXPLICIT_UID_GID.bits(); - } - - /// Set flag indicating that chunk digest is inlined in the data blob. - pub fn set_inlined_chunk_digest(&mut self) { - self.s_flags |= RafsSuperFlags::INLINED_CHUNK_DIGEST.bits(); - } - - /// Enable `tarfs` mode, which directly use a tar stream/file as RAFS data blob and do not - /// generate any blob meta data. - pub fn set_tarfs_mode(&mut self) { - self.s_flags |= RafsSuperFlags::TARTFS_MODE.bits(); - } - - /// Set message digest algorithm to handle chunk of the Rafs filesystem. - pub fn set_digester(&mut self, digester: digest::Algorithm) { - let c: RafsSuperFlags = digester.into(); - - self.s_flags &= !RafsSuperFlags::HASH_BLAKE3.bits(); - self.s_flags &= !RafsSuperFlags::HASH_SHA256.bits(); - self.s_flags |= c.bits(); - } - - /// Set offset and size of chunk information table. - pub fn set_chunk_table(&mut self, offset: u64, size: u64) { - self.set_chunk_table_offset(offset); - self.set_chunk_table_size(size); - } - - /// Set encryption algorithm to encrypt chunks of the Rafs filesystem. 
- pub fn set_cipher(&mut self, cipher: crypt::Algorithm) { - let c: RafsSuperFlags = cipher.into(); - - self.s_flags &= !RafsSuperFlags::ENCRYPTION_NONE.bits(); - self.s_flags &= !RafsSuperFlags::ENCRYPTION_ASE_128_XTS.bits(); - self.s_flags |= c.bits(); - } - - impl_pub_getter_setter!( - chunk_table_offset, - set_chunk_table_offset, - s_chunk_table_offset, - u64 - ); - impl_pub_getter_setter!( - chunk_table_size, - set_chunk_table_size, - s_chunk_table_size, - u64 - ); - impl_pub_getter_setter!(chunk_size, set_chunk_size, s_chunk_size, u32); - impl_pub_getter_setter!(flags, set_flags, s_flags, u64); - impl_pub_getter_setter!( - blob_table_offset, - set_blob_table_offset, - s_blob_table_offset, - u64 - ); - impl_pub_getter_setter!(blob_table_size, set_blob_table_size, s_blob_table_size, u32); - impl_pub_getter_setter!( - prefetch_table_size, - set_prefetch_table_size, - s_prefetch_table_size, - u32 - ); - impl_pub_getter_setter!( - prefetch_table_offset, - set_prefetch_table_offset, - s_prefetch_table_offset, - u64 - ); -} - -impl RafsStore for RafsV6SuperBlockExt { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - w.seek_offset(EROFS_BLOCK_SIZE_4096 as u64)?; - - Ok(EROFS_BLOCK_SIZE_4096 as usize - (EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as usize) - } -} - -impl Default for RafsV6SuperBlockExt { - fn default() -> Self { - Self { - s_flags: 0, - s_blob_table_offset: 0, - s_blob_table_size: 0, - s_chunk_size: 0, - s_chunk_table_offset: 0, - s_chunk_table_size: 0, - s_prefetch_table_offset: 0, - s_prefetch_table_size: 0, - s_padding: u32::to_le(0), - s_reserved: [0u8; 200], - } - } -} - -/// Type of EROFS inodes. -#[repr(u8)] -#[allow(non_camel_case_types, dead_code)] -enum EROFS_FILE_TYPE { - /// Unknown file type. - EROFS_FT_UNKNOWN, - /// Regular file. - EROFS_FT_REG_FILE, - /// Directory. - EROFS_FT_DIR, - /// Character device. - EROFS_FT_CHRDEV, - /// Block device. - EROFS_FT_BLKDEV, - /// FIFO pipe. - EROFS_FT_FIFO, - /// Socket. - EROFS_FT_SOCK, - /// Symlink. - EROFS_FT_SYMLINK, - /// Maximum value of file type. - EROFS_FT_MAX, -} - -/// Trait to manipulate data fields of on-disk RAFS v6 inodes. -/// -/// There are two types of on disk inode formats defined by EROFS: -/// - compact inode with 32-byte data -/// - extended inode with 64-byte data -pub trait RafsV6OndiskInode: RafsStore { - fn set_size(&mut self, size: u64); - fn set_ino(&mut self, ino: u32); - fn set_nlink(&mut self, nlinks: u32); - fn set_mode(&mut self, mode: u16); - fn set_u(&mut self, u: u32); - fn set_uidgid(&mut self, uid: u32, gid: u32); - fn set_mtime(&mut self, _sec: u64, _nsec: u32); - fn set_rdev(&mut self, rdev: u32); - fn set_xattr_inline_count(&mut self, count: u16); - fn set_data_layout(&mut self, data_layout: u16); - - /// Set inode data layout format to be PLAIN. - #[inline] - fn set_inline_plain_layout(&mut self) { - self.set_data_layout(EROFS_INODE_FLAT_PLAIN); - } - - /// Set inode data layout format to be INLINE. - #[inline] - fn set_inline_inline_layout(&mut self) { - self.set_data_layout(EROFS_INODE_FLAT_INLINE); - } - - /// Set inode data layout format to be CHUNKED. 
- #[inline] - fn set_chunk_based_layout(&mut self) { - self.set_data_layout(EROFS_INODE_CHUNK_BASED); - } - - fn format(&self) -> u16; - fn mode(&self) -> u16; - fn size(&self) -> u64; - fn union(&self) -> u32; - fn ino(&self) -> u32; - fn ugid(&self) -> (u32, u32); - fn mtime_s_ns(&self) -> (u64, u32); - fn nlink(&self) -> u32; - fn rdev(&self) -> u32; - fn xattr_inline_count(&self) -> u16; - - fn load(&mut self, r: &mut RafsIoReader) -> Result<()>; -} - -impl Debug for &dyn RafsV6OndiskInode { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("RafsV6OndiskInode") - .field("format", &self.format()) - .field("ino", &self.ino()) - .field("mode", &self.mode()) - .field("size", &self.size()) - .field("union", &self.union()) - .field("nlink", &self.nlink()) - .field("xattr count", &self.xattr_inline_count()) - .finish() - } -} - -/// RAFS v6 inode on-disk format, 32 bytes. -/// -/// This structure is designed to be compatible with EROFS compact inode format. -#[repr(C)] -#[derive(Clone, Copy, Default, Debug)] -pub struct RafsV6InodeCompact { - /// inode format hints - pub i_format: u16, - pub i_xattr_icount: u16, - pub i_mode: u16, - pub i_nlink: u16, - pub i_size: u32, - pub i_reserved: u32, - /// raw_blkaddr or rdev or rafs_v6_inode_chunk_info - pub i_u: u32, - pub i_ino: u32, - pub i_uid: u16, - pub i_gid: u16, - pub i_reserved2: [u8; 4], -} - -impl RafsV6InodeCompact { - pub fn new() -> Self { - Self { - i_format: u16::to_le(EROFS_INODE_LAYOUT_COMPACT | (EROFS_INODE_FLAT_PLAIN << 1)), - i_xattr_icount: 0, - i_mode: 0, - i_nlink: 0, - i_size: 0, - i_reserved: 0, - i_u: 0, - i_ino: 0, - i_uid: 0, - i_gid: 0, - i_reserved2: [0u8; 4], - } - } -} - -impl RafsV6OndiskInode for RafsV6InodeCompact { - /// Set file size for inode. - fn set_size(&mut self, size: u64) { - self.i_size = u32::to_le(size as u32); - } - - /// Set ino for inode. - fn set_ino(&mut self, ino: u32) { - self.i_ino = ino.to_le(); - } - - /// Set number of hardlink. - fn set_nlink(&mut self, nlinks: u32) { - self.i_nlink = u16::to_le(nlinks as u16); - } - - /// Set file protection mode. - fn set_mode(&mut self, mode: u16) { - self.i_mode = mode.to_le(); - } - - /// Set the union field. - fn set_u(&mut self, u: u32) { - self.i_u = u.to_le(); - } - - /// Set uid and gid for the inode. - fn set_uidgid(&mut self, uid: u32, gid: u32) { - self.i_uid = u16::to_le(uid as u16); - self.i_gid = u16::to_le(gid as u16); - } - - /// Set last modification time for the inode. - fn set_mtime(&mut self, _sec: u64, _nsec: u32) {} - - /// Set real device id. - fn set_rdev(&mut self, _rdev: u32) {} - - /// Set xattr inline count. - fn set_xattr_inline_count(&mut self, count: u16) { - self.i_xattr_icount = count.to_le(); - } - - /// Set inode data layout format. 
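Before the compact inode's `set_data_layout` below, a std-only sketch of the `i_format` encoding shared by the compact and extended inode structures: bit 0 selects the on-disk inode version (0 for the 32-byte compact form, 1 for the 64-byte extended form, per the constants above) and the data layout value (FLAT_PLAIN = 0, FLAT_INLINE = 2, CHUNK_BASED = 4) is stored shifted left by one. Illustration only, not nydus code:

```rust
const EROFS_INODE_LAYOUT_COMPACT: u16 = 0;
const EROFS_INODE_LAYOUT_EXTENDED: u16 = 1;
const EROFS_INODE_CHUNK_BASED: u16 = 4;

fn encode_format(extended: bool, data_layout: u16) -> u16 {
    let version = if extended {
        EROFS_INODE_LAYOUT_EXTENDED
    } else {
        EROFS_INODE_LAYOUT_COMPACT
    };
    version | (data_layout << 1)
}

fn main() {
    // An extended, chunk-based inode encodes as 1 | (4 << 1) = 9.
    assert_eq!(encode_format(true, EROFS_INODE_CHUNK_BASED), 9);
    // A compact, flat-plain inode encodes as 0.
    assert_eq!(encode_format(false, 0), 0);
}
```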
- fn set_data_layout(&mut self, data_layout: u16) { - self.i_format = u16::to_le(EROFS_INODE_LAYOUT_COMPACT | (data_layout << 1)); - } - - fn format(&self) -> u16 { - u16::from_le(self.i_format) - } - - fn mode(&self) -> u16 { - u16::from_le(self.i_mode) - } - - fn size(&self) -> u64 { - u32::from_le(self.i_size) as u64 - } - - fn union(&self) -> u32 { - u32::from_le(self.i_u) - } - - fn ino(&self) -> u32 { - u32::from_le(self.i_ino) - } - - fn ugid(&self) -> (u32, u32) { - ( - u16::from_le(self.i_uid) as u32, - u16::from_le(self.i_gid) as u32, - ) - } - - fn mtime_s_ns(&self) -> (u64, u32) { - (0, 0) - } - - fn nlink(&self) -> u32 { - u16::from_le(self.i_nlink) as u32 - } - - fn rdev(&self) -> u32 { - 0 - } - - fn xattr_inline_count(&self) -> u16 { - u16::from_le(self.i_xattr_icount) - } - - /// Load a `RafsV6InodeCompact` from a reader. - fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -impl_bootstrap_converter!(RafsV6InodeCompact); - -impl RafsStore for RafsV6InodeCompact { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - // TODO: need to write xattr as well. - w.write_all(self.as_ref())?; - Ok(self.as_ref().len()) - } -} - -/// RAFS v6 inode on-disk format, 64 bytes. -/// -/// This structure is designed to be compatible with EROFS extended inode format. -#[repr(C)] -#[derive(Clone, Copy, Default, Debug)] -pub struct RafsV6InodeExtended { - /// Layout format for of the inode. - pub i_format: u16, - /// Size of extended attributes, in unit of 4Byte - pub i_xattr_icount: u16, - /// Protection mode. - pub i_mode: u16, - i_reserved: u16, - /// Size of the file content. - pub i_size: u64, - /// A `u32` union: raw_blkaddr or `rdev` or `rafs_v6_inode_chunk_info` - pub i_u: u32, - /// Inode number. - pub i_ino: u32, - /// User ID of owner. - pub i_uid: u32, - /// Group ID of owner - pub i_gid: u32, - /// Time of last modification - second part. - pub i_mtime: u64, - /// Time of last modification - nanoseconds part. - pub i_mtime_nsec: u32, - /// Number of links. - pub i_nlink: u32, - i_reserved2: [u8; 16], -} - -impl RafsV6InodeExtended { - /// Create a new instance of `RafsV6InodeExtended`. - pub fn new() -> Self { - Self { - i_format: u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)), - i_xattr_icount: 0, - i_mode: 0, - i_reserved: 0, - i_size: 0, - i_u: 0, - i_ino: 0, - i_uid: 0, - i_gid: 0, - i_mtime: 0, - i_mtime_nsec: 0, - i_nlink: 0, - i_reserved2: [0u8; 16], - } - } -} - -impl RafsV6OndiskInode for RafsV6InodeExtended { - /// Set file size for inode. - fn set_size(&mut self, size: u64) { - self.i_size = size.to_le(); - } - - /// Set ino for inode. - fn set_ino(&mut self, ino: u32) { - self.i_ino = ino.to_le(); - } - - /// Set number of hardlink. - fn set_nlink(&mut self, nlinks: u32) { - self.i_nlink = nlinks.to_le(); - } - - /// Set file protection mode. - fn set_mode(&mut self, mode: u16) { - self.i_mode = mode.to_le(); - } - - /// Set the union field. - fn set_u(&mut self, u: u32) { - self.i_u = u.to_le(); - } - - /// Set uid and gid for the inode. - fn set_uidgid(&mut self, uid: u32, gid: u32) { - self.i_uid = u32::to_le(uid); - self.i_gid = u32::to_le(gid); - } - - /// Set last modification time for the inode. - fn set_mtime(&mut self, sec: u64, nsec: u32) { - self.i_mtime = u64::to_le(sec); - self.i_mtime_nsec = u32::to_le(nsec); - } - - fn set_rdev(&mut self, rdev: u32) { - self.i_u = rdev.to_le() - } - - /// Set xattr inline count. 
- fn set_xattr_inline_count(&mut self, count: u16) { - self.i_xattr_icount = count.to_le(); - } - - /// Set inode data layout format. - fn set_data_layout(&mut self, data_layout: u16) { - self.i_format = u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (data_layout << 1)); - } - - fn format(&self) -> u16 { - u16::from_le(self.i_format) - } - - fn mode(&self) -> u16 { - u16::from_le(self.i_mode) - } - - fn size(&self) -> u64 { - u64::from_le(self.i_size) - } - - fn union(&self) -> u32 { - u32::from_le(self.i_u) - } - - fn ino(&self) -> u32 { - u32::from_le(self.i_ino) - } - - fn ugid(&self) -> (u32, u32) { - (u32::from_le(self.i_uid), u32::from_le(self.i_gid)) - } - - fn mtime_s_ns(&self) -> (u64, u32) { - (u64::from_le(self.i_mtime), u32::from_le(self.i_mtime_nsec)) - } - - fn nlink(&self) -> u32 { - u32::from_le(self.i_nlink) - } - - fn rdev(&self) -> u32 { - u32::from_le(self.i_u) - } - - fn xattr_inline_count(&self) -> u16 { - u16::from_le(self.i_xattr_icount) - } - - /// Load a `RafsV6InodeExtended` from a reader. - fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -impl_bootstrap_converter!(RafsV6InodeExtended); - -impl RafsStore for RafsV6InodeExtended { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - // TODO: need to write xattr as well. - w.write_all(self.as_ref())?; - Ok(self.as_ref().len()) - } -} - -/// Create RAFS v6 on-disk inode object. -pub fn new_v6_inode( - inode: &InodeWrapper, - datalayout: u16, - xattr_inline_count: u16, - compact: bool, -) -> Box { - let mut i: Box = match compact { - true => Box::new(RafsV6InodeCompact::new()), - false => Box::new(RafsV6InodeExtended::new()), - }; - - assert!(inode.ino() <= i32::MAX as Inode); - i.set_ino(inode.ino() as u32); - i.set_size(inode.size()); - i.set_uidgid(inode.uid(), inode.gid()); - i.set_mtime(inode.mtime(), inode.mtime_nsec()); - i.set_nlink(inode.nlink()); - i.set_mode(inode.mode() as u16); - i.set_data_layout(datalayout); - i.set_xattr_inline_count(xattr_inline_count); - if inode.is_special() { - i.set_rdev(inode.rdev() as u32); - } - - i -} - -/// Dirent sorted in alphabet order to improve performance by binary search. -#[repr(C, packed(2))] -#[derive(Default, Clone, Copy, Debug)] -pub struct RafsV6Dirent { - /// Node number, inode offset = s_meta_blkaddr * 4096 + nid * 32 - pub e_nid: u64, - /// start offset of file name in the block - pub e_nameoff: u16, - /// file type - pub e_file_type: u8, - /// reserved - e_reserved: u8, -} - -impl_bootstrap_converter!(RafsV6Dirent); - -impl RafsV6Dirent { - /// Create a new instance of `RafsV6Dirent`. - pub fn new(nid: u64, nameoff: u16, file_type: u8) -> Self { - Self { - e_nid: u64::to_le(nid), - e_nameoff: u16::to_le(nameoff), - e_file_type: u8::to_le(file_type), - e_reserved: 0, - } - } - - /// Get file type from file mode. - pub fn file_type(mode: u32) -> u8 { - let val = match mode as libc::mode_t & libc::S_IFMT { - libc::S_IFREG => EROFS_FILE_TYPE::EROFS_FT_REG_FILE, - libc::S_IFDIR => EROFS_FILE_TYPE::EROFS_FT_DIR, - libc::S_IFCHR => EROFS_FILE_TYPE::EROFS_FT_CHRDEV, - libc::S_IFBLK => EROFS_FILE_TYPE::EROFS_FT_BLKDEV, - libc::S_IFIFO => EROFS_FILE_TYPE::EROFS_FT_FIFO, - libc::S_IFSOCK => EROFS_FILE_TYPE::EROFS_FT_SOCK, - libc::S_IFLNK => EROFS_FILE_TYPE::EROFS_FT_SYMLINK, - _ => EROFS_FILE_TYPE::EROFS_FT_UNKNOWN, - }; - - val as u8 - } - - /// Set name offset of the dirent. 
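-    ///
-    /// The offset is relative to the start of the directory block, so it must stay below the
-    /// 4KiB block size; the assertion below enforces this.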
- pub fn set_name_offset(&mut self, offset: u16) { - assert!(offset < EROFS_BLOCK_SIZE_4096 as u16); - self.e_nameoff = u16::to_le(offset); - } - - /// Load a `RafsV6Dirent` from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -impl RafsStore for RafsV6Dirent { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - Ok(self.as_ref().len()) - } -} - -/// Rafs v6 ChunkHeader on-disk format. -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct RafsV6InodeChunkHeader { - /// Chunk layout format. - format: u16, - reserved: u16, -} - -impl RafsV6InodeChunkHeader { - /// Create a new instance of `RafsV6InodeChunkHeader`. - /// - /// If all chunks are continous in uncompressed cache file, the `chunk_size` will set to - /// `inode.size().next_power_of_two()`, so EROFS can optimize page cache in this case. - /// Otherwise `chunk_size` is set to RAFS filesystem's chunk size. - pub fn new(chunk_size: u64, block_size: u64) -> Self { - assert!(chunk_size.is_power_of_two()); - assert!(block_size == EROFS_BLOCK_SIZE_4096 || block_size == EROFS_BLOCK_SIZE_512); - let chunk_bits = chunk_size.trailing_zeros() as u16; - assert!(chunk_bits >= EROFS_BLOCK_BITS_12 as u16); - let chunk_bits = if block_size == EROFS_BLOCK_SIZE_4096 { - chunk_bits - EROFS_BLOCK_BITS_12 as u16 - } else { - chunk_bits - EROFS_BLOCK_BITS_9 as u16 - }; - assert!(chunk_bits <= EROFS_CHUNK_FORMAT_SIZE_MASK); - let format = EROFS_CHUNK_FORMAT_INDEXES_FLAG | chunk_bits; - - Self { - format: u16::to_le(format), - reserved: 0, - } - } - - /// Convert to a u32 value. - pub fn to_u32(&self) -> u32 { - (u16::from_le(self.format) as u32) | ((u16::from_le(self.reserved) as u32) << 16) - } - - /// Convert a u32 value to `RafsV6InodeChunkHeader`. - pub fn from_u32(val: u32) -> Self { - Self { - format: (val as u16).to_le(), - reserved: ((val >> 16) as u16).to_le(), - } - } -} - -impl_bootstrap_converter!(RafsV6InodeChunkHeader); - -/// Rafs v6 chunk address on-disk format, 8 bytes. -#[repr(C)] -#[derive(Default, Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub struct RafsV6InodeChunkAddr { - /// Lower part of encoded blob address. - c_blob_addr_lo: u16, - /// Higher part of encoded blob address. - c_blob_addr_hi: u16, - /// start block address of this inode chunk - /// decompressed offset must be aligned, in unit of block - c_blk_addr: u32, -} - -impl RafsV6InodeChunkAddr { - /// Create a new instance of `RafsV6InodeChunkIndex`. - pub fn new() -> Self { - Self { - c_blob_addr_lo: 0, - c_blob_addr_hi: 0, - c_blk_addr: 0, - } - } - - /// Get the blob index associated with the chunk. - /// - /// Note: for erofs, bump id by 1 since device id 0 is bootstrap. - /// The index in BlobInfo grows from 0, so when using this method to index the corresponding blob, - /// the index always needs to be minus 1 - /// Get the blob index of the chunk. - pub fn blob_index(&self) -> Result { - let idx = (u16::from_le(self.c_blob_addr_hi) & 0x00ff) as u32; - if idx == 0 { - Err(einval!("invalid zero blob index from RafsV6InodeChunkAddr")) - } else { - Ok(idx - 1) - } - } - - /// Set the blob index of the chunk. - pub fn set_blob_index(&mut self, blob_idx: u32) { - assert!(blob_idx < u8::MAX as u32); - let mut val = u16::from_le(self.c_blob_addr_hi); - val &= 0xff00; - val |= (blob_idx + 1) as u16; - self.c_blob_addr_hi = val.to_le(); - } - - /// Get the 24-bits index into the blob compression information array. 
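-    ///
-    /// The 24-bit index is split across two fields: its upper 8 bits live in the high byte of
-    /// `c_blob_addr_hi` and its lower 16 bits in `c_blob_addr_lo`, while the low byte of
-    /// `c_blob_addr_hi` holds `blob_index + 1`. The unit test below round-trips blob index 3 and
-    /// compression-info index `0x123456` through this encoding.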
- pub fn blob_ci_index(&self) -> u32 { - let val = (u16::from_le(self.c_blob_addr_hi) as u32) >> 8; - (val << 16) | (u16::from_le(self.c_blob_addr_lo) as u32) - } - - /// Set the index into the blob compression information array. - pub fn set_blob_ci_index(&mut self, ci_index: u32) { - assert!(ci_index <= 0x00ff_ffff); - let val = (ci_index >> 8) as u16 & 0xff00 | (u16::from_le(self.c_blob_addr_hi) & 0x00ff); - self.c_blob_addr_hi = val.to_le(); - self.c_blob_addr_lo = u16::to_le(ci_index as u16); - } - - /// Get block address. - pub fn block_addr(&self) -> u32 { - u32::from_le(self.c_blk_addr) - } - - /// Set block address. - pub fn set_block_addr(&mut self, addr: u32) { - self.c_blk_addr = addr.to_le(); - } - - /// Validate the 'RafsV6InodeChunkAddr' object. - pub fn validate(&self, max_blob_index: u32) -> bool { - let blob_idx = (u16::from_le(self.c_blob_addr_hi) & 0x00ff) as u32; - blob_idx > 0 && blob_idx - 1 <= max_blob_index - } - - /// Load a `RafsV6InodeChunkAddr` from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -impl_bootstrap_converter!(RafsV6InodeChunkAddr); - -impl RafsStore for RafsV6InodeChunkAddr { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - Ok(self.as_ref().len()) - } -} - -/// Rafs v6 device information on-disk format, 128 bytes. -#[repr(C)] -#[derive(Clone, Copy, Debug)] -pub struct RafsV6Device { - /// Blob id of sha256. - blob_id: [u8; BLOB_SHA256_LEN], - /// Number of blocks on the device. - blocks: u32, - /// Mapping start address. - mapped_blkaddr: u32, - reserved2: [u8; 56], -} - -impl Default for RafsV6Device { - fn default() -> Self { - Self { - blob_id: [0u8; 64], - blocks: 0, - mapped_blkaddr: 0, - reserved2: [0u8; 56], - } - } -} - -impl RafsV6Device { - /// Create a new instance of `RafsV6DeviceSlot`. - pub fn new() -> Self { - Self::default() - } - - /// Get blob id. - pub fn blob_id(&self) -> &[u8] { - &self.blob_id - } - - /// Set blob id. - pub fn set_blob_id(&mut self, id: &[u8; 64]) { - self.blob_id.copy_from_slice(id); - } - - /// Load a `RafsV6Device` from a reader. - pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } - - /// Validate the Rafs v6 Device slot. - pub fn validate(&self) -> Result<()> { - match String::from_utf8(self.blob_id.to_vec()) { - Ok(v) => { - if v.len() != BLOB_SHA256_LEN { - return Err(einval!(format!( - "Length of blob_id {} in RAFS v6 device entry is invalid", - v.len() - ))); - } - } - Err(_) => return Err(einval!("blob_id in RAFS v6 device entry is invalid")), - } - - if self.blocks() == 0 { - let msg = format!("invalid blocks {} in Rafs v6 device entry", self.blocks()); - return Err(einval!(msg)); - } - - Ok(()) - } - - impl_pub_getter_setter!(blocks, set_blocks, blocks, u32); - impl_pub_getter_setter!(mapped_blkaddr, set_mapped_blkaddr, mapped_blkaddr, u32); -} - -impl_bootstrap_converter!(RafsV6Device); - -impl RafsStore for RafsV6Device { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - w.write_all(self.as_ref())?; - - Ok(self.as_ref().len()) - } -} - -/// Load blob information table from a reader. 
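-///
-/// The entries are read from the RAFS v6 device table (one `RafsV6Device` slot per blob) and
-/// returned as a map keyed by blob id; each entry currently carries only the blob's
-/// `mapped_blkaddr`, and duplicated blob ids are rejected.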
-pub fn rafsv6_load_blob_extra_info( - meta: &RafsSuperMeta, - r: &mut RafsIoReader, -) -> Result> { - let mut infos = HashMap::new(); - if meta.blob_device_table_count == 0 { - return Ok(infos); - } - r.seek_to_offset(meta.blob_device_table_offset)?; - for _idx in 0..meta.blob_device_table_count { - let mut devslot = RafsV6Device::new(); - r.read_exact(devslot.as_mut())?; - devslot.validate()?; - let id = String::from_utf8(devslot.blob_id.to_vec()) - .map_err(|e| einval!(format!("invalid blob id, {}", e)))?; - let info = RafsBlobExtraInfo { - mapped_blkaddr: devslot.mapped_blkaddr(), - }; - if infos.contains_key(&id) { - return Err(einval!("duplicated blob id in RAFS v6 device table")); - } - infos.insert(id, info); - } - - Ok(infos) -} - -#[inline] -pub fn align_offset(offset: u64, aligned_size: u64) -> u64 { - round_up(offset, aligned_size) -} - -/// Generate EROFS `nid` from `offset`. -pub fn calculate_nid(offset: u64, meta_size: u64) -> u64 { - (offset - meta_size) >> EROFS_INODE_SLOT_BITS -} - -#[repr(C)] -#[derive(Clone, Copy, Debug)] -struct RafsV6Blob { - // SHA256 digest of the blob containing chunk data. - blob_id: [u8; BLOB_SHA256_LEN], - // Index in the blob table. - blob_index: u32, - // Chunk size of the blob. - chunk_size: u32, - // Number of chunks in the blob. - chunk_count: u32, - // Compression algorithm for chunks in the blob. - compression_algo: u32, - // Digest algorithm for chunks in the blob. - digest_algo: u32, - // Feature flags. - features: u32, - // Size of the compressed blob, not including CI array and header. - compressed_size: u64, - // Size of the uncompressed blob, not including CI array and header. - uncompressed_size: u64, - - // Size of blob ToC content, it's zero for blobs with inlined-meta. - blob_toc_size: u32, - // Compression algorithm for the compression information array. - ci_compressor: u32, - // Offset into the compressed blob for the compression information array. - ci_offset: u64, - // Size of the compressed compression information array. - ci_compressed_size: u64, - // Size of the uncompressed compression information array. - ci_uncompressed_size: u64, - - // SHA256 digest of blob ToC content, including the toc tar header. - // It's all zero for blobs with inlined-meta. - blob_toc_digest: [u8; 32], - // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and - // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special - // handling. - // When using encryption mod, it's reused for saving encryption key. - blob_meta_digest: [u8; 32], - // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. - // When using encryption mod, it's reused for saving encryption iv first 8 bytes. - blob_meta_size: u64, - // When using encryption mod, used for cipher_iv last 8 bytes. - // 0 7 15 - // +------------------+------------------+ - // | blob_meta_size | cipher_iv[8..16] | - // | 8bytes | 8bytes | - // +------------------+------------------+ - // \_ cipher_iv[0..16] _/ - cipher_iv: [u8; 8], - // Crypt algorithm for chunks in the blob. 
- cipher_algo: u32, - - reserved2: [u8; 36], -} - -impl Default for RafsV6Blob { - fn default() -> Self { - RafsV6Blob { - blob_id: [0u8; BLOB_SHA256_LEN], - blob_index: 0u32, - chunk_size: 0u32, - chunk_count: 0u32, - compression_algo: (compress::Algorithm::None as u32).to_le(), - digest_algo: (digest::Algorithm::Blake3 as u32).to_le(), - features: 0u32, - compressed_size: 0u64, - uncompressed_size: 0u64, - ci_compressor: (compress::Algorithm::None as u32).to_le(), - ci_offset: 0u64, - ci_compressed_size: 0u64, - ci_uncompressed_size: 0u64, - - blob_toc_digest: [0u8; 32], - blob_meta_digest: [0u8; 32], - blob_meta_size: 0, - blob_toc_size: 0u32, - cipher_iv: [0u8; 8], - cipher_algo: (crypt::Algorithm::None as u32).to_le(), - - reserved2: [0u8; 36], - } - } -} - -impl_bootstrap_converter!(RafsV6Blob); - -impl RafsV6Blob { - #[allow(clippy::wrong_self_convention)] - fn to_blob_info(&self) -> Result { - // debug_assert!(RAFS_DIGEST_LENGTH == 32); - debug_assert!(size_of::() == 256); - - let blob_id = String::from_utf8(self.blob_id.to_vec()) - .map_err(|e| einval!(format!("invalid blob id, {}", e)))?; - let blob_features = BlobFeatures::try_from(u32::from_le(self.features))?; - let mut blob_info = BlobInfo::new( - u32::from_le(self.blob_index), - blob_id, - u64::from_le(self.uncompressed_size), - u64::from_le(self.compressed_size), - u32::from_le(self.chunk_size), - u32::from_le(self.chunk_count), - blob_features, - ); - - let comp = compress::Algorithm::try_from(u32::from_le(self.compression_algo)) - .map_err(|_| einval!("invalid compression algorithm in Rafs v6 blob entry"))?; - blob_info.set_compressor(comp); - let digest = digest::Algorithm::try_from(u32::from_le(self.digest_algo)) - .map_err(|_| einval!("invalid digest algorithm in Rafs v6 blob entry"))?; - blob_info.set_digester(digest); - let cipher = crypt::Algorithm::try_from(u32::from_le(self.cipher_algo)) - .map_err(|_| einval!("invalid cipher algorithm in Rafs v6 blob entry"))?; - let cipher_object = cipher - .new_cipher() - .map_err(|e| einval!(format!("failed to create new cipher object {}", e)))?; - let cipher_context = match cipher { - crypt::Algorithm::None => None, - crypt::Algorithm::Aes128Xts => { - let mut cipher_iv = [0u8; 16]; - cipher_iv[..8].copy_from_slice(&self.blob_meta_size.to_le_bytes()); - cipher_iv[8..].copy_from_slice(&self.cipher_iv); - Some(CipherContext::new( - self.blob_meta_digest.to_vec(), - cipher_iv.to_vec(), - false, - cipher, - )?) 
- } - _ => { - return Err(einval!(format!( - "invalid cipher algorithm {:?} when creating cipher context", - cipher - ))) - } - }; - blob_info.set_cipher_info(cipher, Arc::new(cipher_object), cipher_context); - blob_info.set_blob_meta_info( - u64::from_le(self.ci_offset), - u64::from_le(self.ci_compressed_size), - u64::from_le(self.ci_uncompressed_size), - u32::from_le(self.ci_compressor), - ); - blob_info.set_blob_toc_digest(self.blob_toc_digest); - blob_info.set_blob_meta_digest(self.blob_meta_digest); - blob_info.set_blob_meta_size(self.blob_meta_size); - blob_info.set_blob_toc_size(self.blob_toc_size); - - Ok(blob_info) - } - - fn from_blob_info(blob_info: &BlobInfo) -> Result { - if blob_info.blob_id().len() > BLOB_SHA256_LEN || blob_info.blob_id().is_empty() { - let msg = format!("invalid blob id in blob info, {}", blob_info.blob_id()); - return Err(einval!(msg)); - } - - let blob_id = blob_info.blob_id(); - let id = blob_id.as_bytes(); - let mut blob_id = [0u8; BLOB_SHA256_LEN]; - blob_id[..id.len()].copy_from_slice(id); - - let (blob_meta_digest, blob_meta_size, cipher_iv) = match blob_info.cipher() { - crypt::Algorithm::None => ( - *blob_info.blob_meta_digest(), - blob_info.blob_meta_size(), - [0u8; 8], - ), - crypt::Algorithm::Aes128Xts => { - let cipher_ctx = match blob_info.cipher_context() { - Some(ctx) => ctx, - None => { - return Err(einval!( - "cipher context is unset while using Aes128Xts encryption algorithm" - )) - } - }; - let cipher_key: [u8; 32] = cipher_ctx.get_cipher_meta().0.try_into().unwrap(); - let (cipher_iv_top_half, cipher_iv_bottom_half) = - cipher_ctx.get_cipher_meta().1.split_at(8); - ( - cipher_key, - u64::from_le_bytes(cipher_iv_top_half.try_into().unwrap()), - cipher_iv_bottom_half.try_into().unwrap(), - ) - } - _ => { - return Err(einval!(format!( - "invalid cipher algorithm type {:?} in blob info", - blob_info.cipher() - ))) - } - }; - - Ok(RafsV6Blob { - blob_id, - blob_index: blob_info.blob_index().to_le(), - chunk_size: blob_info.chunk_size().to_le(), - chunk_count: blob_info.chunk_count().to_le(), - compression_algo: (blob_info.compressor() as u32).to_le(), - digest_algo: (blob_info.digester() as u32).to_le(), - compressed_size: blob_info.compressed_size().to_le(), - uncompressed_size: blob_info.uncompressed_size().to_le(), - features: blob_info.features().bits().to_le(), - ci_compressor: (blob_info.meta_ci_compressor() as u32).to_le(), - ci_offset: blob_info.meta_ci_offset().to_le(), - ci_compressed_size: blob_info.meta_ci_compressed_size().to_le(), - ci_uncompressed_size: blob_info.meta_ci_uncompressed_size().to_le(), - - blob_toc_digest: *blob_info.blob_toc_digest(), - blob_meta_digest, - blob_meta_size, - blob_toc_size: blob_info.blob_toc_size(), - cipher_iv, - cipher_algo: (blob_info.cipher() as u32).to_le(), - - reserved2: [0u8; 36], - }) - } - - fn validate(&self, blob_index: u32, chunk_size: u32, flags: RafsSuperFlags) -> bool { - match String::from_utf8(self.blob_id.to_vec()) { - Ok(v) => { - if v.len() != BLOB_SHA256_LEN { - error!( - "RafsV6Blob: idx {} blob id length {:x} is invalid", - blob_index, - v.len() - ); - return false; - } - } - Err(_) => { - error!( - "RafsV6Blob: idx {} blob_id from_utf8 is invalid", - blob_index - ); - return false; - } - } - - if u32::from_le(self.blob_index) != blob_index { - error!( - "RafsV6Blob: blob_index doesn't match {} {}", - u32::from_le(self.blob_index), - blob_index - ); - return false; - } - - let c_size = u32::from_le(self.chunk_size) as u64; - if c_size.count_ones() != 1 - || 
!(EROFS_BLOCK_SIZE_4096..=RAFS_MAX_CHUNK_SIZE).contains(&c_size) - || c_size != chunk_size as u64 - { - error!( - "RafsV6Blob: idx {} invalid chunk_size 0x{:x}, expect 0x{:x}", - blob_index, c_size, chunk_size - ); - return false; - } - - let chunk_count = u32::from_le(self.chunk_count); - if chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { - error!( - "RafsV6Blob: idx {} invalid chunk_count {:x}", - blob_index, chunk_count - ); - return false; - } - - if compress::Algorithm::try_from(u32::from_le(self.compression_algo)).is_err() - || compress::Algorithm::try_from(u32::from_le(self.ci_compressor)).is_err() - || digest::Algorithm::try_from(u32::from_le(self.digest_algo)).is_err() - || crypt::Algorithm::try_from(self.cipher_algo).is_err() - { - error!( - "RafsV6Blob: idx {} invalid compression_algo {} ci_compressor {} digest_algo {} cipher_algo {}", - blob_index, self.compression_algo, self.ci_compressor, self.digest_algo, self.cipher_algo, - ); - return false; - } - - let uncompressed_blob_size = u64::from_le(self.uncompressed_size); - let compressed_blob_size = u64::from_le(self.compressed_size); - if uncompressed_blob_size > BLOB_MAX_SIZE_UNCOMPRESSED { - error!( - "RafsV6Blob: idx {} invalid uncompressed_size {:x}", - blob_index, uncompressed_blob_size - ); - return false; - } - if compressed_blob_size > BLOB_MAX_SIZE_COMPRESSED { - error!( - "RafsV6Blob: idx {} invalid compressed_size {:x}", - blob_index, compressed_blob_size - ); - return false; - } - - let blob_features = match BlobFeatures::try_from(self.features) { - Ok(v) => v, - Err(_) => return false, - }; - let tarfs_mode = flags.contains(RafsSuperFlags::TARTFS_MODE); - match (blob_features.contains(BlobFeatures::ALIGNED), tarfs_mode) { - (false, false) => { - error!( - "RafsV6Blob: idx {} should have `ALIGNED` feature bit set", - blob_index - ); - return false; - } - (true, true) => { - error!("RafsV6Blob: `ALIGNED` flag should not be set for `TARFS` mode"); - return false; - } - _ => {} - } - - let ci_offset = u64::from_le(self.ci_offset); - let ci_compr_size = u64::from_le(self.ci_compressed_size); - let ci_uncompr_size = u64::from_le(self.ci_uncompressed_size); - if ci_offset.checked_add(ci_compr_size).is_none() { - error!("RafsV6Blob: idx {} invalid fields, ci_compressed_size {:x} + ci_offset {:x} wraps around", blob_index, ci_compr_size, ci_offset); - return false; - } else if ci_compr_size > ci_uncompr_size { - error!("RafsV6Blob: idx {} invalid fields, ci_compressed_size {:x} is greater than ci_uncompressed_size {:x}", blob_index, ci_compr_size, ci_uncompr_size); - return false; - } - - let count = chunk_count as u64; - if blob_features.contains(BlobFeatures::CHUNK_INFO_V2) - && (blob_features.contains(BlobFeatures::BATCH) - || blob_features.contains(BlobFeatures::ZRAN) - || blob_features.contains(BlobFeatures::ENCRYPTED)) - { - if ci_uncompr_size < count * size_of::() as u64 { - error!( - "RafsV6Blob: idx {} invalid ci_d_size {}", - blob_index, ci_uncompr_size - ); - return false; - } - } else if blob_features.contains(BlobFeatures::CHUNK_INFO_V2) { - if ci_uncompr_size != count * size_of::() as u64 { - error!( - "RafsV6Blob: idx {} invalid ci_d_size {}", - blob_index, ci_uncompr_size - ); - return false; - } - } else if blob_features.contains(BlobFeatures::BATCH) - || blob_features.contains(BlobFeatures::ZRAN) - || blob_features.contains(BlobFeatures::ENCRYPTED) - { - error!( - "RafsV6Blob: idx {} invalid feature bits {}", - blob_index, - blob_features.bits() - ); - return false; - } else if 
!blob_features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED) - && !tarfs_mode - && ci_uncompr_size != count * size_of::() as u64 - { - error!( - "RafsV6Blob: idx {} invalid fields, ci_d_size {:x}, chunk_count {:x}", - blob_index, ci_uncompr_size, chunk_count - ); - return false; - } - - true - } -} - -/// Rafs v6 blob description table. -#[derive(Clone, Debug, Default)] -pub struct RafsV6BlobTable { - /// Base blob information array. - entries: Vec>, -} - -impl RafsV6BlobTable { - /// Create a new instance of `RafsV6BlobTable`. - pub fn new() -> Self { - RafsV6BlobTable { - entries: Vec::new(), - } - } - - /// Get blob table size. - pub fn size(&self) -> usize { - self.entries.len() * size_of::() - } - - /// Get base information for a blob. - #[inline] - pub fn get(&self, blob_index: u32) -> Result> { - if blob_index >= self.entries.len() as u32 { - Err(enoent!("blob not found")) - } else { - Ok(self.entries[blob_index as usize].clone()) - } - } - - /// Get the base blob information array. - pub fn get_all(&self) -> Vec> { - self.entries.clone() - } - - /// Add information for new blob into the blob information table. - #[allow(clippy::too_many_arguments)] - pub fn add( - &mut self, - blob_id: String, - prefetch_offset: u32, - prefetch_size: u32, - chunk_size: u32, - chunk_count: u32, - uncompressed_size: u64, - compressed_size: u64, - flags: RafsSuperFlags, - blob_meta_digest: [u8; 32], - blob_toc_digest: [u8; 32], - blob_meta_size: u64, - blob_toc_size: u32, - is_chunkdict: bool, - header: BlobCompressionContextHeader, - cipher_object: Arc, - cipher_context: Option, - ) -> u32 { - let blob_index = self.entries.len() as u32; - let blob_features = BlobFeatures::try_from(header.features()).unwrap(); - let mut blob_info = BlobInfo::new( - blob_index, - blob_id, - uncompressed_size, - compressed_size, - chunk_size, - chunk_count, - blob_features, - ); - - blob_info.set_compressor(flags.into()); - blob_info.set_digester(flags.into()); - blob_info.set_cipher(flags.into()); - blob_info.set_prefetch_info(prefetch_offset as u64, prefetch_size as u64); - blob_info.set_blob_meta_info( - header.ci_compressed_offset(), - header.ci_compressed_size(), - header.ci_uncompressed_size(), - header.ci_compressor() as u32, - ); - blob_info.set_blob_meta_digest(blob_meta_digest); - blob_info.set_blob_toc_digest(blob_toc_digest); - blob_info.set_blob_meta_size(blob_meta_size); - blob_info.set_blob_toc_size(blob_toc_size); - blob_info.set_cipher_info(flags.into(), cipher_object, cipher_context); - - blob_info.set_chunkdict_generated(is_chunkdict); - - self.entries.push(Arc::new(blob_info)); - - blob_index - } - - /// Load blob information table from a reader. 
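-    ///
-    /// The on-disk table is a flat array of fixed-size `RafsV6Blob` entries (256 bytes each), so
-    /// `blob_table_size` must be a whole multiple of the entry size; every entry is validated and
-    /// then converted into a `BlobInfo` before being appended to `entries`.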
- pub fn load( - &mut self, - r: &mut RafsIoReader, - blob_table_size: u32, - chunk_size: u32, - flags: RafsSuperFlags, - ) -> Result<()> { - if blob_table_size == 0 { - return Ok(()); - } - if blob_table_size as usize % size_of::() != 0 { - let msg = format!("invalid Rafs v6 blob table size {}", blob_table_size); - return Err(einval!(msg)); - } - - for idx in 0..(blob_table_size as usize / size_of::()) { - let mut blob = RafsV6Blob::default(); - r.read_exact(blob.as_mut())?; - if !blob.validate(idx as u32, chunk_size, flags) { - return Err(einval!("invalid Rafs v6 blob entry")); - } - let blob_info = blob.to_blob_info()?; - self.entries.push(Arc::new(blob_info)); - } - - Ok(()) - } -} - -impl RafsStore for RafsV6BlobTable { - fn store(&self, w: &mut dyn RafsIoWrite) -> Result { - for blob_info in self.entries.iter() { - let blob: RafsV6Blob = RafsV6Blob::from_blob_info(blob_info)?; - trace!( - "blob_info index {}, chunk_count {} blob_id {:?}", - blob_info.blob_index(), - blob_info.chunk_count(), - blob_info.blob_id(), - ); - w.write_all(blob.as_ref())?; - } - - Ok(self.entries.len() * size_of::()) - } -} - -// RafsV6 xattr -const EROFS_XATTR_INDEX_USER: u8 = 1; -const EROFS_XATTR_INDEX_POSIX_ACL_ACCESS: u8 = 2; -const EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT: u8 = 3; -const EROFS_XATTR_INDEX_TRUSTED: u8 = 4; -// const EROFS_XATTR_INDEX_LUSTRE: u8 = 5; -const EROFS_XATTR_INDEX_SECURITY: u8 = 6; - -const XATTR_USER_PREFIX: &str = "user."; -const XATTR_SECURITY_PREFIX: &str = "security."; -const XATTR_TRUSTED_PREFIX: &str = "trusted."; -const XATTR_NAME_POSIX_ACL_ACCESS: &str = "system.posix_acl_access"; -const XATTR_NAME_POSIX_ACL_DEFAULT: &str = "system.posix_acl_default"; - -struct RafsV6XattrPrefix { - index: u8, - prefix: &'static str, - prefix_len: usize, -} - -impl RafsV6XattrPrefix { - fn new(prefix: &'static str, index: u8, prefix_len: usize) -> Self { - RafsV6XattrPrefix { - index, - prefix, - prefix_len, - } - } -} - -lazy_static! { - static ref RAFSV6_XATTR_TYPES: Vec = vec![ - RafsV6XattrPrefix::new( - XATTR_USER_PREFIX, - EROFS_XATTR_INDEX_USER, - XATTR_USER_PREFIX.as_bytes().len() - ), - RafsV6XattrPrefix::new( - XATTR_NAME_POSIX_ACL_ACCESS, - EROFS_XATTR_INDEX_POSIX_ACL_ACCESS, - XATTR_NAME_POSIX_ACL_ACCESS.as_bytes().len() - ), - RafsV6XattrPrefix::new( - XATTR_NAME_POSIX_ACL_DEFAULT, - EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - XATTR_NAME_POSIX_ACL_DEFAULT.as_bytes().len() - ), - RafsV6XattrPrefix::new( - XATTR_TRUSTED_PREFIX, - EROFS_XATTR_INDEX_TRUSTED, - XATTR_TRUSTED_PREFIX.as_bytes().len() - ), - RafsV6XattrPrefix::new( - XATTR_SECURITY_PREFIX, - EROFS_XATTR_INDEX_SECURITY, - XATTR_SECURITY_PREFIX.as_bytes().len() - ), - ]; -} - -// inline xattrs (n == i_xattr_icount): -// erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes -// 12 bytes / \ -// / \ -// /-----------------------\ -// | erofs_xattr_entries+ | -// +-----------------------+ -// inline xattrs must starts with erofs_xattr_ibody_header. -#[repr(C)] -#[derive(Default)] -pub struct RafsV6XattrIbodyHeader { - h_reserved: u32, - h_shared_count: u8, - h_reserved2: [u8; 7], - // may be followed by shared xattr id array -} - -impl_bootstrap_converter!(RafsV6XattrIbodyHeader); - -impl RafsV6XattrIbodyHeader { - pub fn new() -> Self { - RafsV6XattrIbodyHeader::default() - } - - /// Load a `RafsV6XattrIbodyHeader` from a reader. 
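-    ///
-    /// The header occupies 12 bytes. Shared xattrs are not used by RAFS v6 (the superblock
-    /// validation rejects a non-zero shared xattr block address), so `h_shared_count` is
-    /// expected to be zero after loading.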
- pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - r.read_exact(self.as_mut()) - } -} - -// RafsV6 xattr entry (for both inline & shared xattrs) -#[repr(C)] -#[derive(Default, PartialEq)] -pub struct RafsV6XattrEntry { - // length of name - e_name_len: u8, - // attribute name index - e_name_index: u8, - // size of attribute value - e_value_size: u16, - // followed by e_name and e_value -} - -impl_bootstrap_converter!(RafsV6XattrEntry); - -impl RafsV6XattrEntry { - fn new() -> Self { - RafsV6XattrEntry::default() - } - - pub fn name_len(&self) -> u32 { - self.e_name_len as u32 - } - - pub fn name_index(&self) -> u8 { - self.e_name_index - } - - pub fn value_size(&self) -> u32 { - u16::from_le(self.e_value_size) as u32 - } - - fn set_name_len(&mut self, v: u8) { - self.e_name_len = v; - } - - fn set_name_index(&mut self, v: u8) { - self.e_name_index = v; - } - - fn set_value_size(&mut self, v: u16) { - self.e_value_size = v.to_le(); - } -} - -pub(crate) fn recover_namespace(index: u8) -> Result { - let pos = RAFSV6_XATTR_TYPES - .iter() - .position(|x| x.index == index) - .ok_or_else(|| einval!(format!("invalid xattr name index {}", index)))?; - OsString::from_str(RAFSV6_XATTR_TYPES[pos].prefix) - .map_err(|_e| einval!("invalid xattr name prefix")) -} - -impl RafsXAttrs { - /// Get the number of xattr pairs. - pub fn count_v6(&self) -> usize { - if self.is_empty() { - 0 - } else { - let size = self.aligned_size_v6(); - (size - size_of::()) / size_of::() + 1 - } - } - - /// Get aligned size of all xattr pairs. - pub fn aligned_size_v6(&self) -> usize { - if self.is_empty() { - 0 - } else { - let mut size: usize = size_of::(); - for (key, value) in self.pairs.iter() { - // Safe to unwrap() because RafsXAttrs.add()/adds() has validated the prefix. - let (_, prefix_len) = Self::match_prefix(key).expect("xattr is not valid"); - - size += size_of::(); - size += key.byte_size() - prefix_len + value.len(); - size = round_up(size as u64, size_of::() as u64) as usize; - } - size - } - } - - /// Write Xattr to rafsv6 ondisk inode. - pub fn store_v6(&self, w: &mut dyn RafsIoWrite) -> Result { - let header = RafsV6XattrIbodyHeader::new(); - w.write_all(header.as_ref())?; - - if !self.pairs.is_empty() { - for (key, value) in self.pairs.iter() { - let (index, prefix_len) = Self::match_prefix(key) - .map_err(|_| einval!(format!("invalid xattr key {:?}", key)))?; - if key.len() < prefix_len { - return Err(einval!(format!("invalid xattr key {:?}", key))); - } - if value.len() > u16::MAX as usize { - return Err(einval!("xattr value size is too big")); - } - - let mut entry = RafsV6XattrEntry::new(); - entry.set_name_len((key.byte_size() - prefix_len) as u8); - entry.set_name_index(index); - entry.set_value_size(value.len() as u16); - - w.write_all(entry.as_ref())?; - w.write_all(&key.as_bytes()[prefix_len..])?; - w.write_all(value.as_ref())?; - - let size = - size_of::() + key.byte_size() - prefix_len + value.len(); - let padding = - round_up(size as u64, size_of::() as u64) as usize - size; - w.write_padding(padding)?; - } - } - - Ok(0) - } - - fn match_prefix(key: &OsStr) -> Result<(u8, usize)> { - let key_str = key.to_string_lossy(); - let pos = RAFSV6_XATTR_TYPES - .iter() - .position(|x| key_str.starts_with(x.prefix)) - .ok_or_else(|| einval!(format!("xattr prefix {:?} is not valid", key)))?; - Ok(( - RAFSV6_XATTR_TYPES[pos].index, - RAFSV6_XATTR_TYPES[pos].prefix_len, - )) - } -} - -#[derive(Clone, Default, Debug)] -pub struct RafsV6PrefetchTable { - /// List of inode numbers for prefetch. 
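-    /// The table is stored as a plain array of `u32` values, so its on-disk size is
-    /// `len() * 4` bytes; see `store()` and `load_prefetch_table_from()` below.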
- /// Note: It's not inode index of inodes table being stored here. - pub inodes: Vec, -} - -impl RafsV6PrefetchTable { - /// Create a new instance of `RafsV6PrefetchTable`. - pub fn new() -> RafsV6PrefetchTable { - RafsV6PrefetchTable { inodes: vec![] } - } - - /// Get content size of the inode prefetch table. - pub fn size(&self) -> usize { - self.len() * size_of::() - } - - /// Get number of entries in the prefetch table. - pub fn len(&self) -> usize { - self.inodes.len() - } - - /// Check whether the inode prefetch table is empty. - pub fn is_empty(&self) -> bool { - self.inodes.is_empty() - } - - /// Add an inode into the inode prefetch table. - pub fn add_entry(&mut self, ino: u32) { - self.inodes.push(ino); - } - - /// Store the inode prefetch table to a writer. - pub fn store(&mut self, w: &mut dyn RafsIoWrite) -> Result { - let (_, data, _) = unsafe { self.inodes.align_to::() }; - w.write_all(data.as_ref())?; - - // OK. Let's see if we have to align... :-( - // let cur_len = self.inodes.len() * size_of::(); - - Ok(data.len()) - } - - /// Load a inode prefetch table from a reader. - /// - /// Note: Generally, prefetch happens after loading bootstrap, so with methods operating - /// files with changing their offset won't bring errors. But we still use `pread` now so as - /// to make this method more stable and robust. Even dup(2) can't give us a separated file struct. - pub fn load_prefetch_table_from( - &mut self, - r: &mut RafsIoReader, - offset: u64, - entries: usize, - ) -> Result { - self.inodes = vec![0u32; entries]; - - let (_, data, _) = unsafe { self.inodes.align_to_mut::() }; - r.seek_to_offset(offset)?; - r.read_exact(data)?; - - Ok(data.len()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::metadata::RafsVersion; - use crate::{BufWriter, RafsIoRead}; - use std::fs::OpenOptions; - use std::io::Write; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_super_block_load_store() { - let mut sb = RafsV6SuperBlock::new(); - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut writer = BufWriter::new(w); - let mut reader: Box = Box::new(r); - - sb.s_blocks = 0x1000; - sb.s_extra_devices = 5; - sb.s_inos = 0x200; - sb.store(&mut writer).unwrap(); - writer.flush().unwrap(); - - let mut sb2 = RafsV6SuperBlock::new(); - sb2.load(&mut reader).unwrap(); - assert_eq!(sb2.s_magic, EROFS_SUPER_MAGIC_V1.to_le()); - assert_eq!(sb2.s_blocks, 0x1000u32.to_le()); - assert_eq!(sb2.s_extra_devices, 5u16.to_le()); - assert_eq!(sb2.s_inos, 0x200u64.to_le()); - assert_eq!(sb2.s_feature_compat, EROFS_FEATURE_COMPAT_RAFS_V6.to_le()); - assert_eq!( - sb2.s_feature_incompat, - (EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE).to_le() - ); - } - - #[test] - fn test_rafs_v6_inode_extended() { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut writer = BufWriter::new(w); - let mut reader: Box = Box::new(r); - - let mut inode = RafsV6InodeExtended::new(); - assert_eq!( - inode.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)) - ); - inode.set_data_layout(EROFS_INODE_FLAT_INLINE); - assert_eq!( - inode.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED 
| (EROFS_INODE_FLAT_INLINE << 1)) - ); - inode.set_inline_plain_layout(); - assert_eq!( - inode.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)) - ); - inode.set_inline_inline_layout(); - assert_eq!( - inode.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_INLINE << 1)) - ); - inode.set_chunk_based_layout(); - assert_eq!( - inode.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_CHUNK_BASED << 1)) - ); - inode.set_uidgid(1, 2); - inode.set_mtime(3, 4); - inode.store(&mut writer).unwrap(); - writer.flush().unwrap(); - - let mut inode2 = RafsV6InodeExtended::new(); - inode2.load(&mut reader).unwrap(); - assert_eq!(inode2.i_uid, 1u32.to_le()); - assert_eq!(inode2.i_gid, 2u32.to_le()); - assert_eq!(inode2.i_mtime, 3u64.to_le()); - assert_eq!(inode2.i_mtime_nsec, 4u32.to_le()); - assert_eq!( - inode2.i_format, - u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_CHUNK_BASED << 1)) - ); - } - - #[test] - fn test_rafs_v6_chunk_header() { - let chunk_size: u32 = 1024 * 1024; - let header = RafsV6InodeChunkHeader::new(chunk_size as u64, EROFS_BLOCK_SIZE_4096); - let target = EROFS_CHUNK_FORMAT_INDEXES_FLAG | (20 - 12) as u16; - assert_eq!(u16::from_le(header.format), target); - } - - #[test] - fn test_rafs_v6_chunk_addr() { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut writer = BufWriter::new(w); - let mut reader: Box = Box::new(r); - - let mut chunk = RafsV6InodeChunkAddr::new(); - chunk.set_blob_index(3); - chunk.set_blob_ci_index(0x123456); - chunk.set_block_addr(0xa5a53412); - chunk.store(&mut writer).unwrap(); - writer.flush().unwrap(); - let mut chunk2 = RafsV6InodeChunkAddr::new(); - chunk2.load(&mut reader).unwrap(); - assert_eq!(chunk2.blob_index().unwrap(), 3); - assert_eq!(chunk2.blob_ci_index(), 0x123456); - assert_eq!(chunk2.block_addr(), 0xa5a53412); - assert!(chunk2.validate(4)); - assert!(chunk2.validate(3)); - assert!(!chunk2.validate(2)); - } - - #[test] - fn test_rafs_v6_device() { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut writer = BufWriter::new(w); - let mut reader: Box = Box::new(r); - - let id = [0xa5u8; 64]; - let mut device = RafsV6Device::new(); - device.set_blocks(0x1234); - device.set_blob_id(&id); - device.store(&mut writer).unwrap(); - writer.flush().unwrap(); - let mut device2 = RafsV6Device::new(); - device2.load(&mut reader).unwrap(); - assert_eq!(device2.blocks(), 0x1234); - assert_eq!(device.blob_id(), &id); - } - - #[test] - fn test_rafs_xattr_count_v6() { - let mut xattrs = RafsXAttrs::new(); - xattrs.add(OsString::from("user.a"), vec![1u8]).unwrap(); - xattrs.add(OsString::from("trusted.b"), vec![2u8]).unwrap(); - - assert_eq!(xattrs.count_v6(), 5); - - let xattrs2 = RafsXAttrs::new(); - assert_eq!(xattrs2.count_v6(), 0); - } - - #[test] - fn test_rafs_xattr_size_v6() { - let mut xattrs = RafsXAttrs::new(); - xattrs.add(OsString::from("user.a"), vec![1u8]).unwrap(); - xattrs.add(OsString::from("trusted.b"), vec![2u8]).unwrap(); - - let size = 12 + 8 + 8; - assert_eq!(xattrs.aligned_size_v6(), size); - - let xattrs2 = RafsXAttrs::new(); - assert_eq!(xattrs2.aligned_size_v6(), 0); - - let mut xattrs2 = 
RafsXAttrs::new(); - xattrs2.add(OsString::from("user.a"), vec![1u8]).unwrap(); - xattrs2 - .add(OsString::from("unknown.b"), vec![2u8]) - .unwrap_err(); - } - - #[test] - fn test_rafs_xattr_store_v6() { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut writer = BufWriter::new(w); - let mut reader: Box = Box::new(r); - - let mut xattrs = RafsXAttrs::new(); - // These xattrs are in "e_name_index" order for easier reading: - xattrs - .add(OsString::from("security.rafs"), vec![2u8, 3u8]) - .unwrap(); - xattrs - .add( - OsString::from("system.posix_acl_access"), - vec![4u8, 5u8, 6u8], - ) - .unwrap(); - xattrs - .add( - OsString::from("system.posix_acl_default"), - vec![7u8, 8u8, 9u8, 10u8], - ) - .unwrap(); - xattrs - .add( - OsString::from("trusted.abc"), - vec![11u8, 12u8, 13u8, 14u8, 15u8], - ) - .unwrap(); - xattrs.add(OsString::from("user.nydus"), vec![1u8]).unwrap(); - xattrs.store_v6(&mut writer).unwrap(); - writer.flush().unwrap(); - - let mut header = RafsV6XattrIbodyHeader::new(); - header.load(&mut reader).unwrap(); - let mut size = size_of::(); - - assert_eq!(header.h_shared_count, 0u8); - - let target1 = RafsV6XattrEntry { - e_name_len: 5u8, // "nydus" - e_name_index: 1u8, // EROFS_XATTR_INDEX_USER - e_value_size: u16::to_le(1u16), - }; - - let target2 = RafsV6XattrEntry { - e_name_len: 0u8, // "" - e_name_index: 2u8, // EROFS_XATTR_INDEX_POSIX_ACL_ACCESS - e_value_size: u16::to_le(3u16), - }; - - let target3 = RafsV6XattrEntry { - e_name_len: 0u8, // "" - e_name_index: 3u8, // EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT - e_value_size: u16::to_le(4u16), - }; - - let target4 = RafsV6XattrEntry { - e_name_len: 3u8, // "abc" - e_name_index: 4u8, // EROFS_XATTR_INDEX_TRUSTED - e_value_size: u16::to_le(5u16), - }; - - let target5 = RafsV6XattrEntry { - e_name_len: 4u8, // "rafs" - e_name_index: 6u8, // EROFS_XATTR_INDEX_SECURITY - e_value_size: u16::to_le(2u16), - }; - - let targets = vec![target1, target2, target3, target4, target5]; - - let mut entries: Vec = Vec::with_capacity(targets.len()); - for _i in 0..targets.len() { - let mut entry = RafsV6XattrEntry::new(); - reader.read_exact(entry.as_mut()).unwrap(); - size += round_up( - (size_of::() - + entry.e_name_len as usize - + entry.e_value_size as usize) as u64, - size_of::() as u64, - ) as usize; - reader.seek_to_offset(size as u64).unwrap(); - entries.push(entry); - } - - for (i, target) in targets.iter().enumerate() { - let j = entries - .iter() - .position(|entry| entry == target) - .unwrap_or_else(|| panic!("Test failed for: target{}", i + 1)); - // Note: swap_remove() is faster than remove() when order doesn't matter: - entries.swap_remove(j); - } - } - - #[test] - fn test_invalid_blob_idx_from_chunk_addr() { - let mut addr = RafsV6InodeChunkAddr::new(); - assert!(addr.blob_index().is_err()); - addr.set_blob_index(8); - assert_eq!(addr.blob_index().unwrap(), 8); - - assert_eq!(addr.blob_ci_index(), 0); - addr.set_blob_ci_index(131); - assert_eq!(addr.blob_ci_index(), 131); - - assert_eq!(addr.block_addr(), 0); - addr.set_block_addr(179); - assert_eq!(addr.block_addr(), 179); - } - - #[test] - fn test_rsfs_v6_super_block() { - let mut blk = RafsV6SuperBlock::new(); - assert!(blk.validate(0).is_err()); - - blk.set_inos(10); - blk.set_blocks(100); - blk.set_root_nid(1000); - assert_eq!(blk.s_inos, 10); - assert_eq!(blk.s_blocks, 100); - 
assert_eq!(blk.s_root_nid, 1000); - - blk.set_block_bits(EROFS_BLOCK_BITS_9); - blk.set_meta_addr(1024 * 1024); - assert_eq!( - blk.s_meta_blkaddr, - (1024 * 1024) / EROFS_BLOCK_SIZE_512 as u32 - ); - - blk.set_block_bits(EROFS_BLOCK_BITS_12); - blk.set_meta_addr(1024 * 1024); - assert_eq!( - blk.s_meta_blkaddr, - (1024 * 1024) / EROFS_BLOCK_SIZE_4096 as u32 - ); - } - - #[test] - fn test_rafs_v6_super_block_ext() { - let mut ext = RafsV6SuperBlockExt::new(); - ext.set_compressor(compress::Algorithm::GZip); - ext.set_has_xattr(); - ext.set_explicit_uidgid(); - ext.set_inlined_chunk_digest(); - ext.set_tarfs_mode(); - ext.set_digester(digest::Algorithm::Blake3); - ext.set_chunk_table(1024, 1024); - ext.set_cipher(crypt::Algorithm::Aes128Xts); - - assert_ne!(ext.s_flags & RafsSuperFlags::COMPRESSION_GZIP.bits(), 0); - assert_ne!(ext.s_flags & RafsSuperFlags::HAS_XATTR.bits(), 0); - assert_ne!(ext.s_flags & RafsSuperFlags::EXPLICIT_UID_GID.bits(), 0); - assert_ne!(ext.s_flags & RafsSuperFlags::INLINED_CHUNK_DIGEST.bits(), 0); - assert_ne!(ext.s_flags & RafsSuperFlags::TARTFS_MODE.bits(), 0); - assert_ne!(ext.s_flags & RafsSuperFlags::HASH_BLAKE3.bits(), 0); - assert_eq!(ext.chunk_table_size(), 1024); - assert_eq!(ext.chunk_table_offset(), 1024); - assert_ne!( - ext.s_flags & RafsSuperFlags::ENCRYPTION_ASE_128_XTS.bits(), - 0 - ); - } - - #[test] - fn test_rafs_v6_inode_compact() { - let mut cpt = RafsV6InodeCompact::new(); - cpt.set_size(1024); - cpt.set_ino(10); - cpt.set_nlink(2048); - cpt.set_mode(1); - cpt.set_u(8); - cpt.set_uidgid(1, 1); - cpt.set_mtime(1, 1000); - cpt.set_rdev(20); - cpt.set_xattr_inline_count(10); - cpt.set_data_layout(1); - assert_eq!(cpt.format().to_le(), 2); - assert_eq!(cpt.mode(), 1); - assert_eq!(cpt.size(), 1024); - assert_eq!(cpt.union(), 8); - assert_eq!(cpt.ino(), 10); - assert_eq!(cpt.ugid(), (1, 1)); - assert_eq!(cpt.mtime_s_ns(), (0, 0)); - assert_eq!(cpt.nlink(), 2048); - assert_eq!(cpt.rdev(), 0); - assert_eq!(cpt.xattr_inline_count(), 10); - } - - #[test] - fn test_rafs_v6_inode_extended_inode() { - let mut ext = RafsV6InodeExtended::new(); - ext.set_size(1024); - ext.set_ino(1024); - ext.set_nlink(1024); - ext.set_mode(1024); - ext.set_u(1024); - ext.set_rdev(1024); - ext.set_xattr_inline_count(1024); - - assert_eq!(ext.format(), 1); - assert_eq!(ext.mode(), 1024); - assert_eq!(ext.size(), 1024); - assert_eq!(ext.union(), 1024); - assert_eq!(ext.ino(), 1024); - assert_eq!(ext.ugid(), (0, 0)); - assert_eq!(ext.mtime_s_ns(), (0, 0)); - assert_eq!(ext.nlink(), 1024); - assert_eq!(ext.rdev(), 1024); - assert_eq!(ext.xattr_inline_count(), 1024); - } - - #[test] - fn test_v6_inode() { - let i = new_v6_inode( - &InodeWrapper::new(RafsVersion::V6), - EROFS_INODE_FLAT_INLINE, - 1024, - true, - ); - assert_eq!(i.ino(), 0); - assert_eq!(i.size(), 0); - assert_eq!(i.mtime_s_ns(), (0, 0)); - assert_eq!(i.nlink(), 0); - } - - #[test] - fn test_rafs_v6_dirent() { - let mut dir = RafsV6Dirent::new(0, 1024, EROFS_FILE_TYPE::EROFS_FT_BLKDEV as u8); - dir.set_name_offset(2048); - assert_eq!(dir.e_nameoff, 2048); - - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFREG as u32), - EROFS_FILE_TYPE::EROFS_FT_REG_FILE as u8 - ); - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFDIR as u32), - EROFS_FILE_TYPE::EROFS_FT_DIR as u8 - ); - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFCHR as u32), - EROFS_FILE_TYPE::EROFS_FT_CHRDEV as u8 - ); - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFBLK as u32), - EROFS_FILE_TYPE::EROFS_FT_BLKDEV as u8 - ); - assert_eq!( - 
RafsV6Dirent::file_type(libc::S_IFIFO as u32), - EROFS_FILE_TYPE::EROFS_FT_FIFO as u8 - ); - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFSOCK as u32), - EROFS_FILE_TYPE::EROFS_FT_SOCK as u8 - ); - assert_eq!( - RafsV6Dirent::file_type(libc::S_IFLNK as u32), - EROFS_FILE_TYPE::EROFS_FT_SYMLINK as u8 - ); - } - - #[test] - fn test_rafs_v6_inode_chunk_header() { - let hdr = RafsV6InodeChunkHeader::new(0x1000_0000, EROFS_BLOCK_SIZE_4096); - let val = hdr.to_u32(); - let newhdr = RafsV6InodeChunkHeader::from_u32(val); - assert_eq!(newhdr.format, hdr.format); - assert_eq!(newhdr.reserved, hdr.reserved); - } - #[test] - fn test_align_offset() { - assert_eq!(align_offset(1099, 8), 1104); - assert_eq!(align_offset(1099, 16), 1104); - assert_eq!(align_offset(1099, 32), 1120); - } - #[test] - fn test_calculate_nid() { - assert_eq!(calculate_nid(1024, 512), 16); - assert_eq!(calculate_nid(1024, 768), 8); - assert_eq!(calculate_nid(2048, 768), 40); - } - - #[test] - fn test_rafs_v6_blob() { - let mut blob = RafsV6Blob { - cipher_algo: crypt::Algorithm::Aes256Gcm as u32, - ..RafsV6Blob::default() - }; - assert!(blob.to_blob_info().is_err()); - - blob.blob_id = [0x1u8; BLOB_SHA256_LEN]; - blob.blob_meta_digest = [0xcu8; 32]; - blob.blob_meta_digest[31] = 0xau8; - - blob.cipher_algo = crypt::Algorithm::Aes128Xts as u32; - let info: BlobInfo = blob.to_blob_info().unwrap(); - RafsV6Blob::from_blob_info(&info).unwrap(); - assert!(RafsV6Blob::from_blob_info(&info).is_ok()); - - blob.cipher_algo = crypt::Algorithm::None as u32; - let info: BlobInfo = blob.to_blob_info().unwrap(); - RafsV6Blob::from_blob_info(&info).unwrap(); - assert!(RafsV6Blob::from_blob_info(&info).is_ok()); - } - - #[test] - fn test_rafs_v6_blob_table() { - let mut table = RafsV6BlobTable::new(); - assert_eq!(table.size(), 0); - table.add( - "0".to_string(), - 0, - 0, - 1024, - 10, - 0, - 0, - RafsSuperFlags { bits: 0 }, - [0; 32], - [0; 32], - 0, - 0, - false, - BlobCompressionContextHeader::default(), - Arc::new(crypt::Algorithm::Aes128Xts.new_cipher().unwrap()), - Some(CipherContext::default()), - ); - assert_eq!(table.size(), size_of::()); - assert!(table.get(0).is_ok()); - assert!(table.get(1).is_err()); - } - - fn get_streams() -> (Box, BufWriter) { - let temp = TempFile::new().unwrap(); - let w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let writer: BufWriter = BufWriter::new(w); - let reader: Box = Box::new(r); - (reader, writer) - } - - #[test] - fn test_rafs_v6_blob_table_store() { - let mut table = RafsV6BlobTable::new(); - table.add( - "0".to_string(), - 0, - 0, - 1024, - 10, - 0, - 0, - RafsSuperFlags { bits: 0 }, - [0; 32], - [0; 32], - 0, - 0, - false, - BlobCompressionContextHeader::default(), - Arc::new(crypt::Algorithm::Aes128Xts.new_cipher().unwrap()), - Some(CipherContext::default()), - ); - - let (_reader, mut writer) = get_streams(); - table.store(&mut writer).unwrap(); - writer.flush().unwrap(); - } - - #[test] - fn test_rafs_v6_xattr_entry() { - let ent = RafsV6XattrEntry::new(); - assert_eq!(ent.name_index(), 0); - assert_eq!(ent.name_len(), 0); - assert_eq!(ent.value_size(), 0); - } - - #[test] - fn test_rafs_prefetch_table() { - let mut table = RafsV6PrefetchTable::new(); - assert_eq!(table.size(), 0); - assert_eq!(table.len(), 0); - assert!(table.is_empty()); - table.add_entry(0); - table.add_entry(1); - assert_eq!(table.len(), 2); - assert!(!table.is_empty()); - - let 
(mut reader, mut writer) = get_streams(); - table.store(&mut writer).unwrap(); - writer.flush().unwrap(); - table.inodes.clear(); - assert_eq!(table.len(), 0); - assert!(table.load_prefetch_table_from(&mut reader, 0, 2).is_ok()); - assert_eq!(table.len(), 2); - } -} +// Copyright 2020-2021 Ant Group. All rights reserved. +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::ffi::{OsStr, OsString}; +use std::fmt::Debug; +use std::io::{Read, Result}; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; +use std::str::FromStr; +use std::sync::Arc; + +use lazy_static::lazy_static; +use nydus_storage::device::{BlobFeatures, BlobInfo}; +use nydus_storage::meta::{ + BlobChunkInfoV1Ondisk, BlobChunkInfoV2Ondisk, BlobCompressionContextHeader, +}; +use nydus_storage::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; +use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::{compress, digest, round_up, ByteSize}; + +use crate::metadata::inode::InodeWrapper; +use crate::metadata::layout::v5::RafsV5ChunkInfo; +use crate::metadata::layout::{MetaRange, RafsXAttrs}; +use crate::metadata::{Inode, RafsBlobExtraInfo, RafsStore, RafsSuperFlags, RafsSuperMeta}; +use crate::{impl_bootstrap_converter, impl_pub_getter_setter, RafsIoReader, RafsIoWrite}; + +/// EROFS metadata slot size. +pub const EROFS_INODE_SLOT_SIZE: usize = 1 << EROFS_INODE_SLOT_BITS; +/// Bits of EROFS logical block size. +pub const EROFS_BLOCK_BITS_12: u8 = 12; +/// EROFS logical block size. +pub const EROFS_BLOCK_SIZE_4096: u64 = 1u64 << EROFS_BLOCK_BITS_12; +pub const EROFS_BLOCK_BITS_9: u8 = 9; +/// EROFS logical block size. +pub const EROFS_BLOCK_SIZE_512: u64 = 1u64 << EROFS_BLOCK_BITS_9; + +/// Offset of EROFS super block. +pub const EROFS_SUPER_OFFSET: u16 = 1024; +/// Size of EROFS super block. +pub const EROFS_SUPER_BLOCK_SIZE: u16 = 128; +/// Size of extended super block, used for rafs v6 specific fields +pub const EROFS_EXT_SUPER_BLOCK_SIZE: u16 = 256; +/// EROFS device table offset. +pub const EROFS_DEVTABLE_OFFSET: u16 = + EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE + EROFS_EXT_SUPER_BLOCK_SIZE; + +/// Offseet for inode format flags: compact or extended. +pub const EROFS_I_VERSION_BIT: u16 = 0; +/// Number of bits for inode format flags. +pub const EROFS_I_VERSION_BITS: u16 = 1; +/// 32-byte on-disk inode +pub const EROFS_INODE_LAYOUT_COMPACT: u16 = 0; +/// 64-byte on-disk inode +pub const EROFS_INODE_LAYOUT_EXTENDED: u16 = 1; +/// Number of bits for inode data layout. +pub const EROFS_I_DATALAYOUT_BITS: u16 = 3; +/// EROFS plain inode. +pub const EROFS_INODE_FLAT_PLAIN: u16 = 0; +/// EROFS inline inode. +pub const EROFS_INODE_FLAT_INLINE: u16 = 2; +/// EROFS chunked inode. +pub const EROFS_INODE_CHUNK_BASED: u16 = 4; + +// Magic number for EROFS super block. +const EROFS_SUPER_MAGIC_V1: u32 = 0xE0F5_E1E2; +// Bits of EROFS metadata slot size. +const EROFS_INODE_SLOT_BITS: u8 = 5; +// Bit flag indicating whether the inode is chunked or not. +const EROFS_CHUNK_FORMAT_INDEXES_FLAG: u16 = 0x0020; +// Encoded chunk size (log2(chunk_size) - EROFS_BLOCK_BITS). +const EROFS_CHUNK_FORMAT_SIZE_MASK: u16 = 0x001F; + +/// Checksum of superblock, compatible with EROFS versions prior to Linux kernel 5.5. +#[allow(dead_code)] +const EROFS_FEATURE_COMPAT_SB_CHKSUM: u32 = 0x0000_0001; +/// Rafs v6 specific metadata, compatible with EROFS versions since Linux kernel 5.16. 
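+/// `RafsV6SuperBlock::validate()` below requires this bit to be set in `s_feature_compat`.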
+const EROFS_FEATURE_COMPAT_RAFS_V6: u32 = 0x4000_0000; +/// Chunked inode, incompatible with EROFS versions prior to Linux kernel 5.15. +const EROFS_FEATURE_INCOMPAT_CHUNKED_FILE: u32 = 0x0000_0004; +/// Multi-devices, incompatible with EROFS versions prior to Linux kernel 5.16. +const EROFS_FEATURE_INCOMPAT_DEVICE_TABLE: u32 = 0x0000_0008; + +/// Size of SHA256 digest string. +const BLOB_SHA256_LEN: usize = 64; +const BLOB_MAX_SIZE_UNCOMPRESSED: u64 = 1u64 << 44; +const BLOB_MAX_SIZE_COMPRESSED: u64 = 1u64 << 40; + +/// RAFS v6 superblock on-disk format, 128 bytes. +/// +/// The structure is designed to be compatible with EROFS superblock, so the in kernel EROFS file +/// system driver could be used to mount a RAFS v6 image. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct RafsV6SuperBlock { + /// File system magic number + s_magic: u32, + /// Crc32 checksum of the superblock, ignored by Rafs v6. + s_checksum: u32, + /// Compatible filesystem features. + s_feature_compat: u32, + /// Bits of block size, 4K or 512 bytes. + s_blkszbits: u8, + /// Number of extended superblock slots, ignored by Rafs v6. + /// `superblock size = 128(size of RafsV6SuperBlock) + s_extslots * 16`. + s_extslots: u8, + /// Nid of the root directory. + /// `root inode offset = s_meta_blkaddr * 4096 + s_root_nid * 32`. + s_root_nid: u16, + /// Total valid ino # + s_inos: u64, + /// Timestamp of filesystem creation. + s_build_time: u64, + /// Timestamp of filesystem creation. + s_build_time_nsec: u32, + /// Total size of file system in blocks, used for statfs + s_blocks: u32, + /// Start block address of the metadata area. + s_meta_blkaddr: u32, + /// Start block address of the shared xattr area. + s_xattr_blkaddr: u32, + /// 128-bit uuid for volume + s_uuid: [u8; 16], + /// Volume name. + s_volume_name: [u8; 16], + /// Incompatible filesystem feature flags. + s_feature_incompat: u32, + /// A union of `u16` for miscellaneous usage. + s_u: u16, + /// # of devices besides the primary device. + s_extra_devices: u16, + /// Offset of the device table, `startoff = s_devt_slotoff * 128`. + s_devt_slotoff: u16, + /// Padding. + s_reserved: [u8; 38], +} + +impl_bootstrap_converter!(RafsV6SuperBlock); + +impl RafsV6SuperBlock { + /// Create a new instance of `RafsV6SuperBlock`. + pub fn new() -> Self { + Self::default() + } + + /// Load a `RafsV6SuperBlock` from a reader. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + let mut buf1 = [0u8; EROFS_SUPER_OFFSET as usize]; + + r.read_exact(&mut buf1)?; + r.read_exact(self.as_mut()) + } + + /// Validate the Rafs v6 super block. 
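+    ///
+    /// Validation covers the metadata size and its block-size alignment, the EROFS magic number,
+    /// the (unused) checksum, the block-size bits (9 for 512-byte or 12 for 4096-byte blocks),
+    /// the inode count, several fields that must be zero, the device table offset and extent,
+    /// and the required compatible/incompatible feature bits.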
+ pub fn validate(&self, meta_size: u64) -> Result<()> { + if meta_size < EROFS_BLOCK_SIZE_4096 { + return Err(einval!(format!( + "invalid Rafs v6 metadata size: {}", + meta_size + ))); + } + let block_size = if self.s_blkszbits == EROFS_BLOCK_BITS_9 { + EROFS_BLOCK_SIZE_512 + } else { + EROFS_BLOCK_SIZE_4096 + }; + if meta_size & (block_size - 1) != 0 { + return Err(einval!(format!( + "invalid Rafs v6 metadata size: bootstrap size {} is not aligned", + meta_size + ))); + } + let meta_addr = u32::from_le(self.s_meta_blkaddr) as u64 * block_size; + if meta_addr > meta_size { + return Err(einval!(format!( + "invalid Rafs v6 meta block address 0x{:x}, meta file size 0x{:x}", + meta_addr, meta_size + ))); + } + + if u32::from_le(self.s_magic) != EROFS_SUPER_MAGIC_V1 { + return Err(einval!(format!( + "invalid EROFS magic number 0x{:x} in Rafsv6 superblock", + u32::from_le(self.s_magic) + ))); + } + + if self.s_checksum != 0 { + return Err(einval!(format!( + "invalid checksum {} in Rafsv6 superblock", + u32::from_le(self.s_checksum) + ))); + } + + if self.s_blkszbits != EROFS_BLOCK_BITS_12 && self.s_blkszbits != EROFS_BLOCK_BITS_9 { + return Err(einval!(format!( + "invalid block size bits {} in Rafsv6 superblock", + self.s_blkszbits + ))); + } + + if self.s_extslots != 0 { + return Err(einval!("invalid extended slots in Rafsv6 superblock")); + } + + if self.s_inos == 0 { + return Err(einval!("invalid inode number in Rafsv6 superblock")); + } + + if self.s_u != 0 { + return Err(einval!("invalid union field in Rafsv6 superblock")); + } + + if self.s_xattr_blkaddr != 0 { + return Err(einval!( + "unsupported shared extended attribute namespace in Rafsv6 superblock" + )); + } + + // There's a bug in old RAFS v6 images, which has set s_blocks to a fixed value 4096. + if self.s_extra_devices == 0 && self.s_blocks != 0 && u32::from_le(self.s_blocks) != 4096 { + warn!( + "rafs v6 extra devices {}, blocks {}", + self.s_extra_devices, self.s_blocks + ); + return Err(einval!("invalid extra device count in Rafsv6 superblock")); + } + + let devtable_off = + u16::from_le(self.s_devt_slotoff) as u64 * size_of::() as u64; + if devtable_off != EROFS_DEVTABLE_OFFSET as u64 { + return Err(einval!(format!( + "invalid device table slot offset {} in Rafsv6 superblock", + u16::from_le(self.s_devt_slotoff) + ))); + } + let devtable_end = devtable_off + u16::from_le(self.s_extra_devices) as u64; + if devtable_end > meta_size { + return Err(einval!(format!( + "invalid device table slot count {} in Rafsv6 superblock", + u16::from_le(self.s_extra_devices) + ))); + } + + // s_build_time may be used as compact_inode's timestamp in the future. + // if u64::from_le(self.s_build_time) != 0 || u32::from_le(self.s_build_time_nsec) != 0 { + // return Err(einval!("invalid build time in Rafsv6 superblock")); + // } + + if u32::from_le(self.s_feature_incompat) + != EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE + { + return Err(einval!( + "invalid incompatible feature bits in Rafsv6 superblock" + )); + } + + if u32::from_le(self.s_feature_compat) & EROFS_FEATURE_COMPAT_RAFS_V6 + != EROFS_FEATURE_COMPAT_RAFS_V6 + { + return Err(einval!( + "invalid compatible feature bits in Rafsv6 superblock" + )); + } + + Ok(()) + } + + /// Check whether it's super block for Rafs v6. + pub fn is_rafs_v6(&self) -> bool { + self.magic() == EROFS_SUPER_MAGIC_V1 + } + + /// Set number of inodes. 
+ pub fn set_inos(&mut self, inos: u64) { + self.s_inos = inos.to_le(); + } + + /// Get total inodes count of this Rafs + pub fn inodes_count(&self) -> u64 { + u64::from_le(self.s_inos) + } + + /// Set number of logical blocks. + pub fn set_blocks(&mut self, blocks: u32) { + self.s_blocks = blocks.to_le(); + } + + /// Get root nid. + pub fn root_nid(&self) -> u16 { + u16::from_le(self.s_root_nid) + } + + /// Set EROFS root nid. + pub fn set_root_nid(&mut self, nid: u16) { + self.s_root_nid = nid.to_le(); + } + + /// Get meta block address. + pub fn meta_addr(&self) -> u32 { + u32::from_le(self.s_meta_blkaddr) + } + + /// Set EROFS meta block address. + pub fn set_meta_addr(&mut self, meta_addr: u64) { + if self.s_blkszbits == EROFS_BLOCK_BITS_12 { + assert!((meta_addr / EROFS_BLOCK_SIZE_4096) <= u32::MAX as u64); + self.s_meta_blkaddr = u32::to_le((meta_addr / EROFS_BLOCK_SIZE_4096) as u32); + } else if self.s_blkszbits == EROFS_BLOCK_BITS_9 { + assert!((meta_addr / EROFS_BLOCK_SIZE_512) <= u32::MAX as u64); + self.s_meta_blkaddr = u32::to_le((meta_addr / EROFS_BLOCK_SIZE_512) as u32); + } else { + error!("v6: unsupported block bits {}", self.s_blkszbits); + } + } + + /// Get device table offset. + pub fn device_table_offset(&self) -> u64 { + u16::from_le(self.s_devt_slotoff) as u64 * size_of::() as u64 + } + + /// Set bits of block size. + pub fn set_block_bits(&mut self, block_bits: u8) { + assert!(block_bits == EROFS_BLOCK_BITS_12 || block_bits == EROFS_BLOCK_BITS_9); + self.s_blkszbits = block_bits; + } + + impl_pub_getter_setter!(magic, set_magic, s_magic, u32); + impl_pub_getter_setter!(extra_devices, set_extra_devices, s_extra_devices, u16); +} + +impl RafsStore for RafsV6SuperBlock { + // This method must be called before RafsV6SuperBlockExt::store(), otherwise data written by + // RafsV6SuperBlockExt::store() will be overwritten. 
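+    //
+    // Resulting layout of the first EROFS_BLOCK_SIZE_4096 bytes once both superblocks have been
+    // stored (an explanatory sketch derived from the offsets used below and in
+    // RafsV6SuperBlockExt::load(); it adds no on-disk state of its own):
+    //
+    //   [0, EROFS_SUPER_OFFSET)                        zero padding
+    //   [EROFS_SUPER_OFFSET, +EROFS_SUPER_BLOCK_SIZE)  RafsV6SuperBlock (128 bytes)
+    //   next 256 bytes                                 RafsV6SuperBlockExt
+    //   up to EROFS_BLOCK_SIZE_4096                    zero padding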
+ fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + debug_assert!( + ((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64) < EROFS_BLOCK_SIZE_4096 + ); + w.write_all(&[0u8; EROFS_SUPER_OFFSET as usize])?; + w.write_all(self.as_ref())?; + w.write_all( + &[0u8; (EROFS_BLOCK_SIZE_4096 as usize + - (EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as usize)], + )?; + + Ok(EROFS_BLOCK_SIZE_4096 as usize) + } +} + +impl Default for RafsV6SuperBlock { + fn default() -> Self { + debug_assert!(size_of::() == 128); + Self { + s_magic: u32::to_le(EROFS_SUPER_MAGIC_V1), + s_checksum: 0, + s_feature_compat: u32::to_le(EROFS_FEATURE_COMPAT_RAFS_V6), + s_blkszbits: EROFS_BLOCK_BITS_12, + s_extslots: 0u8, + s_root_nid: 0, + s_inos: 0, + s_build_time: 0, + s_build_time_nsec: 0, + s_blocks: u32::to_le(1), + s_meta_blkaddr: 0, + s_xattr_blkaddr: 0, + s_uuid: [0u8; 16], + s_volume_name: [0u8; 16], + s_feature_incompat: u32::to_le( + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, + ), + s_u: 0, + s_extra_devices: 0, + s_devt_slotoff: u16::to_le(EROFS_DEVTABLE_OFFSET / size_of::() as u16), + s_reserved: [0u8; 38], + } + } +} + +/// Extended superblock for RAFS v6, 256 bytes +#[repr(C)] +#[derive(Clone, Copy)] +pub struct RafsV6SuperBlockExt { + /// superblock flags + s_flags: u64, + /// offset of blob table + s_blob_table_offset: u64, + /// size of blob table + s_blob_table_size: u32, + /// chunk size + s_chunk_size: u32, + /// offset of chunk table + s_chunk_table_offset: u64, + /// size of chunk table + s_chunk_table_size: u64, + s_prefetch_table_offset: u64, + s_prefetch_table_size: u32, + s_padding: u32, + /// Reserved + s_reserved: [u8; 200], +} + +impl_bootstrap_converter!(RafsV6SuperBlockExt); + +impl RafsV6SuperBlockExt { + /// Create a new instance `RafsV6SuperBlockExt`. + pub fn new() -> Self { + debug_assert!(size_of::() == 256); + Self::default() + } + + /// Load an `RafsV6SuperBlockExt` from a reader. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.seek_to_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64)?; + r.read_exact(self.as_mut())?; + r.seek_to_offset(EROFS_BLOCK_SIZE_4096 as u64)?; + + Ok(()) + } + + /// Validate the Rafs v6 super block. 
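+    ///
+    /// Besides range checks on the blob, chunk and prefetch tables, this verifies that exactly
+    /// one compression algorithm bit and exactly one digest algorithm bit are set in the flags.
+    /// A minimal sketch of the expected usage, assuming the caller already has `reader`,
+    /// `meta_size` and a populated `RafsSuperMeta`:
+    ///
+    /// ```ignore
+    /// let mut ext = RafsV6SuperBlockExt::new();
+    /// ext.load(&mut reader)?;
+    /// ext.validate(meta_size, &meta)?;
+    /// ```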
+ pub fn validate(&self, meta_size: u64, meta: &RafsSuperMeta) -> Result<()> { + let mut flags = self.flags(); + flags &= RafsSuperFlags::COMPRESSION_NONE.bits() + | RafsSuperFlags::COMPRESSION_LZ4.bits() + | RafsSuperFlags::COMPRESSION_GZIP.bits() + | RafsSuperFlags::COMPRESSION_ZSTD.bits(); + if flags.count_ones() != 1 { + return Err(einval!(format!( + "invalid flags {:#x} related to compression algorithm in Rafs v6 extended superblock", + flags + ))); + } + + let mut flags = self.flags(); + flags &= RafsSuperFlags::HASH_BLAKE3.bits() | RafsSuperFlags::HASH_SHA256.bits(); + if flags.count_ones() != 1 { + return Err(einval!(format!( + "invalid flags {:#x} related to digest algorithm in Rafs v6 extended superblock", + flags + ))); + } + + let chunk_size = u32::from_le(self.s_chunk_size) as u64; + if !chunk_size.is_power_of_two() + || !(EROFS_BLOCK_SIZE_4096..=RAFS_MAX_CHUNK_SIZE).contains(&chunk_size) + { + return Err(einval!("invalid chunk size in Rafs v6 extended superblock")); + } + + let devslot_end = meta.blob_device_table_offset + meta.blob_table_size as u64; + + let blob_offset = self.blob_table_offset(); + let blob_size = self.blob_table_size() as u64; + if blob_offset & (EROFS_BLOCK_SIZE_4096 - 1) != 0 + || blob_offset < EROFS_BLOCK_SIZE_4096 + || blob_offset < devslot_end + || blob_size % size_of::() as u64 != 0 + || blob_offset.checked_add(blob_size).is_none() + || blob_offset + blob_size > meta_size + { + return Err(einval!(format!( + "invalid blob table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", + blob_offset, blob_size + ))); + } + let blob_range = MetaRange::new(blob_offset, blob_size, true)?; + + let mut chunk_info_tbl_range = None; + if self.chunk_table_size() > 0 { + let chunk_tbl_offset = self.chunk_table_offset(); + let chunk_tbl_size = self.chunk_table_size(); + if chunk_tbl_offset < EROFS_BLOCK_SIZE_4096 + || chunk_tbl_offset % EROFS_BLOCK_SIZE_4096 != 0 + || chunk_tbl_offset < devslot_end + || chunk_tbl_size % size_of::() as u64 != 0 + || chunk_tbl_offset.checked_add(chunk_tbl_size).is_none() + || chunk_tbl_offset + chunk_tbl_size > meta_size + { + return Err(einval!(format!( + "invalid chunk table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", + chunk_tbl_offset, chunk_tbl_size + ))); + } + let chunk_range = MetaRange::new(chunk_tbl_offset, chunk_tbl_size, true)?; + if blob_range.intersect_with(&chunk_range) { + return Err(einval!(format!( + "blob table intersects with chunk table in Rafs v6 extended superblock", + ))); + } + chunk_info_tbl_range = Some(chunk_range); + } + + // Legacy RAFS may have zero prefetch table offset but non-zero prefetch table size for + // empty filesystems. 
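+        //
+        // Note: the blob table, chunk information table and prefetch table must all fall inside
+        // the metadata area and must not overlap each other; the MetaRange/intersect_with()
+        // checks above and below enforce this pairwise.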
+ if self.prefetch_table_size() > 0 && self.prefetch_table_offset() != 0 { + let tbl_offset = self.prefetch_table_offset(); + let tbl_size = self.prefetch_table_size() as u64; + if tbl_offset < EROFS_BLOCK_SIZE_4096 + || tbl_size % size_of::() as u64 != 0 + || tbl_offset < devslot_end + || tbl_offset.checked_add(tbl_size).is_none() + || tbl_offset + tbl_size > meta_size + { + return Err(einval!(format!( + "invalid prefetch table offset 0x{:x}/size 0x{:x} in Rafs v6 extended superblock", + tbl_offset, tbl_size + ))); + } + let prefetch_range = MetaRange::new(tbl_offset, tbl_size, false)?; + if blob_range.intersect_with(&prefetch_range) { + return Err(einval!(format!( + "blob table intersects with prefetch table in Rafs v6 extended superblock", + ))); + } + if let Some(chunk_range) = chunk_info_tbl_range.as_ref() { + if chunk_range.intersect_with(&prefetch_range) { + return Err(einval!(format!( + "chunk information table intersects with prefetch table in Rafs v6 extended superblock", + ))); + } + } + } + + Ok(()) + } + + /// Set compression algorithm to handle chunk of the Rafs filesystem. + pub fn set_compressor(&mut self, compressor: compress::Algorithm) { + let c: RafsSuperFlags = compressor.into(); + + self.s_flags &= !RafsSuperFlags::COMPRESSION_NONE.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_LZ4.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_GZIP.bits(); + self.s_flags &= !RafsSuperFlags::COMPRESSION_ZSTD.bits(); + self.s_flags |= c.bits(); + } + + /// Set the `has_xattr` flag for the RAFS filesystem. + pub fn set_has_xattr(&mut self) { + self.s_flags |= RafsSuperFlags::HAS_XATTR.bits(); + } + + /// Enable explicit Uid/Gid feature. + pub fn set_explicit_uidgid(&mut self) { + self.s_flags |= RafsSuperFlags::EXPLICIT_UID_GID.bits(); + } + + /// Set flag indicating that chunk digest is inlined in the data blob. + pub fn set_inlined_chunk_digest(&mut self) { + self.s_flags |= RafsSuperFlags::INLINED_CHUNK_DIGEST.bits(); + } + + /// Enable `tarfs` mode, which directly use a tar stream/file as RAFS data blob and do not + /// generate any blob meta data. + pub fn set_tarfs_mode(&mut self) { + self.s_flags |= RafsSuperFlags::TARTFS_MODE.bits(); + } + + /// Set message digest algorithm to handle chunk of the Rafs filesystem. + pub fn set_digester(&mut self, digester: digest::Algorithm) { + let c: RafsSuperFlags = digester.into(); + + self.s_flags &= !RafsSuperFlags::HASH_BLAKE3.bits(); + self.s_flags &= !RafsSuperFlags::HASH_SHA256.bits(); + self.s_flags |= c.bits(); + } + + /// Set offset and size of chunk information table. + pub fn set_chunk_table(&mut self, offset: u64, size: u64) { + self.set_chunk_table_offset(offset); + self.set_chunk_table_size(size); + } + + /// Set encryption algorithm to encrypt chunks of the Rafs filesystem. 
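+    ///
+    /// Like the other flag setters, this clears the competing encryption bits first and then
+    /// ORs in the bit derived from `cipher`. A small sketch mirroring the unit tests further
+    /// below:
+    ///
+    /// ```ignore
+    /// let mut ext = RafsV6SuperBlockExt::new();
+    /// ext.set_cipher(crypt::Algorithm::Aes128Xts);
+    /// assert_ne!(ext.flags() & RafsSuperFlags::ENCRYPTION_ASE_128_XTS.bits(), 0);
+    /// ```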
+ pub fn set_cipher(&mut self, cipher: crypt::Algorithm) { + let c: RafsSuperFlags = cipher.into(); + + self.s_flags &= !RafsSuperFlags::ENCRYPTION_NONE.bits(); + self.s_flags &= !RafsSuperFlags::ENCRYPTION_ASE_128_XTS.bits(); + self.s_flags |= c.bits(); + } + + impl_pub_getter_setter!( + chunk_table_offset, + set_chunk_table_offset, + s_chunk_table_offset, + u64 + ); + impl_pub_getter_setter!( + chunk_table_size, + set_chunk_table_size, + s_chunk_table_size, + u64 + ); + impl_pub_getter_setter!(chunk_size, set_chunk_size, s_chunk_size, u32); + impl_pub_getter_setter!(flags, set_flags, s_flags, u64); + impl_pub_getter_setter!( + blob_table_offset, + set_blob_table_offset, + s_blob_table_offset, + u64 + ); + impl_pub_getter_setter!(blob_table_size, set_blob_table_size, s_blob_table_size, u32); + impl_pub_getter_setter!( + prefetch_table_size, + set_prefetch_table_size, + s_prefetch_table_size, + u32 + ); + impl_pub_getter_setter!( + prefetch_table_offset, + set_prefetch_table_offset, + s_prefetch_table_offset, + u64 + ); +} + +impl RafsStore for RafsV6SuperBlockExt { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + w.seek_offset(EROFS_BLOCK_SIZE_4096 as u64)?; + + Ok(EROFS_BLOCK_SIZE_4096 as usize - (EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as usize) + } +} + +impl Default for RafsV6SuperBlockExt { + fn default() -> Self { + Self { + s_flags: 0, + s_blob_table_offset: 0, + s_blob_table_size: 0, + s_chunk_size: 0, + s_chunk_table_offset: 0, + s_chunk_table_size: 0, + s_prefetch_table_offset: 0, + s_prefetch_table_size: 0, + s_padding: u32::to_le(0), + s_reserved: [0u8; 200], + } + } +} + +/// Type of EROFS inodes. +#[repr(u8)] +#[allow(non_camel_case_types, dead_code)] +enum EROFS_FILE_TYPE { + /// Unknown file type. + EROFS_FT_UNKNOWN, + /// Regular file. + EROFS_FT_REG_FILE, + /// Directory. + EROFS_FT_DIR, + /// Character device. + EROFS_FT_CHRDEV, + /// Block device. + EROFS_FT_BLKDEV, + /// FIFO pipe. + EROFS_FT_FIFO, + /// Socket. + EROFS_FT_SOCK, + /// Symlink. + EROFS_FT_SYMLINK, + /// Maximum value of file type. + EROFS_FT_MAX, +} + +/// Trait to manipulate data fields of on-disk RAFS v6 inodes. +/// +/// There are two types of on disk inode formats defined by EROFS: +/// - compact inode with 32-byte data +/// - extended inode with 64-byte data +pub trait RafsV6OndiskInode: RafsStore { + fn set_size(&mut self, size: u64); + fn set_ino(&mut self, ino: u32); + fn set_nlink(&mut self, nlinks: u32); + fn set_mode(&mut self, mode: u16); + fn set_u(&mut self, u: u32); + fn set_uidgid(&mut self, uid: u32, gid: u32); + fn set_mtime(&mut self, _sec: u64, _nsec: u32); + fn set_rdev(&mut self, rdev: u32); + fn set_xattr_inline_count(&mut self, count: u16); + fn set_data_layout(&mut self, data_layout: u16); + + /// Set inode data layout format to be PLAIN. + #[inline] + fn set_inline_plain_layout(&mut self) { + self.set_data_layout(EROFS_INODE_FLAT_PLAIN); + } + + /// Set inode data layout format to be INLINE. + #[inline] + fn set_inline_inline_layout(&mut self) { + self.set_data_layout(EROFS_INODE_FLAT_INLINE); + } + + /// Set inode data layout format to be CHUNKED. 
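+    ///
+    /// In the EROFS `i_format` field the lowest bit selects the inode layout (compact vs.
+    /// extended) and the data layout is stored starting at bit 1, hence the
+    /// `layout | (data_layout << 1)` pattern used by the implementations below, e.g.:
+    ///
+    /// ```ignore
+    /// // extended inode using the chunk-based data layout
+    /// let format = EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_CHUNK_BASED << 1);
+    /// ```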
+ #[inline] + fn set_chunk_based_layout(&mut self) { + self.set_data_layout(EROFS_INODE_CHUNK_BASED); + } + + fn format(&self) -> u16; + fn mode(&self) -> u16; + fn size(&self) -> u64; + fn union(&self) -> u32; + fn ino(&self) -> u32; + fn ugid(&self) -> (u32, u32); + fn mtime_s_ns(&self) -> (u64, u32); + fn nlink(&self) -> u32; + fn rdev(&self) -> u32; + fn xattr_inline_count(&self) -> u16; + + fn load(&mut self, r: &mut RafsIoReader) -> Result<()>; +} + +impl Debug for &dyn RafsV6OndiskInode { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("RafsV6OndiskInode") + .field("format", &self.format()) + .field("ino", &self.ino()) + .field("mode", &self.mode()) + .field("size", &self.size()) + .field("union", &self.union()) + .field("nlink", &self.nlink()) + .field("xattr count", &self.xattr_inline_count()) + .finish() + } +} + +/// RAFS v6 inode on-disk format, 32 bytes. +/// +/// This structure is designed to be compatible with EROFS compact inode format. +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +pub struct RafsV6InodeCompact { + /// inode format hints + pub i_format: u16, + pub i_xattr_icount: u16, + pub i_mode: u16, + pub i_nlink: u16, + pub i_size: u32, + pub i_reserved: u32, + /// raw_blkaddr or rdev or rafs_v6_inode_chunk_info + pub i_u: u32, + pub i_ino: u32, + pub i_uid: u16, + pub i_gid: u16, + pub i_reserved2: [u8; 4], +} + +impl RafsV6InodeCompact { + pub fn new() -> Self { + Self { + i_format: u16::to_le(EROFS_INODE_LAYOUT_COMPACT | (EROFS_INODE_FLAT_PLAIN << 1)), + i_xattr_icount: 0, + i_mode: 0, + i_nlink: 0, + i_size: 0, + i_reserved: 0, + i_u: 0, + i_ino: 0, + i_uid: 0, + i_gid: 0, + i_reserved2: [0u8; 4], + } + } +} + +impl RafsV6OndiskInode for RafsV6InodeCompact { + /// Set file size for inode. + fn set_size(&mut self, size: u64) { + self.i_size = u32::to_le(size as u32); + } + + /// Set ino for inode. + fn set_ino(&mut self, ino: u32) { + self.i_ino = ino.to_le(); + } + + /// Set number of hardlink. + fn set_nlink(&mut self, nlinks: u32) { + self.i_nlink = u16::to_le(nlinks as u16); + } + + /// Set file protection mode. + fn set_mode(&mut self, mode: u16) { + self.i_mode = mode.to_le(); + } + + /// Set the union field. + fn set_u(&mut self, u: u32) { + self.i_u = u.to_le(); + } + + /// Set uid and gid for the inode. + fn set_uidgid(&mut self, uid: u32, gid: u32) { + self.i_uid = u16::to_le(uid as u16); + self.i_gid = u16::to_le(gid as u16); + } + + /// Set last modification time for the inode. + fn set_mtime(&mut self, _sec: u64, _nsec: u32) {} + + /// Set real device id. + fn set_rdev(&mut self, _rdev: u32) {} + + /// Set xattr inline count. + fn set_xattr_inline_count(&mut self, count: u16) { + self.i_xattr_icount = count.to_le(); + } + + /// Set inode data layout format. 
+ fn set_data_layout(&mut self, data_layout: u16) { + self.i_format = u16::to_le(EROFS_INODE_LAYOUT_COMPACT | (data_layout << 1)); + } + + fn format(&self) -> u16 { + u16::from_le(self.i_format) + } + + fn mode(&self) -> u16 { + u16::from_le(self.i_mode) + } + + fn size(&self) -> u64 { + u32::from_le(self.i_size) as u64 + } + + fn union(&self) -> u32 { + u32::from_le(self.i_u) + } + + fn ino(&self) -> u32 { + u32::from_le(self.i_ino) + } + + fn ugid(&self) -> (u32, u32) { + ( + u16::from_le(self.i_uid) as u32, + u16::from_le(self.i_gid) as u32, + ) + } + + fn mtime_s_ns(&self) -> (u64, u32) { + (0, 0) + } + + fn nlink(&self) -> u32 { + u16::from_le(self.i_nlink) as u32 + } + + fn rdev(&self) -> u32 { + 0 + } + + fn xattr_inline_count(&self) -> u16 { + u16::from_le(self.i_xattr_icount) + } + + /// Load a `RafsV6InodeCompact` from a reader. + fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +impl_bootstrap_converter!(RafsV6InodeCompact); + +impl RafsStore for RafsV6InodeCompact { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + // TODO: need to write xattr as well. + w.write_all(self.as_ref())?; + Ok(self.as_ref().len()) + } +} + +/// RAFS v6 inode on-disk format, 64 bytes. +/// +/// This structure is designed to be compatible with EROFS extended inode format. +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +pub struct RafsV6InodeExtended { + /// Layout format for of the inode. + pub i_format: u16, + /// Size of extended attributes, in unit of 4Byte + pub i_xattr_icount: u16, + /// Protection mode. + pub i_mode: u16, + i_reserved: u16, + /// Size of the file content. + pub i_size: u64, + /// A `u32` union: raw_blkaddr or `rdev` or `rafs_v6_inode_chunk_info` + pub i_u: u32, + /// Inode number. + pub i_ino: u32, + /// User ID of owner. + pub i_uid: u32, + /// Group ID of owner + pub i_gid: u32, + /// Time of last modification - second part. + pub i_mtime: u64, + /// Time of last modification - nanoseconds part. + pub i_mtime_nsec: u32, + /// Number of links. + pub i_nlink: u32, + i_reserved2: [u8; 16], +} + +impl RafsV6InodeExtended { + /// Create a new instance of `RafsV6InodeExtended`. + pub fn new() -> Self { + Self { + i_format: u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)), + i_xattr_icount: 0, + i_mode: 0, + i_reserved: 0, + i_size: 0, + i_u: 0, + i_ino: 0, + i_uid: 0, + i_gid: 0, + i_mtime: 0, + i_mtime_nsec: 0, + i_nlink: 0, + i_reserved2: [0u8; 16], + } + } +} + +impl RafsV6OndiskInode for RafsV6InodeExtended { + /// Set file size for inode. + fn set_size(&mut self, size: u64) { + self.i_size = size.to_le(); + } + + /// Set ino for inode. + fn set_ino(&mut self, ino: u32) { + self.i_ino = ino.to_le(); + } + + /// Set number of hardlink. + fn set_nlink(&mut self, nlinks: u32) { + self.i_nlink = nlinks.to_le(); + } + + /// Set file protection mode. + fn set_mode(&mut self, mode: u16) { + self.i_mode = mode.to_le(); + } + + /// Set the union field. + fn set_u(&mut self, u: u32) { + self.i_u = u.to_le(); + } + + /// Set uid and gid for the inode. + fn set_uidgid(&mut self, uid: u32, gid: u32) { + self.i_uid = u32::to_le(uid); + self.i_gid = u32::to_le(gid); + } + + /// Set last modification time for the inode. + fn set_mtime(&mut self, sec: u64, nsec: u32) { + self.i_mtime = u64::to_le(sec); + self.i_mtime_nsec = u32::to_le(nsec); + } + + fn set_rdev(&mut self, rdev: u32) { + self.i_u = rdev.to_le() + } + + /// Set xattr inline count. 
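+    ///
+    /// Per the inline xattr ibody layout described further below, a count of `n` means the
+    /// inline xattr area occupies `size_of(ibody header) + (n - 1) * 4` bytes; see
+    /// `RafsXAttrs::count_v6()` for the inverse computation.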
+ fn set_xattr_inline_count(&mut self, count: u16) { + self.i_xattr_icount = count.to_le(); + } + + /// Set inode data layout format. + fn set_data_layout(&mut self, data_layout: u16) { + self.i_format = u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (data_layout << 1)); + } + + fn format(&self) -> u16 { + u16::from_le(self.i_format) + } + + fn mode(&self) -> u16 { + u16::from_le(self.i_mode) + } + + fn size(&self) -> u64 { + u64::from_le(self.i_size) + } + + fn union(&self) -> u32 { + u32::from_le(self.i_u) + } + + fn ino(&self) -> u32 { + u32::from_le(self.i_ino) + } + + fn ugid(&self) -> (u32, u32) { + (u32::from_le(self.i_uid), u32::from_le(self.i_gid)) + } + + fn mtime_s_ns(&self) -> (u64, u32) { + (u64::from_le(self.i_mtime), u32::from_le(self.i_mtime_nsec)) + } + + fn nlink(&self) -> u32 { + u32::from_le(self.i_nlink) + } + + fn rdev(&self) -> u32 { + u32::from_le(self.i_u) + } + + fn xattr_inline_count(&self) -> u16 { + u16::from_le(self.i_xattr_icount) + } + + /// Load a `RafsV6InodeExtended` from a reader. + fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +impl_bootstrap_converter!(RafsV6InodeExtended); + +impl RafsStore for RafsV6InodeExtended { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + // TODO: need to write xattr as well. + w.write_all(self.as_ref())?; + Ok(self.as_ref().len()) + } +} + +/// Create RAFS v6 on-disk inode object. +pub fn new_v6_inode( + inode: &InodeWrapper, + datalayout: u16, + xattr_inline_count: u16, + compact: bool, +) -> Box { + let mut i: Box = match compact { + true => Box::new(RafsV6InodeCompact::new()), + false => Box::new(RafsV6InodeExtended::new()), + }; + + assert!(inode.ino() <= i32::MAX as Inode); + i.set_ino(inode.ino() as u32); + i.set_size(inode.size()); + i.set_uidgid(inode.uid(), inode.gid()); + i.set_mtime(inode.mtime(), inode.mtime_nsec()); + i.set_nlink(inode.nlink()); + i.set_mode(inode.mode() as u16); + i.set_data_layout(datalayout); + i.set_xattr_inline_count(xattr_inline_count); + if inode.is_special() { + i.set_rdev(inode.rdev() as u32); + } + + i +} + +/// Dirent sorted in alphabet order to improve performance by binary search. +#[repr(C, packed(2))] +#[derive(Default, Clone, Copy, Debug)] +pub struct RafsV6Dirent { + /// Node number, inode offset = s_meta_blkaddr * 4096 + nid * 32 + pub e_nid: u64, + /// start offset of file name in the block + pub e_nameoff: u16, + /// file type + pub e_file_type: u8, + /// reserved + e_reserved: u8, +} + +impl_bootstrap_converter!(RafsV6Dirent); + +impl RafsV6Dirent { + /// Create a new instance of `RafsV6Dirent`. + pub fn new(nid: u64, nameoff: u16, file_type: u8) -> Self { + Self { + e_nid: u64::to_le(nid), + e_nameoff: u16::to_le(nameoff), + e_file_type: u8::to_le(file_type), + e_reserved: 0, + } + } + + /// Get file type from file mode. + pub fn file_type(mode: u32) -> u8 { + let val = match mode as libc::mode_t & libc::S_IFMT { + libc::S_IFREG => EROFS_FILE_TYPE::EROFS_FT_REG_FILE, + libc::S_IFDIR => EROFS_FILE_TYPE::EROFS_FT_DIR, + libc::S_IFCHR => EROFS_FILE_TYPE::EROFS_FT_CHRDEV, + libc::S_IFBLK => EROFS_FILE_TYPE::EROFS_FT_BLKDEV, + libc::S_IFIFO => EROFS_FILE_TYPE::EROFS_FT_FIFO, + libc::S_IFSOCK => EROFS_FILE_TYPE::EROFS_FT_SOCK, + libc::S_IFLNK => EROFS_FILE_TYPE::EROFS_FT_SYMLINK, + _ => EROFS_FILE_TYPE::EROFS_FT_UNKNOWN, + }; + + val as u8 + } + + /// Set name offset of the dirent. 
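+    ///
+    /// Within a directory block the dirent array comes first and the file names are packed
+    /// after it; `e_nameoff` is the byte offset of this entry's name inside the block, which is
+    /// why it must stay below `EROFS_BLOCK_SIZE_4096`.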
+ pub fn set_name_offset(&mut self, offset: u16) { + assert!(offset < EROFS_BLOCK_SIZE_4096 as u16); + self.e_nameoff = u16::to_le(offset); + } + + /// Load a `RafsV6Dirent` from a reader. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +impl RafsStore for RafsV6Dirent { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + Ok(self.as_ref().len()) + } +} + +/// Rafs v6 ChunkHeader on-disk format. +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct RafsV6InodeChunkHeader { + /// Chunk layout format. + format: u16, + reserved: u16, +} + +impl RafsV6InodeChunkHeader { + /// Create a new instance of `RafsV6InodeChunkHeader`. + /// + /// If all chunks are continous in uncompressed cache file, the `chunk_size` will set to + /// `inode.size().next_power_of_two()`, so EROFS can optimize page cache in this case. + /// Otherwise `chunk_size` is set to RAFS filesystem's chunk size. + pub fn new(chunk_size: u64, block_size: u64) -> Self { + assert!(chunk_size.is_power_of_two()); + assert!(block_size == EROFS_BLOCK_SIZE_4096 || block_size == EROFS_BLOCK_SIZE_512); + let chunk_bits = chunk_size.trailing_zeros() as u16; + assert!(chunk_bits >= EROFS_BLOCK_BITS_12 as u16); + let chunk_bits = if block_size == EROFS_BLOCK_SIZE_4096 { + chunk_bits - EROFS_BLOCK_BITS_12 as u16 + } else { + chunk_bits - EROFS_BLOCK_BITS_9 as u16 + }; + assert!(chunk_bits <= EROFS_CHUNK_FORMAT_SIZE_MASK); + let format = EROFS_CHUNK_FORMAT_INDEXES_FLAG | chunk_bits; + + Self { + format: u16::to_le(format), + reserved: 0, + } + } + + /// Convert to a u32 value. + pub fn to_u32(&self) -> u32 { + (u16::from_le(self.format) as u32) | ((u16::from_le(self.reserved) as u32) << 16) + } + + /// Convert a u32 value to `RafsV6InodeChunkHeader`. + pub fn from_u32(val: u32) -> Self { + Self { + format: (val as u16).to_le(), + reserved: ((val >> 16) as u16).to_le(), + } + } +} + +impl_bootstrap_converter!(RafsV6InodeChunkHeader); + +/// Rafs v6 chunk address on-disk format, 8 bytes. +#[repr(C)] +#[derive(Default, Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub struct RafsV6InodeChunkAddr { + /// Lower part of encoded blob address. + c_blob_addr_lo: u16, + /// Higher part of encoded blob address. + c_blob_addr_hi: u16, + /// start block address of this inode chunk + /// decompressed offset must be aligned, in unit of block + c_blk_addr: u32, +} + +impl RafsV6InodeChunkAddr { + /// Create a new instance of `RafsV6InodeChunkIndex`. + pub fn new() -> Self { + Self { + c_blob_addr_lo: 0, + c_blob_addr_hi: 0, + c_blk_addr: 0, + } + } + + /// Get the blob index associated with the chunk. + /// + /// Note: for erofs, bump id by 1 since device id 0 is bootstrap. + /// The index in BlobInfo grows from 0, so when using this method to index the corresponding blob, + /// the index always needs to be minus 1 + /// Get the blob index of the chunk. + pub fn blob_index(&self) -> Result { + let idx = (u16::from_le(self.c_blob_addr_hi) & 0x00ff) as u32; + if idx == 0 { + Err(einval!("invalid zero blob index from RafsV6InodeChunkAddr")) + } else { + Ok(idx - 1) + } + } + + /// Set the blob index of the chunk. + pub fn set_blob_index(&mut self, blob_idx: u32) { + assert!(blob_idx < u8::MAX as u32); + let mut val = u16::from_le(self.c_blob_addr_hi); + val &= 0xff00; + val |= (blob_idx + 1) as u16; + self.c_blob_addr_hi = val.to_le(); + } + + /// Get the 24-bits index into the blob compression information array. 
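+    ///
+    /// Bit packing used by this 8-byte chunk address, derived from the setters in this impl and
+    /// shown here only as an explanatory sketch:
+    ///
+    /// ```text
+    /// c_blob_addr_hi[7:0]   device id = blob index + 1 (device 0 is the bootstrap)
+    /// c_blob_addr_hi[15:8]  bits 23..16 of the chunk information (ci) index
+    /// c_blob_addr_lo        bits 15..0 of the ci index
+    /// c_blk_addr            uncompressed start address, in units of blocks
+    /// ```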
+ pub fn blob_ci_index(&self) -> u32 { + let val = (u16::from_le(self.c_blob_addr_hi) as u32) >> 8; + (val << 16) | (u16::from_le(self.c_blob_addr_lo) as u32) + } + + /// Set the index into the blob compression information array. + pub fn set_blob_ci_index(&mut self, ci_index: u32) { + assert!(ci_index <= 0x00ff_ffff); + let val = (ci_index >> 8) as u16 & 0xff00 | (u16::from_le(self.c_blob_addr_hi) & 0x00ff); + self.c_blob_addr_hi = val.to_le(); + self.c_blob_addr_lo = u16::to_le(ci_index as u16); + } + + /// Get block address. + pub fn block_addr(&self) -> u32 { + u32::from_le(self.c_blk_addr) + } + + /// Set block address. + pub fn set_block_addr(&mut self, addr: u32) { + self.c_blk_addr = addr.to_le(); + } + + /// Validate the 'RafsV6InodeChunkAddr' object. + pub fn validate(&self, max_blob_index: u32) -> bool { + let blob_idx = (u16::from_le(self.c_blob_addr_hi) & 0x00ff) as u32; + blob_idx > 0 && blob_idx - 1 <= max_blob_index + } + + /// Load a `RafsV6InodeChunkAddr` from a reader. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +impl_bootstrap_converter!(RafsV6InodeChunkAddr); + +impl RafsStore for RafsV6InodeChunkAddr { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + Ok(self.as_ref().len()) + } +} + +/// Rafs v6 device information on-disk format, 128 bytes. +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub struct RafsV6Device { + /// Blob id of sha256. + blob_id: [u8; BLOB_SHA256_LEN], + /// Number of blocks on the device. + blocks: u32, + /// Mapping start address. + mapped_blkaddr: u32, + reserved2: [u8; 56], +} + +impl Default for RafsV6Device { + fn default() -> Self { + Self { + blob_id: [0u8; 64], + blocks: 0, + mapped_blkaddr: 0, + reserved2: [0u8; 56], + } + } +} + +impl RafsV6Device { + /// Create a new instance of `RafsV6DeviceSlot`. + pub fn new() -> Self { + Self::default() + } + + /// Get blob id. + pub fn blob_id(&self) -> &[u8] { + &self.blob_id + } + + /// Set blob id. + pub fn set_blob_id(&mut self, id: &[u8; 64]) { + self.blob_id.copy_from_slice(id); + } + + /// Load a `RafsV6Device` from a reader. + pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } + + /// Validate the Rafs v6 Device slot. + pub fn validate(&self) -> Result<()> { + match String::from_utf8(self.blob_id.to_vec()) { + Ok(v) => { + if v.len() != BLOB_SHA256_LEN { + return Err(einval!(format!( + "Length of blob_id {} in RAFS v6 device entry is invalid", + v.len() + ))); + } + } + Err(_) => return Err(einval!("blob_id in RAFS v6 device entry is invalid")), + } + + if self.blocks() == 0 { + let msg = format!("invalid blocks {} in Rafs v6 device entry", self.blocks()); + return Err(einval!(msg)); + } + + Ok(()) + } + + impl_pub_getter_setter!(blocks, set_blocks, blocks, u32); + impl_pub_getter_setter!(mapped_blkaddr, set_mapped_blkaddr, mapped_blkaddr, u32); +} + +impl_bootstrap_converter!(RafsV6Device); + +impl RafsStore for RafsV6Device { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + w.write_all(self.as_ref())?; + + Ok(self.as_ref().len()) + } +} + +/// Load blob information table from a reader. 
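+///
+/// The returned table is keyed by blob id and each entry currently carries the blob's
+/// `mapped_blkaddr`. A usage sketch, assuming `meta`, `reader` and `blob_id` are provided by the
+/// caller:
+///
+/// ```ignore
+/// let extra = rafsv6_load_blob_extra_info(&meta, &mut reader)?;
+/// if let Some(info) = extra.get(blob_id) {
+///     println!("blob {} mapped at block {}", blob_id, info.mapped_blkaddr);
+/// }
+/// ```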
+pub fn rafsv6_load_blob_extra_info( + meta: &RafsSuperMeta, + r: &mut RafsIoReader, +) -> Result> { + let mut infos = HashMap::new(); + if meta.blob_device_table_count == 0 { + return Ok(infos); + } + r.seek_to_offset(meta.blob_device_table_offset)?; + for _idx in 0..meta.blob_device_table_count { + let mut devslot = RafsV6Device::new(); + r.read_exact(devslot.as_mut())?; + devslot.validate()?; + let id = String::from_utf8(devslot.blob_id.to_vec()) + .map_err(|e| einval!(format!("invalid blob id, {}", e)))?; + let info = RafsBlobExtraInfo { + mapped_blkaddr: devslot.mapped_blkaddr(), + }; + if infos.contains_key(&id) { + return Err(einval!("duplicated blob id in RAFS v6 device table")); + } + infos.insert(id, info); + } + + Ok(infos) +} + +#[inline] +pub fn align_offset(offset: u64, aligned_size: u64) -> u64 { + round_up(offset, aligned_size) +} + +/// Generate EROFS `nid` from `offset`. +pub fn calculate_nid(offset: u64, meta_size: u64) -> u64 { + (offset - meta_size) >> EROFS_INODE_SLOT_BITS +} + +#[repr(C)] +#[derive(Clone, Copy, Debug)] +struct RafsV6Blob { + // SHA256 digest of the blob containing chunk data. + blob_id: [u8; BLOB_SHA256_LEN], + // Index in the blob table. + blob_index: u32, + // Chunk size of the blob. + chunk_size: u32, + // Number of chunks in the blob. + chunk_count: u32, + // Compression algorithm for chunks in the blob. + compression_algo: u32, + // Digest algorithm for chunks in the blob. + digest_algo: u32, + // Feature flags. + features: u32, + // Size of the compressed blob, not including CI array and header. + compressed_size: u64, + // Size of the uncompressed blob, not including CI array and header. + uncompressed_size: u64, + + // Size of blob ToC content, it's zero for blobs with inlined-meta. + blob_toc_size: u32, + // Compression algorithm for the compression information array. + ci_compressor: u32, + // Offset into the compressed blob for the compression information array. + ci_offset: u64, + // Size of the compressed compression information array. + ci_compressed_size: u64, + // Size of the uncompressed compression information array. + ci_uncompressed_size: u64, + + // SHA256 digest of blob ToC content, including the toc tar header. + // It's all zero for blobs with inlined-meta. + blob_toc_digest: [u8; 32], + // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and + // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special + // handling. + // When using encryption mod, it's reused for saving encryption key. + blob_meta_digest: [u8; 32], + // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. + // When using encryption mod, it's reused for saving encryption iv first 8 bytes. + blob_meta_size: u64, + // When using encryption mod, used for cipher_iv last 8 bytes. + // 0 7 15 + // +------------------+------------------+ + // | blob_meta_size | cipher_iv[8..16] | + // | 8bytes | 8bytes | + // +------------------+------------------+ + // \_ cipher_iv[0..16] _/ + cipher_iv: [u8; 8], + // Crypt algorithm for chunks in the blob. 
+ cipher_algo: u32, + + reserved2: [u8; 36], +} + +impl Default for RafsV6Blob { + fn default() -> Self { + RafsV6Blob { + blob_id: [0u8; BLOB_SHA256_LEN], + blob_index: 0u32, + chunk_size: 0u32, + chunk_count: 0u32, + compression_algo: (compress::Algorithm::None as u32).to_le(), + digest_algo: (digest::Algorithm::Blake3 as u32).to_le(), + features: 0u32, + compressed_size: 0u64, + uncompressed_size: 0u64, + ci_compressor: (compress::Algorithm::None as u32).to_le(), + ci_offset: 0u64, + ci_compressed_size: 0u64, + ci_uncompressed_size: 0u64, + + blob_toc_digest: [0u8; 32], + blob_meta_digest: [0u8; 32], + blob_meta_size: 0, + blob_toc_size: 0u32, + cipher_iv: [0u8; 8], + cipher_algo: (crypt::Algorithm::None as u32).to_le(), + + reserved2: [0u8; 36], + } + } +} + +impl_bootstrap_converter!(RafsV6Blob); + +impl RafsV6Blob { + #[allow(clippy::wrong_self_convention)] + fn to_blob_info(&self) -> Result { + // debug_assert!(RAFS_DIGEST_LENGTH == 32); + debug_assert!(size_of::() == 256); + + let blob_id = String::from_utf8(self.blob_id.to_vec()) + .map_err(|e| einval!(format!("invalid blob id, {}", e)))?; + let blob_features = BlobFeatures::try_from(u32::from_le(self.features))?; + let mut blob_info = BlobInfo::new( + u32::from_le(self.blob_index), + blob_id, + u64::from_le(self.uncompressed_size), + u64::from_le(self.compressed_size), + u32::from_le(self.chunk_size), + u32::from_le(self.chunk_count), + blob_features, + ); + + let comp = compress::Algorithm::try_from(u32::from_le(self.compression_algo)) + .map_err(|_| einval!("invalid compression algorithm in Rafs v6 blob entry"))?; + blob_info.set_compressor(comp); + let digest = digest::Algorithm::try_from(u32::from_le(self.digest_algo)) + .map_err(|_| einval!("invalid digest algorithm in Rafs v6 blob entry"))?; + blob_info.set_digester(digest); + let cipher = crypt::Algorithm::try_from(u32::from_le(self.cipher_algo)) + .map_err(|_| einval!("invalid cipher algorithm in Rafs v6 blob entry"))?; + let cipher_object = cipher + .new_cipher() + .map_err(|e| einval!(format!("failed to create new cipher object {}", e)))?; + let cipher_context = match cipher { + crypt::Algorithm::None => None, + crypt::Algorithm::Aes128Xts => { + let mut cipher_iv = [0u8; 16]; + cipher_iv[..8].copy_from_slice(&self.blob_meta_size.to_le_bytes()); + cipher_iv[8..].copy_from_slice(&self.cipher_iv); + Some(CipherContext::new( + self.blob_meta_digest.to_vec(), + cipher_iv.to_vec(), + false, + cipher, + )?) 
+ } + _ => { + return Err(einval!(format!( + "invalid cipher algorithm {:?} when creating cipher context", + cipher + ))) + } + }; + blob_info.set_cipher_info(cipher, Arc::new(cipher_object), cipher_context); + blob_info.set_blob_meta_info( + u64::from_le(self.ci_offset), + u64::from_le(self.ci_compressed_size), + u64::from_le(self.ci_uncompressed_size), + u32::from_le(self.ci_compressor), + ); + blob_info.set_blob_toc_digest(self.blob_toc_digest); + blob_info.set_blob_meta_digest(self.blob_meta_digest); + blob_info.set_blob_meta_size(self.blob_meta_size); + blob_info.set_blob_toc_size(self.blob_toc_size); + + Ok(blob_info) + } + + fn from_blob_info(blob_info: &BlobInfo) -> Result { + if blob_info.blob_id().len() > BLOB_SHA256_LEN || blob_info.blob_id().is_empty() { + let msg = format!("invalid blob id in blob info, {}", blob_info.blob_id()); + return Err(einval!(msg)); + } + + let blob_id = blob_info.blob_id(); + let id = blob_id.as_bytes(); + let mut blob_id = [0u8; BLOB_SHA256_LEN]; + blob_id[..id.len()].copy_from_slice(id); + + let (blob_meta_digest, blob_meta_size, cipher_iv) = match blob_info.cipher() { + crypt::Algorithm::None => ( + *blob_info.blob_meta_digest(), + blob_info.blob_meta_size(), + [0u8; 8], + ), + crypt::Algorithm::Aes128Xts => { + let cipher_ctx = match blob_info.cipher_context() { + Some(ctx) => ctx, + None => { + return Err(einval!( + "cipher context is unset while using Aes128Xts encryption algorithm" + )) + } + }; + let cipher_key: [u8; 32] = cipher_ctx.get_cipher_meta().0.try_into().unwrap(); + let (cipher_iv_top_half, cipher_iv_bottom_half) = + cipher_ctx.get_cipher_meta().1.split_at(8); + ( + cipher_key, + u64::from_le_bytes(cipher_iv_top_half.try_into().unwrap()), + cipher_iv_bottom_half.try_into().unwrap(), + ) + } + _ => { + return Err(einval!(format!( + "invalid cipher algorithm type {:?} in blob info", + blob_info.cipher() + ))) + } + }; + + Ok(RafsV6Blob { + blob_id, + blob_index: blob_info.blob_index().to_le(), + chunk_size: blob_info.chunk_size().to_le(), + chunk_count: blob_info.chunk_count().to_le(), + compression_algo: (blob_info.compressor() as u32).to_le(), + digest_algo: (blob_info.digester() as u32).to_le(), + compressed_size: blob_info.compressed_size().to_le(), + uncompressed_size: blob_info.uncompressed_size().to_le(), + features: blob_info.features().bits().to_le(), + ci_compressor: (blob_info.meta_ci_compressor() as u32).to_le(), + ci_offset: blob_info.meta_ci_offset().to_le(), + ci_compressed_size: blob_info.meta_ci_compressed_size().to_le(), + ci_uncompressed_size: blob_info.meta_ci_uncompressed_size().to_le(), + + blob_toc_digest: *blob_info.blob_toc_digest(), + blob_meta_digest, + blob_meta_size, + blob_toc_size: blob_info.blob_toc_size(), + cipher_iv, + cipher_algo: (blob_info.cipher() as u32).to_le(), + + reserved2: [0u8; 36], + }) + } + + fn validate(&self, blob_index: u32, chunk_size: u32, flags: RafsSuperFlags) -> bool { + match String::from_utf8(self.blob_id.to_vec()) { + Ok(v) => { + if v.len() != BLOB_SHA256_LEN { + error!( + "RafsV6Blob: idx {} blob id length {:x} is invalid", + blob_index, + v.len() + ); + return false; + } + } + Err(_) => { + error!( + "RafsV6Blob: idx {} blob_id from_utf8 is invalid", + blob_index + ); + return false; + } + } + + if u32::from_le(self.blob_index) != blob_index { + error!( + "RafsV6Blob: blob_index doesn't match {} {}", + u32::from_le(self.blob_index), + blob_index + ); + return false; + } + + let c_size = u32::from_le(self.chunk_size) as u64; + if c_size.count_ones() != 1 + || 
!(EROFS_BLOCK_SIZE_4096..=RAFS_MAX_CHUNK_SIZE).contains(&c_size) + || c_size != chunk_size as u64 + { + error!( + "RafsV6Blob: idx {} invalid chunk_size 0x{:x}, expect 0x{:x}", + blob_index, c_size, chunk_size + ); + return false; + } + + let chunk_count = u32::from_le(self.chunk_count); + if chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { + error!( + "RafsV6Blob: idx {} invalid chunk_count {:x}", + blob_index, chunk_count + ); + return false; + } + + if compress::Algorithm::try_from(u32::from_le(self.compression_algo)).is_err() + || compress::Algorithm::try_from(u32::from_le(self.ci_compressor)).is_err() + || digest::Algorithm::try_from(u32::from_le(self.digest_algo)).is_err() + || crypt::Algorithm::try_from(self.cipher_algo).is_err() + { + error!( + "RafsV6Blob: idx {} invalid compression_algo {} ci_compressor {} digest_algo {} cipher_algo {}", + blob_index, self.compression_algo, self.ci_compressor, self.digest_algo, self.cipher_algo, + ); + return false; + } + + let uncompressed_blob_size = u64::from_le(self.uncompressed_size); + let compressed_blob_size = u64::from_le(self.compressed_size); + if uncompressed_blob_size > BLOB_MAX_SIZE_UNCOMPRESSED { + error!( + "RafsV6Blob: idx {} invalid uncompressed_size {:x}", + blob_index, uncompressed_blob_size + ); + return false; + } + if compressed_blob_size > BLOB_MAX_SIZE_COMPRESSED { + error!( + "RafsV6Blob: idx {} invalid compressed_size {:x}", + blob_index, compressed_blob_size + ); + return false; + } + + let blob_features = match BlobFeatures::try_from(self.features) { + Ok(v) => v, + Err(_) => return false, + }; + let tarfs_mode = flags.contains(RafsSuperFlags::TARTFS_MODE); + match (blob_features.contains(BlobFeatures::ALIGNED), tarfs_mode) { + (false, false) => { + error!( + "RafsV6Blob: idx {} should have `ALIGNED` feature bit set", + blob_index + ); + return false; + } + (true, true) => { + error!("RafsV6Blob: `ALIGNED` flag should not be set for `TARFS` mode"); + return false; + } + _ => {} + } + + let ci_offset = u64::from_le(self.ci_offset); + let ci_compr_size = u64::from_le(self.ci_compressed_size); + let ci_uncompr_size = u64::from_le(self.ci_uncompressed_size); + if ci_offset.checked_add(ci_compr_size).is_none() { + error!("RafsV6Blob: idx {} invalid fields, ci_compressed_size {:x} + ci_offset {:x} wraps around", blob_index, ci_compr_size, ci_offset); + return false; + } else if ci_compr_size > ci_uncompr_size { + error!("RafsV6Blob: idx {} invalid fields, ci_compressed_size {:x} is greater than ci_uncompressed_size {:x}", blob_index, ci_compr_size, ci_uncompr_size); + return false; + } + + let count = chunk_count as u64; + if blob_features.contains(BlobFeatures::CHUNK_INFO_V2) + && (blob_features.contains(BlobFeatures::BATCH) + || blob_features.contains(BlobFeatures::ZRAN) + || blob_features.contains(BlobFeatures::ENCRYPTED)) + { + if ci_uncompr_size < count * size_of::() as u64 { + error!( + "RafsV6Blob: idx {} invalid ci_d_size {}", + blob_index, ci_uncompr_size + ); + return false; + } + } else if blob_features.contains(BlobFeatures::CHUNK_INFO_V2) { + if ci_uncompr_size != count * size_of::() as u64 { + error!( + "RafsV6Blob: idx {} invalid ci_d_size {}", + blob_index, ci_uncompr_size + ); + return false; + } + } else if blob_features.contains(BlobFeatures::BATCH) + || blob_features.contains(BlobFeatures::ZRAN) + || blob_features.contains(BlobFeatures::ENCRYPTED) + { + error!( + "RafsV6Blob: idx {} invalid feature bits {}", + blob_index, + blob_features.bits() + ); + return false; + } else if 
!blob_features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED) + && !tarfs_mode + && ci_uncompr_size != count * size_of::() as u64 + { + error!( + "RafsV6Blob: idx {} invalid fields, ci_d_size {:x}, chunk_count {:x}", + blob_index, ci_uncompr_size, chunk_count + ); + return false; + } + + true + } +} + +/// Rafs v6 blob description table. +#[derive(Clone, Debug, Default)] +pub struct RafsV6BlobTable { + /// Base blob information array. + entries: Vec>, +} + +impl RafsV6BlobTable { + /// Create a new instance of `RafsV6BlobTable`. + pub fn new() -> Self { + RafsV6BlobTable { + entries: Vec::new(), + } + } + + /// Get blob table size. + pub fn size(&self) -> usize { + self.entries.len() * size_of::() + } + + /// Get base information for a blob. + #[inline] + pub fn get(&self, blob_index: u32) -> Result> { + if blob_index >= self.entries.len() as u32 { + Err(enoent!("blob not found")) + } else { + Ok(self.entries[blob_index as usize].clone()) + } + } + + /// Get the base blob information array. + pub fn get_all(&self) -> Vec> { + self.entries.clone() + } + + /// Add information for new blob into the blob information table. + #[allow(clippy::too_many_arguments)] + pub fn add( + &mut self, + blob_id: String, + prefetch_offset: u32, + prefetch_size: u32, + chunk_size: u32, + chunk_count: u32, + uncompressed_size: u64, + compressed_size: u64, + flags: RafsSuperFlags, + blob_meta_digest: [u8; 32], + blob_toc_digest: [u8; 32], + blob_meta_size: u64, + blob_toc_size: u32, + is_chunkdict: bool, + header: BlobCompressionContextHeader, + cipher_object: Arc, + cipher_context: Option, + ) -> u32 { + let blob_index = self.entries.len() as u32; + let blob_features = BlobFeatures::try_from(header.features()).unwrap(); + let mut blob_info = BlobInfo::new( + blob_index, + blob_id, + uncompressed_size, + compressed_size, + chunk_size, + chunk_count, + blob_features, + ); + + blob_info.set_compressor(flags.into()); + blob_info.set_digester(flags.into()); + blob_info.set_cipher(flags.into()); + blob_info.set_prefetch_info(prefetch_offset as u64, prefetch_size as u64); + blob_info.set_blob_meta_info( + header.ci_compressed_offset(), + header.ci_compressed_size(), + header.ci_uncompressed_size(), + header.ci_compressor() as u32, + ); + blob_info.set_blob_meta_digest(blob_meta_digest); + blob_info.set_blob_toc_digest(blob_toc_digest); + blob_info.set_blob_meta_size(blob_meta_size); + blob_info.set_blob_toc_size(blob_toc_size); + blob_info.set_cipher_info(flags.into(), cipher_object, cipher_context); + + blob_info.set_chunkdict_generated(is_chunkdict); + + self.entries.push(Arc::new(blob_info)); + + blob_index + } + + /// Load blob information table from a reader. 
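+    ///
+    /// Entries are fixed-size `RafsV6Blob` records, so `blob_table_size` must be a multiple of
+    /// the record size; every record is validated before being converted into a `BlobInfo`. A
+    /// usage sketch with caller-provided `reader`, `blob_table_size`, `chunk_size` and `flags`:
+    ///
+    /// ```ignore
+    /// let mut table = RafsV6BlobTable::new();
+    /// table.load(&mut reader, blob_table_size, chunk_size, flags)?;
+    /// for blob in table.get_all() {
+    ///     println!("blob {} has {} chunks", blob.blob_index(), blob.chunk_count());
+    /// }
+    /// ```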
+ pub fn load( + &mut self, + r: &mut RafsIoReader, + blob_table_size: u32, + chunk_size: u32, + flags: RafsSuperFlags, + ) -> Result<()> { + if blob_table_size == 0 { + return Ok(()); + } + if blob_table_size as usize % size_of::() != 0 { + let msg = format!("invalid Rafs v6 blob table size {}", blob_table_size); + return Err(einval!(msg)); + } + + for idx in 0..(blob_table_size as usize / size_of::()) { + let mut blob = RafsV6Blob::default(); + r.read_exact(blob.as_mut())?; + if !blob.validate(idx as u32, chunk_size, flags) { + return Err(einval!("invalid Rafs v6 blob entry")); + } + let blob_info = blob.to_blob_info()?; + self.entries.push(Arc::new(blob_info)); + } + + Ok(()) + } +} + +impl RafsStore for RafsV6BlobTable { + fn store(&self, w: &mut dyn RafsIoWrite) -> Result { + for blob_info in self.entries.iter() { + let blob: RafsV6Blob = RafsV6Blob::from_blob_info(blob_info)?; + trace!( + "blob_info index {}, chunk_count {} blob_id {:?}", + blob_info.blob_index(), + blob_info.chunk_count(), + blob_info.blob_id(), + ); + w.write_all(blob.as_ref())?; + } + + Ok(self.entries.len() * size_of::()) + } +} + +// RafsV6 xattr +const EROFS_XATTR_INDEX_USER: u8 = 1; +const EROFS_XATTR_INDEX_POSIX_ACL_ACCESS: u8 = 2; +const EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT: u8 = 3; +const EROFS_XATTR_INDEX_TRUSTED: u8 = 4; +// const EROFS_XATTR_INDEX_LUSTRE: u8 = 5; +const EROFS_XATTR_INDEX_SECURITY: u8 = 6; + +const XATTR_USER_PREFIX: &str = "user."; +const XATTR_SECURITY_PREFIX: &str = "security."; +const XATTR_TRUSTED_PREFIX: &str = "trusted."; +const XATTR_NAME_POSIX_ACL_ACCESS: &str = "system.posix_acl_access"; +const XATTR_NAME_POSIX_ACL_DEFAULT: &str = "system.posix_acl_default"; + +struct RafsV6XattrPrefix { + index: u8, + prefix: &'static str, + prefix_len: usize, +} + +impl RafsV6XattrPrefix { + fn new(prefix: &'static str, index: u8, prefix_len: usize) -> Self { + RafsV6XattrPrefix { + index, + prefix, + prefix_len, + } + } +} + +lazy_static! { + static ref RAFSV6_XATTR_TYPES: Vec = vec![ + RafsV6XattrPrefix::new( + XATTR_USER_PREFIX, + EROFS_XATTR_INDEX_USER, + XATTR_USER_PREFIX.as_bytes().len() + ), + RafsV6XattrPrefix::new( + XATTR_NAME_POSIX_ACL_ACCESS, + EROFS_XATTR_INDEX_POSIX_ACL_ACCESS, + XATTR_NAME_POSIX_ACL_ACCESS.as_bytes().len() + ), + RafsV6XattrPrefix::new( + XATTR_NAME_POSIX_ACL_DEFAULT, + EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + XATTR_NAME_POSIX_ACL_DEFAULT.as_bytes().len() + ), + RafsV6XattrPrefix::new( + XATTR_TRUSTED_PREFIX, + EROFS_XATTR_INDEX_TRUSTED, + XATTR_TRUSTED_PREFIX.as_bytes().len() + ), + RafsV6XattrPrefix::new( + XATTR_SECURITY_PREFIX, + EROFS_XATTR_INDEX_SECURITY, + XATTR_SECURITY_PREFIX.as_bytes().len() + ), + ]; +} + +// inline xattrs (n == i_xattr_icount): +// erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes +// 12 bytes / \ +// / \ +// /-----------------------\ +// | erofs_xattr_entries+ | +// +-----------------------+ +// inline xattrs must starts with erofs_xattr_ibody_header. +#[repr(C)] +#[derive(Default)] +pub struct RafsV6XattrIbodyHeader { + h_reserved: u32, + h_shared_count: u8, + h_reserved2: [u8; 7], + // may be followed by shared xattr id array +} + +impl_bootstrap_converter!(RafsV6XattrIbodyHeader); + +impl RafsV6XattrIbodyHeader { + pub fn new() -> Self { + RafsV6XattrIbodyHeader::default() + } + + /// Load a `RafsV6XattrIbodyHeader` from a reader. 
+ pub fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + r.read_exact(self.as_mut()) + } +} + +// RafsV6 xattr entry (for both inline & shared xattrs) +#[repr(C)] +#[derive(Default, PartialEq)] +pub struct RafsV6XattrEntry { + // length of name + e_name_len: u8, + // attribute name index + e_name_index: u8, + // size of attribute value + e_value_size: u16, + // followed by e_name and e_value +} + +impl_bootstrap_converter!(RafsV6XattrEntry); + +impl RafsV6XattrEntry { + fn new() -> Self { + RafsV6XattrEntry::default() + } + + pub fn name_len(&self) -> u32 { + self.e_name_len as u32 + } + + pub fn name_index(&self) -> u8 { + self.e_name_index + } + + pub fn value_size(&self) -> u32 { + u16::from_le(self.e_value_size) as u32 + } + + fn set_name_len(&mut self, v: u8) { + self.e_name_len = v; + } + + fn set_name_index(&mut self, v: u8) { + self.e_name_index = v; + } + + fn set_value_size(&mut self, v: u16) { + self.e_value_size = v.to_le(); + } +} + +pub(crate) fn recover_namespace(index: u8) -> Result { + let pos = RAFSV6_XATTR_TYPES + .iter() + .position(|x| x.index == index) + .ok_or_else(|| einval!(format!("invalid xattr name index {}", index)))?; + OsString::from_str(RAFSV6_XATTR_TYPES[pos].prefix) + .map_err(|_e| einval!("invalid xattr name prefix")) +} + +impl RafsXAttrs { + /// Get the number of xattr pairs. + pub fn count_v6(&self) -> usize { + if self.is_empty() { + 0 + } else { + let size = self.aligned_size_v6(); + (size - size_of::()) / size_of::() + 1 + } + } + + /// Get aligned size of all xattr pairs. + pub fn aligned_size_v6(&self) -> usize { + if self.is_empty() { + 0 + } else { + let mut size: usize = size_of::(); + for (key, value) in self.pairs.iter() { + // Safe to unwrap() because RafsXAttrs.add()/adds() has validated the prefix. + let (_, prefix_len) = Self::match_prefix(key).expect("xattr is not valid"); + + size += size_of::(); + size += key.byte_size() - prefix_len + value.len(); + size = round_up(size as u64, size_of::() as u64) as usize; + } + size + } + } + + /// Write Xattr to rafsv6 ondisk inode. + pub fn store_v6(&self, w: &mut dyn RafsIoWrite) -> Result { + let header = RafsV6XattrIbodyHeader::new(); + w.write_all(header.as_ref())?; + + if !self.pairs.is_empty() { + for (key, value) in self.pairs.iter() { + let (index, prefix_len) = Self::match_prefix(key) + .map_err(|_| einval!(format!("invalid xattr key {:?}", key)))?; + if key.len() < prefix_len { + return Err(einval!(format!("invalid xattr key {:?}", key))); + } + if value.len() > u16::MAX as usize { + return Err(einval!("xattr value size is too big")); + } + + let mut entry = RafsV6XattrEntry::new(); + entry.set_name_len((key.byte_size() - prefix_len) as u8); + entry.set_name_index(index); + entry.set_value_size(value.len() as u16); + + w.write_all(entry.as_ref())?; + w.write_all(&key.as_bytes()[prefix_len..])?; + w.write_all(value.as_ref())?; + + let size = + size_of::() + key.byte_size() - prefix_len + value.len(); + let padding = + round_up(size as u64, size_of::() as u64) as usize - size; + w.write_padding(padding)?; + } + } + + Ok(0) + } + + fn match_prefix(key: &OsStr) -> Result<(u8, usize)> { + let key_str = key.to_string_lossy(); + let pos = RAFSV6_XATTR_TYPES + .iter() + .position(|x| key_str.starts_with(x.prefix)) + .ok_or_else(|| einval!(format!("xattr prefix {:?} is not valid", key)))?; + Ok(( + RAFSV6_XATTR_TYPES[pos].index, + RAFSV6_XATTR_TYPES[pos].prefix_len, + )) + } +} + +#[derive(Clone, Default, Debug)] +pub struct RafsV6PrefetchTable { + /// List of inode numbers for prefetch. 
+ /// Note: It's not inode index of inodes table being stored here. + pub inodes: Vec, +} + +impl RafsV6PrefetchTable { + /// Create a new instance of `RafsV6PrefetchTable`. + pub fn new() -> RafsV6PrefetchTable { + RafsV6PrefetchTable { inodes: vec![] } + } + + /// Get content size of the inode prefetch table. + pub fn size(&self) -> usize { + self.len() * size_of::() + } + + /// Get number of entries in the prefetch table. + pub fn len(&self) -> usize { + self.inodes.len() + } + + /// Check whether the inode prefetch table is empty. + pub fn is_empty(&self) -> bool { + self.inodes.is_empty() + } + + /// Add an inode into the inode prefetch table. + pub fn add_entry(&mut self, ino: u32) { + self.inodes.push(ino); + } + + /// Store the inode prefetch table to a writer. + pub fn store(&mut self, w: &mut dyn RafsIoWrite) -> Result { + let (_, data, _) = unsafe { self.inodes.align_to::() }; + w.write_all(data.as_ref())?; + + // OK. Let's see if we have to align... :-( + // let cur_len = self.inodes.len() * size_of::(); + + Ok(data.len()) + } + + /// Load a inode prefetch table from a reader. + /// + /// Note: Generally, prefetch happens after loading bootstrap, so with methods operating + /// files with changing their offset won't bring errors. But we still use `pread` now so as + /// to make this method more stable and robust. Even dup(2) can't give us a separated file struct. + pub fn load_prefetch_table_from( + &mut self, + r: &mut RafsIoReader, + offset: u64, + entries: usize, + ) -> Result { + self.inodes = vec![0u32; entries]; + + let (_, data, _) = unsafe { self.inodes.align_to_mut::() }; + r.seek_to_offset(offset)?; + r.read_exact(data)?; + + Ok(data.len()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::metadata::RafsVersion; + use crate::{BufWriter, RafsIoRead}; + use std::fs::OpenOptions; + use std::io::Write; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_super_block_load_store() { + let mut sb = RafsV6SuperBlock::new(); + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut writer = BufWriter::new(w); + let mut reader: Box = Box::new(r); + + sb.s_blocks = 0x1000; + sb.s_extra_devices = 5; + sb.s_inos = 0x200; + sb.store(&mut writer).unwrap(); + writer.flush().unwrap(); + + let mut sb2 = RafsV6SuperBlock::new(); + sb2.load(&mut reader).unwrap(); + assert_eq!(sb2.s_magic, EROFS_SUPER_MAGIC_V1.to_le()); + assert_eq!(sb2.s_blocks, 0x1000u32.to_le()); + assert_eq!(sb2.s_extra_devices, 5u16.to_le()); + assert_eq!(sb2.s_inos, 0x200u64.to_le()); + assert_eq!(sb2.s_feature_compat, EROFS_FEATURE_COMPAT_RAFS_V6.to_le()); + assert_eq!( + sb2.s_feature_incompat, + (EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | EROFS_FEATURE_INCOMPAT_DEVICE_TABLE).to_le() + ); + } + + #[test] + fn test_rafs_v6_inode_extended() { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut writer = BufWriter::new(w); + let mut reader: Box = Box::new(r); + + let mut inode = RafsV6InodeExtended::new(); + assert_eq!( + inode.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)) + ); + inode.set_data_layout(EROFS_INODE_FLAT_INLINE); + assert_eq!( + inode.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED 
| (EROFS_INODE_FLAT_INLINE << 1)) + ); + inode.set_inline_plain_layout(); + assert_eq!( + inode.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_PLAIN << 1)) + ); + inode.set_inline_inline_layout(); + assert_eq!( + inode.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_FLAT_INLINE << 1)) + ); + inode.set_chunk_based_layout(); + assert_eq!( + inode.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_CHUNK_BASED << 1)) + ); + inode.set_uidgid(1, 2); + inode.set_mtime(3, 4); + inode.store(&mut writer).unwrap(); + writer.flush().unwrap(); + + let mut inode2 = RafsV6InodeExtended::new(); + inode2.load(&mut reader).unwrap(); + assert_eq!(inode2.i_uid, 1u32.to_le()); + assert_eq!(inode2.i_gid, 2u32.to_le()); + assert_eq!(inode2.i_mtime, 3u64.to_le()); + assert_eq!(inode2.i_mtime_nsec, 4u32.to_le()); + assert_eq!( + inode2.i_format, + u16::to_le(EROFS_INODE_LAYOUT_EXTENDED | (EROFS_INODE_CHUNK_BASED << 1)) + ); + } + + #[test] + fn test_rafs_v6_chunk_header() { + let chunk_size: u32 = 1024 * 1024; + let header = RafsV6InodeChunkHeader::new(chunk_size as u64, EROFS_BLOCK_SIZE_4096); + let target = EROFS_CHUNK_FORMAT_INDEXES_FLAG | (20 - 12) as u16; + assert_eq!(u16::from_le(header.format), target); + } + + #[test] + fn test_rafs_v6_chunk_addr() { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut writer = BufWriter::new(w); + let mut reader: Box = Box::new(r); + + let mut chunk = RafsV6InodeChunkAddr::new(); + chunk.set_blob_index(3); + chunk.set_blob_ci_index(0x123456); + chunk.set_block_addr(0xa5a53412); + chunk.store(&mut writer).unwrap(); + writer.flush().unwrap(); + let mut chunk2 = RafsV6InodeChunkAddr::new(); + chunk2.load(&mut reader).unwrap(); + assert_eq!(chunk2.blob_index().unwrap(), 3); + assert_eq!(chunk2.blob_ci_index(), 0x123456); + assert_eq!(chunk2.block_addr(), 0xa5a53412); + assert!(chunk2.validate(4)); + assert!(chunk2.validate(3)); + assert!(!chunk2.validate(2)); + } + + #[test] + fn test_rafs_v6_device() { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut writer = BufWriter::new(w); + let mut reader: Box = Box::new(r); + + let id = [0xa5u8; 64]; + let mut device = RafsV6Device::new(); + device.set_blocks(0x1234); + device.set_blob_id(&id); + device.store(&mut writer).unwrap(); + writer.flush().unwrap(); + let mut device2 = RafsV6Device::new(); + device2.load(&mut reader).unwrap(); + assert_eq!(device2.blocks(), 0x1234); + assert_eq!(device.blob_id(), &id); + } + + #[test] + fn test_rafs_xattr_count_v6() { + let mut xattrs = RafsXAttrs::new(); + xattrs.add(OsString::from("user.a"), vec![1u8]).unwrap(); + xattrs.add(OsString::from("trusted.b"), vec![2u8]).unwrap(); + + assert_eq!(xattrs.count_v6(), 5); + + let xattrs2 = RafsXAttrs::new(); + assert_eq!(xattrs2.count_v6(), 0); + } + + #[test] + fn test_rafs_xattr_size_v6() { + let mut xattrs = RafsXAttrs::new(); + xattrs.add(OsString::from("user.a"), vec![1u8]).unwrap(); + xattrs.add(OsString::from("trusted.b"), vec![2u8]).unwrap(); + + let size = 12 + 8 + 8; + assert_eq!(xattrs.aligned_size_v6(), size); + + let xattrs2 = RafsXAttrs::new(); + assert_eq!(xattrs2.aligned_size_v6(), 0); + + let mut xattrs2 = 
RafsXAttrs::new(); + xattrs2.add(OsString::from("user.a"), vec![1u8]).unwrap(); + xattrs2 + .add(OsString::from("unknown.b"), vec![2u8]) + .unwrap_err(); + } + + #[test] + fn test_rafs_xattr_store_v6() { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut writer = BufWriter::new(w); + let mut reader: Box = Box::new(r); + + let mut xattrs = RafsXAttrs::new(); + // These xattrs are in "e_name_index" order for easier reading: + xattrs + .add(OsString::from("security.rafs"), vec![2u8, 3u8]) + .unwrap(); + xattrs + .add( + OsString::from("system.posix_acl_access"), + vec![4u8, 5u8, 6u8], + ) + .unwrap(); + xattrs + .add( + OsString::from("system.posix_acl_default"), + vec![7u8, 8u8, 9u8, 10u8], + ) + .unwrap(); + xattrs + .add( + OsString::from("trusted.abc"), + vec![11u8, 12u8, 13u8, 14u8, 15u8], + ) + .unwrap(); + xattrs.add(OsString::from("user.nydus"), vec![1u8]).unwrap(); + xattrs.store_v6(&mut writer).unwrap(); + writer.flush().unwrap(); + + let mut header = RafsV6XattrIbodyHeader::new(); + header.load(&mut reader).unwrap(); + let mut size = size_of::(); + + assert_eq!(header.h_shared_count, 0u8); + + let target1 = RafsV6XattrEntry { + e_name_len: 5u8, // "nydus" + e_name_index: 1u8, // EROFS_XATTR_INDEX_USER + e_value_size: u16::to_le(1u16), + }; + + let target2 = RafsV6XattrEntry { + e_name_len: 0u8, // "" + e_name_index: 2u8, // EROFS_XATTR_INDEX_POSIX_ACL_ACCESS + e_value_size: u16::to_le(3u16), + }; + + let target3 = RafsV6XattrEntry { + e_name_len: 0u8, // "" + e_name_index: 3u8, // EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT + e_value_size: u16::to_le(4u16), + }; + + let target4 = RafsV6XattrEntry { + e_name_len: 3u8, // "abc" + e_name_index: 4u8, // EROFS_XATTR_INDEX_TRUSTED + e_value_size: u16::to_le(5u16), + }; + + let target5 = RafsV6XattrEntry { + e_name_len: 4u8, // "rafs" + e_name_index: 6u8, // EROFS_XATTR_INDEX_SECURITY + e_value_size: u16::to_le(2u16), + }; + + let targets = vec![target1, target2, target3, target4, target5]; + + let mut entries: Vec = Vec::with_capacity(targets.len()); + for _i in 0..targets.len() { + let mut entry = RafsV6XattrEntry::new(); + reader.read_exact(entry.as_mut()).unwrap(); + size += round_up( + (size_of::() + + entry.e_name_len as usize + + entry.e_value_size as usize) as u64, + size_of::() as u64, + ) as usize; + reader.seek_to_offset(size as u64).unwrap(); + entries.push(entry); + } + + for (i, target) in targets.iter().enumerate() { + let j = entries + .iter() + .position(|entry| entry == target) + .unwrap_or_else(|| panic!("Test failed for: target{}", i + 1)); + // Note: swap_remove() is faster than remove() when order doesn't matter: + entries.swap_remove(j); + } + } + + #[test] + fn test_invalid_blob_idx_from_chunk_addr() { + let mut addr = RafsV6InodeChunkAddr::new(); + assert!(addr.blob_index().is_err()); + addr.set_blob_index(8); + assert_eq!(addr.blob_index().unwrap(), 8); + + assert_eq!(addr.blob_ci_index(), 0); + addr.set_blob_ci_index(131); + assert_eq!(addr.blob_ci_index(), 131); + + assert_eq!(addr.block_addr(), 0); + addr.set_block_addr(179); + assert_eq!(addr.block_addr(), 179); + } + + #[test] + fn test_rsfs_v6_super_block() { + let mut blk = RafsV6SuperBlock::new(); + assert!(blk.validate(0).is_err()); + + blk.set_inos(10); + blk.set_blocks(100); + blk.set_root_nid(1000); + assert_eq!(blk.s_inos, 10); + assert_eq!(blk.s_blocks, 100); + 
assert_eq!(blk.s_root_nid, 1000); + + blk.set_block_bits(EROFS_BLOCK_BITS_9); + blk.set_meta_addr(1024 * 1024); + assert_eq!( + blk.s_meta_blkaddr, + (1024 * 1024) / EROFS_BLOCK_SIZE_512 as u32 + ); + + blk.set_block_bits(EROFS_BLOCK_BITS_12); + blk.set_meta_addr(1024 * 1024); + assert_eq!( + blk.s_meta_blkaddr, + (1024 * 1024) / EROFS_BLOCK_SIZE_4096 as u32 + ); + } + + #[test] + fn test_rafs_v6_super_block_ext() { + let mut ext = RafsV6SuperBlockExt::new(); + ext.set_compressor(compress::Algorithm::GZip); + ext.set_has_xattr(); + ext.set_explicit_uidgid(); + ext.set_inlined_chunk_digest(); + ext.set_tarfs_mode(); + ext.set_digester(digest::Algorithm::Blake3); + ext.set_chunk_table(1024, 1024); + ext.set_cipher(crypt::Algorithm::Aes128Xts); + + assert_ne!(ext.s_flags & RafsSuperFlags::COMPRESSION_GZIP.bits(), 0); + assert_ne!(ext.s_flags & RafsSuperFlags::HAS_XATTR.bits(), 0); + assert_ne!(ext.s_flags & RafsSuperFlags::EXPLICIT_UID_GID.bits(), 0); + assert_ne!(ext.s_flags & RafsSuperFlags::INLINED_CHUNK_DIGEST.bits(), 0); + assert_ne!(ext.s_flags & RafsSuperFlags::TARTFS_MODE.bits(), 0); + assert_ne!(ext.s_flags & RafsSuperFlags::HASH_BLAKE3.bits(), 0); + assert_eq!(ext.chunk_table_size(), 1024); + assert_eq!(ext.chunk_table_offset(), 1024); + assert_ne!( + ext.s_flags & RafsSuperFlags::ENCRYPTION_ASE_128_XTS.bits(), + 0 + ); + } + + #[test] + fn test_rafs_v6_inode_compact() { + let mut cpt = RafsV6InodeCompact::new(); + cpt.set_size(1024); + cpt.set_ino(10); + cpt.set_nlink(2048); + cpt.set_mode(1); + cpt.set_u(8); + cpt.set_uidgid(1, 1); + cpt.set_mtime(1, 1000); + cpt.set_rdev(20); + cpt.set_xattr_inline_count(10); + cpt.set_data_layout(1); + assert_eq!(cpt.format().to_le(), 2); + assert_eq!(cpt.mode(), 1); + assert_eq!(cpt.size(), 1024); + assert_eq!(cpt.union(), 8); + assert_eq!(cpt.ino(), 10); + assert_eq!(cpt.ugid(), (1, 1)); + assert_eq!(cpt.mtime_s_ns(), (0, 0)); + assert_eq!(cpt.nlink(), 2048); + assert_eq!(cpt.rdev(), 0); + assert_eq!(cpt.xattr_inline_count(), 10); + } + + #[test] + fn test_rafs_v6_inode_extended_inode() { + let mut ext = RafsV6InodeExtended::new(); + ext.set_size(1024); + ext.set_ino(1024); + ext.set_nlink(1024); + ext.set_mode(1024); + ext.set_u(1024); + ext.set_rdev(1024); + ext.set_xattr_inline_count(1024); + + assert_eq!(ext.format(), 1); + assert_eq!(ext.mode(), 1024); + assert_eq!(ext.size(), 1024); + assert_eq!(ext.union(), 1024); + assert_eq!(ext.ino(), 1024); + assert_eq!(ext.ugid(), (0, 0)); + assert_eq!(ext.mtime_s_ns(), (0, 0)); + assert_eq!(ext.nlink(), 1024); + assert_eq!(ext.rdev(), 1024); + assert_eq!(ext.xattr_inline_count(), 1024); + } + + #[test] + fn test_v6_inode() { + let i = new_v6_inode( + &InodeWrapper::new(RafsVersion::V6), + EROFS_INODE_FLAT_INLINE, + 1024, + true, + ); + assert_eq!(i.ino(), 0); + assert_eq!(i.size(), 0); + assert_eq!(i.mtime_s_ns(), (0, 0)); + assert_eq!(i.nlink(), 0); + } + + #[test] + fn test_rafs_v6_dirent() { + let mut dir = RafsV6Dirent::new(0, 1024, EROFS_FILE_TYPE::EROFS_FT_BLKDEV as u8); + dir.set_name_offset(2048); + assert_eq!(dir.e_nameoff, 2048); + + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFREG as u32), + EROFS_FILE_TYPE::EROFS_FT_REG_FILE as u8 + ); + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFDIR as u32), + EROFS_FILE_TYPE::EROFS_FT_DIR as u8 + ); + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFCHR as u32), + EROFS_FILE_TYPE::EROFS_FT_CHRDEV as u8 + ); + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFBLK as u32), + EROFS_FILE_TYPE::EROFS_FT_BLKDEV as u8 + ); + assert_eq!( + 
RafsV6Dirent::file_type(libc::S_IFIFO as u32), + EROFS_FILE_TYPE::EROFS_FT_FIFO as u8 + ); + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFSOCK as u32), + EROFS_FILE_TYPE::EROFS_FT_SOCK as u8 + ); + assert_eq!( + RafsV6Dirent::file_type(libc::S_IFLNK as u32), + EROFS_FILE_TYPE::EROFS_FT_SYMLINK as u8 + ); + } + + #[test] + fn test_rafs_v6_inode_chunk_header() { + let hdr = RafsV6InodeChunkHeader::new(0x1000_0000, EROFS_BLOCK_SIZE_4096); + let val = hdr.to_u32(); + let newhdr = RafsV6InodeChunkHeader::from_u32(val); + assert_eq!(newhdr.format, hdr.format); + assert_eq!(newhdr.reserved, hdr.reserved); + } + #[test] + fn test_align_offset() { + assert_eq!(align_offset(1099, 8), 1104); + assert_eq!(align_offset(1099, 16), 1104); + assert_eq!(align_offset(1099, 32), 1120); + } + #[test] + fn test_calculate_nid() { + assert_eq!(calculate_nid(1024, 512), 16); + assert_eq!(calculate_nid(1024, 768), 8); + assert_eq!(calculate_nid(2048, 768), 40); + } + + #[test] + fn test_rafs_v6_blob() { + let mut blob = RafsV6Blob { + cipher_algo: crypt::Algorithm::Aes256Gcm as u32, + ..RafsV6Blob::default() + }; + assert!(blob.to_blob_info().is_err()); + + blob.blob_id = [0x1u8; BLOB_SHA256_LEN]; + blob.blob_meta_digest = [0xcu8; 32]; + blob.blob_meta_digest[31] = 0xau8; + + blob.cipher_algo = crypt::Algorithm::Aes128Xts as u32; + let info: BlobInfo = blob.to_blob_info().unwrap(); + RafsV6Blob::from_blob_info(&info).unwrap(); + assert!(RafsV6Blob::from_blob_info(&info).is_ok()); + + blob.cipher_algo = crypt::Algorithm::None as u32; + let info: BlobInfo = blob.to_blob_info().unwrap(); + RafsV6Blob::from_blob_info(&info).unwrap(); + assert!(RafsV6Blob::from_blob_info(&info).is_ok()); + } + + #[test] + fn test_rafs_v6_blob_table() { + let mut table = RafsV6BlobTable::new(); + assert_eq!(table.size(), 0); + table.add( + "0".to_string(), + 0, + 0, + 1024, + 10, + 0, + 0, + RafsSuperFlags { bits: 0 }, + [0; 32], + [0; 32], + 0, + 0, + false, + BlobCompressionContextHeader::default(), + Arc::new(crypt::Algorithm::Aes128Xts.new_cipher().unwrap()), + Some(CipherContext::default()), + ); + assert_eq!(table.size(), size_of::()); + assert!(table.get(0).is_ok()); + assert!(table.get(1).is_err()); + } + + fn get_streams() -> (Box, BufWriter) { + let temp = TempFile::new().unwrap(); + let w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let writer: BufWriter = BufWriter::new(w); + let reader: Box = Box::new(r); + (reader, writer) + } + + #[test] + fn test_rafs_v6_blob_table_store() { + let mut table = RafsV6BlobTable::new(); + table.add( + "0".to_string(), + 0, + 0, + 1024, + 10, + 0, + 0, + RafsSuperFlags { bits: 0 }, + [0; 32], + [0; 32], + 0, + 0, + false, + BlobCompressionContextHeader::default(), + Arc::new(crypt::Algorithm::Aes128Xts.new_cipher().unwrap()), + Some(CipherContext::default()), + ); + + let (_reader, mut writer) = get_streams(); + table.store(&mut writer).unwrap(); + writer.flush().unwrap(); + } + + #[test] + fn test_rafs_v6_xattr_entry() { + let ent = RafsV6XattrEntry::new(); + assert_eq!(ent.name_index(), 0); + assert_eq!(ent.name_len(), 0); + assert_eq!(ent.value_size(), 0); + } + + #[test] + fn test_rafs_prefetch_table() { + let mut table = RafsV6PrefetchTable::new(); + assert_eq!(table.size(), 0); + assert_eq!(table.len(), 0); + assert!(table.is_empty()); + table.add_entry(0); + table.add_entry(1); + assert_eq!(table.len(), 2); + assert!(!table.is_empty()); + + let 
(mut reader, mut writer) = get_streams(); + table.store(&mut writer).unwrap(); + writer.flush().unwrap(); + table.inodes.clear(); + assert_eq!(table.len(), 0); + assert!(table.load_prefetch_table_from(&mut reader, 0, 2).is_ok()); + assert_eq!(table.len(), 2); + } +} diff --git a/rafs/src/metadata/md_v5.rs b/rafs/src/metadata/md_v5.rs index 7b332454a57..64c2c38154b 100644 --- a/rafs/src/metadata/md_v5.rs +++ b/rafs/src/metadata/md_v5.rs @@ -1,285 +1,285 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use nydus_storage::device::BlobChunkFlags; -use nydus_storage::RAFS_BATCH_SIZE_TO_GAP_SHIFT; - -use super::cached_v5::CachedSuperBlockV5; -use super::direct_v5::DirectSuperBlockV5; -use super::layout::v5::{RafsV5PrefetchTable, RafsV5SuperBlock}; -use super::*; - -impl RafsSuper { - pub(crate) fn try_load_v5(&mut self, r: &mut RafsIoReader) -> Result { - let end = r.seek_to_end(0)?; - r.seek_to_offset(0)?; - let mut sb = RafsV5SuperBlock::new(); - r.read_exact(sb.as_mut())?; - if !sb.is_rafs_v5() { - return Ok(false); - } - sb.validate(end)?; - - self.meta.magic = sb.magic(); - self.meta.version = sb.version(); - self.meta.sb_size = sb.sb_size(); - self.meta.chunk_size = sb.block_size(); - self.meta.flags = RafsSuperFlags::from_bits(sb.flags()) - .ok_or_else(|| einval!(format!("invalid super flags 0x{:x}", sb.flags())))?; - info!("RAFS v5 super block features: {}", self.meta.flags); - - self.meta.inodes_count = sb.inodes_count(); - self.meta.inode_table_entries = sb.inode_table_entries(); - self.meta.inode_table_offset = sb.inode_table_offset(); - self.meta.blob_table_offset = sb.blob_table_offset(); - self.meta.blob_table_size = sb.blob_table_size(); - self.meta.extended_blob_table_offset = sb.extended_blob_table_offset(); - self.meta.extended_blob_table_entries = sb.extended_blob_table_entries(); - self.meta.prefetch_table_entries = sb.prefetch_table_entries(); - self.meta.prefetch_table_offset = sb.prefetch_table_offset(); - - match self.mode { - RafsMode::Direct => { - let mut inodes = DirectSuperBlockV5::new(&self.meta, self.validate_digest); - inodes.load(r)?; - self.superblock = Arc::new(inodes); - } - RafsMode::Cached => { - let mut inodes = CachedSuperBlockV5::new(self.meta, self.validate_digest); - inodes.load(r)?; - self.superblock = Arc::new(inodes); - } - } - - Ok(true) - } - - pub(crate) fn prefetch_data_v5( - &self, - device: &BlobDevice, - r: &mut RafsIoReader, - root_ino: Inode, - fetcher: F, - ) -> RafsResult - where - F: Fn(&mut BlobIoVec, bool), - { - let hint_entries = self.meta.prefetch_table_entries as usize; - if hint_entries == 0 { - return Ok(false); - } - - // Try to prefetch according to the list of files specified by the - // builder's `--prefetch-policy fs` option. - let mut prefetch_table = RafsV5PrefetchTable::new(); - prefetch_table - .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) - .map_err(|e| { - RafsError::Prefetch(format!( - "Failed in loading hint prefetch table at offset {}. {:?}", - self.meta.prefetch_table_offset, e - )) - })?; - - let mut hardlinks: HashSet = HashSet::new(); - let mut state = BlobIoMerge::default(); - let mut found_root_inode = false; - for ino in prefetch_table.inodes { - // Inode number 0 is invalid, it was added because prefetch table has to be aligned. 
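// A zero entry can only come from the tail padding appended to keep the table
// aligned, so once it is seen the remaining slots are skipped instead of being
// treated as real prefetch hints.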
- if ino == 0 { - break; - } - if ino as Inode == root_ino { - found_root_inode = true; - } - debug!("hint prefetch inode {}", ino); - self.prefetch_data(device, ino as u64, &mut state, &mut hardlinks, &fetcher) - .map_err(|e| RafsError::Prefetch(e.to_string()))?; - } - for (_id, mut desc) in state.drain() { - fetcher(&mut desc, true); - } - - Ok(found_root_inode) - } - - pub(crate) fn skip_v5_superblock(&self, r: &mut RafsIoReader) -> Result<()> { - let _ = RafsV5SuperBlock::read(r)?; - - Ok(()) - } - - fn merge_chunks_io(orig: &mut BlobIoVec, vec: BlobIoVec, max_gap: u64) { - assert!(!orig.is_empty()); - if !vec.is_empty() { - let last = orig.blob_io_desc(orig.len() - 1).unwrap().clone(); - let head = vec.blob_io_desc(0).unwrap(); - if last.is_continuous(head, max_gap) { - // Safe to unwrap since d is not empty. - orig.append(vec); - } - } - } - - // TODO: Add a UT for me. - // `window_base` is calculated by caller, which MUST be the chunk that does - // not overlap user IO's chunk. - // V5 rafs tries to amplify user IO by expanding more chunks to user IO and - // expect that those chunks are likely to be continuous with user IO's chunks. - pub(crate) fn amplify_user_io( - &self, - device: &BlobDevice, - max_uncomp_size: u32, - descs: &mut [BlobIoVec], - inode: &Arc, - window_base: u64, - mut window_size: u64, - ) -> Result<()> { - let inode_size = inode.size(); - let last_desc = match descs.last_mut() { - Some(d) if !d.is_empty() => d, - _ => return Ok(()), - }; - - // Read left content of current file. - if window_base < inode_size { - let size = std::cmp::min(inode_size - window_base, window_size); - let amplified_io_vec = - inode.alloc_bio_vecs(device, window_base, size as usize, false)?; - for vec in amplified_io_vec { - if last_desc.has_same_blob(&vec) { - window_size = if window_size > vec.size() as u64 { - window_size - vec.size() as u64 - } else { - 0 - }; - Self::merge_chunks_io( - last_desc, - vec, - (max_uncomp_size as u64) >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, - ); - } - } - } - - // Read more small files. - let mut max_tries = 64; - let mut next_ino = inode.ino(); - while window_size > 0 && max_tries > 0 { - next_ino += 1; - if let Ok(ni) = self.get_inode(next_ino, false) { - if ni.is_reg() { - let next_size = ni.size(); - let next_size = if next_size == 0 { - continue; - } else if next_size < window_size { - next_size - } else if window_size >= self.meta.chunk_size as u64 { - window_size / self.meta.chunk_size as u64 * self.meta.chunk_size as u64 - } else { - break; - }; - - let amplified_io_vec = - ni.alloc_bio_vecs(device, 0, next_size as usize, false)?; - for vec in amplified_io_vec { - max_tries -= 1; - if last_desc.has_same_blob(&vec) { - window_size = if window_size > vec.size() as u64 { - window_size - vec.size() as u64 - } else { - 0 - }; - Self::merge_chunks_io( - last_desc, - vec, - (max_uncomp_size as u64) >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, - ); - } - } - } - } else { - break; - } - } - - Ok(()) - } -} - -/// Represents backend storage chunked IO address for V5 since V5 format has to -/// load below chunk address from rafs layer and pass it to storage layer. 
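// A minimal sketch of how such a descriptor is consumed downstream; the storage
// layer only sees it as a `BlobChunkInfo` trait object. `chunk_span` below is a
// hypothetical helper for illustration, not part of the nydus API:
//
//     fn chunk_span(chunk: &dyn BlobChunkInfo) -> (u64, u32) {
//         // Locate the compressed bytes of this chunk inside its data blob.
//         (chunk.compressed_offset(), chunk.compressed_size())
//     }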
-pub struct V5IoChunk { - // block hash - pub block_id: Arc, - // blob containing the block - pub blob_index: u32, - // chunk index in blob - pub index: u32, - // position of the block within the file - // offset of the block within the blob - pub compressed_offset: u64, - pub uncompressed_offset: u64, - // size of the block, compressed - pub compressed_size: u32, - pub uncompressed_size: u32, - pub flags: BlobChunkFlags, -} - -impl BlobChunkInfo for V5IoChunk { - fn chunk_id(&self) -> &RafsDigest { - &self.block_id - } - - fn id(&self) -> u32 { - self.index - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, blob_index, u32); - impl_getter!(compressed_offset, compressed_offset, u64); - impl_getter!(compressed_size, compressed_size, u32); - impl_getter!(uncompressed_offset, uncompressed_offset, u64); - impl_getter!(uncompressed_size, uncompressed_size, u32); -} - -#[cfg(test)] -mod tests { - use super::*; - // TODO: add unit test cases for RafsSuper::{try_load_v5, amplify_io} - #[test] - fn test_v5_io_chunk() { - let info = V5IoChunk { - block_id: RafsDigest::default().into(), - blob_index: 2, - index: 3, - compressed_offset: 1024, - uncompressed_offset: 2048, - compressed_size: 10, - uncompressed_size: 20, - flags: BlobChunkFlags::BATCH, - }; - - assert_eq!(info.chunk_id(), &RafsDigest::default()); - assert_eq!(info.id(), 3); - assert!(!info.is_compressed()); - assert!(!info.is_encrypted()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use nydus_storage::device::BlobChunkFlags; +use nydus_storage::RAFS_BATCH_SIZE_TO_GAP_SHIFT; + +use super::cached_v5::CachedSuperBlockV5; +use super::direct_v5::DirectSuperBlockV5; +use super::layout::v5::{RafsV5PrefetchTable, RafsV5SuperBlock}; +use super::*; + +impl RafsSuper { + pub(crate) fn try_load_v5(&mut self, r: &mut RafsIoReader) -> Result { + let end = r.seek_to_end(0)?; + r.seek_to_offset(0)?; + let mut sb = RafsV5SuperBlock::new(); + r.read_exact(sb.as_mut())?; + if !sb.is_rafs_v5() { + return Ok(false); + } + sb.validate(end)?; + + self.meta.magic = sb.magic(); + self.meta.version = sb.version(); + self.meta.sb_size = sb.sb_size(); + self.meta.chunk_size = sb.block_size(); + self.meta.flags = RafsSuperFlags::from_bits(sb.flags()) + .ok_or_else(|| einval!(format!("invalid super flags 0x{:x}", sb.flags())))?; + info!("RAFS v5 super block features: {}", self.meta.flags); + + self.meta.inodes_count = sb.inodes_count(); + self.meta.inode_table_entries = sb.inode_table_entries(); + self.meta.inode_table_offset = sb.inode_table_offset(); + self.meta.blob_table_offset = sb.blob_table_offset(); + self.meta.blob_table_size = sb.blob_table_size(); + self.meta.extended_blob_table_offset = sb.extended_blob_table_offset(); + self.meta.extended_blob_table_entries = sb.extended_blob_table_entries(); + self.meta.prefetch_table_entries = sb.prefetch_table_entries(); + self.meta.prefetch_table_offset = sb.prefetch_table_offset(); + + match self.mode { + RafsMode::Direct => { + let mut inodes = DirectSuperBlockV5::new(&self.meta, self.validate_digest); + inodes.load(r)?; + self.superblock = Arc::new(inodes); + } + RafsMode::Cached => { + let mut inodes = CachedSuperBlockV5::new(self.meta, self.validate_digest); + inodes.load(r)?; + 
self.superblock = Arc::new(inodes); + } + } + + Ok(true) + } + + pub(crate) fn prefetch_data_v5( + &self, + device: &BlobDevice, + r: &mut RafsIoReader, + root_ino: Inode, + fetcher: F, + ) -> RafsResult + where + F: Fn(&mut BlobIoVec, bool), + { + let hint_entries = self.meta.prefetch_table_entries as usize; + if hint_entries == 0 { + return Ok(false); + } + + // Try to prefetch according to the list of files specified by the + // builder's `--prefetch-policy fs` option. + let mut prefetch_table = RafsV5PrefetchTable::new(); + prefetch_table + .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) + .map_err(|e| { + RafsError::Prefetch(format!( + "Failed in loading hint prefetch table at offset {}. {:?}", + self.meta.prefetch_table_offset, e + )) + })?; + + let mut hardlinks: HashSet = HashSet::new(); + let mut state = BlobIoMerge::default(); + let mut found_root_inode = false; + for ino in prefetch_table.inodes { + // Inode number 0 is invalid, it was added because prefetch table has to be aligned. + if ino == 0 { + break; + } + if ino as Inode == root_ino { + found_root_inode = true; + } + debug!("hint prefetch inode {}", ino); + self.prefetch_data(device, ino as u64, &mut state, &mut hardlinks, &fetcher) + .map_err(|e| RafsError::Prefetch(e.to_string()))?; + } + for (_id, mut desc) in state.drain() { + fetcher(&mut desc, true); + } + + Ok(found_root_inode) + } + + pub(crate) fn skip_v5_superblock(&self, r: &mut RafsIoReader) -> Result<()> { + let _ = RafsV5SuperBlock::read(r)?; + + Ok(()) + } + + fn merge_chunks_io(orig: &mut BlobIoVec, vec: BlobIoVec, max_gap: u64) { + assert!(!orig.is_empty()); + if !vec.is_empty() { + let last = orig.blob_io_desc(orig.len() - 1).unwrap().clone(); + let head = vec.blob_io_desc(0).unwrap(); + if last.is_continuous(head, max_gap) { + // Safe to unwrap since d is not empty. + orig.append(vec); + } + } + } + + // TODO: Add a UT for me. + // `window_base` is calculated by caller, which MUST be the chunk that does + // not overlap user IO's chunk. + // V5 rafs tries to amplify user IO by expanding more chunks to user IO and + // expect that those chunks are likely to be continuous with user IO's chunks. + pub(crate) fn amplify_user_io( + &self, + device: &BlobDevice, + max_uncomp_size: u32, + descs: &mut [BlobIoVec], + inode: &Arc, + window_base: u64, + mut window_size: u64, + ) -> Result<()> { + let inode_size = inode.size(); + let last_desc = match descs.last_mut() { + Some(d) if !d.is_empty() => d, + _ => return Ok(()), + }; + + // Read left content of current file. + if window_base < inode_size { + let size = std::cmp::min(inode_size - window_base, window_size); + let amplified_io_vec = + inode.alloc_bio_vecs(device, window_base, size as usize, false)?; + for vec in amplified_io_vec { + if last_desc.has_same_blob(&vec) { + window_size = if window_size > vec.size() as u64 { + window_size - vec.size() as u64 + } else { + 0 + }; + Self::merge_chunks_io( + last_desc, + vec, + (max_uncomp_size as u64) >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, + ); + } + } + } + + // Read more small files. 
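// The loop below scans consecutive inode numbers after the current file and,
// for every regular file found, appends chunks that share the last descriptor's
// blob, shrinking the remaining window as chunks are merged. It stops once the
// window is used up, the merge budget of 64 attempts is exhausted, or an inode
// lookup fails. The window is truncated to whole chunks: e.g. with a 1 MiB
// chunk_size and 2.5 MiB of window left, a file at least that large is capped
// at 2 MiB (window_size / chunk_size * chunk_size).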
+ let mut max_tries = 64; + let mut next_ino = inode.ino(); + while window_size > 0 && max_tries > 0 { + next_ino += 1; + if let Ok(ni) = self.get_inode(next_ino, false) { + if ni.is_reg() { + let next_size = ni.size(); + let next_size = if next_size == 0 { + continue; + } else if next_size < window_size { + next_size + } else if window_size >= self.meta.chunk_size as u64 { + window_size / self.meta.chunk_size as u64 * self.meta.chunk_size as u64 + } else { + break; + }; + + let amplified_io_vec = + ni.alloc_bio_vecs(device, 0, next_size as usize, false)?; + for vec in amplified_io_vec { + max_tries -= 1; + if last_desc.has_same_blob(&vec) { + window_size = if window_size > vec.size() as u64 { + window_size - vec.size() as u64 + } else { + 0 + }; + Self::merge_chunks_io( + last_desc, + vec, + (max_uncomp_size as u64) >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, + ); + } + } + } + } else { + break; + } + } + + Ok(()) + } +} + +/// Represents backend storage chunked IO address for V5 since V5 format has to +/// load below chunk address from rafs layer and pass it to storage layer. +pub struct V5IoChunk { + // block hash + pub block_id: Arc, + // blob containing the block + pub blob_index: u32, + // chunk index in blob + pub index: u32, + // position of the block within the file + // offset of the block within the blob + pub compressed_offset: u64, + pub uncompressed_offset: u64, + // size of the block, compressed + pub compressed_size: u32, + pub uncompressed_size: u32, + pub flags: BlobChunkFlags, +} + +impl BlobChunkInfo for V5IoChunk { + fn chunk_id(&self) -> &RafsDigest { + &self.block_id + } + + fn id(&self) -> u32 { + self.index + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, blob_index, u32); + impl_getter!(compressed_offset, compressed_offset, u64); + impl_getter!(compressed_size, compressed_size, u32); + impl_getter!(uncompressed_offset, uncompressed_offset, u64); + impl_getter!(uncompressed_size, uncompressed_size, u32); +} + +#[cfg(test)] +mod tests { + use super::*; + // TODO: add unit test cases for RafsSuper::{try_load_v5, amplify_io} + #[test] + fn test_v5_io_chunk() { + let info = V5IoChunk { + block_id: RafsDigest::default().into(), + blob_index: 2, + index: 3, + compressed_offset: 1024, + uncompressed_offset: 2048, + compressed_size: 10, + uncompressed_size: 20, + flags: BlobChunkFlags::BATCH, + }; + + assert_eq!(info.chunk_id(), &RafsDigest::default()); + assert_eq!(info.id(), 3); + assert!(!info.is_compressed()); + assert!(!info.is_encrypted()); + } +} diff --git a/rafs/src/metadata/md_v6.rs b/rafs/src/metadata/md_v6.rs index a0b11d0dd00..b2058cf1851 100644 --- a/rafs/src/metadata/md_v6.rs +++ b/rafs/src/metadata/md_v6.rs @@ -1,255 +1,255 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashSet; -use std::io::Result; -use std::mem::size_of; -use std::sync::Arc; - -use super::direct_v6::DirectSuperBlockV6; -use super::layout::v6::{RafsV6PrefetchTable, RafsV6SuperBlock, RafsV6SuperBlockExt}; -use super::layout::RAFS_SUPER_VERSION_V6; -use super::*; -use super::{RafsMode, RafsSuper, RafsSuperBlock, RafsSuperFlags}; - -use crate::RafsIoReader; -use crate::{RafsError, RafsResult}; - -impl RafsSuper { - pub(crate) fn try_load_v6(&mut self, r: &mut RafsIoReader) -> Result { - let end = r.seek_to_end(0)?; - r.seek_to_offset(0)?; - - let mut sb = RafsV6SuperBlock::new(); - if sb.load(r).is_err() { - return Ok(false); - } - if !sb.is_rafs_v6() { - return Ok(false); - } - sb.validate(end)?; - self.meta.version = RAFS_SUPER_VERSION_V6; - self.meta.magic = sb.magic(); - self.meta.meta_blkaddr = sb.meta_addr(); - self.meta.root_nid = sb.root_nid(); - self.meta.blob_device_table_count = sb.extra_devices() as u32; - self.meta.blob_device_table_offset = sb.device_table_offset(); - - let mut ext_sb = RafsV6SuperBlockExt::new(); - ext_sb.load(r)?; - ext_sb.validate(end, &self.meta)?; - self.meta.chunk_size = ext_sb.chunk_size(); - self.meta.blob_table_offset = ext_sb.blob_table_offset(); - self.meta.blob_table_size = ext_sb.blob_table_size(); - self.meta.chunk_table_offset = ext_sb.chunk_table_offset(); - self.meta.chunk_table_size = ext_sb.chunk_table_size(); - self.meta.inodes_count = sb.inodes_count(); - - self.meta.flags = RafsSuperFlags::from_bits(ext_sb.flags()) - .ok_or_else(|| einval!(format!("invalid RAFS flags 0x{:x}", ext_sb.flags())))?; - info!("RAFS features: {}", self.meta.flags); - - self.meta.prefetch_table_entries = ext_sb.prefetch_table_size() / size_of::() as u32; - self.meta.prefetch_table_offset = ext_sb.prefetch_table_offset(); - trace!( - "prefetch table offset {} entries {} ", - self.meta.prefetch_table_offset, - self.meta.prefetch_table_entries - ); - - match self.mode { - RafsMode::Direct => { - let mut sb_v6 = DirectSuperBlockV6::new(&self.meta); - sb_v6.load(r)?; - self.superblock = Arc::new(sb_v6); - Ok(true) - } - RafsMode::Cached => Err(enosys!("Rafs v6 does not support cached mode")), - } - } - - pub(crate) fn is_inlay_prefetch_all(&self, r: &mut RafsIoReader) -> RafsResult { - let hint_entries = self.meta.prefetch_table_entries as usize; - if hint_entries != 1 { - return Ok(false); - } - let unique = if self.meta.is_v6() { - let mut prefetch_table = RafsV6PrefetchTable::new(); - prefetch_table - .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) - .map_err(|e| { - RafsError::Prefetch(format!( - "Failed in loading hint prefetch table at offset {}. {:?}", - self.meta.prefetch_table_offset, e - )) - })?; - prefetch_table.inodes[0] as u64 - } else { - let mut prefetch_table = RafsV5PrefetchTable::new(); - prefetch_table - .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) - .map_err(|e| { - RafsError::Prefetch(format!( - "Failed in loading hint prefetch table at offset {}. 
{:?}", - self.meta.prefetch_table_offset, e - )) - })?; - prefetch_table.inodes[0] as u64 - }; - - Ok(unique == self.superblock.root_ino()) - } - - pub(crate) fn prefetch_data_v6( - &self, - device: &BlobDevice, - r: &mut RafsIoReader, - root_ino: Inode, - fetcher: F, - ) -> RafsResult - where - F: Fn(&mut BlobIoVec, bool), - { - let hint_entries = self.meta.prefetch_table_entries as usize; - if hint_entries == 0 { - return Ok(false); - } - - // Try to prefetch according to the list of files specified by the - // builder's `--prefetch-policy fs` option. - let mut prefetch_table = RafsV6PrefetchTable::new(); - prefetch_table - .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) - .map_err(|e| { - RafsError::Prefetch(format!( - "Failed in loading hint prefetch table at offset {}. {:?}", - self.meta.prefetch_table_offset, e - )) - })?; - trace!("prefetch table contents {:?}", prefetch_table); - - let mut hardlinks: HashSet = HashSet::new(); - let mut state = BlobIoMerge::default(); - let mut found_root_inode = false; - for ino in prefetch_table.inodes { - // Inode number 0 is invalid, it was added because prefetch table has to be aligned. - if ino == 0 { - break; - } - if ino as Inode == root_ino { - found_root_inode = true; - } - trace!("hint prefetch inode {}", ino); - self.prefetch_data(device, ino as u64, &mut state, &mut hardlinks, &fetcher) - .map_err(|e| RafsError::Prefetch(e.to_string()))?; - } - // The left chunks whose size is smaller than 4MB will be fetched here. - for (_id, mut desc) in state.drain() { - fetcher(&mut desc, true); - } - - Ok(found_root_inode) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::metadata::RafsStore; - use crate::BufWriter; - use std::fs::OpenOptions; - use std::io::Write; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_v6_load_too_small_superblock() { - let t_file = TempFile::new().unwrap(); - - let file = OpenOptions::new() - .read(true) - .write(false) - .open(t_file.as_path()) - .unwrap(); - let mut reader = Box::new(file) as RafsIoReader; - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: true, - ..Default::default() - }; - - assert!(!rs.try_load_v6(&mut reader).unwrap()); - } - - #[test] - fn test_v6_load_invalid_magic() { - let t_file = TempFile::new().unwrap(); - - let mut file = OpenOptions::new() - .read(true) - .write(true) - .open(t_file.as_path()) - .unwrap(); - file.write_all(&[0u8; 4096]).unwrap(); - let mut reader = Box::new(file) as RafsIoReader; - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: true, - ..Default::default() - }; - - assert!(!rs.try_load_v6(&mut reader).unwrap()); - } - - #[test] - fn test_v6_load_invalid_superblock() { - let t_file = TempFile::new().unwrap(); - - let file = OpenOptions::new() - .read(true) - .write(true) - .open(t_file.as_path()) - .unwrap(); - let sb = RafsV6SuperBlock::new(); - let mut writer = BufWriter::new(file); - sb.store(&mut writer).unwrap(); - writer.flush().unwrap(); - - let file = OpenOptions::new() - .read(true) - .write(false) - .open(t_file.as_path()) - .unwrap(); - let mut reader = Box::new(file) as RafsIoReader; - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: true, - ..Default::default() - }; - - assert!(rs.try_load_v6(&mut reader).is_err()); - } - - /* - #[test] - fn test_try_load_v6() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - 
source_path.push("../tests/texture/bootstrap/rafs-v6.boot"); - - let file = OpenOptions::new() - .read(true) - .write(false) - .open(source_path) - .unwrap(); - let mut reader = Box::new(file) as RafsIoReader; - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest: true, - ..Default::default() - }; - - rs.try_load_v6(&mut reader).unwrap(); - } - */ -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashSet; +use std::io::Result; +use std::mem::size_of; +use std::sync::Arc; + +use super::direct_v6::DirectSuperBlockV6; +use super::layout::v6::{RafsV6PrefetchTable, RafsV6SuperBlock, RafsV6SuperBlockExt}; +use super::layout::RAFS_SUPER_VERSION_V6; +use super::*; +use super::{RafsMode, RafsSuper, RafsSuperBlock, RafsSuperFlags}; + +use crate::RafsIoReader; +use crate::{RafsError, RafsResult}; + +impl RafsSuper { + pub(crate) fn try_load_v6(&mut self, r: &mut RafsIoReader) -> Result { + let end = r.seek_to_end(0)?; + r.seek_to_offset(0)?; + + let mut sb = RafsV6SuperBlock::new(); + if sb.load(r).is_err() { + return Ok(false); + } + if !sb.is_rafs_v6() { + return Ok(false); + } + sb.validate(end)?; + self.meta.version = RAFS_SUPER_VERSION_V6; + self.meta.magic = sb.magic(); + self.meta.meta_blkaddr = sb.meta_addr(); + self.meta.root_nid = sb.root_nid(); + self.meta.blob_device_table_count = sb.extra_devices() as u32; + self.meta.blob_device_table_offset = sb.device_table_offset(); + + let mut ext_sb = RafsV6SuperBlockExt::new(); + ext_sb.load(r)?; + ext_sb.validate(end, &self.meta)?; + self.meta.chunk_size = ext_sb.chunk_size(); + self.meta.blob_table_offset = ext_sb.blob_table_offset(); + self.meta.blob_table_size = ext_sb.blob_table_size(); + self.meta.chunk_table_offset = ext_sb.chunk_table_offset(); + self.meta.chunk_table_size = ext_sb.chunk_table_size(); + self.meta.inodes_count = sb.inodes_count(); + + self.meta.flags = RafsSuperFlags::from_bits(ext_sb.flags()) + .ok_or_else(|| einval!(format!("invalid RAFS flags 0x{:x}", ext_sb.flags())))?; + info!("RAFS features: {}", self.meta.flags); + + self.meta.prefetch_table_entries = ext_sb.prefetch_table_size() / size_of::() as u32; + self.meta.prefetch_table_offset = ext_sb.prefetch_table_offset(); + trace!( + "prefetch table offset {} entries {} ", + self.meta.prefetch_table_offset, + self.meta.prefetch_table_entries + ); + + match self.mode { + RafsMode::Direct => { + let mut sb_v6 = DirectSuperBlockV6::new(&self.meta); + sb_v6.load(r)?; + self.superblock = Arc::new(sb_v6); + Ok(true) + } + RafsMode::Cached => Err(enosys!("Rafs v6 does not support cached mode")), + } + } + + pub(crate) fn is_inlay_prefetch_all(&self, r: &mut RafsIoReader) -> RafsResult { + let hint_entries = self.meta.prefetch_table_entries as usize; + if hint_entries != 1 { + return Ok(false); + } + let unique = if self.meta.is_v6() { + let mut prefetch_table = RafsV6PrefetchTable::new(); + prefetch_table + .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) + .map_err(|e| { + RafsError::Prefetch(format!( + "Failed in loading hint prefetch table at offset {}. {:?}", + self.meta.prefetch_table_offset, e + )) + })?; + prefetch_table.inodes[0] as u64 + } else { + let mut prefetch_table = RafsV5PrefetchTable::new(); + prefetch_table + .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) + .map_err(|e| { + RafsError::Prefetch(format!( + "Failed in loading hint prefetch table at offset {}. 
{:?}", + self.meta.prefetch_table_offset, e + )) + })?; + prefetch_table.inodes[0] as u64 + }; + + Ok(unique == self.superblock.root_ino()) + } + + pub(crate) fn prefetch_data_v6( + &self, + device: &BlobDevice, + r: &mut RafsIoReader, + root_ino: Inode, + fetcher: F, + ) -> RafsResult + where + F: Fn(&mut BlobIoVec, bool), + { + let hint_entries = self.meta.prefetch_table_entries as usize; + if hint_entries == 0 { + return Ok(false); + } + + // Try to prefetch according to the list of files specified by the + // builder's `--prefetch-policy fs` option. + let mut prefetch_table = RafsV6PrefetchTable::new(); + prefetch_table + .load_prefetch_table_from(r, self.meta.prefetch_table_offset, hint_entries) + .map_err(|e| { + RafsError::Prefetch(format!( + "Failed in loading hint prefetch table at offset {}. {:?}", + self.meta.prefetch_table_offset, e + )) + })?; + trace!("prefetch table contents {:?}", prefetch_table); + + let mut hardlinks: HashSet = HashSet::new(); + let mut state = BlobIoMerge::default(); + let mut found_root_inode = false; + for ino in prefetch_table.inodes { + // Inode number 0 is invalid, it was added because prefetch table has to be aligned. + if ino == 0 { + break; + } + if ino as Inode == root_ino { + found_root_inode = true; + } + trace!("hint prefetch inode {}", ino); + self.prefetch_data(device, ino as u64, &mut state, &mut hardlinks, &fetcher) + .map_err(|e| RafsError::Prefetch(e.to_string()))?; + } + // The left chunks whose size is smaller than 4MB will be fetched here. + for (_id, mut desc) in state.drain() { + fetcher(&mut desc, true); + } + + Ok(found_root_inode) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::metadata::RafsStore; + use crate::BufWriter; + use std::fs::OpenOptions; + use std::io::Write; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_v6_load_too_small_superblock() { + let t_file = TempFile::new().unwrap(); + + let file = OpenOptions::new() + .read(true) + .write(false) + .open(t_file.as_path()) + .unwrap(); + let mut reader = Box::new(file) as RafsIoReader; + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: true, + ..Default::default() + }; + + assert!(!rs.try_load_v6(&mut reader).unwrap()); + } + + #[test] + fn test_v6_load_invalid_magic() { + let t_file = TempFile::new().unwrap(); + + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(t_file.as_path()) + .unwrap(); + file.write_all(&[0u8; 4096]).unwrap(); + let mut reader = Box::new(file) as RafsIoReader; + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: true, + ..Default::default() + }; + + assert!(!rs.try_load_v6(&mut reader).unwrap()); + } + + #[test] + fn test_v6_load_invalid_superblock() { + let t_file = TempFile::new().unwrap(); + + let file = OpenOptions::new() + .read(true) + .write(true) + .open(t_file.as_path()) + .unwrap(); + let sb = RafsV6SuperBlock::new(); + let mut writer = BufWriter::new(file); + sb.store(&mut writer).unwrap(); + writer.flush().unwrap(); + + let file = OpenOptions::new() + .read(true) + .write(false) + .open(t_file.as_path()) + .unwrap(); + let mut reader = Box::new(file) as RafsIoReader; + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: true, + ..Default::default() + }; + + assert!(rs.try_load_v6(&mut reader).is_err()); + } + + /* + #[test] + fn test_try_load_v6() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + 
source_path.push("../tests/texture/bootstrap/rafs-v6.boot"); + + let file = OpenOptions::new() + .read(true) + .write(false) + .open(source_path) + .unwrap(); + let mut reader = Box::new(file) as RafsIoReader; + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest: true, + ..Default::default() + }; + + rs.try_load_v6(&mut reader).unwrap(); + } + */ +} diff --git a/rafs/src/metadata/mod.rs b/rafs/src/metadata/mod.rs index 27f20fb448f..4fd1e7da620 100644 --- a/rafs/src/metadata/mod.rs +++ b/rafs/src/metadata/mod.rs @@ -1,1326 +1,1326 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Enums, Structs and Traits to access and manage Rafs filesystem metadata. - -use std::any::Any; -use std::collections::{HashMap, HashSet}; -use std::convert::{TryFrom, TryInto}; -use std::ffi::{OsStr, OsString}; -use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; -use std::fs::OpenOptions; -use std::io::{Error, ErrorKind, Result}; -use std::os::unix::ffi::OsStrExt; -use std::path::{Component, Path, PathBuf}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::Duration; -use thiserror::Error; - -use anyhow::{bail, ensure}; -use fuse_backend_rs::abi::fuse_abi::Attr; -use fuse_backend_rs::api::filesystem::Entry; -use nydus_api::{ConfigV2, RafsConfigV2}; -use nydus_storage::device::{ - BlobChunkInfo, BlobDevice, BlobFeatures, BlobInfo, BlobIoMerge, BlobIoVec, -}; -use nydus_storage::meta::toc::TocEntryList; -use nydus_utils::digest::{self, RafsDigest}; -use nydus_utils::{compress, crypt}; -use serde::Serialize; - -use self::layout::v5::RafsV5PrefetchTable; -use self::layout::v6::RafsV6PrefetchTable; -use self::layout::{XattrName, XattrValue, RAFS_SUPER_VERSION_V5, RAFS_SUPER_VERSION_V6}; -use self::noop::NoopSuperBlock; -use crate::fs::{RAFS_DEFAULT_ATTR_TIMEOUT, RAFS_DEFAULT_ENTRY_TIMEOUT}; -use crate::{RafsError, RafsIoReader, RafsIoWrite, RafsResult}; - -mod md_v5; -mod md_v6; -mod noop; - -pub mod cached_v5; -pub mod chunk; -pub mod direct_v5; -pub mod direct_v6; -pub mod inode; -pub mod layout; - -// Reexport from nydus_storage crate. -pub use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; - -/// Maximum size of blob identifier string. -pub const RAFS_BLOB_ID_MAX_LENGTH: usize = 64; -/// Block size reported by get_attr(). -pub const RAFS_ATTR_BLOCK_SIZE: u32 = 4096; -/// Maximum size of file name supported by RAFS. -pub const RAFS_MAX_NAME: usize = 255; -/// Maximum size of RAFS filesystem metadata blobs. -pub const RAFS_MAX_METADATA_SIZE: usize = 0x8000_0000; -/// File name for Unix current directory. -pub const DOT: &str = "."; -/// File name for Unix parent directory. -pub const DOTDOT: &str = ".."; - -/// Type for RAFS filesystem inode number. -pub type Inode = u64; -pub type ArcRafsInodeExt = Arc; - -#[derive(Debug, Clone)] -pub struct RafsBlobExtraInfo { - /// Mapped block address from RAFS v6 devslot table. - /// - /// It's the offset of the uncompressed blob used to convert an image into a disk. - pub mapped_blkaddr: u32, -} - -/// Trait to access filesystem inodes managed by a RAFS filesystem. -pub trait RafsSuperInodes { - /// Get the maximum inode number managed by the RAFS filesystem. - fn get_max_ino(&self) -> Inode; - - /// Get the `RafsInode` trait object corresponding to the inode number `ino`. 
- fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result>; - - /// Get the `RafsInodeExt` trait object corresponding to the 'ino`. - fn get_extended_inode(&self, ino: Inode, validate_inode: bool) - -> Result>; -} - -/// Trait to access RAFS filesystem metadata, including the RAFS super block and inodes. -pub trait RafsSuperBlock: RafsSuperInodes + Send + Sync { - /// Load and validate the RAFS filesystem super block from the specified reader. - fn load(&mut self, r: &mut RafsIoReader) -> Result<()>; - - /// Update/reload the RAFS filesystem super block from the specified reader. - fn update(&self, r: &mut RafsIoReader) -> RafsResult<()>; - - /// Destroy the RAFS filesystem super block object. - fn destroy(&mut self); - - /// Get all blob objects referenced by the RAFS filesystem. - fn get_blob_infos(&self) -> Vec>; - - /// Get extra information associated with blob objects. - fn get_blob_extra_infos(&self) -> Result> { - Ok(HashMap::new()) - } - - /// Get the inode number of the RAFS filesystem root. - fn root_ino(&self) -> u64; - - /// Get the `BlobChunkInfo` object by a chunk index, used by RAFS v6. - fn get_chunk_info(&self, _idx: usize) -> Result>; - - /// Associate `BlobDevice` object with the `RafsSuperBlock` object, used by RAFS v6. - fn set_blob_device(&self, blob_device: BlobDevice); -} - -/// Result codes for `RafsInodeWalkHandler`. -pub enum RafsInodeWalkAction { - /// Indicates the need to continue iterating - Continue, - /// Indicates that it is necessary to stop continuing to iterate - Break, -} - -/// Callback handler for RafsInode::walk_children_inodes(). -pub type RafsInodeWalkHandler<'a> = &'a mut dyn FnMut( - Option>, - OsString, - u64, - u64, -) -> Result; - -/// Trait to provide readonly accessors for RAFS filesystem inode. -/// -/// The RAFS filesystem is a readonly filesystem, so does its inodes. The `RafsInode` trait provides -/// readonly accessors for RAFS filesystem inode. The `nydus-image` crate provides its own -/// InodeWrapper to generate RAFS filesystem inodes. -pub trait RafsInode: Any { - /// RAFS: validate format and integrity of the RAFS filesystem inode. - /// - /// Inodes objects may be transmuted from raw buffers or loaded from untrusted source. - /// It must be validated for integrity before accessing any of its data fields . - fn validate(&self, max_inode: Inode, chunk_size: u64) -> Result<()>; - - /// RAFS: allocate blob io vectors to read file data in range [offset, offset + size). - fn alloc_bio_vecs( - &self, - device: &BlobDevice, - offset: u64, - size: usize, - user_io: bool, - ) -> Result>; - - /// RAFS: collect all descendants of the inode for image building. - fn collect_descendants_inodes( - &self, - descendants: &mut Vec>, - ) -> Result; - - /// Posix: generate a `Entry` object required by libc/fuse from the inode. - fn get_entry(&self) -> Entry; - - /// Posix: generate a posix `Attr` object required by libc/fuse from the inode. - fn get_attr(&self) -> Attr; - - /// Posix: get the inode number. - fn ino(&self) -> u64; - - /// Posix: get real device number. - fn rdev(&self) -> u32; - - /// Posix: get project id associated with the inode. - fn projid(&self) -> u32; - - /// Mode: check whether the inode is a block device. - fn is_blkdev(&self) -> bool; - - /// Mode: check whether the inode is a char device. - fn is_chrdev(&self) -> bool; - - /// Mode: check whether the inode is a sock. - fn is_sock(&self) -> bool; - - /// Mode: check whether the inode is a fifo. 
- fn is_fifo(&self) -> bool; - - /// Mode: check whether the inode is a directory. - fn is_dir(&self) -> bool; - - /// Mode: check whether the inode is a symlink. - fn is_symlink(&self) -> bool; - - /// Mode: check whether the inode is a regular file. - fn is_reg(&self) -> bool; - - /// Mode: check whether the inode is a hardlink. - fn is_hardlink(&self) -> bool; - - /// Xattr: check whether the inode has extended attributes. - fn has_xattr(&self) -> bool; - - /// Xattr: get the value of xattr with key `name`. - fn get_xattr(&self, name: &OsStr) -> Result>; - - /// Xattr: get all xattr keys. - fn get_xattrs(&self) -> Result>; - - /// Symlink: get the symlink target. - fn get_symlink(&self) -> Result; - - /// Symlink: get size of the symlink target path. - fn get_symlink_size(&self) -> u16; - - /// Directory: walk/enumerate child inodes. - fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()>; - - /// Directory: get child inode by name. - fn get_child_by_name(&self, name: &OsStr) -> Result>; - - /// Directory: get child inode by child index, child index starts from 0. - fn get_child_by_index(&self, idx: u32) -> Result>; - - /// Directory: get number of child inodes. - fn get_child_count(&self) -> u32; - - /// Directory: get the inode number corresponding to the first child inode. - fn get_child_index(&self) -> Result; - - /// Regular: get size of file content - fn size(&self) -> u64; - - /// Regular: check whether the inode has no content. - fn is_empty_size(&self) -> bool { - self.size() == 0 - } - - /// Regular: get number of data chunks. - fn get_chunk_count(&self) -> u32; - - fn as_any(&self) -> &dyn Any; -} - -/// Extended inode information for builder and directory walker. -pub trait RafsInodeExt: RafsInode { - /// Convert to the base type `RafsInode`. - fn as_inode(&self) -> &dyn RafsInode; - - /// Posix: get inode number of the parent inode. - fn parent(&self) -> u64; - - /// Posix: get file name. - fn name(&self) -> OsString; - - /// Posix: get file name size. - fn get_name_size(&self) -> u16; - - /// RAFS V5: get RAFS v5 specific inode flags. - fn flags(&self) -> u64; - - /// RAFS v5: get digest value of the inode metadata. - fn get_digest(&self) -> RafsDigest; - - /// RAFS v5: get chunk info object by chunk index, chunk index starts from 0. - fn get_chunk_info(&self, idx: u32) -> Result>; -} - -/// Trait to write out RAFS filesystem meta objects into the metadata blob. -pub trait RafsStore { - /// Write out the Rafs filesystem meta object to the writer. - fn store(&self, w: &mut dyn RafsIoWrite) -> Result; -} - -bitflags! { - /// Rafs filesystem feature flags. - #[derive(Serialize)] - pub struct RafsSuperFlags: u64 { - /// Data chunks are not compressed. - const COMPRESSION_NONE = 0x0000_0001; - /// Data chunks are compressed with lz4_block. - const COMPRESSION_LZ4 = 0x0000_0002; - /// Use blake3 hash algorithm to calculate digest. - const HASH_BLAKE3 = 0x0000_0004; - /// Use sha256 hash algorithm to calculate digest. - const HASH_SHA256 = 0x0000_0008; - /// Inode has explicit uid gid fields. - /// - /// If unset, use nydusd process euid/egid for all inodes at runtime. - const EXPLICIT_UID_GID = 0x0000_0010; - /// Inode may have associated extended attributes. - const HAS_XATTR = 0x0000_0020; - /// Data chunks are compressed with gzip - const COMPRESSION_GZIP = 0x0000_0040; - /// Data chunks are compressed with zstd - const COMPRESSION_ZSTD = 0x0000_0080; - /// Chunk digests are inlined in RAFS v6 data blob. 
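// Each constant here is a distinct bit in the 64-bit flags value, so features
// are combined by OR-ing them: for example, an image built with lz4_block
// compression, blake3 digests and explicit uid/gid records
// 0x2 | 0x4 | 0x10 = 0x16.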
- const INLINED_CHUNK_DIGEST = 0x0000_0100; - /// RAFS works in Tarfs mode, which directly uses tar streams as data blobs. - const TARTFS_MODE = 0x0000_0200; - /// Data chunks are not encrypted. - const ENCRYPTION_NONE = 0x0100_0000; - /// Data chunks are encrypted with AES-128-XTS. - const ENCRYPTION_ASE_128_XTS = 0x0200_0000; - - // Reserved for future compatible changes. - const PRESERVED_COMPAT_5 = 0x0400_0000; - const PRESERVED_COMPAT_4 = 0x0800_0000; - const PRESERVED_COMPAT_3 = 0x1000_0000; - const PRESERVED_COMPAT_2 = 0x2000_0000; - const PRESERVED_COMPAT_1 = 0x4000_0000; - const PRESERVED_COMPAT_0 = 0x8000_0000; - } -} - -impl Default for RafsSuperFlags { - fn default() -> Self { - RafsSuperFlags::empty() - } -} - -impl Display for RafsSuperFlags { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "{:?}", self)?; - Ok(()) - } -} - -impl From for digest::Algorithm { - fn from(flags: RafsSuperFlags) -> Self { - match flags { - x if x.contains(RafsSuperFlags::HASH_BLAKE3) => digest::Algorithm::Blake3, - x if x.contains(RafsSuperFlags::HASH_SHA256) => digest::Algorithm::Sha256, - _ => digest::Algorithm::Blake3, - } - } -} - -impl From for RafsSuperFlags { - fn from(d: digest::Algorithm) -> RafsSuperFlags { - match d { - digest::Algorithm::Blake3 => RafsSuperFlags::HASH_BLAKE3, - digest::Algorithm::Sha256 => RafsSuperFlags::HASH_SHA256, - } - } -} - -impl From for compress::Algorithm { - fn from(flags: RafsSuperFlags) -> Self { - match flags { - x if x.contains(RafsSuperFlags::COMPRESSION_NONE) => compress::Algorithm::None, - x if x.contains(RafsSuperFlags::COMPRESSION_LZ4) => compress::Algorithm::Lz4Block, - x if x.contains(RafsSuperFlags::COMPRESSION_GZIP) => compress::Algorithm::GZip, - x if x.contains(RafsSuperFlags::COMPRESSION_ZSTD) => compress::Algorithm::Zstd, - _ => compress::Algorithm::Lz4Block, - } - } -} - -impl From for RafsSuperFlags { - fn from(c: compress::Algorithm) -> RafsSuperFlags { - match c { - compress::Algorithm::None => RafsSuperFlags::COMPRESSION_NONE, - compress::Algorithm::Lz4Block => RafsSuperFlags::COMPRESSION_LZ4, - compress::Algorithm::GZip => RafsSuperFlags::COMPRESSION_GZIP, - compress::Algorithm::Zstd => RafsSuperFlags::COMPRESSION_ZSTD, - } - } -} - -impl From for crypt::Algorithm { - fn from(flags: RafsSuperFlags) -> Self { - match flags { - // NOTE: only aes-128-xts encryption algorithm supported. - x if x.contains(RafsSuperFlags::ENCRYPTION_ASE_128_XTS) => crypt::Algorithm::Aes128Xts, - _ => crypt::Algorithm::None, - } - } -} - -impl From for RafsSuperFlags { - fn from(c: crypt::Algorithm) -> RafsSuperFlags { - match c { - // NOTE: only aes-128-xts encryption algorithm supported. - crypt::Algorithm::Aes128Xts => RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - _ => RafsSuperFlags::ENCRYPTION_NONE, - } - } -} - -/// Configuration information to check compatibility between RAFS filesystems. -#[derive(Clone, Copy, Debug)] -pub struct RafsSuperConfig { - /// RAFS filesystem version. - pub version: RafsVersion, - /// Compression algorithm. - pub compressor: compress::Algorithm, - /// Digest algorithm. - pub digester: digest::Algorithm, - /// Size of data chunks. - pub chunk_size: u32, - /// Size of batch data chunks. - pub batch_size: u32, - /// Whether `explicit_uidgid` enabled or not. - pub explicit_uidgid: bool, - /// RAFS in TARFS mode. 
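// This mirrors the TARTFS_MODE super block flag; check_compatibility() below
// returns an error when one filesystem is in tarfs mode and the other is not.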
- pub is_tarfs_mode: bool, -} - -#[derive(Error, Debug)] -pub enum MergeError { - #[error("Inconsistent RAFS Filesystem: {0}")] - InconsistentFilesystem(String), - #[error(transparent)] - Other(#[from] anyhow::Error), -} - -impl RafsSuperConfig { - /// Check compatibility for two RAFS filesystems. - pub fn check_compatibility(&self, meta: &RafsSuperMeta) -> anyhow::Result<()> { - ensure!( - self.chunk_size == meta.chunk_size, - MergeError::InconsistentFilesystem(format!( - "Inconsistent configuration of chunk_size: {} vs {}", - self.chunk_size, meta.chunk_size - )) - ); - - ensure!( - self.explicit_uidgid == meta.explicit_uidgid(), - MergeError::InconsistentFilesystem(format!( - "Using inconsistent explicit_uidgid setting {:?}, target explicit_uidgid setting {:?}", - self.explicit_uidgid, - meta.explicit_uidgid() - )) - ); - - let meta_version = RafsVersion::try_from(meta.version); - ensure!( - u32::from(self.version) == meta.version, - MergeError::InconsistentFilesystem(format!( - "Using inconsistent RAFS version {:?}, target RAFS version {:?}", - self.version, meta_version - )) - ); - - ensure!( - self.version != RafsVersion::V5 || self.digester == meta.get_digester(), - MergeError::InconsistentFilesystem(format!( - "RAFS v5 can not support different digest algorithm due to inode digest, {} vs {}", - self.digester, - meta.get_digester() - )) - ); - let is_tarfs_mode = meta.flags.contains(RafsSuperFlags::TARTFS_MODE); - ensure!( - is_tarfs_mode == self.is_tarfs_mode, - MergeError::InconsistentFilesystem("Using inconsistent RAFS TARFS mode".to_string(),) - ); - - Ok(()) - } -} - -/// Rafs filesystem meta-data cached from on disk RAFS super block. -#[derive(Clone, Copy, Debug, Serialize)] -pub struct RafsSuperMeta { - /// Filesystem magic number. - pub magic: u32, - /// Filesystem version number. - pub version: u32, - /// Size of on disk super block. - pub sb_size: u32, - /// Inode number of root inode. - pub root_inode: Inode, - /// Chunk size. - pub chunk_size: u32, - /// Batch chunk size. - pub batch_size: u32, - /// Number of inodes in the filesystem. - pub inodes_count: u64, - /// V5: superblock flags for Rafs v5. - pub flags: RafsSuperFlags, - /// Number of inode entries in inode offset table. - pub inode_table_entries: u32, - /// Offset of the inode offset table into the metadata blob. - pub inode_table_offset: u64, - /// Size of blob information table. - pub blob_table_size: u32, - /// Offset of the blob information table into the metadata blob. - pub blob_table_offset: u64, - /// Size of extended blob information table. - pub extended_blob_table_offset: u64, - /// Offset of the extended blob information table into the metadata blob. - pub extended_blob_table_entries: u32, - /// Number of RAFS v6 blob device entries in the devslot table. - pub blob_device_table_count: u32, - /// Offset of the RAFS v6 devslot table. - pub blob_device_table_offset: u64, - /// Offset of the inode prefetch table into the metadata blob. - pub prefetch_table_offset: u64, - /// Size of the inode prefetch table. - pub prefetch_table_entries: u32, - /// Default attribute timeout value. - pub attr_timeout: Duration, - /// Default inode timeout value. - pub entry_timeout: Duration, - /// Whether the RAFS instance is a chunk dictionary. - pub is_chunk_dict: bool, - /// Metadata block address for RAFS v6. - pub meta_blkaddr: u32, - /// Root nid for RAFS v6. - pub root_nid: u16, - /// Offset of the chunk table for RAFS v6. - pub chunk_table_offset: u64, - /// Size of the chunk table for RAFS v6. 
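// Together with chunk_table_offset above, this locates the RAFS v6 chunk
// information table inside the metadata blob; both values are copied from the
// extended super block when the image is loaded.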
- pub chunk_table_size: u64, -} - -impl RafsSuperMeta { - /// Check whether the superblock is for Rafs v5 filesystems. - pub fn is_v5(&self) -> bool { - self.version == RAFS_SUPER_VERSION_V5 - } - - /// Check whether the superblock is for Rafs v6 filesystems. - pub fn is_v6(&self) -> bool { - self.version == RAFS_SUPER_VERSION_V6 - } - - /// Check whether the RAFS instance is a chunk dictionary. - pub fn is_chunk_dict(&self) -> bool { - self.is_chunk_dict - } - - /// Check whether the explicit UID/GID feature has been enable or not. - pub fn explicit_uidgid(&self) -> bool { - self.flags.contains(RafsSuperFlags::EXPLICIT_UID_GID) - } - - /// Check whether the filesystem supports extended attribute or not. - pub fn has_xattr(&self) -> bool { - self.flags.contains(RafsSuperFlags::HAS_XATTR) - } - - /// Check whether data blobs have inlined chunk digest array. - pub fn has_inlined_chunk_digest(&self) -> bool { - self.is_v6() && self.flags.contains(RafsSuperFlags::INLINED_CHUNK_DIGEST) - } - - /// Get compression algorithm to handle chunk data for the filesystem. - pub fn get_compressor(&self) -> compress::Algorithm { - if self.is_v5() || self.is_v6() { - self.flags.into() - } else { - compress::Algorithm::None - } - } - - /// V5: get message digest algorithm to validate chunk data for the filesystem. - pub fn get_digester(&self) -> digest::Algorithm { - if self.is_v5() || self.is_v6() { - self.flags.into() - } else { - digest::Algorithm::Blake3 - } - } - - /// V6: Check whether any data blobs may be encrypted. - pub fn get_cipher(&self) -> crypt::Algorithm { - if self.is_v6() { - self.flags.into() - } else { - crypt::Algorithm::None - } - } - - /// Get `RafsSuperConfig` object to check compatibility. - pub fn get_config(&self) -> RafsSuperConfig { - RafsSuperConfig { - version: self.version.try_into().unwrap_or_default(), - compressor: self.get_compressor(), - digester: self.get_digester(), - chunk_size: self.chunk_size, - batch_size: self.batch_size, - explicit_uidgid: self.explicit_uidgid(), - is_tarfs_mode: self.flags.contains(RafsSuperFlags::TARTFS_MODE), - } - } -} - -impl Default for RafsSuperMeta { - fn default() -> Self { - RafsSuperMeta { - magic: 0, - version: 0, - sb_size: 0, - inodes_count: 0, - root_inode: 0, - chunk_size: 0, - batch_size: 0, - flags: RafsSuperFlags::empty(), - inode_table_entries: 0, - inode_table_offset: 0, - blob_table_size: 0, - blob_table_offset: 0, - extended_blob_table_offset: 0, - extended_blob_table_entries: 0, - blob_device_table_count: 0, - blob_device_table_offset: 0, - prefetch_table_offset: 0, - prefetch_table_entries: 0, - attr_timeout: Duration::from_secs(RAFS_DEFAULT_ATTR_TIMEOUT), - entry_timeout: Duration::from_secs(RAFS_DEFAULT_ENTRY_TIMEOUT), - meta_blkaddr: 0, - root_nid: 0, - is_chunk_dict: false, - chunk_table_offset: 0, - chunk_table_size: 0, - } - } -} - -/// RAFS filesystem versions. 
-#[derive(Clone, Copy, Debug, Default, PartialEq)] -pub enum RafsVersion { - /// RAFS v5 - #[default] - V5, - /// RAFS v6 - V6, -} - -impl TryFrom for RafsVersion { - type Error = Error; - - fn try_from(version: u32) -> std::result::Result { - if version == RAFS_SUPER_VERSION_V5 { - return Ok(RafsVersion::V5); - } else if version == RAFS_SUPER_VERSION_V6 { - return Ok(RafsVersion::V6); - } - Err(einval!(format!("invalid RAFS version number {}", version))) - } -} - -impl From for u32 { - fn from(v: RafsVersion) -> Self { - match v { - RafsVersion::V5 => RAFS_SUPER_VERSION_V5, - RafsVersion::V6 => RAFS_SUPER_VERSION_V6, - } - } -} - -impl std::fmt::Display for RafsVersion { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - RafsVersion::V5 => write!(f, "5"), - RafsVersion::V6 => write!(f, "6"), - } - } -} - -impl RafsVersion { - /// Check whether it's RAFS v5. - pub fn is_v5(&self) -> bool { - self == &Self::V5 - } - - /// Check whether it's RAFS v6. - pub fn is_v6(&self) -> bool { - self == &Self::V6 - } -} - -/// Rafs metadata working mode. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub enum RafsMode { - /// Directly mapping and accessing metadata into process by mmap(). - #[default] - Direct, - /// Read metadata into memory before using, for RAFS v5. - Cached, -} - -impl FromStr for RafsMode { - type Err = Error; - - fn from_str(s: &str) -> std::result::Result { - match s { - "direct" => Ok(Self::Direct), - "cached" => Ok(Self::Cached), - _ => Err(einval!("rafs mode should be direct or cached")), - } - } -} - -impl Display for RafsMode { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - match self { - Self::Direct => write!(f, "direct"), - Self::Cached => write!(f, "cached"), - } - } -} - -/// Cached Rafs super block and inode information. -pub struct RafsSuper { - /// Rafs metadata working mode. - pub mode: RafsMode, - /// Whether validate data read from storage backend. - pub validate_digest: bool, - /// Cached metadata from on disk super block. - pub meta: RafsSuperMeta, - /// Rafs filesystem super block. - pub superblock: Arc, -} - -impl Default for RafsSuper { - fn default() -> Self { - Self { - mode: RafsMode::Direct, - validate_digest: false, - meta: RafsSuperMeta::default(), - superblock: Arc::new(NoopSuperBlock::new()), - } - } -} - -impl RafsSuper { - /// Create a new `RafsSuper` instance from a `RafsConfigV2` object. - pub fn new(conf: &RafsConfigV2) -> Result { - Ok(Self { - mode: RafsMode::from_str(conf.mode.as_str())?, - validate_digest: conf.validate, - ..Default::default() - }) - } - - /// Destroy the filesystem super block. - pub fn destroy(&mut self) { - Arc::get_mut(&mut self.superblock) - .expect("Inodes are no longer used.") - .destroy(); - } - - /// Load Rafs super block from a metadata file. 
- pub fn load_from_file>( - path: P, - config: Arc, - is_chunk_dict: bool, - ) -> Result<(Self, RafsIoReader)> { - let validate_digest = config - .rafs - .as_ref() - .map(|rafs| rafs.validate) - .unwrap_or_default(); - let mut rs = RafsSuper { - mode: RafsMode::Direct, - validate_digest, - ..Default::default() - }; - rs.meta.is_chunk_dict = is_chunk_dict; - - // open bootstrap file - let file = OpenOptions::new() - .read(true) - .write(false) - .open(path.as_ref())?; - let mut reader = Box::new(file) as RafsIoReader; - let mut blob_accessible = config.internal.blob_accessible(); - - if let Err(e) = rs.load(&mut reader) { - let id = BlobInfo::get_blob_id_from_meta_path(path.as_ref())?; - let new_path = match TocEntryList::extract_rafs_meta(&id, config.clone()) { - Ok(v) => v, - Err(_e) => { - debug!("failed to load inlined RAFS meta, {}", _e); - return Err(e); - } - }; - let file = OpenOptions::new().read(true).write(false).open(new_path)?; - reader = Box::new(file) as RafsIoReader; - rs.load(&mut reader)?; - rs.set_blob_id_from_meta_path(path.as_ref())?; - blob_accessible = true; - } else { - // Backward compatibility: try to fix blob id for old converters. - // Old converters extracts bootstraps from data blobs with inlined bootstrap - // use blob digest as the bootstrap file name. The last blob in the blob table from - // the bootstrap has wrong blob id, so we need to fix it. - let blobs = rs.superblock.get_blob_infos(); - for blob in blobs.iter() { - // Fix blob id for new images with old converters. - if blob.has_feature(BlobFeatures::INLINED_FS_META) { - blob.set_blob_id_from_meta_path(path.as_ref())?; - } - } - } - - if !config.is_fs_cache() - && blob_accessible - && (validate_digest || config.is_chunk_validation_enabled()) - && rs.meta.has_inlined_chunk_digest() - { - rs.create_blob_device(config)?; - } - - Ok((rs, reader)) - } - - /// Load RAFS metadata and optionally cache inodes. - pub(crate) fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { - // Try to load the filesystem as Rafs v5 - if self.try_load_v5(r)? { - return Ok(()); - } - - if self.try_load_v6(r)? { - return Ok(()); - } - - Err(Error::new(ErrorKind::Other, "invalid RAFS superblock")) - } - - /// Set meta blob file path from which the `RafsSuper` object is loaded from. - /// - /// It's used to support inlined-meta and ZRan blobs. - pub fn set_blob_id_from_meta_path(&self, meta_path: &Path) -> Result<()> { - let blobs = self.superblock.get_blob_infos(); - for blob in blobs.iter() { - if blob.has_feature(BlobFeatures::INLINED_FS_META) - || !blob.has_feature(BlobFeatures::CAP_TAR_TOC) - { - blob.set_blob_id_from_meta_path(meta_path)?; - } - } - Ok(()) - } - - /// Create a `BlobDevice` object and associated it with the `RafsSuper` object. - /// - /// The `BlobDevice` object is needed to get meta information from RAFS V6 data blobs. - pub fn create_blob_device(&self, config: Arc) -> Result<()> { - let blobs = self.superblock.get_blob_infos(); - let device = BlobDevice::new(&config, &blobs)?; - self.superblock.set_blob_device(device); - Ok(()) - } - - /// Update the filesystem metadata and storage backend. - pub fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { - if self.meta.is_v5() { - self.skip_v5_superblock(r) - .map_err(RafsError::FillSuperBlock)?; - } - - self.superblock.update(r) - } - - /// Get the maximum inode number supported by the filesystem instance. - pub fn get_max_ino(&self) -> Inode { - self.superblock.get_max_ino() - } - - /// Get the `RafsInode` object corresponding to `ino`. 
- pub fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result> { - self.superblock.get_inode(ino, validate_inode) - } - - /// Get the `RafsInodeExt` object corresponding to `ino`. - pub fn get_extended_inode( - &self, - ino: Inode, - validate_inode: bool, - ) -> Result> { - self.superblock.get_extended_inode(ino, validate_inode) - } - - /// Convert a file path to an inode number. - pub fn ino_from_path(&self, f: &Path) -> Result { - let root_ino = self.superblock.root_ino(); - if f == Path::new("/") { - return Ok(root_ino); - } else if !f.starts_with("/") { - return Err(einval!()); - } - - let entries = f - .components() - .filter(|comp| *comp != Component::RootDir) - .map(|comp| match comp { - Component::Normal(name) => Some(name), - Component::ParentDir => Some(OsStr::from_bytes(DOTDOT.as_bytes())), - Component::CurDir => Some(OsStr::from_bytes(DOT.as_bytes())), - _ => None, - }) - .collect::>(); - if entries.is_empty() { - warn!("Path can't be parsed {:?}", f); - return Err(enoent!()); - } - - let mut parent = self.get_extended_inode(root_ino, self.validate_digest)?; - for p in entries { - match p { - None => { - error!("Illegal specified path {:?}", f); - return Err(einval!()); - } - Some(name) => { - parent = parent.get_child_by_name(name).map_err(|e| { - warn!("File {:?} not in RAFS filesystem, {}", name, e); - enoent!() - })?; - } - } - } - - Ok(parent.ino()) - } - - /// Prefetch filesystem and file data to improve performance. - /// - /// To improve application filesystem access performance, the filesystem may prefetch file or - /// metadata in advance. There are ways to configure the file list to be prefetched. - /// 1. Static file prefetch list configured during image building, recorded in prefetch list - /// in Rafs v5 file system metadata. - /// Base on prefetch table which is persisted to bootstrap when building image. - /// 2. Dynamic file prefetch list configured by command line. The dynamic file prefetch list - /// has higher priority and the static file prefetch list will be ignored if there's dynamic - /// prefetch list. When a directory is specified for dynamic prefetch list, all sub directory - /// and files under the directory will be prefetched. - /// - /// Each inode passed into should correspond to directory. And it already does the file type - /// check inside. - pub fn prefetch_files( - &self, - device: &BlobDevice, - r: &mut RafsIoReader, - root_ino: Inode, - files: Option>, - fetcher: &dyn Fn(&mut BlobIoVec, bool), - ) -> RafsResult { - // Try to prefetch files according to the list specified by the `--prefetch-files` option. - if let Some(files) = files { - // Avoid prefetching multiple times for hardlinks to the same file. - let mut hardlinks: HashSet = HashSet::new(); - let mut state = BlobIoMerge::default(); - for f_ino in files { - self.prefetch_data(device, f_ino, &mut state, &mut hardlinks, fetcher) - .map_err(|e| RafsError::Prefetch(e.to_string()))?; - } - for (_id, mut desc) in state.drain() { - fetcher(&mut desc, true); - } - // Flush the pending prefetch requests. 
- Ok(false) - } else if self.meta.is_v5() { - self.prefetch_data_v5(device, r, root_ino, fetcher) - } else if self.meta.is_v6() { - self.prefetch_data_v6(device, r, root_ino, fetcher) - } else { - Err(RafsError::Prefetch( - "Unknown filesystem version, prefetch disabled".to_string(), - )) - } - } - - #[inline] - fn prefetch_inode( - device: &BlobDevice, - inode: &Arc, - state: &mut BlobIoMerge, - hardlinks: &mut HashSet, - fetcher: &dyn Fn(&mut BlobIoVec, bool), - ) -> Result<()> { - // Check for duplicated hardlinks. - if inode.is_hardlink() { - if hardlinks.contains(&inode.ino()) { - return Ok(()); - } else { - hardlinks.insert(inode.ino()); - } - } - - let descs = inode.alloc_bio_vecs(device, 0, inode.size() as usize, false)?; - for desc in descs { - state.append(desc); - if let Some(desc) = state.get_current_element() { - fetcher(desc, false); - } - } - - Ok(()) - } - - fn prefetch_data( - &self, - device: &BlobDevice, - ino: u64, - state: &mut BlobIoMerge, - hardlinks: &mut HashSet, - fetcher: &dyn Fn(&mut BlobIoVec, bool), - ) -> Result<()> { - let inode = self - .superblock - .get_inode(ino, self.validate_digest) - .map_err(|_e| enoent!("Can't find inode"))?; - - if inode.is_dir() { - let mut descendants = Vec::new(); - let _ = inode.collect_descendants_inodes(&mut descendants)?; - for i in descendants.iter() { - Self::prefetch_inode(device, i, state, hardlinks, fetcher)?; - } - } else if !inode.is_empty_size() && inode.is_reg() { - // An empty regular file will also be packed into nydus image, - // then it has a size of zero. - // Moreover, for rafs v5, symlink has size of zero but non-zero size - // for symlink size. For rafs v6, symlink size is also represented by i_size. - // So we have to restrain the condition here. - Self::prefetch_inode(device, &inode, state, hardlinks, fetcher)?; - } - - Ok(()) - } -} - -// For nydus-image -impl RafsSuper { - /// Convert an inode number to a file path. - pub fn path_from_ino(&self, ino: Inode) -> Result { - if ino == self.superblock.root_ino() { - return Ok(self.get_extended_inode(ino, false)?.name().into()); - } - - let mut path = PathBuf::new(); - let mut cur_ino = ino; - let mut inode; - - loop { - inode = self.get_extended_inode(cur_ino, false)?; - let e: PathBuf = inode.name().into(); - path = e.join(path); - - if inode.ino() == self.superblock.root_ino() { - break; - } else { - cur_ino = inode.parent(); - } - } - - Ok(path) - } - - /// Get prefetched inos - pub fn get_prefetched_inos(&self, bootstrap: &mut RafsIoReader) -> Result> { - if self.meta.is_v5() { - let mut pt = RafsV5PrefetchTable::new(); - pt.load_prefetch_table_from( - bootstrap, - self.meta.prefetch_table_offset, - self.meta.prefetch_table_entries as usize, - )?; - Ok(pt.inodes) - } else { - let mut pt = RafsV6PrefetchTable::new(); - pt.load_prefetch_table_from( - bootstrap, - self.meta.prefetch_table_offset, - self.meta.prefetch_table_entries as usize, - )?; - Ok(pt.inodes) - } - } - - /// Walk through the file tree rooted at ino, calling cb for each file or directory - /// in the tree by DFS order, including ino, please ensure ino is a directory. - pub fn walk_directory>( - &self, - ino: Inode, - parent: Option
<&Path>
, - cb: &mut dyn FnMut(ArcRafsInodeExt, &Path) -> anyhow::Result<()>, - ) -> anyhow::Result<()> { - let inode = self.get_extended_inode(ino, false)?; - if !inode.is_dir() { - bail!("inode {} is not a directory", ino); - } - self.do_walk_directory(inode, parent, cb) - } - - #[allow(clippy::only_used_in_recursion)] - fn do_walk_directory>( - &self, - inode: Arc, - parent: Option
<&Path>
, - cb: &mut dyn FnMut(ArcRafsInodeExt, &Path) -> anyhow::Result<()>, - ) -> anyhow::Result<()> { - let path = if let Some(parent) = parent { - parent.as_ref().join(inode.name()) - } else { - PathBuf::from("/") - }; - cb(inode.clone(), &path)?; - if inode.is_dir() { - for idx in 0..inode.get_child_count() { - let child = inode.get_child_by_index(idx)?; - self.do_walk_directory(child, Some(&path), cb)?; - } - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_rafs_mode() { - assert!(RafsMode::from_str("").is_err()); - assert!(RafsMode::from_str("directed").is_err()); - assert!(RafsMode::from_str("Direct").is_err()); - assert!(RafsMode::from_str("Cached").is_err()); - assert_eq!(RafsMode::from_str("direct").unwrap(), RafsMode::Direct); - assert_eq!(RafsMode::from_str("cached").unwrap(), RafsMode::Cached); - assert_eq!(&format!("{}", RafsMode::Direct), "direct"); - assert_eq!(&format!("{}", RafsMode::Cached), "cached"); - } - - #[test] - fn test_rafs_compressor() { - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_NONE), - compress::Algorithm::None - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_GZIP), - compress::Algorithm::GZip - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_LZ4), - compress::Algorithm::Lz4Block - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::COMPRESSION_ZSTD), - compress::Algorithm::Zstd - ); - assert_eq!( - compress::Algorithm::from( - RafsSuperFlags::COMPRESSION_ZSTD | RafsSuperFlags::COMPRESSION_LZ4, - ), - compress::Algorithm::Lz4Block - ); - assert_eq!( - compress::Algorithm::from(RafsSuperFlags::empty()), - compress::Algorithm::Lz4Block - ); - } - - #[test] - fn test_rafs_digestor() { - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::HASH_BLAKE3), - digest::Algorithm::Blake3 - ); - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::HASH_SHA256), - digest::Algorithm::Sha256 - ); - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::HASH_SHA256 | RafsSuperFlags::HASH_BLAKE3,), - digest::Algorithm::Blake3 - ); - assert_eq!( - digest::Algorithm::from(RafsSuperFlags::empty()), - digest::Algorithm::Blake3 - ); - } - - #[test] - fn test_rafs_crypt_from() { - assert_eq!( - crypt::Algorithm::from(RafsSuperFlags::ENCRYPTION_ASE_128_XTS), - crypt::Algorithm::Aes128Xts - ); - assert_eq!( - crypt::Algorithm::from(RafsSuperFlags::empty()), - crypt::Algorithm::None - ); - } - - #[test] - fn test_rafs_super_meta() { - let mut meta = RafsSuperMeta::default(); - assert!(!meta.has_xattr()); - assert!(!meta.has_inlined_chunk_digest()); - assert_eq!(meta.get_compressor(), compress::Algorithm::None); - assert_eq!(meta.get_digester(), digest::Algorithm::Blake3); - assert_eq!(meta.get_cipher(), crypt::Algorithm::None); - - meta.version = RAFS_SUPER_VERSION_V6; - meta.flags |= RafsSuperFlags::INLINED_CHUNK_DIGEST; - meta.flags |= RafsSuperFlags::HASH_SHA256; - meta.flags |= RafsSuperFlags::COMPRESSION_GZIP; - meta.flags |= RafsSuperFlags::ENCRYPTION_ASE_128_XTS; - - assert!(meta.has_inlined_chunk_digest()); - assert_eq!(meta.get_compressor(), compress::Algorithm::GZip); - assert_eq!(meta.get_digester(), digest::Algorithm::Sha256); - assert_eq!(meta.get_cipher(), crypt::Algorithm::Aes128Xts); - - meta.version = RAFS_SUPER_VERSION_V5; - assert_eq!(meta.get_compressor(), compress::Algorithm::GZip); - assert_eq!(meta.get_digester(), digest::Algorithm::Sha256); - assert_eq!(meta.get_cipher(), crypt::Algorithm::None); - - let cfg = meta.get_config(); - 
assert!(cfg.check_compatibility(&meta).is_ok()); - } - - #[test] - fn test_rafs_super_new() { - let cfg = RafsConfigV2 { - mode: "direct".into(), - ..RafsConfigV2::default() - }; - let mut rs = RafsSuper::new(&cfg).unwrap(); - rs.destroy(); - } - - fn get_meta( - chunk_size: u32, - explice_uidgid: bool, - tartfs_mode: bool, - hash: RafsSuperFlags, - comp: RafsSuperFlags, - crypt: RafsSuperFlags, - version: u32, - ) -> RafsSuperMeta { - let mut meta = RafsSuperMeta { - chunk_size, - ..Default::default() - }; - if explice_uidgid { - meta.flags |= RafsSuperFlags::EXPLICIT_UID_GID; - } - if tartfs_mode { - meta.flags |= RafsSuperFlags::TARTFS_MODE; - } - meta.flags |= hash; - meta.flags |= comp; - meta.flags |= crypt; - meta.version = version; - meta - } - - #[test] - fn test_rafs_super_config_check_compatibility_fail() { - let meta1 = get_meta( - 1024 as u32, - true, - true, - RafsSuperFlags::HASH_BLAKE3, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - RAFS_SUPER_VERSION_V5, - ); - let meta2 = get_meta( - 2048 as u32, - true, - true, - RafsSuperFlags::HASH_BLAKE3, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - RAFS_SUPER_VERSION_V5, - ); - let meta3 = get_meta( - 1024 as u32, - false, - true, - RafsSuperFlags::HASH_BLAKE3, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - RAFS_SUPER_VERSION_V5, - ); - let meta4 = get_meta( - 1024 as u32, - true, - false, - RafsSuperFlags::HASH_BLAKE3, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - RAFS_SUPER_VERSION_V5, - ); - let meta5 = get_meta( - 1024 as u32, - true, - true, - RafsSuperFlags::HASH_SHA256, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_ASE_128_XTS, - RAFS_SUPER_VERSION_V5, - ); - let meta6 = get_meta( - 1024 as u32, - true, - true, - RafsSuperFlags::HASH_BLAKE3, - RafsSuperFlags::COMPRESSION_GZIP, - RafsSuperFlags::ENCRYPTION_NONE, - RAFS_SUPER_VERSION_V6, - ); - - assert!(meta1.get_config().check_compatibility(&meta2).is_err()); - assert!(meta1.get_config().check_compatibility(&meta3).is_err()); - assert!(meta1.get_config().check_compatibility(&meta4).is_err()); - assert!(meta1.get_config().check_compatibility(&meta5).is_err()); - assert!(meta1.get_config().check_compatibility(&meta6).is_err()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Enums, Structs and Traits to access and manage Rafs filesystem metadata. 
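+//!
+//! A minimal, hedged usage sketch; the bootstrap path and the pre-built `config`
+//! value are illustrative assumptions rather than fixtures shipped with this crate:
+//!
+//! ```ignore
+//! // `config: Arc<ConfigV2>` is assumed to be constructed elsewhere.
+//! let (rs, _reader) = RafsSuper::load_from_file("/path/to/bootstrap", config, false)?;
+//! let root = rs.get_extended_inode(rs.superblock.root_ino(), false)?;
+//! assert!(root.is_dir());
+//! ```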
+ +use std::any::Any; +use std::collections::{HashMap, HashSet}; +use std::convert::{TryFrom, TryInto}; +use std::ffi::{OsStr, OsString}; +use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; +use std::fs::OpenOptions; +use std::io::{Error, ErrorKind, Result}; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; +use std::time::Duration; +use thiserror::Error; + +use anyhow::{bail, ensure}; +use fuse_backend_rs::abi::fuse_abi::Attr; +use fuse_backend_rs::api::filesystem::Entry; +use nydus_api::{ConfigV2, RafsConfigV2}; +use nydus_storage::device::{ + BlobChunkInfo, BlobDevice, BlobFeatures, BlobInfo, BlobIoMerge, BlobIoVec, +}; +use nydus_storage::meta::toc::TocEntryList; +use nydus_utils::digest::{self, RafsDigest}; +use nydus_utils::{compress, crypt}; +use serde::Serialize; + +use self::layout::v5::RafsV5PrefetchTable; +use self::layout::v6::RafsV6PrefetchTable; +use self::layout::{XattrName, XattrValue, RAFS_SUPER_VERSION_V5, RAFS_SUPER_VERSION_V6}; +use self::noop::NoopSuperBlock; +use crate::fs::{RAFS_DEFAULT_ATTR_TIMEOUT, RAFS_DEFAULT_ENTRY_TIMEOUT}; +use crate::{RafsError, RafsIoReader, RafsIoWrite, RafsResult}; + +mod md_v5; +mod md_v6; +mod noop; + +pub mod cached_v5; +pub mod chunk; +pub mod direct_v5; +pub mod direct_v6; +pub mod inode; +pub mod layout; + +// Reexport from nydus_storage crate. +pub use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; + +/// Maximum size of blob identifier string. +pub const RAFS_BLOB_ID_MAX_LENGTH: usize = 64; +/// Block size reported by get_attr(). +pub const RAFS_ATTR_BLOCK_SIZE: u32 = 4096; +/// Maximum size of file name supported by RAFS. +pub const RAFS_MAX_NAME: usize = 255; +/// Maximum size of RAFS filesystem metadata blobs. +pub const RAFS_MAX_METADATA_SIZE: usize = 0x8000_0000; +/// File name for Unix current directory. +pub const DOT: &str = "."; +/// File name for Unix parent directory. +pub const DOTDOT: &str = ".."; + +/// Type for RAFS filesystem inode number. +pub type Inode = u64; +pub type ArcRafsInodeExt = Arc; + +#[derive(Debug, Clone)] +pub struct RafsBlobExtraInfo { + /// Mapped block address from RAFS v6 devslot table. + /// + /// It's the offset of the uncompressed blob used to convert an image into a disk. + pub mapped_blkaddr: u32, +} + +/// Trait to access filesystem inodes managed by a RAFS filesystem. +pub trait RafsSuperInodes { + /// Get the maximum inode number managed by the RAFS filesystem. + fn get_max_ino(&self) -> Inode; + + /// Get the `RafsInode` trait object corresponding to the inode number `ino`. + fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result>; + + /// Get the `RafsInodeExt` trait object corresponding to the 'ino`. + fn get_extended_inode(&self, ino: Inode, validate_inode: bool) + -> Result>; +} + +/// Trait to access RAFS filesystem metadata, including the RAFS super block and inodes. +pub trait RafsSuperBlock: RafsSuperInodes + Send + Sync { + /// Load and validate the RAFS filesystem super block from the specified reader. + fn load(&mut self, r: &mut RafsIoReader) -> Result<()>; + + /// Update/reload the RAFS filesystem super block from the specified reader. + fn update(&self, r: &mut RafsIoReader) -> RafsResult<()>; + + /// Destroy the RAFS filesystem super block object. + fn destroy(&mut self); + + /// Get all blob objects referenced by the RAFS filesystem. + fn get_blob_infos(&self) -> Vec>; + + /// Get extra information associated with blob objects. 
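+ ///
+ /// The default implementation returns an empty map; RAFS v6 superblocks may override
+ /// it to expose devslot information such as `mapped_blkaddr`. A hedged sketch, where
+ /// `sb` stands for any `RafsSuperBlock` implementor:
+ ///
+ /// ```ignore
+ /// for (blob_id, extra) in sb.get_blob_extra_infos()? {
+ ///     println!("blob {} mapped at block address {}", blob_id, extra.mapped_blkaddr);
+ /// }
+ /// ```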
+ fn get_blob_extra_infos(&self) -> Result> { + Ok(HashMap::new()) + } + + /// Get the inode number of the RAFS filesystem root. + fn root_ino(&self) -> u64; + + /// Get the `BlobChunkInfo` object by a chunk index, used by RAFS v6. + fn get_chunk_info(&self, _idx: usize) -> Result>; + + /// Associate `BlobDevice` object with the `RafsSuperBlock` object, used by RAFS v6. + fn set_blob_device(&self, blob_device: BlobDevice); +} + +/// Result codes for `RafsInodeWalkHandler`. +pub enum RafsInodeWalkAction { + /// Indicates the need to continue iterating + Continue, + /// Indicates that it is necessary to stop continuing to iterate + Break, +} + +/// Callback handler for RafsInode::walk_children_inodes(). +pub type RafsInodeWalkHandler<'a> = &'a mut dyn FnMut( + Option>, + OsString, + u64, + u64, +) -> Result; + +/// Trait to provide readonly accessors for RAFS filesystem inode. +/// +/// The RAFS filesystem is a readonly filesystem, so does its inodes. The `RafsInode` trait provides +/// readonly accessors for RAFS filesystem inode. The `nydus-image` crate provides its own +/// InodeWrapper to generate RAFS filesystem inodes. +pub trait RafsInode: Any { + /// RAFS: validate format and integrity of the RAFS filesystem inode. + /// + /// Inodes objects may be transmuted from raw buffers or loaded from untrusted source. + /// It must be validated for integrity before accessing any of its data fields . + fn validate(&self, max_inode: Inode, chunk_size: u64) -> Result<()>; + + /// RAFS: allocate blob io vectors to read file data in range [offset, offset + size). + fn alloc_bio_vecs( + &self, + device: &BlobDevice, + offset: u64, + size: usize, + user_io: bool, + ) -> Result>; + + /// RAFS: collect all descendants of the inode for image building. + fn collect_descendants_inodes( + &self, + descendants: &mut Vec>, + ) -> Result; + + /// Posix: generate a `Entry` object required by libc/fuse from the inode. + fn get_entry(&self) -> Entry; + + /// Posix: generate a posix `Attr` object required by libc/fuse from the inode. + fn get_attr(&self) -> Attr; + + /// Posix: get the inode number. + fn ino(&self) -> u64; + + /// Posix: get real device number. + fn rdev(&self) -> u32; + + /// Posix: get project id associated with the inode. + fn projid(&self) -> u32; + + /// Mode: check whether the inode is a block device. + fn is_blkdev(&self) -> bool; + + /// Mode: check whether the inode is a char device. + fn is_chrdev(&self) -> bool; + + /// Mode: check whether the inode is a sock. + fn is_sock(&self) -> bool; + + /// Mode: check whether the inode is a fifo. + fn is_fifo(&self) -> bool; + + /// Mode: check whether the inode is a directory. + fn is_dir(&self) -> bool; + + /// Mode: check whether the inode is a symlink. + fn is_symlink(&self) -> bool; + + /// Mode: check whether the inode is a regular file. + fn is_reg(&self) -> bool; + + /// Mode: check whether the inode is a hardlink. + fn is_hardlink(&self) -> bool; + + /// Xattr: check whether the inode has extended attributes. + fn has_xattr(&self) -> bool; + + /// Xattr: get the value of xattr with key `name`. + fn get_xattr(&self, name: &OsStr) -> Result>; + + /// Xattr: get all xattr keys. + fn get_xattrs(&self) -> Result>; + + /// Symlink: get the symlink target. + fn get_symlink(&self) -> Result; + + /// Symlink: get size of the symlink target path. + fn get_symlink_size(&self) -> u16; + + /// Directory: walk/enumerate child inodes. 
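+ ///
+ /// A hedged sketch of a walk handler that simply collects child names; the
+ /// `dir_inode` binding is an assumption for illustration:
+ ///
+ /// ```ignore
+ /// let mut names = Vec::new();
+ /// dir_inode.walk_children_inodes(0, &mut |_child, name, _ino, _offset| {
+ ///     names.push(name);
+ ///     Ok(RafsInodeWalkAction::Continue)
+ /// })?;
+ /// ```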
+ fn walk_children_inodes(&self, entry_offset: u64, handler: RafsInodeWalkHandler) -> Result<()>; + + /// Directory: get child inode by name. + fn get_child_by_name(&self, name: &OsStr) -> Result>; + + /// Directory: get child inode by child index, child index starts from 0. + fn get_child_by_index(&self, idx: u32) -> Result>; + + /// Directory: get number of child inodes. + fn get_child_count(&self) -> u32; + + /// Directory: get the inode number corresponding to the first child inode. + fn get_child_index(&self) -> Result; + + /// Regular: get size of file content + fn size(&self) -> u64; + + /// Regular: check whether the inode has no content. + fn is_empty_size(&self) -> bool { + self.size() == 0 + } + + /// Regular: get number of data chunks. + fn get_chunk_count(&self) -> u32; + + fn as_any(&self) -> &dyn Any; +} + +/// Extended inode information for builder and directory walker. +pub trait RafsInodeExt: RafsInode { + /// Convert to the base type `RafsInode`. + fn as_inode(&self) -> &dyn RafsInode; + + /// Posix: get inode number of the parent inode. + fn parent(&self) -> u64; + + /// Posix: get file name. + fn name(&self) -> OsString; + + /// Posix: get file name size. + fn get_name_size(&self) -> u16; + + /// RAFS V5: get RAFS v5 specific inode flags. + fn flags(&self) -> u64; + + /// RAFS v5: get digest value of the inode metadata. + fn get_digest(&self) -> RafsDigest; + + /// RAFS v5: get chunk info object by chunk index, chunk index starts from 0. + fn get_chunk_info(&self, idx: u32) -> Result>; +} + +/// Trait to write out RAFS filesystem meta objects into the metadata blob. +pub trait RafsStore { + /// Write out the Rafs filesystem meta object to the writer. + fn store(&self, w: &mut dyn RafsIoWrite) -> Result; +} + +bitflags! { + /// Rafs filesystem feature flags. + #[derive(Serialize)] + pub struct RafsSuperFlags: u64 { + /// Data chunks are not compressed. + const COMPRESSION_NONE = 0x0000_0001; + /// Data chunks are compressed with lz4_block. + const COMPRESSION_LZ4 = 0x0000_0002; + /// Use blake3 hash algorithm to calculate digest. + const HASH_BLAKE3 = 0x0000_0004; + /// Use sha256 hash algorithm to calculate digest. + const HASH_SHA256 = 0x0000_0008; + /// Inode has explicit uid gid fields. + /// + /// If unset, use nydusd process euid/egid for all inodes at runtime. + const EXPLICIT_UID_GID = 0x0000_0010; + /// Inode may have associated extended attributes. + const HAS_XATTR = 0x0000_0020; + /// Data chunks are compressed with gzip + const COMPRESSION_GZIP = 0x0000_0040; + /// Data chunks are compressed with zstd + const COMPRESSION_ZSTD = 0x0000_0080; + /// Chunk digests are inlined in RAFS v6 data blob. + const INLINED_CHUNK_DIGEST = 0x0000_0100; + /// RAFS works in Tarfs mode, which directly uses tar streams as data blobs. + const TARTFS_MODE = 0x0000_0200; + /// Data chunks are not encrypted. + const ENCRYPTION_NONE = 0x0100_0000; + /// Data chunks are encrypted with AES-128-XTS. + const ENCRYPTION_ASE_128_XTS = 0x0200_0000; + + // Reserved for future compatible changes. 
+ const PRESERVED_COMPAT_5 = 0x0400_0000; + const PRESERVED_COMPAT_4 = 0x0800_0000; + const PRESERVED_COMPAT_3 = 0x1000_0000; + const PRESERVED_COMPAT_2 = 0x2000_0000; + const PRESERVED_COMPAT_1 = 0x4000_0000; + const PRESERVED_COMPAT_0 = 0x8000_0000; + } +} + +impl Default for RafsSuperFlags { + fn default() -> Self { + RafsSuperFlags::empty() + } +} + +impl Display for RafsSuperFlags { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{:?}", self)?; + Ok(()) + } +} + +impl From for digest::Algorithm { + fn from(flags: RafsSuperFlags) -> Self { + match flags { + x if x.contains(RafsSuperFlags::HASH_BLAKE3) => digest::Algorithm::Blake3, + x if x.contains(RafsSuperFlags::HASH_SHA256) => digest::Algorithm::Sha256, + _ => digest::Algorithm::Blake3, + } + } +} + +impl From for RafsSuperFlags { + fn from(d: digest::Algorithm) -> RafsSuperFlags { + match d { + digest::Algorithm::Blake3 => RafsSuperFlags::HASH_BLAKE3, + digest::Algorithm::Sha256 => RafsSuperFlags::HASH_SHA256, + } + } +} + +impl From for compress::Algorithm { + fn from(flags: RafsSuperFlags) -> Self { + match flags { + x if x.contains(RafsSuperFlags::COMPRESSION_NONE) => compress::Algorithm::None, + x if x.contains(RafsSuperFlags::COMPRESSION_LZ4) => compress::Algorithm::Lz4Block, + x if x.contains(RafsSuperFlags::COMPRESSION_GZIP) => compress::Algorithm::GZip, + x if x.contains(RafsSuperFlags::COMPRESSION_ZSTD) => compress::Algorithm::Zstd, + _ => compress::Algorithm::Lz4Block, + } + } +} + +impl From for RafsSuperFlags { + fn from(c: compress::Algorithm) -> RafsSuperFlags { + match c { + compress::Algorithm::None => RafsSuperFlags::COMPRESSION_NONE, + compress::Algorithm::Lz4Block => RafsSuperFlags::COMPRESSION_LZ4, + compress::Algorithm::GZip => RafsSuperFlags::COMPRESSION_GZIP, + compress::Algorithm::Zstd => RafsSuperFlags::COMPRESSION_ZSTD, + } + } +} + +impl From for crypt::Algorithm { + fn from(flags: RafsSuperFlags) -> Self { + match flags { + // NOTE: only aes-128-xts encryption algorithm supported. + x if x.contains(RafsSuperFlags::ENCRYPTION_ASE_128_XTS) => crypt::Algorithm::Aes128Xts, + _ => crypt::Algorithm::None, + } + } +} + +impl From for RafsSuperFlags { + fn from(c: crypt::Algorithm) -> RafsSuperFlags { + match c { + // NOTE: only aes-128-xts encryption algorithm supported. + crypt::Algorithm::Aes128Xts => RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + _ => RafsSuperFlags::ENCRYPTION_NONE, + } + } +} + +/// Configuration information to check compatibility between RAFS filesystems. +#[derive(Clone, Copy, Debug)] +pub struct RafsSuperConfig { + /// RAFS filesystem version. + pub version: RafsVersion, + /// Compression algorithm. + pub compressor: compress::Algorithm, + /// Digest algorithm. + pub digester: digest::Algorithm, + /// Size of data chunks. + pub chunk_size: u32, + /// Size of batch data chunks. + pub batch_size: u32, + /// Whether `explicit_uidgid` enabled or not. + pub explicit_uidgid: bool, + /// RAFS in TARFS mode. + pub is_tarfs_mode: bool, +} + +#[derive(Error, Debug)] +pub enum MergeError { + #[error("Inconsistent RAFS Filesystem: {0}")] + InconsistentFilesystem(String), + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +impl RafsSuperConfig { + /// Check compatibility for two RAFS filesystems. 
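+ ///
+ /// Chunk size, `explicit_uidgid`, RAFS version, digest algorithm (for v5 only) and
+ /// TARFS mode must all match before two filesystems can be merged. A hedged sketch,
+ /// with `meta_a`/`meta_b` standing in for superblocks loaded elsewhere:
+ ///
+ /// ```ignore
+ /// let config = meta_a.get_config();
+ /// if let Err(e) = config.check_compatibility(&meta_b) {
+ ///     eprintln!("refusing to merge: {}", e);
+ /// }
+ /// ```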
+ pub fn check_compatibility(&self, meta: &RafsSuperMeta) -> anyhow::Result<()> { + ensure!( + self.chunk_size == meta.chunk_size, + MergeError::InconsistentFilesystem(format!( + "Inconsistent configuration of chunk_size: {} vs {}", + self.chunk_size, meta.chunk_size + )) + ); + + ensure!( + self.explicit_uidgid == meta.explicit_uidgid(), + MergeError::InconsistentFilesystem(format!( + "Using inconsistent explicit_uidgid setting {:?}, target explicit_uidgid setting {:?}", + self.explicit_uidgid, + meta.explicit_uidgid() + )) + ); + + let meta_version = RafsVersion::try_from(meta.version); + ensure!( + u32::from(self.version) == meta.version, + MergeError::InconsistentFilesystem(format!( + "Using inconsistent RAFS version {:?}, target RAFS version {:?}", + self.version, meta_version + )) + ); + + ensure!( + self.version != RafsVersion::V5 || self.digester == meta.get_digester(), + MergeError::InconsistentFilesystem(format!( + "RAFS v5 can not support different digest algorithm due to inode digest, {} vs {}", + self.digester, + meta.get_digester() + )) + ); + let is_tarfs_mode = meta.flags.contains(RafsSuperFlags::TARTFS_MODE); + ensure!( + is_tarfs_mode == self.is_tarfs_mode, + MergeError::InconsistentFilesystem("Using inconsistent RAFS TARFS mode".to_string(),) + ); + + Ok(()) + } +} + +/// Rafs filesystem meta-data cached from on disk RAFS super block. +#[derive(Clone, Copy, Debug, Serialize)] +pub struct RafsSuperMeta { + /// Filesystem magic number. + pub magic: u32, + /// Filesystem version number. + pub version: u32, + /// Size of on disk super block. + pub sb_size: u32, + /// Inode number of root inode. + pub root_inode: Inode, + /// Chunk size. + pub chunk_size: u32, + /// Batch chunk size. + pub batch_size: u32, + /// Number of inodes in the filesystem. + pub inodes_count: u64, + /// V5: superblock flags for Rafs v5. + pub flags: RafsSuperFlags, + /// Number of inode entries in inode offset table. + pub inode_table_entries: u32, + /// Offset of the inode offset table into the metadata blob. + pub inode_table_offset: u64, + /// Size of blob information table. + pub blob_table_size: u32, + /// Offset of the blob information table into the metadata blob. + pub blob_table_offset: u64, + /// Size of extended blob information table. + pub extended_blob_table_offset: u64, + /// Offset of the extended blob information table into the metadata blob. + pub extended_blob_table_entries: u32, + /// Number of RAFS v6 blob device entries in the devslot table. + pub blob_device_table_count: u32, + /// Offset of the RAFS v6 devslot table. + pub blob_device_table_offset: u64, + /// Offset of the inode prefetch table into the metadata blob. + pub prefetch_table_offset: u64, + /// Size of the inode prefetch table. + pub prefetch_table_entries: u32, + /// Default attribute timeout value. + pub attr_timeout: Duration, + /// Default inode timeout value. + pub entry_timeout: Duration, + /// Whether the RAFS instance is a chunk dictionary. + pub is_chunk_dict: bool, + /// Metadata block address for RAFS v6. + pub meta_blkaddr: u32, + /// Root nid for RAFS v6. + pub root_nid: u16, + /// Offset of the chunk table for RAFS v6. + pub chunk_table_offset: u64, + /// Size of the chunk table for RAFS v6. + pub chunk_table_size: u64, +} + +impl RafsSuperMeta { + /// Check whether the superblock is for Rafs v5 filesystems. + pub fn is_v5(&self) -> bool { + self.version == RAFS_SUPER_VERSION_V5 + } + + /// Check whether the superblock is for Rafs v6 filesystems. 
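+ ///
+ /// A small hedged sketch relying on the `Default` implementation for the other fields:
+ ///
+ /// ```ignore
+ /// let meta = RafsSuperMeta { version: RAFS_SUPER_VERSION_V6, ..Default::default() };
+ /// assert!(meta.is_v6() && !meta.is_v5());
+ /// ```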
+ pub fn is_v6(&self) -> bool { + self.version == RAFS_SUPER_VERSION_V6 + } + + /// Check whether the RAFS instance is a chunk dictionary. + pub fn is_chunk_dict(&self) -> bool { + self.is_chunk_dict + } + + /// Check whether the explicit UID/GID feature has been enable or not. + pub fn explicit_uidgid(&self) -> bool { + self.flags.contains(RafsSuperFlags::EXPLICIT_UID_GID) + } + + /// Check whether the filesystem supports extended attribute or not. + pub fn has_xattr(&self) -> bool { + self.flags.contains(RafsSuperFlags::HAS_XATTR) + } + + /// Check whether data blobs have inlined chunk digest array. + pub fn has_inlined_chunk_digest(&self) -> bool { + self.is_v6() && self.flags.contains(RafsSuperFlags::INLINED_CHUNK_DIGEST) + } + + /// Get compression algorithm to handle chunk data for the filesystem. + pub fn get_compressor(&self) -> compress::Algorithm { + if self.is_v5() || self.is_v6() { + self.flags.into() + } else { + compress::Algorithm::None + } + } + + /// V5: get message digest algorithm to validate chunk data for the filesystem. + pub fn get_digester(&self) -> digest::Algorithm { + if self.is_v5() || self.is_v6() { + self.flags.into() + } else { + digest::Algorithm::Blake3 + } + } + + /// V6: Check whether any data blobs may be encrypted. + pub fn get_cipher(&self) -> crypt::Algorithm { + if self.is_v6() { + self.flags.into() + } else { + crypt::Algorithm::None + } + } + + /// Get `RafsSuperConfig` object to check compatibility. + pub fn get_config(&self) -> RafsSuperConfig { + RafsSuperConfig { + version: self.version.try_into().unwrap_or_default(), + compressor: self.get_compressor(), + digester: self.get_digester(), + chunk_size: self.chunk_size, + batch_size: self.batch_size, + explicit_uidgid: self.explicit_uidgid(), + is_tarfs_mode: self.flags.contains(RafsSuperFlags::TARTFS_MODE), + } + } +} + +impl Default for RafsSuperMeta { + fn default() -> Self { + RafsSuperMeta { + magic: 0, + version: 0, + sb_size: 0, + inodes_count: 0, + root_inode: 0, + chunk_size: 0, + batch_size: 0, + flags: RafsSuperFlags::empty(), + inode_table_entries: 0, + inode_table_offset: 0, + blob_table_size: 0, + blob_table_offset: 0, + extended_blob_table_offset: 0, + extended_blob_table_entries: 0, + blob_device_table_count: 0, + blob_device_table_offset: 0, + prefetch_table_offset: 0, + prefetch_table_entries: 0, + attr_timeout: Duration::from_secs(RAFS_DEFAULT_ATTR_TIMEOUT), + entry_timeout: Duration::from_secs(RAFS_DEFAULT_ENTRY_TIMEOUT), + meta_blkaddr: 0, + root_nid: 0, + is_chunk_dict: false, + chunk_table_offset: 0, + chunk_table_size: 0, + } + } +} + +/// RAFS filesystem versions. +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub enum RafsVersion { + /// RAFS v5 + #[default] + V5, + /// RAFS v6 + V6, +} + +impl TryFrom for RafsVersion { + type Error = Error; + + fn try_from(version: u32) -> std::result::Result { + if version == RAFS_SUPER_VERSION_V5 { + return Ok(RafsVersion::V5); + } else if version == RAFS_SUPER_VERSION_V6 { + return Ok(RafsVersion::V6); + } + Err(einval!(format!("invalid RAFS version number {}", version))) + } +} + +impl From for u32 { + fn from(v: RafsVersion) -> Self { + match v { + RafsVersion::V5 => RAFS_SUPER_VERSION_V5, + RafsVersion::V6 => RAFS_SUPER_VERSION_V6, + } + } +} + +impl std::fmt::Display for RafsVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RafsVersion::V5 => write!(f, "5"), + RafsVersion::V6 => write!(f, "6"), + } + } +} + +impl RafsVersion { + /// Check whether it's RAFS v5. 
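+ ///
+ /// A hedged round-trip sketch between the on-disk version number and the enum:
+ ///
+ /// ```ignore
+ /// let v = RafsVersion::try_from(RAFS_SUPER_VERSION_V6)?;
+ /// assert!(v.is_v6() && !v.is_v5());
+ /// assert_eq!(u32::from(v), RAFS_SUPER_VERSION_V6);
+ /// ```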
+ pub fn is_v5(&self) -> bool { + self == &Self::V5 + } + + /// Check whether it's RAFS v6. + pub fn is_v6(&self) -> bool { + self == &Self::V6 + } +} + +/// Rafs metadata working mode. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub enum RafsMode { + /// Directly mapping and accessing metadata into process by mmap(). + #[default] + Direct, + /// Read metadata into memory before using, for RAFS v5. + Cached, +} + +impl FromStr for RafsMode { + type Err = Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "direct" => Ok(Self::Direct), + "cached" => Ok(Self::Cached), + _ => Err(einval!("rafs mode should be direct or cached")), + } + } +} + +impl Display for RafsMode { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + match self { + Self::Direct => write!(f, "direct"), + Self::Cached => write!(f, "cached"), + } + } +} + +/// Cached Rafs super block and inode information. +pub struct RafsSuper { + /// Rafs metadata working mode. + pub mode: RafsMode, + /// Whether validate data read from storage backend. + pub validate_digest: bool, + /// Cached metadata from on disk super block. + pub meta: RafsSuperMeta, + /// Rafs filesystem super block. + pub superblock: Arc, +} + +impl Default for RafsSuper { + fn default() -> Self { + Self { + mode: RafsMode::Direct, + validate_digest: false, + meta: RafsSuperMeta::default(), + superblock: Arc::new(NoopSuperBlock::new()), + } + } +} + +impl RafsSuper { + /// Create a new `RafsSuper` instance from a `RafsConfigV2` object. + pub fn new(conf: &RafsConfigV2) -> Result { + Ok(Self { + mode: RafsMode::from_str(conf.mode.as_str())?, + validate_digest: conf.validate, + ..Default::default() + }) + } + + /// Destroy the filesystem super block. + pub fn destroy(&mut self) { + Arc::get_mut(&mut self.superblock) + .expect("Inodes are no longer used.") + .destroy(); + } + + /// Load Rafs super block from a metadata file. + pub fn load_from_file>( + path: P, + config: Arc, + is_chunk_dict: bool, + ) -> Result<(Self, RafsIoReader)> { + let validate_digest = config + .rafs + .as_ref() + .map(|rafs| rafs.validate) + .unwrap_or_default(); + let mut rs = RafsSuper { + mode: RafsMode::Direct, + validate_digest, + ..Default::default() + }; + rs.meta.is_chunk_dict = is_chunk_dict; + + // open bootstrap file + let file = OpenOptions::new() + .read(true) + .write(false) + .open(path.as_ref())?; + let mut reader = Box::new(file) as RafsIoReader; + let mut blob_accessible = config.internal.blob_accessible(); + + if let Err(e) = rs.load(&mut reader) { + let id = BlobInfo::get_blob_id_from_meta_path(path.as_ref())?; + let new_path = match TocEntryList::extract_rafs_meta(&id, config.clone()) { + Ok(v) => v, + Err(_e) => { + debug!("failed to load inlined RAFS meta, {}", _e); + return Err(e); + } + }; + let file = OpenOptions::new().read(true).write(false).open(new_path)?; + reader = Box::new(file) as RafsIoReader; + rs.load(&mut reader)?; + rs.set_blob_id_from_meta_path(path.as_ref())?; + blob_accessible = true; + } else { + // Backward compatibility: try to fix blob id for old converters. + // Old converters extracts bootstraps from data blobs with inlined bootstrap + // use blob digest as the bootstrap file name. The last blob in the blob table from + // the bootstrap has wrong blob id, so we need to fix it. + let blobs = rs.superblock.get_blob_infos(); + for blob in blobs.iter() { + // Fix blob id for new images with old converters. 
+ if blob.has_feature(BlobFeatures::INLINED_FS_META) { + blob.set_blob_id_from_meta_path(path.as_ref())?; + } + } + } + + if !config.is_fs_cache() + && blob_accessible + && (validate_digest || config.is_chunk_validation_enabled()) + && rs.meta.has_inlined_chunk_digest() + { + rs.create_blob_device(config)?; + } + + Ok((rs, reader)) + } + + /// Load RAFS metadata and optionally cache inodes. + pub(crate) fn load(&mut self, r: &mut RafsIoReader) -> Result<()> { + // Try to load the filesystem as Rafs v5 + if self.try_load_v5(r)? { + return Ok(()); + } + + if self.try_load_v6(r)? { + return Ok(()); + } + + Err(Error::new(ErrorKind::Other, "invalid RAFS superblock")) + } + + /// Set meta blob file path from which the `RafsSuper` object is loaded from. + /// + /// It's used to support inlined-meta and ZRan blobs. + pub fn set_blob_id_from_meta_path(&self, meta_path: &Path) -> Result<()> { + let blobs = self.superblock.get_blob_infos(); + for blob in blobs.iter() { + if blob.has_feature(BlobFeatures::INLINED_FS_META) + || !blob.has_feature(BlobFeatures::CAP_TAR_TOC) + { + blob.set_blob_id_from_meta_path(meta_path)?; + } + } + Ok(()) + } + + /// Create a `BlobDevice` object and associated it with the `RafsSuper` object. + /// + /// The `BlobDevice` object is needed to get meta information from RAFS V6 data blobs. + pub fn create_blob_device(&self, config: Arc) -> Result<()> { + let blobs = self.superblock.get_blob_infos(); + let device = BlobDevice::new(&config, &blobs)?; + self.superblock.set_blob_device(device); + Ok(()) + } + + /// Update the filesystem metadata and storage backend. + pub fn update(&self, r: &mut RafsIoReader) -> RafsResult<()> { + if self.meta.is_v5() { + self.skip_v5_superblock(r) + .map_err(RafsError::FillSuperBlock)?; + } + + self.superblock.update(r) + } + + /// Get the maximum inode number supported by the filesystem instance. + pub fn get_max_ino(&self) -> Inode { + self.superblock.get_max_ino() + } + + /// Get the `RafsInode` object corresponding to `ino`. + pub fn get_inode(&self, ino: Inode, validate_inode: bool) -> Result> { + self.superblock.get_inode(ino, validate_inode) + } + + /// Get the `RafsInodeExt` object corresponding to `ino`. + pub fn get_extended_inode( + &self, + ino: Inode, + validate_inode: bool, + ) -> Result> { + self.superblock.get_extended_inode(ino, validate_inode) + } + + /// Convert a file path to an inode number. + pub fn ino_from_path(&self, f: &Path) -> Result { + let root_ino = self.superblock.root_ino(); + if f == Path::new("/") { + return Ok(root_ino); + } else if !f.starts_with("/") { + return Err(einval!()); + } + + let entries = f + .components() + .filter(|comp| *comp != Component::RootDir) + .map(|comp| match comp { + Component::Normal(name) => Some(name), + Component::ParentDir => Some(OsStr::from_bytes(DOTDOT.as_bytes())), + Component::CurDir => Some(OsStr::from_bytes(DOT.as_bytes())), + _ => None, + }) + .collect::>(); + if entries.is_empty() { + warn!("Path can't be parsed {:?}", f); + return Err(enoent!()); + } + + let mut parent = self.get_extended_inode(root_ino, self.validate_digest)?; + for p in entries { + match p { + None => { + error!("Illegal specified path {:?}", f); + return Err(einval!()); + } + Some(name) => { + parent = parent.get_child_by_name(name).map_err(|e| { + warn!("File {:?} not in RAFS filesystem, {}", name, e); + enoent!() + })?; + } + } + } + + Ok(parent.ino()) + } + + /// Prefetch filesystem and file data to improve performance. 
+ /// + /// To improve application filesystem access performance, the filesystem may prefetch file or + /// metadata in advance. There are ways to configure the file list to be prefetched. + /// 1. Static file prefetch list configured during image building, recorded in prefetch list + /// in Rafs v5 file system metadata. + /// Base on prefetch table which is persisted to bootstrap when building image. + /// 2. Dynamic file prefetch list configured by command line. The dynamic file prefetch list + /// has higher priority and the static file prefetch list will be ignored if there's dynamic + /// prefetch list. When a directory is specified for dynamic prefetch list, all sub directory + /// and files under the directory will be prefetched. + /// + /// Each inode passed into should correspond to directory. And it already does the file type + /// check inside. + pub fn prefetch_files( + &self, + device: &BlobDevice, + r: &mut RafsIoReader, + root_ino: Inode, + files: Option>, + fetcher: &dyn Fn(&mut BlobIoVec, bool), + ) -> RafsResult { + // Try to prefetch files according to the list specified by the `--prefetch-files` option. + if let Some(files) = files { + // Avoid prefetching multiple times for hardlinks to the same file. + let mut hardlinks: HashSet = HashSet::new(); + let mut state = BlobIoMerge::default(); + for f_ino in files { + self.prefetch_data(device, f_ino, &mut state, &mut hardlinks, fetcher) + .map_err(|e| RafsError::Prefetch(e.to_string()))?; + } + for (_id, mut desc) in state.drain() { + fetcher(&mut desc, true); + } + // Flush the pending prefetch requests. + Ok(false) + } else if self.meta.is_v5() { + self.prefetch_data_v5(device, r, root_ino, fetcher) + } else if self.meta.is_v6() { + self.prefetch_data_v6(device, r, root_ino, fetcher) + } else { + Err(RafsError::Prefetch( + "Unknown filesystem version, prefetch disabled".to_string(), + )) + } + } + + #[inline] + fn prefetch_inode( + device: &BlobDevice, + inode: &Arc, + state: &mut BlobIoMerge, + hardlinks: &mut HashSet, + fetcher: &dyn Fn(&mut BlobIoVec, bool), + ) -> Result<()> { + // Check for duplicated hardlinks. + if inode.is_hardlink() { + if hardlinks.contains(&inode.ino()) { + return Ok(()); + } else { + hardlinks.insert(inode.ino()); + } + } + + let descs = inode.alloc_bio_vecs(device, 0, inode.size() as usize, false)?; + for desc in descs { + state.append(desc); + if let Some(desc) = state.get_current_element() { + fetcher(desc, false); + } + } + + Ok(()) + } + + fn prefetch_data( + &self, + device: &BlobDevice, + ino: u64, + state: &mut BlobIoMerge, + hardlinks: &mut HashSet, + fetcher: &dyn Fn(&mut BlobIoVec, bool), + ) -> Result<()> { + let inode = self + .superblock + .get_inode(ino, self.validate_digest) + .map_err(|_e| enoent!("Can't find inode"))?; + + if inode.is_dir() { + let mut descendants = Vec::new(); + let _ = inode.collect_descendants_inodes(&mut descendants)?; + for i in descendants.iter() { + Self::prefetch_inode(device, i, state, hardlinks, fetcher)?; + } + } else if !inode.is_empty_size() && inode.is_reg() { + // An empty regular file will also be packed into nydus image, + // then it has a size of zero. + // Moreover, for rafs v5, symlink has size of zero but non-zero size + // for symlink size. For rafs v6, symlink size is also represented by i_size. + // So we have to restrain the condition here. + Self::prefetch_inode(device, &inode, state, hardlinks, fetcher)?; + } + + Ok(()) + } +} + +// For nydus-image +impl RafsSuper { + /// Convert an inode number to a file path. 
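+ ///
+ /// A hedged round-trip sketch with `ino_from_path()`, assuming `rs` is a loaded
+ /// `RafsSuper` and the file exists in the image:
+ ///
+ /// ```ignore
+ /// let ino = rs.ino_from_path(Path::new("/etc/os-release"))?;
+ /// let path = rs.path_from_ino(ino)?; // expected to resolve back to the same file
+ /// ```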
+ pub fn path_from_ino(&self, ino: Inode) -> Result { + if ino == self.superblock.root_ino() { + return Ok(self.get_extended_inode(ino, false)?.name().into()); + } + + let mut path = PathBuf::new(); + let mut cur_ino = ino; + let mut inode; + + loop { + inode = self.get_extended_inode(cur_ino, false)?; + let e: PathBuf = inode.name().into(); + path = e.join(path); + + if inode.ino() == self.superblock.root_ino() { + break; + } else { + cur_ino = inode.parent(); + } + } + + Ok(path) + } + + /// Get prefetched inos + pub fn get_prefetched_inos(&self, bootstrap: &mut RafsIoReader) -> Result> { + if self.meta.is_v5() { + let mut pt = RafsV5PrefetchTable::new(); + pt.load_prefetch_table_from( + bootstrap, + self.meta.prefetch_table_offset, + self.meta.prefetch_table_entries as usize, + )?; + Ok(pt.inodes) + } else { + let mut pt = RafsV6PrefetchTable::new(); + pt.load_prefetch_table_from( + bootstrap, + self.meta.prefetch_table_offset, + self.meta.prefetch_table_entries as usize, + )?; + Ok(pt.inodes) + } + } + + /// Walk through the file tree rooted at ino, calling cb for each file or directory + /// in the tree by DFS order, including ino, please ensure ino is a directory. + pub fn walk_directory>( + &self, + ino: Inode, + parent: Option
<&Path>
, + cb: &mut dyn FnMut(ArcRafsInodeExt, &Path) -> anyhow::Result<()>, + ) -> anyhow::Result<()> { + let inode = self.get_extended_inode(ino, false)?; + if !inode.is_dir() { + bail!("inode {} is not a directory", ino); + } + self.do_walk_directory(inode, parent, cb) + } + + #[allow(clippy::only_used_in_recursion)] + fn do_walk_directory>( + &self, + inode: Arc, + parent: Option
<&Path>
, + cb: &mut dyn FnMut(ArcRafsInodeExt, &Path) -> anyhow::Result<()>, + ) -> anyhow::Result<()> { + let path = if let Some(parent) = parent { + parent.as_ref().join(inode.name()) + } else { + PathBuf::from("/") + }; + cb(inode.clone(), &path)?; + if inode.is_dir() { + for idx in 0..inode.get_child_count() { + let child = inode.get_child_by_index(idx)?; + self.do_walk_directory(child, Some(&path), cb)?; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rafs_mode() { + assert!(RafsMode::from_str("").is_err()); + assert!(RafsMode::from_str("directed").is_err()); + assert!(RafsMode::from_str("Direct").is_err()); + assert!(RafsMode::from_str("Cached").is_err()); + assert_eq!(RafsMode::from_str("direct").unwrap(), RafsMode::Direct); + assert_eq!(RafsMode::from_str("cached").unwrap(), RafsMode::Cached); + assert_eq!(&format!("{}", RafsMode::Direct), "direct"); + assert_eq!(&format!("{}", RafsMode::Cached), "cached"); + } + + #[test] + fn test_rafs_compressor() { + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_NONE), + compress::Algorithm::None + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_GZIP), + compress::Algorithm::GZip + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_LZ4), + compress::Algorithm::Lz4Block + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::COMPRESSION_ZSTD), + compress::Algorithm::Zstd + ); + assert_eq!( + compress::Algorithm::from( + RafsSuperFlags::COMPRESSION_ZSTD | RafsSuperFlags::COMPRESSION_LZ4, + ), + compress::Algorithm::Lz4Block + ); + assert_eq!( + compress::Algorithm::from(RafsSuperFlags::empty()), + compress::Algorithm::Lz4Block + ); + } + + #[test] + fn test_rafs_digestor() { + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::HASH_BLAKE3), + digest::Algorithm::Blake3 + ); + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::HASH_SHA256), + digest::Algorithm::Sha256 + ); + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::HASH_SHA256 | RafsSuperFlags::HASH_BLAKE3,), + digest::Algorithm::Blake3 + ); + assert_eq!( + digest::Algorithm::from(RafsSuperFlags::empty()), + digest::Algorithm::Blake3 + ); + } + + #[test] + fn test_rafs_crypt_from() { + assert_eq!( + crypt::Algorithm::from(RafsSuperFlags::ENCRYPTION_ASE_128_XTS), + crypt::Algorithm::Aes128Xts + ); + assert_eq!( + crypt::Algorithm::from(RafsSuperFlags::empty()), + crypt::Algorithm::None + ); + } + + #[test] + fn test_rafs_super_meta() { + let mut meta = RafsSuperMeta::default(); + assert!(!meta.has_xattr()); + assert!(!meta.has_inlined_chunk_digest()); + assert_eq!(meta.get_compressor(), compress::Algorithm::None); + assert_eq!(meta.get_digester(), digest::Algorithm::Blake3); + assert_eq!(meta.get_cipher(), crypt::Algorithm::None); + + meta.version = RAFS_SUPER_VERSION_V6; + meta.flags |= RafsSuperFlags::INLINED_CHUNK_DIGEST; + meta.flags |= RafsSuperFlags::HASH_SHA256; + meta.flags |= RafsSuperFlags::COMPRESSION_GZIP; + meta.flags |= RafsSuperFlags::ENCRYPTION_ASE_128_XTS; + + assert!(meta.has_inlined_chunk_digest()); + assert_eq!(meta.get_compressor(), compress::Algorithm::GZip); + assert_eq!(meta.get_digester(), digest::Algorithm::Sha256); + assert_eq!(meta.get_cipher(), crypt::Algorithm::Aes128Xts); + + meta.version = RAFS_SUPER_VERSION_V5; + assert_eq!(meta.get_compressor(), compress::Algorithm::GZip); + assert_eq!(meta.get_digester(), digest::Algorithm::Sha256); + assert_eq!(meta.get_cipher(), crypt::Algorithm::None); + + let cfg = meta.get_config(); + 
assert!(cfg.check_compatibility(&meta).is_ok()); + } + + #[test] + fn test_rafs_super_new() { + let cfg = RafsConfigV2 { + mode: "direct".into(), + ..RafsConfigV2::default() + }; + let mut rs = RafsSuper::new(&cfg).unwrap(); + rs.destroy(); + } + + fn get_meta( + chunk_size: u32, + explice_uidgid: bool, + tartfs_mode: bool, + hash: RafsSuperFlags, + comp: RafsSuperFlags, + crypt: RafsSuperFlags, + version: u32, + ) -> RafsSuperMeta { + let mut meta = RafsSuperMeta { + chunk_size, + ..Default::default() + }; + if explice_uidgid { + meta.flags |= RafsSuperFlags::EXPLICIT_UID_GID; + } + if tartfs_mode { + meta.flags |= RafsSuperFlags::TARTFS_MODE; + } + meta.flags |= hash; + meta.flags |= comp; + meta.flags |= crypt; + meta.version = version; + meta + } + + #[test] + fn test_rafs_super_config_check_compatibility_fail() { + let meta1 = get_meta( + 1024 as u32, + true, + true, + RafsSuperFlags::HASH_BLAKE3, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + RAFS_SUPER_VERSION_V5, + ); + let meta2 = get_meta( + 2048 as u32, + true, + true, + RafsSuperFlags::HASH_BLAKE3, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + RAFS_SUPER_VERSION_V5, + ); + let meta3 = get_meta( + 1024 as u32, + false, + true, + RafsSuperFlags::HASH_BLAKE3, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + RAFS_SUPER_VERSION_V5, + ); + let meta4 = get_meta( + 1024 as u32, + true, + false, + RafsSuperFlags::HASH_BLAKE3, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + RAFS_SUPER_VERSION_V5, + ); + let meta5 = get_meta( + 1024 as u32, + true, + true, + RafsSuperFlags::HASH_SHA256, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_ASE_128_XTS, + RAFS_SUPER_VERSION_V5, + ); + let meta6 = get_meta( + 1024 as u32, + true, + true, + RafsSuperFlags::HASH_BLAKE3, + RafsSuperFlags::COMPRESSION_GZIP, + RafsSuperFlags::ENCRYPTION_NONE, + RAFS_SUPER_VERSION_V6, + ); + + assert!(meta1.get_config().check_compatibility(&meta2).is_err()); + assert!(meta1.get_config().check_compatibility(&meta3).is_err()); + assert!(meta1.get_config().check_compatibility(&meta4).is_err()); + assert!(meta1.get_config().check_compatibility(&meta5).is_err()); + assert!(meta1.get_config().check_compatibility(&meta6).is_err()); + } +} diff --git a/rafs/src/metadata/noop.rs b/rafs/src/metadata/noop.rs index 2c16a289b2a..4e2856fea1b 100644 --- a/rafs/src/metadata/noop.rs +++ b/rafs/src/metadata/noop.rs @@ -1,121 +1,121 @@ -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A noop meta data driver for place-holding. 
- -use std::io::Result; -use std::sync::Arc; - -use storage::device::{BlobChunkInfo, BlobDevice, BlobInfo}; - -use crate::metadata::{Inode, RafsInode, RafsSuperBlock, RafsSuperInodes}; -use crate::{RafsInodeExt, RafsIoReader, RafsResult}; - -#[derive(Default)] -pub struct NoopSuperBlock {} - -impl NoopSuperBlock { - pub fn new() -> Self { - Self::default() - } -} - -impl RafsSuperInodes for NoopSuperBlock { - fn get_max_ino(&self) -> Inode { - unimplemented!() - } - - fn get_inode(&self, _ino: Inode, _digest_validate: bool) -> Result> { - unimplemented!() - } - - fn get_extended_inode( - &self, - _ino: Inode, - _validate_digest: bool, - ) -> Result> { - unimplemented!() - } -} - -impl RafsSuperBlock for NoopSuperBlock { - fn load(&mut self, _r: &mut RafsIoReader) -> Result<()> { - unimplemented!() - } - - fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { - unimplemented!() - } - - fn destroy(&mut self) {} - - fn get_blob_infos(&self) -> Vec> { - Vec::new() - } - - fn root_ino(&self) -> u64 { - unimplemented!() - } - - fn get_chunk_info(&self, _idx: usize) -> Result> { - unimplemented!("used by RAFS v6 only") - } - - fn set_blob_device(&self, _blob_device: BlobDevice) { - unimplemented!("used by RAFS v6 only") - } -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - #[should_panic] - fn test_get_max_ino() { - let blk = NoopSuperBlock::new(); - blk.get_max_ino(); - } - - #[test] - #[should_panic] - fn test_get_inode() { - let blk = NoopSuperBlock::new(); - blk.get_inode(Inode::default(), false).unwrap(); - } - - #[test] - #[should_panic] - fn test_get_extended_inode() { - let blk = NoopSuperBlock::new(); - blk.get_extended_inode(Inode::default(), false).unwrap(); - } - - #[test] - #[should_panic] - fn test_root_ino() { - let blk = NoopSuperBlock::new(); - blk.root_ino(); - } - - #[test] - #[should_panic] - fn test_get_chunk_info() { - let blk = NoopSuperBlock::new(); - blk.get_chunk_info(0).unwrap(); - } - - #[test] - #[should_panic] - fn test_set_blob_device() { - let blk = NoopSuperBlock::new(); - blk.set_blob_device(BlobDevice::default()); - } - - #[test] - fn test_noop_super_block() { - let mut blk = NoopSuperBlock::new(); - assert!(blk.get_blob_infos().is_empty()); - blk.destroy(); - } -} +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A noop meta data driver for place-holding. 
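// A minimal, self-contained sketch (illustrative only; the `Stub` type below is
// made up and not part of this diff) of the `#[should_panic]` pattern that the
// new tests for this no-op driver rely on: the stubbed trait methods call
// `unimplemented!()`, so invoking one panics, and the test passes only if it does.
struct Stub;

impl Stub {
    // Placeholder method standing in for an unimplemented trait method.
    fn not_ready(&self) -> u64 {
        unimplemented!()
    }
}

#[test]
#[should_panic]
fn stub_panics_when_called() {
    // `unimplemented!()` panics here, which is exactly what the test expects.
    Stub.not_ready();
}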
+ +use std::io::Result; +use std::sync::Arc; + +use storage::device::{BlobChunkInfo, BlobDevice, BlobInfo}; + +use crate::metadata::{Inode, RafsInode, RafsSuperBlock, RafsSuperInodes}; +use crate::{RafsInodeExt, RafsIoReader, RafsResult}; + +#[derive(Default)] +pub struct NoopSuperBlock {} + +impl NoopSuperBlock { + pub fn new() -> Self { + Self::default() + } +} + +impl RafsSuperInodes for NoopSuperBlock { + fn get_max_ino(&self) -> Inode { + unimplemented!() + } + + fn get_inode(&self, _ino: Inode, _digest_validate: bool) -> Result> { + unimplemented!() + } + + fn get_extended_inode( + &self, + _ino: Inode, + _validate_digest: bool, + ) -> Result> { + unimplemented!() + } +} + +impl RafsSuperBlock for NoopSuperBlock { + fn load(&mut self, _r: &mut RafsIoReader) -> Result<()> { + unimplemented!() + } + + fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { + unimplemented!() + } + + fn destroy(&mut self) {} + + fn get_blob_infos(&self) -> Vec> { + Vec::new() + } + + fn root_ino(&self) -> u64 { + unimplemented!() + } + + fn get_chunk_info(&self, _idx: usize) -> Result> { + unimplemented!("used by RAFS v6 only") + } + + fn set_blob_device(&self, _blob_device: BlobDevice) { + unimplemented!("used by RAFS v6 only") + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + #[should_panic] + fn test_get_max_ino() { + let blk = NoopSuperBlock::new(); + blk.get_max_ino(); + } + + #[test] + #[should_panic] + fn test_get_inode() { + let blk = NoopSuperBlock::new(); + blk.get_inode(Inode::default(), false).unwrap(); + } + + #[test] + #[should_panic] + fn test_get_extended_inode() { + let blk = NoopSuperBlock::new(); + blk.get_extended_inode(Inode::default(), false).unwrap(); + } + + #[test] + #[should_panic] + fn test_root_ino() { + let blk = NoopSuperBlock::new(); + blk.root_ino(); + } + + #[test] + #[should_panic] + fn test_get_chunk_info() { + let blk = NoopSuperBlock::new(); + blk.get_chunk_info(0).unwrap(); + } + + #[test] + #[should_panic] + fn test_set_blob_device() { + let blk = NoopSuperBlock::new(); + blk.set_blob_device(BlobDevice::default()); + } + + #[test] + fn test_noop_super_block() { + let mut blk = NoopSuperBlock::new(); + assert!(blk.get_blob_infos().is_empty()); + blk.destroy(); + } +} diff --git a/rafs/src/mock/mock_chunk.rs b/rafs/src/mock/mock_chunk.rs index a058918e9e1..a2e529ab961 100644 --- a/rafs/src/mock/mock_chunk.rs +++ b/rafs/src/mock/mock_chunk.rs @@ -1,140 +1,140 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::any::Any; -use std::sync::Arc; - -use nydus_utils::digest::RafsDigest; -use storage::device::v5::BlobV5ChunkInfo; -use storage::device::{BlobChunkFlags, BlobChunkInfo}; - -/// Cached information about an Rafs Data Chunk. 
-#[derive(Clone, Default, Debug)] -pub struct MockChunkInfo { - // block hash - c_block_id: Arc, - // blob containing the block - c_blob_index: u32, - // chunk index in blob - c_index: u32, - // position of the block within the file - c_file_offset: u64, - // offset of the block within the blob - c_compress_offset: u64, - c_decompress_offset: u64, - // size of the block, compressed - c_compr_size: u32, - c_decompress_size: u32, - c_flags: BlobChunkFlags, -} - -impl MockChunkInfo { - pub fn mock( - file_offset: u64, - compress_offset: u64, - compress_size: u32, - decompress_offset: u64, - decompress_size: u32, - ) -> Self { - MockChunkInfo { - c_file_offset: file_offset, - c_compress_offset: compress_offset, - c_compr_size: compress_size, - c_decompress_offset: decompress_offset, - c_decompress_size: decompress_size, - ..Default::default() - } - } -} - -impl BlobChunkInfo for MockChunkInfo { - fn chunk_id(&self) -> &RafsDigest { - &self.c_block_id - } - - fn id(&self) -> u32 { - self.c_index - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.c_flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, c_blob_index, u32); - impl_getter!(compressed_offset, c_compress_offset, u64); - impl_getter!(compressed_size, c_compr_size, u32); - impl_getter!(uncompressed_offset, c_decompress_offset, u64); - impl_getter!(uncompressed_size, c_decompress_size, u32); -} - -impl BlobV5ChunkInfo for MockChunkInfo { - fn index(&self) -> u32 { - self.c_index - } - - fn file_offset(&self) -> u64 { - self.c_file_offset - } - - fn flags(&self) -> BlobChunkFlags { - self.c_flags - } - - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use nydus_utils::digest::{Algorithm, RafsDigest}; - use storage::device::{v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo}; - - use super::MockChunkInfo; - - #[test] - fn test_mock_chunk_info() { - let mut info = MockChunkInfo::mock(0, 1024, 512, 2048, 512); - let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); - info.c_block_id = Arc::new(digest); - info.c_blob_index = 1; - info.c_flags = BlobChunkFlags::COMPRESSED; - info.c_index = 2; - - let base = info.as_base(); - let any = info.as_any(); - let rev = any.downcast_ref::().unwrap(); - - assert_eq!(info.chunk_id().data, digest.data); - assert_eq!(info.id(), 2); - assert_eq!(base.id(), rev.id()); - assert!(info.is_compressed()); - assert!(!info.is_encrypted()); - assert_eq!(info.blob_index(), 1); - assert_eq!(info.flags(), BlobChunkFlags::COMPRESSED); - assert_eq!(info.compressed_offset(), 1024); - assert_eq!(info.compressed_size(), 512); - assert_eq!(info.compressed_end(), 1024 + 512); - - assert_eq!(info.uncompressed_offset(), 2048); - assert_eq!(info.uncompressed_size(), 512); - assert_eq!(info.uncompressed_end(), 2048 + 512); - assert_eq!(info.file_offset(), 0); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::sync::Arc; + +use nydus_utils::digest::RafsDigest; +use storage::device::v5::BlobV5ChunkInfo; +use storage::device::{BlobChunkFlags, BlobChunkInfo}; + +/// Cached information about an Rafs Data Chunk. 
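// A small, self-contained sketch (illustrative only; `region_end` is a made-up
// helper, not this crate's API) of the bookkeeping a mocked chunk captures:
// each chunk records an offset and a size for its compressed and uncompressed
// regions, and the `*_end` values asserted in the new test are simply offset + size.
fn region_end(offset: u64, size: u32) -> u64 {
    offset + size as u64
}

#[test]
fn region_end_matches_mocked_chunk() {
    // Mirrors the chunk mocked in test_mock_chunk_info below:
    // compressed region at offset 1024 with size 512,
    // uncompressed region at offset 2048 with size 512.
    assert_eq!(region_end(1024, 512), 1024 + 512);
    assert_eq!(region_end(2048, 512), 2048 + 512);
}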
+#[derive(Clone, Default, Debug)] +pub struct MockChunkInfo { + // block hash + c_block_id: Arc, + // blob containing the block + c_blob_index: u32, + // chunk index in blob + c_index: u32, + // position of the block within the file + c_file_offset: u64, + // offset of the block within the blob + c_compress_offset: u64, + c_decompress_offset: u64, + // size of the block, compressed + c_compr_size: u32, + c_decompress_size: u32, + c_flags: BlobChunkFlags, +} + +impl MockChunkInfo { + pub fn mock( + file_offset: u64, + compress_offset: u64, + compress_size: u32, + decompress_offset: u64, + decompress_size: u32, + ) -> Self { + MockChunkInfo { + c_file_offset: file_offset, + c_compress_offset: compress_offset, + c_compr_size: compress_size, + c_decompress_offset: decompress_offset, + c_decompress_size: decompress_size, + ..Default::default() + } + } +} + +impl BlobChunkInfo for MockChunkInfo { + fn chunk_id(&self) -> &RafsDigest { + &self.c_block_id + } + + fn id(&self) -> u32 { + self.c_index + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.c_flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, c_blob_index, u32); + impl_getter!(compressed_offset, c_compress_offset, u64); + impl_getter!(compressed_size, c_compr_size, u32); + impl_getter!(uncompressed_offset, c_decompress_offset, u64); + impl_getter!(uncompressed_size, c_decompress_size, u32); +} + +impl BlobV5ChunkInfo for MockChunkInfo { + fn index(&self) -> u32 { + self.c_index + } + + fn file_offset(&self) -> u64 { + self.c_file_offset + } + + fn flags(&self) -> BlobChunkFlags { + self.c_flags + } + + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use nydus_utils::digest::{Algorithm, RafsDigest}; + use storage::device::{v5::BlobV5ChunkInfo, BlobChunkFlags, BlobChunkInfo}; + + use super::MockChunkInfo; + + #[test] + fn test_mock_chunk_info() { + let mut info = MockChunkInfo::mock(0, 1024, 512, 2048, 512); + let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); + info.c_block_id = Arc::new(digest); + info.c_blob_index = 1; + info.c_flags = BlobChunkFlags::COMPRESSED; + info.c_index = 2; + + let base = info.as_base(); + let any = info.as_any(); + let rev = any.downcast_ref::().unwrap(); + + assert_eq!(info.chunk_id().data, digest.data); + assert_eq!(info.id(), 2); + assert_eq!(base.id(), rev.id()); + assert!(info.is_compressed()); + assert!(!info.is_encrypted()); + assert_eq!(info.blob_index(), 1); + assert_eq!(info.flags(), BlobChunkFlags::COMPRESSED); + assert_eq!(info.compressed_offset(), 1024); + assert_eq!(info.compressed_size(), 512); + assert_eq!(info.compressed_end(), 1024 + 512); + + assert_eq!(info.uncompressed_offset(), 2048); + assert_eq!(info.uncompressed_size(), 512); + assert_eq!(info.uncompressed_end(), 2048 + 512); + assert_eq!(info.file_offset(), 0); + } +} diff --git a/rafs/src/mock/mock_inode.rs b/rafs/src/mock/mock_inode.rs index 920abebed05..04bdd58741b 100644 --- a/rafs/src/mock/mock_inode.rs +++ b/rafs/src/mock/mock_inode.rs @@ -1,404 +1,404 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::any::Any; -use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; -use std::io::Result; -use std::os::unix::ffi::OsStrExt; -use std::sync::Arc; - -use fuse_backend_rs::abi::fuse_abi; -use fuse_backend_rs::api::filesystem::Entry; -use nydus_storage::device::v5::BlobV5ChunkInfo; -use nydus_storage::device::{BlobChunkInfo, BlobDevice, BlobInfo, BlobIoVec}; -use nydus_utils::{digest::RafsDigest, ByteSize}; - -use super::mock_chunk::MockChunkInfo; -use super::mock_super::CHUNK_SIZE; -use crate::metadata::inode::RafsInodeFlags; -use crate::metadata::layout::v5::{ - rafsv5_alloc_bio_vecs, RafsV5BlobTable, RafsV5InodeChunkOps, RafsV5InodeOps, -}; -use crate::metadata::{ - layout::{XattrName, XattrValue}, - Inode, RafsInode, RafsInodeWalkHandler, RafsSuperMeta, RAFS_ATTR_BLOCK_SIZE, -}; -use crate::RafsInodeExt; - -#[derive(Default, Clone, Debug)] -#[allow(unused)] -pub struct MockInode { - i_ino: Inode, - i_name: OsString, - i_digest: RafsDigest, - i_parent: u64, - i_mode: u32, - i_projid: u32, - i_uid: u32, - i_gid: u32, - i_flags: RafsInodeFlags, - i_size: u64, - i_blocks: u64, - i_nlink: u32, - i_child_idx: u32, - i_child_cnt: u32, - // extra info need cache - i_blksize: u32, - i_rdev: u32, - i_mtime_nsec: u32, - i_mtime: u64, - i_target: OsString, // for symbol link - i_xattr: HashMap>, - i_data: Vec>, - i_child: Vec>, - i_blob_table: Arc, - i_meta: Arc, -} - -impl MockInode { - pub fn mock(ino: Inode, size: u64, chunks: Vec>) -> Self { - Self { - i_ino: ino, - i_size: size, - i_child_cnt: chunks.len() as u32, - i_data: chunks, - // Ignore other bits for now. - i_mode: libc::S_IFREG as u32, - // It can't be changed yet. - i_blksize: CHUNK_SIZE, - ..Default::default() - } - } -} - -impl RafsInode for MockInode { - fn validate(&self, _max_inode: Inode, _chunk_size: u64) -> Result<()> { - if self.is_symlink() && self.i_target.is_empty() { - return Err(einval!("invalid inode")); - } - Ok(()) - } - - #[inline] - fn get_entry(&self) -> Entry { - Entry { - attr: self.get_attr().into(), - inode: self.i_ino, - generation: 0, - attr_flags: 0, - attr_timeout: self.i_meta.attr_timeout, - entry_timeout: self.i_meta.entry_timeout, - } - } - - #[inline] - fn get_attr(&self) -> fuse_abi::Attr { - fuse_abi::Attr { - ino: self.i_ino, - size: self.i_size, - blocks: self.i_blocks, - mode: self.i_mode, - nlink: self.i_nlink as u32, - blksize: RAFS_ATTR_BLOCK_SIZE, - rdev: self.i_rdev, - ..Default::default() - } - } - - fn walk_children_inodes( - &self, - _entry_offset: u64, - _handler: RafsInodeWalkHandler, - ) -> Result<()> { - todo!() - } - - fn get_symlink(&self) -> Result { - if !self.is_symlink() { - Err(einval!("inode is not a symlink")) - } else { - Ok(self.i_target.clone()) - } - } - - fn get_symlink_size(&self) -> u16 { - if self.is_symlink() { - self.i_target.byte_size() as u16 - } else { - 0 - } - } - - fn get_child_by_name(&self, name: &OsStr) -> Result> { - let idx = self - .i_child - .binary_search_by(|c| c.i_name.as_os_str().cmp(name)) - .map_err(|_| enoent!())?; - Ok(self.i_child[idx].clone()) - } - - #[inline] - fn get_child_by_index(&self, index: u32) -> Result> { - Ok(self.i_child[index as usize].clone()) - } - - #[inline] - fn get_child_count(&self) -> u32 { - self.i_child_cnt - } - - fn get_child_index(&self) -> Result { - Ok(self.i_child_idx) - } - - fn get_chunk_count(&self) -> u32 { - self.get_child_count() - } - - fn has_xattr(&self) -> bool { - self.i_flags.contains(RafsInodeFlags::XATTR) - } - - #[inline] - fn 
get_xattr(&self, name: &OsStr) -> Result> { - Ok(self.i_xattr.get(name).cloned()) - } - - fn get_xattrs(&self) -> Result> { - Ok(self - .i_xattr - .keys() - .map(|k| k.as_bytes().to_vec()) - .collect::>()) - } - - #[inline] - fn is_blkdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 - } - - #[inline] - fn is_chrdev(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 - } - - #[inline] - fn is_sock(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 - } - - #[inline] - fn is_fifo(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 - } - - fn is_dir(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 - } - - fn is_symlink(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 - } - - fn is_reg(&self) -> bool { - self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 - } - - fn is_hardlink(&self) -> bool { - !self.is_dir() && self.i_nlink > 1 - } - - fn collect_descendants_inodes( - &self, - descendants: &mut Vec>, - ) -> Result { - if !self.is_dir() { - return Err(enotdir!()); - } - - let mut child_dirs: Vec> = Vec::new(); - - for child_inode in &self.i_child { - if child_inode.is_dir() { - trace!("Got dir {:?}", child_inode.name()); - child_dirs.push(child_inode.clone()); - } else { - if child_inode.is_empty_size() { - continue; - } - descendants.push(child_inode.clone()); - } - } - - for d in child_dirs { - d.collect_descendants_inodes(descendants)?; - } - - Ok(0) - } - - fn alloc_bio_vecs( - &self, - _device: &BlobDevice, - offset: u64, - size: usize, - user_io: bool, - ) -> Result> { - rafsv5_alloc_bio_vecs(self, offset, size, user_io) - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(ino, i_ino, u64); - impl_getter!(size, i_size, u64); - impl_getter!(rdev, i_rdev, u32); - impl_getter!(projid, i_projid, u32); -} - -impl RafsInodeExt for MockInode { - fn name(&self) -> OsString { - self.i_name.clone() - } - - fn flags(&self) -> u64 { - self.i_flags.bits() - } - - fn get_digest(&self) -> RafsDigest { - self.i_digest - } - - fn get_name_size(&self) -> u16 { - self.i_name.byte_size() as u16 - } - - #[inline] - fn get_chunk_info(&self, idx: u32) -> Result> { - Ok(self.i_data[idx as usize].clone()) - } - - fn as_inode(&self) -> &dyn RafsInode { - self - } - - impl_getter!(parent, i_parent, u64); -} - -impl RafsV5InodeChunkOps for MockInode { - fn get_chunk_info_v5(&self, idx: u32) -> Result> { - Ok(self.i_data[idx as usize].clone()) - } -} - -impl RafsV5InodeOps for MockInode { - fn get_blob_by_index(&self, _idx: u32) -> Result> { - Ok(Arc::new(BlobInfo::default())) - } - - fn get_chunk_size(&self) -> u32 { - CHUNK_SIZE - } - - fn has_hole(&self) -> bool { - false - } -} - -#[cfg(test)] -mod tests { - use nydus_utils::digest::Algorithm; - - use crate::metadata::layout::RAFS_V5_ROOT_INODE; - - use super::*; - - #[test] - fn test_mock_node() { - let size = 20; - let mut chunks = Vec::>::new(); - let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); - let info = MockChunkInfo::mock(0, 1024, 1024, 2048, 1024); - - chunks.push(Arc::new(info.clone())); - chunks.push(Arc::new(info)); - - let mut node = MockInode::mock(13, size, chunks); - node.i_flags = RafsInodeFlags::XATTR; - node.i_mode = libc::S_IFDIR as u32; - node.i_name = "foo".into(); - node.i_digest = digest; - node.i_parent = RAFS_V5_ROOT_INODE; - let mut child_node1 = MockInode::mock(14, size, Vec::>::new()); - child_node1.i_name = 
OsStr::new("child1").into(); - child_node1.i_size = 10; - let mut child_node2 = MockInode::mock(15, size, Vec::>::new()); - child_node2.i_name = OsStr::new("child2").into(); - child_node1.i_size = 20; - - node.i_child.push(Arc::new(child_node1)); - node.i_child.push(Arc::new(child_node2)); - node.i_child_cnt = 2; - node.i_child_idx = 2; - - node.i_xattr.insert("attr1".into(), "bar1".into()); - node.i_xattr.insert("attr2".into(), "bar2".into()); - node.i_xattr.insert("attr3".into(), "bar3".into()); - - node.i_data.push(Arc::new(MockChunkInfo::default())); - - assert!(node.validate(0, 0).is_ok()); - assert_eq!(node.ino(), 13); - assert_eq!(node.size(), 20); - assert_eq!(node.rdev(), 0); - assert_eq!(node.projid(), 0); - assert_eq!(node.name(), "foo"); - assert_eq!(node.flags(), RafsInodeFlags::XATTR.bits()); - assert_eq!(node.get_digest(), digest); - assert_eq!(node.get_name_size(), "foo".len() as u16); - assert!(node.get_chunk_info(0).is_ok()); - assert!(node.get_chunk_info_v5(0).is_ok()); - assert_eq!(node.parent(), RAFS_V5_ROOT_INODE); - assert!(node.get_blob_by_index(0).is_ok()); - assert_eq!(node.get_chunk_size(), CHUNK_SIZE); - assert!(!node.has_hole()); - - let ent = node.get_entry(); - assert_eq!(ent.inode, node.ino()); - assert_eq!(ent.attr_timeout, node.i_meta.attr_timeout); - assert_eq!(ent.entry_timeout, node.i_meta.entry_timeout); - assert_eq!(ent.attr, node.get_attr().into()); - - assert!(node.get_symlink().is_err()); - assert_eq!(node.get_symlink_size(), 0 as u16); - - assert!(node.get_child_by_name(OsStr::new("child1")).is_ok()); - assert!(node.get_child_by_index(0).is_ok()); - assert!(node.get_child_by_index(1).is_ok()); - assert_eq!(node.get_child_count(), 2 as u32); - assert_eq!(node.get_child_index().unwrap(), 2 as u32); - assert_eq!(node.get_chunk_count(), 2 as u32); - assert!(node.has_xattr()); - assert_eq!( - node.get_xattr(OsStr::new("attr2")).unwrap().unwrap(), - "bar2".as_bytes() - ); - assert_eq!(node.get_xattrs().unwrap().len(), 3); - - assert!(!node.is_blkdev()); - assert!(!node.is_chrdev()); - assert!(!node.is_sock()); - assert!(!node.is_fifo()); - assert!(node.is_dir()); - assert!(!node.is_symlink()); - assert!(!node.is_reg()); - assert!(!node.is_hardlink()); - let mut inodes = Vec::>::new(); - node.collect_descendants_inodes(&mut inodes).unwrap(); - assert_eq!(inodes.len(), 2); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::io::Result; +use std::os::unix::ffi::OsStrExt; +use std::sync::Arc; + +use fuse_backend_rs::abi::fuse_abi; +use fuse_backend_rs::api::filesystem::Entry; +use nydus_storage::device::v5::BlobV5ChunkInfo; +use nydus_storage::device::{BlobChunkInfo, BlobDevice, BlobInfo, BlobIoVec}; +use nydus_utils::{digest::RafsDigest, ByteSize}; + +use super::mock_chunk::MockChunkInfo; +use super::mock_super::CHUNK_SIZE; +use crate::metadata::inode::RafsInodeFlags; +use crate::metadata::layout::v5::{ + rafsv5_alloc_bio_vecs, RafsV5BlobTable, RafsV5InodeChunkOps, RafsV5InodeOps, +}; +use crate::metadata::{ + layout::{XattrName, XattrValue}, + Inode, RafsInode, RafsInodeWalkHandler, RafsSuperMeta, RAFS_ATTR_BLOCK_SIZE, +}; +use crate::RafsInodeExt; + +#[derive(Default, Clone, Debug)] +#[allow(unused)] +pub struct MockInode { + i_ino: Inode, + i_name: OsString, + i_digest: RafsDigest, + i_parent: u64, + i_mode: u32, + i_projid: u32, + i_uid: u32, + i_gid: u32, + i_flags: RafsInodeFlags, + i_size: u64, + i_blocks: u64, + i_nlink: u32, + i_child_idx: u32, + i_child_cnt: u32, + // extra info need cache + i_blksize: u32, + i_rdev: u32, + i_mtime_nsec: u32, + i_mtime: u64, + i_target: OsString, // for symbol link + i_xattr: HashMap>, + i_data: Vec>, + i_child: Vec>, + i_blob_table: Arc, + i_meta: Arc, +} + +impl MockInode { + pub fn mock(ino: Inode, size: u64, chunks: Vec>) -> Self { + Self { + i_ino: ino, + i_size: size, + i_child_cnt: chunks.len() as u32, + i_data: chunks, + // Ignore other bits for now. + i_mode: libc::S_IFREG as u32, + // It can't be changed yet. + i_blksize: CHUNK_SIZE, + ..Default::default() + } + } +} + +impl RafsInode for MockInode { + fn validate(&self, _max_inode: Inode, _chunk_size: u64) -> Result<()> { + if self.is_symlink() && self.i_target.is_empty() { + return Err(einval!("invalid inode")); + } + Ok(()) + } + + #[inline] + fn get_entry(&self) -> Entry { + Entry { + attr: self.get_attr().into(), + inode: self.i_ino, + generation: 0, + attr_flags: 0, + attr_timeout: self.i_meta.attr_timeout, + entry_timeout: self.i_meta.entry_timeout, + } + } + + #[inline] + fn get_attr(&self) -> fuse_abi::Attr { + fuse_abi::Attr { + ino: self.i_ino, + size: self.i_size, + blocks: self.i_blocks, + mode: self.i_mode, + nlink: self.i_nlink as u32, + blksize: RAFS_ATTR_BLOCK_SIZE, + rdev: self.i_rdev, + ..Default::default() + } + } + + fn walk_children_inodes( + &self, + _entry_offset: u64, + _handler: RafsInodeWalkHandler, + ) -> Result<()> { + todo!() + } + + fn get_symlink(&self) -> Result { + if !self.is_symlink() { + Err(einval!("inode is not a symlink")) + } else { + Ok(self.i_target.clone()) + } + } + + fn get_symlink_size(&self) -> u16 { + if self.is_symlink() { + self.i_target.byte_size() as u16 + } else { + 0 + } + } + + fn get_child_by_name(&self, name: &OsStr) -> Result> { + let idx = self + .i_child + .binary_search_by(|c| c.i_name.as_os_str().cmp(name)) + .map_err(|_| enoent!())?; + Ok(self.i_child[idx].clone()) + } + + #[inline] + fn get_child_by_index(&self, index: u32) -> Result> { + Ok(self.i_child[index as usize].clone()) + } + + #[inline] + fn get_child_count(&self) -> u32 { + self.i_child_cnt + } + + fn get_child_index(&self) -> Result { + Ok(self.i_child_idx) + } + + fn get_chunk_count(&self) -> u32 { + self.get_child_count() + } + + fn has_xattr(&self) -> bool { + self.i_flags.contains(RafsInodeFlags::XATTR) + } + + #[inline] + fn 
get_xattr(&self, name: &OsStr) -> Result> { + Ok(self.i_xattr.get(name).cloned()) + } + + fn get_xattrs(&self) -> Result> { + Ok(self + .i_xattr + .keys() + .map(|k| k.as_bytes().to_vec()) + .collect::>()) + } + + #[inline] + fn is_blkdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFBLK as u32 + } + + #[inline] + fn is_chrdev(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFCHR as u32 + } + + #[inline] + fn is_sock(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFSOCK as u32 + } + + #[inline] + fn is_fifo(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFIFO as u32 + } + + fn is_dir(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFDIR as u32 + } + + fn is_symlink(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFLNK as u32 + } + + fn is_reg(&self) -> bool { + self.i_mode & libc::S_IFMT as u32 == libc::S_IFREG as u32 + } + + fn is_hardlink(&self) -> bool { + !self.is_dir() && self.i_nlink > 1 + } + + fn collect_descendants_inodes( + &self, + descendants: &mut Vec>, + ) -> Result { + if !self.is_dir() { + return Err(enotdir!()); + } + + let mut child_dirs: Vec> = Vec::new(); + + for child_inode in &self.i_child { + if child_inode.is_dir() { + trace!("Got dir {:?}", child_inode.name()); + child_dirs.push(child_inode.clone()); + } else { + if child_inode.is_empty_size() { + continue; + } + descendants.push(child_inode.clone()); + } + } + + for d in child_dirs { + d.collect_descendants_inodes(descendants)?; + } + + Ok(0) + } + + fn alloc_bio_vecs( + &self, + _device: &BlobDevice, + offset: u64, + size: usize, + user_io: bool, + ) -> Result> { + rafsv5_alloc_bio_vecs(self, offset, size, user_io) + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(ino, i_ino, u64); + impl_getter!(size, i_size, u64); + impl_getter!(rdev, i_rdev, u32); + impl_getter!(projid, i_projid, u32); +} + +impl RafsInodeExt for MockInode { + fn name(&self) -> OsString { + self.i_name.clone() + } + + fn flags(&self) -> u64 { + self.i_flags.bits() + } + + fn get_digest(&self) -> RafsDigest { + self.i_digest + } + + fn get_name_size(&self) -> u16 { + self.i_name.byte_size() as u16 + } + + #[inline] + fn get_chunk_info(&self, idx: u32) -> Result> { + Ok(self.i_data[idx as usize].clone()) + } + + fn as_inode(&self) -> &dyn RafsInode { + self + } + + impl_getter!(parent, i_parent, u64); +} + +impl RafsV5InodeChunkOps for MockInode { + fn get_chunk_info_v5(&self, idx: u32) -> Result> { + Ok(self.i_data[idx as usize].clone()) + } +} + +impl RafsV5InodeOps for MockInode { + fn get_blob_by_index(&self, _idx: u32) -> Result> { + Ok(Arc::new(BlobInfo::default())) + } + + fn get_chunk_size(&self) -> u32 { + CHUNK_SIZE + } + + fn has_hole(&self) -> bool { + false + } +} + +#[cfg(test)] +mod tests { + use nydus_utils::digest::Algorithm; + + use crate::metadata::layout::RAFS_V5_ROOT_INODE; + + use super::*; + + #[test] + fn test_mock_node() { + let size = 20; + let mut chunks = Vec::>::new(); + let digest = RafsDigest::from_buf("foobar".as_bytes(), Algorithm::Blake3); + let info = MockChunkInfo::mock(0, 1024, 1024, 2048, 1024); + + chunks.push(Arc::new(info.clone())); + chunks.push(Arc::new(info)); + + let mut node = MockInode::mock(13, size, chunks); + node.i_flags = RafsInodeFlags::XATTR; + node.i_mode = libc::S_IFDIR as u32; + node.i_name = "foo".into(); + node.i_digest = digest; + node.i_parent = RAFS_V5_ROOT_INODE; + let mut child_node1 = MockInode::mock(14, size, Vec::>::new()); + child_node1.i_name = 
OsStr::new("child1").into(); + child_node1.i_size = 10; + let mut child_node2 = MockInode::mock(15, size, Vec::>::new()); + child_node2.i_name = OsStr::new("child2").into(); + child_node1.i_size = 20; + + node.i_child.push(Arc::new(child_node1)); + node.i_child.push(Arc::new(child_node2)); + node.i_child_cnt = 2; + node.i_child_idx = 2; + + node.i_xattr.insert("attr1".into(), "bar1".into()); + node.i_xattr.insert("attr2".into(), "bar2".into()); + node.i_xattr.insert("attr3".into(), "bar3".into()); + + node.i_data.push(Arc::new(MockChunkInfo::default())); + + assert!(node.validate(0, 0).is_ok()); + assert_eq!(node.ino(), 13); + assert_eq!(node.size(), 20); + assert_eq!(node.rdev(), 0); + assert_eq!(node.projid(), 0); + assert_eq!(node.name(), "foo"); + assert_eq!(node.flags(), RafsInodeFlags::XATTR.bits()); + assert_eq!(node.get_digest(), digest); + assert_eq!(node.get_name_size(), "foo".len() as u16); + assert!(node.get_chunk_info(0).is_ok()); + assert!(node.get_chunk_info_v5(0).is_ok()); + assert_eq!(node.parent(), RAFS_V5_ROOT_INODE); + assert!(node.get_blob_by_index(0).is_ok()); + assert_eq!(node.get_chunk_size(), CHUNK_SIZE); + assert!(!node.has_hole()); + + let ent = node.get_entry(); + assert_eq!(ent.inode, node.ino()); + assert_eq!(ent.attr_timeout, node.i_meta.attr_timeout); + assert_eq!(ent.entry_timeout, node.i_meta.entry_timeout); + assert_eq!(ent.attr, node.get_attr().into()); + + assert!(node.get_symlink().is_err()); + assert_eq!(node.get_symlink_size(), 0 as u16); + + assert!(node.get_child_by_name(OsStr::new("child1")).is_ok()); + assert!(node.get_child_by_index(0).is_ok()); + assert!(node.get_child_by_index(1).is_ok()); + assert_eq!(node.get_child_count(), 2 as u32); + assert_eq!(node.get_child_index().unwrap(), 2 as u32); + assert_eq!(node.get_chunk_count(), 2 as u32); + assert!(node.has_xattr()); + assert_eq!( + node.get_xattr(OsStr::new("attr2")).unwrap().unwrap(), + "bar2".as_bytes() + ); + assert_eq!(node.get_xattrs().unwrap().len(), 3); + + assert!(!node.is_blkdev()); + assert!(!node.is_chrdev()); + assert!(!node.is_sock()); + assert!(!node.is_fifo()); + assert!(node.is_dir()); + assert!(!node.is_symlink()); + assert!(!node.is_reg()); + assert!(!node.is_hardlink()); + let mut inodes = Vec::>::new(); + node.collect_descendants_inodes(&mut inodes).unwrap(); + assert_eq!(inodes.len(), 2); + } +} diff --git a/rafs/src/mock/mock_super.rs b/rafs/src/mock/mock_super.rs index 50652ddf676..562f2c2493e 100644 --- a/rafs/src/mock/mock_super.rs +++ b/rafs/src/mock/mock_super.rs @@ -1,168 +1,168 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::io::Result; -use std::sync::Arc; - -use nydus_storage::device::{BlobChunkInfo, BlobDevice, BlobInfo}; - -use crate::metadata::{Inode, RafsInode, RafsSuperBlock, RafsSuperInodes}; -use crate::mock::MockInode; -use crate::{RafsInodeExt, RafsIoReader, RafsResult}; - -#[derive(Default)] -pub struct MockSuperBlock { - pub inodes: HashMap>, -} - -pub const CHUNK_SIZE: u32 = 200; - -impl MockSuperBlock { - pub fn new() -> Self { - Self { - inodes: HashMap::new(), - } - } -} - -impl RafsSuperInodes for MockSuperBlock { - fn get_max_ino(&self) -> Inode { - unimplemented!() - } - - fn get_inode(&self, ino: Inode, _validate_inode: bool) -> Result> { - self.inodes - .get(&ino) - .map_or(Err(enoent!()), |i| Ok(i.clone())) - } - - fn get_extended_inode( - &self, - ino: Inode, - _validate_inode: bool, - ) -> Result> { - self.inodes - .get(&ino) - .map_or(Err(enoent!()), |i| Ok(i.clone())) - } -} - -impl RafsSuperBlock for MockSuperBlock { - fn load(&mut self, _r: &mut RafsIoReader) -> Result<()> { - unimplemented!() - } - fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { - unimplemented!() - } - fn destroy(&mut self) {} - fn get_blob_infos(&self) -> Vec> { - unimplemented!() - } - - fn root_ino(&self) -> u64 { - unimplemented!() - } - - fn get_chunk_info(&self, _idx: usize) -> Result> { - unimplemented!() - } - - fn set_blob_device(&self, _blob_device: BlobDevice) { - unimplemented!() - } -} - -#[cfg(test)] -mod tests { - use std::fs::OpenOptions; - - use vmm_sys_util::tempfile::TempFile; - - use crate::{mock::MockChunkInfo, RafsIoRead}; - - use super::*; - - #[test] - fn test_mock_super_block() { - let chunks = Vec::>::new(); - let node1 = MockInode::mock(0, 20, chunks.clone()); - let node2 = MockInode::mock(1, 20, chunks); - let mut blk = MockSuperBlock::new(); - blk.inodes.insert(node1.ino(), Arc::new(node1)); - blk.inodes.insert(node2.ino(), Arc::new(node2)); - assert!(blk.get_inode(0, false).is_ok()); - assert!(blk.get_inode(1, false).is_ok()); - assert!(blk.get_inode(2, false).is_err()); - - assert!(blk.get_extended_inode(0, false).is_ok()); - assert!(blk.get_extended_inode(1, false).is_ok()); - assert!(blk.get_extended_inode(2, false).is_err()); - } - #[test] - #[should_panic] - fn test_get_max_ino() { - let blk = MockSuperBlock::new(); - blk.get_max_ino(); - } - - fn get_reader() -> Box { - let temp = TempFile::new().unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let reader: Box = Box::new(r); - reader - } - - #[test] - #[should_panic] - fn test_load() { - let mut blk = MockSuperBlock::new(); - let mut reader: Box = get_reader(); - blk.load(&mut reader).unwrap(); - } - - #[test] - #[should_panic] - fn test_update() { - let blk = MockSuperBlock::new(); - let mut reader: Box = get_reader(); - blk.update(&mut reader).unwrap(); - } - - #[test] - #[should_panic] - fn test_rootino() { - let blk = MockSuperBlock::new(); - blk.root_ino(); - } - #[test] - #[should_panic] - fn test_get_chunk_info() { - let blk = MockSuperBlock::new(); - blk.get_chunk_info(0).unwrap(); - } - - #[test] - #[should_panic] - fn test_set_blob_device() { - let blk = MockSuperBlock::new(); - blk.set_blob_device(BlobDevice::default()); - } - - #[test] - fn test_mock_super_block_func() { - let mut blk = MockSuperBlock::new(); - assert!(blk.get_inode(0, true).is_err()); - assert!(blk.get_extended_inode(0, true).is_err()); - blk.inodes.insert(0, Arc::new(MockInode::default())); 
- assert!(blk.get_inode(0, true).is_ok()); - assert!(blk.get_extended_inode(0, true).is_ok()); - blk.destroy(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::io::Result; +use std::sync::Arc; + +use nydus_storage::device::{BlobChunkInfo, BlobDevice, BlobInfo}; + +use crate::metadata::{Inode, RafsInode, RafsSuperBlock, RafsSuperInodes}; +use crate::mock::MockInode; +use crate::{RafsInodeExt, RafsIoReader, RafsResult}; + +#[derive(Default)] +pub struct MockSuperBlock { + pub inodes: HashMap>, +} + +pub const CHUNK_SIZE: u32 = 200; + +impl MockSuperBlock { + pub fn new() -> Self { + Self { + inodes: HashMap::new(), + } + } +} + +impl RafsSuperInodes for MockSuperBlock { + fn get_max_ino(&self) -> Inode { + unimplemented!() + } + + fn get_inode(&self, ino: Inode, _validate_inode: bool) -> Result> { + self.inodes + .get(&ino) + .map_or(Err(enoent!()), |i| Ok(i.clone())) + } + + fn get_extended_inode( + &self, + ino: Inode, + _validate_inode: bool, + ) -> Result> { + self.inodes + .get(&ino) + .map_or(Err(enoent!()), |i| Ok(i.clone())) + } +} + +impl RafsSuperBlock for MockSuperBlock { + fn load(&mut self, _r: &mut RafsIoReader) -> Result<()> { + unimplemented!() + } + fn update(&self, _r: &mut RafsIoReader) -> RafsResult<()> { + unimplemented!() + } + fn destroy(&mut self) {} + fn get_blob_infos(&self) -> Vec> { + unimplemented!() + } + + fn root_ino(&self) -> u64 { + unimplemented!() + } + + fn get_chunk_info(&self, _idx: usize) -> Result> { + unimplemented!() + } + + fn set_blob_device(&self, _blob_device: BlobDevice) { + unimplemented!() + } +} + +#[cfg(test)] +mod tests { + use std::fs::OpenOptions; + + use vmm_sys_util::tempfile::TempFile; + + use crate::{mock::MockChunkInfo, RafsIoRead}; + + use super::*; + + #[test] + fn test_mock_super_block() { + let chunks = Vec::>::new(); + let node1 = MockInode::mock(0, 20, chunks.clone()); + let node2 = MockInode::mock(1, 20, chunks); + let mut blk = MockSuperBlock::new(); + blk.inodes.insert(node1.ino(), Arc::new(node1)); + blk.inodes.insert(node2.ino(), Arc::new(node2)); + assert!(blk.get_inode(0, false).is_ok()); + assert!(blk.get_inode(1, false).is_ok()); + assert!(blk.get_inode(2, false).is_err()); + + assert!(blk.get_extended_inode(0, false).is_ok()); + assert!(blk.get_extended_inode(1, false).is_ok()); + assert!(blk.get_extended_inode(2, false).is_err()); + } + #[test] + #[should_panic] + fn test_get_max_ino() { + let blk = MockSuperBlock::new(); + blk.get_max_ino(); + } + + fn get_reader() -> Box { + let temp = TempFile::new().unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let reader: Box = Box::new(r); + reader + } + + #[test] + #[should_panic] + fn test_load() { + let mut blk = MockSuperBlock::new(); + let mut reader: Box = get_reader(); + blk.load(&mut reader).unwrap(); + } + + #[test] + #[should_panic] + fn test_update() { + let blk = MockSuperBlock::new(); + let mut reader: Box = get_reader(); + blk.update(&mut reader).unwrap(); + } + + #[test] + #[should_panic] + fn test_rootino() { + let blk = MockSuperBlock::new(); + blk.root_ino(); + } + #[test] + #[should_panic] + fn test_get_chunk_info() { + let blk = MockSuperBlock::new(); + blk.get_chunk_info(0).unwrap(); + } + + #[test] + #[should_panic] + fn test_set_blob_device() { + let blk = MockSuperBlock::new(); + blk.set_blob_device(BlobDevice::default()); + } + + #[test] 
+ fn test_mock_super_block_func() { + let mut blk = MockSuperBlock::new(); + assert!(blk.get_inode(0, true).is_err()); + assert!(blk.get_extended_inode(0, true).is_err()); + blk.inodes.insert(0, Arc::new(MockInode::default())); + assert!(blk.get_inode(0, true).is_ok()); + assert!(blk.get_extended_inode(0, true).is_ok()); + blk.destroy(); + } +} diff --git a/rafs/src/mock/mod.rs b/rafs/src/mock/mod.rs index 4d85b871d01..b259ea2271a 100644 --- a/rafs/src/mock/mod.rs +++ b/rafs/src/mock/mod.rs @@ -1,12 +1,12 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -pub mod mock_chunk; -pub mod mock_inode; -pub mod mock_super; - -pub use mock_chunk::*; -pub use mock_inode::*; -pub use mock_super::*; +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +pub mod mock_chunk; +pub mod mock_inode; +pub mod mock_super; + +pub use mock_chunk::*; +pub use mock_inode::*; +pub use mock_super::*; diff --git a/rafs/tests/io_amplify.rs b/rafs/tests/io_amplify.rs index 22ed2dd1f38..ddaf541c403 100644 --- a/rafs/tests/io_amplify.rs +++ b/rafs/tests/io_amplify.rs @@ -1,320 +1,320 @@ -// Temporarily disable -/* -#[macro_use] -extern crate assert_matches; - -use std::sync::Arc; - -use rafs::fs::RafsConfig; -use rafs::metadata::RafsSuper; -use rafs::mock::{MockChunkInfo, MockInode, MockSuperBlock, CHUNK_SIZE}; - -#[test] -fn test_user_io_amplification_lack_chunks_small_expected() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); - let ck2 = Arc::new(MockChunkInfo::mock( - CHUNK_SIZE as u64, - 780, - 110, - CHUNK_SIZE as u64, - 112, - )); - let chunks = vec![ck1.clone(), ck2]; - - rafs_super_block.inodes.insert( - 1, - Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 112, chunks)), - ); - - super_sb.superblock = Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 60); - assert_matches!(desc, Ok(None)); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 61); - assert_matches!(desc, Ok(None)); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 60 + 110); - assert_matches!(desc, Ok(None)); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), CHUNK_SIZE as u64 + 1); - assert_matches!(desc.unwrap().unwrap().bi_vec.len(), 1); - - let desc = - super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), CHUNK_SIZE as u64 * 10); - assert_matches!(desc.unwrap().unwrap().bi_vec.len(), 1); -} - -#[test] -fn test_user_io_amplification_lack_chunks_normal_expected() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 
900, 100)); - let ck2 = Arc::new(MockChunkInfo::mock(100, 780, 110, 1000, 300)); - let chunks = vec![ck1.clone(), ck2]; - - rafs_super_block - .inodes - .insert(1, Arc::new(MockInode::mock(1, 400, chunks))); - - super_sb.superblock = Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 81, ck1.as_ref(), 60); - - assert_matches!(desc, Ok(None)); -} -#[test] -fn test_user_io_amplification_large_boundary() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); - let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1000, 120)); - let tail_ck = ck2.clone(); - let chunks = vec![ck1, ck2]; - - rafs_super_block.inodes.insert( - 1, - Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 120, chunks)), - ); - - // Next file, not chunk continuous - let discontinuous_blob_offset = 780 + 110; - let ck1 = Arc::new(MockChunkInfo::mock( - 0, - discontinuous_blob_offset, - 100, - 900, - CHUNK_SIZE, - )); - let ck2 = Arc::new(MockChunkInfo::mock( - CHUNK_SIZE as u64, - discontinuous_blob_offset + 100, - 110, - CHUNK_SIZE as u64, - 80, - )); - let chunks = vec![ck1, ck2]; - rafs_super_block.inodes.insert( - 2, - Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), - ); - - super_sb.superblock = Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 201); - - assert_matches!(desc, Ok(Some(_))); - let appending = desc.unwrap().unwrap(); - assert_eq!( - appending.bi_vec[0].chunkinfo.compress_offset(), - discontinuous_blob_offset - ); - assert_eq!(appending.bi_vec.len(), 1); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 100 + 1); - - assert_matches!(desc, Ok(Some(_))); - let appending = desc.unwrap().unwrap(); - assert_eq!( - appending.bi_vec[0].chunkinfo.compress_offset(), - discontinuous_blob_offset - ); - assert_eq!(appending.bi_vec.len(), 1); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 110 + 100 + 1); - - // 60 is smaller than real chunk - // assert_matches!(desc, Ok(None)); - - assert_matches!(desc, Ok(Some(_))); - let appending = desc.unwrap().unwrap(); - assert_eq!( - appending.bi_vec[0].chunkinfo.compress_offset(), - discontinuous_blob_offset - ); - assert_eq!( - appending.bi_vec[1].chunkinfo.compress_offset(), - discontinuous_blob_offset + 100 - ); - assert_eq!(appending.bi_vec.len(), 2); - - // 60 is smaller than real chunk - // assert_matches!(desc, Ok(None)); - let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 60); - assert_matches!(desc, Ok(None)); -} -#[test] -fn test_user_io_amplification_sparse_inodes() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 
900, CHUNK_SIZE)); - let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); - let chunks = vec![ck1.clone(), ck2]; - - let tail_ck = ck1; - - rafs_super_block.inodes.insert( - 1, - Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 100, chunks)), - ); - - // Next file, not chunk continuous - let discontinuous_blob_offset = 780 + 110 + 140; - let ck1 = Arc::new(MockChunkInfo::mock( - 0, - discontinuous_blob_offset, - 100, - 900, - CHUNK_SIZE, - )); - let ck2 = Arc::new(MockChunkInfo::mock( - CHUNK_SIZE as u64, - discontinuous_blob_offset + 100, - 110, - CHUNK_SIZE as u64, - 80, - )); - let chunks = vec![ck1, ck2]; - rafs_super_block.inodes.insert( - 2, - Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), - ); - - super_sb.superblock = Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, tail_ck.as_ref(), (0 - 1) as u64); - - assert_matches!(desc, Ok(Some(_))); - - let appending = desc.unwrap().unwrap(); - assert_eq!(appending.bi_vec.len(), 1); - assert_eq!(appending.bi_vec[0].chunkinfo.compress_offset(), 780); -} - -#[test] -fn test_user_io_amplification_2_inodes_4_chunks_3_amplified() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); - let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); - let chunks = vec![ck1.clone(), ck2]; - - let tail_ck = ck1; - - rafs_super_block.inodes.insert( - 1, - Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 100, chunks)), - ); - - // Next file - let discontinuous_blob_offset = 780 + 110; - let ck1 = Arc::new(MockChunkInfo::mock( - 0, - discontinuous_blob_offset, - 100, - 900, - CHUNK_SIZE, - )); - let ck2 = Arc::new(MockChunkInfo::mock( - CHUNK_SIZE as u64, - discontinuous_blob_offset + 100, - 110, - CHUNK_SIZE as u64, - 80, - )); - let chunks = vec![ck1, ck2]; - rafs_super_block.inodes.insert( - 2, - Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), - ); - - super_sb.superblock = Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, tail_ck.as_ref(), (0 - 1) as u64); - - assert_matches!(desc, Ok(Some(_))); - - let appending = desc.unwrap().unwrap(); - assert_eq!(appending.bi_vec.len(), 3); - assert_eq!( - appending.bi_vec[2].chunkinfo.compress_offset(), - discontinuous_blob_offset + 100 - ); -} - -#[test] -fn test_user_io_amplification_huge_expected() { - let mut rafs_config = RafsConfig::new(); - rafs_config.mode = "cached".to_string(); - let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); - let mut rafs_super_block = MockSuperBlock::new(); - - // (1)file offset + - // (2)compress offset + (3)compress size + - // (4)decompress offset + (5)decompress size - // not-trailing chunks have size of 200 - let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); - let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); - let chunks = vec![ck1.clone(), ck2]; - - // Only a file resided - rafs_super_block - .inodes - .insert(1, Arc::new(MockInode::mock(1, 300, chunks))); - - super_sb.superblock = 
Arc::new(rafs_super_block); - let inode_1 = super_sb.get_inode(1, false).unwrap(); - - // File size is 400 bytes, first chunk is 80 bytes, should amplify by next chunk - let desc = super_sb.carry_more_until(inode_1.as_ref(), 81, ck1.as_ref(), (0 - 1) as u64); - - if let Ok(Some(d)) = desc { - assert_eq!(d.bi_vec.len(), 1); - assert_eq!(d.bi_vec[0].chunkinfo.compress_offset(), 780); - } else { - panic!(); - } -} - */ +// Temporarily disable +/* +#[macro_use] +extern crate assert_matches; + +use std::sync::Arc; + +use rafs::fs::RafsConfig; +use rafs::metadata::RafsSuper; +use rafs::mock::{MockChunkInfo, MockInode, MockSuperBlock, CHUNK_SIZE}; + +#[test] +fn test_user_io_amplification_lack_chunks_small_expected() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); + let ck2 = Arc::new(MockChunkInfo::mock( + CHUNK_SIZE as u64, + 780, + 110, + CHUNK_SIZE as u64, + 112, + )); + let chunks = vec![ck1.clone(), ck2]; + + rafs_super_block.inodes.insert( + 1, + Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 112, chunks)), + ); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 60); + assert_matches!(desc, Ok(None)); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 61); + assert_matches!(desc, Ok(None)); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), 60 + 110); + assert_matches!(desc, Ok(None)); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), CHUNK_SIZE as u64 + 1); + assert_matches!(desc.unwrap().unwrap().bi_vec.len(), 1); + + let desc = + super_sb.carry_more_until(inode_1.as_ref(), 20, ck1.as_ref(), CHUNK_SIZE as u64 * 10); + assert_matches!(desc.unwrap().unwrap().bi_vec.len(), 1); +} + +#[test] +fn test_user_io_amplification_lack_chunks_normal_expected() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, 100)); + let ck2 = Arc::new(MockChunkInfo::mock(100, 780, 110, 1000, 300)); + let chunks = vec![ck1.clone(), ck2]; + + rafs_super_block + .inodes + .insert(1, Arc::new(MockInode::mock(1, 400, chunks))); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 81, ck1.as_ref(), 60); + + assert_matches!(desc, Ok(None)); +} +#[test] +fn test_user_io_amplification_large_boundary() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = 
Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); + let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1000, 120)); + let tail_ck = ck2.clone(); + let chunks = vec![ck1, ck2]; + + rafs_super_block.inodes.insert( + 1, + Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 120, chunks)), + ); + + // Next file, not chunk continuous + let discontinuous_blob_offset = 780 + 110; + let ck1 = Arc::new(MockChunkInfo::mock( + 0, + discontinuous_blob_offset, + 100, + 900, + CHUNK_SIZE, + )); + let ck2 = Arc::new(MockChunkInfo::mock( + CHUNK_SIZE as u64, + discontinuous_blob_offset + 100, + 110, + CHUNK_SIZE as u64, + 80, + )); + let chunks = vec![ck1, ck2]; + rafs_super_block.inodes.insert( + 2, + Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), + ); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 201); + + assert_matches!(desc, Ok(Some(_))); + let appending = desc.unwrap().unwrap(); + assert_eq!( + appending.bi_vec[0].chunkinfo.compress_offset(), + discontinuous_blob_offset + ); + assert_eq!(appending.bi_vec.len(), 1); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 100 + 1); + + assert_matches!(desc, Ok(Some(_))); + let appending = desc.unwrap().unwrap(); + assert_eq!( + appending.bi_vec[0].chunkinfo.compress_offset(), + discontinuous_blob_offset + ); + assert_eq!(appending.bi_vec.len(), 1); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 110 + 100 + 1); + + // 60 is smaller than real chunk + // assert_matches!(desc, Ok(None)); + + assert_matches!(desc, Ok(Some(_))); + let appending = desc.unwrap().unwrap(); + assert_eq!( + appending.bi_vec[0].chunkinfo.compress_offset(), + discontinuous_blob_offset + ); + assert_eq!( + appending.bi_vec[1].chunkinfo.compress_offset(), + discontinuous_blob_offset + 100 + ); + assert_eq!(appending.bi_vec.len(), 2); + + // 60 is smaller than real chunk + // assert_matches!(desc, Ok(None)); + let desc = super_sb.carry_more_until(inode_1.as_ref(), 10000, tail_ck.as_ref(), 60); + assert_matches!(desc, Ok(None)); +} +#[test] +fn test_user_io_amplification_sparse_inodes() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); + let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); + let chunks = vec![ck1.clone(), ck2]; + + let tail_ck = ck1; + + rafs_super_block.inodes.insert( + 1, + Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 100, chunks)), + ); + + // Next file, not chunk continuous + let discontinuous_blob_offset = 780 + 110 + 140; + let ck1 = Arc::new(MockChunkInfo::mock( + 0, + discontinuous_blob_offset, + 100, + 900, + CHUNK_SIZE, + )); + let ck2 = Arc::new(MockChunkInfo::mock( + CHUNK_SIZE as u64, + discontinuous_blob_offset + 100, + 110, + CHUNK_SIZE as u64, + 80, + )); + let chunks = vec![ck1, ck2]; + rafs_super_block.inodes.insert( + 2, + Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), + ); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + 
let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, tail_ck.as_ref(), (0 - 1) as u64); + + assert_matches!(desc, Ok(Some(_))); + + let appending = desc.unwrap().unwrap(); + assert_eq!(appending.bi_vec.len(), 1); + assert_eq!(appending.bi_vec[0].chunkinfo.compress_offset(), 780); +} + +#[test] +fn test_user_io_amplification_2_inodes_4_chunks_3_amplified() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); + let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); + let chunks = vec![ck1.clone(), ck2]; + + let tail_ck = ck1; + + rafs_super_block.inodes.insert( + 1, + Arc::new(MockInode::mock(1, CHUNK_SIZE as u64 + 100, chunks)), + ); + + // Next file + let discontinuous_blob_offset = 780 + 110; + let ck1 = Arc::new(MockChunkInfo::mock( + 0, + discontinuous_blob_offset, + 100, + 900, + CHUNK_SIZE, + )); + let ck2 = Arc::new(MockChunkInfo::mock( + CHUNK_SIZE as u64, + discontinuous_blob_offset + 100, + 110, + CHUNK_SIZE as u64, + 80, + )); + let chunks = vec![ck1, ck2]; + rafs_super_block.inodes.insert( + 2, + Arc::new(MockInode::mock(2, CHUNK_SIZE as u64 + 80, chunks)), + ); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + let desc = super_sb.carry_more_until(inode_1.as_ref(), 20, tail_ck.as_ref(), (0 - 1) as u64); + + assert_matches!(desc, Ok(Some(_))); + + let appending = desc.unwrap().unwrap(); + assert_eq!(appending.bi_vec.len(), 3); + assert_eq!( + appending.bi_vec[2].chunkinfo.compress_offset(), + discontinuous_blob_offset + 100 + ); +} + +#[test] +fn test_user_io_amplification_huge_expected() { + let mut rafs_config = RafsConfig::new(); + rafs_config.mode = "cached".to_string(); + let mut super_sb = RafsSuper::new(&rafs_config).unwrap(); + let mut rafs_super_block = MockSuperBlock::new(); + + // (1)file offset + + // (2)compress offset + (3)compress size + + // (4)decompress offset + (5)decompress size + // not-trailing chunks have size of 200 + let ck1 = Arc::new(MockChunkInfo::mock(0, 700, 80, 900, CHUNK_SIZE)); + let ck2 = Arc::new(MockChunkInfo::mock(CHUNK_SIZE as u64, 780, 110, 1100, 100)); + let chunks = vec![ck1.clone(), ck2]; + + // Only a file resided + rafs_super_block + .inodes + .insert(1, Arc::new(MockInode::mock(1, 300, chunks))); + + super_sb.superblock = Arc::new(rafs_super_block); + let inode_1 = super_sb.get_inode(1, false).unwrap(); + + // File size is 400 bytes, first chunk is 80 bytes, should amplify by next chunk + let desc = super_sb.carry_more_until(inode_1.as_ref(), 81, ck1.as_ref(), (0 - 1) as u64); + + if let Ok(Some(d)) = desc { + assert_eq!(d.bi_vec.len(), 1); + assert_eq!(d.bi_vec[0].chunkinfo.compress_offset(), 780); + } else { + panic!(); + } +} + */ diff --git a/rust-toolchain.toml b/rust-toolchain.toml index b36fa2701c4..756affe4b63 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ -[toolchain] -channel = "1.72.1" -components = ["rustfmt", "clippy"] +[toolchain] +channel = "1.72.1" +components = ["rustfmt", "clippy"] diff --git a/service/Cargo.toml b/service/Cargo.toml index 56c980b8926..2678ca8b8c3 100644 --- a/service/Cargo.toml +++ b/service/Cargo.toml @@ 
-1,63 +1,63 @@ -[package] -name = "nydus-service" -version = "0.3.0" -description = "Nydus Image Service Manager" -authors = ["The Nydus Developers"] -license = "Apache-2.0" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" -resolver = "2" - -[dependencies] -bytes = { version = "1", optional = true } -dbs-allocator = { version = "0.1.1", optional = true } -fuse-backend-rs = { version = "^0.12.0", features = ["persist"] } -libc = "0.2" -log = "0.4.8" -mio = { version = "0.8", features = ["os-poll", "os-ext"] } -nix = "0.24.0" -rust-fsm = "0.6.0" -serde = { version = "1.0.110", features = ["serde_derive", "rc"] } -serde_json = "1.0.51" -thiserror = "1.0" -time = { version = "0.3.14", features = ["serde-human-readable"] } -tokio = { version = "1.24", features = ["macros"] } -versionize_derive = "0.1.6" -versionize = "0.1.10" - -nydus-api = { version = "0.3.0", path = "../api" } -nydus-rafs = { version = "0.3.1", path = "../rafs" } -nydus-storage = { version = "0.6.3", path = "../storage" } -nydus-upgrade = { version = "0.1.0", path = "../upgrade" } -nydus-utils = { version = "0.4.2", path = "../utils" } - -vhost = { version = "0.6.0", features = ["vhost-user-slave"], optional = true } -vhost-user-backend = { version = "0.8.0", optional = true } -virtio-bindings = { version = "0.1", features = [ - "virtio-v5_0_0", -], optional = true } -virtio-queue = { version = "0.7.0", optional = true } -vm-memory = { version = "0.10.0", features = ["backend-mmap"], optional = true } - -[target.'cfg(target_os = "linux")'.dependencies] -tokio-uring = "0.4" - -[dev-dependencies] -vmm-sys-util = "0.11.0" - -[features] -default = ["fuse-backend-rs/fusedev"] -virtiofs = [ - "fuse-backend-rs/vhost-user-fs", - "vm-memory", - "vhost", - "vhost-user-backend", - "virtio-queue", - "virtio-bindings", -] - -block-device = ["dbs-allocator", "tokio/fs"] -block-nbd = ["block-device", "bytes"] - -coco = ["fuse-backend-rs/fusedev", "nydus-storage/backend-registry"] +[package] +name = "nydus-service" +version = "0.3.0" +description = "Nydus Image Service Manager" +authors = ["The Nydus Developers"] +license = "Apache-2.0" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" +resolver = "2" + +[dependencies] +bytes = { version = "1", optional = true } +dbs-allocator = { version = "0.1.1", optional = true } +fuse-backend-rs = { version = "^0.12.0", features = ["persist"] } +libc = "0.2" +log = "0.4.8" +mio = { version = "0.8", features = ["os-poll", "os-ext"] } +nix = "0.24.0" +rust-fsm = "0.6.0" +serde = { version = "1.0.110", features = ["serde_derive", "rc"] } +serde_json = "1.0.51" +thiserror = "1.0" +time = { version = "0.3.14", features = ["serde-human-readable"] } +tokio = { version = "1.24", features = ["macros"] } +versionize_derive = "0.1.6" +versionize = "0.1.10" + +nydus-api = { version = "0.3.0", path = "../api" } +nydus-rafs = { version = "0.3.1", path = "../rafs" } +nydus-storage = { version = "0.6.3", path = "../storage" } +nydus-upgrade = { version = "0.1.0", path = "../upgrade" } +nydus-utils = { version = "0.4.2", path = "../utils" } + +vhost = { version = "0.6.0", features = ["vhost-user-slave"], optional = true } +vhost-user-backend = { version = "0.8.0", optional = true } +virtio-bindings = { version = "0.1", features = [ + "virtio-v5_0_0", +], optional = true } +virtio-queue = { version = "0.7.0", optional = true } +vm-memory = { version = "0.10.0", features = ["backend-mmap"], optional = true } + 
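+# Note: the optional dependencies above are gated by the Cargo features declared
+# in the [features] section below: the vhost/virtio/vm-memory crates are only
+# pulled in by the `virtiofs` feature, while `dbs-allocator` and `bytes` back the
+# `block-device` and `block-nbd` features respectively.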
+[target.'cfg(target_os = "linux")'.dependencies] +tokio-uring = "0.4" + +[dev-dependencies] +vmm-sys-util = "0.11.0" + +[features] +default = ["fuse-backend-rs/fusedev"] +virtiofs = [ + "fuse-backend-rs/vhost-user-fs", + "vm-memory", + "vhost", + "vhost-user-backend", + "virtio-queue", + "virtio-bindings", +] + +block-device = ["dbs-allocator", "tokio/fs"] +block-nbd = ["block-device", "bytes"] + +coco = ["fuse-backend-rs/fusedev", "nydus-storage/backend-registry"] diff --git a/service/README.md b/service/README.md index faa0a13cce1..18cac19cbc0 100644 --- a/service/README.md +++ b/service/README.md @@ -1,157 +1,157 @@ -# nydus-service - -The `nydus-service` crate helps to reuse the core services of nydus, allowing you to integrate nydus services into your project elegantly and easily. It provides: - -* fuse service -* virtio-fs service -* fscache service -* blobcache service - -It also supplies the nydus daemon and the daemon controller to help manage these services. - -## Why you need - -You're supposed to know that `nydusd` running as daemon to expose a [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) mountpoint, a [Virtio-FS](https://virtio-fs.gitlab.io/) mountpoint or an [EROFS](https://docs.kernel.org/filesystems/erofs.html) mountpoint inside guest for containers to access, and it provides key features include: - -- Container images are downloaded on demand -- Chunk level data deduplication -- Flatten image metadata and data to remove all intermediate layers -- Only usable image data is saved when building a container image -- Only usable image data is downloaded when running a container -- End-to-end image data integrity -- Compatible with the OCI artifacts spec and distribution spec -- Integrated with existing CNCF project Dragonfly to support image distribution in large clusters -- Different container image storage backends are supported - -If you want to use these features as native in your project without preparing and invoking `nydusd` deliberately, `nydus-service` is just born for this. - -## How to use - -For example, reuse the fuse service with `nydus-service` in three steps. 
- -**prepare the config**: - -```rust -{ - "device": { - "backend": { - "type": "registry", - "config": { - "scheme": "", - "skip_verify": true, - "timeout": 5, - "connect_timeout": 5, - "retry_limit": 4, - "auth": "YOUR_LOGIN_AUTH=" - } - }, - "cache": { - "type": "blobcache", - "config": { - "work_dir": "cache" - } - } - }, - "mode": "direct", - "digest_validate": false, - "iostats_files": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 4 - } -} -``` - -**create a daemon**: - -```Rust -static ref DAEMON_CONTROLLER: DaemonController = DaemonController::default() - -let cmd = FsBackendMountCmd { - fs_type: FsBackendType::Rafs, - // Bootstrap path - source: bootstrap, - // Backend config - config, - // Virutal mountpoint - mountpoint: "/".to_string(), - // Prefetch files - prefetch_files: None, -}; - -let daemon = { - create_fuse_daemon( - // Mountpoint for the FUSE filesystem, target for `mount.fuse` - mountpoint, - // Vfs associated with the filesystem service object - vfs, - // Supervisor - None, - // Service instance identifier - id, - // Number of working threads to serve fuse requests - fuse_threads, - // daemon controller's waker - waker, - // Path to the Nydus daemon administration API socket - Some("api_sock"), - // Start Nydus daemon in upgrade mode - upgrade, - // Mounts FUSE filesystem in rw mode - !writable, - // FUSE server failover policy - failvoer-policy, - // Request structure to mount a backend filesystem instance - Some(cmd), - BTI.to_owned(), - ) - .map(|d| { - info!("Fuse daemon started!"); - d - }) - .map_err(|e| { - error!("Failed in starting daemon: {}", e); - e - })? -}; - -DAEMON_CONTROLLER.set_daemon(daemon); -``` - -**start daemon controller**: - -```rust -thread::spawn(move || { - let daemon = DAEMON_CONTROLLER.get_daemon(); - if let Some(fs) = daemon.get_default_fs_service() { - DAEMON_CONTROLLER.set_fs_service(fs); - } - - // Run the main event loop - if DAEMON_CONTROLLER.is_active() { - DAEMON_CONTROLLER.run_loop(); - } - - // Gracefully shutdown system. - info!("nydusd quits"); - DAEMON_CONTROLLER.shutdown(); -}); -``` - -Then, you can make the most of nydus services in your project. - -## Support - -**Platforms**: - -- x86_64 -- aarch64 - -**Operating Systems**: - -- Linux - -## License - -This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). +# nydus-service + +The `nydus-service` crate helps to reuse the core services of nydus, allowing you to integrate nydus services into your project elegantly and easily. It provides: + +* fuse service +* virtio-fs service +* fscache service +* blobcache service + +It also supplies the nydus daemon and the daemon controller to help manage these services. 
+
+## Why you need
+
+As you may know, `nydusd` runs as a daemon to expose a [FUSE](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) mountpoint, a [Virtio-FS](https://virtio-fs.gitlab.io/) mountpoint or an [EROFS](https://docs.kernel.org/filesystems/erofs.html) mountpoint inside the guest for containers to access, and it provides key features including:
+
+- Container images are downloaded on demand
+- Chunk level data deduplication
+- Flatten image metadata and data to remove all intermediate layers
+- Only usable image data is saved when building a container image
+- Only usable image data is downloaded when running a container
+- End-to-end image data integrity
+- Compatible with the OCI artifacts spec and distribution spec
+- Integrated with existing CNCF project Dragonfly to support image distribution in large clusters
+- Different container image storage backends are supported
+
+If you want to use these features natively in your project without deliberately preparing and invoking `nydusd`, `nydus-service` was born for exactly this.
+
+## How to use
+
+For example, reusing the fuse service with `nydus-service` takes three steps.
+
+**Prepare the config**:
+
+```json
+{
+    "device": {
+        "backend": {
+            "type": "registry",
+            "config": {
+                "scheme": "",
+                "skip_verify": true,
+                "timeout": 5,
+                "connect_timeout": 5,
+                "retry_limit": 4,
+                "auth": "YOUR_LOGIN_AUTH="
+            }
+        },
+        "cache": {
+            "type": "blobcache",
+            "config": {
+                "work_dir": "cache"
+            }
+        }
+    },
+    "mode": "direct",
+    "digest_validate": false,
+    "iostats_files": false,
+    "enable_xattr": true,
+    "fs_prefetch": {
+        "enable": true,
+        "threads_count": 4
+    }
+}
+```
+
+**Create a daemon**:
+
+```rust
+// Global daemon controller, declared with `lazy_static!`.
+static ref DAEMON_CONTROLLER: DaemonController = DaemonController::default();
+
+let cmd = FsBackendMountCmd {
+    fs_type: FsBackendType::Rafs,
+    // Bootstrap path
+    source: bootstrap,
+    // Backend config
+    config,
+    // Virtual mountpoint
+    mountpoint: "/".to_string(),
+    // Prefetch files
+    prefetch_files: None,
+};
+
+let daemon = {
+    create_fuse_daemon(
+        // Mountpoint for the FUSE filesystem, target for `mount.fuse`
+        mountpoint,
+        // Vfs associated with the filesystem service object
+        vfs,
+        // Supervisor
+        None,
+        // Service instance identifier
+        id,
+        // Number of working threads to serve fuse requests
+        fuse_threads,
+        // Daemon controller's waker
+        waker,
+        // Path to the Nydus daemon administration API socket
+        Some("api_sock"),
+        // Start Nydus daemon in upgrade mode
+        upgrade,
+        // Mount the FUSE filesystem read-only (the negation of `writable`)
+        !writable,
+        // FUSE server failover policy
+        failover_policy,
+        // Request structure to mount a backend filesystem instance
+        Some(cmd),
+        BTI.to_owned(),
+    )
+    .map(|d| {
+        info!("Fuse daemon started!");
+        d
+    })
+    .map_err(|e| {
+        error!("Failed in starting daemon: {}", e);
+        e
+    })?
+};
+
+DAEMON_CONTROLLER.set_daemon(daemon);
+```
+
+**Start the daemon controller**:
+
+```rust
+thread::spawn(move || {
+    let daemon = DAEMON_CONTROLLER.get_daemon();
+    if let Some(fs) = daemon.get_default_fs_service() {
+        DAEMON_CONTROLLER.set_fs_service(fs);
+    }
+
+    // Run the main event loop
+    if DAEMON_CONTROLLER.is_active() {
+        DAEMON_CONTROLLER.run_loop();
+    }
+
+    // Gracefully shut down the system.
+    info!("nydusd quits");
+    DAEMON_CONTROLLER.shutdown();
+});
+```
+
+Then, you can make the most of nydus services in your project.
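+
+To pull the crate into your own project, add it as a dependency and enable the Cargo features you need. The snippet below is a minimal sketch based on the crate manifest in this repository (package `nydus-service`, version `0.3.0`, with FUSE support in the default feature set); if you consume the crate from a checkout rather than a registry, a `path` or `git` dependency works the same way. Adjust the version and the optional features (`virtiofs`, `block-device`, `block-nbd`, `coco`) to match your use case.
+
+```toml
+[dependencies]
+# FUSE service only (default features).
+nydus-service = "0.3.0"
+
+# Or additionally enable the virtio-fs service:
+# nydus-service = { version = "0.3.0", features = ["virtiofs"] }
+```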
+ +## Support + +**Platforms**: + +- x86_64 +- aarch64 + +**Operating Systems**: + +- Linux + +## License + +This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). diff --git a/service/src/blob_cache.rs b/service/src/blob_cache.rs index 55ac1f27077..40bee749f11 100644 --- a/service/src/blob_cache.rs +++ b/service/src/blob_cache.rs @@ -1,796 +1,796 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! Blob cache manager to cache RAFS meta/data blob objects. - -use std::collections::HashMap; -use std::fs::OpenOptions; -use std::io::{Error, ErrorKind, Result}; -use std::os::fd::FromRawFd; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::{Arc, Mutex, MutexGuard}; - -use nydus_api::{ - BlobCacheEntry, BlobCacheList, BlobCacheObjectId, ConfigV2, BLOB_CACHE_TYPE_DATA_BLOB, - BLOB_CACHE_TYPE_META_BLOB, -}; -use nydus_rafs::metadata::layout::v6::{EROFS_BLOCK_BITS_12, EROFS_BLOCK_SIZE_4096}; -use nydus_rafs::metadata::{RafsBlobExtraInfo, RafsSuper, RafsSuperFlags}; -use nydus_storage::cache::BlobCache; -use nydus_storage::device::BlobInfo; -use nydus_storage::factory::BLOB_FACTORY; -use tokio_uring::buf::IoBufMut; -use tokio_uring::fs::File; - -const ID_SPLITTER: &str = "/"; - -/// Generate keys for cached blob objects from domain identifiers and blob identifiers. -pub fn generate_blob_key(domain_id: &str, blob_id: &str) -> String { - if domain_id.is_empty() { - blob_id.to_string() - } else { - format!("{}{}{}", domain_id, ID_SPLITTER, blob_id) - } -} - -/// Configuration information for a cached metadata blob. -pub struct MetaBlobConfig { - blob_id: String, - scoped_blob_id: String, - path: PathBuf, - config: Arc, - blobs: Mutex>>, - blob_extra_infos: HashMap, - is_tarfs_mode: bool, -} - -impl MetaBlobConfig { - /// Get blob id. - pub fn blob_id(&self) -> &str { - &self.blob_id - } - - /// Get file path to access the meta blob. - pub fn path(&self) -> &Path { - &self.path - } - - /// Get the ['ConfigV2'] object associated with the cached data blob. - pub fn config_v2(&self) -> &Arc { - &self.config - } - - pub fn get_blobs(&self) -> Vec> { - self.blobs.lock().unwrap().clone() - } - - /// Get optional extra information associated with a blob object. - pub fn get_blob_extra_info(&self, blob_id: &str) -> Option<&RafsBlobExtraInfo> { - self.blob_extra_infos.get(blob_id) - } - - /// Check whether the filesystem is in `TARFS` mode. - pub fn is_tarfs_mode(&self) -> bool { - self.is_tarfs_mode - } - - fn add_data_blob(&self, blob: Arc) { - self.blobs.lock().unwrap().push(blob); - } -} - -/// Configuration information for a cached data blob. -pub struct DataBlobConfig { - scoped_blob_id: String, - blob_info: Arc, - config: Arc, - ref_count: AtomicU32, -} - -impl DataBlobConfig { - /// Get the [`BlobInfo`](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobInfo.html) object associated with the cached data blob. - pub fn blob_info(&self) -> &Arc { - &self.blob_info - } - - /// Get the ['ConfigV2'] object associated with the cached data blob. - pub fn config_v2(&self) -> &Arc { - &self.config - } -} - -/// Configuration information for a cached metadata/data blob. -#[derive(Clone)] -pub enum BlobConfig { - /// Configuration information for cached meta blob objects. - MetaBlob(Arc), - /// Configuration information for cached data blob objects. 
- DataBlob(Arc), -} - -impl BlobConfig { - /// Get the ['ConfigV2'] object associated with the cached data blob. - pub fn config_v2(&self) -> &Arc { - match self { - BlobConfig::MetaBlob(v) => v.config_v2(), - BlobConfig::DataBlob(v) => v.config_v2(), - } - } - - fn new_data_blob(domain_id: String, blob_info: Arc, config: Arc) -> Self { - let scoped_blob_id = generate_blob_key(&domain_id, &blob_info.blob_id()); - - BlobConfig::DataBlob(Arc::new(DataBlobConfig { - blob_info, - scoped_blob_id, - config, - ref_count: AtomicU32::new(1), - })) - } - - fn new_meta_blob( - domain_id: String, - blob_id: String, - path: PathBuf, - config: Arc, - blob_extra_infos: HashMap, - is_tarfs_mode: bool, - ) -> Self { - let scoped_blob_id = generate_blob_key(&domain_id, &blob_id); - - BlobConfig::MetaBlob(Arc::new(MetaBlobConfig { - blob_id, - scoped_blob_id, - path, - config, - blobs: Mutex::new(Vec::new()), - blob_extra_infos, - is_tarfs_mode, - })) - } - - fn key(&self) -> &str { - match self { - BlobConfig::MetaBlob(o) => &o.scoped_blob_id, - BlobConfig::DataBlob(o) => &o.scoped_blob_id, - } - } - - fn meta_config(&self) -> Option> { - match self { - BlobConfig::MetaBlob(o) => Some(o.clone()), - BlobConfig::DataBlob(_o) => None, - } - } -} - -#[derive(Default)] -struct BlobCacheState { - id_to_config_map: HashMap, -} - -impl BlobCacheState { - fn new() -> Self { - Self::default() - } - - fn try_add(&mut self, config: BlobConfig) -> Result<()> { - let key = config.key(); - - if let Some(entry) = self.id_to_config_map.get(key) { - match entry { - BlobConfig::MetaBlob(_o) => { - // Meta blob must be unique. - return Err(Error::new( - ErrorKind::AlreadyExists, - "blob_cache: bootstrap blob already exists", - )); - } - BlobConfig::DataBlob(o) => { - // Data blob is reference counted. - o.ref_count.fetch_add(1, Ordering::AcqRel); - } - } - } else { - self.id_to_config_map.insert(key.to_owned(), config); - } - - Ok(()) - } - - fn remove(&mut self, param: &BlobCacheObjectId) -> Result<()> { - if param.blob_id.is_empty() && !param.domain_id.is_empty() { - // Remove all blobs associated with the domain. - let scoped_blob_prefix = format!("{}{}", param.domain_id, ID_SPLITTER); - self.id_to_config_map.retain(|_k, v| match v { - BlobConfig::MetaBlob(o) => !o.scoped_blob_id.starts_with(&scoped_blob_prefix), - BlobConfig::DataBlob(o) => !o.scoped_blob_id.starts_with(&scoped_blob_prefix), - }); - } else { - let mut data_blobs = Vec::new(); - let mut is_meta = false; - let scoped_blob_prefix = generate_blob_key(¶m.domain_id, ¶m.blob_id); - - match self.id_to_config_map.get(&scoped_blob_prefix) { - None => return Err(enoent!("blob_cache: cache entry not found")), - Some(BlobConfig::MetaBlob(o)) => { - is_meta = true; - data_blobs = o.blobs.lock().unwrap().clone(); - } - Some(BlobConfig::DataBlob(o)) => { - data_blobs.push(o.clone()); - } - } - - for entry in data_blobs { - if entry.ref_count.fetch_sub(1, Ordering::AcqRel) == 1 { - self.id_to_config_map.remove(&entry.scoped_blob_id); - } - } - - if is_meta { - self.id_to_config_map.remove(&scoped_blob_prefix); - } - } - - Ok(()) - } - - fn get(&self, key: &str) -> Option { - self.id_to_config_map.get(key).cloned() - } -} - -/// Structure to manage and cache RAFS meta/data blob objects. -#[derive(Default)] -pub struct BlobCacheMgr { - state: Mutex, -} - -impl BlobCacheMgr { - /// Create a new instance of `BlobCacheMgr`. 
- pub fn new() -> Self { - BlobCacheMgr { - state: Mutex::new(BlobCacheState::new()), - } - } - - /// Add a meta/data blob to be managed by the cache manager. - /// - /// When adding a RAFS meta blob to the cache manager, all data blobs referenced by the - /// bootstrap blob will also be added to the cache manager too. It may be used to add a RAFS - /// container image to the cache manager. - /// - /// Domains are used to control the blob sharing scope. All meta and data blobs associated - /// with the same domain will be shared/reused, but blobs associated with different domains are - /// isolated. The `domain_id` is used to identify the associated domain. - pub fn add_blob_entry(&self, entry: &BlobCacheEntry) -> Result<()> { - match entry.blob_type.as_str() { - BLOB_CACHE_TYPE_META_BLOB => { - let (path, config) = self.get_meta_info(entry)?; - self.add_meta_object(&entry.domain_id, &entry.blob_id, path, config) - .map_err(|e| { - warn!( - "blob_cache: failed to add cache entry for meta blob: {:?}", - entry - ); - e - }) - } - BLOB_CACHE_TYPE_DATA_BLOB => Err(einval!(format!( - "blob_cache: invalid data blob cache entry: {:?}", - entry - ))), - _ => Err(einval!(format!( - "blob_cache: invalid blob cache entry, {:?}", - entry - ))), - } - } - - /// Add a list of meta/data blobs to be cached by the cache manager. - /// - /// If failed to add some blob, the blobs already added won't be rolled back. - pub fn add_blob_list(&self, blobs: &BlobCacheList) -> Result<()> { - for entry in blobs.blobs.iter() { - self.add_blob_entry(entry)?; - } - - Ok(()) - } - - /// Remove a meta/data blob object from the cache manager. - pub fn remove_blob_entry(&self, param: &BlobCacheObjectId) -> Result<()> { - self.get_state().remove(param) - } - - /// Get configuration information of the cached blob with specified `key`. - pub fn get_config(&self, key: &str) -> Option { - self.get_state().get(key) - } - - #[inline] - fn get_state(&self) -> MutexGuard { - self.state.lock().unwrap() - } - - fn get_meta_info(&self, entry: &BlobCacheEntry) -> Result<(PathBuf, Arc)> { - let config = entry - .blob_config - .as_ref() - .ok_or_else(|| einval!("blob_cache: missing blob cache configuration information"))?; - - if entry.blob_id.contains(ID_SPLITTER) { - return Err(einval!("blob_cache: `blob_id` for meta blob is invalid")); - } else if entry.domain_id.contains(ID_SPLITTER) { - return Err(einval!("blob_cache: `domain_id` for meta blob is invalid")); - } - - let path = config.metadata_path.clone().unwrap_or_default(); - if path.is_empty() { - return Err(einval!( - "blob_cache: `config.metadata_path` for meta blob is empty" - )); - } - let path = Path::new(&path).canonicalize().map_err(|_e| { - einval!(format!( - "blob_cache: `config.metadata_path={}` for meta blob is invalid", - path - )) - })?; - if !path.is_file() { - return Err(einval!( - "blob_cache: `config.metadata_path` for meta blob is not a file" - )); - } - - // Validate type of backend and cache. 
- if config.cache.is_fscache() { - // Validate the working directory for fscache - let cache_config = config.cache.get_fscache_config()?; - let path2 = Path::new(&cache_config.work_dir); - let path2 = path2 - .canonicalize() - .map_err(|_e| eio!("blob_cache: `config.cache_config.work_dir` is invalid"))?; - if !path2.is_dir() { - return Err(einval!( - "blob_cache: `config.cache_config.work_dir` is not a directory" - )); - } - } else if config.cache.is_filecache() { - // Validate the working directory for filecache - let cache_config = config.cache.get_filecache_config()?; - let path2 = Path::new(&cache_config.work_dir); - let path2 = path2 - .canonicalize() - .map_err(|_e| eio!("blob_cache: `config.cache_config.work_dir` is invalid"))?; - if !path2.is_dir() { - return Err(einval!( - "blob_cache: `config.cache_config.work_dir` is not a directory" - )); - } - } else { - return Err(einval!("blob_cache: unknown cache type")); - } - - let config: Arc = Arc::new(config.into()); - config.internal.set_blob_accessible(true); - - Ok((path, config)) - } - - fn add_meta_object( - &self, - domain_id: &str, - id: &str, - path: PathBuf, - config: Arc, - ) -> Result<()> { - let (rs, _) = RafsSuper::load_from_file(&path, config.clone(), false)?; - if rs.meta.is_v5() { - return Err(einval!("blob_cache: RAFSv5 image is not supported")); - } - - let blob_extra_infos = rs.superblock.get_blob_extra_infos()?; - let meta = BlobConfig::new_meta_blob( - domain_id.to_string(), - id.to_string(), - path, - config, - blob_extra_infos, - rs.meta.flags.contains(RafsSuperFlags::TARTFS_MODE), - ); - // Safe to unwrap because it's a meta blob object. - let meta_obj = meta.meta_config().unwrap(); - let mut state = self.get_state(); - state.try_add(meta)?; - - // Try to add the referenced data blob object if it doesn't exist yet. - for bi in rs.superblock.get_blob_infos() { - debug!( - "blob_cache: add data blob {} to domain {}", - &bi.blob_id(), - domain_id - ); - let data_blob = - BlobConfig::new_data_blob(domain_id.to_string(), bi, meta_obj.config.clone()); - let data_blob_config = match &data_blob { - BlobConfig::DataBlob(entry) => entry.clone(), - _ => panic!("blob_cache: internal error"), - }; - - if let Err(e) = state.try_add(data_blob) { - // Rollback added bootstrap/data blobs. - let id = BlobCacheObjectId { - domain_id: domain_id.to_string(), - blob_id: id.to_string(), - }; - let _ = state.remove(&id); - return Err(e); - } - - // Associate the data blob with the bootstrap blob. - meta_obj.add_data_blob(data_blob_config); - } - - Ok(()) - } -} - -/// Structure representing a cached metadata blob. -pub struct MetaBlob { - file: File, - size: u64, -} - -impl MetaBlob { - /// Create a new [MetaBlob] object from - pub fn new>(path: P) -> Result { - let file = OpenOptions::new() - .read(true) - .write(false) - .open(path.as_ref()) - .map_err(|e| { - warn!( - "blob_cache: failed to open metadata blob {}", - path.as_ref().display() - ); - e - })?; - let md = file.metadata().map_err(|e| { - warn!( - "blob_cache: failed to get metadata about metadata blob {}", - path.as_ref().display() - ); - e - })?; - let size = md.len(); - if size % EROFS_BLOCK_SIZE_4096 != 0 || (size >> EROFS_BLOCK_BITS_12) > u32::MAX as u64 { - return Err(einval!(format!( - "blob_cache: metadata blob size (0x{:x}) is invalid", - size - ))); - } - - Ok(MetaBlob { - file: File::from_std(file), - size, - }) - } - - /// Get number of blocks in unit of EROFS_BLOCK_SIZE. 
- pub fn blocks(&self) -> u32 { - (self.size >> EROFS_BLOCK_BITS_12) as u32 - } - - /// Read data from the cached metadata blob in asynchronous mode. - pub async fn async_read(&self, pos: u64, buf: T) -> (Result, T) { - self.file.read_at(buf, pos).await - } -} - -/// Structure representing a cached data blob. -pub struct DataBlob { - blob_id: String, - blob: Arc, - file: File, -} - -impl DataBlob { - /// Create a new instance of [DataBlob]. - pub fn new(config: &Arc) -> Result { - let blob_id = config.blob_info().blob_id(); - let blob = BLOB_FACTORY - .new_blob_cache(config.config_v2(), &config.blob_info) - .map_err(|e| { - warn!( - "blob_cache: failed to create cache object for blob {}", - blob_id - ); - e - })?; - - match blob.get_blob_object() { - Some(obj) => { - let fd = nix::unistd::dup(obj.as_raw_fd())?; - // Safe because the `fd` is valid. - let file = unsafe { File::from_raw_fd(fd) }; - Ok(DataBlob { - blob_id, - blob, - file, - }) - } - None => Err(eio!(format!( - "blob_cache: failed to get BlobObject for blob {}", - blob_id - ))), - } - } - - /// Read data from the cached data blob in asynchronous mode. - pub async fn async_read(&self, pos: u64, buf: T) -> (Result, T) { - match self.blob.get_blob_object() { - Some(obj) => match obj.fetch_range_uncompressed(pos, buf.bytes_total() as u64) { - Ok(_) => self.file.read_at(buf, pos).await, - Err(e) => (Err(e), buf), - }, - None => ( - Err(eio!(format!( - "blob_cache: failed to get BlobObject for blob {}", - self.blob_id - ))), - buf, - ), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use vmm_sys_util::tempdir::TempDir; - - fn create_factory_config() -> String { - let config = r#" - { - "type": "bootstrap", - "id": "bootstrap1", - "domain_id": "userid1", - "config": { - "id": "factory1", - "backend_type": "localfs", - "backend_config": { - "dir": "/tmp/nydus" - }, - "cache_type": "fscache", - "cache_config": { - "work_dir": "/tmp/nydus" - }, - "metadata_path": "/tmp/nydus/bootstrap1" - } - }"#; - - config.to_string() - } - - #[test] - fn test_generate_blob_key() { - assert_eq!(&generate_blob_key("", "blob1"), "blob1"); - assert_eq!(&generate_blob_key("domain1", "blob1"), "domain1/blob1"); - } - - #[test] - fn test_blob_cache_entry() { - let tmpdir = TempDir::new().unwrap(); - let path = tmpdir.as_path().join("bootstrap1"); - std::fs::write(path, "metadata").unwrap(); - let cfg = create_factory_config(); - let content = cfg.replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()); - let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); - assert!(entry.prepare_configuration_info()); - let blob_config = entry.blob_config.as_ref().unwrap(); - - assert_eq!(&entry.blob_type, "bootstrap"); - assert_eq!(&entry.blob_id, "bootstrap1"); - assert_eq!(&entry.domain_id, "userid1"); - assert_eq!(&blob_config.id, "factory1"); - assert_eq!(&blob_config.backend.backend_type, "localfs"); - assert_eq!(&blob_config.cache.cache_type, "fscache"); - assert!(blob_config.metadata_path.is_some()); - assert!(blob_config.backend.localfs.is_some()); - assert!(blob_config.cache.fs_cache.is_some()); - - let mgr = BlobCacheMgr::new(); - let (path, config) = mgr.get_meta_info(&entry).unwrap(); - let backend_cfg = config.get_backend_config().unwrap(); - let cache_cfg = config.get_cache_config().unwrap(); - assert_eq!(path, tmpdir.as_path().join("bootstrap1")); - assert_eq!(&config.id, "factory1"); - assert_eq!(&backend_cfg.backend_type, "localfs"); - assert_eq!(&cache_cfg.cache_type, "fscache"); - - let blob = MetaBlobConfig { - blob_id: 
"123456789-123".to_string(), - scoped_blob_id: "domain1".to_string(), - path: path.clone(), - config, - blobs: Mutex::new(Vec::new()), - blob_extra_infos: HashMap::new(), - is_tarfs_mode: false, - }; - assert_eq!(blob.path(), &path); - assert_eq!(blob.blob_id(), "123456789-123"); - } - - #[test] - fn test_invalid_blob_id() { - let tmpdir = TempDir::new().unwrap(); - let path = tmpdir.as_path().join("bootstrap1"); - std::fs::write(path, "metadata").unwrap(); - let config = create_factory_config(); - let content = config.replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()); - let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); - let mgr = BlobCacheMgr::new(); - - entry.blob_id = "domain2/blob1".to_string(); - mgr.get_meta_info(&entry).unwrap_err(); - } - - #[test] - fn test_blob_cache_list() { - let config = r#" - { - "blobs" : [ - { - "type": "bootstrap", - "id": "bootstrap1", - "domain_id": "userid1", - "config": { - "id": "factory1", - "backend_type": "localfs", - "backend_config": { - "dir": "/tmp/nydus" - }, - "cache_type": "fscache", - "cache_config": { - "work_dir": "/tmp/nydus" - }, - "metadata_path": "/tmp/nydus/bootstrap1" - } - }, - { - "type": "bootstrap", - "id": "bootstrap2", - "domain_id": "userid2", - "config": { - "id": "factory1", - "backend_type": "localfs", - "backend_config": { - "dir": "/tmp/nydus" - }, - "cache_type": "fscache", - "cache_config": { - "work_dir": "/tmp/nydus" - }, - "metadata_path": "/tmp/nydus/bootstrap2" - } - } - ] - }"#; - let mut list: BlobCacheList = serde_json::from_str(config).unwrap(); - assert!(list.blobs[0].prepare_configuration_info()); - - assert_eq!(list.blobs.len(), 2); - assert_eq!(&list.blobs[0].blob_type, "bootstrap"); - assert_eq!(&list.blobs[0].blob_id, "bootstrap1"); - let blob_config = &list.blobs[0].blob_config.as_ref().unwrap(); - assert_eq!(&blob_config.id, "factory1"); - assert_eq!(&blob_config.backend.backend_type, "localfs"); - assert_eq!(&blob_config.cache.cache_type, "fscache"); - assert_eq!(&list.blobs[1].blob_type, "bootstrap"); - assert_eq!(&list.blobs[1].blob_id, "bootstrap2"); - } - - #[test] - fn test_add_bootstrap() { - let tmpdir = TempDir::new().unwrap(); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - - let config = r#" - { - "type": "bootstrap", - "id": "rafs-v6", - "domain_id": "domain2", - "config_v2": { - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "fscache", - "fscache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "RAFS_V5" - } - }"#; - let content = config - .replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()) - .replace("RAFS_V5", &source_path.display().to_string()); - let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); - assert!(entry.prepare_configuration_info()); - - let mgr = BlobCacheMgr::new(); - mgr.add_blob_entry(&entry).unwrap(); - let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); - assert!(mgr.get_config(&blob_id).is_some()); - - // add the same entry will trigger an error - assert!(mgr.add_blob_entry(&entry).is_err()); - - // Check existence of data blob referenced by the bootstrap. 
- let key = generate_blob_key( - &entry.domain_id, - "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", - ); - assert!(mgr.get_config(&key).is_some()); - - assert_eq!(mgr.get_state().id_to_config_map.len(), 2); - - entry.blob_id = "rafs-v6-cloned".to_string(); - let blob_id_cloned = generate_blob_key(&entry.domain_id, &entry.blob_id); - mgr.add_blob_entry(&entry).unwrap(); - assert_eq!(mgr.get_state().id_to_config_map.len(), 3); - assert!(mgr.get_config(&blob_id).is_some()); - assert!(mgr.get_config(&blob_id_cloned).is_some()); - - mgr.remove_blob_entry(&BlobCacheObjectId { - domain_id: entry.domain_id.clone(), - blob_id: "rafs-v6".to_string(), - }) - .unwrap(); - assert_eq!(mgr.get_state().id_to_config_map.len(), 2); - assert!(mgr.get_config(&blob_id).is_none()); - assert!(mgr.get_config(&blob_id_cloned).is_some()); - - mgr.remove_blob_entry(&BlobCacheObjectId { - domain_id: entry.domain_id, - blob_id: "rafs-v6-cloned".to_string(), - }) - .unwrap(); - assert_eq!(mgr.get_state().id_to_config_map.len(), 0); - assert!(mgr.get_config(&blob_id).is_none()); - assert!(mgr.get_config(&blob_id_cloned).is_none()); - } - - #[test] - fn test_meta_blob() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - - tokio_uring::start(async move { - let meta_blob = MetaBlob::new(&source_path).unwrap(); - assert_eq!(meta_blob.blocks(), 5); - let buf = vec![0u8; 4096]; - let (res, buf) = meta_blob.async_read(0, buf).await; - assert_eq!(res.unwrap(), 4096); - assert_eq!(buf[0], 0); - assert_eq!(buf[1023], 0); - assert_eq!(buf[1024], 0xe2); - assert_eq!(buf[1027], 0xe0); - let (res, _buf) = meta_blob.async_read(0x6000, buf).await; - assert_eq!(res.unwrap(), 0); - }); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Blob cache manager to cache RAFS meta/data blob objects. + +use std::collections::HashMap; +use std::fs::OpenOptions; +use std::io::{Error, ErrorKind, Result}; +use std::os::fd::FromRawFd; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex, MutexGuard}; + +use nydus_api::{ + BlobCacheEntry, BlobCacheList, BlobCacheObjectId, ConfigV2, BLOB_CACHE_TYPE_DATA_BLOB, + BLOB_CACHE_TYPE_META_BLOB, +}; +use nydus_rafs::metadata::layout::v6::{EROFS_BLOCK_BITS_12, EROFS_BLOCK_SIZE_4096}; +use nydus_rafs::metadata::{RafsBlobExtraInfo, RafsSuper, RafsSuperFlags}; +use nydus_storage::cache::BlobCache; +use nydus_storage::device::BlobInfo; +use nydus_storage::factory::BLOB_FACTORY; +use tokio_uring::buf::IoBufMut; +use tokio_uring::fs::File; + +const ID_SPLITTER: &str = "/"; + +/// Generate keys for cached blob objects from domain identifiers and blob identifiers. +pub fn generate_blob_key(domain_id: &str, blob_id: &str) -> String { + if domain_id.is_empty() { + blob_id.to_string() + } else { + format!("{}{}{}", domain_id, ID_SPLITTER, blob_id) + } +} + +/// Configuration information for a cached metadata blob. +pub struct MetaBlobConfig { + blob_id: String, + scoped_blob_id: String, + path: PathBuf, + config: Arc, + blobs: Mutex>>, + blob_extra_infos: HashMap, + is_tarfs_mode: bool, +} + +impl MetaBlobConfig { + /// Get blob id. + pub fn blob_id(&self) -> &str { + &self.blob_id + } + + /// Get file path to access the meta blob. 
+ pub fn path(&self) -> &Path { + &self.path + } + + /// Get the ['ConfigV2'] object associated with the cached data blob. + pub fn config_v2(&self) -> &Arc { + &self.config + } + + pub fn get_blobs(&self) -> Vec> { + self.blobs.lock().unwrap().clone() + } + + /// Get optional extra information associated with a blob object. + pub fn get_blob_extra_info(&self, blob_id: &str) -> Option<&RafsBlobExtraInfo> { + self.blob_extra_infos.get(blob_id) + } + + /// Check whether the filesystem is in `TARFS` mode. + pub fn is_tarfs_mode(&self) -> bool { + self.is_tarfs_mode + } + + fn add_data_blob(&self, blob: Arc) { + self.blobs.lock().unwrap().push(blob); + } +} + +/// Configuration information for a cached data blob. +pub struct DataBlobConfig { + scoped_blob_id: String, + blob_info: Arc, + config: Arc, + ref_count: AtomicU32, +} + +impl DataBlobConfig { + /// Get the [`BlobInfo`](https://docs.rs/nydus-storage/latest/nydus_storage/device/struct.BlobInfo.html) object associated with the cached data blob. + pub fn blob_info(&self) -> &Arc { + &self.blob_info + } + + /// Get the ['ConfigV2'] object associated with the cached data blob. + pub fn config_v2(&self) -> &Arc { + &self.config + } +} + +/// Configuration information for a cached metadata/data blob. +#[derive(Clone)] +pub enum BlobConfig { + /// Configuration information for cached meta blob objects. + MetaBlob(Arc), + /// Configuration information for cached data blob objects. + DataBlob(Arc), +} + +impl BlobConfig { + /// Get the ['ConfigV2'] object associated with the cached data blob. + pub fn config_v2(&self) -> &Arc { + match self { + BlobConfig::MetaBlob(v) => v.config_v2(), + BlobConfig::DataBlob(v) => v.config_v2(), + } + } + + fn new_data_blob(domain_id: String, blob_info: Arc, config: Arc) -> Self { + let scoped_blob_id = generate_blob_key(&domain_id, &blob_info.blob_id()); + + BlobConfig::DataBlob(Arc::new(DataBlobConfig { + blob_info, + scoped_blob_id, + config, + ref_count: AtomicU32::new(1), + })) + } + + fn new_meta_blob( + domain_id: String, + blob_id: String, + path: PathBuf, + config: Arc, + blob_extra_infos: HashMap, + is_tarfs_mode: bool, + ) -> Self { + let scoped_blob_id = generate_blob_key(&domain_id, &blob_id); + + BlobConfig::MetaBlob(Arc::new(MetaBlobConfig { + blob_id, + scoped_blob_id, + path, + config, + blobs: Mutex::new(Vec::new()), + blob_extra_infos, + is_tarfs_mode, + })) + } + + fn key(&self) -> &str { + match self { + BlobConfig::MetaBlob(o) => &o.scoped_blob_id, + BlobConfig::DataBlob(o) => &o.scoped_blob_id, + } + } + + fn meta_config(&self) -> Option> { + match self { + BlobConfig::MetaBlob(o) => Some(o.clone()), + BlobConfig::DataBlob(_o) => None, + } + } +} + +#[derive(Default)] +struct BlobCacheState { + id_to_config_map: HashMap, +} + +impl BlobCacheState { + fn new() -> Self { + Self::default() + } + + fn try_add(&mut self, config: BlobConfig) -> Result<()> { + let key = config.key(); + + if let Some(entry) = self.id_to_config_map.get(key) { + match entry { + BlobConfig::MetaBlob(_o) => { + // Meta blob must be unique. + return Err(Error::new( + ErrorKind::AlreadyExists, + "blob_cache: bootstrap blob already exists", + )); + } + BlobConfig::DataBlob(o) => { + // Data blob is reference counted. 
+ o.ref_count.fetch_add(1, Ordering::AcqRel); + } + } + } else { + self.id_to_config_map.insert(key.to_owned(), config); + } + + Ok(()) + } + + fn remove(&mut self, param: &BlobCacheObjectId) -> Result<()> { + if param.blob_id.is_empty() && !param.domain_id.is_empty() { + // Remove all blobs associated with the domain. + let scoped_blob_prefix = format!("{}{}", param.domain_id, ID_SPLITTER); + self.id_to_config_map.retain(|_k, v| match v { + BlobConfig::MetaBlob(o) => !o.scoped_blob_id.starts_with(&scoped_blob_prefix), + BlobConfig::DataBlob(o) => !o.scoped_blob_id.starts_with(&scoped_blob_prefix), + }); + } else { + let mut data_blobs = Vec::new(); + let mut is_meta = false; + let scoped_blob_prefix = generate_blob_key(¶m.domain_id, ¶m.blob_id); + + match self.id_to_config_map.get(&scoped_blob_prefix) { + None => return Err(enoent!("blob_cache: cache entry not found")), + Some(BlobConfig::MetaBlob(o)) => { + is_meta = true; + data_blobs = o.blobs.lock().unwrap().clone(); + } + Some(BlobConfig::DataBlob(o)) => { + data_blobs.push(o.clone()); + } + } + + for entry in data_blobs { + if entry.ref_count.fetch_sub(1, Ordering::AcqRel) == 1 { + self.id_to_config_map.remove(&entry.scoped_blob_id); + } + } + + if is_meta { + self.id_to_config_map.remove(&scoped_blob_prefix); + } + } + + Ok(()) + } + + fn get(&self, key: &str) -> Option { + self.id_to_config_map.get(key).cloned() + } +} + +/// Structure to manage and cache RAFS meta/data blob objects. +#[derive(Default)] +pub struct BlobCacheMgr { + state: Mutex, +} + +impl BlobCacheMgr { + /// Create a new instance of `BlobCacheMgr`. + pub fn new() -> Self { + BlobCacheMgr { + state: Mutex::new(BlobCacheState::new()), + } + } + + /// Add a meta/data blob to be managed by the cache manager. + /// + /// When adding a RAFS meta blob to the cache manager, all data blobs referenced by the + /// bootstrap blob will also be added to the cache manager too. It may be used to add a RAFS + /// container image to the cache manager. + /// + /// Domains are used to control the blob sharing scope. All meta and data blobs associated + /// with the same domain will be shared/reused, but blobs associated with different domains are + /// isolated. The `domain_id` is used to identify the associated domain. + pub fn add_blob_entry(&self, entry: &BlobCacheEntry) -> Result<()> { + match entry.blob_type.as_str() { + BLOB_CACHE_TYPE_META_BLOB => { + let (path, config) = self.get_meta_info(entry)?; + self.add_meta_object(&entry.domain_id, &entry.blob_id, path, config) + .map_err(|e| { + warn!( + "blob_cache: failed to add cache entry for meta blob: {:?}", + entry + ); + e + }) + } + BLOB_CACHE_TYPE_DATA_BLOB => Err(einval!(format!( + "blob_cache: invalid data blob cache entry: {:?}", + entry + ))), + _ => Err(einval!(format!( + "blob_cache: invalid blob cache entry, {:?}", + entry + ))), + } + } + + /// Add a list of meta/data blobs to be cached by the cache manager. + /// + /// If failed to add some blob, the blobs already added won't be rolled back. + pub fn add_blob_list(&self, blobs: &BlobCacheList) -> Result<()> { + for entry in blobs.blobs.iter() { + self.add_blob_entry(entry)?; + } + + Ok(()) + } + + /// Remove a meta/data blob object from the cache manager. + pub fn remove_blob_entry(&self, param: &BlobCacheObjectId) -> Result<()> { + self.get_state().remove(param) + } + + /// Get configuration information of the cached blob with specified `key`. 
+ pub fn get_config(&self, key: &str) -> Option { + self.get_state().get(key) + } + + #[inline] + fn get_state(&self) -> MutexGuard { + self.state.lock().unwrap() + } + + fn get_meta_info(&self, entry: &BlobCacheEntry) -> Result<(PathBuf, Arc)> { + let config = entry + .blob_config + .as_ref() + .ok_or_else(|| einval!("blob_cache: missing blob cache configuration information"))?; + + if entry.blob_id.contains(ID_SPLITTER) { + return Err(einval!("blob_cache: `blob_id` for meta blob is invalid")); + } else if entry.domain_id.contains(ID_SPLITTER) { + return Err(einval!("blob_cache: `domain_id` for meta blob is invalid")); + } + + let path = config.metadata_path.clone().unwrap_or_default(); + if path.is_empty() { + return Err(einval!( + "blob_cache: `config.metadata_path` for meta blob is empty" + )); + } + let path = Path::new(&path).canonicalize().map_err(|_e| { + einval!(format!( + "blob_cache: `config.metadata_path={}` for meta blob is invalid", + path + )) + })?; + if !path.is_file() { + return Err(einval!( + "blob_cache: `config.metadata_path` for meta blob is not a file" + )); + } + + // Validate type of backend and cache. + if config.cache.is_fscache() { + // Validate the working directory for fscache + let cache_config = config.cache.get_fscache_config()?; + let path2 = Path::new(&cache_config.work_dir); + let path2 = path2 + .canonicalize() + .map_err(|_e| eio!("blob_cache: `config.cache_config.work_dir` is invalid"))?; + if !path2.is_dir() { + return Err(einval!( + "blob_cache: `config.cache_config.work_dir` is not a directory" + )); + } + } else if config.cache.is_filecache() { + // Validate the working directory for filecache + let cache_config = config.cache.get_filecache_config()?; + let path2 = Path::new(&cache_config.work_dir); + let path2 = path2 + .canonicalize() + .map_err(|_e| eio!("blob_cache: `config.cache_config.work_dir` is invalid"))?; + if !path2.is_dir() { + return Err(einval!( + "blob_cache: `config.cache_config.work_dir` is not a directory" + )); + } + } else { + return Err(einval!("blob_cache: unknown cache type")); + } + + let config: Arc = Arc::new(config.into()); + config.internal.set_blob_accessible(true); + + Ok((path, config)) + } + + fn add_meta_object( + &self, + domain_id: &str, + id: &str, + path: PathBuf, + config: Arc, + ) -> Result<()> { + let (rs, _) = RafsSuper::load_from_file(&path, config.clone(), false)?; + if rs.meta.is_v5() { + return Err(einval!("blob_cache: RAFSv5 image is not supported")); + } + + let blob_extra_infos = rs.superblock.get_blob_extra_infos()?; + let meta = BlobConfig::new_meta_blob( + domain_id.to_string(), + id.to_string(), + path, + config, + blob_extra_infos, + rs.meta.flags.contains(RafsSuperFlags::TARTFS_MODE), + ); + // Safe to unwrap because it's a meta blob object. + let meta_obj = meta.meta_config().unwrap(); + let mut state = self.get_state(); + state.try_add(meta)?; + + // Try to add the referenced data blob object if it doesn't exist yet. + for bi in rs.superblock.get_blob_infos() { + debug!( + "blob_cache: add data blob {} to domain {}", + &bi.blob_id(), + domain_id + ); + let data_blob = + BlobConfig::new_data_blob(domain_id.to_string(), bi, meta_obj.config.clone()); + let data_blob_config = match &data_blob { + BlobConfig::DataBlob(entry) => entry.clone(), + _ => panic!("blob_cache: internal error"), + }; + + if let Err(e) = state.try_add(data_blob) { + // Rollback added bootstrap/data blobs. 
+ let id = BlobCacheObjectId { + domain_id: domain_id.to_string(), + blob_id: id.to_string(), + }; + let _ = state.remove(&id); + return Err(e); + } + + // Associate the data blob with the bootstrap blob. + meta_obj.add_data_blob(data_blob_config); + } + + Ok(()) + } +} + +/// Structure representing a cached metadata blob. +pub struct MetaBlob { + file: File, + size: u64, +} + +impl MetaBlob { + /// Create a new [MetaBlob] object from + pub fn new>(path: P) -> Result { + let file = OpenOptions::new() + .read(true) + .write(false) + .open(path.as_ref()) + .map_err(|e| { + warn!( + "blob_cache: failed to open metadata blob {}", + path.as_ref().display() + ); + e + })?; + let md = file.metadata().map_err(|e| { + warn!( + "blob_cache: failed to get metadata about metadata blob {}", + path.as_ref().display() + ); + e + })?; + let size = md.len(); + if size % EROFS_BLOCK_SIZE_4096 != 0 || (size >> EROFS_BLOCK_BITS_12) > u32::MAX as u64 { + return Err(einval!(format!( + "blob_cache: metadata blob size (0x{:x}) is invalid", + size + ))); + } + + Ok(MetaBlob { + file: File::from_std(file), + size, + }) + } + + /// Get number of blocks in unit of EROFS_BLOCK_SIZE. + pub fn blocks(&self) -> u32 { + (self.size >> EROFS_BLOCK_BITS_12) as u32 + } + + /// Read data from the cached metadata blob in asynchronous mode. + pub async fn async_read(&self, pos: u64, buf: T) -> (Result, T) { + self.file.read_at(buf, pos).await + } +} + +/// Structure representing a cached data blob. +pub struct DataBlob { + blob_id: String, + blob: Arc, + file: File, +} + +impl DataBlob { + /// Create a new instance of [DataBlob]. + pub fn new(config: &Arc) -> Result { + let blob_id = config.blob_info().blob_id(); + let blob = BLOB_FACTORY + .new_blob_cache(config.config_v2(), &config.blob_info) + .map_err(|e| { + warn!( + "blob_cache: failed to create cache object for blob {}", + blob_id + ); + e + })?; + + match blob.get_blob_object() { + Some(obj) => { + let fd = nix::unistd::dup(obj.as_raw_fd())?; + // Safe because the `fd` is valid. + let file = unsafe { File::from_raw_fd(fd) }; + Ok(DataBlob { + blob_id, + blob, + file, + }) + } + None => Err(eio!(format!( + "blob_cache: failed to get BlobObject for blob {}", + blob_id + ))), + } + } + + /// Read data from the cached data blob in asynchronous mode. 
+ pub async fn async_read(&self, pos: u64, buf: T) -> (Result, T) { + match self.blob.get_blob_object() { + Some(obj) => match obj.fetch_range_uncompressed(pos, buf.bytes_total() as u64) { + Ok(_) => self.file.read_at(buf, pos).await, + Err(e) => (Err(e), buf), + }, + None => ( + Err(eio!(format!( + "blob_cache: failed to get BlobObject for blob {}", + self.blob_id + ))), + buf, + ), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vmm_sys_util::tempdir::TempDir; + + fn create_factory_config() -> String { + let config = r#" + { + "type": "bootstrap", + "id": "bootstrap1", + "domain_id": "userid1", + "config": { + "id": "factory1", + "backend_type": "localfs", + "backend_config": { + "dir": "/tmp/nydus" + }, + "cache_type": "fscache", + "cache_config": { + "work_dir": "/tmp/nydus" + }, + "metadata_path": "/tmp/nydus/bootstrap1" + } + }"#; + + config.to_string() + } + + #[test] + fn test_generate_blob_key() { + assert_eq!(&generate_blob_key("", "blob1"), "blob1"); + assert_eq!(&generate_blob_key("domain1", "blob1"), "domain1/blob1"); + } + + #[test] + fn test_blob_cache_entry() { + let tmpdir = TempDir::new().unwrap(); + let path = tmpdir.as_path().join("bootstrap1"); + std::fs::write(path, "metadata").unwrap(); + let cfg = create_factory_config(); + let content = cfg.replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()); + let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); + assert!(entry.prepare_configuration_info()); + let blob_config = entry.blob_config.as_ref().unwrap(); + + assert_eq!(&entry.blob_type, "bootstrap"); + assert_eq!(&entry.blob_id, "bootstrap1"); + assert_eq!(&entry.domain_id, "userid1"); + assert_eq!(&blob_config.id, "factory1"); + assert_eq!(&blob_config.backend.backend_type, "localfs"); + assert_eq!(&blob_config.cache.cache_type, "fscache"); + assert!(blob_config.metadata_path.is_some()); + assert!(blob_config.backend.localfs.is_some()); + assert!(blob_config.cache.fs_cache.is_some()); + + let mgr = BlobCacheMgr::new(); + let (path, config) = mgr.get_meta_info(&entry).unwrap(); + let backend_cfg = config.get_backend_config().unwrap(); + let cache_cfg = config.get_cache_config().unwrap(); + assert_eq!(path, tmpdir.as_path().join("bootstrap1")); + assert_eq!(&config.id, "factory1"); + assert_eq!(&backend_cfg.backend_type, "localfs"); + assert_eq!(&cache_cfg.cache_type, "fscache"); + + let blob = MetaBlobConfig { + blob_id: "123456789-123".to_string(), + scoped_blob_id: "domain1".to_string(), + path: path.clone(), + config, + blobs: Mutex::new(Vec::new()), + blob_extra_infos: HashMap::new(), + is_tarfs_mode: false, + }; + assert_eq!(blob.path(), &path); + assert_eq!(blob.blob_id(), "123456789-123"); + } + + #[test] + fn test_invalid_blob_id() { + let tmpdir = TempDir::new().unwrap(); + let path = tmpdir.as_path().join("bootstrap1"); + std::fs::write(path, "metadata").unwrap(); + let config = create_factory_config(); + let content = config.replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()); + let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); + let mgr = BlobCacheMgr::new(); + + entry.blob_id = "domain2/blob1".to_string(); + mgr.get_meta_info(&entry).unwrap_err(); + } + + #[test] + fn test_blob_cache_list() { + let config = r#" + { + "blobs" : [ + { + "type": "bootstrap", + "id": "bootstrap1", + "domain_id": "userid1", + "config": { + "id": "factory1", + "backend_type": "localfs", + "backend_config": { + "dir": "/tmp/nydus" + }, + "cache_type": "fscache", + "cache_config": { + "work_dir": "/tmp/nydus" + }, 
+ "metadata_path": "/tmp/nydus/bootstrap1" + } + }, + { + "type": "bootstrap", + "id": "bootstrap2", + "domain_id": "userid2", + "config": { + "id": "factory1", + "backend_type": "localfs", + "backend_config": { + "dir": "/tmp/nydus" + }, + "cache_type": "fscache", + "cache_config": { + "work_dir": "/tmp/nydus" + }, + "metadata_path": "/tmp/nydus/bootstrap2" + } + } + ] + }"#; + let mut list: BlobCacheList = serde_json::from_str(config).unwrap(); + assert!(list.blobs[0].prepare_configuration_info()); + + assert_eq!(list.blobs.len(), 2); + assert_eq!(&list.blobs[0].blob_type, "bootstrap"); + assert_eq!(&list.blobs[0].blob_id, "bootstrap1"); + let blob_config = &list.blobs[0].blob_config.as_ref().unwrap(); + assert_eq!(&blob_config.id, "factory1"); + assert_eq!(&blob_config.backend.backend_type, "localfs"); + assert_eq!(&blob_config.cache.cache_type, "fscache"); + assert_eq!(&list.blobs[1].blob_type, "bootstrap"); + assert_eq!(&list.blobs[1].blob_id, "bootstrap2"); + } + + #[test] + fn test_add_bootstrap() { + let tmpdir = TempDir::new().unwrap(); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + + let config = r#" + { + "type": "bootstrap", + "id": "rafs-v6", + "domain_id": "domain2", + "config_v2": { + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "fscache", + "fscache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "RAFS_V5" + } + }"#; + let content = config + .replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()) + .replace("RAFS_V5", &source_path.display().to_string()); + let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); + assert!(entry.prepare_configuration_info()); + + let mgr = BlobCacheMgr::new(); + mgr.add_blob_entry(&entry).unwrap(); + let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); + assert!(mgr.get_config(&blob_id).is_some()); + + // add the same entry will trigger an error + assert!(mgr.add_blob_entry(&entry).is_err()); + + // Check existence of data blob referenced by the bootstrap. 
+ let key = generate_blob_key( + &entry.domain_id, + "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", + ); + assert!(mgr.get_config(&key).is_some()); + + assert_eq!(mgr.get_state().id_to_config_map.len(), 2); + + entry.blob_id = "rafs-v6-cloned".to_string(); + let blob_id_cloned = generate_blob_key(&entry.domain_id, &entry.blob_id); + mgr.add_blob_entry(&entry).unwrap(); + assert_eq!(mgr.get_state().id_to_config_map.len(), 3); + assert!(mgr.get_config(&blob_id).is_some()); + assert!(mgr.get_config(&blob_id_cloned).is_some()); + + mgr.remove_blob_entry(&BlobCacheObjectId { + domain_id: entry.domain_id.clone(), + blob_id: "rafs-v6".to_string(), + }) + .unwrap(); + assert_eq!(mgr.get_state().id_to_config_map.len(), 2); + assert!(mgr.get_config(&blob_id).is_none()); + assert!(mgr.get_config(&blob_id_cloned).is_some()); + + mgr.remove_blob_entry(&BlobCacheObjectId { + domain_id: entry.domain_id, + blob_id: "rafs-v6-cloned".to_string(), + }) + .unwrap(); + assert_eq!(mgr.get_state().id_to_config_map.len(), 0); + assert!(mgr.get_config(&blob_id).is_none()); + assert!(mgr.get_config(&blob_id_cloned).is_none()); + } + + #[test] + fn test_meta_blob() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + + tokio_uring::start(async move { + let meta_blob = MetaBlob::new(&source_path).unwrap(); + assert_eq!(meta_blob.blocks(), 5); + let buf = vec![0u8; 4096]; + let (res, buf) = meta_blob.async_read(0, buf).await; + assert_eq!(res.unwrap(), 4096); + assert_eq!(buf[0], 0); + assert_eq!(buf[1023], 0); + assert_eq!(buf[1024], 0xe2); + assert_eq!(buf[1027], 0xe0); + let (res, _buf) = meta_blob.async_read(0x6000, buf).await; + assert_eq!(res.unwrap(), 0); + }); + } +} diff --git a/service/src/block_device.rs b/service/src/block_device.rs index dcb72d2b6da..8912e9be9c0 100644 --- a/service/src/block_device.rs +++ b/service/src/block_device.rs @@ -1,762 +1,762 @@ -// Copyright (C) 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0) - -//! Represent a RAFSv6 image as a block device. -//! -//! Metadata of RAFSv6 image has two address encoding schemes: -//! - blob address: data is located by (blob_index, chunk_index) -//! - block address: data is located by (block_addr) -//! -//! Based on the block address scheme, an RAFSv6 image can be converted into/represented as a block -//! device, so it can be directly mounted by Linux EROFS fs driver. - -use std::cmp::{max, min}; -use std::fs::OpenOptions; -use std::io::Result; -use std::path::PathBuf; -use std::rc::Rc; -use std::sync::{Arc, Mutex}; -use std::thread; -use std::thread::JoinHandle; - -use dbs_allocator::{Constraint, IntervalTree, NodeState, Range}; -use nydus_api::BlobCacheEntry; -use nydus_rafs::metadata::layout::v6::{ - EROFS_BLOCK_BITS_12, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, EROFS_BLOCK_SIZE_512, -}; -use nydus_storage::utils::alloc_buf; -use nydus_utils::digest::{self, RafsDigest}; -use nydus_utils::round_up; -use nydus_utils::verity::VerityGenerator; -use tokio_uring::buf::IoBufMut; - -use crate::blob_cache::{generate_blob_key, BlobCacheMgr, BlobConfig, DataBlob, MetaBlob}; - -const BLOCK_DEVICE_EXPORT_BATCH_SIZE: usize = 0x80000; - -enum BlockRange { - Hole, - MetaBlob(Rc), - DataBlob(Rc), -} - -/// A block device composed up from a RAFSv6 image. 
-/// -/// RAFSv6 metadata has two encoding schemes: -/// - blob address: data is located by (blob_index, chunk_index) -/// - block address: data is located by (block_addr) -/// -/// Based on the block address scheme, an RAFSv6 image can be converted into/represented as a block -/// device, so it can be directly mounted by Linux EROFS fs driver. -pub struct BlockDevice { - blocks: u32, - blob_id: String, - cache_mgr: Arc, - ranges: IntervalTree, - is_tarfs_mode: bool, -} - -impl BlockDevice { - /// Create a new instance of [BlockDevice]. - pub fn new(blob_entry: BlobCacheEntry) -> Result { - let cache_mgr = Arc::new(BlobCacheMgr::new()); - cache_mgr.add_blob_entry(&blob_entry).map_err(|e| { - eother!(format!( - "block_device: failed to add blob into CacheMgr, {}", - e - )) - })?; - let blob_id = generate_blob_key(&blob_entry.domain_id, &blob_entry.blob_id); - - BlockDevice::new_with_cache_manager(blob_id, cache_mgr) - } - - /// Create a new instance of [BlockDevice] with provided blob cache manager. - pub fn new_with_cache_manager(blob_id: String, cache_mgr: Arc) -> Result { - let mut ranges = IntervalTree::new(); - ranges.insert(Range::new(0, u32::MAX - 1), None); - - let meta_blob_config = match cache_mgr.get_config(&blob_id) { - None => { - return Err(enoent!(format!( - "block_device: can not find blob {} in blob cache manager", - blob_id - ))) - } - Some(BlobConfig::DataBlob(_v)) => { - return Err(einval!(format!( - "block_device: blob {} is not a metadata blob", - blob_id - ))) - } - Some(BlobConfig::MetaBlob(v)) => v, - }; - let is_tarfs_mode = meta_blob_config.is_tarfs_mode(); - let meta_blob = MetaBlob::new(meta_blob_config.path())?; - let meta_blob = Rc::new(meta_blob); - let blocks = if is_tarfs_mode { - meta_blob.blocks() * 8 - } else { - meta_blob.blocks() - }; - let constraint = Constraint::new(blocks).min(0u32).max(blocks - 1); - let range = ranges.allocate(&constraint).ok_or_else(|| { - enoent!(format!( - "block_device: failed to allocate address range for meta blob {}", - meta_blob_config.blob_id() - )) - })?; - ranges.update(&range, BlockRange::MetaBlob(meta_blob)); - - let mut pos = blocks; - let data_blobs = meta_blob_config.get_blobs(); - for blob in data_blobs.iter() { - let blob_info = blob.blob_info(); - let blob_id = blob_info.blob_id(); - let extra_info = meta_blob_config - .get_blob_extra_info(&blob_id) - .ok_or_else(|| { - let msg = format!( - "block_device: can not get extra information for blob {}", - blob_id - ); - enoent!(msg) - })?; - if extra_info.mapped_blkaddr == 0 { - let msg = format!( - "block_device: mapped block address for blob {} is zero", - blob_id - ); - return Err(einval!(msg)); - } - if is_tarfs_mode != blob_info.features().is_tarfs() { - let msg = format!( - "block_device: inconsistent `TARFS` mode from meta and data blob {}", - blob_id - ); - return Err(einval!(msg)); - } - - if pos < extra_info.mapped_blkaddr { - let constraint = Constraint::new(extra_info.mapped_blkaddr - pos) - .min(pos) - .max(extra_info.mapped_blkaddr - 1); - let range = ranges.allocate(&constraint).ok_or_else(|| { - enoent!("block_device: failed to allocate address range for hole between blobs") - })?; - ranges.update(&range, BlockRange::Hole); - } - - let blocks = if is_tarfs_mode { - blob_info.uncompressed_size() >> EROFS_BLOCK_BITS_9 - } else { - blob_info.uncompressed_size() >> EROFS_BLOCK_BITS_12 - }; - if blocks > u32::MAX as u64 - || blocks + extra_info.mapped_blkaddr as u64 > u32::MAX as u64 - { - return Err(einval!(format!( - "block_device: uncompressed size 
0x{:x} of blob {} is invalid", - blob_info.uncompressed_size(), - blob_info.blob_id() - ))); - } - let data_blob = DataBlob::new(blob)?; - let constraint = Constraint::new(blocks as u32) - .min(extra_info.mapped_blkaddr) - .max(extra_info.mapped_blkaddr + blocks as u32 - 1); - let range = ranges.allocate(&constraint).ok_or_else(|| { - enoent!(format!( - "block_device: can not allocate address range for blob {}", - blob_info.blob_id() - )) - })?; - ranges.update(&range, BlockRange::DataBlob(Rc::new(data_blob))); - pos = extra_info.mapped_blkaddr + blocks as u32; - } - - Ok(BlockDevice { - blocks: pos, - blob_id, - cache_mgr, - ranges, - is_tarfs_mode, - }) - } - - /// Get blob id of the metadata blob. - pub fn meta_blob_id(&self) -> &str { - &self.blob_id - } - - /// Get the [BlobCacheMgr](../blob_cache/struct.BlobCacheMgr.html) associated with the block device. - pub fn cache_mgr(&self) -> Arc { - self.cache_mgr.clone() - } - - /// Get number of blocks of the block device. - pub fn blocks(&self) -> u32 { - self.blocks - } - - /// Get block size of block device. - pub fn block_size(&self) -> u64 { - if self.is_tarfs_mode { - EROFS_BLOCK_SIZE_512 - } else { - EROFS_BLOCK_SIZE_4096 - } - } - - /// Convert data size to number of blocks. - pub fn size_to_blocks(&self, sz: u64) -> u64 { - if self.is_tarfs_mode { - sz >> EROFS_BLOCK_BITS_9 - } else { - sz >> EROFS_BLOCK_BITS_12 - } - } - - /// Convert number of blocks to data size. - pub fn blocks_to_size(&self, blocks: u32) -> u64 { - if self.is_tarfs_mode { - (blocks as u64) << EROFS_BLOCK_BITS_9 - } else { - (blocks as u64) << EROFS_BLOCK_BITS_12 - } - } - - /// Read block range [start, start + blocks) from the block device. - pub async fn async_read( - &self, - mut start: u32, - mut blocks: u32, - mut buf: T, - ) -> (Result, T) { - let sz = self.blocks_to_size(blocks); - if start.checked_add(blocks).is_none() || sz > buf.bytes_total() as u64 { - return ( - Err(einval!("block_device: invalid parameters to read()")), - buf, - ); - } - - let total_size = sz as usize; - let mut pos = 0; - while blocks > 0 { - let (range, node) = match self.ranges.get_superset(&Range::new_point(start)) { - Some(v) => v, - None => { - return ( - Err(eio!(format!( - "block_device: can not locate block 0x{:x} for meta blob {}", - start, self.blob_id - ))), - buf, - ); - } - }; - - if let NodeState::Valued(r) = node { - let count = min(range.max as u32 - start + 1, blocks); - let sz = self.blocks_to_size(count) as usize; - let mut s = buf.slice(pos..pos + sz); - let (res, s) = match r { - BlockRange::Hole => { - s.fill(0); - (Ok(sz), s) - } - BlockRange::MetaBlob(m) => { - let offset = self.blocks_to_size(start); - m.async_read(offset, s).await - } - BlockRange::DataBlob(d) => { - let offset = start - range.min as u32; - let offset = self.blocks_to_size(offset); - d.async_read(offset, s).await - } - }; - - buf = s.into_inner(); - if res.is_err() { - return (res, buf); - } - start += count; - blocks -= count; - pos += sz; - } else { - return ( - Err(eio!(format!( - "block_device: block range 0x{:x}/0x{:x} of meta blob {} is unhandled", - start, blocks, self.blob_id, - ))), - buf, - ); - } - } - - (Ok(total_size), buf) - } - - /// Export a RAFS filesystem as a raw block disk image. 
- pub fn export( - blob_entry: BlobCacheEntry, - output: Option, - data_dir: Option, - threads: u32, - verity: bool, - ) -> Result<()> { - let block_device = BlockDevice::new(blob_entry)?; - let block_device = Rc::new(block_device); - let blocks = block_device.blocks(); - let blob_id = block_device.meta_blob_id(); - - let path = match output { - Some(v) => PathBuf::from(v), - None => { - let path = match block_device.cache_mgr.get_config(&blob_id) { - Some(BlobConfig::MetaBlob(meta)) => meta.path().to_path_buf(), - _ => return Err(enoent!("block_device: failed to get meta blob")), - }; - if !path.is_file() { - return Err(eother!(format!( - "block_device: meta blob {} is not a file", - path.display() - ))); - } - let name = path - .file_name() - .ok_or_else(|| { - eother!(format!( - "block_device: failed to get file name from {}", - path.display() - )) - })? - .to_str() - .ok_or_else(|| { - eother!(format!( - "block_device: failed to get file name from {}", - path.display() - )) - })?; - let dir = data_dir - .ok_or_else(|| einval!("block_device: parameter `data_dir` is missing"))?; - let path = PathBuf::from(dir); - path.join(name.to_string() + ".disk") - } - }; - - let output_file = OpenOptions::new() - .create(true) - .read(true) - .write(true) - .open(&path) - .map_err(|e| { - eother!(format!( - "block_device: failed to create output file {}, {}", - path.display(), - e - )) - })?; - let output_file = Rc::new(tokio_uring::fs::File::from_std(output_file)); - - let mut verity_offset = 0; - let generator = if verity { - let file = OpenOptions::new() - .read(true) - .write(true) - .open(&path) - .map_err(|e| { - eother!(format!( - "block_device: failed to create output file {}, {}", - path.display(), - e - )) - })?; - verity_offset = round_up(block_device.blocks_to_size(blocks), 4096); - let mut generator = VerityGenerator::new(file, verity_offset, blocks)?; - generator.initialize()?; - Some(Arc::new(Mutex::new(generator))) - } else { - None - }; - - let batch_size = BLOCK_DEVICE_EXPORT_BATCH_SIZE as u32 / block_device.block_size() as u32; - assert_eq!(batch_size.count_ones(), 1); - let threads = max(threads, 1); - let mut threads = min(threads, 32); - while blocks / threads < batch_size && threads > 1 { - threads /= 2; - } - - if threads == 1 { - let generator = generator.clone(); - let block_device = block_device.clone(); - tokio_uring::start(async move { - Self::do_export(block_device, output_file, 0, blocks, generator).await - })?; - } else { - let mut thread_handlers: Vec>> = - Vec::with_capacity(threads as usize); - let step = (blocks + batch_size - 1) & !(batch_size - 1); - let mut pos = 0; - - for _i in 0..threads { - let count = min(blocks - pos, step); - let mgr = block_device.cache_mgr.clone(); - let id = blob_id.to_string(); - let path = path.to_path_buf(); - let generator = generator.clone(); - - let handler = thread::spawn(move || { - let output_file = OpenOptions::new() - .read(true) - .write(true) - .open(&path) - .map_err(|e| { - eother!(format!( - "block_device: failed to create output file {}, {}", - path.display(), - e - )) - })?; - let file = Rc::new(tokio_uring::fs::File::from_std(output_file)); - let block_device = - BlockDevice::new_with_cache_manager(id, mgr).map_err(|e| { - eother!(format!( - "block_device: failed to create block device object, {}", - e - )) - })?; - let device = Rc::new(block_device); - - tokio_uring::start(async move { - Self::do_export(device, file, pos, count, generator).await - })?; - Ok(()) - }); - pos += count; - thread_handlers.push(handler); 
- } - assert_eq!(pos, blocks); - assert_eq!(thread_handlers.len(), threads as usize); - - for handler in thread_handlers { - handler - .join() - .map_err(|e| { - eother!(format!( - "block_device: failed to wait for worker thread, {:?}", - e - )) - })? - .map_err(|e| { - eother!(format!("block_device: failed to export disk image, {}", e)) - })?; - } - } - - if let Some(generator) = generator.as_ref() { - let mut guard = generator.lock().unwrap(); - let root_digest = guard.generate_all_digests()?; - let root_digest: String = root_digest - .data - .iter() - .fold(String::new(), |acc, v| acc + &format!("{:02x}", v)); - println!( - "dm-verity options: --no-superblock --format=1 -s \"\" --hash=sha256 --data-block-size={} --hash-block-size=4096 --data-blocks {} --hash-offset {} {}", - block_device.block_size(), blocks, verity_offset, root_digest - ); - } - - Ok(()) - } - - async fn do_export( - block_device: Rc, - output_file: Rc, - start: u32, - mut blocks: u32, - generator: Option>>, - ) -> Result<()> { - let batch_size = BLOCK_DEVICE_EXPORT_BATCH_SIZE as u32 / block_device.block_size() as u32; - let block_size = block_device.block_size() as usize; - let mut pos = start; - let mut buf = alloc_buf(BLOCK_DEVICE_EXPORT_BATCH_SIZE); - - while blocks > 0 { - let count = min(batch_size, blocks); - let (res, buf1) = block_device.async_read(pos, count, buf).await; - let sz = res?; - if sz != count as usize * block_size { - return Err(eio!( - "block_device: failed to read data, got less data than requested" - )); - } - buf = buf1; - - if sz != buf.len() { - buf.resize(sz, 0); - } - let (res, buf2) = output_file - .write_at(buf, block_device.blocks_to_size(pos)) - .await; - let sz1 = res?; - if sz1 != sz { - return Err(eio!( - "block_device: failed to write data to disk image file, written less data than requested" - )); - } - buf = buf2; - - // Generate Merkle tree leaf nodes. - if let Some(generator) = generator.as_ref() { - let mut page_idx = (block_device.blocks_to_size(pos) / block_size as u64) as u32; - let mut offset = 0; - while offset < buf.len() { - let digest = RafsDigest::from_buf( - &buf[offset..offset + block_size], - digest::Algorithm::Sha256, - ); - let mut guard = generator.lock().unwrap(); - guard.set_digest(1, page_idx, &digest.data)?; - offset += block_size; - page_idx += 1; - } - } - - pos += count; - blocks -= count; - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::blob_cache::generate_blob_key; - use nydus_api::BlobCacheEntry; - use nydus_utils::digest::{DigestHasher, RafsDigest}; - use std::fs::{self, File}; - use std::io::{BufReader, Read}; - use std::path::PathBuf; - use vmm_sys_util::tempdir::TempDir; - - #[test] - fn test_block_device() { - let tmp_dir = TempDir::new().unwrap(); - let entry = create_bootstrap_entry(&tmp_dir); - - let mgr = BlobCacheMgr::new(); - mgr.add_blob_entry(&entry).unwrap(); - let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); - assert!(mgr.get_config(&blob_id).is_some()); - - // Check existence of data blob referenced by the bootstrap. 
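For the dm-verity support, `do_export` hashes each block of the exported image with SHA-256 and feeds the digest to the `VerityGenerator` as a level-1 (leaf) node; the root digest printed at the end goes into the `veritysetup` command line. A hedged sketch of just the leaf-hashing step, using the `sha2` crate as a stand-in for the crate's `RafsDigest` helper:

```rust
// Sketch only: the real code uses nydus_utils::digest::RafsDigest and
// VerityGenerator::set_digest(); the sha2 crate stands in for the hashing.
use sha2::{Digest, Sha256};

/// Hash every `block_size`-sized block of `buf` and return the leaf digests
/// in block order, mirroring the per-block loop in `do_export`.
fn leaf_digests(buf: &[u8], block_size: usize) -> Vec<Vec<u8>> {
    buf.chunks(block_size)
        .map(|block| {
            let mut hasher = Sha256::new();
            hasher.update(block);
            hasher.finalize().to_vec()
        })
        .collect()
}

fn main() {
    // Pretend this is one batch read back from the block device.
    let image = vec![0u8; 3 * 4096];
    let leaves = leaf_digests(&image, 4096);
    assert_eq!(leaves.len(), 3);
    // Identical (all-zero) blocks produce identical leaf digests.
    assert_eq!(leaves[0], leaves[2]);
}
```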
- let key = generate_blob_key( - &entry.domain_id, - "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", - ); - assert!(mgr.get_config(&key).is_some()); - - let mgr = Arc::new(mgr); - // assert with wrong blob_id - assert!(BlockDevice::new_with_cache_manager(String::from("blob_id"), mgr.clone()).is_err()); - let device = BlockDevice::new_with_cache_manager(blob_id, mgr).unwrap(); - assert_eq!(device.blocks(), 0x209); - - tokio_uring::start(async move { - let buf = vec![0u8; 8192]; - let (res, buf) = device.async_read(u32::MAX, u32::MAX, buf).await; - assert!(res.is_err()); - assert_eq!(buf.len(), 8192); - let (res, _buf) = device.async_read(0, 1, vec![0u8]).await; - assert!(res.is_err()); - - let (res, buf) = device.async_read(0, 1, buf).await; - assert_eq!(buf.len(), 8192); - assert_eq!(res.unwrap(), 4096); - assert_eq!(buf[0], 0); - assert_eq!(buf[1023], 0); - assert_eq!(buf[1024], 0xe2); - assert_eq!(buf[1027], 0xe0); - - let (res, buf) = device.async_read(4, 2, buf).await; - assert_eq!(res.unwrap(), 8192); - assert_eq!(buf[4096], 0); - assert_eq!(buf[5119], 0); - assert_eq!(buf[5120], 0); - assert_eq!(buf[5123], 0); - assert_eq!(buf[5372], 0); - assert_eq!(buf[8191], 0); - - let (res, buf) = device.async_read(0x200, 2, buf).await; - assert_eq!(buf.len(), 8192); - assert_eq!(res.unwrap(), 8192); - - let (res, buf) = device.async_read(0x208, 2, buf).await; - assert_eq!(buf.len(), 8192); - assert!(res.is_err()); - - let (res, buf) = device.async_read(0x208, 1, buf).await; - assert_eq!(buf.len(), 8192); - assert_eq!(res.unwrap(), 4096); - - let (res, buf) = device.async_read(0x209, 1, buf).await; - assert_eq!(buf.len(), 8192); - assert!(res.is_err()); - }); - } - - fn create_bootstrap_entry(tmp_dir: &TempDir) -> BlobCacheEntry { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - let mut dest_path = tmp_dir.as_path().to_path_buf(); - dest_path.push("be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - fs::copy(&source_path, &dest_path).unwrap(); - - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - let config = r#" - { - "type": "bootstrap", - "id": "rafs-v6", - "domain_id": "domain2", - "config_v2": { - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "filecache", - "filecache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "RAFS_V5" - } - }"#; - - // config with non-existing path - let entry: BlobCacheEntry = serde_json::from_str(&config).unwrap(); - assert!(BlockDevice::new(entry).is_err()); - - // config with correct path - let content = config - .replace("/tmp/nydus", tmp_dir.as_path().to_str().unwrap()) - .replace("RAFS_V5", &source_path.display().to_string()); - let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); - assert!(entry.prepare_configuration_info()); - entry - } - - fn create_block_device() -> BlockDevice { - let tmp_dir = TempDir::new().unwrap(); - let entry = create_bootstrap_entry(&tmp_dir); - - let device = BlockDevice::new(entry); - assert!(device.is_ok()); - let device = device.unwrap(); - assert_eq!(device.blocks(), 0x209); - - device - } - - #[test] - fn test_block_size() { - let mut device = create_block_device(); - - assert!(!device.is_tarfs_mode); - 
assert_eq!(device.block_size(), EROFS_BLOCK_SIZE_4096); - assert_ne!(device.block_size(), EROFS_BLOCK_SIZE_512); - - device.is_tarfs_mode = true; - assert_ne!(device.block_size(), EROFS_BLOCK_SIZE_4096); - assert_eq!(device.block_size(), EROFS_BLOCK_SIZE_512); - } - - #[test] - fn test_size_to_blocks() { - let mut device = create_block_device(); - - assert!(!device.is_tarfs_mode); - assert_eq!(device.size_to_blocks(0), 0); - assert_eq!(device.size_to_blocks(4096), 1); - assert_ne!(device.size_to_blocks(4096), 4096); - assert_ne!(device.size_to_blocks(4096), 8); - - device.is_tarfs_mode = true; - assert_eq!(device.size_to_blocks(0), 0); - assert_eq!(device.size_to_blocks(512), 1); - assert_ne!(device.size_to_blocks(512), 512); - assert_ne!(device.size_to_blocks(4096), 1); - } - - #[test] - fn test_blocks_to_size() { - let mut device = create_block_device(); - - assert!(!device.is_tarfs_mode); - assert_eq!(device.blocks_to_size(0), 0); - assert_eq!(device.blocks_to_size(1), 4096); - assert_ne!(device.blocks_to_size(4096), 4096); - assert_ne!(device.blocks_to_size(8), 4096); - - device.is_tarfs_mode = true; - assert_eq!(device.blocks_to_size(0), 0); - assert_eq!(device.blocks_to_size(1), 512); - assert_ne!(device.blocks_to_size(512), 512); - assert_ne!(device.blocks_to_size(1), 4096); - } - - fn sha256_digest(mut reader: R) -> Result { - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - let mut buffer = [0; 1024]; - - loop { - let count = reader.read(&mut buffer)?; - if count == 0 { - break; - } - hasher.digest_update(&buffer[..count]); - } - - Ok(hasher.digest_finalize().into()) - } - - fn test_export_arg_thread(thread: u32) -> Result<()> { - let entry_tmp_dir = TempDir::new()?; - let entry = create_bootstrap_entry(&entry_tmp_dir); - - let tmp_dir = TempDir::new().unwrap(); - let data_dir = Some(String::from(tmp_dir.as_path().to_str().unwrap())); - - assert!(BlockDevice::export(entry, None, data_dir, thread, true).is_ok()); - - let mut disk_path = PathBuf::from(tmp_dir.as_path()); - disk_path.push("rafs-v6-2.2.boot.disk"); - let input = File::open(disk_path)?; - let reader = BufReader::new(input); - let sha256 = sha256_digest(reader)?; - assert_eq!( - sha256, - String::from("5684c330c622350c12d633d0773201f862b9955375d806670e1aaf36ef038b31") - ); - - Ok(()) - } - - #[test] - fn test_export() { - assert!(test_export_arg_thread(1).is_ok()); - assert!(test_export_arg_thread(2).is_ok()); - } -} +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0) + +//! Represent a RAFSv6 image as a block device. +//! +//! Metadata of RAFSv6 image has two address encoding schemes: +//! - blob address: data is located by (blob_index, chunk_index) +//! - block address: data is located by (block_addr) +//! +//! Based on the block address scheme, an RAFSv6 image can be converted into/represented as a block +//! device, so it can be directly mounted by Linux EROFS fs driver. 
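To make the block-address scheme concrete: the device keeps an ordered map from block-address ranges to their backing objects (the meta blob, a data blob, or a hole left by gaps between `mapped_blkaddr` values), and every read first resolves its starting block to one of those ranges. The sketch below is a simplified stand-in for the `IntervalTree`-based lookup used by `BlockDevice`; the first range size mirrors the 0x209-block meta blob from the tests, the other figures are hypothetical:

```rust
// Simplified illustration of the block-address layout; the real BlockDevice
// uses dbs_allocator::IntervalTree and Rc-wrapped MetaBlob/DataBlob objects.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Backing {
    MetaBlob,
    DataBlob { blob_index: usize },
    Hole,
}

struct BlockRangeEntry {
    start: u32,   // first block address covered by this range
    count: u32,   // number of blocks in the range
    backing: Backing,
}

/// Resolve a block address to its backing range, like `ranges.get_superset()`.
fn resolve(ranges: &[BlockRangeEntry], block: u32) -> Option<&BlockRangeEntry> {
    ranges
        .iter()
        .find(|r| block >= r.start && block - r.start < r.count)
}

fn main() {
    // Hypothetical layout: meta blob first, then a hole, then one data blob.
    let ranges = vec![
        BlockRangeEntry { start: 0, count: 0x209, backing: Backing::MetaBlob },
        BlockRangeEntry { start: 0x209, count: 0xf7, backing: Backing::Hole },
        BlockRangeEntry { start: 0x300, count: 0x100, backing: Backing::DataBlob { blob_index: 0 } },
    ];
    assert_eq!(resolve(&ranges, 0x100).unwrap().backing, Backing::MetaBlob);
    assert_eq!(resolve(&ranges, 0x250).unwrap().backing, Backing::Hole);
    assert!(matches!(
        resolve(&ranges, 0x350).unwrap().backing,
        Backing::DataBlob { blob_index: 0 }
    ));
    assert!(resolve(&ranges, 0x400).is_none());
}
```

Reads that land on a hole are filled with zeroes, while meta and data ranges are forwarded to the corresponding blob at a range-relative offset, which is exactly what the `async_read` loop does with the resolved `BlockRange`.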
+ +use std::cmp::{max, min}; +use std::fs::OpenOptions; +use std::io::Result; +use std::path::PathBuf; +use std::rc::Rc; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::thread::JoinHandle; + +use dbs_allocator::{Constraint, IntervalTree, NodeState, Range}; +use nydus_api::BlobCacheEntry; +use nydus_rafs::metadata::layout::v6::{ + EROFS_BLOCK_BITS_12, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, EROFS_BLOCK_SIZE_512, +}; +use nydus_storage::utils::alloc_buf; +use nydus_utils::digest::{self, RafsDigest}; +use nydus_utils::round_up; +use nydus_utils::verity::VerityGenerator; +use tokio_uring::buf::IoBufMut; + +use crate::blob_cache::{generate_blob_key, BlobCacheMgr, BlobConfig, DataBlob, MetaBlob}; + +const BLOCK_DEVICE_EXPORT_BATCH_SIZE: usize = 0x80000; + +enum BlockRange { + Hole, + MetaBlob(Rc), + DataBlob(Rc), +} + +/// A block device composed up from a RAFSv6 image. +/// +/// RAFSv6 metadata has two encoding schemes: +/// - blob address: data is located by (blob_index, chunk_index) +/// - block address: data is located by (block_addr) +/// +/// Based on the block address scheme, an RAFSv6 image can be converted into/represented as a block +/// device, so it can be directly mounted by Linux EROFS fs driver. +pub struct BlockDevice { + blocks: u32, + blob_id: String, + cache_mgr: Arc, + ranges: IntervalTree, + is_tarfs_mode: bool, +} + +impl BlockDevice { + /// Create a new instance of [BlockDevice]. + pub fn new(blob_entry: BlobCacheEntry) -> Result { + let cache_mgr = Arc::new(BlobCacheMgr::new()); + cache_mgr.add_blob_entry(&blob_entry).map_err(|e| { + eother!(format!( + "block_device: failed to add blob into CacheMgr, {}", + e + )) + })?; + let blob_id = generate_blob_key(&blob_entry.domain_id, &blob_entry.blob_id); + + BlockDevice::new_with_cache_manager(blob_id, cache_mgr) + } + + /// Create a new instance of [BlockDevice] with provided blob cache manager. 
+ pub fn new_with_cache_manager(blob_id: String, cache_mgr: Arc) -> Result { + let mut ranges = IntervalTree::new(); + ranges.insert(Range::new(0, u32::MAX - 1), None); + + let meta_blob_config = match cache_mgr.get_config(&blob_id) { + None => { + return Err(enoent!(format!( + "block_device: can not find blob {} in blob cache manager", + blob_id + ))) + } + Some(BlobConfig::DataBlob(_v)) => { + return Err(einval!(format!( + "block_device: blob {} is not a metadata blob", + blob_id + ))) + } + Some(BlobConfig::MetaBlob(v)) => v, + }; + let is_tarfs_mode = meta_blob_config.is_tarfs_mode(); + let meta_blob = MetaBlob::new(meta_blob_config.path())?; + let meta_blob = Rc::new(meta_blob); + let blocks = if is_tarfs_mode { + meta_blob.blocks() * 8 + } else { + meta_blob.blocks() + }; + let constraint = Constraint::new(blocks).min(0u32).max(blocks - 1); + let range = ranges.allocate(&constraint).ok_or_else(|| { + enoent!(format!( + "block_device: failed to allocate address range for meta blob {}", + meta_blob_config.blob_id() + )) + })?; + ranges.update(&range, BlockRange::MetaBlob(meta_blob)); + + let mut pos = blocks; + let data_blobs = meta_blob_config.get_blobs(); + for blob in data_blobs.iter() { + let blob_info = blob.blob_info(); + let blob_id = blob_info.blob_id(); + let extra_info = meta_blob_config + .get_blob_extra_info(&blob_id) + .ok_or_else(|| { + let msg = format!( + "block_device: can not get extra information for blob {}", + blob_id + ); + enoent!(msg) + })?; + if extra_info.mapped_blkaddr == 0 { + let msg = format!( + "block_device: mapped block address for blob {} is zero", + blob_id + ); + return Err(einval!(msg)); + } + if is_tarfs_mode != blob_info.features().is_tarfs() { + let msg = format!( + "block_device: inconsistent `TARFS` mode from meta and data blob {}", + blob_id + ); + return Err(einval!(msg)); + } + + if pos < extra_info.mapped_blkaddr { + let constraint = Constraint::new(extra_info.mapped_blkaddr - pos) + .min(pos) + .max(extra_info.mapped_blkaddr - 1); + let range = ranges.allocate(&constraint).ok_or_else(|| { + enoent!("block_device: failed to allocate address range for hole between blobs") + })?; + ranges.update(&range, BlockRange::Hole); + } + + let blocks = if is_tarfs_mode { + blob_info.uncompressed_size() >> EROFS_BLOCK_BITS_9 + } else { + blob_info.uncompressed_size() >> EROFS_BLOCK_BITS_12 + }; + if blocks > u32::MAX as u64 + || blocks + extra_info.mapped_blkaddr as u64 > u32::MAX as u64 + { + return Err(einval!(format!( + "block_device: uncompressed size 0x{:x} of blob {} is invalid", + blob_info.uncompressed_size(), + blob_info.blob_id() + ))); + } + let data_blob = DataBlob::new(blob)?; + let constraint = Constraint::new(blocks as u32) + .min(extra_info.mapped_blkaddr) + .max(extra_info.mapped_blkaddr + blocks as u32 - 1); + let range = ranges.allocate(&constraint).ok_or_else(|| { + enoent!(format!( + "block_device: can not allocate address range for blob {}", + blob_info.blob_id() + )) + })?; + ranges.update(&range, BlockRange::DataBlob(Rc::new(data_blob))); + pos = extra_info.mapped_blkaddr + blocks as u32; + } + + Ok(BlockDevice { + blocks: pos, + blob_id, + cache_mgr, + ranges, + is_tarfs_mode, + }) + } + + /// Get blob id of the metadata blob. + pub fn meta_blob_id(&self) -> &str { + &self.blob_id + } + + /// Get the [BlobCacheMgr](../blob_cache/struct.BlobCacheMgr.html) associated with the block device. + pub fn cache_mgr(&self) -> Arc { + self.cache_mgr.clone() + } + + /// Get number of blocks of the block device. 
+ pub fn blocks(&self) -> u32 { + self.blocks + } + + /// Get block size of block device. + pub fn block_size(&self) -> u64 { + if self.is_tarfs_mode { + EROFS_BLOCK_SIZE_512 + } else { + EROFS_BLOCK_SIZE_4096 + } + } + + /// Convert data size to number of blocks. + pub fn size_to_blocks(&self, sz: u64) -> u64 { + if self.is_tarfs_mode { + sz >> EROFS_BLOCK_BITS_9 + } else { + sz >> EROFS_BLOCK_BITS_12 + } + } + + /// Convert number of blocks to data size. + pub fn blocks_to_size(&self, blocks: u32) -> u64 { + if self.is_tarfs_mode { + (blocks as u64) << EROFS_BLOCK_BITS_9 + } else { + (blocks as u64) << EROFS_BLOCK_BITS_12 + } + } + + /// Read block range [start, start + blocks) from the block device. + pub async fn async_read( + &self, + mut start: u32, + mut blocks: u32, + mut buf: T, + ) -> (Result, T) { + let sz = self.blocks_to_size(blocks); + if start.checked_add(blocks).is_none() || sz > buf.bytes_total() as u64 { + return ( + Err(einval!("block_device: invalid parameters to read()")), + buf, + ); + } + + let total_size = sz as usize; + let mut pos = 0; + while blocks > 0 { + let (range, node) = match self.ranges.get_superset(&Range::new_point(start)) { + Some(v) => v, + None => { + return ( + Err(eio!(format!( + "block_device: can not locate block 0x{:x} for meta blob {}", + start, self.blob_id + ))), + buf, + ); + } + }; + + if let NodeState::Valued(r) = node { + let count = min(range.max as u32 - start + 1, blocks); + let sz = self.blocks_to_size(count) as usize; + let mut s = buf.slice(pos..pos + sz); + let (res, s) = match r { + BlockRange::Hole => { + s.fill(0); + (Ok(sz), s) + } + BlockRange::MetaBlob(m) => { + let offset = self.blocks_to_size(start); + m.async_read(offset, s).await + } + BlockRange::DataBlob(d) => { + let offset = start - range.min as u32; + let offset = self.blocks_to_size(offset); + d.async_read(offset, s).await + } + }; + + buf = s.into_inner(); + if res.is_err() { + return (res, buf); + } + start += count; + blocks -= count; + pos += sz; + } else { + return ( + Err(eio!(format!( + "block_device: block range 0x{:x}/0x{:x} of meta blob {} is unhandled", + start, blocks, self.blob_id, + ))), + buf, + ); + } + } + + (Ok(total_size), buf) + } + + /// Export a RAFS filesystem as a raw block disk image. + pub fn export( + blob_entry: BlobCacheEntry, + output: Option, + data_dir: Option, + threads: u32, + verity: bool, + ) -> Result<()> { + let block_device = BlockDevice::new(blob_entry)?; + let block_device = Rc::new(block_device); + let blocks = block_device.blocks(); + let blob_id = block_device.meta_blob_id(); + + let path = match output { + Some(v) => PathBuf::from(v), + None => { + let path = match block_device.cache_mgr.get_config(&blob_id) { + Some(BlobConfig::MetaBlob(meta)) => meta.path().to_path_buf(), + _ => return Err(enoent!("block_device: failed to get meta blob")), + }; + if !path.is_file() { + return Err(eother!(format!( + "block_device: meta blob {} is not a file", + path.display() + ))); + } + let name = path + .file_name() + .ok_or_else(|| { + eother!(format!( + "block_device: failed to get file name from {}", + path.display() + )) + })? 
+ .to_str() + .ok_or_else(|| { + eother!(format!( + "block_device: failed to get file name from {}", + path.display() + )) + })?; + let dir = data_dir + .ok_or_else(|| einval!("block_device: parameter `data_dir` is missing"))?; + let path = PathBuf::from(dir); + path.join(name.to_string() + ".disk") + } + }; + + let output_file = OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(&path) + .map_err(|e| { + eother!(format!( + "block_device: failed to create output file {}, {}", + path.display(), + e + )) + })?; + let output_file = Rc::new(tokio_uring::fs::File::from_std(output_file)); + + let mut verity_offset = 0; + let generator = if verity { + let file = OpenOptions::new() + .read(true) + .write(true) + .open(&path) + .map_err(|e| { + eother!(format!( + "block_device: failed to create output file {}, {}", + path.display(), + e + )) + })?; + verity_offset = round_up(block_device.blocks_to_size(blocks), 4096); + let mut generator = VerityGenerator::new(file, verity_offset, blocks)?; + generator.initialize()?; + Some(Arc::new(Mutex::new(generator))) + } else { + None + }; + + let batch_size = BLOCK_DEVICE_EXPORT_BATCH_SIZE as u32 / block_device.block_size() as u32; + assert_eq!(batch_size.count_ones(), 1); + let threads = max(threads, 1); + let mut threads = min(threads, 32); + while blocks / threads < batch_size && threads > 1 { + threads /= 2; + } + + if threads == 1 { + let generator = generator.clone(); + let block_device = block_device.clone(); + tokio_uring::start(async move { + Self::do_export(block_device, output_file, 0, blocks, generator).await + })?; + } else { + let mut thread_handlers: Vec>> = + Vec::with_capacity(threads as usize); + let step = (blocks + batch_size - 1) & !(batch_size - 1); + let mut pos = 0; + + for _i in 0..threads { + let count = min(blocks - pos, step); + let mgr = block_device.cache_mgr.clone(); + let id = blob_id.to_string(); + let path = path.to_path_buf(); + let generator = generator.clone(); + + let handler = thread::spawn(move || { + let output_file = OpenOptions::new() + .read(true) + .write(true) + .open(&path) + .map_err(|e| { + eother!(format!( + "block_device: failed to create output file {}, {}", + path.display(), + e + )) + })?; + let file = Rc::new(tokio_uring::fs::File::from_std(output_file)); + let block_device = + BlockDevice::new_with_cache_manager(id, mgr).map_err(|e| { + eother!(format!( + "block_device: failed to create block device object, {}", + e + )) + })?; + let device = Rc::new(block_device); + + tokio_uring::start(async move { + Self::do_export(device, file, pos, count, generator).await + })?; + Ok(()) + }); + pos += count; + thread_handlers.push(handler); + } + assert_eq!(pos, blocks); + assert_eq!(thread_handlers.len(), threads as usize); + + for handler in thread_handlers { + handler + .join() + .map_err(|e| { + eother!(format!( + "block_device: failed to wait for worker thread, {:?}", + e + )) + })? 
+ .map_err(|e| { + eother!(format!("block_device: failed to export disk image, {}", e)) + })?; + } + } + + if let Some(generator) = generator.as_ref() { + let mut guard = generator.lock().unwrap(); + let root_digest = guard.generate_all_digests()?; + let root_digest: String = root_digest + .data + .iter() + .fold(String::new(), |acc, v| acc + &format!("{:02x}", v)); + println!( + "dm-verity options: --no-superblock --format=1 -s \"\" --hash=sha256 --data-block-size={} --hash-block-size=4096 --data-blocks {} --hash-offset {} {}", + block_device.block_size(), blocks, verity_offset, root_digest + ); + } + + Ok(()) + } + + async fn do_export( + block_device: Rc, + output_file: Rc, + start: u32, + mut blocks: u32, + generator: Option>>, + ) -> Result<()> { + let batch_size = BLOCK_DEVICE_EXPORT_BATCH_SIZE as u32 / block_device.block_size() as u32; + let block_size = block_device.block_size() as usize; + let mut pos = start; + let mut buf = alloc_buf(BLOCK_DEVICE_EXPORT_BATCH_SIZE); + + while blocks > 0 { + let count = min(batch_size, blocks); + let (res, buf1) = block_device.async_read(pos, count, buf).await; + let sz = res?; + if sz != count as usize * block_size { + return Err(eio!( + "block_device: failed to read data, got less data than requested" + )); + } + buf = buf1; + + if sz != buf.len() { + buf.resize(sz, 0); + } + let (res, buf2) = output_file + .write_at(buf, block_device.blocks_to_size(pos)) + .await; + let sz1 = res?; + if sz1 != sz { + return Err(eio!( + "block_device: failed to write data to disk image file, written less data than requested" + )); + } + buf = buf2; + + // Generate Merkle tree leaf nodes. + if let Some(generator) = generator.as_ref() { + let mut page_idx = (block_device.blocks_to_size(pos) / block_size as u64) as u32; + let mut offset = 0; + while offset < buf.len() { + let digest = RafsDigest::from_buf( + &buf[offset..offset + block_size], + digest::Algorithm::Sha256, + ); + let mut guard = generator.lock().unwrap(); + guard.set_digest(1, page_idx, &digest.data)?; + offset += block_size; + page_idx += 1; + } + } + + pos += count; + blocks -= count; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blob_cache::generate_blob_key; + use nydus_api::BlobCacheEntry; + use nydus_utils::digest::{DigestHasher, RafsDigest}; + use std::fs::{self, File}; + use std::io::{BufReader, Read}; + use std::path::PathBuf; + use vmm_sys_util::tempdir::TempDir; + + #[test] + fn test_block_device() { + let tmp_dir = TempDir::new().unwrap(); + let entry = create_bootstrap_entry(&tmp_dir); + + let mgr = BlobCacheMgr::new(); + mgr.add_blob_entry(&entry).unwrap(); + let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); + assert!(mgr.get_config(&blob_id).is_some()); + + // Check existence of data blob referenced by the bootstrap. 
+ let key = generate_blob_key( + &entry.domain_id, + "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", + ); + assert!(mgr.get_config(&key).is_some()); + + let mgr = Arc::new(mgr); + // assert with wrong blob_id + assert!(BlockDevice::new_with_cache_manager(String::from("blob_id"), mgr.clone()).is_err()); + let device = BlockDevice::new_with_cache_manager(blob_id, mgr).unwrap(); + assert_eq!(device.blocks(), 0x209); + + tokio_uring::start(async move { + let buf = vec![0u8; 8192]; + let (res, buf) = device.async_read(u32::MAX, u32::MAX, buf).await; + assert!(res.is_err()); + assert_eq!(buf.len(), 8192); + let (res, _buf) = device.async_read(0, 1, vec![0u8]).await; + assert!(res.is_err()); + + let (res, buf) = device.async_read(0, 1, buf).await; + assert_eq!(buf.len(), 8192); + assert_eq!(res.unwrap(), 4096); + assert_eq!(buf[0], 0); + assert_eq!(buf[1023], 0); + assert_eq!(buf[1024], 0xe2); + assert_eq!(buf[1027], 0xe0); + + let (res, buf) = device.async_read(4, 2, buf).await; + assert_eq!(res.unwrap(), 8192); + assert_eq!(buf[4096], 0); + assert_eq!(buf[5119], 0); + assert_eq!(buf[5120], 0); + assert_eq!(buf[5123], 0); + assert_eq!(buf[5372], 0); + assert_eq!(buf[8191], 0); + + let (res, buf) = device.async_read(0x200, 2, buf).await; + assert_eq!(buf.len(), 8192); + assert_eq!(res.unwrap(), 8192); + + let (res, buf) = device.async_read(0x208, 2, buf).await; + assert_eq!(buf.len(), 8192); + assert!(res.is_err()); + + let (res, buf) = device.async_read(0x208, 1, buf).await; + assert_eq!(buf.len(), 8192); + assert_eq!(res.unwrap(), 4096); + + let (res, buf) = device.async_read(0x209, 1, buf).await; + assert_eq!(buf.len(), 8192); + assert!(res.is_err()); + }); + } + + fn create_bootstrap_entry(tmp_dir: &TempDir) -> BlobCacheEntry { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + let mut dest_path = tmp_dir.as_path().to_path_buf(); + dest_path.push("be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + fs::copy(&source_path, &dest_path).unwrap(); + + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + let config = r#" + { + "type": "bootstrap", + "id": "rafs-v6", + "domain_id": "domain2", + "config_v2": { + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "filecache", + "filecache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "RAFS_V5" + } + }"#; + + // config with non-existing path + let entry: BlobCacheEntry = serde_json::from_str(&config).unwrap(); + assert!(BlockDevice::new(entry).is_err()); + + // config with correct path + let content = config + .replace("/tmp/nydus", tmp_dir.as_path().to_str().unwrap()) + .replace("RAFS_V5", &source_path.display().to_string()); + let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); + assert!(entry.prepare_configuration_info()); + entry + } + + fn create_block_device() -> BlockDevice { + let tmp_dir = TempDir::new().unwrap(); + let entry = create_bootstrap_entry(&tmp_dir); + + let device = BlockDevice::new(entry); + assert!(device.is_ok()); + let device = device.unwrap(); + assert_eq!(device.blocks(), 0x209); + + device + } + + #[test] + fn test_block_size() { + let mut device = create_block_device(); + + assert!(!device.is_tarfs_mode); + 
assert_eq!(device.block_size(), EROFS_BLOCK_SIZE_4096); + assert_ne!(device.block_size(), EROFS_BLOCK_SIZE_512); + + device.is_tarfs_mode = true; + assert_ne!(device.block_size(), EROFS_BLOCK_SIZE_4096); + assert_eq!(device.block_size(), EROFS_BLOCK_SIZE_512); + } + + #[test] + fn test_size_to_blocks() { + let mut device = create_block_device(); + + assert!(!device.is_tarfs_mode); + assert_eq!(device.size_to_blocks(0), 0); + assert_eq!(device.size_to_blocks(4096), 1); + assert_ne!(device.size_to_blocks(4096), 4096); + assert_ne!(device.size_to_blocks(4096), 8); + + device.is_tarfs_mode = true; + assert_eq!(device.size_to_blocks(0), 0); + assert_eq!(device.size_to_blocks(512), 1); + assert_ne!(device.size_to_blocks(512), 512); + assert_ne!(device.size_to_blocks(4096), 1); + } + + #[test] + fn test_blocks_to_size() { + let mut device = create_block_device(); + + assert!(!device.is_tarfs_mode); + assert_eq!(device.blocks_to_size(0), 0); + assert_eq!(device.blocks_to_size(1), 4096); + assert_ne!(device.blocks_to_size(4096), 4096); + assert_ne!(device.blocks_to_size(8), 4096); + + device.is_tarfs_mode = true; + assert_eq!(device.blocks_to_size(0), 0); + assert_eq!(device.blocks_to_size(1), 512); + assert_ne!(device.blocks_to_size(512), 512); + assert_ne!(device.blocks_to_size(1), 4096); + } + + fn sha256_digest(mut reader: R) -> Result { + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + let mut buffer = [0; 1024]; + + loop { + let count = reader.read(&mut buffer)?; + if count == 0 { + break; + } + hasher.digest_update(&buffer[..count]); + } + + Ok(hasher.digest_finalize().into()) + } + + fn test_export_arg_thread(thread: u32) -> Result<()> { + let entry_tmp_dir = TempDir::new()?; + let entry = create_bootstrap_entry(&entry_tmp_dir); + + let tmp_dir = TempDir::new().unwrap(); + let data_dir = Some(String::from(tmp_dir.as_path().to_str().unwrap())); + + assert!(BlockDevice::export(entry, None, data_dir, thread, true).is_ok()); + + let mut disk_path = PathBuf::from(tmp_dir.as_path()); + disk_path.push("rafs-v6-2.2.boot.disk"); + let input = File::open(disk_path)?; + let reader = BufReader::new(input); + let sha256 = sha256_digest(reader)?; + assert_eq!( + sha256, + String::from("5684c330c622350c12d633d0773201f862b9955375d806670e1aaf36ef038b31") + ); + + Ok(()) + } + + #[test] + fn test_export() { + assert!(test_export_arg_thread(1).is_ok()); + assert!(test_export_arg_thread(2).is_ok()); + } +} diff --git a/service/src/block_nbd.rs b/service/src/block_nbd.rs index 4bb6a746d72..5019dc04520 100644 --- a/service/src/block_nbd.rs +++ b/service/src/block_nbd.rs @@ -1,622 +1,622 @@ -// Copyright (C) 2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0) - -//! Export a RAFSv6 image as a block device through NBD(Network Block Device) protocol. -//! -//! The [Network Block Device](https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md) -//! is a Linux-originated lightweight block access protocol that allows one to export a block device -//! to a client. RAFSv6 images have an block address based encoding, so an RAFSv6 image can be -//! exposed as a block device. The [NbdService] exposes a RAFSv6 image as a block device based on -//! the Linux Network Block Device driver. 
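The worker speaks the classic NBD transmission phase: each kernel request is a 28-byte header (magic, command, handle, byte offset, length) and each simple reply is a 16-byte header (magic, error code, handle), optionally followed by the read data. A hedged sketch of that framing with the `bytes` crate, reusing the magic values defined in this file; the struct and helper names are illustrative, not part of the crate:

```rust
// Sketch of NBD simple-transmission framing; the field layout follows the NBD
// protocol document linked above, and the magics match the constants in this file.
use bytes::{Buf, BufMut};

const NBD_REQUEST_MAGIC: u32 = 0x2560_9513;
const NBD_REPLY_MAGIC: u32 = 0x6744_6698;

struct NbdRequest {
    ty: u32,     // NBD_CMD_* command type
    handle: u64, // opaque cookie echoed back in the reply
    offset: u64, // byte offset into the exported device
    len: u32,    // request length in bytes
}

/// Parse a 28-byte request header, rejecting anything with a bad magic.
fn parse_request(mut hdr: &[u8]) -> Option<NbdRequest> {
    if hdr.len() != 28 || hdr.get_u32() != NBD_REQUEST_MAGIC {
        return None;
    }
    Some(NbdRequest {
        ty: hdr.get_u32(),
        handle: hdr.get_u64(),
        offset: hdr.get_u64(),
        len: hdr.get_u32(),
    })
}

/// Build the 16-byte simple reply header for a request.
fn build_reply(error: u32, handle: u64) -> Vec<u8> {
    let mut reply = Vec::with_capacity(16);
    reply.put_u32(NBD_REPLY_MAGIC);
    reply.put_u32(error);
    reply.put_u64(handle);
    reply
}

fn main() {
    // Assemble a fake read request: 8192 bytes at offset 0x1000, handle 7.
    let mut req = Vec::new();
    req.put_u32(NBD_REQUEST_MAGIC);
    req.put_u32(0); // NBD_CMD_READ
    req.put_u64(7);
    req.put_u64(0x1000);
    req.put_u32(8192);

    let parsed = parse_request(&req).expect("valid header");
    assert_eq!(parsed.ty, 0);
    assert_eq!(parsed.offset, 0x1000);
    assert_eq!(parsed.len, 8192);
    assert_eq!(build_reply(0, parsed.handle).len(), 16);
}
```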
- -use std::any::Any; -use std::fs::{self, OpenOptions}; -use std::io::{Error, Result}; -use std::os::fd::{AsRawFd, FromRawFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; -use std::sync::{Arc, Mutex}; -use std::thread::JoinHandle; - -use bytes::{Buf, BufMut}; -use mio::Waker; -use nydus_api::{BlobCacheEntry, BuildTimeInfo}; -use nydus_storage::utils::alloc_buf; -use tokio::sync::broadcast::{channel, Sender}; -use tokio_uring::buf::IoBuf; -use tokio_uring::net::UnixStream; - -use crate::blob_cache::{generate_blob_key, BlobCacheMgr}; -use crate::block_device::BlockDevice; -use crate::daemon::{ - DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, - NydusDaemon, -}; -use crate::{Error as NydusError, Result as NydusResult}; - -const NBD_SET_SOCK: u32 = 0; -const NBD_SET_BLOCK_SIZE: u32 = 1; -const NBD_DO_IT: u32 = 3; -const NBD_CLEAR_SOCK: u32 = 4; -const NBD_SET_BLOCKS: u32 = 7; -//const NBD_DISCONNECT: u32 = 8; -const NBD_SET_TIMEOUT: u32 = 9; -const NBD_SET_FLAGS: u32 = 10; -const NBD_FLAG_HAS_FLAGS: u32 = 0x1; -const NBD_FLAG_READ_ONLY: u32 = 0x2; -const NBD_FLAG_CAN_MULTI_CONN: u32 = 0x100; -const NBD_CMD_READ: u32 = 0; -const NBD_CMD_DISC: u32 = 2; -const NBD_REQUEST_HEADER_SIZE: usize = 28; -const NBD_REQUEST_MAGIC: u32 = 0x25609513; -const NBD_REPLY_MAGIC: u32 = 0x67446698; -const NBD_OK: u32 = 0; -const NBD_EIO: u32 = 5; -const NBD_EINVAL: u32 = 22; - -fn nbd_ioctl(fd: RawFd, cmd: u32, arg: u64) -> nix::Result { - let code = nix::request_code_none!(0xab, cmd); - unsafe { nix::convert_ioctl_res!(libc::ioctl(fd, code, arg)) } -} - -/// Network Block Device server to expose RAFSv6 images as block devices. -pub struct NbdService { - active: Arc, - blob_id: String, - cache_mgr: Arc, - nbd_dev: fs::File, - sender: Arc>, -} - -impl NbdService { - /// Create a new instance of [NbdService] to expose a RAFSv6 image as a block device. - /// - /// It opens the NBD device at `nbd_path` and initialize it according to information from - /// the block device composed from a RAFSv6 image. The caller needs to ensure that the NBD - /// device is available. - pub fn new(device: Arc, nbd_path: String) -> Result { - // Initialize the NBD device: set block size, block count and flags. - let nbd_dev = OpenOptions::new() - .read(true) - .write(true) - .open(&nbd_path) - .map_err(|e| { - error!("block_nbd: failed to open NBD device {}", nbd_path); - e - })?; - nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_BLOCK_SIZE, device.block_size())?; - nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_BLOCKS, device.blocks() as u64)?; - nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_TIMEOUT, 60)?; - nbd_ioctl(nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0)?; - nbd_ioctl( - nbd_dev.as_raw_fd(), - NBD_SET_FLAGS, - (NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_CAN_MULTI_CONN) as u64, - )?; - - let (sender, _receiver) = channel(4); - - Ok(NbdService { - active: Arc::new(AtomicBool::new(true)), - blob_id: device.meta_blob_id().to_string(), - cache_mgr: device.cache_mgr().clone(), - nbd_dev, - sender: Arc::new(sender), - }) - } - - /// Create a [NbdWorker] to run the event loop to handle NBD requests from kernel. - pub fn create_worker(&self) -> Result { - // Let the NBD driver go. 
- let (sock1, sock2) = std::os::unix::net::UnixStream::pair()?; - nbd_ioctl( - self.nbd_dev.as_raw_fd(), - NBD_SET_SOCK, - sock1.as_raw_fd() as u64, - )?; - - Ok(NbdWorker { - active: self.active.clone(), - blob_id: self.blob_id.clone(), - cache_mgr: self.cache_mgr.clone(), - _sock_kern: sock1, - sock_user: sock2, - sender: self.sender.clone(), - }) - } - - /// Run the event loop to handle incoming NBD requests. - /// - /// The caller will get blocked until the NBD device get destroyed or `NbdService::stop()` get - /// called. - pub fn run(&self) -> Result<()> { - let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_DO_IT, 0); - self.active.store(false, Ordering::Release); - let _ = self.sender.send(1); - let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0); - - Ok(()) - } - - /// Shutdown the NBD session and send exit notification to workers. - pub fn stop(&self) { - self.active.store(false, Ordering::Release); - let _ = self.sender.send(0); - //let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_DISCONNECT, 0); - let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0); - } -} - -/// A worker to handle NBD requests in asynchronous mode. -pub struct NbdWorker { - active: Arc, - blob_id: String, - cache_mgr: Arc, - _sock_kern: std::os::unix::net::UnixStream, - sock_user: std::os::unix::net::UnixStream, - sender: Arc>, -} - -impl NbdWorker { - /// Run the event loop to handle NBD requests from kernel in asynchronous mode. - pub async fn run(self) { - let device = - match BlockDevice::new_with_cache_manager(self.blob_id.clone(), self.cache_mgr.clone()) - { - Ok(v) => v, - Err(e) => { - error!( - "block_nbd: failed to create block device for {}, {}", - self.blob_id, e - ); - return; - } - }; - - // Safe because the RawFd is valid during the lifetime of run(). - let mut sock = unsafe { UnixStream::from_raw_fd(self.sock_user.as_raw_fd()) }; - let mut receiver = self.sender.subscribe(); - let mut buf = vec![0u8; NBD_REQUEST_HEADER_SIZE]; - let mut pos = 0; - - while self.active.load(Ordering::Acquire) { - tokio::select! 
{ - (res, s) = sock.read(buf.slice(pos..)) => { - match res { - Err(e) => { - warn!("block_nbd: failed to get request from kernel for {}, {}", self.blob_id, e); - break; - } - Ok(sz) => { - buf = s.into_inner(); - pos += sz; - if pos == NBD_REQUEST_HEADER_SIZE { - match self.handle_request(&buf, &mut sock, &device).await { - Ok(true) => {} - Ok(false) => break, - Err(e) => { - warn!("block_nbd: failed to handle request for {}, {}", self.blob_id, e); - break; - } - } - pos = 0; - } - } - } - } - _ = receiver.recv() => { - break; - } - } - } - } - - async fn handle_request( - &self, - mut request: &[u8], - sock: &mut UnixStream, - device: &BlockDevice, - ) -> Result { - let magic = request.get_u32(); - let ty = request.get_u32(); - let handle = request.get_u64(); - let pos = request.get_u64(); - let len = request.get_u32(); - - let block_size = device.block_size(); - let mut code = NBD_OK; - let mut data_buf = alloc_buf(len as usize); - if magic != NBD_REQUEST_MAGIC || pos % block_size != 0 || len as u64 % block_size != 0 { - warn!( - "block_nbd: invalid request magic 0x{:x}, type {}, pos 0x{:x}, len 0x{:x}", - magic, ty, pos, len - ); - code = NBD_EINVAL; - } else if ty == NBD_CMD_READ { - let start = (pos / block_size) as u32; - let count = len / block_size as u32; - let (res, buf) = device.async_read(start, count, data_buf).await; - data_buf = buf; - match res { - Ok(sz) => { - if sz != len as usize { - warn!("block_nbd: got 0x{:x} bytes, expect 0x{:x}", sz, len); - code = NBD_EIO; - } - } - Err(e) => { - warn!("block_nbd: failed to read data from block device, {}", e); - code = NBD_EIO; - } - } - } else if ty == NBD_CMD_DISC { - return Ok(false); - } - - let mut reply = Vec::with_capacity(16); - reply.put_u32(NBD_REPLY_MAGIC); - reply.put_u32(code); - reply.put_u64(handle); - assert_eq!(reply.len(), 16); - assert_eq!(data_buf.len(), len as usize); - sock.write_all(reply).await.0?; - if code == NBD_OK { - sock.write_all(data_buf).await.0?; - } - - Ok(true) - } -} - -/// A [NydusDaemon] implementation to expose RAFS v6 images as block devices through NBD. 
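Before touching the block device, `handle_request` turns the request's byte offset and length into a (start block, block count) pair and rejects anything that is not block-aligned, answering with `NBD_EINVAL`. A small stand-alone sketch of that validation step; the function name is hypothetical and the example assumes a 4096-byte block size:

```rust
/// Map an NBD byte range onto whole device blocks, in the spirit of
/// handle_request; unaligned requests are rejected with None (the real code
/// answers them with NBD_EINVAL).
fn to_block_range(pos: u64, len: u32, block_size: u64) -> Option<(u32, u32)> {
    if pos % block_size != 0 || len as u64 % block_size != 0 {
        return None;
    }
    let start = u32::try_from(pos / block_size).ok()?;
    let count = (len as u64 / block_size) as u32;
    Some((start, count))
}

fn main() {
    // An 8 KiB read at offset 16 KiB with 4 KiB blocks covers blocks 4 and 5.
    assert_eq!(to_block_range(0x4000, 0x2000, 4096), Some((4, 2)));
    // An unaligned offset is refused.
    assert_eq!(to_block_range(0x4001, 0x2000, 4096), None);
}
```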
-pub struct NbdDaemon { - cache_mgr: Arc, - service: Arc, - - bti: BuildTimeInfo, - id: Option, - supervisor: Option, - - nbd_threads: u32, - nbd_control_thread: Mutex>>, - nbd_service_threads: Mutex>>>, - request_sender: Arc>>, - result_receiver: Mutex>>, - state: AtomicI32, - state_machine_thread: Mutex>>>, - waker: Arc, -} - -impl NbdDaemon { - fn new( - nbd_path: String, - threads: u32, - blob_entry: BlobCacheEntry, - trigger: std::sync::mpsc::Sender, - receiver: std::sync::mpsc::Receiver>, - waker: Arc, - bti: BuildTimeInfo, - id: Option, - supervisor: Option, - ) -> Result { - let blob_id = generate_blob_key(&blob_entry.domain_id, &blob_entry.blob_id); - let cache_mgr = Arc::new(BlobCacheMgr::new()); - cache_mgr.add_blob_entry(&blob_entry)?; - let block_device = BlockDevice::new_with_cache_manager(blob_id.clone(), cache_mgr.clone())?; - let nbd_service = NbdService::new(Arc::new(block_device), nbd_path)?; - - Ok(NbdDaemon { - cache_mgr, - service: Arc::new(nbd_service), - - bti, - id, - supervisor, - - nbd_threads: threads, - nbd_control_thread: Mutex::new(None), - nbd_service_threads: Mutex::new(Vec::new()), - state: AtomicI32::new(DaemonState::INIT as i32), - request_sender: Arc::new(Mutex::new(trigger)), - result_receiver: Mutex::new(receiver), - state_machine_thread: Mutex::new(None), - waker, - }) - } -} - -impl DaemonStateMachineSubscriber for NbdDaemon { - fn on_event(&self, event: DaemonStateMachineInput) -> NydusResult<()> { - self.request_sender - .lock() - .expect("block_nbd: failed to lock request sender!") - .send(event) - .map_err(NydusError::ChannelSend)?; - - self.result_receiver - .lock() - .expect("block_nbd: failed to lock result receiver!") - .recv() - .map_err(NydusError::ChannelReceive)? - } -} - -impl NydusDaemon for NbdDaemon { - fn as_any(&self) -> &dyn Any { - self - } - - fn id(&self) -> Option { - self.id.clone() - } - - fn version(&self) -> BuildTimeInfo { - self.bti.clone() - } - - fn get_state(&self) -> DaemonState { - self.state.load(Ordering::Relaxed).into() - } - - fn set_state(&self, state: DaemonState) { - self.state.store(state as i32, Ordering::Relaxed); - } - - fn start(&self) -> NydusResult<()> { - info!("start NBD service with {} worker threads", self.nbd_threads); - for _ in 0..self.nbd_threads { - let waker = self.waker.clone(); - let worker = self - .service - .create_worker() - .map_err(|e| NydusError::StartService(format!("{}", e)))?; - let thread = std::thread::Builder::new() - .name("nbd_worker".to_string()) - .spawn(move || { - tokio_uring::start(async move { - worker.run().await; - // Notify the daemon controller that one working thread has exited. 
- if let Err(err) = waker.wake() { - error!("block_nbd: fail to exit daemon, error: {:?}", err); - } - }); - Ok(()) - }) - .map_err(NydusError::ThreadSpawn)?; - self.nbd_service_threads.lock().unwrap().push(thread); - } - - let nbd = self.service.clone(); - let thread = std::thread::spawn(move || { - if let Err(e) = nbd.run() { - error!("block_nbd: failed to run NBD control loop, {e}"); - } - }); - *self.nbd_control_thread.lock().unwrap() = Some(thread); - - Ok(()) - } - - fn umount(&self) -> NydusResult<()> { - Ok(()) - } - - fn stop(&self) { - self.service.stop(); - } - - fn wait(&self) -> NydusResult<()> { - self.wait_state_machine()?; - self.wait_service() - } - - fn wait_service(&self) -> NydusResult<()> { - loop { - let handle = self.nbd_service_threads.lock().unwrap().pop(); - if let Some(handle) = handle { - handle - .join() - .map_err(|e| { - let e = *e - .downcast::() - .unwrap_or_else(|e| Box::new(eother!(e))); - NydusError::WaitDaemon(e) - })? - .map_err(NydusError::WaitDaemon)?; - } else { - // No more handles to wait - break; - } - } - - Ok(()) - } - - fn wait_state_machine(&self) -> NydusResult<()> { - let mut guard = self.state_machine_thread.lock().unwrap(); - if let Some(handler) = guard.take() { - let result = handler.join().map_err(|e| { - let e = *e - .downcast::() - .unwrap_or_else(|e| Box::new(eother!(e))); - NydusError::WaitDaemon(e) - })?; - result.map_err(NydusError::WaitDaemon) - } else { - Ok(()) - } - } - - fn supervisor(&self) -> Option { - self.supervisor.clone() - } - - fn save(&self) -> NydusResult<()> { - unimplemented!() - } - - fn restore(&self) -> NydusResult<()> { - unimplemented!() - } - - fn get_blob_cache_mgr(&self) -> Option> { - Some(self.cache_mgr.clone()) - } -} - -/// Create and start a [NbdDaemon] instance to expose a RAFS v6 image as a block device through NBD. -#[allow(clippy::too_many_arguments)] -pub fn create_nbd_daemon( - device: String, - threads: u32, - blob_entry: BlobCacheEntry, - bti: BuildTimeInfo, - id: Option, - supervisor: Option, - waker: Arc, -) -> Result> { - let (trigger, events_rx) = std::sync::mpsc::channel::(); - let (result_sender, result_receiver) = std::sync::mpsc::channel::>(); - let daemon = NbdDaemon::new( - device, - threads, - blob_entry, - trigger, - result_receiver, - waker, - bti, - id, - supervisor, - )?; - let daemon = Arc::new(daemon); - let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); - let machine_thread = machine.kick_state_machine()?; - *daemon.state_machine_thread.lock().unwrap() = Some(machine_thread); - daemon - .on_event(DaemonStateMachineInput::Mount) - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Start) - .map_err(|e| eother!(e))?; - - /* - // TODO: support crash recover and hot-upgrade. - // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper - // finding a victim is not necessary. - if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(&mnt, api_sock.as_ref().unwrap())?) 
- || api_sock.is_none() - { - if let Some(cmd) = mount_cmd { - daemon.service.mount(cmd)?; - } - daemon - .service - .session - .lock() - .unwrap() - .mount() - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Mount) - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Start) - .map_err(|e| eother!(e))?; - daemon - .service - .conn - .store(calc_fuse_conn(mnt)?, Ordering::Relaxed); - } - */ - - Ok(daemon) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::blob_cache::{generate_blob_key, BlobCacheMgr}; - use nydus_api::BlobCacheEntry; - use std::path::PathBuf; - use std::time::Duration; - use vmm_sys_util::tempdir::TempDir; - - fn create_block_device(tmpdir: PathBuf) -> Result> { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - let mut dest_path = tmpdir.clone(); - dest_path.push("be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - fs::copy(&source_path, &dest_path).unwrap(); - - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - let config = r#" - { - "type": "bootstrap", - "id": "rafs-v6", - "domain_id": "domain2", - "config_v2": { - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "filecache", - "filecache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "RAFS_V5" - } - }"#; - let content = config - .replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()) - .replace("RAFS_V5", &source_path.display().to_string()); - let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); - assert!(entry.prepare_configuration_info()); - - let mgr = BlobCacheMgr::new(); - mgr.add_blob_entry(&entry).unwrap(); - let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); - assert!(mgr.get_config(&blob_id).is_some()); - - // Check existence of data blob referenced by the bootstrap. - let key = generate_blob_key( - &entry.domain_id, - "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", - ); - assert!(mgr.get_config(&key).is_some()); - - let mgr = Arc::new(mgr); - let device = BlockDevice::new_with_cache_manager(blob_id.clone(), mgr).unwrap(); - - Ok(Arc::new(device)) - } - - #[ignore] - #[test] - fn test_nbd_device() { - tokio_uring::start(async { - let tmpdir = TempDir::new().unwrap(); - let device = create_block_device(tmpdir.as_path().to_path_buf()).unwrap(); - let nbd = NbdService::new(device, "/dev/nbd15".to_string()).unwrap(); - let nbd = Arc::new(nbd); - let nbd2 = nbd.clone(); - let worker1 = nbd.create_worker().unwrap(); - let worker2 = nbd.create_worker().unwrap(); - - tokio_uring::spawn(async move { worker1.run().await }); - tokio_uring::spawn(async move { worker2.run().await }); - std::thread::spawn(move || { - nbd2.run().unwrap(); - }); - tokio::time::sleep(Duration::from_micros(100000)).await; - nbd.stop(); - }) - } -} +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0) + +//! Export a RAFSv6 image as a block device through NBD(Network Block Device) protocol. +//! +//! The [Network Block Device](https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md) +//! is a Linux-originated lightweight block access protocol that allows one to export a block device +//! 
to a client. RAFSv6 images have an block address based encoding, so an RAFSv6 image can be +//! exposed as a block device. The [NbdService] exposes a RAFSv6 image as a block device based on +//! the Linux Network Block Device driver. + +use std::any::Any; +use std::fs::{self, OpenOptions}; +use std::io::{Error, Result}; +use std::os::fd::{AsRawFd, FromRawFd, RawFd}; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread::JoinHandle; + +use bytes::{Buf, BufMut}; +use mio::Waker; +use nydus_api::{BlobCacheEntry, BuildTimeInfo}; +use nydus_storage::utils::alloc_buf; +use tokio::sync::broadcast::{channel, Sender}; +use tokio_uring::buf::IoBuf; +use tokio_uring::net::UnixStream; + +use crate::blob_cache::{generate_blob_key, BlobCacheMgr}; +use crate::block_device::BlockDevice; +use crate::daemon::{ + DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, + NydusDaemon, +}; +use crate::{Error as NydusError, Result as NydusResult}; + +const NBD_SET_SOCK: u32 = 0; +const NBD_SET_BLOCK_SIZE: u32 = 1; +const NBD_DO_IT: u32 = 3; +const NBD_CLEAR_SOCK: u32 = 4; +const NBD_SET_BLOCKS: u32 = 7; +//const NBD_DISCONNECT: u32 = 8; +const NBD_SET_TIMEOUT: u32 = 9; +const NBD_SET_FLAGS: u32 = 10; +const NBD_FLAG_HAS_FLAGS: u32 = 0x1; +const NBD_FLAG_READ_ONLY: u32 = 0x2; +const NBD_FLAG_CAN_MULTI_CONN: u32 = 0x100; +const NBD_CMD_READ: u32 = 0; +const NBD_CMD_DISC: u32 = 2; +const NBD_REQUEST_HEADER_SIZE: usize = 28; +const NBD_REQUEST_MAGIC: u32 = 0x25609513; +const NBD_REPLY_MAGIC: u32 = 0x67446698; +const NBD_OK: u32 = 0; +const NBD_EIO: u32 = 5; +const NBD_EINVAL: u32 = 22; + +fn nbd_ioctl(fd: RawFd, cmd: u32, arg: u64) -> nix::Result { + let code = nix::request_code_none!(0xab, cmd); + unsafe { nix::convert_ioctl_res!(libc::ioctl(fd, code, arg)) } +} + +/// Network Block Device server to expose RAFSv6 images as block devices. +pub struct NbdService { + active: Arc, + blob_id: String, + cache_mgr: Arc, + nbd_dev: fs::File, + sender: Arc>, +} + +impl NbdService { + /// Create a new instance of [NbdService] to expose a RAFSv6 image as a block device. + /// + /// It opens the NBD device at `nbd_path` and initialize it according to information from + /// the block device composed from a RAFSv6 image. The caller needs to ensure that the NBD + /// device is available. + pub fn new(device: Arc, nbd_path: String) -> Result { + // Initialize the NBD device: set block size, block count and flags. + let nbd_dev = OpenOptions::new() + .read(true) + .write(true) + .open(&nbd_path) + .map_err(|e| { + error!("block_nbd: failed to open NBD device {}", nbd_path); + e + })?; + nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_BLOCK_SIZE, device.block_size())?; + nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_BLOCKS, device.blocks() as u64)?; + nbd_ioctl(nbd_dev.as_raw_fd(), NBD_SET_TIMEOUT, 60)?; + nbd_ioctl(nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0)?; + nbd_ioctl( + nbd_dev.as_raw_fd(), + NBD_SET_FLAGS, + (NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_CAN_MULTI_CONN) as u64, + )?; + + let (sender, _receiver) = channel(4); + + Ok(NbdService { + active: Arc::new(AtomicBool::new(true)), + blob_id: device.meta_blob_id().to_string(), + cache_mgr: device.cache_mgr().clone(), + nbd_dev, + sender: Arc::new(sender), + }) + } + + /// Create a [NbdWorker] to run the event loop to handle NBD requests from kernel. + pub fn create_worker(&self) -> Result { + // Let the NBD driver go. 
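As background for the `nbd_ioctl()` helper above, the `_IO(0xab, nr)` request codes produced by `nix::request_code_none!(0xab, cmd)` can be reproduced by hand. This is a standalone sketch, not part of the patch; it assumes the x86-64 Linux encoding where `_IOC_NONE == 0`, which is architecture dependent.

```rust
// Sketch only: the _IO(0xab, nr) encoding behind nbd_ioctl(). With no
// direction bits and no size field, the request code is (type << 8) | nr
// on x86-64 Linux (assumes _IOC_NONE == 0).
const NBD_IOCTL_TYPE: u64 = 0xab;

fn nbd_request_code(nr: u64) -> u64 {
    (NBD_IOCTL_TYPE << 8) | nr
}

fn main() {
    assert_eq!(nbd_request_code(0), 0xab00); // NBD_SET_SOCK
    assert_eq!(nbd_request_code(3), 0xab03); // NBD_DO_IT
    assert_eq!(nbd_request_code(4), 0xab04); // NBD_CLEAR_SOCK
}
```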
+ let (sock1, sock2) = std::os::unix::net::UnixStream::pair()?; + nbd_ioctl( + self.nbd_dev.as_raw_fd(), + NBD_SET_SOCK, + sock1.as_raw_fd() as u64, + )?; + + Ok(NbdWorker { + active: self.active.clone(), + blob_id: self.blob_id.clone(), + cache_mgr: self.cache_mgr.clone(), + _sock_kern: sock1, + sock_user: sock2, + sender: self.sender.clone(), + }) + } + + /// Run the event loop to handle incoming NBD requests. + /// + /// The caller will get blocked until the NBD device get destroyed or `NbdService::stop()` get + /// called. + pub fn run(&self) -> Result<()> { + let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_DO_IT, 0); + self.active.store(false, Ordering::Release); + let _ = self.sender.send(1); + let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0); + + Ok(()) + } + + /// Shutdown the NBD session and send exit notification to workers. + pub fn stop(&self) { + self.active.store(false, Ordering::Release); + let _ = self.sender.send(0); + //let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_DISCONNECT, 0); + let _ = nbd_ioctl(self.nbd_dev.as_raw_fd(), NBD_CLEAR_SOCK, 0); + } +} + +/// A worker to handle NBD requests in asynchronous mode. +pub struct NbdWorker { + active: Arc, + blob_id: String, + cache_mgr: Arc, + _sock_kern: std::os::unix::net::UnixStream, + sock_user: std::os::unix::net::UnixStream, + sender: Arc>, +} + +impl NbdWorker { + /// Run the event loop to handle NBD requests from kernel in asynchronous mode. + pub async fn run(self) { + let device = + match BlockDevice::new_with_cache_manager(self.blob_id.clone(), self.cache_mgr.clone()) + { + Ok(v) => v, + Err(e) => { + error!( + "block_nbd: failed to create block device for {}, {}", + self.blob_id, e + ); + return; + } + }; + + // Safe because the RawFd is valid during the lifetime of run(). + let mut sock = unsafe { UnixStream::from_raw_fd(self.sock_user.as_raw_fd()) }; + let mut receiver = self.sender.subscribe(); + let mut buf = vec![0u8; NBD_REQUEST_HEADER_SIZE]; + let mut pos = 0; + + while self.active.load(Ordering::Acquire) { + tokio::select! 
{ + (res, s) = sock.read(buf.slice(pos..)) => { + match res { + Err(e) => { + warn!("block_nbd: failed to get request from kernel for {}, {}", self.blob_id, e); + break; + } + Ok(sz) => { + buf = s.into_inner(); + pos += sz; + if pos == NBD_REQUEST_HEADER_SIZE { + match self.handle_request(&buf, &mut sock, &device).await { + Ok(true) => {} + Ok(false) => break, + Err(e) => { + warn!("block_nbd: failed to handle request for {}, {}", self.blob_id, e); + break; + } + } + pos = 0; + } + } + } + } + _ = receiver.recv() => { + break; + } + } + } + } + + async fn handle_request( + &self, + mut request: &[u8], + sock: &mut UnixStream, + device: &BlockDevice, + ) -> Result { + let magic = request.get_u32(); + let ty = request.get_u32(); + let handle = request.get_u64(); + let pos = request.get_u64(); + let len = request.get_u32(); + + let block_size = device.block_size(); + let mut code = NBD_OK; + let mut data_buf = alloc_buf(len as usize); + if magic != NBD_REQUEST_MAGIC || pos % block_size != 0 || len as u64 % block_size != 0 { + warn!( + "block_nbd: invalid request magic 0x{:x}, type {}, pos 0x{:x}, len 0x{:x}", + magic, ty, pos, len + ); + code = NBD_EINVAL; + } else if ty == NBD_CMD_READ { + let start = (pos / block_size) as u32; + let count = len / block_size as u32; + let (res, buf) = device.async_read(start, count, data_buf).await; + data_buf = buf; + match res { + Ok(sz) => { + if sz != len as usize { + warn!("block_nbd: got 0x{:x} bytes, expect 0x{:x}", sz, len); + code = NBD_EIO; + } + } + Err(e) => { + warn!("block_nbd: failed to read data from block device, {}", e); + code = NBD_EIO; + } + } + } else if ty == NBD_CMD_DISC { + return Ok(false); + } + + let mut reply = Vec::with_capacity(16); + reply.put_u32(NBD_REPLY_MAGIC); + reply.put_u32(code); + reply.put_u64(handle); + assert_eq!(reply.len(), 16); + assert_eq!(data_buf.len(), len as usize); + sock.write_all(reply).await.0?; + if code == NBD_OK { + sock.write_all(data_buf).await.0?; + } + + Ok(true) + } +} + +/// A [NydusDaemon] implementation to expose RAFS v6 images as block devices through NBD. 
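For context on the wire format that `handle_request()` above exchanges with the kernel, here is a minimal standalone sketch of the 28-byte request header and the 16-byte simple reply, using the same `bytes` crate helpers as the code. The struct and function names are illustrative, not part of the patch.

```rust
// Sketch only: parse an NBD request header and build a simple reply, in the
// same big-endian layout the worker above uses (magic, type, handle,
// offset, length for requests; magic, error, handle for replies).
use bytes::{Buf, BufMut};

const NBD_REQUEST_MAGIC: u32 = 0x2560_9513;
const NBD_REPLY_MAGIC: u32 = 0x6744_6698;

#[derive(Debug)]
struct NbdRequest {
    ty: u32,
    handle: u64,
    offset: u64,
    len: u32,
}

fn parse_request(mut buf: &[u8]) -> Option<NbdRequest> {
    // 28 bytes total: u32 magic + u32 type + u64 handle + u64 offset + u32 len.
    if buf.len() < 28 || buf.get_u32() != NBD_REQUEST_MAGIC {
        return None;
    }
    Some(NbdRequest {
        ty: buf.get_u32(),
        handle: buf.get_u64(),
        offset: buf.get_u64(),
        len: buf.get_u32(),
    })
}

fn simple_reply(error: u32, handle: u64) -> Vec<u8> {
    let mut reply = Vec::with_capacity(16);
    reply.put_u32(NBD_REPLY_MAGIC);
    reply.put_u32(error);
    reply.put_u64(handle);
    reply
}
```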
+pub struct NbdDaemon { + cache_mgr: Arc, + service: Arc, + + bti: BuildTimeInfo, + id: Option, + supervisor: Option, + + nbd_threads: u32, + nbd_control_thread: Mutex>>, + nbd_service_threads: Mutex>>>, + request_sender: Arc>>, + result_receiver: Mutex>>, + state: AtomicI32, + state_machine_thread: Mutex>>>, + waker: Arc, +} + +impl NbdDaemon { + fn new( + nbd_path: String, + threads: u32, + blob_entry: BlobCacheEntry, + trigger: std::sync::mpsc::Sender, + receiver: std::sync::mpsc::Receiver>, + waker: Arc, + bti: BuildTimeInfo, + id: Option, + supervisor: Option, + ) -> Result { + let blob_id = generate_blob_key(&blob_entry.domain_id, &blob_entry.blob_id); + let cache_mgr = Arc::new(BlobCacheMgr::new()); + cache_mgr.add_blob_entry(&blob_entry)?; + let block_device = BlockDevice::new_with_cache_manager(blob_id.clone(), cache_mgr.clone())?; + let nbd_service = NbdService::new(Arc::new(block_device), nbd_path)?; + + Ok(NbdDaemon { + cache_mgr, + service: Arc::new(nbd_service), + + bti, + id, + supervisor, + + nbd_threads: threads, + nbd_control_thread: Mutex::new(None), + nbd_service_threads: Mutex::new(Vec::new()), + state: AtomicI32::new(DaemonState::INIT as i32), + request_sender: Arc::new(Mutex::new(trigger)), + result_receiver: Mutex::new(receiver), + state_machine_thread: Mutex::new(None), + waker, + }) + } +} + +impl DaemonStateMachineSubscriber for NbdDaemon { + fn on_event(&self, event: DaemonStateMachineInput) -> NydusResult<()> { + self.request_sender + .lock() + .expect("block_nbd: failed to lock request sender!") + .send(event) + .map_err(NydusError::ChannelSend)?; + + self.result_receiver + .lock() + .expect("block_nbd: failed to lock result receiver!") + .recv() + .map_err(NydusError::ChannelReceive)? + } +} + +impl NydusDaemon for NbdDaemon { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> Option { + self.id.clone() + } + + fn version(&self) -> BuildTimeInfo { + self.bti.clone() + } + + fn get_state(&self) -> DaemonState { + self.state.load(Ordering::Relaxed).into() + } + + fn set_state(&self, state: DaemonState) { + self.state.store(state as i32, Ordering::Relaxed); + } + + fn start(&self) -> NydusResult<()> { + info!("start NBD service with {} worker threads", self.nbd_threads); + for _ in 0..self.nbd_threads { + let waker = self.waker.clone(); + let worker = self + .service + .create_worker() + .map_err(|e| NydusError::StartService(format!("{}", e)))?; + let thread = std::thread::Builder::new() + .name("nbd_worker".to_string()) + .spawn(move || { + tokio_uring::start(async move { + worker.run().await; + // Notify the daemon controller that one working thread has exited. 
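The comment above describes the worker/waker handshake used throughout `start()`. Seen in isolation, the pattern is just a named thread that runs the worker and then wakes the controller; the helper below is a hypothetical sketch assuming the same `mio::Waker` the daemon controller hands out.

```rust
use std::sync::Arc;
use std::thread::{self, JoinHandle};

// Hypothetical helper mirroring the loop in start(): run a worker on its own
// thread and wake the daemon controller once the worker returns.
fn spawn_worker<F>(
    name: &str,
    waker: Arc<mio::Waker>,
    work: F,
) -> std::io::Result<JoinHandle<()>>
where
    F: FnOnce() + Send + 'static,
{
    thread::Builder::new().name(name.to_string()).spawn(move || {
        work();
        // Notify the controller that this worker thread has exited.
        if let Err(e) = waker.wake() {
            eprintln!("failed to wake daemon controller: {e}");
        }
    })
}
```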
+ if let Err(err) = waker.wake() { + error!("block_nbd: fail to exit daemon, error: {:?}", err); + } + }); + Ok(()) + }) + .map_err(NydusError::ThreadSpawn)?; + self.nbd_service_threads.lock().unwrap().push(thread); + } + + let nbd = self.service.clone(); + let thread = std::thread::spawn(move || { + if let Err(e) = nbd.run() { + error!("block_nbd: failed to run NBD control loop, {e}"); + } + }); + *self.nbd_control_thread.lock().unwrap() = Some(thread); + + Ok(()) + } + + fn umount(&self) -> NydusResult<()> { + Ok(()) + } + + fn stop(&self) { + self.service.stop(); + } + + fn wait(&self) -> NydusResult<()> { + self.wait_state_machine()?; + self.wait_service() + } + + fn wait_service(&self) -> NydusResult<()> { + loop { + let handle = self.nbd_service_threads.lock().unwrap().pop(); + if let Some(handle) = handle { + handle + .join() + .map_err(|e| { + let e = *e + .downcast::() + .unwrap_or_else(|e| Box::new(eother!(e))); + NydusError::WaitDaemon(e) + })? + .map_err(NydusError::WaitDaemon)?; + } else { + // No more handles to wait + break; + } + } + + Ok(()) + } + + fn wait_state_machine(&self) -> NydusResult<()> { + let mut guard = self.state_machine_thread.lock().unwrap(); + if let Some(handler) = guard.take() { + let result = handler.join().map_err(|e| { + let e = *e + .downcast::() + .unwrap_or_else(|e| Box::new(eother!(e))); + NydusError::WaitDaemon(e) + })?; + result.map_err(NydusError::WaitDaemon) + } else { + Ok(()) + } + } + + fn supervisor(&self) -> Option { + self.supervisor.clone() + } + + fn save(&self) -> NydusResult<()> { + unimplemented!() + } + + fn restore(&self) -> NydusResult<()> { + unimplemented!() + } + + fn get_blob_cache_mgr(&self) -> Option> { + Some(self.cache_mgr.clone()) + } +} + +/// Create and start a [NbdDaemon] instance to expose a RAFS v6 image as a block device through NBD. +#[allow(clippy::too_many_arguments)] +pub fn create_nbd_daemon( + device: String, + threads: u32, + blob_entry: BlobCacheEntry, + bti: BuildTimeInfo, + id: Option, + supervisor: Option, + waker: Arc, +) -> Result> { + let (trigger, events_rx) = std::sync::mpsc::channel::(); + let (result_sender, result_receiver) = std::sync::mpsc::channel::>(); + let daemon = NbdDaemon::new( + device, + threads, + blob_entry, + trigger, + result_receiver, + waker, + bti, + id, + supervisor, + )?; + let daemon = Arc::new(daemon); + let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); + let machine_thread = machine.kick_state_machine()?; + *daemon.state_machine_thread.lock().unwrap() = Some(machine_thread); + daemon + .on_event(DaemonStateMachineInput::Mount) + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Start) + .map_err(|e| eother!(e))?; + + /* + // TODO: support crash recover and hot-upgrade. + // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper + // finding a victim is not necessary. + if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(&mnt, api_sock.as_ref().unwrap())?) 
+ || api_sock.is_none() + { + if let Some(cmd) = mount_cmd { + daemon.service.mount(cmd)?; + } + daemon + .service + .session + .lock() + .unwrap() + .mount() + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Mount) + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Start) + .map_err(|e| eother!(e))?; + daemon + .service + .conn + .store(calc_fuse_conn(mnt)?, Ordering::Relaxed); + } + */ + + Ok(daemon) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blob_cache::{generate_blob_key, BlobCacheMgr}; + use nydus_api::BlobCacheEntry; + use std::path::PathBuf; + use std::time::Duration; + use vmm_sys_util::tempdir::TempDir; + + fn create_block_device(tmpdir: PathBuf) -> Result> { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + let mut dest_path = tmpdir.clone(); + dest_path.push("be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + fs::copy(&source_path, &dest_path).unwrap(); + + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + let config = r#" + { + "type": "bootstrap", + "id": "rafs-v6", + "domain_id": "domain2", + "config_v2": { + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "filecache", + "filecache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "RAFS_V5" + } + }"#; + let content = config + .replace("/tmp/nydus", tmpdir.as_path().to_str().unwrap()) + .replace("RAFS_V5", &source_path.display().to_string()); + let mut entry: BlobCacheEntry = serde_json::from_str(&content).unwrap(); + assert!(entry.prepare_configuration_info()); + + let mgr = BlobCacheMgr::new(); + mgr.add_blob_entry(&entry).unwrap(); + let blob_id = generate_blob_key(&entry.domain_id, &entry.blob_id); + assert!(mgr.get_config(&blob_id).is_some()); + + // Check existence of data blob referenced by the bootstrap. + let key = generate_blob_key( + &entry.domain_id, + "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef", + ); + assert!(mgr.get_config(&key).is_some()); + + let mgr = Arc::new(mgr); + let device = BlockDevice::new_with_cache_manager(blob_id.clone(), mgr).unwrap(); + + Ok(Arc::new(device)) + } + + #[ignore] + #[test] + fn test_nbd_device() { + tokio_uring::start(async { + let tmpdir = TempDir::new().unwrap(); + let device = create_block_device(tmpdir.as_path().to_path_buf()).unwrap(); + let nbd = NbdService::new(device, "/dev/nbd15".to_string()).unwrap(); + let nbd = Arc::new(nbd); + let nbd2 = nbd.clone(); + let worker1 = nbd.create_worker().unwrap(); + let worker2 = nbd.create_worker().unwrap(); + + tokio_uring::spawn(async move { worker1.run().await }); + tokio_uring::spawn(async move { worker2.run().await }); + std::thread::spawn(move || { + nbd2.run().unwrap(); + }); + tokio::time::sleep(Duration::from_micros(100000)).await; + nbd.stop(); + }) + } +} diff --git a/service/src/daemon.rs b/service/src/daemon.rs index c858e05ceb8..5373772166e 100644 --- a/service/src/daemon.rs +++ b/service/src/daemon.rs @@ -1,518 +1,518 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. -// Copyright 2019 Intel Corporation. All Rights Reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! 
Infrastructure to define and manage Nydus service daemons. - -use std::any::Any; -use std::cmp::PartialEq; -use std::convert::From; -use std::fmt::{Display, Formatter}; -use std::ops::Deref; -use std::process; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::mpsc::{Receiver, Sender}; -use std::sync::{Arc, Mutex, MutexGuard}; -use std::thread::{Builder, JoinHandle}; - -use mio::{Events, Poll, Token, Waker}; -use nydus_api::BuildTimeInfo; -use rust_fsm::*; -use serde::{self, Serialize}; - -use crate::fs_service::{FsBackendCollection, FsService}; -use crate::upgrade::UpgradeManager; -use crate::{BlobCacheMgr, Error, Result}; - -/// Nydus daemon working states. -#[allow(clippy::upper_case_acronyms)] -#[derive(Debug, Hash, PartialEq, Eq, Serialize)] -pub enum DaemonState { - INIT = 1, - RUNNING = 2, - READY = 3, - STOPPED = 4, - UNKNOWN = 5, -} - -impl Display for DaemonState { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -impl From for DaemonState { - fn from(i: i32) -> Self { - match i { - 1 => DaemonState::INIT, - 2 => DaemonState::RUNNING, - 3 => DaemonState::READY, - 4 => DaemonState::STOPPED, - _ => DaemonState::UNKNOWN, - } - } -} - -/// Build, version and working state information for Nydus daemons. -#[derive(Serialize)] -pub struct DaemonInfo { - /// Build and version information. - pub version: BuildTimeInfo, - /// Optional daemon identifier. - pub id: Option, - /// Optional daemon supervisor configuration information. - pub supervisor: Option, - /// Daemon working state. - pub state: DaemonState, - /// Optional metrics and statistics about filesystem instances. - pub backend_collection: Option, -} - -/// Abstract interfaces for Nydus daemon objects. -/// -/// The [`NydusDaemon`] trait defines interfaces that an Nydus daemon object should implement, -/// so the daemon manager can manage those objects. -pub trait NydusDaemon: DaemonStateMachineSubscriber + Send + Sync { - /// Cast `self` to trait object of [Any] to support object downcast. - fn as_any(&self) -> &dyn Any; - - /// Get optional daemon identifier. - fn id(&self) -> Option; - - /// Get build and version information. - fn version(&self) -> BuildTimeInfo; - - /// Get status information about the daemon. - fn export_info(&self, include_fs_info: bool) -> Result { - let mut response = DaemonInfo { - version: self.version(), - id: self.id(), - supervisor: self.supervisor(), - state: self.get_state(), - backend_collection: None, - }; - if include_fs_info { - if let Some(fs) = self.get_default_fs_service() { - response.backend_collection = Some(fs.backend_collection().deref().clone()); - } - } - - serde_json::to_string(&response).map_err(Error::Serde) - } - - /// Get daemon working state. - fn get_state(&self) -> DaemonState; - /// Set daemon working state. - fn set_state(&self, s: DaemonState); - /// Start the daemon object to serve incoming requests. - fn start(&self) -> Result<()>; - /// Umount the FUSE filesystem. - fn umount(&self) -> Result<()>; - /// Stop the daemon object. - fn stop(&self) {} - /// Trigger `Stop` transition event to stop the daemon. - fn trigger_stop(&self) -> Result<()> { - let s = self.get_state(); - - if s == DaemonState::STOPPED { - return Ok(()); - } - - if s == DaemonState::RUNNING { - self.on_event(DaemonStateMachineInput::Stop)?; - } - - self.on_event(DaemonStateMachineInput::Stop) - } - /// Trigger transition events to move the state machine to `STOPPED` state. 
- fn trigger_exit(&self) -> Result<()> { - let s = self.get_state(); - - if s == DaemonState::STOPPED { - return Ok(()); - } - - if s == DaemonState::INIT { - return self.on_event(DaemonStateMachineInput::Stop); - } - - if s == DaemonState::RUNNING { - self.on_event(DaemonStateMachineInput::Stop)?; - } - - self.on_event(DaemonStateMachineInput::Exit) - } - - /// Wait for daemon to exit. - fn wait(&self) -> Result<()>; - /// Wait for service worker thread to exit. - fn wait_service(&self) -> Result<()> { - Ok(()) - } - /// Wait for state machine worker thread to exit. - fn wait_state_machine(&self) -> Result<()> { - Ok(()) - } - - /// Get supervisor configuration information. - fn supervisor(&self) -> Option; - /// Save state for online upgrade. - fn save(&self) -> Result<()>; - /// Restore state for online upgrade. - fn restore(&self) -> Result<()>; - /// Trigger `Takeover` transition event to take over control from old instance. - fn trigger_takeover(&self) -> Result<()> { - self.on_event(DaemonStateMachineInput::Takeover) - } - /// Trigger `Start` transition event to start the new instance. - fn trigger_start(&self) -> Result<()> { - self.on_event(DaemonStateMachineInput::Start) - } - - fn upgrade_mgr(&self) -> Option> { - None - } - - // For backward compatibility. - /// Set default filesystem service object. - fn get_default_fs_service(&self) -> Option> { - None - } - - /// Get the optional `BlobCacheMgr` object. - fn get_blob_cache_mgr(&self) -> Option> { - None - } - - /// Delete a blob object managed by the daemon. - fn delete_blob(&self, _blob_id: String) -> Result<()> { - Ok(()) - } -} - -// State machine for Nydus daemon workflow. -// -// Valid states for Nydus daemon state machine: -// - `Init` means nydusd is just started and potentially configured well but not -// yet negotiate with kernel the capabilities of both sides. It even does not try -// to set up fuse session by mounting `/fuse/dev`(in case of `fusedev` backend). -// - `Ready` means nydusd is ready for start or die. Fuse session is created. -// - `Running` means nydusd has successfully prepared all the stuff needed to work as a -// user-space fuse filesystem, however, the essential capabilities negotiation might not be -// done yet. It relies on `fuse-rs` to tell if capability negotiation is done. -// - `Die` state means the whole nydusd process is going to die. -state_machine! { - derive(Debug, Clone) - pub DaemonStateMachine(Init) - - Init => { - Mount => Ready, - Takeover => Ready[Restore], - Stop => Die[StopStateMachine], - }, - Ready => { - Start => Running[StartService], - Stop => Die[Umount], - Exit => Die[StopStateMachine], - }, - Running => { - Stop => Ready [TerminateService], - }, -} - -/// An implementation of the state machine defined by [`DaemonStateMachine`]. -pub struct DaemonStateMachineContext { - pid: u32, - daemon: Arc, - sm: StateMachine, - request_receiver: Receiver, - result_sender: Sender>, -} - -impl DaemonStateMachineContext { - /// Create a new instance of [`DaemonStateMachineContext`]. - pub fn new( - daemon: Arc, - request_receiver: Receiver, - result_sender: Sender>, - ) -> Self { - DaemonStateMachineContext { - pid: process::id(), - daemon, - sm: StateMachine::new(), - request_receiver, - result_sender, - } - } - - /// Create a worker thread to run event loop for the state machine. 
- pub fn kick_state_machine(self) -> Result>> { - Builder::new() - .name("state_machine".to_string()) - .spawn(move || self.run_state_machine_event_loop()) - .map_err(Error::ThreadSpawn) - } - - fn run_state_machine_event_loop(mut self) -> std::io::Result<()> { - loop { - use DaemonStateMachineOutput::*; - let event = self - .request_receiver - .recv() - .expect("Event channel can't be broken!"); - let last = self.sm.state().clone(); - let input = &event; - - let action = if let Ok(a) = self.sm.consume(&event) { - a - } else { - error!( - "Wrong event input. Event={:?}, CurrentState={:?}", - input, &last - ); - // Safe to unwrap because channel is never closed - self.result_sender - .send(Err(Error::UnexpectedEvent(event))) - .unwrap(); - continue; - }; - - let d = self.daemon.as_ref(); - let cur = self.sm.state(); - info!( - "State machine(pid={}): from {:?} to {:?}, input [{:?}], output [{:?}]", - &self.pid, last, cur, input, &action - ); - let r = match action { - Some(StartService) => d.start().map(|r| { - d.set_state(DaemonState::RUNNING); - r - }), - Some(TerminateService) => { - d.stop(); - let res = d.wait_service(); - if res.is_ok() { - d.set_state(DaemonState::READY); - } - res - } - Some(Umount) => d.umount().map(|r| { - // Always interrupt fuse service loop after shutdown connection to kernel. - // In case that kernel does not really shutdown the session due to some reasons - // causing service loop keep waiting of `/dev/fuse`. - d.stop(); - d.wait_service() - .unwrap_or_else(|e| error!("failed to wait service {}", e)); - // at least all fuse thread stopped, no matter what error each thread got - d.set_state(DaemonState::STOPPED); - r - }), - Some(Restore) => { - let res = d.restore(); - if res.is_ok() { - d.set_state(DaemonState::READY); - } - res - } - Some(StopStateMachine) => { - d.set_state(DaemonState::STOPPED); - Ok(()) - } - // With no output action involved, caller should also have reply back - None => Ok(()), - }; - - // Safe to unwrap because channel is never closed - self.result_sender.send(r).unwrap(); - // Quit state machine thread if interrupted or stopped - if d.get_state() == DaemonState::STOPPED { - break; - } - } - - info!("state_machine thread exits"); - Ok(()) - } -} - -/// Handler to process state transition events emitted from the state machine. -pub trait DaemonStateMachineSubscriber { - /// Event handler to process state transition events. - /// - /// It will be invoked in single-threaded context. - fn on_event(&self, event: DaemonStateMachineInput) -> Result<()>; -} - -/// Controller to manage registered filesystem/blobcache/fscache services. -pub struct DaemonController { - active: AtomicBool, - singleton_mode: AtomicBool, - daemon: Mutex>>, - blob_cache_mgr: Mutex>>, - // For backward compatibility to support singleton fusedev/virtiofs server. - fs_service: Mutex>>, - waker: Arc, - poller: Mutex, -} - -impl DaemonController { - /// Create a new instance of [DaemonController]. - pub fn new() -> Self { - let poller = Poll::new().expect("Failed to create poller for DaemonController"); - let waker = Waker::new(poller.registry(), Token(1)) - .expect("Failed to create waker for DaemonController"); - - Self { - active: AtomicBool::new(true), - singleton_mode: AtomicBool::new(false), - daemon: Mutex::new(None), - blob_cache_mgr: Mutex::new(None), - fs_service: Mutex::new(None), - waker: Arc::new(waker), - poller: Mutex::new(poller), - } - } - - /// Check whether the service controller is still in active/working state. 
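To make the transition table and the `consume()`/`state()` calls above easier to follow, here is a tiny self-contained sketch written with the same `rust_fsm` macro syntax. The `Door` machine and its variant names are made up for illustration, and the exact generated identifiers depend on the `rust_fsm` version in use.

```rust
use rust_fsm::*;

// Sketch only: a two-state machine in the same DSL as DaemonStateMachine.
state_machine! {
    derive(Debug)
    pub Door(Closed)

    Closed => { Open => Opened },
    Opened => { Close => Closed },
}

fn main() {
    let mut sm: StateMachine<Door> = StateMachine::new();
    // A valid transition succeeds and may carry an output action.
    assert!(sm.consume(&DoorInput::Open).is_ok());
    // An impossible transition is reported as an error, which the daemon's
    // event loop maps to Error::UnexpectedEvent.
    assert!(sm.consume(&DoorInput::Open).is_err());
}
```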
- pub fn is_active(&self) -> bool { - self.active.load(Ordering::Acquire) - } - - /// Allocate a waker to notify stop events. - pub fn alloc_waker(&self) -> Arc { - self.waker.clone() - } - - /// Enable/disable singleton mode. - pub fn set_singleton_mode(&self, enabled: bool) { - self.singleton_mode.store(enabled, Ordering::Release); - } - - /// Set the daemon service object. - pub fn set_daemon(&self, daemon: Arc) -> Option> { - self.daemon.lock().unwrap().replace(daemon) - } - - /// Get the daemon service object. - /// - /// Panic if called before `set_daemon()` has been called. - pub fn get_daemon(&self) -> Arc { - self.daemon.lock().unwrap().clone().unwrap() - } - - /// Get the optional blob cache manager. - pub fn get_blob_cache_mgr(&self) -> Option> { - self.blob_cache_mgr.lock().unwrap().clone() - } - - /// Set the optional blob cache manager. - pub fn set_blob_cache_mgr(&self, mgr: Arc) -> Option> { - self.blob_cache_mgr.lock().unwrap().replace(mgr) - } - - /// Set the default fs service object. - pub fn set_fs_service(&self, service: Arc) -> Option> { - self.fs_service.lock().unwrap().replace(service) - } - - /// Get the default fs service object. - pub fn get_fs_service(&self) -> Option> { - self.fs_service.lock().unwrap().clone() - } - - /// Notify controller shutdown - pub fn notify_shutdown(&self) { - // Marking exiting state. - self.active.store(false, Ordering::Release); - // Signal the `run_loop()` working thread to exit. - let _ = self.waker.wake(); - } - - /// Shutdown all services managed by the controller. - pub fn shutdown(&self) { - let daemon = self.daemon.lock().unwrap().take(); - if let Some(d) = daemon { - if let Err(e) = d.trigger_stop() { - error!("failed to stop daemon: {}", e); - } - if let Err(e) = d.wait() { - error!("failed to wait daemon: {}", e) - } - } - } - - /// Run the event loop to handle service management events. 
- pub fn run_loop(&self) { - let mut events = Events::with_capacity(8); - - loop { - match self.poller.lock().unwrap().poll(&mut events, None) { - Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, - Err(e) => error!("failed to receive notification from waker: {}", e), - Ok(_) => {} - } - - for event in events.iter() { - if event.is_error() { - error!("Got error on the monitored event."); - continue; - } - - if event.is_readable() && event.token() == Token(1) { - if !self.active.load(Ordering::Acquire) { - return; - } else if !self.singleton_mode.load(Ordering::Acquire) { - self.active.store(false, Ordering::Relaxed); - return; - } - } - } - } - } -} - -impl Default for DaemonController { - fn default() -> Self { - DaemonController::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::FsBackendType; - - #[test] - fn it_should_convert_int_to_daemonstate() { - let stat = DaemonState::from(1); - assert_eq!(stat, DaemonState::INIT); - - let stat = DaemonState::from(2); - assert_eq!(stat, DaemonState::RUNNING); - - let stat = DaemonState::from(3); - assert_eq!(stat, DaemonState::READY); - - let stat = DaemonState::from(4); - assert_eq!(stat, DaemonState::STOPPED); - - let stat = DaemonState::from(5); - assert_eq!(stat, DaemonState::UNKNOWN); - - let stat = DaemonState::from(8); - assert_eq!(stat, DaemonState::UNKNOWN); - } - - #[test] - fn it_should_convert_str_to_fsbackendtype() { - let backend_type: FsBackendType = "rafs".parse().unwrap(); - assert_eq!(backend_type, FsBackendType::Rafs); - - let backend_type: FsBackendType = "passthrough_fs".parse().unwrap(); - assert_eq!(backend_type, FsBackendType::PassthroughFs); - - assert!("xxxxxxxxxxxxx".parse::().is_err()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Infrastructure to define and manage Nydus service daemons. + +use std::any::Any; +use std::cmp::PartialEq; +use std::convert::From; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; +use std::process; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{Receiver, Sender}; +use std::sync::{Arc, Mutex, MutexGuard}; +use std::thread::{Builder, JoinHandle}; + +use mio::{Events, Poll, Token, Waker}; +use nydus_api::BuildTimeInfo; +use rust_fsm::*; +use serde::{self, Serialize}; + +use crate::fs_service::{FsBackendCollection, FsService}; +use crate::upgrade::UpgradeManager; +use crate::{BlobCacheMgr, Error, Result}; + +/// Nydus daemon working states. +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, Hash, PartialEq, Eq, Serialize)] +pub enum DaemonState { + INIT = 1, + RUNNING = 2, + READY = 3, + STOPPED = 4, + UNKNOWN = 5, +} + +impl Display for DaemonState { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl From for DaemonState { + fn from(i: i32) -> Self { + match i { + 1 => DaemonState::INIT, + 2 => DaemonState::RUNNING, + 3 => DaemonState::READY, + 4 => DaemonState::STOPPED, + _ => DaemonState::UNKNOWN, + } + } +} + +/// Build, version and working state information for Nydus daemons. +#[derive(Serialize)] +pub struct DaemonInfo { + /// Build and version information. + pub version: BuildTimeInfo, + /// Optional daemon identifier. + pub id: Option, + /// Optional daemon supervisor configuration information. + pub supervisor: Option, + /// Daemon working state. 
+ pub state: DaemonState, + /// Optional metrics and statistics about filesystem instances. + pub backend_collection: Option, +} + +/// Abstract interfaces for Nydus daemon objects. +/// +/// The [`NydusDaemon`] trait defines interfaces that an Nydus daemon object should implement, +/// so the daemon manager can manage those objects. +pub trait NydusDaemon: DaemonStateMachineSubscriber + Send + Sync { + /// Cast `self` to trait object of [Any] to support object downcast. + fn as_any(&self) -> &dyn Any; + + /// Get optional daemon identifier. + fn id(&self) -> Option; + + /// Get build and version information. + fn version(&self) -> BuildTimeInfo; + + /// Get status information about the daemon. + fn export_info(&self, include_fs_info: bool) -> Result { + let mut response = DaemonInfo { + version: self.version(), + id: self.id(), + supervisor: self.supervisor(), + state: self.get_state(), + backend_collection: None, + }; + if include_fs_info { + if let Some(fs) = self.get_default_fs_service() { + response.backend_collection = Some(fs.backend_collection().deref().clone()); + } + } + + serde_json::to_string(&response).map_err(Error::Serde) + } + + /// Get daemon working state. + fn get_state(&self) -> DaemonState; + /// Set daemon working state. + fn set_state(&self, s: DaemonState); + /// Start the daemon object to serve incoming requests. + fn start(&self) -> Result<()>; + /// Umount the FUSE filesystem. + fn umount(&self) -> Result<()>; + /// Stop the daemon object. + fn stop(&self) {} + /// Trigger `Stop` transition event to stop the daemon. + fn trigger_stop(&self) -> Result<()> { + let s = self.get_state(); + + if s == DaemonState::STOPPED { + return Ok(()); + } + + if s == DaemonState::RUNNING { + self.on_event(DaemonStateMachineInput::Stop)?; + } + + self.on_event(DaemonStateMachineInput::Stop) + } + /// Trigger transition events to move the state machine to `STOPPED` state. + fn trigger_exit(&self) -> Result<()> { + let s = self.get_state(); + + if s == DaemonState::STOPPED { + return Ok(()); + } + + if s == DaemonState::INIT { + return self.on_event(DaemonStateMachineInput::Stop); + } + + if s == DaemonState::RUNNING { + self.on_event(DaemonStateMachineInput::Stop)?; + } + + self.on_event(DaemonStateMachineInput::Exit) + } + + /// Wait for daemon to exit. + fn wait(&self) -> Result<()>; + /// Wait for service worker thread to exit. + fn wait_service(&self) -> Result<()> { + Ok(()) + } + /// Wait for state machine worker thread to exit. + fn wait_state_machine(&self) -> Result<()> { + Ok(()) + } + + /// Get supervisor configuration information. + fn supervisor(&self) -> Option; + /// Save state for online upgrade. + fn save(&self) -> Result<()>; + /// Restore state for online upgrade. + fn restore(&self) -> Result<()>; + /// Trigger `Takeover` transition event to take over control from old instance. + fn trigger_takeover(&self) -> Result<()> { + self.on_event(DaemonStateMachineInput::Takeover) + } + /// Trigger `Start` transition event to start the new instance. + fn trigger_start(&self) -> Result<()> { + self.on_event(DaemonStateMachineInput::Start) + } + + fn upgrade_mgr(&self) -> Option> { + None + } + + // For backward compatibility. + /// Set default filesystem service object. + fn get_default_fs_service(&self) -> Option> { + None + } + + /// Get the optional `BlobCacheMgr` object. + fn get_blob_cache_mgr(&self) -> Option> { + None + } + + /// Delete a blob object managed by the daemon. 
+ fn delete_blob(&self, _blob_id: String) -> Result<()> { + Ok(()) + } +} + +// State machine for Nydus daemon workflow. +// +// Valid states for Nydus daemon state machine: +// - `Init` means nydusd is just started and potentially configured well but not +// yet negotiate with kernel the capabilities of both sides. It even does not try +// to set up fuse session by mounting `/fuse/dev`(in case of `fusedev` backend). +// - `Ready` means nydusd is ready for start or die. Fuse session is created. +// - `Running` means nydusd has successfully prepared all the stuff needed to work as a +// user-space fuse filesystem, however, the essential capabilities negotiation might not be +// done yet. It relies on `fuse-rs` to tell if capability negotiation is done. +// - `Die` state means the whole nydusd process is going to die. +state_machine! { + derive(Debug, Clone) + pub DaemonStateMachine(Init) + + Init => { + Mount => Ready, + Takeover => Ready[Restore], + Stop => Die[StopStateMachine], + }, + Ready => { + Start => Running[StartService], + Stop => Die[Umount], + Exit => Die[StopStateMachine], + }, + Running => { + Stop => Ready [TerminateService], + }, +} + +/// An implementation of the state machine defined by [`DaemonStateMachine`]. +pub struct DaemonStateMachineContext { + pid: u32, + daemon: Arc, + sm: StateMachine, + request_receiver: Receiver, + result_sender: Sender>, +} + +impl DaemonStateMachineContext { + /// Create a new instance of [`DaemonStateMachineContext`]. + pub fn new( + daemon: Arc, + request_receiver: Receiver, + result_sender: Sender>, + ) -> Self { + DaemonStateMachineContext { + pid: process::id(), + daemon, + sm: StateMachine::new(), + request_receiver, + result_sender, + } + } + + /// Create a worker thread to run event loop for the state machine. + pub fn kick_state_machine(self) -> Result>> { + Builder::new() + .name("state_machine".to_string()) + .spawn(move || self.run_state_machine_event_loop()) + .map_err(Error::ThreadSpawn) + } + + fn run_state_machine_event_loop(mut self) -> std::io::Result<()> { + loop { + use DaemonStateMachineOutput::*; + let event = self + .request_receiver + .recv() + .expect("Event channel can't be broken!"); + let last = self.sm.state().clone(); + let input = &event; + + let action = if let Ok(a) = self.sm.consume(&event) { + a + } else { + error!( + "Wrong event input. Event={:?}, CurrentState={:?}", + input, &last + ); + // Safe to unwrap because channel is never closed + self.result_sender + .send(Err(Error::UnexpectedEvent(event))) + .unwrap(); + continue; + }; + + let d = self.daemon.as_ref(); + let cur = self.sm.state(); + info!( + "State machine(pid={}): from {:?} to {:?}, input [{:?}], output [{:?}]", + &self.pid, last, cur, input, &action + ); + let r = match action { + Some(StartService) => d.start().map(|r| { + d.set_state(DaemonState::RUNNING); + r + }), + Some(TerminateService) => { + d.stop(); + let res = d.wait_service(); + if res.is_ok() { + d.set_state(DaemonState::READY); + } + res + } + Some(Umount) => d.umount().map(|r| { + // Always interrupt fuse service loop after shutdown connection to kernel. + // In case that kernel does not really shutdown the session due to some reasons + // causing service loop keep waiting of `/dev/fuse`. 
+ d.stop(); + d.wait_service() + .unwrap_or_else(|e| error!("failed to wait service {}", e)); + // at least all fuse thread stopped, no matter what error each thread got + d.set_state(DaemonState::STOPPED); + r + }), + Some(Restore) => { + let res = d.restore(); + if res.is_ok() { + d.set_state(DaemonState::READY); + } + res + } + Some(StopStateMachine) => { + d.set_state(DaemonState::STOPPED); + Ok(()) + } + // With no output action involved, caller should also have reply back + None => Ok(()), + }; + + // Safe to unwrap because channel is never closed + self.result_sender.send(r).unwrap(); + // Quit state machine thread if interrupted or stopped + if d.get_state() == DaemonState::STOPPED { + break; + } + } + + info!("state_machine thread exits"); + Ok(()) + } +} + +/// Handler to process state transition events emitted from the state machine. +pub trait DaemonStateMachineSubscriber { + /// Event handler to process state transition events. + /// + /// It will be invoked in single-threaded context. + fn on_event(&self, event: DaemonStateMachineInput) -> Result<()>; +} + +/// Controller to manage registered filesystem/blobcache/fscache services. +pub struct DaemonController { + active: AtomicBool, + singleton_mode: AtomicBool, + daemon: Mutex>>, + blob_cache_mgr: Mutex>>, + // For backward compatibility to support singleton fusedev/virtiofs server. + fs_service: Mutex>>, + waker: Arc, + poller: Mutex, +} + +impl DaemonController { + /// Create a new instance of [DaemonController]. + pub fn new() -> Self { + let poller = Poll::new().expect("Failed to create poller for DaemonController"); + let waker = Waker::new(poller.registry(), Token(1)) + .expect("Failed to create waker for DaemonController"); + + Self { + active: AtomicBool::new(true), + singleton_mode: AtomicBool::new(false), + daemon: Mutex::new(None), + blob_cache_mgr: Mutex::new(None), + fs_service: Mutex::new(None), + waker: Arc::new(waker), + poller: Mutex::new(poller), + } + } + + /// Check whether the service controller is still in active/working state. + pub fn is_active(&self) -> bool { + self.active.load(Ordering::Acquire) + } + + /// Allocate a waker to notify stop events. + pub fn alloc_waker(&self) -> Arc { + self.waker.clone() + } + + /// Enable/disable singleton mode. + pub fn set_singleton_mode(&self, enabled: bool) { + self.singleton_mode.store(enabled, Ordering::Release); + } + + /// Set the daemon service object. + pub fn set_daemon(&self, daemon: Arc) -> Option> { + self.daemon.lock().unwrap().replace(daemon) + } + + /// Get the daemon service object. + /// + /// Panic if called before `set_daemon()` has been called. + pub fn get_daemon(&self) -> Arc { + self.daemon.lock().unwrap().clone().unwrap() + } + + /// Get the optional blob cache manager. + pub fn get_blob_cache_mgr(&self) -> Option> { + self.blob_cache_mgr.lock().unwrap().clone() + } + + /// Set the optional blob cache manager. + pub fn set_blob_cache_mgr(&self, mgr: Arc) -> Option> { + self.blob_cache_mgr.lock().unwrap().replace(mgr) + } + + /// Set the default fs service object. + pub fn set_fs_service(&self, service: Arc) -> Option> { + self.fs_service.lock().unwrap().replace(service) + } + + /// Get the default fs service object. + pub fn get_fs_service(&self) -> Option> { + self.fs_service.lock().unwrap().clone() + } + + /// Notify controller shutdown + pub fn notify_shutdown(&self) { + // Marking exiting state. + self.active.store(false, Ordering::Release); + // Signal the `run_loop()` working thread to exit. 
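For readers unfamiliar with the `mio` pattern behind `notify_shutdown()` and `run_loop()`, this standalone sketch shows a waker registered on `Token(1)` unblocking a poll loop from another thread. Names and timings are illustrative, and it assumes the `mio` features the service crate already enables.

```rust
use std::sync::Arc;
use std::thread;
use std::time::Duration;

use mio::{Events, Poll, Token, Waker};

fn main() -> std::io::Result<()> {
    let mut poll = Poll::new()?;
    let waker = Arc::new(Waker::new(poll.registry(), Token(1))?);

    let w = waker.clone();
    thread::spawn(move || {
        thread::sleep(Duration::from_millis(100));
        // Equivalent of notify_shutdown(): unblock the poll loop below.
        let _ = w.wake();
    });

    let mut events = Events::with_capacity(8);
    loop {
        poll.poll(&mut events, None)?;
        if events.iter().any(|e| e.token() == Token(1) && e.is_readable()) {
            return Ok(());
        }
    }
}
```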
+ let _ = self.waker.wake(); + } + + /// Shutdown all services managed by the controller. + pub fn shutdown(&self) { + let daemon = self.daemon.lock().unwrap().take(); + if let Some(d) = daemon { + if let Err(e) = d.trigger_stop() { + error!("failed to stop daemon: {}", e); + } + if let Err(e) = d.wait() { + error!("failed to wait daemon: {}", e) + } + } + } + + /// Run the event loop to handle service management events. + pub fn run_loop(&self) { + let mut events = Events::with_capacity(8); + + loop { + match self.poller.lock().unwrap().poll(&mut events, None) { + Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, + Err(e) => error!("failed to receive notification from waker: {}", e), + Ok(_) => {} + } + + for event in events.iter() { + if event.is_error() { + error!("Got error on the monitored event."); + continue; + } + + if event.is_readable() && event.token() == Token(1) { + if !self.active.load(Ordering::Acquire) { + return; + } else if !self.singleton_mode.load(Ordering::Acquire) { + self.active.store(false, Ordering::Relaxed); + return; + } + } + } + } + } +} + +impl Default for DaemonController { + fn default() -> Self { + DaemonController::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::FsBackendType; + + #[test] + fn it_should_convert_int_to_daemonstate() { + let stat = DaemonState::from(1); + assert_eq!(stat, DaemonState::INIT); + + let stat = DaemonState::from(2); + assert_eq!(stat, DaemonState::RUNNING); + + let stat = DaemonState::from(3); + assert_eq!(stat, DaemonState::READY); + + let stat = DaemonState::from(4); + assert_eq!(stat, DaemonState::STOPPED); + + let stat = DaemonState::from(5); + assert_eq!(stat, DaemonState::UNKNOWN); + + let stat = DaemonState::from(8); + assert_eq!(stat, DaemonState::UNKNOWN); + } + + #[test] + fn it_should_convert_str_to_fsbackendtype() { + let backend_type: FsBackendType = "rafs".parse().unwrap(); + assert_eq!(backend_type, FsBackendType::Rafs); + + let backend_type: FsBackendType = "passthrough_fs".parse().unwrap(); + assert_eq!(backend_type, FsBackendType::PassthroughFs); + + assert!("xxxxxxxxxxxxx".parse::().is_err()); + } +} diff --git a/service/src/fs_cache.rs b/service/src/fs_cache.rs index 680e120e24b..897f3241ad3 100644 --- a/service/src/fs_cache.rs +++ b/service/src/fs_cache.rs @@ -1,1060 +1,1060 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! Handler to expose RAFSv6 image through EROFS/fscache. -//! -//! The [`FsCacheHandler`] is the inter-connection between in kernel EROFS/fscache drivers -//! and the user space [BlobCacheMgr](https://docs.rs/nydus-service/latest/nydus_service/blob_cache/struct.BlobCacheMgr.html). -//! The workflow is as below: -//! - EROFS presents a filesystem structure by parsing a RAFS image metadata blob. -//! - EROFS sends requests to the fscache subsystem when user reads data from files. -//! - Fscache subsystem send requests to [FsCacheHandler] if the requested data has been cached yet. -//! - [FsCacheHandler] reads blob data from the [BlobCacheMgr] and sends back reply messages. 
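As an aid to the message parsing that follows, here is a minimal standalone sketch of reading the packed 16-byte request header with `read_unaligned`; the field layout matches `FsCacheMsgHeader` below, while the struct and function names here are illustrative.

```rust
// Sketch only: the kernel writes a packed header (msg_id, opcode, len,
// object_id); read_unaligned is used because the buffer carries no
// alignment guarantee.
use std::ptr::read_unaligned;

#[derive(Debug)]
struct MsgHeader {
    msg_id: u32,
    opcode: u32,
    len: u32,
    object_id: u32,
}

fn parse_header(buf: &[u8]) -> Option<MsgHeader> {
    if buf.len() < 16 {
        return None;
    }
    // Safe: the length check above guarantees 16 readable bytes.
    unsafe {
        Some(MsgHeader {
            msg_id: read_unaligned(buf[0..4].as_ptr() as *const u32),
            opcode: read_unaligned(buf[4..8].as_ptr() as *const u32),
            len: read_unaligned(buf[8..12].as_ptr() as *const u32),
            object_id: read_unaligned(buf[12..16].as_ptr() as *const u32),
        })
    }
}
```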
- -use std::collections::hash_map::Entry::Vacant; -use std::collections::HashMap; -use std::convert::TryFrom; -use std::fs::{self, File, OpenOptions}; -use std::io::{copy, Error, ErrorKind, Result, Write}; -use std::ops::Deref; -use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use std::path::{Path, PathBuf}; -use std::ptr::read_unaligned; -use std::string::String; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Barrier, Condvar, Mutex, MutexGuard, RwLock}; -use std::{cmp, env, thread, time}; - -use mio::unix::SourceFd; -use mio::{Events, Interest, Poll, Token, Waker}; -use nydus_storage::cache::BlobCache; -use nydus_storage::device::BlobPrefetchRequest; -use nydus_storage::factory::{ASYNC_RUNTIME, BLOB_FACTORY}; - -use crate::blob_cache::{ - generate_blob_key, BlobCacheMgr, BlobConfig, DataBlobConfig, MetaBlobConfig, -}; - -nix::ioctl_write_int!(fscache_cread, 0x98, 1); - -/// Maximum size of fscache request message from kernel. -const MIN_DATA_BUF_SIZE: usize = 1024; -const MSG_HEADER_SIZE: usize = 16; -const MSG_OPEN_SIZE: usize = 16; -const MSG_READ_SIZE: usize = 16; - -const TOKEN_EVENT_WAKER: usize = 1; -const TOKEN_EVENT_FSCACHE: usize = 2; - -const BLOB_CACHE_INIT_RETRY: u8 = 5; -const BLOB_CACHE_INIT_INTERVAL_MS: u64 = 300; - -/// Command code in requests from fscache driver. -#[repr(u32)] -#[derive(Debug, Eq, PartialEq)] -enum FsCacheOpCode { - Open = 0, - Close = 1, - Read = 2, -} - -impl TryFrom for FsCacheOpCode { - type Error = Error; - - fn try_from(value: u32) -> std::result::Result { - match value { - 0 => Ok(FsCacheOpCode::Open), - 1 => Ok(FsCacheOpCode::Close), - 2 => Ok(FsCacheOpCode::Read), - _ => Err(einval!(format!( - "fscache: invalid operation code {}", - value - ))), - } - } -} - -/// Common header for request messages. -#[repr(C)] -#[derive(Debug, Eq, PartialEq)] -struct FsCacheMsgHeader { - /// Message identifier to associate reply with request by the fscache driver. - msg_id: u32, - /// Message operation code. - opcode: FsCacheOpCode, - /// Message length, including message header and message body. - len: u32, - /// A unique ID identifying the cache file operated on. - object_id: u32, -} - -impl TryFrom<&[u8]> for FsCacheMsgHeader { - type Error = Error; - - fn try_from(value: &[u8]) -> std::result::Result { - if value.len() < MSG_HEADER_SIZE { - return Err(einval!(format!( - "fscache: request message size is too small, {}", - value.len() - ))); - } - - // Safe because we have verified buffer size. - let msg_id = unsafe { read_unaligned(value[0..4].as_ptr() as *const u32) }; - let opcode = unsafe { read_unaligned(value[4..8].as_ptr() as *const u32) }; - let len = unsafe { read_unaligned(value[8..12].as_ptr() as *const u32) }; - let opcode = FsCacheOpCode::try_from(opcode)?; - let object_id = unsafe { read_unaligned(value[12..16].as_ptr() as *const u32) }; - if len as usize != value.len() { - return Err(einval!(format!( - "fscache: message length {} does not match length from message header {}", - value.len(), - len - ))); - } - - Ok(FsCacheMsgHeader { - msg_id, - opcode, - len, - object_id, - }) - } -} - -/// Request message to open a file. -/// -/// The opened file should be kept valid until corresponding `CLOSE` message has been received -/// from the fscache driver. 
-#[derive(Default, Debug, Eq, PartialEq)] -struct FsCacheMsgOpen { - volume_key: String, - cookie_key: String, - fd: u32, - flags: u32, -} - -impl TryFrom<&[u8]> for FsCacheMsgOpen { - type Error = Error; - - fn try_from(value: &[u8]) -> std::result::Result { - if value.len() < MSG_OPEN_SIZE { - return Err(einval!(format!( - "fscache: request message size is too small, {}", - value.len() - ))); - } - - // Safe because we have verified buffer size. - let volume_key_size = unsafe { read_unaligned(value[0..4].as_ptr() as *const u32) }; - let cookie_key_size = unsafe { read_unaligned(value[4..8].as_ptr() as *const u32) }; - let fd = unsafe { read_unaligned(value[8..12].as_ptr() as *const u32) }; - let flags = unsafe { read_unaligned(value[12..16].as_ptr() as *const u32) }; - if volume_key_size.checked_add(cookie_key_size).is_none() - || (volume_key_size + cookie_key_size) - .checked_add(MSG_OPEN_SIZE as u32) - .is_none() - { - return Err(einval!( - "fscache: invalid volume/cookie key length in OPEN request" - )); - } - let total_sz = (volume_key_size + cookie_key_size) as usize + MSG_OPEN_SIZE; - if value.len() < total_sz { - return Err(einval!("fscache: invalid message length for OPEN request")); - } - let pos = MSG_OPEN_SIZE + volume_key_size as usize; - let volume_key = String::from_utf8(value[MSG_OPEN_SIZE..pos].to_vec()) - .map_err(|_e| einval!("fscache: invalid volume key in OPEN request"))? - .trim_end_matches('\0') - .to_string(); - let cookie_key = String::from_utf8(value[pos..pos + cookie_key_size as usize].to_vec()) - .map_err(|_e| einval!("fscache: invalid cookie key in OPEN request"))?; - - Ok(FsCacheMsgOpen { - volume_key, - cookie_key, - fd, - flags, - }) - } -} - -/// Request message to feed requested data into the cache file. -#[repr(C)] -#[derive(Default, Debug, Eq, PartialEq)] -struct FsCacheMsgRead { - off: u64, - len: u64, -} - -impl TryFrom<&[u8]> for FsCacheMsgRead { - type Error = Error; - - fn try_from(value: &[u8]) -> std::result::Result { - if value.len() < MSG_READ_SIZE { - return Err(einval!(format!( - "fscache: request message size is too small, {}", - value.len() - ))); - } - - // Safe because we have verified buffer size. - let off = unsafe { read_unaligned(value[0..8].as_ptr() as *const u64) }; - let len = unsafe { read_unaligned(value[8..16].as_ptr() as *const u64) }; - - Ok(FsCacheMsgRead { off, len }) - } -} - -struct FsCacheBootstrap { - bootstrap_file: File, - cache_file: File, -} - -struct FsCacheBlobCache { - cache: Option>, - config: Arc, - file: Arc, -} - -impl FsCacheBlobCache { - fn set_blob_cache(&mut self, cache: Option>) { - self.cache = cache; - } - - fn get_blob_cache(&self) -> Option> { - self.cache.clone() - } -} - -#[derive(Clone)] -enum FsCacheObject { - Bootstrap(Arc), - DataBlob(Arc>), -} - -/// Struct to maintain cached file objects. -#[derive(Default)] -struct FsCacheState { - id_to_object_map: HashMap, - id_to_config_map: HashMap>, - blob_cache_mgr: Arc, -} - -/// Handler to cooperate with Linux fscache driver to manage cached blob objects. -/// -/// The `FsCacheHandler` create a communication channel with the Linux fscache driver, configure -/// the communication session and serves all requests from the fscache driver. -pub struct FsCacheHandler { - active: AtomicBool, - barrier: Barrier, - threads: usize, - file: File, - state: Arc>, - poller: Mutex, - waker: Arc, - cache_dir: PathBuf, -} - -impl FsCacheHandler { - /// Create a new instance of [FsCacheHandler]. 
- pub fn new( - path: &str, - dir: &str, - tag: Option<&str>, - blob_cache_mgr: Arc, - threads: usize, - restore_file: Option<&File>, - ) -> Result { - info!( - "fscache: create FsCacheHandler with dir {}, tag {}", - dir, - tag.unwrap_or("") - ); - - let mut file = match restore_file { - None => OpenOptions::new() - .write(true) - .read(true) - .create(false) - .open(path) - .map_err(|e| { - error!("Failed to open cachefiles device {}. {}", path, e); - e - })?, - Some(f) => f.try_clone()?, - }; - - let poller = - Poll::new().map_err(|_e| eother!("fscache: failed to create poller for service"))?; - let waker = Waker::new(poller.registry(), Token(TOKEN_EVENT_WAKER)) - .map_err(|_e| eother!("fscache: failed to create waker for service"))?; - poller - .registry() - .register( - &mut SourceFd(&file.as_raw_fd()), - Token(TOKEN_EVENT_FSCACHE), - Interest::READABLE, - ) - .map_err(|_e| eother!("fscache: failed to register fd for service"))?; - - if restore_file.is_none() { - // Initialize the fscache session - file.write_all(format!("dir {}", dir).as_bytes())?; - file.flush()?; - if let Some(tag) = tag { - file.write_all(format!("tag {}", tag).as_bytes())?; - file.flush()?; - } - file.write_all(b"bind ondemand")?; - file.flush()?; - } else { - // send restore cmd, if we are in restore process - file.write_all(b"restore")?; - file.flush()?; - } - - let state = FsCacheState { - id_to_object_map: Default::default(), - id_to_config_map: Default::default(), - blob_cache_mgr, - }; - let cache_dir = PathBuf::new().join(dir).join("cache"); - - Ok(FsCacheHandler { - active: AtomicBool::new(true), - barrier: Barrier::new(threads + 1), - threads, - file, - state: Arc::new(Mutex::new(state)), - poller: Mutex::new(poller), - waker: Arc::new(waker), - cache_dir, - }) - } - - /// Get number of working threads to service fscache requests. - pub fn working_threads(&self) -> usize { - self.threads - } - - /// Stop worker threads for the fscache service. - pub fn stop(&self) { - self.active.store(false, Ordering::Release); - if let Err(e) = self.waker.wake() { - error!("fscache: failed to signal worker thread to exit, {}", e); - } - self.barrier.wait(); - } - - /// Run the event loop to handle all requests from kernel fscache driver. - /// - /// This method should only be invoked by a single thread, which will poll the fscache fd - /// and dispatch requests from fscache fd to other working threads. - pub fn run_loop(&self) -> Result<()> { - let mut events = Events::with_capacity(64); - let mut buf = vec![0u8; MIN_DATA_BUF_SIZE]; - - loop { - match self.poller.lock().unwrap().poll(&mut events, None) { - Ok(_) => {} - Err(e) if e.kind() == ErrorKind::Interrupted => continue, - Err(e) => { - warn!("fscache: failed to poll events"); - return Err(e); - } - } - - for event in events.iter() { - if event.is_error() { - error!("fscache: got error event from poller"); - continue; - } - if event.token() == Token(TOKEN_EVENT_FSCACHE) { - if event.is_readable() { - self.handle_requests(&mut buf)?; - } - } else if event.is_readable() - && event.token() == Token(TOKEN_EVENT_WAKER) - && !self.active.load(Ordering::Acquire) - { - // Notify next worker to exit. - let _ = self.waker.wake(); - self.barrier.wait(); - return Ok(()); - } - } - } - } - - pub fn get_file(&self) -> &File { - &self.file - } - - /// Read and process all requests from fscache driver until no data available. 
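The session setup performed by `FsCacheHandler::new()` above boils down to writing a few textual commands to the cachefiles device before requests start arriving. The condensed standalone sketch below keeps only that step; the device path, cache directory, and tag are illustrative, and error handling is simplified.

```rust
use std::fs::{File, OpenOptions};
use std::io::Write;

// Sketch only: configure a cachefiles session in ondemand mode by writing
// "dir", optional "tag", and "bind ondemand" commands to the device file.
fn bind_cachefiles(dev: &str, dir: &str, tag: Option<&str>) -> std::io::Result<File> {
    let mut f = OpenOptions::new().read(true).write(true).open(dev)?;
    f.write_all(format!("dir {}", dir).as_bytes())?;
    if let Some(tag) = tag {
        f.write_all(format!("tag {}", tag).as_bytes())?;
    }
    // After this, ondemand requests are delivered via read() on the fd.
    f.write_all(b"bind ondemand")?;
    Ok(f)
}
```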
- fn handle_requests(&self, buf: &mut [u8]) -> Result<()> { - loop { - let ret = unsafe { - libc::read( - self.file.as_raw_fd(), - buf.as_ptr() as *mut u8 as *mut libc::c_void, - buf.len(), - ) - }; - match ret { - // A special behavior of old cachefile driver which returns zero if there's no - // pending requests instead of `ErrorKind::WouldBlock`. - 0 => return Ok(()), - _i if _i > 0 => self.handle_one_request(&buf[0..ret as usize])?, - _ => { - let err = Error::last_os_error(); - match err.kind() { - ErrorKind::Interrupted => continue, - ErrorKind::WouldBlock => return Ok(()), - _ => return Err(err), - } - } - } - } - } - - fn handle_one_request(&self, buf: &[u8]) -> Result<()> { - let hdr = FsCacheMsgHeader::try_from(buf)?; - let buf = &buf[MSG_HEADER_SIZE..]; - - match hdr.opcode { - FsCacheOpCode::Open => { - let msg = FsCacheMsgOpen::try_from(buf)?; - self.handle_open_request(&hdr, &msg); - } - FsCacheOpCode::Close => { - self.handle_close_request(&hdr); - } - FsCacheOpCode::Read => { - let msg = FsCacheMsgRead::try_from(buf)?; - self.handle_read_request(&hdr, &msg); - } - } - - Ok(()) - } - - fn handle_open_request(&self, hdr: &FsCacheMsgHeader, msg: &FsCacheMsgOpen) { - // Drop the 'erofs,' prefix if any - let domain_id = msg - .volume_key - .strip_prefix("erofs,") - .unwrap_or(msg.volume_key.as_str()); - - let key = generate_blob_key(domain_id, &msg.cookie_key); - match self.get_config(&key) { - None => { - unsafe { libc::close(msg.fd as i32) }; - self.reply(&format!("copen {},{}", hdr.msg_id, -libc::ENOENT)); - } - Some(cfg) => match cfg { - BlobConfig::DataBlob(config) => { - let reply = self.handle_open_data_blob(hdr, msg, config); - self.reply(&reply); - } - BlobConfig::MetaBlob(config) => { - self.handle_open_bootstrap(hdr, msg, config); - } - }, - } - } - - fn handle_open_data_blob( - &self, - hdr: &FsCacheMsgHeader, - msg: &FsCacheMsgOpen, - config: Arc, - ) -> String { - let mut state = self.state.lock().unwrap(); - if let Vacant(e) = state.id_to_object_map.entry(hdr.object_id) { - let fsblob = Arc::new(RwLock::new(FsCacheBlobCache { - cache: None, - config: config.clone(), - file: Arc::new(unsafe { File::from_raw_fd(msg.fd as RawFd) }), - })); - e.insert((FsCacheObject::DataBlob(fsblob.clone()), msg.fd)); - state.id_to_config_map.insert(hdr.object_id, config.clone()); - let blob_size = config.blob_info().deref().uncompressed_size(); - let barrier = Arc::new(Barrier::new(2)); - Self::init_blob_cache(fsblob, barrier.clone()); - // make sure that the blobcache init thread have gotten writer lock before user daemon - // receives first request. 
- barrier.wait(); - format!("copen {},{}", hdr.msg_id, blob_size) - } else { - unsafe { libc::close(msg.fd as i32) }; - format!("copen {},{}", hdr.msg_id, -libc::EALREADY) - } - } - - fn init_blob_cache(fsblob: Arc>, barrier: Arc) { - thread::spawn(move || { - let mut guard = fsblob.write().unwrap(); - barrier.wait(); - //for now FsCacheBlobCache only init once, should not have blobcache associated with it - assert!(guard.get_blob_cache().is_none()); - for _ in 0..BLOB_CACHE_INIT_RETRY { - match Self::create_data_blob_object(&guard.config, guard.file.clone()) { - Err(e) => { - warn!("fscache: create_data_blob_object failed {}", e); - thread::sleep(time::Duration::from_millis(BLOB_CACHE_INIT_INTERVAL_MS)); - } - Ok(blob) => { - guard.set_blob_cache(Some(blob.clone())); - if let Err(e) = Self::do_prefetch(&guard.config, blob.clone()) { - warn!( - "fscache: failed to prefetch data for blob {}, {}", - blob.blob_id(), - e - ); - } - break; - } - } - } - }); - } - - fn do_prefetch(cfg: &DataBlobConfig, blob: Arc) -> Result<()> { - let blob_info = cfg.blob_info().deref(); - let cache_cfg = cfg.config_v2().get_cache_config()?; - if !cache_cfg.prefetch.enable { - return Ok(()); - } - blob.start_prefetch() - .map_err(|e| eother!(format!("failed to start prefetch worker, {}", e)))?; - - let size = match cache_cfg.prefetch.batch_size.checked_next_power_of_two() { - None => nydus_api::default_prefetch_batch_size() as u64, - Some(1) => nydus_api::default_prefetch_batch_size() as u64, - Some(s) => s as u64, - }; - let size = std::cmp::max(0x4_0000u64, size); - let blob_size = blob_info.compressed_data_size(); - let count = (blob_size + size - 1) / size; - let mut blob_req = Vec::with_capacity(count as usize); - let mut pre_offset = 0u64; - for _i in 0..count { - blob_req.push(BlobPrefetchRequest { - blob_id: blob_info.blob_id().to_owned(), - offset: pre_offset, - len: cmp::min(size, blob_size - pre_offset), - }); - pre_offset += size; - if pre_offset >= blob_size { - break; - } - } - - let id = blob.blob_id(); - info!("fscache: start to prefetch data for blob {}", id); - if let Err(e) = blob.prefetch(blob.clone(), &blob_req, &[]) { - warn!("fscache: failed to prefetch data for blob {}, {}", id, e); - } - - Ok(()) - } - - /// The `fscache` factory essentially creates a namespace for blob objects cached by the - /// fscache subsystem. The data blob files will be managed the in kernel fscache driver, - /// the chunk map file will be managed by the userspace daemon. We need to figure out the - /// way to share blob/chunkamp files with filecache manager. - fn create_data_blob_object( - config: &DataBlobConfig, - file: Arc, - ) -> Result> { - let mut blob_info = config.blob_info().deref().clone(); - blob_info.set_fscache_file(Some(file)); - let blob_ref = Arc::new(blob_info); - BLOB_FACTORY.new_blob_cache(config.config_v2(), &blob_ref) - } - - fn fill_bootstrap_cache(bootstrap: Arc) -> Result { - // Safe because bootstrap.bootstrap_file/cache_file are valid. 
- let mut src = unsafe { File::from_raw_fd(bootstrap.bootstrap_file.as_raw_fd()) }; - let mut dst = unsafe { File::from_raw_fd(bootstrap.cache_file.as_raw_fd()) }; - let ret = copy(&mut src, &mut dst); - std::mem::forget(src); - std::mem::forget(dst); - ret.map_err(|e| { - warn!("failed to copy content from bootstap into cache fd, {}", e); - e - }) - } - - fn handle_open_bootstrap( - &self, - hdr: &FsCacheMsgHeader, - msg: &FsCacheMsgOpen, - config: Arc, - ) { - let path = config.path().display(); - let condvar = Arc::new((Mutex::new(false), Condvar::new())); - let condvar2 = condvar.clone(); - let mut state = self.get_state(); - - let ret: i64 = if let Vacant(e) = state.id_to_object_map.entry(hdr.object_id) { - match OpenOptions::new().read(true).open(config.path()) { - Err(e) => { - warn!("fscache: failed to open bootstrap file {}, {}", path, e); - -libc::ENOENT as i64 - } - Ok(f) => match f.metadata() { - Err(e) => { - warn!("fscache: failed to open bootstrap file {}, {}", path, e); - -libc::ENOENT as i64 - } - Ok(md) => { - let cache_file = unsafe { File::from_raw_fd(msg.fd as RawFd) }; - let bootstrap = Arc::new(FsCacheBootstrap { - bootstrap_file: f, - cache_file, - }); - let object = FsCacheObject::Bootstrap(bootstrap.clone()); - e.insert((object, msg.fd)); - ASYNC_RUNTIME.spawn_blocking(|| async move { - // Ensure copen reply message has been sent to kernel. - { - let (m, c) = condvar.as_ref(); - let mut g = m.lock().unwrap(); - while !*g { - g = c.wait(g).unwrap(); - } - } - - for _i in 0..3 { - if Self::fill_bootstrap_cache(bootstrap.clone()).is_ok() { - break; - } - tokio::time::sleep(time::Duration::from_secs(2)).await; - } - }); - md.len() as i64 - } - }, - } - } else { - -libc::EALREADY as i64 - }; - - if ret < 0 { - unsafe { libc::close(msg.fd as i32) }; - } - self.reply(&format!("copen {},{}", hdr.msg_id, ret)); - if ret >= 0 { - let (m, c) = condvar2.as_ref(); - *m.lock().unwrap() = true; - c.notify_one(); - } - } - - fn handle_close_request(&self, hdr: &FsCacheMsgHeader) { - let mut state = self.get_state(); - - if let Some((FsCacheObject::DataBlob(fsblob), _)) = - state.id_to_object_map.remove(&hdr.object_id) - { - // Safe to unwrap() because `id_to_config_map` and `id_to_object_map` is kept - // in consistence. 
- let config = state.id_to_config_map.remove(&hdr.object_id).unwrap(); - let factory_config = config.config_v2(); - let guard = fsblob.read().unwrap(); - match guard.get_blob_cache() { - Some(blob) => { - if let Ok(cache_cfg) = factory_config.get_cache_config() { - if cache_cfg.prefetch.enable { - let _ = blob.stop_prefetch(); - } - } - let id = blob.blob_id().to_string(); - drop(blob); - BLOB_FACTORY.gc(Some((factory_config, &id))); - } - _ => warn!("fscache: blob object not ready {}", hdr.object_id), - } - } - } - - fn handle_read_request(&self, hdr: &FsCacheMsgHeader, msg: &FsCacheMsgRead) { - let fd: u32; - - match self.get_object(hdr.object_id) { - None => { - warn!( - "fscache: no cached file object found for obj_id {}", - hdr.object_id - ); - return; - } - Some((FsCacheObject::DataBlob(fsblob), u)) => { - fd = u; - let guard = fsblob.read().unwrap(); - match guard.get_blob_cache() { - Some(blob) => match blob.get_blob_object() { - None => { - warn!("fscache: internal error: cached object is not BlobCache objects") - } - Some(obj) => { - if let Err(e) = obj.fetch_range_uncompressed(msg.off, msg.len) { - error!("fscache: failed to read data from blob object: {}", e,); - } - } - }, - _ => { - //TODO: maybe we should retry init blob object here - warn!("fscache: blob object not ready"); - } - } - } - Some((FsCacheObject::Bootstrap(bs), u)) => { - // TODO: should we feed the bootstrap at together to improve performance? - fd = u; - let base = unsafe { - libc::mmap( - std::ptr::null_mut(), - msg.len as usize, - libc::PROT_READ, - libc::MAP_SHARED, - bs.bootstrap_file.as_raw_fd(), - msg.off as libc::off_t, - ) - }; - if base == libc::MAP_FAILED { - warn!( - "fscache: failed to mmap bootstrap file, {}", - std::io::Error::last_os_error() - ); - } else { - let ret = unsafe { - libc::pwrite( - bs.cache_file.as_raw_fd(), - base, - msg.len as usize, - msg.off as libc::off_t, - ) - }; - let _ = unsafe { libc::munmap(base, msg.len as usize) }; - if ret < 0 { - warn!( - "fscache: failed to write bootstrap blob data to cached file, {}", - std::io::Error::last_os_error() - ); - } - } - } - } - - if let Err(e) = unsafe { fscache_cread(fd as i32, hdr.msg_id as u64) } { - warn!("failed to send reply for cread request, {}", e); - } - } - - /// Reclaim unused facache objects. - pub fn cull_cache(&self, blob_id: String) -> Result<()> { - let children = fs::read_dir(self.cache_dir.clone())?; - let mut res = true; - // This is safe, because only api server which is a single thread server will call this func, - // and no other func will change cwd. - let cwd_old = env::current_dir()?; - - info!("try to cull blob {}", blob_id); - - // calc blob path in all volumes then try to cull them - for child in children { - let child = child?; - let path = child.path(); - let file_name = match child.file_name().to_str() { - Some(n) => n.to_string(), - None => { - warn!("failed to get file name of {}", child.path().display()); - continue; - } - }; - if !path.is_dir() || !file_name.starts_with("Ierofs,") { - continue; - } - - // get volume_key form volume dir name e.g. 
Ierofs,SharedDomain - let volume_key = &file_name[1..]; - let (cookie_dir, cookie_name) = self.generate_cookie_path(&path, volume_key, &blob_id); - let cookie_path = cookie_dir.join(&cookie_name); - if !cookie_path.is_file() { - continue; - } - let cookie_path = cookie_path.display(); - - match self.inuse(&cookie_dir, &cookie_name) { - Err(e) => { - warn!("blob {} call inuse err {}, cull failed!", cookie_path, e); - res = false; - } - Ok(true) => { - warn!("blob {} in use, skip!", cookie_path); - res = false; - } - Ok(false) => { - if let Err(e) = self.cull(&cookie_dir, &cookie_name) { - warn!("blob {} call cull err {}, cull failed!", cookie_path, e); - res = false; - } - } - } - } - - env::set_current_dir(cwd_old)?; - if res { - Ok(()) - } else { - Err(eother!("failed to cull blob objects from fscache")) - } - } - - #[inline] - fn hash_32(&self, val: u32) -> u32 { - val * 0x61C88647 - } - - #[inline] - fn rol32(&self, word: u32, shift: i32) -> u32 { - word << (shift & 31) | (word >> ((-shift) & 31)) - } - - #[inline] - fn round_up_u32(&self, size: usize) -> usize { - (size + 3) / 4 * 4 - } - - //address from kernel fscache_hash() - fn fscache_hash(&self, salt: u32, data: &[u8]) -> u32 { - assert_eq!(data.len() % 4, 0); - - let mut x = 0; - let mut y = salt; - let mut buf_le32: [u8; 4] = [0; 4]; - let n = data.len() / 4; - - for i in 0..n { - buf_le32.clone_from_slice(&data[i * 4..i * 4 + 4]); - let a = unsafe { std::mem::transmute::<[u8; 4], u32>(buf_le32) }.to_le(); - x ^= a; - y ^= x; - x = self.rol32(x, 7); - x += y; - y = self.rol32(y, 20); - y *= 9; - } - self.hash_32(y ^ self.hash_32(x)) - } - - fn generate_cookie_path( - &self, - volume_path: &Path, - volume_key: &str, - cookie_key: &str, - ) -> (PathBuf, String) { - //calc volume hash - let mut volume_hash_key: Vec = - Vec::with_capacity(self.round_up_u32(volume_key.len() + 2)); - volume_hash_key.push(volume_key.len() as u8); - volume_hash_key.append(&mut volume_key.as_bytes().to_vec()); - volume_hash_key.resize(volume_hash_key.capacity(), 0); - let volume_hash = self.fscache_hash(0, volume_hash_key.as_slice()); - - //calc cookie hash - let mut cookie_hash_key: Vec = Vec::with_capacity(self.round_up_u32(cookie_key.len())); - cookie_hash_key.append(&mut cookie_key.as_bytes().to_vec()); - cookie_hash_key.resize(cookie_hash_key.capacity(), 0); - let dir_hash = self.fscache_hash(volume_hash, cookie_hash_key.as_slice()); - - let dir = format!("@{:02x}", dir_hash as u8); - let cookie = format!("D{}", cookie_key); - (volume_path.join(dir), cookie) - } - - fn inuse(&self, cookie_dir: &Path, cookie_name: &str) -> Result { - env::set_current_dir(cookie_dir)?; - let msg = format!("inuse {}", cookie_name); - let ret = unsafe { - libc::write( - self.file.as_raw_fd(), - msg.as_bytes().as_ptr() as *const u8 as *const libc::c_void, - msg.len(), - ) - }; - if ret < 0 { - let err = Error::last_os_error(); - if let Some(e) = err.raw_os_error() { - if e == libc::EBUSY { - return Ok(true); - } - } - Err(err) - } else { - Ok(false) - } - } - - fn cull(&self, cookie_dir: &Path, cookie_name: &str) -> Result<()> { - env::set_current_dir(cookie_dir)?; - let msg = format!("cull {}", cookie_name); - let ret = unsafe { - libc::write( - self.file.as_raw_fd(), - msg.as_bytes().as_ptr() as *const u8 as *const libc::c_void, - msg.len(), - ) - }; - if ret as usize != msg.len() { - Err(Error::last_os_error()) - } else { - Ok(()) - } - } - - #[inline] - fn reply(&self, result: &str) { - // Safe because the fd and data buffer are valid. 
And we trust the fscache driver which - // will never return error for write operations. - let ret = unsafe { - libc::write( - self.file.as_raw_fd(), - result.as_bytes().as_ptr() as *const u8 as *const libc::c_void, - result.len(), - ) - }; - if ret as usize != result.len() { - warn!( - "fscache: failed to send reply \"{}\", {}", - result, - std::io::Error::last_os_error() - ); - } - } - - #[inline] - fn get_state(&self) -> MutexGuard { - self.state.lock().unwrap() - } - - #[inline] - fn get_object(&self, object_id: u32) -> Option<(FsCacheObject, u32)> { - self.get_state().id_to_object_map.get(&object_id).cloned() - } - - #[inline] - fn get_config(&self, key: &str) -> Option { - self.get_state().blob_cache_mgr.get_config(key) - } -} - -impl AsRawFd for FsCacheHandler { - fn as_raw_fd(&self) -> RawFd { - self.file.as_raw_fd() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_op_code() { - assert_eq!(FsCacheOpCode::try_from(0).unwrap(), FsCacheOpCode::Open); - assert_eq!(FsCacheOpCode::try_from(1).unwrap(), FsCacheOpCode::Close); - assert_eq!(FsCacheOpCode::try_from(2).unwrap(), FsCacheOpCode::Read); - FsCacheOpCode::try_from(3).unwrap_err(); - } - - #[test] - fn test_msg_header() { - let hdr = FsCacheMsgHeader::try_from( - vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 0].as_slice(), - ) - .unwrap(); - assert_eq!(hdr.msg_id, 0x1); - assert_eq!(hdr.opcode, FsCacheOpCode::Read); - assert_eq!(hdr.len, 17); - assert_eq!(hdr.object_id, 0x2); - - FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 13, 0].as_slice()) - .unwrap_err(); - FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 13].as_slice()) - .unwrap_err(); - FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 2, 0, 0].as_slice()).unwrap_err(); - FsCacheMsgHeader::try_from(vec![].as_slice()).unwrap_err(); - } - - #[test] - fn test_fs_cache_msg_open_try_from() { - // request message size too small - assert!(FsCacheMsgOpen::try_from( - vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0].as_slice() - ) - .is_err()); - - // volume key size or cookie key size too large - assert!(FsCacheMsgOpen::try_from( - vec![255u8, 127, 127, 127, 255, 127, 127, 255, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, 0] - .as_slice() - ) - .is_err()); - assert!(FsCacheMsgOpen::try_from( - vec![ - 255u8, 127, 127, 127, 241u8, 127, 128, 128, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, - 0, - ] - .as_slice() - ) - .is_err()); - - // value size too small - assert!(FsCacheMsgOpen::try_from( - vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 0].as_slice() - ) - .is_err()); - - let res = FsCacheMsgOpen::try_from( - vec![ - 1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, 0, - ] - .as_slice(), - ); - assert!(res.is_ok()); - assert_eq!( - res.unwrap(), - FsCacheMsgOpen { - volume_key: String::from("\u{4}"), - cookie_key: String::from("\0\0"), - fd: 17, - flags: 2 - } - ); - } - - #[test] - fn test_fs_cache_msg_read_try_from() { - assert!(FsCacheMsgRead::try_from( - vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0].as_slice() - ) - .is_err()); - - let res = FsCacheMsgRead::try_from( - vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0].as_slice(), - ); - assert!(res.is_ok()); - assert_eq!( - res.unwrap(), - FsCacheMsgRead { - off: 8589934593, - len: 8589934609, - } - ); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Handler to expose RAFSv6 image through EROFS/fscache. +//! +//! 
The [`FsCacheHandler`] is the inter-connection between in kernel EROFS/fscache drivers +//! and the user space [BlobCacheMgr](https://docs.rs/nydus-service/latest/nydus_service/blob_cache/struct.BlobCacheMgr.html). +//! The workflow is as below: +//! - EROFS presents a filesystem structure by parsing a RAFS image metadata blob. +//! - EROFS sends requests to the fscache subsystem when user reads data from files. +//! - Fscache subsystem send requests to [FsCacheHandler] if the requested data has been cached yet. +//! - [FsCacheHandler] reads blob data from the [BlobCacheMgr] and sends back reply messages. + +use std::collections::hash_map::Entry::Vacant; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::fs::{self, File, OpenOptions}; +use std::io::{copy, Error, ErrorKind, Result, Write}; +use std::ops::Deref; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::path::{Path, PathBuf}; +use std::ptr::read_unaligned; +use std::string::String; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Barrier, Condvar, Mutex, MutexGuard, RwLock}; +use std::{cmp, env, thread, time}; + +use mio::unix::SourceFd; +use mio::{Events, Interest, Poll, Token, Waker}; +use nydus_storage::cache::BlobCache; +use nydus_storage::device::BlobPrefetchRequest; +use nydus_storage::factory::{ASYNC_RUNTIME, BLOB_FACTORY}; + +use crate::blob_cache::{ + generate_blob_key, BlobCacheMgr, BlobConfig, DataBlobConfig, MetaBlobConfig, +}; + +nix::ioctl_write_int!(fscache_cread, 0x98, 1); + +/// Maximum size of fscache request message from kernel. +const MIN_DATA_BUF_SIZE: usize = 1024; +const MSG_HEADER_SIZE: usize = 16; +const MSG_OPEN_SIZE: usize = 16; +const MSG_READ_SIZE: usize = 16; + +const TOKEN_EVENT_WAKER: usize = 1; +const TOKEN_EVENT_FSCACHE: usize = 2; + +const BLOB_CACHE_INIT_RETRY: u8 = 5; +const BLOB_CACHE_INIT_INTERVAL_MS: u64 = 300; + +/// Command code in requests from fscache driver. +#[repr(u32)] +#[derive(Debug, Eq, PartialEq)] +enum FsCacheOpCode { + Open = 0, + Close = 1, + Read = 2, +} + +impl TryFrom for FsCacheOpCode { + type Error = Error; + + fn try_from(value: u32) -> std::result::Result { + match value { + 0 => Ok(FsCacheOpCode::Open), + 1 => Ok(FsCacheOpCode::Close), + 2 => Ok(FsCacheOpCode::Read), + _ => Err(einval!(format!( + "fscache: invalid operation code {}", + value + ))), + } + } +} + +/// Common header for request messages. +#[repr(C)] +#[derive(Debug, Eq, PartialEq)] +struct FsCacheMsgHeader { + /// Message identifier to associate reply with request by the fscache driver. + msg_id: u32, + /// Message operation code. + opcode: FsCacheOpCode, + /// Message length, including message header and message body. + len: u32, + /// A unique ID identifying the cache file operated on. + object_id: u32, +} + +impl TryFrom<&[u8]> for FsCacheMsgHeader { + type Error = Error; + + fn try_from(value: &[u8]) -> std::result::Result { + if value.len() < MSG_HEADER_SIZE { + return Err(einval!(format!( + "fscache: request message size is too small, {}", + value.len() + ))); + } + + // Safe because we have verified buffer size. 
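+ // Request header wire layout (MSG_HEADER_SIZE = 16 bytes, read in native byte order,
+ // i.e. little-endian on typical x86_64/aarch64 hosts):
+ //   bytes  0..4   msg_id    - identifier echoed back in the reply so the driver can match it to this request
+ //   bytes  4..8   opcode    - OPEN(0), CLOSE(1) or READ(2)
+ //   bytes  8..12  len       - total message length, header plus body
+ //   bytes 12..16  object_id - id of the cache file object being operated on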
+ let msg_id = unsafe { read_unaligned(value[0..4].as_ptr() as *const u32) }; + let opcode = unsafe { read_unaligned(value[4..8].as_ptr() as *const u32) }; + let len = unsafe { read_unaligned(value[8..12].as_ptr() as *const u32) }; + let opcode = FsCacheOpCode::try_from(opcode)?; + let object_id = unsafe { read_unaligned(value[12..16].as_ptr() as *const u32) }; + if len as usize != value.len() { + return Err(einval!(format!( + "fscache: message length {} does not match length from message header {}", + value.len(), + len + ))); + } + + Ok(FsCacheMsgHeader { + msg_id, + opcode, + len, + object_id, + }) + } +} + +/// Request message to open a file. +/// +/// The opened file should be kept valid until corresponding `CLOSE` message has been received +/// from the fscache driver. +#[derive(Default, Debug, Eq, PartialEq)] +struct FsCacheMsgOpen { + volume_key: String, + cookie_key: String, + fd: u32, + flags: u32, +} + +impl TryFrom<&[u8]> for FsCacheMsgOpen { + type Error = Error; + + fn try_from(value: &[u8]) -> std::result::Result { + if value.len() < MSG_OPEN_SIZE { + return Err(einval!(format!( + "fscache: request message size is too small, {}", + value.len() + ))); + } + + // Safe because we have verified buffer size. + let volume_key_size = unsafe { read_unaligned(value[0..4].as_ptr() as *const u32) }; + let cookie_key_size = unsafe { read_unaligned(value[4..8].as_ptr() as *const u32) }; + let fd = unsafe { read_unaligned(value[8..12].as_ptr() as *const u32) }; + let flags = unsafe { read_unaligned(value[12..16].as_ptr() as *const u32) }; + if volume_key_size.checked_add(cookie_key_size).is_none() + || (volume_key_size + cookie_key_size) + .checked_add(MSG_OPEN_SIZE as u32) + .is_none() + { + return Err(einval!( + "fscache: invalid volume/cookie key length in OPEN request" + )); + } + let total_sz = (volume_key_size + cookie_key_size) as usize + MSG_OPEN_SIZE; + if value.len() < total_sz { + return Err(einval!("fscache: invalid message length for OPEN request")); + } + let pos = MSG_OPEN_SIZE + volume_key_size as usize; + let volume_key = String::from_utf8(value[MSG_OPEN_SIZE..pos].to_vec()) + .map_err(|_e| einval!("fscache: invalid volume key in OPEN request"))? + .trim_end_matches('\0') + .to_string(); + let cookie_key = String::from_utf8(value[pos..pos + cookie_key_size as usize].to_vec()) + .map_err(|_e| einval!("fscache: invalid cookie key in OPEN request"))?; + + Ok(FsCacheMsgOpen { + volume_key, + cookie_key, + fd, + flags, + }) + } +} + +/// Request message to feed requested data into the cache file. +#[repr(C)] +#[derive(Default, Debug, Eq, PartialEq)] +struct FsCacheMsgRead { + off: u64, + len: u64, +} + +impl TryFrom<&[u8]> for FsCacheMsgRead { + type Error = Error; + + fn try_from(value: &[u8]) -> std::result::Result { + if value.len() < MSG_READ_SIZE { + return Err(einval!(format!( + "fscache: request message size is too small, {}", + value.len() + ))); + } + + // Safe because we have verified buffer size. 
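+ // READ payload layout (MSG_READ_SIZE = 16 bytes): bytes 0..8 carry the offset and
+ // bytes 8..16 the length of the range the kernel wants filled into the cache file.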
+ let off = unsafe { read_unaligned(value[0..8].as_ptr() as *const u64) }; + let len = unsafe { read_unaligned(value[8..16].as_ptr() as *const u64) }; + + Ok(FsCacheMsgRead { off, len }) + } +} + +struct FsCacheBootstrap { + bootstrap_file: File, + cache_file: File, +} + +struct FsCacheBlobCache { + cache: Option>, + config: Arc, + file: Arc, +} + +impl FsCacheBlobCache { + fn set_blob_cache(&mut self, cache: Option>) { + self.cache = cache; + } + + fn get_blob_cache(&self) -> Option> { + self.cache.clone() + } +} + +#[derive(Clone)] +enum FsCacheObject { + Bootstrap(Arc), + DataBlob(Arc>), +} + +/// Struct to maintain cached file objects. +#[derive(Default)] +struct FsCacheState { + id_to_object_map: HashMap, + id_to_config_map: HashMap>, + blob_cache_mgr: Arc, +} + +/// Handler to cooperate with Linux fscache driver to manage cached blob objects. +/// +/// The `FsCacheHandler` create a communication channel with the Linux fscache driver, configure +/// the communication session and serves all requests from the fscache driver. +pub struct FsCacheHandler { + active: AtomicBool, + barrier: Barrier, + threads: usize, + file: File, + state: Arc>, + poller: Mutex, + waker: Arc, + cache_dir: PathBuf, +} + +impl FsCacheHandler { + /// Create a new instance of [FsCacheHandler]. + pub fn new( + path: &str, + dir: &str, + tag: Option<&str>, + blob_cache_mgr: Arc, + threads: usize, + restore_file: Option<&File>, + ) -> Result { + info!( + "fscache: create FsCacheHandler with dir {}, tag {}", + dir, + tag.unwrap_or("") + ); + + let mut file = match restore_file { + None => OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open(path) + .map_err(|e| { + error!("Failed to open cachefiles device {}. {}", path, e); + e + })?, + Some(f) => f.try_clone()?, + }; + + let poller = + Poll::new().map_err(|_e| eother!("fscache: failed to create poller for service"))?; + let waker = Waker::new(poller.registry(), Token(TOKEN_EVENT_WAKER)) + .map_err(|_e| eother!("fscache: failed to create waker for service"))?; + poller + .registry() + .register( + &mut SourceFd(&file.as_raw_fd()), + Token(TOKEN_EVENT_FSCACHE), + Interest::READABLE, + ) + .map_err(|_e| eother!("fscache: failed to register fd for service"))?; + + if restore_file.is_none() { + // Initialize the fscache session + file.write_all(format!("dir {}", dir).as_bytes())?; + file.flush()?; + if let Some(tag) = tag { + file.write_all(format!("tag {}", tag).as_bytes())?; + file.flush()?; + } + file.write_all(b"bind ondemand")?; + file.flush()?; + } else { + // send restore cmd, if we are in restore process + file.write_all(b"restore")?; + file.flush()?; + } + + let state = FsCacheState { + id_to_object_map: Default::default(), + id_to_config_map: Default::default(), + blob_cache_mgr, + }; + let cache_dir = PathBuf::new().join(dir).join("cache"); + + Ok(FsCacheHandler { + active: AtomicBool::new(true), + barrier: Barrier::new(threads + 1), + threads, + file, + state: Arc::new(Mutex::new(state)), + poller: Mutex::new(poller), + waker: Arc::new(waker), + cache_dir, + }) + } + + /// Get number of working threads to service fscache requests. + pub fn working_threads(&self) -> usize { + self.threads + } + + /// Stop worker threads for the fscache service. + pub fn stop(&self) { + self.active.store(false, Ordering::Release); + if let Err(e) = self.waker.wake() { + error!("fscache: failed to signal worker thread to exit, {}", e); + } + self.barrier.wait(); + } + + /// Run the event loop to handle all requests from kernel fscache driver. 
+ /// + /// This method should only be invoked by a single thread, which will poll the fscache fd + /// and dispatch requests from fscache fd to other working threads. + pub fn run_loop(&self) -> Result<()> { + let mut events = Events::with_capacity(64); + let mut buf = vec![0u8; MIN_DATA_BUF_SIZE]; + + loop { + match self.poller.lock().unwrap().poll(&mut events, None) { + Ok(_) => {} + Err(e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => { + warn!("fscache: failed to poll events"); + return Err(e); + } + } + + for event in events.iter() { + if event.is_error() { + error!("fscache: got error event from poller"); + continue; + } + if event.token() == Token(TOKEN_EVENT_FSCACHE) { + if event.is_readable() { + self.handle_requests(&mut buf)?; + } + } else if event.is_readable() + && event.token() == Token(TOKEN_EVENT_WAKER) + && !self.active.load(Ordering::Acquire) + { + // Notify next worker to exit. + let _ = self.waker.wake(); + self.barrier.wait(); + return Ok(()); + } + } + } + } + + pub fn get_file(&self) -> &File { + &self.file + } + + /// Read and process all requests from fscache driver until no data available. + fn handle_requests(&self, buf: &mut [u8]) -> Result<()> { + loop { + let ret = unsafe { + libc::read( + self.file.as_raw_fd(), + buf.as_ptr() as *mut u8 as *mut libc::c_void, + buf.len(), + ) + }; + match ret { + // A special behavior of old cachefile driver which returns zero if there's no + // pending requests instead of `ErrorKind::WouldBlock`. + 0 => return Ok(()), + _i if _i > 0 => self.handle_one_request(&buf[0..ret as usize])?, + _ => { + let err = Error::last_os_error(); + match err.kind() { + ErrorKind::Interrupted => continue, + ErrorKind::WouldBlock => return Ok(()), + _ => return Err(err), + } + } + } + } + } + + fn handle_one_request(&self, buf: &[u8]) -> Result<()> { + let hdr = FsCacheMsgHeader::try_from(buf)?; + let buf = &buf[MSG_HEADER_SIZE..]; + + match hdr.opcode { + FsCacheOpCode::Open => { + let msg = FsCacheMsgOpen::try_from(buf)?; + self.handle_open_request(&hdr, &msg); + } + FsCacheOpCode::Close => { + self.handle_close_request(&hdr); + } + FsCacheOpCode::Read => { + let msg = FsCacheMsgRead::try_from(buf)?; + self.handle_read_request(&hdr, &msg); + } + } + + Ok(()) + } + + fn handle_open_request(&self, hdr: &FsCacheMsgHeader, msg: &FsCacheMsgOpen) { + // Drop the 'erofs,' prefix if any + let domain_id = msg + .volume_key + .strip_prefix("erofs,") + .unwrap_or(msg.volume_key.as_str()); + + let key = generate_blob_key(domain_id, &msg.cookie_key); + match self.get_config(&key) { + None => { + unsafe { libc::close(msg.fd as i32) }; + self.reply(&format!("copen {},{}", hdr.msg_id, -libc::ENOENT)); + } + Some(cfg) => match cfg { + BlobConfig::DataBlob(config) => { + let reply = self.handle_open_data_blob(hdr, msg, config); + self.reply(&reply); + } + BlobConfig::MetaBlob(config) => { + self.handle_open_bootstrap(hdr, msg, config); + } + }, + } + } + + fn handle_open_data_blob( + &self, + hdr: &FsCacheMsgHeader, + msg: &FsCacheMsgOpen, + config: Arc, + ) -> String { + let mut state = self.state.lock().unwrap(); + if let Vacant(e) = state.id_to_object_map.entry(hdr.object_id) { + let fsblob = Arc::new(RwLock::new(FsCacheBlobCache { + cache: None, + config: config.clone(), + file: Arc::new(unsafe { File::from_raw_fd(msg.fd as RawFd) }), + })); + e.insert((FsCacheObject::DataBlob(fsblob.clone()), msg.fd)); + state.id_to_config_map.insert(hdr.object_id, config.clone()); + let blob_size = config.blob_info().deref().uncompressed_size(); + let 
barrier = Arc::new(Barrier::new(2)); + Self::init_blob_cache(fsblob, barrier.clone()); + // make sure that the blobcache init thread have gotten writer lock before user daemon + // receives first request. + barrier.wait(); + format!("copen {},{}", hdr.msg_id, blob_size) + } else { + unsafe { libc::close(msg.fd as i32) }; + format!("copen {},{}", hdr.msg_id, -libc::EALREADY) + } + } + + fn init_blob_cache(fsblob: Arc>, barrier: Arc) { + thread::spawn(move || { + let mut guard = fsblob.write().unwrap(); + barrier.wait(); + //for now FsCacheBlobCache only init once, should not have blobcache associated with it + assert!(guard.get_blob_cache().is_none()); + for _ in 0..BLOB_CACHE_INIT_RETRY { + match Self::create_data_blob_object(&guard.config, guard.file.clone()) { + Err(e) => { + warn!("fscache: create_data_blob_object failed {}", e); + thread::sleep(time::Duration::from_millis(BLOB_CACHE_INIT_INTERVAL_MS)); + } + Ok(blob) => { + guard.set_blob_cache(Some(blob.clone())); + if let Err(e) = Self::do_prefetch(&guard.config, blob.clone()) { + warn!( + "fscache: failed to prefetch data for blob {}, {}", + blob.blob_id(), + e + ); + } + break; + } + } + } + }); + } + + fn do_prefetch(cfg: &DataBlobConfig, blob: Arc) -> Result<()> { + let blob_info = cfg.blob_info().deref(); + let cache_cfg = cfg.config_v2().get_cache_config()?; + if !cache_cfg.prefetch.enable { + return Ok(()); + } + blob.start_prefetch() + .map_err(|e| eother!(format!("failed to start prefetch worker, {}", e)))?; + + let size = match cache_cfg.prefetch.batch_size.checked_next_power_of_two() { + None => nydus_api::default_prefetch_batch_size() as u64, + Some(1) => nydus_api::default_prefetch_batch_size() as u64, + Some(s) => s as u64, + }; + let size = std::cmp::max(0x4_0000u64, size); + let blob_size = blob_info.compressed_data_size(); + let count = (blob_size + size - 1) / size; + let mut blob_req = Vec::with_capacity(count as usize); + let mut pre_offset = 0u64; + for _i in 0..count { + blob_req.push(BlobPrefetchRequest { + blob_id: blob_info.blob_id().to_owned(), + offset: pre_offset, + len: cmp::min(size, blob_size - pre_offset), + }); + pre_offset += size; + if pre_offset >= blob_size { + break; + } + } + + let id = blob.blob_id(); + info!("fscache: start to prefetch data for blob {}", id); + if let Err(e) = blob.prefetch(blob.clone(), &blob_req, &[]) { + warn!("fscache: failed to prefetch data for blob {}, {}", id, e); + } + + Ok(()) + } + + /// The `fscache` factory essentially creates a namespace for blob objects cached by the + /// fscache subsystem. The data blob files will be managed the in kernel fscache driver, + /// the chunk map file will be managed by the userspace daemon. We need to figure out the + /// way to share blob/chunkamp files with filecache manager. + fn create_data_blob_object( + config: &DataBlobConfig, + file: Arc, + ) -> Result> { + let mut blob_info = config.blob_info().deref().clone(); + blob_info.set_fscache_file(Some(file)); + let blob_ref = Arc::new(blob_info); + BLOB_FACTORY.new_blob_cache(config.config_v2(), &blob_ref) + } + + fn fill_bootstrap_cache(bootstrap: Arc) -> Result { + // Safe because bootstrap.bootstrap_file/cache_file are valid. 
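+ // Wrap the raw fds in temporary `File` handles so `std::io::copy` can be used, then
+ // `mem::forget` the temporaries so dropping them does not close fds that are still
+ // owned by `bootstrap`.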
+ let mut src = unsafe { File::from_raw_fd(bootstrap.bootstrap_file.as_raw_fd()) }; + let mut dst = unsafe { File::from_raw_fd(bootstrap.cache_file.as_raw_fd()) }; + let ret = copy(&mut src, &mut dst); + std::mem::forget(src); + std::mem::forget(dst); + ret.map_err(|e| { + warn!("failed to copy content from bootstap into cache fd, {}", e); + e + }) + } + + fn handle_open_bootstrap( + &self, + hdr: &FsCacheMsgHeader, + msg: &FsCacheMsgOpen, + config: Arc, + ) { + let path = config.path().display(); + let condvar = Arc::new((Mutex::new(false), Condvar::new())); + let condvar2 = condvar.clone(); + let mut state = self.get_state(); + + let ret: i64 = if let Vacant(e) = state.id_to_object_map.entry(hdr.object_id) { + match OpenOptions::new().read(true).open(config.path()) { + Err(e) => { + warn!("fscache: failed to open bootstrap file {}, {}", path, e); + -libc::ENOENT as i64 + } + Ok(f) => match f.metadata() { + Err(e) => { + warn!("fscache: failed to open bootstrap file {}, {}", path, e); + -libc::ENOENT as i64 + } + Ok(md) => { + let cache_file = unsafe { File::from_raw_fd(msg.fd as RawFd) }; + let bootstrap = Arc::new(FsCacheBootstrap { + bootstrap_file: f, + cache_file, + }); + let object = FsCacheObject::Bootstrap(bootstrap.clone()); + e.insert((object, msg.fd)); + ASYNC_RUNTIME.spawn_blocking(|| async move { + // Ensure copen reply message has been sent to kernel. + { + let (m, c) = condvar.as_ref(); + let mut g = m.lock().unwrap(); + while !*g { + g = c.wait(g).unwrap(); + } + } + + for _i in 0..3 { + if Self::fill_bootstrap_cache(bootstrap.clone()).is_ok() { + break; + } + tokio::time::sleep(time::Duration::from_secs(2)).await; + } + }); + md.len() as i64 + } + }, + } + } else { + -libc::EALREADY as i64 + }; + + if ret < 0 { + unsafe { libc::close(msg.fd as i32) }; + } + self.reply(&format!("copen {},{}", hdr.msg_id, ret)); + if ret >= 0 { + let (m, c) = condvar2.as_ref(); + *m.lock().unwrap() = true; + c.notify_one(); + } + } + + fn handle_close_request(&self, hdr: &FsCacheMsgHeader) { + let mut state = self.get_state(); + + if let Some((FsCacheObject::DataBlob(fsblob), _)) = + state.id_to_object_map.remove(&hdr.object_id) + { + // Safe to unwrap() because `id_to_config_map` and `id_to_object_map` is kept + // in consistence. 
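+ // Tear down in the reverse order of OPEN: stop the prefetch worker if it was enabled,
+ // drop our reference to the blob cache, then let the factory garbage-collect the now
+ // unused blob object.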
+ let config = state.id_to_config_map.remove(&hdr.object_id).unwrap(); + let factory_config = config.config_v2(); + let guard = fsblob.read().unwrap(); + match guard.get_blob_cache() { + Some(blob) => { + if let Ok(cache_cfg) = factory_config.get_cache_config() { + if cache_cfg.prefetch.enable { + let _ = blob.stop_prefetch(); + } + } + let id = blob.blob_id().to_string(); + drop(blob); + BLOB_FACTORY.gc(Some((factory_config, &id))); + } + _ => warn!("fscache: blob object not ready {}", hdr.object_id), + } + } + } + + fn handle_read_request(&self, hdr: &FsCacheMsgHeader, msg: &FsCacheMsgRead) { + let fd: u32; + + match self.get_object(hdr.object_id) { + None => { + warn!( + "fscache: no cached file object found for obj_id {}", + hdr.object_id + ); + return; + } + Some((FsCacheObject::DataBlob(fsblob), u)) => { + fd = u; + let guard = fsblob.read().unwrap(); + match guard.get_blob_cache() { + Some(blob) => match blob.get_blob_object() { + None => { + warn!("fscache: internal error: cached object is not BlobCache objects") + } + Some(obj) => { + if let Err(e) = obj.fetch_range_uncompressed(msg.off, msg.len) { + error!("fscache: failed to read data from blob object: {}", e,); + } + } + }, + _ => { + //TODO: maybe we should retry init blob object here + warn!("fscache: blob object not ready"); + } + } + } + Some((FsCacheObject::Bootstrap(bs), u)) => { + // TODO: should we feed the bootstrap at together to improve performance? + fd = u; + let base = unsafe { + libc::mmap( + std::ptr::null_mut(), + msg.len as usize, + libc::PROT_READ, + libc::MAP_SHARED, + bs.bootstrap_file.as_raw_fd(), + msg.off as libc::off_t, + ) + }; + if base == libc::MAP_FAILED { + warn!( + "fscache: failed to mmap bootstrap file, {}", + std::io::Error::last_os_error() + ); + } else { + let ret = unsafe { + libc::pwrite( + bs.cache_file.as_raw_fd(), + base, + msg.len as usize, + msg.off as libc::off_t, + ) + }; + let _ = unsafe { libc::munmap(base, msg.len as usize) }; + if ret < 0 { + warn!( + "fscache: failed to write bootstrap blob data to cached file, {}", + std::io::Error::last_os_error() + ); + } + } + } + } + + if let Err(e) = unsafe { fscache_cread(fd as i32, hdr.msg_id as u64) } { + warn!("failed to send reply for cread request, {}", e); + } + } + + /// Reclaim unused facache objects. + pub fn cull_cache(&self, blob_id: String) -> Result<()> { + let children = fs::read_dir(self.cache_dir.clone())?; + let mut res = true; + // This is safe, because only api server which is a single thread server will call this func, + // and no other func will change cwd. + let cwd_old = env::current_dir()?; + + info!("try to cull blob {}", blob_id); + + // calc blob path in all volumes then try to cull them + for child in children { + let child = child?; + let path = child.path(); + let file_name = match child.file_name().to_str() { + Some(n) => n.to_string(), + None => { + warn!("failed to get file name of {}", child.path().display()); + continue; + } + }; + if !path.is_dir() || !file_name.starts_with("Ierofs,") { + continue; + } + + // get volume_key form volume dir name e.g. 
Ierofs,SharedDomain + let volume_key = &file_name[1..]; + let (cookie_dir, cookie_name) = self.generate_cookie_path(&path, volume_key, &blob_id); + let cookie_path = cookie_dir.join(&cookie_name); + if !cookie_path.is_file() { + continue; + } + let cookie_path = cookie_path.display(); + + match self.inuse(&cookie_dir, &cookie_name) { + Err(e) => { + warn!("blob {} call inuse err {}, cull failed!", cookie_path, e); + res = false; + } + Ok(true) => { + warn!("blob {} in use, skip!", cookie_path); + res = false; + } + Ok(false) => { + if let Err(e) = self.cull(&cookie_dir, &cookie_name) { + warn!("blob {} call cull err {}, cull failed!", cookie_path, e); + res = false; + } + } + } + } + + env::set_current_dir(cwd_old)?; + if res { + Ok(()) + } else { + Err(eother!("failed to cull blob objects from fscache")) + } + } + + #[inline] + fn hash_32(&self, val: u32) -> u32 { + val * 0x61C88647 + } + + #[inline] + fn rol32(&self, word: u32, shift: i32) -> u32 { + word << (shift & 31) | (word >> ((-shift) & 31)) + } + + #[inline] + fn round_up_u32(&self, size: usize) -> usize { + (size + 3) / 4 * 4 + } + + //address from kernel fscache_hash() + fn fscache_hash(&self, salt: u32, data: &[u8]) -> u32 { + assert_eq!(data.len() % 4, 0); + + let mut x = 0; + let mut y = salt; + let mut buf_le32: [u8; 4] = [0; 4]; + let n = data.len() / 4; + + for i in 0..n { + buf_le32.clone_from_slice(&data[i * 4..i * 4 + 4]); + let a = unsafe { std::mem::transmute::<[u8; 4], u32>(buf_le32) }.to_le(); + x ^= a; + y ^= x; + x = self.rol32(x, 7); + x += y; + y = self.rol32(y, 20); + y *= 9; + } + self.hash_32(y ^ self.hash_32(x)) + } + + fn generate_cookie_path( + &self, + volume_path: &Path, + volume_key: &str, + cookie_key: &str, + ) -> (PathBuf, String) { + //calc volume hash + let mut volume_hash_key: Vec = + Vec::with_capacity(self.round_up_u32(volume_key.len() + 2)); + volume_hash_key.push(volume_key.len() as u8); + volume_hash_key.append(&mut volume_key.as_bytes().to_vec()); + volume_hash_key.resize(volume_hash_key.capacity(), 0); + let volume_hash = self.fscache_hash(0, volume_hash_key.as_slice()); + + //calc cookie hash + let mut cookie_hash_key: Vec = Vec::with_capacity(self.round_up_u32(cookie_key.len())); + cookie_hash_key.append(&mut cookie_key.as_bytes().to_vec()); + cookie_hash_key.resize(cookie_hash_key.capacity(), 0); + let dir_hash = self.fscache_hash(volume_hash, cookie_hash_key.as_slice()); + + let dir = format!("@{:02x}", dir_hash as u8); + let cookie = format!("D{}", cookie_key); + (volume_path.join(dir), cookie) + } + + fn inuse(&self, cookie_dir: &Path, cookie_name: &str) -> Result { + env::set_current_dir(cookie_dir)?; + let msg = format!("inuse {}", cookie_name); + let ret = unsafe { + libc::write( + self.file.as_raw_fd(), + msg.as_bytes().as_ptr() as *const u8 as *const libc::c_void, + msg.len(), + ) + }; + if ret < 0 { + let err = Error::last_os_error(); + if let Some(e) = err.raw_os_error() { + if e == libc::EBUSY { + return Ok(true); + } + } + Err(err) + } else { + Ok(false) + } + } + + fn cull(&self, cookie_dir: &Path, cookie_name: &str) -> Result<()> { + env::set_current_dir(cookie_dir)?; + let msg = format!("cull {}", cookie_name); + let ret = unsafe { + libc::write( + self.file.as_raw_fd(), + msg.as_bytes().as_ptr() as *const u8 as *const libc::c_void, + msg.len(), + ) + }; + if ret as usize != msg.len() { + Err(Error::last_os_error()) + } else { + Ok(()) + } + } + + #[inline] + fn reply(&self, result: &str) { + // Safe because the fd and data buffer are valid. 
And we trust the fscache driver which + // will never return error for write operations. + let ret = unsafe { + libc::write( + self.file.as_raw_fd(), + result.as_bytes().as_ptr() as *const u8 as *const libc::c_void, + result.len(), + ) + }; + if ret as usize != result.len() { + warn!( + "fscache: failed to send reply \"{}\", {}", + result, + std::io::Error::last_os_error() + ); + } + } + + #[inline] + fn get_state(&self) -> MutexGuard { + self.state.lock().unwrap() + } + + #[inline] + fn get_object(&self, object_id: u32) -> Option<(FsCacheObject, u32)> { + self.get_state().id_to_object_map.get(&object_id).cloned() + } + + #[inline] + fn get_config(&self, key: &str) -> Option { + self.get_state().blob_cache_mgr.get_config(key) + } +} + +impl AsRawFd for FsCacheHandler { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_op_code() { + assert_eq!(FsCacheOpCode::try_from(0).unwrap(), FsCacheOpCode::Open); + assert_eq!(FsCacheOpCode::try_from(1).unwrap(), FsCacheOpCode::Close); + assert_eq!(FsCacheOpCode::try_from(2).unwrap(), FsCacheOpCode::Read); + FsCacheOpCode::try_from(3).unwrap_err(); + } + + #[test] + fn test_msg_header() { + let hdr = FsCacheMsgHeader::try_from( + vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 0].as_slice(), + ) + .unwrap(); + assert_eq!(hdr.msg_id, 0x1); + assert_eq!(hdr.opcode, FsCacheOpCode::Read); + assert_eq!(hdr.len, 17); + assert_eq!(hdr.object_id, 0x2); + + FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 13, 0].as_slice()) + .unwrap_err(); + FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 13].as_slice()) + .unwrap_err(); + FsCacheMsgHeader::try_from(vec![0u8, 0, 0, 1, 0, 0, 0, 2, 0, 0].as_slice()).unwrap_err(); + FsCacheMsgHeader::try_from(vec![].as_slice()).unwrap_err(); + } + + #[test] + fn test_fs_cache_msg_open_try_from() { + // request message size too small + assert!(FsCacheMsgOpen::try_from( + vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0].as_slice() + ) + .is_err()); + + // volume key size or cookie key size too large + assert!(FsCacheMsgOpen::try_from( + vec![255u8, 127, 127, 127, 255, 127, 127, 255, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, 0] + .as_slice() + ) + .is_err()); + assert!(FsCacheMsgOpen::try_from( + vec![ + 255u8, 127, 127, 127, 241u8, 127, 128, 128, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, + 0, + ] + .as_slice() + ) + .is_err()); + + // value size too small + assert!(FsCacheMsgOpen::try_from( + vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 0].as_slice() + ) + .is_err()); + + let res = FsCacheMsgOpen::try_from( + vec![ + 1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0, 4u8, 0, 0, 0, + ] + .as_slice(), + ); + assert!(res.is_ok()); + assert_eq!( + res.unwrap(), + FsCacheMsgOpen { + volume_key: String::from("\u{4}"), + cookie_key: String::from("\0\0"), + fd: 17, + flags: 2 + } + ); + } + + #[test] + fn test_fs_cache_msg_read_try_from() { + assert!(FsCacheMsgRead::try_from( + vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0].as_slice() + ) + .is_err()); + + let res = FsCacheMsgRead::try_from( + vec![1u8, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 2u8, 0, 0, 0].as_slice(), + ); + assert!(res.is_ok()); + assert_eq!( + res.unwrap(), + FsCacheMsgRead { + off: 8589934593, + len: 8589934609, + } + ); + } +} diff --git a/service/src/fs_service.rs b/service/src/fs_service.rs index f260cf364ad..d7a4c2b0e2f 100644 --- a/service/src/fs_service.rs +++ b/service/src/fs_service.rs @@ -1,448 +1,448 @@ -// Copyright (C) 
2020-2022 Alibaba Cloud. All rights reserved. -// Copyright 2020 Ant Group. All rights reserved. -// Copyright 2019 Intel Corporation. All Rights Reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! Infrastructure to define and implement filesystem services. - -use std::any::Any; -use std::collections::HashMap; -use std::ops::Deref; -use std::path::{Path, PathBuf}; -use std::str::FromStr; -use std::sync::{Arc, MutexGuard}; - -#[cfg(target_os = "linux")] -use fuse_backend_rs::api::filesystem::{FileSystem, FsOptions, Layer}; -use fuse_backend_rs::api::vfs::VfsError; -use fuse_backend_rs::api::{BackFileSystem, Vfs}; -#[cfg(target_os = "linux")] -use fuse_backend_rs::overlayfs::{config::Config as overlay_config, OverlayFs}; -#[cfg(target_os = "linux")] -use fuse_backend_rs::passthrough::{CachePolicy, Config as passthrough_config, PassthroughFs}; -use nydus_api::ConfigV2; -use nydus_rafs::fs::Rafs; -use nydus_rafs::{RafsError, RafsIoRead}; -use nydus_storage::factory::BLOB_FACTORY; -use serde::{Deserialize, Serialize}; -use versionize::{VersionMap, Versionize, VersionizeResult}; -use versionize_derive::Versionize; - -use crate::upgrade::UpgradeManager; -use crate::{Error, FsBackendDescriptor, FsBackendType, Result}; - -/// Request structure to mount a filesystem instance. -#[derive(Clone, Versionize, Debug)] -pub struct FsBackendMountCmd { - /// Filesystem type. - pub fs_type: FsBackendType, - /// Mount source. - pub source: String, - /// Configuration information for the mount operation. - pub config: String, - /// Filesystem mountpoint. - pub mountpoint: String, - /// Optional prefetch file list. - pub prefetch_files: Option>, -} - -/// Request structure to unmount a filesystem instance. -#[derive(Clone, Deserialize, Serialize, Debug)] -pub struct FsBackendUmountCmd { - /// Filesystem mountpoint. - pub mountpoint: String, -} - -/// List of [FsBackendDescriptor], providing filesystem metrics and statistics information. -#[derive(Default, Serialize, Clone)] -pub struct FsBackendCollection(HashMap); - -impl FsBackendCollection { - fn add(&mut self, id: &str, cmd: &FsBackendMountCmd) -> Result<()> { - // We only wash Rafs backend now. - let fs_config = match cmd.fs_type { - FsBackendType::Rafs => { - let cfg = ConfigV2::from_str(&cmd.config) - .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; - let cfg = cfg.clone_without_secrets(); - Some(cfg) - } - FsBackendType::PassthroughFs => { - // Passthrough Fs has no configuration information. - None - } - }; - - let desc = FsBackendDescriptor { - backend_type: cmd.fs_type.clone(), - mountpoint: cmd.mountpoint.clone(), - mounted_time: time::OffsetDateTime::now_utc(), - config: fs_config, - }; - - self.0.insert(id.to_string(), desc); - - Ok(()) - } - - fn del(&mut self, id: &str) { - self.0.remove(id); - } -} - -/// Abstract interfaces for filesystem service provider. -pub trait FsService: Send + Sync { - /// Get the [Vfs](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/struct.Vfs.html) - /// object associated with the filesystem service object. - fn get_vfs(&self) -> &Vfs; - - /// Get the [BackFileSystem](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/type.BackFileSystem.html) - /// object associated with a mount point. - fn backend_from_mountpoint(&self, mp: &str) -> Result>> { - self.get_vfs().get_rootfs(mp).map_err(|e| e.into()) - } - - /// Get handle to the optional upgrade manager. - fn upgrade_mgr(&self) -> Option>; - - /// Mount a new filesystem instance. 
- // NOTE: This method is not thread-safe, however, it is acceptable as - // mount/umount/remount/restore_mount is invoked from single thread in FSM - fn mount(&self, cmd: FsBackendMountCmd) -> Result<()> { - if self.backend_from_mountpoint(&cmd.mountpoint)?.is_some() { - return Err(Error::AlreadyExists); - } - let backend = fs_backend_factory(&cmd)?; - let index = self.get_vfs().mount(backend, &cmd.mountpoint)?; - info!("{} filesystem mounted at {}", &cmd.fs_type, &cmd.mountpoint); - - if let Err(e) = self.backend_collection().add(&cmd.mountpoint, &cmd) { - warn!( - "failed to add filesystem instance to metrics manager, {}", - e - ); - } - if let Some(mut mgr_guard) = self.upgrade_mgr() { - mgr_guard.add_mounts_state(cmd, index); - mgr_guard.save_vfs_stat(self.get_vfs())?; - } - - Ok(()) - } - - /// Remount a filesystem instance. - fn remount(&self, cmd: FsBackendMountCmd) -> Result<()> { - let rootfs = self - .backend_from_mountpoint(&cmd.mountpoint)? - .ok_or(Error::NotFound)?; - let mut bootstrap = ::from_file(&cmd.source)?; - let any_fs = rootfs.deref().as_any(); - let rafs = any_fs - .downcast_ref::() - .ok_or_else(|| Error::FsTypeMismatch("RAFS".to_string()))?; - let rafs_cfg = ConfigV2::from_str(&cmd.config).map_err(RafsError::LoadConfig)?; - let rafs_cfg = Arc::new(rafs_cfg); - - rafs.update(&mut bootstrap, &rafs_cfg) - .map_err(|e| match e { - RafsError::Unsupported => Error::Unsupported, - e => Error::Rafs(e), - })?; - - // To update mounted time and backend configurations. - if let Err(e) = self.backend_collection().add(&cmd.mountpoint, &cmd) { - warn!( - "failed to update filesystem instance to metrics manager, {}", - e - ); - } - // Update mounts opaque from UpgradeManager - if let Some(mut mgr_guard) = self.upgrade_mgr() { - mgr_guard.update_mounts_state(cmd)?; - } - - Ok(()) - } - - /// Restore a filesystem instance. - fn restore_mount(&self, cmd: &FsBackendMountCmd, vfs_index: u8) -> Result<()> { - let backend = fs_backend_factory(cmd)?; - self.get_vfs() - .restore_mount(backend, vfs_index, &cmd.mountpoint) - .map_err(VfsError::RestoreMount)?; - self.backend_collection().add(&cmd.mountpoint, &cmd)?; - info!("backend fs restored at {}", cmd.mountpoint); - Ok(()) - } - - /// Umount a filesystem instance. - fn umount(&self, cmd: FsBackendUmountCmd) -> Result<()> { - let _ = self - .backend_from_mountpoint(&cmd.mountpoint)? - .ok_or(Error::NotFound)?; - - self.get_vfs().umount(&cmd.mountpoint)?; - self.backend_collection().del(&cmd.mountpoint); - if let Some(mut mgr_guard) = self.upgrade_mgr() { - // Remove mount opaque from UpgradeManager - mgr_guard.remove_mounts_state(cmd); - mgr_guard.save_vfs_stat(self.get_vfs())?; - } - - debug!("try to gc unused blobs"); - BLOB_FACTORY.gc(None); - - Ok(()) - } - - /// Get list of metrics information objects about mounted filesystem instances. - fn backend_collection(&self) -> MutexGuard; - - /// Export information about the filesystem service. - fn export_backend_info(&self, mountpoint: &str) -> Result { - let fs = self - .backend_from_mountpoint(mountpoint)? - .ok_or(Error::NotFound)?; - let any_fs = fs.deref().as_any(); - let rafs = any_fs - .downcast_ref::() - .ok_or_else(|| Error::FsTypeMismatch("RAFS".to_string()))?; - let resp = serde_json::to_string(rafs.metadata()).map_err(Error::Serde)?; - Ok(resp) - } - - /// Export metrics about in-flight operations. - fn export_inflight_ops(&self) -> Result>; - - /// Cast `self` to trait object of [Any] to support object downcast. 
- fn as_any(&self) -> &dyn Any; -} - -/// Validate prefetch file list from user input. -/// -/// Validation rules: -/// - an item may be file or directory. -/// - items must be separated by space, such as " ". -/// - each item must be absolute path, such as "/foo1/bar1 /foo2/bar2". -fn validate_prefetch_file_list(input: &Option>) -> Result>> { - if let Some(list) = input { - let list: Vec = list.iter().map(PathBuf::from).collect(); - for elem in list.iter() { - if !elem.is_absolute() { - return Err(Error::InvalidPrefetchList); - } - } - Ok(Some(list)) - } else { - Ok(None) - } -} - -fn fs_backend_factory(cmd: &FsBackendMountCmd) -> Result { - let prefetch_files = validate_prefetch_file_list(&cmd.prefetch_files)?; - - match cmd.fs_type { - FsBackendType::Rafs => { - let config = ConfigV2::from_str(cmd.config.as_str()).map_err(RafsError::LoadConfig)?; - let config = Arc::new(config); - let (mut rafs, reader) = Rafs::new(&config, &cmd.mountpoint, Path::new(&cmd.source))?; - rafs.import(reader, prefetch_files)?; - - // Put a writable upper layer above the rafs to create an OverlayFS with two layers. - match &config.overlay { - Some(ovl_conf) => { - // check workdir and upperdir params. - if ovl_conf.work_dir.is_empty() || ovl_conf.upper_dir.is_empty() { - return Err(Error::InvalidArguments(String::from( - "workdir and upperdir must be specified for overlayfs", - ))); - } - - // Create an overlay upper layer with passthroughfs. - #[cfg(target_os = "macos")] - return Err(Error::InvalidArguments(String::from( - "not support OverlayFs since passthroughfs isn't supported on MacOS", - ))); - #[cfg(target_os = "linux")] - { - let fs_cfg = passthrough_config { - // Use upper_dir as root_dir as rw layer. - root_dir: ovl_conf.upper_dir.clone(), - do_import: true, - writeback: true, - no_open: true, - no_opendir: true, - xattr: true, - cache_policy: CachePolicy::Always, - ..Default::default() - }; - let fsopts = FsOptions::WRITEBACK_CACHE - | FsOptions::ZERO_MESSAGE_OPEN - | FsOptions::ZERO_MESSAGE_OPENDIR; - - let passthrough_fs = PassthroughFs::<()>::new(fs_cfg) - .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; - passthrough_fs.init(fsopts).map_err(Error::PassthroughFs)?; - - type BoxedLayer = Box + Send + Sync>; - let upper_layer = Arc::new(Box::new(passthrough_fs) as BoxedLayer); - - // Create overlay lower layer with rafs, use lower_dir as root_dir of rafs. - let lower_layers = vec![Arc::new(Box::new(rafs) as BoxedLayer)]; - - let overlay_config = overlay_config { - work: ovl_conf.work_dir.clone(), - mountpoint: cmd.mountpoint.clone(), - do_import: false, - no_open: true, - no_opendir: true, - ..Default::default() - }; - let overlayfs = - OverlayFs::new(Some(upper_layer), lower_layers, overlay_config) - .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; - info!( - "init overlay fs inode, upper {}, work {}\n", - ovl_conf.upper_dir.clone(), - ovl_conf.work_dir.clone() - ); - overlayfs - .import() - .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; - info!("Overlay filesystem imported"); - Ok(Box::new(overlayfs)) - } - } - None => { - info!("RAFS filesystem imported"); - Ok(Box::new(rafs)) - } - } - } - FsBackendType::PassthroughFs => { - #[cfg(target_os = "macos")] - return Err(Error::InvalidArguments(String::from( - "not support passthroughfs", - ))); - #[cfg(target_os = "linux")] - { - // Vfs by default enables no_open and writeback, passthroughfs - // needs to specify them explicitly. - // TODO(liubo): enable no_open_dir. 
- let fs_cfg = passthrough_config { - root_dir: cmd.source.to_string(), - do_import: false, - writeback: true, - no_open: true, - xattr: true, - ..Default::default() - }; - let passthrough_fs = - PassthroughFs::<()>::new(fs_cfg).map_err(Error::PassthroughFs)?; - passthrough_fs.import().map_err(Error::PassthroughFs)?; - info!("PassthroughFs imported"); - Ok(Box::new(passthrough_fs)) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_should_add_new_backend() { - let mut col: FsBackendCollection = Default::default(); - let config = r#"{ - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "fscache", - "fscache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "/tmp/nydus/bootstrap1" - }"#; - let r = col.add( - "test", - &FsBackendMountCmd { - fs_type: FsBackendType::Rafs, - config: config.to_string(), - mountpoint: "testmonutount".to_string(), - source: "testsource".to_string(), - prefetch_files: Some(vec!["testfile".to_string()]), - }, - ); - assert!(r.is_ok(), "failed to add backend collection"); - - assert_eq!(col.0.len(), 1); - - col.del("test"); - assert_eq!(col.0.len(), 0); - } - - #[test] - fn it_should_verify_prefetch_files() { - let files = validate_prefetch_file_list(&Some(vec!["/etc/passwd".to_string()])); - assert!(files.is_ok(), "failed to verify prefetch files"); - assert_eq!(1, files.unwrap().unwrap().len()); - - assert!( - validate_prefetch_file_list(&Some(vec!["etc/passwd".to_string()])).is_err(), - "should not pass verify" - ); - } - - #[test] - fn it_should_create_rafs_backend() { - let config = r#" - { - "device": { - "backend": { - "type": "oss", - "config": { - "endpoint": "test", - "access_key_id": "test", - "access_key_secret": "test", - "bucket_name": "antsys-nydus", - "object_prefix":"nydus_v2/", - "scheme": "http" - } - } - }, - "mode": "direct", - "digest_validate": false, - "enable_xattr": true, - "fs_prefetch": { - "enable": true, - "threads_count": 10, - "merging_size": 131072, - "bandwidth_rate": 10485760 - } - }"#; - let bootstrap = "../tests/texture/bootstrap/nydusd_daemon_test_bootstrap"; - if fs_backend_factory(&FsBackendMountCmd { - fs_type: FsBackendType::Rafs, - config: config.to_string(), - mountpoint: "testmountpoint".to_string(), - source: bootstrap.to_string(), - prefetch_files: Some(vec!["/testfile".to_string()]), - }) - .unwrap() - .as_any() - .downcast_ref::() - .is_none() - { - panic!("failed to create rafs backend") - } - } -} +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Infrastructure to define and implement filesystem services. 
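As a usage sketch for the service interface defined in this file: a caller first builds an `FsBackendMountCmd` describing the backend filesystem, then hands it to `FsService::mount()`, which runs it through `fs_backend_factory()` and mounts the result into the daemon's VFS. The snippet below only illustrates the request structure; the bootstrap path, mountpoint and ConfigV2 JSON are hypothetical values modeled on this file's unit tests, not values taken from the patch.

```rust
use nydus_service::{FsBackendMountCmd, FsBackendType};

/// Build a mount request for a RAFS instance backed by a local directory.
/// All paths below are illustrative placeholders.
fn rafs_mount_cmd() -> FsBackendMountCmd {
    // ConfigV2 in JSON form; mirrors the shape of the configuration used by
    // the unit tests in this file (localfs backend plus fscache cache).
    let config = r#"{
        "version": 2,
        "id": "example",
        "backend": {
            "type": "localfs",
            "localfs": { "dir": "/var/lib/nydus/blobs" }
        },
        "cache": {
            "type": "fscache",
            "fscache": { "work_dir": "/var/lib/nydus/cache" }
        },
        "metadata_path": "/var/lib/nydus/bootstrap"
    }"#;

    FsBackendMountCmd {
        fs_type: FsBackendType::Rafs,
        // RAFS bootstrap (metadata) blob used as the mount source.
        source: "/var/lib/nydus/bootstrap".to_string(),
        config: config.to_string(),
        // Virtual path inside the daemon's VFS where the filesystem appears.
        mountpoint: "/rafs".to_string(),
        // Prefetch entries must be absolute paths; see `validate_prefetch_file_list`.
        prefetch_files: Some(vec!["/etc/passwd".to_string()]),
    }
}

fn main() {
    let cmd = rafs_mount_cmd();
    println!("mounting {:?} at {}", cmd.fs_type, cmd.mountpoint);
}
```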
+ +use std::any::Any; +use std::collections::HashMap; +use std::ops::Deref; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::{Arc, MutexGuard}; + +#[cfg(target_os = "linux")] +use fuse_backend_rs::api::filesystem::{FileSystem, FsOptions, Layer}; +use fuse_backend_rs::api::vfs::VfsError; +use fuse_backend_rs::api::{BackFileSystem, Vfs}; +#[cfg(target_os = "linux")] +use fuse_backend_rs::overlayfs::{config::Config as overlay_config, OverlayFs}; +#[cfg(target_os = "linux")] +use fuse_backend_rs::passthrough::{CachePolicy, Config as passthrough_config, PassthroughFs}; +use nydus_api::ConfigV2; +use nydus_rafs::fs::Rafs; +use nydus_rafs::{RafsError, RafsIoRead}; +use nydus_storage::factory::BLOB_FACTORY; +use serde::{Deserialize, Serialize}; +use versionize::{VersionMap, Versionize, VersionizeResult}; +use versionize_derive::Versionize; + +use crate::upgrade::UpgradeManager; +use crate::{Error, FsBackendDescriptor, FsBackendType, Result}; + +/// Request structure to mount a filesystem instance. +#[derive(Clone, Versionize, Debug)] +pub struct FsBackendMountCmd { + /// Filesystem type. + pub fs_type: FsBackendType, + /// Mount source. + pub source: String, + /// Configuration information for the mount operation. + pub config: String, + /// Filesystem mountpoint. + pub mountpoint: String, + /// Optional prefetch file list. + pub prefetch_files: Option>, +} + +/// Request structure to unmount a filesystem instance. +#[derive(Clone, Deserialize, Serialize, Debug)] +pub struct FsBackendUmountCmd { + /// Filesystem mountpoint. + pub mountpoint: String, +} + +/// List of [FsBackendDescriptor], providing filesystem metrics and statistics information. +#[derive(Default, Serialize, Clone)] +pub struct FsBackendCollection(HashMap); + +impl FsBackendCollection { + fn add(&mut self, id: &str, cmd: &FsBackendMountCmd) -> Result<()> { + // We only wash Rafs backend now. + let fs_config = match cmd.fs_type { + FsBackendType::Rafs => { + let cfg = ConfigV2::from_str(&cmd.config) + .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; + let cfg = cfg.clone_without_secrets(); + Some(cfg) + } + FsBackendType::PassthroughFs => { + // Passthrough Fs has no configuration information. + None + } + }; + + let desc = FsBackendDescriptor { + backend_type: cmd.fs_type.clone(), + mountpoint: cmd.mountpoint.clone(), + mounted_time: time::OffsetDateTime::now_utc(), + config: fs_config, + }; + + self.0.insert(id.to_string(), desc); + + Ok(()) + } + + fn del(&mut self, id: &str) { + self.0.remove(id); + } +} + +/// Abstract interfaces for filesystem service provider. +pub trait FsService: Send + Sync { + /// Get the [Vfs](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/struct.Vfs.html) + /// object associated with the filesystem service object. + fn get_vfs(&self) -> &Vfs; + + /// Get the [BackFileSystem](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/type.BackFileSystem.html) + /// object associated with a mount point. + fn backend_from_mountpoint(&self, mp: &str) -> Result>> { + self.get_vfs().get_rootfs(mp).map_err(|e| e.into()) + } + + /// Get handle to the optional upgrade manager. + fn upgrade_mgr(&self) -> Option>; + + /// Mount a new filesystem instance. 
+ // NOTE: This method is not thread-safe, however, it is acceptable as + // mount/umount/remount/restore_mount is invoked from single thread in FSM + fn mount(&self, cmd: FsBackendMountCmd) -> Result<()> { + if self.backend_from_mountpoint(&cmd.mountpoint)?.is_some() { + return Err(Error::AlreadyExists); + } + let backend = fs_backend_factory(&cmd)?; + let index = self.get_vfs().mount(backend, &cmd.mountpoint)?; + info!("{} filesystem mounted at {}", &cmd.fs_type, &cmd.mountpoint); + + if let Err(e) = self.backend_collection().add(&cmd.mountpoint, &cmd) { + warn!( + "failed to add filesystem instance to metrics manager, {}", + e + ); + } + if let Some(mut mgr_guard) = self.upgrade_mgr() { + mgr_guard.add_mounts_state(cmd, index); + mgr_guard.save_vfs_stat(self.get_vfs())?; + } + + Ok(()) + } + + /// Remount a filesystem instance. + fn remount(&self, cmd: FsBackendMountCmd) -> Result<()> { + let rootfs = self + .backend_from_mountpoint(&cmd.mountpoint)? + .ok_or(Error::NotFound)?; + let mut bootstrap = ::from_file(&cmd.source)?; + let any_fs = rootfs.deref().as_any(); + let rafs = any_fs + .downcast_ref::() + .ok_or_else(|| Error::FsTypeMismatch("RAFS".to_string()))?; + let rafs_cfg = ConfigV2::from_str(&cmd.config).map_err(RafsError::LoadConfig)?; + let rafs_cfg = Arc::new(rafs_cfg); + + rafs.update(&mut bootstrap, &rafs_cfg) + .map_err(|e| match e { + RafsError::Unsupported => Error::Unsupported, + e => Error::Rafs(e), + })?; + + // To update mounted time and backend configurations. + if let Err(e) = self.backend_collection().add(&cmd.mountpoint, &cmd) { + warn!( + "failed to update filesystem instance to metrics manager, {}", + e + ); + } + // Update mounts opaque from UpgradeManager + if let Some(mut mgr_guard) = self.upgrade_mgr() { + mgr_guard.update_mounts_state(cmd)?; + } + + Ok(()) + } + + /// Restore a filesystem instance. + fn restore_mount(&self, cmd: &FsBackendMountCmd, vfs_index: u8) -> Result<()> { + let backend = fs_backend_factory(cmd)?; + self.get_vfs() + .restore_mount(backend, vfs_index, &cmd.mountpoint) + .map_err(VfsError::RestoreMount)?; + self.backend_collection().add(&cmd.mountpoint, &cmd)?; + info!("backend fs restored at {}", cmd.mountpoint); + Ok(()) + } + + /// Umount a filesystem instance. + fn umount(&self, cmd: FsBackendUmountCmd) -> Result<()> { + let _ = self + .backend_from_mountpoint(&cmd.mountpoint)? + .ok_or(Error::NotFound)?; + + self.get_vfs().umount(&cmd.mountpoint)?; + self.backend_collection().del(&cmd.mountpoint); + if let Some(mut mgr_guard) = self.upgrade_mgr() { + // Remove mount opaque from UpgradeManager + mgr_guard.remove_mounts_state(cmd); + mgr_guard.save_vfs_stat(self.get_vfs())?; + } + + debug!("try to gc unused blobs"); + BLOB_FACTORY.gc(None); + + Ok(()) + } + + /// Get list of metrics information objects about mounted filesystem instances. + fn backend_collection(&self) -> MutexGuard; + + /// Export information about the filesystem service. + fn export_backend_info(&self, mountpoint: &str) -> Result { + let fs = self + .backend_from_mountpoint(mountpoint)? + .ok_or(Error::NotFound)?; + let any_fs = fs.deref().as_any(); + let rafs = any_fs + .downcast_ref::() + .ok_or_else(|| Error::FsTypeMismatch("RAFS".to_string()))?; + let resp = serde_json::to_string(rafs.metadata()).map_err(Error::Serde)?; + Ok(resp) + } + + /// Export metrics about in-flight operations. + fn export_inflight_ops(&self) -> Result>; + + /// Cast `self` to trait object of [Any] to support object downcast. 
+ fn as_any(&self) -> &dyn Any; +} + +/// Validate prefetch file list from user input. +/// +/// Validation rules: +/// - an item may be file or directory. +/// - items must be separated by space, such as " ". +/// - each item must be absolute path, such as "/foo1/bar1 /foo2/bar2". +fn validate_prefetch_file_list(input: &Option>) -> Result>> { + if let Some(list) = input { + let list: Vec = list.iter().map(PathBuf::from).collect(); + for elem in list.iter() { + if !elem.is_absolute() { + return Err(Error::InvalidPrefetchList); + } + } + Ok(Some(list)) + } else { + Ok(None) + } +} + +fn fs_backend_factory(cmd: &FsBackendMountCmd) -> Result { + let prefetch_files = validate_prefetch_file_list(&cmd.prefetch_files)?; + + match cmd.fs_type { + FsBackendType::Rafs => { + let config = ConfigV2::from_str(cmd.config.as_str()).map_err(RafsError::LoadConfig)?; + let config = Arc::new(config); + let (mut rafs, reader) = Rafs::new(&config, &cmd.mountpoint, Path::new(&cmd.source))?; + rafs.import(reader, prefetch_files)?; + + // Put a writable upper layer above the rafs to create an OverlayFS with two layers. + match &config.overlay { + Some(ovl_conf) => { + // check workdir and upperdir params. + if ovl_conf.work_dir.is_empty() || ovl_conf.upper_dir.is_empty() { + return Err(Error::InvalidArguments(String::from( + "workdir and upperdir must be specified for overlayfs", + ))); + } + + // Create an overlay upper layer with passthroughfs. + #[cfg(target_os = "macos")] + return Err(Error::InvalidArguments(String::from( + "not support OverlayFs since passthroughfs isn't supported on MacOS", + ))); + #[cfg(target_os = "linux")] + { + let fs_cfg = passthrough_config { + // Use upper_dir as root_dir as rw layer. + root_dir: ovl_conf.upper_dir.clone(), + do_import: true, + writeback: true, + no_open: true, + no_opendir: true, + xattr: true, + cache_policy: CachePolicy::Always, + ..Default::default() + }; + let fsopts = FsOptions::WRITEBACK_CACHE + | FsOptions::ZERO_MESSAGE_OPEN + | FsOptions::ZERO_MESSAGE_OPENDIR; + + let passthrough_fs = PassthroughFs::<()>::new(fs_cfg) + .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; + passthrough_fs.init(fsopts).map_err(Error::PassthroughFs)?; + + type BoxedLayer = Box + Send + Sync>; + let upper_layer = Arc::new(Box::new(passthrough_fs) as BoxedLayer); + + // Create overlay lower layer with rafs, use lower_dir as root_dir of rafs. + let lower_layers = vec![Arc::new(Box::new(rafs) as BoxedLayer)]; + + let overlay_config = overlay_config { + work: ovl_conf.work_dir.clone(), + mountpoint: cmd.mountpoint.clone(), + do_import: false, + no_open: true, + no_opendir: true, + ..Default::default() + }; + let overlayfs = + OverlayFs::new(Some(upper_layer), lower_layers, overlay_config) + .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; + info!( + "init overlay fs inode, upper {}, work {}\n", + ovl_conf.upper_dir.clone(), + ovl_conf.work_dir.clone() + ); + overlayfs + .import() + .map_err(|e| Error::InvalidConfig(format!("{}", e)))?; + info!("Overlay filesystem imported"); + Ok(Box::new(overlayfs)) + } + } + None => { + info!("RAFS filesystem imported"); + Ok(Box::new(rafs)) + } + } + } + FsBackendType::PassthroughFs => { + #[cfg(target_os = "macos")] + return Err(Error::InvalidArguments(String::from( + "not support passthroughfs", + ))); + #[cfg(target_os = "linux")] + { + // Vfs by default enables no_open and writeback, passthroughfs + // needs to specify them explicitly. + // TODO(liubo): enable no_open_dir. 
+ let fs_cfg = passthrough_config { + root_dir: cmd.source.to_string(), + do_import: false, + writeback: true, + no_open: true, + xattr: true, + ..Default::default() + }; + let passthrough_fs = + PassthroughFs::<()>::new(fs_cfg).map_err(Error::PassthroughFs)?; + passthrough_fs.import().map_err(Error::PassthroughFs)?; + info!("PassthroughFs imported"); + Ok(Box::new(passthrough_fs)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_should_add_new_backend() { + let mut col: FsBackendCollection = Default::default(); + let config = r#"{ + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "fscache", + "fscache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "/tmp/nydus/bootstrap1" + }"#; + let r = col.add( + "test", + &FsBackendMountCmd { + fs_type: FsBackendType::Rafs, + config: config.to_string(), + mountpoint: "testmonutount".to_string(), + source: "testsource".to_string(), + prefetch_files: Some(vec!["testfile".to_string()]), + }, + ); + assert!(r.is_ok(), "failed to add backend collection"); + + assert_eq!(col.0.len(), 1); + + col.del("test"); + assert_eq!(col.0.len(), 0); + } + + #[test] + fn it_should_verify_prefetch_files() { + let files = validate_prefetch_file_list(&Some(vec!["/etc/passwd".to_string()])); + assert!(files.is_ok(), "failed to verify prefetch files"); + assert_eq!(1, files.unwrap().unwrap().len()); + + assert!( + validate_prefetch_file_list(&Some(vec!["etc/passwd".to_string()])).is_err(), + "should not pass verify" + ); + } + + #[test] + fn it_should_create_rafs_backend() { + let config = r#" + { + "device": { + "backend": { + "type": "oss", + "config": { + "endpoint": "test", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/", + "scheme": "http" + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 10, + "merging_size": 131072, + "bandwidth_rate": 10485760 + } + }"#; + let bootstrap = "../tests/texture/bootstrap/nydusd_daemon_test_bootstrap"; + if fs_backend_factory(&FsBackendMountCmd { + fs_type: FsBackendType::Rafs, + config: config.to_string(), + mountpoint: "testmountpoint".to_string(), + source: bootstrap.to_string(), + prefetch_files: Some(vec!["/testfile".to_string()]), + }) + .unwrap() + .as_any() + .downcast_ref::() + .is_none() + { + panic!("failed to create rafs backend") + } + } +} diff --git a/service/src/fusedev.rs b/service/src/fusedev.rs index 5dc9da598fa..e87abe95b4a 100644 --- a/service/src/fusedev.rs +++ b/service/src/fusedev.rs @@ -1,674 +1,674 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! Nydus FUSE filesystem daemon. 
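The FUSE connection ID stored in `FusedevFsService::conn` is derived from the mountpoint's device number by `calc_fuse_conn()` later in this file, using the kernel packing `MKDEV(ma, mi) = ((ma) << 20) | (mi)`. The standalone sketch below (Linux only, with a hypothetical mountpoint path) shows the same computation in isolation; it is an illustration, not code from the patch.

```rust
use std::fs::metadata;
use std::io::Result;
use std::os::linux::fs::MetadataExt;

use nix::sys::stat::{major, minor};

/// Compute the FUSE connection ID for a mounted filesystem from its `st_dev`,
/// following the kernel formula MKDEV(ma, mi) = ((ma) << 20) | (mi).
fn fuse_conn_id(mountpoint: &str) -> Result<u64> {
    let st = metadata(mountpoint)?;
    let dev = st.st_dev();
    Ok((major(dev) << 20) | minor(dev))
}

fn main() -> Result<()> {
    // Hypothetical mountpoint used only for illustration.
    let conn = fuse_conn_id("/mnt/rafs")?;
    println!("FUSE connection id: {}", conn);
    Ok(())
}
```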
- -use std::any::Any; -use std::ffi::{CStr, CString}; -use std::fs::metadata; -use std::io::{Error, ErrorKind, Result}; -use std::ops::Deref; -#[cfg(target_os = "linux")] -use std::os::linux::fs::MetadataExt; -#[cfg(target_os = "linux")] -use std::os::unix::ffi::OsStrExt; -#[cfg(target_os = "macos")] -use std::os::unix::fs::MetadataExt; -use std::os::unix::net::UnixStream; -use std::path::Path; -use std::sync::{ - atomic::{AtomicI32, AtomicU64, Ordering}, - mpsc::{channel, Receiver, Sender}, - Arc, Mutex, MutexGuard, -}; -use std::thread::{self, JoinHandle}; -use std::time::{SystemTime, UNIX_EPOCH}; - -use fuse_backend_rs::abi::fuse_abi::{InHeader, OutHeader}; -use fuse_backend_rs::api::server::{MetricsHook, Server}; -use fuse_backend_rs::api::Vfs; -use fuse_backend_rs::transport::{FuseChannel, FuseSession}; -use mio::Waker; -#[cfg(target_os = "linux")] -use nix::sys::stat::{major, minor}; -use nydus_api::BuildTimeInfo; -use serde::Serialize; - -use crate::daemon::{ - DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, - NydusDaemon, -}; -use crate::fs_service::{FsBackendCollection, FsBackendMountCmd, FsService}; -use crate::upgrade::{self, FailoverPolicy, UpgradeManager}; -use crate::{Error as NydusError, FsBackendType, Result as NydusResult}; - -#[derive(Serialize)] -struct FuseOp { - inode: u64, - opcode: u32, - unique: u64, - timestamp_secs: u64, -} - -impl Default for FuseOp { - fn default() -> Self { - // unwrap because time can't be earlier than EPOCH. - let timestamp_secs = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - Self { - inode: u64::default(), - opcode: u32::default(), - unique: u64::default(), - timestamp_secs, - } - } -} - -#[derive(Default, Clone, Serialize)] -struct FuseOpWrapper { - // Mutex should be acceptable since `inflight_op` is always updated - // within the same thread, which means locking is always directly acquired. - op: Arc>>, -} - -impl MetricsHook for FuseOpWrapper { - fn collect(&self, ih: &InHeader) { - let (n, u, o) = (ih.nodeid, ih.unique, ih.opcode); - // Unwrap is safe because time can't be earlier than EPOCH - let timestamp_secs = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - let op = FuseOp { - inode: n, - unique: u, - opcode: o, - timestamp_secs, - }; - - *self.op.lock().expect("Not expect poisoned lock") = Some(op); - } - - fn release(&self, _oh: Option<&OutHeader>) { - *self.op.lock().expect("Not expect poisoned lock") = None - } -} - -struct FuseServer { - server: Arc>>, - ch: FuseChannel, -} - -impl FuseServer { - fn new(server: Arc>>, se: &FuseSession) -> Result { - let ch = se.new_channel().map_err(|e| eother!(e))?; - Ok(FuseServer { server, ch }) - } - - fn svc_loop(&mut self, metrics_hook: &dyn MetricsHook) -> Result<()> { - // Given error EBADF, it means kernel has shut down this session. - let _ebadf = Error::from_raw_os_error(libc::EBADF); - - loop { - if let Some((reader, writer)) = self.ch.get_request().map_err(|e| { - Error::new( - ErrorKind::Other, - format!("failed to get fuse request from /dev/fuse, {}", e), - ) - })? 
{ - if let Err(e) = - self.server - .handle_message(reader, writer.into(), None, Some(metrics_hook)) - { - match e { - fuse_backend_rs::Error::EncodeMessage(_ebadf) => { - return Err(eio!("fuse session has been shut down")); - } - _ => { - error!("Handling fuse message, {}", NydusError::ProcessQueue(e)); - continue; - } - } - } - } else { - info!("fuse server exits"); - break; - } - } - - Ok(()) - } -} - -pub struct FusedevFsService { - /// Fuse connection ID which usually equals to `st_dev` - pub conn: AtomicU64, - pub failover_policy: FailoverPolicy, - pub session: Mutex, - - server: Arc>>, - upgrade_mgr: Option>, - vfs: Arc, - - backend_collection: Mutex, - inflight_ops: Mutex>, -} - -impl FusedevFsService { - fn new( - vfs: Arc, - mnt: &Path, - supervisor: Option<&String>, - failover_policy: FailoverPolicy, - readonly: bool, - ) -> Result { - let session = FuseSession::new(mnt, "rafs", "", readonly).map_err(|e| eother!(e))?; - let upgrade_mgr = supervisor - .as_ref() - .map(|s| Mutex::new(UpgradeManager::new(s.to_string().into()))); - - Ok(FusedevFsService { - vfs: vfs.clone(), - conn: AtomicU64::new(0), - failover_policy, - session: Mutex::new(session), - server: Arc::new(Server::new(vfs)), - upgrade_mgr, - - backend_collection: Default::default(), - inflight_ops: Default::default(), - }) - } - - fn create_fuse_server(&self) -> Result { - FuseServer::new(self.server.clone(), self.session.lock().unwrap().deref()) - } - - fn create_inflight_op(&self) -> FuseOpWrapper { - let inflight_op = FuseOpWrapper::default(); - - // "Not expected poisoned lock" - self.inflight_ops.lock().unwrap().push(inflight_op.clone()); - - inflight_op - } - - fn umount(&self) -> NydusResult<()> { - let mut session = self.session.lock().expect("Not expect poisoned lock."); - session.umount().map_err(NydusError::SessionShutdown)?; - session.wake().map_err(NydusError::SessionShutdown)?; - Ok(()) - } -} - -impl FsService for FusedevFsService { - fn get_vfs(&self) -> &Vfs { - &self.vfs - } - - fn upgrade_mgr(&self) -> Option> { - self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) - } - - fn backend_collection(&self) -> MutexGuard { - self.backend_collection.lock().unwrap() - } - - fn export_inflight_ops(&self) -> NydusResult> { - let ops = self.inflight_ops.lock().unwrap(); - - let r = ops - .iter() - .filter(|w| w.op.lock().unwrap().is_some()) - .map(|w| &w.op) - .collect::>>>>(); - - if r.is_empty() { - Ok(None) - } else { - let resp = serde_json::to_string(&r).map_err(NydusError::Serde)?; - Ok(Some(resp)) - } - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -/// Nydus daemon to implement FUSE servers by accessing `/dev/fuse`. -/// -/// One FUSE mountpoint will be created for each [FusedevDaemon] object. Every [FusedevDaemon] -/// object has a built-in [Vfs](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/struct.Vfs.html) -/// object, which can be used to mount multiple RAFS and/or passthroughfs instances. -pub struct FusedevDaemon { - bti: BuildTimeInfo, - id: Option, - request_sender: Arc>>, - result_receiver: Mutex>>, - service: Arc, - state: AtomicI32, - pub supervisor: Option, - threads_cnt: u32, - state_machine_thread: Mutex>>>, - fuse_service_threads: Mutex>>>, - waker: Arc, -} - -impl FusedevDaemon { - /// Create a new instance of [FusedevDaemon]. 
- #[allow(clippy::too_many_arguments)] - pub fn new( - trigger: Sender, - receiver: Receiver>, - vfs: Arc, - mountpoint: &Path, - threads_cnt: u32, - waker: Arc, - bti: BuildTimeInfo, - id: Option, - supervisor: Option, - readonly: bool, - fp: FailoverPolicy, - ) -> Result { - let service = FusedevFsService::new(vfs, mountpoint, supervisor.as_ref(), fp, readonly)?; - - Ok(FusedevDaemon { - bti, - id, - supervisor, - threads_cnt, - waker, - - state: AtomicI32::new(DaemonState::INIT as i32), - result_receiver: Mutex::new(receiver), - request_sender: Arc::new(Mutex::new(trigger)), - service: Arc::new(service), - state_machine_thread: Mutex::new(None), - fuse_service_threads: Mutex::new(Vec::new()), - }) - } - - fn kick_one_server(&self, waker: Arc) -> NydusResult<()> { - let mut s = self - .service - .create_fuse_server() - .map_err(NydusError::CreateFuseServer)?; - let inflight_op = self.service.create_inflight_op(); - let thread = thread::Builder::new() - .name("fuse_server".to_string()) - .spawn(move || { - if let Err(_err) = s.svc_loop(&inflight_op) { - // Notify the daemon controller that one working thread has exited. - if let Err(err) = waker.wake() { - error!("fail to exit daemon, error: {:?}", err); - } - } - Ok(()) - }) - .map_err(NydusError::ThreadSpawn)?; - - self.fuse_service_threads.lock().unwrap().push(thread); - - Ok(()) - } -} - -impl DaemonStateMachineSubscriber for FusedevDaemon { - fn on_event(&self, event: DaemonStateMachineInput) -> NydusResult<()> { - self.request_sender - .lock() - .unwrap() - .send(event) - .map_err(NydusError::ChannelSend)?; - - self.result_receiver - .lock() - .expect("Not expect poisoned lock!") - .recv() - .map_err(NydusError::ChannelReceive)? - } -} - -impl NydusDaemon for FusedevDaemon { - fn as_any(&self) -> &dyn Any { - self - } - - fn id(&self) -> Option { - self.id.clone() - } - - fn version(&self) -> BuildTimeInfo { - self.bti.clone() - } - - fn get_state(&self) -> DaemonState { - self.state.load(Ordering::Relaxed).into() - } - - fn set_state(&self, state: DaemonState) { - self.state.store(state as i32, Ordering::Relaxed); - } - - fn start(&self) -> NydusResult<()> { - info!( - "start fuse servers with {} worker threads", - self.threads_cnt - ); - for _ in 0..self.threads_cnt { - let waker = self.waker.clone(); - self.kick_one_server(waker) - .map_err(|e| NydusError::StartService(format!("{}", e)))?; - } - - Ok(()) - } - - fn umount(&self) -> NydusResult<()> { - self.service.umount() - } - - fn stop(&self) { - let session = self - .service - .session - .lock() - .expect("Not expect poisoned lock."); - if let Err(e) = session.wake().map_err(NydusError::SessionShutdown) { - error!("failed to stop FUSE service thread: {:?}", e); - } - } - - fn wait(&self) -> NydusResult<()> { - self.wait_state_machine()?; - self.wait_service() - } - - fn wait_service(&self) -> NydusResult<()> { - loop { - let handle = self.fuse_service_threads.lock().unwrap().pop(); - if let Some(handle) = handle { - handle - .join() - .map_err(|e| { - let e = *e - .downcast::() - .unwrap_or_else(|e| Box::new(eother!(e))); - NydusError::WaitDaemon(e) - })? 
- .map_err(NydusError::WaitDaemon)?; - } else { - // No more handles to wait - break; - } - } - - Ok(()) - } - - fn wait_state_machine(&self) -> NydusResult<()> { - let mut guard = self.state_machine_thread.lock().unwrap(); - if let Some(handler) = guard.take() { - let result = handler.join().map_err(|e| { - let e = *e - .downcast::() - .unwrap_or_else(|e| Box::new(eother!(e))); - NydusError::WaitDaemon(e) - })?; - result.map_err(NydusError::WaitDaemon) - } else { - Ok(()) - } - } - - fn supervisor(&self) -> Option { - self.supervisor.clone() - } - - fn save(&self) -> NydusResult<()> { - upgrade::fusedev_upgrade::save(self) - } - - fn restore(&self) -> NydusResult<()> { - upgrade::fusedev_upgrade::restore(self) - } - - fn get_default_fs_service(&self) -> Option> { - Some(self.service.clone()) - } -} - -#[cfg(target_os = "macos")] -fn is_mounted(mp: impl AsRef) -> Result { - let mp = mp - .as_ref() - .to_str() - .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?; - let mp = CString::new(String::from(mp)).map_err(|_| Error::from_raw_os_error(libc::EINVAL))?; - let mut mpb: Vec = Vec::new(); - let mut mpb_ptr = mpb.as_mut_ptr(); - let mpb_ptr = &mut mpb_ptr; - - let mpb: Vec = unsafe { - let res = libc::getmntinfo(mpb_ptr, libc::MNT_NOWAIT); - if res < 0 { - return Err(Error::from_raw_os_error(res)); - } - let size = res as usize; - Vec::from_raw_parts(*mpb_ptr, size, size) - }; - let match_mp = mpb.iter().find(|mp_stat| unsafe { - let mp_name = CStr::from_ptr(&mp_stat.f_mntonname as *const i8); - let mp = CStr::from_ptr(mp.as_ptr()); - mp.eq(mp_name) - }); - - Ok(match_mp.is_some()) -} - -// TODO: Perhaps, we can't rely on `/proc/self/mounts` to tell if it is mounted. -#[cfg(target_os = "linux")] -fn is_mounted(mp: impl AsRef) -> Result { - let mounts = CString::new("/proc/self/mounts").unwrap(); - let ty = CString::new("r").unwrap(); - - let mounts_stream = unsafe { - libc::setmntent( - mounts.as_ptr() as *const libc::c_char, - ty.as_ptr() as *const libc::c_char, - ) - }; - - loop { - let mnt = unsafe { libc::getmntent(mounts_stream) }; - if mnt as u32 == libc::PT_NULL { - break; - } - - // Mount point path - if unsafe { CStr::from_ptr((*mnt).mnt_dir) } - == CString::new(mp.as_ref().as_os_str().as_bytes())?.as_c_str() - { - unsafe { libc::endmntent(mounts_stream) }; - return Ok(true); - } - } - - unsafe { libc::endmntent(mounts_stream) }; - - Ok(false) -} - -fn is_sock_residual(sock: impl AsRef) -> bool { - if metadata(&sock).is_ok() { - return UnixStream::connect(&sock).is_err(); - } - - false -} - -/// When nydusd starts, it checks that whether a previous nydusd died unexpected by: -/// 1. Checking whether the mount point is residual by retrieving `/proc/self/mounts`. -/// 2. Checking whether the API socket exists and the connection can established or not. -fn is_crashed(path: impl AsRef, sock: &impl AsRef) -> Result { - if is_mounted(path)? && is_sock_residual(sock) { - warn!("A previous daemon crashed! 
Try to failover later."); - return Ok(true); - } - - Ok(false) -} - -#[cfg(target_os = "macos")] -fn calc_fuse_conn(mp: impl AsRef) -> Result { - let st = metadata(mp.as_ref()).map_err(|e| { - error!("Stat mountpoint {:?}, {}", mp.as_ref(), &e); - e - })?; - Ok(st.dev()) -} - -#[cfg(target_os = "linux")] -fn calc_fuse_conn(mp: impl AsRef) -> Result { - let st = metadata(mp.as_ref()).map_err(|e| { - error!("Stat mountpoint {:?}, {}", mp.as_ref(), &e); - e - })?; - let dev = st.st_dev(); - let (major, minor) = (major(dev), minor(dev)); - - // According to kernel formula: MKDEV(ma,mi) (((ma) << 20) | (mi)) - Ok(major << 20 | minor) -} - -/// Create and start a [FusedevDaemon] instance. -#[allow(clippy::too_many_arguments)] -pub fn create_fuse_daemon( - mountpoint: &str, - vfs: Arc, - supervisor: Option, - id: Option, - threads_cnt: u32, - waker: Arc, - api_sock: Option>, - upgrade: bool, - readonly: bool, - fp: FailoverPolicy, - mount_cmd: Option, - bti: BuildTimeInfo, -) -> Result> { - let mnt = Path::new(mountpoint).canonicalize()?; - let (trigger, events_rx) = channel::(); - let (result_sender, result_receiver) = channel::>(); - let daemon = FusedevDaemon::new( - trigger, - result_receiver, - vfs, - &mnt, - threads_cnt, - waker, - bti, - id, - supervisor, - readonly, - fp, - )?; - let daemon = Arc::new(daemon); - let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); - let machine_thread = machine.kick_state_machine()?; - *daemon.state_machine_thread.lock().unwrap() = Some(machine_thread); - - // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper - // finding a victim is not necessary. - if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(&mnt, api_sock.as_ref().unwrap())?) - || api_sock.is_none() - { - if let Some(cmd) = mount_cmd { - daemon.service.mount(cmd).map_err(|e| { - error!("service mount error: {}", &e); - eother!(e) - })?; - } - daemon - .service - .session - .lock() - .unwrap() - .mount() - .map_err(|e| { - error!("service session mount error: {}", &e); - eother!(e) - })?; - - daemon - .on_event(DaemonStateMachineInput::Mount) - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Start) - .map_err(|e| eother!(e))?; - daemon - .service - .conn - .store(calc_fuse_conn(mnt)?, Ordering::Relaxed); - - if let Some(f) = daemon.service.session.lock().unwrap().get_fuse_file() { - if let Some(mut m) = daemon.service.upgrade_mgr() { - m.hold_file(f).map_err(|e| { - error!("Failed to hold fusedev fd, {:?}", e); - eother!(e) - })?; - m.save_fuse_cid(daemon.service.conn.load(Ordering::Acquire)); - } - } - } - - Ok(daemon) -} - -/// Create vfs backend with rafs or passthrough as the fuse filesystem driver - -#[cfg(target_os = "macos")] -pub fn create_vfs_backend( - _fs_type: FsBackendType, - _is_fuse: bool, - _hybrid_mode: bool, -) -> Result> { - let vfs = fuse_backend_rs::api::Vfs::new(fuse_backend_rs::api::VfsOptions::default()); - Ok(Arc::new(vfs)) -} - -#[cfg(target_os = "linux")] -pub fn create_vfs_backend( - fs_type: FsBackendType, - is_fuse: bool, - hybrid_mode: bool, -) -> Result> { - let mut opts = fuse_backend_rs::api::VfsOptions::default(); - match fs_type { - FsBackendType::PassthroughFs => { - // passthroughfs requires !no_open - opts.no_open = false; - opts.no_opendir = false; - opts.killpriv_v2 = true; - } - FsBackendType::Rafs => { - // rafs can be readonly and skip open - opts.no_open = true; - } - }; - - if !is_fuse && hybrid_mode { - opts.no_open = false; - opts.no_opendir = 
false; - opts.killpriv_v2 = true; - } - - let vfs = fuse_backend_rs::api::Vfs::new(opts); - Ok(Arc::new(vfs)) -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Nydus FUSE filesystem daemon. + +use std::any::Any; +use std::ffi::{CStr, CString}; +use std::fs::metadata; +use std::io::{Error, ErrorKind, Result}; +use std::ops::Deref; +#[cfg(target_os = "linux")] +use std::os::linux::fs::MetadataExt; +#[cfg(target_os = "linux")] +use std::os::unix::ffi::OsStrExt; +#[cfg(target_os = "macos")] +use std::os::unix::fs::MetadataExt; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::sync::{ + atomic::{AtomicI32, AtomicU64, Ordering}, + mpsc::{channel, Receiver, Sender}, + Arc, Mutex, MutexGuard, +}; +use std::thread::{self, JoinHandle}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use fuse_backend_rs::abi::fuse_abi::{InHeader, OutHeader}; +use fuse_backend_rs::api::server::{MetricsHook, Server}; +use fuse_backend_rs::api::Vfs; +use fuse_backend_rs::transport::{FuseChannel, FuseSession}; +use mio::Waker; +#[cfg(target_os = "linux")] +use nix::sys::stat::{major, minor}; +use nydus_api::BuildTimeInfo; +use serde::Serialize; + +use crate::daemon::{ + DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, + NydusDaemon, +}; +use crate::fs_service::{FsBackendCollection, FsBackendMountCmd, FsService}; +use crate::upgrade::{self, FailoverPolicy, UpgradeManager}; +use crate::{Error as NydusError, FsBackendType, Result as NydusResult}; + +#[derive(Serialize)] +struct FuseOp { + inode: u64, + opcode: u32, + unique: u64, + timestamp_secs: u64, +} + +impl Default for FuseOp { + fn default() -> Self { + // unwrap because time can't be earlier than EPOCH. + let timestamp_secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + + Self { + inode: u64::default(), + opcode: u32::default(), + unique: u64::default(), + timestamp_secs, + } + } +} + +#[derive(Default, Clone, Serialize)] +struct FuseOpWrapper { + // Mutex should be acceptable since `inflight_op` is always updated + // within the same thread, which means locking is always directly acquired. + op: Arc>>, +} + +impl MetricsHook for FuseOpWrapper { + fn collect(&self, ih: &InHeader) { + let (n, u, o) = (ih.nodeid, ih.unique, ih.opcode); + // Unwrap is safe because time can't be earlier than EPOCH + let timestamp_secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + let op = FuseOp { + inode: n, + unique: u, + opcode: o, + timestamp_secs, + }; + + *self.op.lock().expect("Not expect poisoned lock") = Some(op); + } + + fn release(&self, _oh: Option<&OutHeader>) { + *self.op.lock().expect("Not expect poisoned lock") = None + } +} + +struct FuseServer { + server: Arc>>, + ch: FuseChannel, +} + +impl FuseServer { + fn new(server: Arc>>, se: &FuseSession) -> Result { + let ch = se.new_channel().map_err(|e| eother!(e))?; + Ok(FuseServer { server, ch }) + } + + fn svc_loop(&mut self, metrics_hook: &dyn MetricsHook) -> Result<()> { + // Given error EBADF, it means kernel has shut down this session. + let _ebadf = Error::from_raw_os_error(libc::EBADF); + + loop { + if let Some((reader, writer)) = self.ch.get_request().map_err(|e| { + Error::new( + ErrorKind::Other, + format!("failed to get fuse request from /dev/fuse, {}", e), + ) + })? 
{ + if let Err(e) = + self.server + .handle_message(reader, writer.into(), None, Some(metrics_hook)) + { + match e { + fuse_backend_rs::Error::EncodeMessage(_ebadf) => { + return Err(eio!("fuse session has been shut down")); + } + _ => { + error!("Handling fuse message, {}", NydusError::ProcessQueue(e)); + continue; + } + } + } + } else { + info!("fuse server exits"); + break; + } + } + + Ok(()) + } +} + +pub struct FusedevFsService { + /// Fuse connection ID which usually equals to `st_dev` + pub conn: AtomicU64, + pub failover_policy: FailoverPolicy, + pub session: Mutex, + + server: Arc>>, + upgrade_mgr: Option>, + vfs: Arc, + + backend_collection: Mutex, + inflight_ops: Mutex>, +} + +impl FusedevFsService { + fn new( + vfs: Arc, + mnt: &Path, + supervisor: Option<&String>, + failover_policy: FailoverPolicy, + readonly: bool, + ) -> Result { + let session = FuseSession::new(mnt, "rafs", "", readonly).map_err(|e| eother!(e))?; + let upgrade_mgr = supervisor + .as_ref() + .map(|s| Mutex::new(UpgradeManager::new(s.to_string().into()))); + + Ok(FusedevFsService { + vfs: vfs.clone(), + conn: AtomicU64::new(0), + failover_policy, + session: Mutex::new(session), + server: Arc::new(Server::new(vfs)), + upgrade_mgr, + + backend_collection: Default::default(), + inflight_ops: Default::default(), + }) + } + + fn create_fuse_server(&self) -> Result { + FuseServer::new(self.server.clone(), self.session.lock().unwrap().deref()) + } + + fn create_inflight_op(&self) -> FuseOpWrapper { + let inflight_op = FuseOpWrapper::default(); + + // "Not expected poisoned lock" + self.inflight_ops.lock().unwrap().push(inflight_op.clone()); + + inflight_op + } + + fn umount(&self) -> NydusResult<()> { + let mut session = self.session.lock().expect("Not expect poisoned lock."); + session.umount().map_err(NydusError::SessionShutdown)?; + session.wake().map_err(NydusError::SessionShutdown)?; + Ok(()) + } +} + +impl FsService for FusedevFsService { + fn get_vfs(&self) -> &Vfs { + &self.vfs + } + + fn upgrade_mgr(&self) -> Option> { + self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) + } + + fn backend_collection(&self) -> MutexGuard { + self.backend_collection.lock().unwrap() + } + + fn export_inflight_ops(&self) -> NydusResult> { + let ops = self.inflight_ops.lock().unwrap(); + + let r = ops + .iter() + .filter(|w| w.op.lock().unwrap().is_some()) + .map(|w| &w.op) + .collect::>>>>(); + + if r.is_empty() { + Ok(None) + } else { + let resp = serde_json::to_string(&r).map_err(NydusError::Serde)?; + Ok(Some(resp)) + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +/// Nydus daemon to implement FUSE servers by accessing `/dev/fuse`. +/// +/// One FUSE mountpoint will be created for each [FusedevDaemon] object. Every [FusedevDaemon] +/// object has a built-in [Vfs](https://docs.rs/fuse-backend-rs/latest/fuse_backend_rs/api/vfs/struct.Vfs.html) +/// object, which can be used to mount multiple RAFS and/or passthroughfs instances. +pub struct FusedevDaemon { + bti: BuildTimeInfo, + id: Option, + request_sender: Arc>>, + result_receiver: Mutex>>, + service: Arc, + state: AtomicI32, + pub supervisor: Option, + threads_cnt: u32, + state_machine_thread: Mutex>>>, + fuse_service_threads: Mutex>>>, + waker: Arc, +} + +impl FusedevDaemon { + /// Create a new instance of [FusedevDaemon]. 
+ #[allow(clippy::too_many_arguments)] + pub fn new( + trigger: Sender, + receiver: Receiver>, + vfs: Arc, + mountpoint: &Path, + threads_cnt: u32, + waker: Arc, + bti: BuildTimeInfo, + id: Option, + supervisor: Option, + readonly: bool, + fp: FailoverPolicy, + ) -> Result { + let service = FusedevFsService::new(vfs, mountpoint, supervisor.as_ref(), fp, readonly)?; + + Ok(FusedevDaemon { + bti, + id, + supervisor, + threads_cnt, + waker, + + state: AtomicI32::new(DaemonState::INIT as i32), + result_receiver: Mutex::new(receiver), + request_sender: Arc::new(Mutex::new(trigger)), + service: Arc::new(service), + state_machine_thread: Mutex::new(None), + fuse_service_threads: Mutex::new(Vec::new()), + }) + } + + fn kick_one_server(&self, waker: Arc) -> NydusResult<()> { + let mut s = self + .service + .create_fuse_server() + .map_err(NydusError::CreateFuseServer)?; + let inflight_op = self.service.create_inflight_op(); + let thread = thread::Builder::new() + .name("fuse_server".to_string()) + .spawn(move || { + if let Err(_err) = s.svc_loop(&inflight_op) { + // Notify the daemon controller that one working thread has exited. + if let Err(err) = waker.wake() { + error!("fail to exit daemon, error: {:?}", err); + } + } + Ok(()) + }) + .map_err(NydusError::ThreadSpawn)?; + + self.fuse_service_threads.lock().unwrap().push(thread); + + Ok(()) + } +} + +impl DaemonStateMachineSubscriber for FusedevDaemon { + fn on_event(&self, event: DaemonStateMachineInput) -> NydusResult<()> { + self.request_sender + .lock() + .unwrap() + .send(event) + .map_err(NydusError::ChannelSend)?; + + self.result_receiver + .lock() + .expect("Not expect poisoned lock!") + .recv() + .map_err(NydusError::ChannelReceive)? + } +} + +impl NydusDaemon for FusedevDaemon { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> Option { + self.id.clone() + } + + fn version(&self) -> BuildTimeInfo { + self.bti.clone() + } + + fn get_state(&self) -> DaemonState { + self.state.load(Ordering::Relaxed).into() + } + + fn set_state(&self, state: DaemonState) { + self.state.store(state as i32, Ordering::Relaxed); + } + + fn start(&self) -> NydusResult<()> { + info!( + "start fuse servers with {} worker threads", + self.threads_cnt + ); + for _ in 0..self.threads_cnt { + let waker = self.waker.clone(); + self.kick_one_server(waker) + .map_err(|e| NydusError::StartService(format!("{}", e)))?; + } + + Ok(()) + } + + fn umount(&self) -> NydusResult<()> { + self.service.umount() + } + + fn stop(&self) { + let session = self + .service + .session + .lock() + .expect("Not expect poisoned lock."); + if let Err(e) = session.wake().map_err(NydusError::SessionShutdown) { + error!("failed to stop FUSE service thread: {:?}", e); + } + } + + fn wait(&self) -> NydusResult<()> { + self.wait_state_machine()?; + self.wait_service() + } + + fn wait_service(&self) -> NydusResult<()> { + loop { + let handle = self.fuse_service_threads.lock().unwrap().pop(); + if let Some(handle) = handle { + handle + .join() + .map_err(|e| { + let e = *e + .downcast::() + .unwrap_or_else(|e| Box::new(eother!(e))); + NydusError::WaitDaemon(e) + })? 
+ .map_err(NydusError::WaitDaemon)?; + } else { + // No more handles to wait + break; + } + } + + Ok(()) + } + + fn wait_state_machine(&self) -> NydusResult<()> { + let mut guard = self.state_machine_thread.lock().unwrap(); + if let Some(handler) = guard.take() { + let result = handler.join().map_err(|e| { + let e = *e + .downcast::() + .unwrap_or_else(|e| Box::new(eother!(e))); + NydusError::WaitDaemon(e) + })?; + result.map_err(NydusError::WaitDaemon) + } else { + Ok(()) + } + } + + fn supervisor(&self) -> Option { + self.supervisor.clone() + } + + fn save(&self) -> NydusResult<()> { + upgrade::fusedev_upgrade::save(self) + } + + fn restore(&self) -> NydusResult<()> { + upgrade::fusedev_upgrade::restore(self) + } + + fn get_default_fs_service(&self) -> Option> { + Some(self.service.clone()) + } +} + +#[cfg(target_os = "macos")] +fn is_mounted(mp: impl AsRef) -> Result { + let mp = mp + .as_ref() + .to_str() + .ok_or_else(|| Error::from_raw_os_error(libc::EINVAL))?; + let mp = CString::new(String::from(mp)).map_err(|_| Error::from_raw_os_error(libc::EINVAL))?; + let mut mpb: Vec = Vec::new(); + let mut mpb_ptr = mpb.as_mut_ptr(); + let mpb_ptr = &mut mpb_ptr; + + let mpb: Vec = unsafe { + let res = libc::getmntinfo(mpb_ptr, libc::MNT_NOWAIT); + if res < 0 { + return Err(Error::from_raw_os_error(res)); + } + let size = res as usize; + Vec::from_raw_parts(*mpb_ptr, size, size) + }; + let match_mp = mpb.iter().find(|mp_stat| unsafe { + let mp_name = CStr::from_ptr(&mp_stat.f_mntonname as *const i8); + let mp = CStr::from_ptr(mp.as_ptr()); + mp.eq(mp_name) + }); + + Ok(match_mp.is_some()) +} + +// TODO: Perhaps, we can't rely on `/proc/self/mounts` to tell if it is mounted. +#[cfg(target_os = "linux")] +fn is_mounted(mp: impl AsRef) -> Result { + let mounts = CString::new("/proc/self/mounts").unwrap(); + let ty = CString::new("r").unwrap(); + + let mounts_stream = unsafe { + libc::setmntent( + mounts.as_ptr() as *const libc::c_char, + ty.as_ptr() as *const libc::c_char, + ) + }; + + loop { + let mnt = unsafe { libc::getmntent(mounts_stream) }; + if mnt as u32 == libc::PT_NULL { + break; + } + + // Mount point path + if unsafe { CStr::from_ptr((*mnt).mnt_dir) } + == CString::new(mp.as_ref().as_os_str().as_bytes())?.as_c_str() + { + unsafe { libc::endmntent(mounts_stream) }; + return Ok(true); + } + } + + unsafe { libc::endmntent(mounts_stream) }; + + Ok(false) +} + +fn is_sock_residual(sock: impl AsRef) -> bool { + if metadata(&sock).is_ok() { + return UnixStream::connect(&sock).is_err(); + } + + false +} + +/// When nydusd starts, it checks that whether a previous nydusd died unexpected by: +/// 1. Checking whether the mount point is residual by retrieving `/proc/self/mounts`. +/// 2. Checking whether the API socket exists and the connection can established or not. +fn is_crashed(path: impl AsRef, sock: &impl AsRef) -> Result { + if is_mounted(path)? && is_sock_residual(sock) { + warn!("A previous daemon crashed! 
Try to failover later."); + return Ok(true); + } + + Ok(false) +} + +#[cfg(target_os = "macos")] +fn calc_fuse_conn(mp: impl AsRef) -> Result { + let st = metadata(mp.as_ref()).map_err(|e| { + error!("Stat mountpoint {:?}, {}", mp.as_ref(), &e); + e + })?; + Ok(st.dev()) +} + +#[cfg(target_os = "linux")] +fn calc_fuse_conn(mp: impl AsRef) -> Result { + let st = metadata(mp.as_ref()).map_err(|e| { + error!("Stat mountpoint {:?}, {}", mp.as_ref(), &e); + e + })?; + let dev = st.st_dev(); + let (major, minor) = (major(dev), minor(dev)); + + // According to kernel formula: MKDEV(ma,mi) (((ma) << 20) | (mi)) + Ok(major << 20 | minor) +} + +/// Create and start a [FusedevDaemon] instance. +#[allow(clippy::too_many_arguments)] +pub fn create_fuse_daemon( + mountpoint: &str, + vfs: Arc, + supervisor: Option, + id: Option, + threads_cnt: u32, + waker: Arc, + api_sock: Option>, + upgrade: bool, + readonly: bool, + fp: FailoverPolicy, + mount_cmd: Option, + bti: BuildTimeInfo, +) -> Result> { + let mnt = Path::new(mountpoint).canonicalize()?; + let (trigger, events_rx) = channel::(); + let (result_sender, result_receiver) = channel::>(); + let daemon = FusedevDaemon::new( + trigger, + result_receiver, + vfs, + &mnt, + threads_cnt, + waker, + bti, + id, + supervisor, + readonly, + fp, + )?; + let daemon = Arc::new(daemon); + let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); + let machine_thread = machine.kick_state_machine()?; + *daemon.state_machine_thread.lock().unwrap() = Some(machine_thread); + + // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper + // finding a victim is not necessary. + if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(&mnt, api_sock.as_ref().unwrap())?) + || api_sock.is_none() + { + if let Some(cmd) = mount_cmd { + daemon.service.mount(cmd).map_err(|e| { + error!("service mount error: {}", &e); + eother!(e) + })?; + } + daemon + .service + .session + .lock() + .unwrap() + .mount() + .map_err(|e| { + error!("service session mount error: {}", &e); + eother!(e) + })?; + + daemon + .on_event(DaemonStateMachineInput::Mount) + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Start) + .map_err(|e| eother!(e))?; + daemon + .service + .conn + .store(calc_fuse_conn(mnt)?, Ordering::Relaxed); + + if let Some(f) = daemon.service.session.lock().unwrap().get_fuse_file() { + if let Some(mut m) = daemon.service.upgrade_mgr() { + m.hold_file(f).map_err(|e| { + error!("Failed to hold fusedev fd, {:?}", e); + eother!(e) + })?; + m.save_fuse_cid(daemon.service.conn.load(Ordering::Acquire)); + } + } + } + + Ok(daemon) +} + +/// Create vfs backend with rafs or passthrough as the fuse filesystem driver + +#[cfg(target_os = "macos")] +pub fn create_vfs_backend( + _fs_type: FsBackendType, + _is_fuse: bool, + _hybrid_mode: bool, +) -> Result> { + let vfs = fuse_backend_rs::api::Vfs::new(fuse_backend_rs::api::VfsOptions::default()); + Ok(Arc::new(vfs)) +} + +#[cfg(target_os = "linux")] +pub fn create_vfs_backend( + fs_type: FsBackendType, + is_fuse: bool, + hybrid_mode: bool, +) -> Result> { + let mut opts = fuse_backend_rs::api::VfsOptions::default(); + match fs_type { + FsBackendType::PassthroughFs => { + // passthroughfs requires !no_open + opts.no_open = false; + opts.no_opendir = false; + opts.killpriv_v2 = true; + } + FsBackendType::Rafs => { + // rafs can be readonly and skip open + opts.no_open = true; + } + }; + + if !is_fuse && hybrid_mode { + opts.no_open = false; + opts.no_opendir = 
false; + opts.killpriv_v2 = true; + } + + let vfs = fuse_backend_rs::api::Vfs::new(opts); + Ok(Arc::new(vfs)) +} diff --git a/service/src/lib.rs b/service/src/lib.rs index 8e47b27b2fd..27e04ab1389 100644 --- a/service/src/lib.rs +++ b/service/src/lib.rs @@ -1,294 +1,294 @@ -// Copyright 2021 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Nydus Image Service Management Framework -//! -//! The `nydus-service` crate provides facilities to manage Nydus services, such as: -//! - `blobfs`: share processed RAFS metadata/data blobs to guest by virtio-fs, so the RAFS -//! filesystem can be mounted by EROFS inside guest. -//! - `blockdev`: compose processed RAFS metadata/data as a block device, so it can be used as -//! backend for virtio-blk. -//! - `fscache`: cooperate Linux fscache subsystem to mount RAFS filesystems by EROFS. -//! - `fuse`: mount RAFS filesystems as FUSE filesystems. - -#[macro_use] -extern crate log; -#[macro_use] -extern crate nydus_api; - -use std::fmt::{self, Display}; -use std::io; -use std::str::FromStr; -use std::sync::mpsc::{RecvError, SendError}; - -use fuse_backend_rs::api::vfs::VfsError; -use fuse_backend_rs::transport::Error as FuseTransportError; -use fuse_backend_rs::Error as FuseError; -use nydus_api::{ConfigV2, DaemonErrorKind}; -use nydus_rafs::RafsError; -use serde::{Deserialize, Serialize}; -use serde_json::Error as SerdeError; -use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; -use versionize_derive::Versionize; - -pub mod daemon; -mod fs_service; -mod fusedev; -mod singleton; -pub mod upgrade; - -pub use blob_cache::BlobCacheMgr; -pub use fs_service::{FsBackendCollection, FsBackendMountCmd, FsBackendUmountCmd, FsService}; -pub use fusedev::{create_fuse_daemon, create_vfs_backend, FusedevDaemon}; -pub use singleton::create_daemon; - -#[cfg(target_os = "linux")] -pub mod blob_cache; -#[cfg(all(target_os = "linux", feature = "block-device"))] -pub mod block_device; -#[cfg(all(target_os = "linux", feature = "block-nbd"))] -pub mod block_nbd; -#[cfg(target_os = "linux")] -mod fs_cache; - -#[cfg(target_os = "linux")] -pub use fs_cache::FsCacheHandler; - -/// Error code related to Nydus library. -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("object or filesystem already exists")] - AlreadyExists, - /// Invalid arguments provided. - #[error("invalid argument `{0}`")] - InvalidArguments(String), - #[error("invalid configuration, {0}")] - InvalidConfig(String), - #[error("invalid prefetch file list")] - InvalidPrefetchList, - #[error("object or filesystem doesn't exist")] - NotFound, - #[error("daemon is not ready yet")] - NotReady, - #[error("unsupported request or operation")] - Unsupported, - #[error("failed to serialize/deserialize message, {0}")] - Serde(SerdeError), - #[error("failed to spawn thread, {0}")] - ThreadSpawn(io::Error), - #[error("failed to send message to channel, {0}")] - ChannelSend(#[from] SendError), - #[error("failed to receive message from channel, {0}")] - ChannelReceive(#[from] RecvError), - #[error("failed to upgrade nydusd daemon, {0}")] - UpgradeManager(upgrade::UpgradeMgrError), - #[error("failed to start service, {0}")] - StartService(String), - /// Input event to stat-machine is not expected. 
- #[error("unexpect state machine transition event `{0:?}`")] - UnexpectedEvent(crate::daemon::DaemonStateMachineInput), - #[error("failed to wait daemon, {0}")] - WaitDaemon(#[source] io::Error), - - #[error("filesystem type mismatch, expect {0}")] - FsTypeMismatch(String), - #[error("passthroughfs failed to handle request, {0}")] - PassthroughFs(#[source] io::Error), - #[error("RAFS failed to handle request, {0}")] - Rafs(#[from] RafsError), - #[error("VFS failed to handle request, {0:?}")] - Vfs(#[from] VfsError), - - // fusedev - #[error("failed to create FUSE server, {0}")] - CreateFuseServer(io::Error), - // Fuse session has been shutdown. - #[error("FUSE session has been shut down, {0}")] - SessionShutdown(FuseTransportError), - - // virtio-fs - #[error("failed to handle event other than input event")] - HandleEventNotEpollIn, - #[error("failed to handle unknown event")] - HandleEventUnknownEvent, - #[error("fail to walk descriptor chain")] - IterateQueue, - #[error("invalid Virtio descriptor chain, {0}")] - InvalidDescriptorChain(#[from] FuseTransportError), - #[error("failed to process FUSE request, {0}")] - ProcessQueue(#[from] FuseError), - #[error("failed to create epoll context, {0}")] - Epoll(#[source] io::Error), - #[error("vhost-user failed to process request, {0}")] - VhostUser(String), - #[error("missing memory configuration for virtio queue")] - QueueMemoryUnset, -} - -impl From for io::Error { - fn from(e: Error) -> Self { - einval!(e) - } -} - -impl From for DaemonErrorKind { - fn from(e: Error) -> Self { - use Error::*; - match e { - UpgradeManager(e) => DaemonErrorKind::UpgradeManager(format!("{:?}", e)), - NotReady => DaemonErrorKind::NotReady, - Unsupported => DaemonErrorKind::Unsupported, - Serde(e) => DaemonErrorKind::Serde(e), - UnexpectedEvent(e) => DaemonErrorKind::UnexpectedEvent(format!("{:?}", e)), - o => DaemonErrorKind::Other(o.to_string()), - } - } -} - -/// Specialized `Result` for Nydus library. -pub type Result = std::result::Result; - -/// Type of supported backend filesystems. -#[derive(Clone, Debug, Serialize, PartialEq, Deserialize, Versionize)] -pub enum FsBackendType { - /// Registry Accelerated File System - Rafs, - /// Share an underlying directory as a FUSE filesystem. - PassthroughFs, -} - -impl FromStr for FsBackendType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match s { - "rafs" => Ok(FsBackendType::Rafs), - "passthrough" => Ok(FsBackendType::PassthroughFs), - "passthroughfs" => Ok(FsBackendType::PassthroughFs), - "passthrough_fs" => Ok(FsBackendType::PassthroughFs), - o => Err(Error::InvalidArguments(format!( - "only 'rafs' and 'passthrough_fs' are supported, but {} was specified", - o - ))), - } - } -} - -impl Display for FsBackendType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -/// Backend filesystem descriptor. -#[derive(Serialize, Clone, Deserialize)] -pub struct FsBackendDescriptor { - /// Type of backend filesystem. - pub backend_type: FsBackendType, - /// Mount point for the filesystem. - pub mountpoint: String, - /// Timestamp for the mount operation. - pub mounted_time: time::OffsetDateTime, - /// Optional configuration information for the backend filesystem. - pub config: Option, -} - -/// Validate thread number configuration, valid range is `[1-1024]`. 
-pub fn validate_threads_configuration>(v: V) -> std::result::Result { - if let Ok(t) = v.as_ref().parse::() { - if t > 0 && t <= 1024 { - Ok(t) - } else { - Err(format!( - "invalid thread number {}, valid range: [1-1024]", - t - )) - } - } else { - Err(format!( - "invalid thread number configuration: {}", - v.as_ref() - )) - } -} - -/// Trait to get configuration options for services. -pub trait ServiceArgs { - /// Get value of commandline option `key`. - fn value_of(&self, key: &str) -> Option<&String>; - - /// Check whether commandline optio `key` is present. - fn is_present(&self, key: &str) -> bool; -} - -#[cfg(not(target_os = "linux"))] -mod blob_cache { - use super::*; - - pub struct BlobCacheMgr {} - - impl Default for BlobCacheMgr { - fn default() -> Self { - Self::new() - } - } - - impl BlobCacheMgr { - pub fn new() -> Self { - BlobCacheMgr {} - } - - pub fn add_blob_list(&self, _blobs: &nydus_api::BlobCacheList) -> io::Result<()> { - unimplemented!() - } - - pub fn add_blob_entry(&self, _entry: &nydus_api::BlobCacheEntry) -> Result<()> { - unimplemented!() - } - - pub fn remove_blob_entry(&self, _param: &nydus_api::BlobCacheObjectId) -> Result<()> { - unimplemented!() - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_backend_fs_type() { - assert_eq!( - FsBackendType::from_str("rafs").unwrap(), - FsBackendType::Rafs - ); - assert_eq!( - FsBackendType::from_str("passthrough").unwrap(), - FsBackendType::PassthroughFs - ); - assert_eq!( - FsBackendType::from_str("passthroughfs").unwrap(), - FsBackendType::PassthroughFs - ); - assert_eq!( - FsBackendType::from_str("passthrough_fs").unwrap(), - FsBackendType::PassthroughFs - ); - assert!(FsBackendType::from_str("passthroug").is_err()); - - assert_eq!(format!("{}", FsBackendType::Rafs), "Rafs"); - assert_eq!(format!("{}", FsBackendType::PassthroughFs), "PassthroughFs"); - } - - #[test] - fn test_validate_thread_configuration() { - assert_eq!(validate_threads_configuration("1").unwrap(), 1); - assert_eq!(validate_threads_configuration("1024").unwrap(), 1024); - assert!(validate_threads_configuration("0").is_err()); - assert!(validate_threads_configuration("-1").is_err()); - assert!(validate_threads_configuration("1.0").is_err()); - assert!(validate_threads_configuration("1025").is_err()); - assert!(validate_threads_configuration("test").is_err()); - } -} +// Copyright 2021 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Nydus Image Service Management Framework +//! +//! The `nydus-service` crate provides facilities to manage Nydus services, such as: +//! - `blobfs`: share processed RAFS metadata/data blobs to guest by virtio-fs, so the RAFS +//! filesystem can be mounted by EROFS inside guest. +//! - `blockdev`: compose processed RAFS metadata/data as a block device, so it can be used as +//! backend for virtio-blk. +//! - `fscache`: cooperate Linux fscache subsystem to mount RAFS filesystems by EROFS. +//! - `fuse`: mount RAFS filesystems as FUSE filesystems. 
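Two small helpers exported by this crate and defined later in this file are worth a usage sketch: `FsBackendType::from_str()`, which accepts several spellings of the passthrough backend, and `validate_threads_configuration()`, which bounds the worker thread count to the inclusive range [1, 1024]. The example below assumes an external caller depending on the `nydus-service` crate and simply mirrors the behavior exercised by this file's unit tests; it is illustrative only.

```rust
use std::str::FromStr;

use nydus_service::{validate_threads_configuration, FsBackendType};

fn main() {
    // "rafs", "passthrough", "passthroughfs" and "passthrough_fs" all parse.
    assert_eq!(FsBackendType::from_str("rafs").unwrap(), FsBackendType::Rafs);
    assert_eq!(
        FsBackendType::from_str("passthrough_fs").unwrap(),
        FsBackendType::PassthroughFs
    );
    assert!(FsBackendType::from_str("ext4").is_err());

    // Thread count must be an integer in the inclusive range [1, 1024].
    assert_eq!(validate_threads_configuration("16").unwrap(), 16);
    assert!(validate_threads_configuration("0").is_err());
    assert!(validate_threads_configuration("1025").is_err());
}
```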
+ +#[macro_use] +extern crate log; +#[macro_use] +extern crate nydus_api; + +use std::fmt::{self, Display}; +use std::io; +use std::str::FromStr; +use std::sync::mpsc::{RecvError, SendError}; + +use fuse_backend_rs::api::vfs::VfsError; +use fuse_backend_rs::transport::Error as FuseTransportError; +use fuse_backend_rs::Error as FuseError; +use nydus_api::{ConfigV2, DaemonErrorKind}; +use nydus_rafs::RafsError; +use serde::{Deserialize, Serialize}; +use serde_json::Error as SerdeError; +use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; +use versionize_derive::Versionize; + +pub mod daemon; +mod fs_service; +mod fusedev; +mod singleton; +pub mod upgrade; + +pub use blob_cache::BlobCacheMgr; +pub use fs_service::{FsBackendCollection, FsBackendMountCmd, FsBackendUmountCmd, FsService}; +pub use fusedev::{create_fuse_daemon, create_vfs_backend, FusedevDaemon}; +pub use singleton::create_daemon; + +#[cfg(target_os = "linux")] +pub mod blob_cache; +#[cfg(all(target_os = "linux", feature = "block-device"))] +pub mod block_device; +#[cfg(all(target_os = "linux", feature = "block-nbd"))] +pub mod block_nbd; +#[cfg(target_os = "linux")] +mod fs_cache; + +#[cfg(target_os = "linux")] +pub use fs_cache::FsCacheHandler; + +/// Error code related to Nydus library. +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("object or filesystem already exists")] + AlreadyExists, + /// Invalid arguments provided. + #[error("invalid argument `{0}`")] + InvalidArguments(String), + #[error("invalid configuration, {0}")] + InvalidConfig(String), + #[error("invalid prefetch file list")] + InvalidPrefetchList, + #[error("object or filesystem doesn't exist")] + NotFound, + #[error("daemon is not ready yet")] + NotReady, + #[error("unsupported request or operation")] + Unsupported, + #[error("failed to serialize/deserialize message, {0}")] + Serde(SerdeError), + #[error("failed to spawn thread, {0}")] + ThreadSpawn(io::Error), + #[error("failed to send message to channel, {0}")] + ChannelSend(#[from] SendError), + #[error("failed to receive message from channel, {0}")] + ChannelReceive(#[from] RecvError), + #[error("failed to upgrade nydusd daemon, {0}")] + UpgradeManager(upgrade::UpgradeMgrError), + #[error("failed to start service, {0}")] + StartService(String), + /// Input event to stat-machine is not expected. + #[error("unexpect state machine transition event `{0:?}`")] + UnexpectedEvent(crate::daemon::DaemonStateMachineInput), + #[error("failed to wait daemon, {0}")] + WaitDaemon(#[source] io::Error), + + #[error("filesystem type mismatch, expect {0}")] + FsTypeMismatch(String), + #[error("passthroughfs failed to handle request, {0}")] + PassthroughFs(#[source] io::Error), + #[error("RAFS failed to handle request, {0}")] + Rafs(#[from] RafsError), + #[error("VFS failed to handle request, {0:?}")] + Vfs(#[from] VfsError), + + // fusedev + #[error("failed to create FUSE server, {0}")] + CreateFuseServer(io::Error), + // Fuse session has been shutdown. 
+ #[error("FUSE session has been shut down, {0}")] + SessionShutdown(FuseTransportError), + + // virtio-fs + #[error("failed to handle event other than input event")] + HandleEventNotEpollIn, + #[error("failed to handle unknown event")] + HandleEventUnknownEvent, + #[error("fail to walk descriptor chain")] + IterateQueue, + #[error("invalid Virtio descriptor chain, {0}")] + InvalidDescriptorChain(#[from] FuseTransportError), + #[error("failed to process FUSE request, {0}")] + ProcessQueue(#[from] FuseError), + #[error("failed to create epoll context, {0}")] + Epoll(#[source] io::Error), + #[error("vhost-user failed to process request, {0}")] + VhostUser(String), + #[error("missing memory configuration for virtio queue")] + QueueMemoryUnset, +} + +impl From for io::Error { + fn from(e: Error) -> Self { + einval!(e) + } +} + +impl From for DaemonErrorKind { + fn from(e: Error) -> Self { + use Error::*; + match e { + UpgradeManager(e) => DaemonErrorKind::UpgradeManager(format!("{:?}", e)), + NotReady => DaemonErrorKind::NotReady, + Unsupported => DaemonErrorKind::Unsupported, + Serde(e) => DaemonErrorKind::Serde(e), + UnexpectedEvent(e) => DaemonErrorKind::UnexpectedEvent(format!("{:?}", e)), + o => DaemonErrorKind::Other(o.to_string()), + } + } +} + +/// Specialized `Result` for Nydus library. +pub type Result = std::result::Result; + +/// Type of supported backend filesystems. +#[derive(Clone, Debug, Serialize, PartialEq, Deserialize, Versionize)] +pub enum FsBackendType { + /// Registry Accelerated File System + Rafs, + /// Share an underlying directory as a FUSE filesystem. + PassthroughFs, +} + +impl FromStr for FsBackendType { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "rafs" => Ok(FsBackendType::Rafs), + "passthrough" => Ok(FsBackendType::PassthroughFs), + "passthroughfs" => Ok(FsBackendType::PassthroughFs), + "passthrough_fs" => Ok(FsBackendType::PassthroughFs), + o => Err(Error::InvalidArguments(format!( + "only 'rafs' and 'passthrough_fs' are supported, but {} was specified", + o + ))), + } + } +} + +impl Display for FsBackendType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +/// Backend filesystem descriptor. +#[derive(Serialize, Clone, Deserialize)] +pub struct FsBackendDescriptor { + /// Type of backend filesystem. + pub backend_type: FsBackendType, + /// Mount point for the filesystem. + pub mountpoint: String, + /// Timestamp for the mount operation. + pub mounted_time: time::OffsetDateTime, + /// Optional configuration information for the backend filesystem. + pub config: Option, +} + +/// Validate thread number configuration, valid range is `[1-1024]`. +pub fn validate_threads_configuration>(v: V) -> std::result::Result { + if let Ok(t) = v.as_ref().parse::() { + if t > 0 && t <= 1024 { + Ok(t) + } else { + Err(format!( + "invalid thread number {}, valid range: [1-1024]", + t + )) + } + } else { + Err(format!( + "invalid thread number configuration: {}", + v.as_ref() + )) + } +} + +/// Trait to get configuration options for services. +pub trait ServiceArgs { + /// Get value of commandline option `key`. + fn value_of(&self, key: &str) -> Option<&String>; + + /// Check whether commandline optio `key` is present. 
+ fn is_present(&self, key: &str) -> bool; +} + +#[cfg(not(target_os = "linux"))] +mod blob_cache { + use super::*; + + pub struct BlobCacheMgr {} + + impl Default for BlobCacheMgr { + fn default() -> Self { + Self::new() + } + } + + impl BlobCacheMgr { + pub fn new() -> Self { + BlobCacheMgr {} + } + + pub fn add_blob_list(&self, _blobs: &nydus_api::BlobCacheList) -> io::Result<()> { + unimplemented!() + } + + pub fn add_blob_entry(&self, _entry: &nydus_api::BlobCacheEntry) -> Result<()> { + unimplemented!() + } + + pub fn remove_blob_entry(&self, _param: &nydus_api::BlobCacheObjectId) -> Result<()> { + unimplemented!() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_backend_fs_type() { + assert_eq!( + FsBackendType::from_str("rafs").unwrap(), + FsBackendType::Rafs + ); + assert_eq!( + FsBackendType::from_str("passthrough").unwrap(), + FsBackendType::PassthroughFs + ); + assert_eq!( + FsBackendType::from_str("passthroughfs").unwrap(), + FsBackendType::PassthroughFs + ); + assert_eq!( + FsBackendType::from_str("passthrough_fs").unwrap(), + FsBackendType::PassthroughFs + ); + assert!(FsBackendType::from_str("passthroug").is_err()); + + assert_eq!(format!("{}", FsBackendType::Rafs), "Rafs"); + assert_eq!(format!("{}", FsBackendType::PassthroughFs), "PassthroughFs"); + } + + #[test] + fn test_validate_thread_configuration() { + assert_eq!(validate_threads_configuration("1").unwrap(), 1); + assert_eq!(validate_threads_configuration("1024").unwrap(), 1024); + assert!(validate_threads_configuration("0").is_err()); + assert!(validate_threads_configuration("-1").is_err()); + assert!(validate_threads_configuration("1.0").is_err()); + assert!(validate_threads_configuration("1025").is_err()); + assert!(validate_threads_configuration("test").is_err()); + } +} diff --git a/service/src/singleton.rs b/service/src/singleton.rs index 6546c93d3ad..8a422b553e4 100644 --- a/service/src/singleton.rs +++ b/service/src/singleton.rs @@ -1,504 +1,504 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -//! Nydus daemon to host multiple services, including fscache and fusedev. - -use std::any::Any; -use std::fs::metadata; -#[cfg(target_os = "linux")] -use std::fs::{File, OpenOptions}; -use std::os::unix::net::UnixStream; -use std::path::Path; -use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::{Arc, Mutex, MutexGuard}; - -use mio::Waker; -use nydus_api::config::BlobCacheList; -use nydus_api::BuildTimeInfo; - -use crate::daemon::{ - DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, - NydusDaemon, -}; -use crate::fs_service::FsService; -#[cfg(target_os = "linux")] -use crate::upgrade; -use crate::upgrade::UpgradeManager; -use crate::{BlobCacheMgr, Error, Result}; - -#[allow(dead_code)] -pub struct ServiceController { - bti: BuildTimeInfo, - id: Option, - request_sender: Arc>>, - result_receiver: Mutex>>, - state: AtomicI32, - supervisor: Option, - waker: Arc, - - blob_cache_mgr: Arc, - upgrade_mgr: Option>, - fscache_enabled: AtomicBool, - #[cfg(target_os = "linux")] - fscache: Mutex>>, -} - -impl ServiceController { - /// Start all enabled services. 
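    ///
    /// For the fscache service this spawns one worker thread per configured
    /// working thread; each worker wakes the daemon's waker when its run loop
    /// exits so the controller can notice worker shutdown.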
- fn start_services(&self) -> std::io::Result<()> { - info!("Starting all Nydus services..."); - - #[cfg(target_os = "linux")] - if self.fscache_enabled.load(Ordering::Acquire) { - if let Some(fscache) = self.fscache.lock().unwrap().clone() { - for _ in 0..fscache.working_threads() { - let fscache2 = fscache.clone(); - let waker = self.waker.clone(); - std::thread::spawn(move || { - if let Err(e) = fscache2.run_loop() { - error!("Failed to run fscache service loop, {}", e); - } - // Notify the global service controller that one working thread is exiting. - if let Err(err) = waker.wake() { - error!("fail to exit daemon, error: {:?}", err); - } - }); - } - } - } - - Ok(()) - } - - /// Stop all enabled services. - fn stop_services(&self) { - info!("Stopping all Nydus services..."); - - #[cfg(target_os = "linux")] - if self.fscache_enabled.load(Ordering::Acquire) { - if let Some(fscache) = self.fscache.lock().unwrap().take() { - fscache.stop(); - } - } - } - - fn initialize_blob_cache(&self, config: &Option) -> std::io::Result<()> { - // Create blob cache objects configured by the configuration file. - if let Some(config) = config { - if let Some(config1) = config.as_object() { - if config1.contains_key("blobs") { - if let Ok(v) = serde_json::from_value::(config.clone()) { - if let Err(e) = self.blob_cache_mgr.add_blob_list(&v) { - error!("Failed to add blob list: {}", e); - return Err(e); - } - } - } - } - } - - Ok(()) - } -} - -#[cfg(target_os = "linux")] -impl ServiceController { - pub fn initialize_fscache_service( - &self, - tag: Option<&str>, - threads: usize, - path: &str, - file: Option<&File>, - ) -> std::io::Result<()> { - // Validate --fscache option value is an existing directory. - let p = match std::path::Path::new(&path).canonicalize() { - Err(e) => { - error!("--fscache option needs a directory to cache files"); - return Err(e); - } - Ok(v) => { - if !v.is_dir() { - error!("--fscache options needs a directory to cache files"); - return Err(einval!("--fscache options is not a directory")); - } - v - } - }; - let p = match p.to_str() { - Some(v) => v, - None => { - error!("--fscache option contains invalid characters"); - return Err(einval!("--fscache option contains invalid characters")); - } - }; - - info!( - "Create fscache instance at {} with tag {}, {} working threads", - p, - tag.unwrap_or(""), - threads - ); - let fscache = crate::fs_cache::FsCacheHandler::new( - "/dev/cachefiles", - p, - tag, - self.blob_cache_mgr.clone(), - threads, - file, - )?; - *self.fscache.lock().unwrap() = Some(Arc::new(fscache)); - self.fscache_enabled.store(true, Ordering::Release); - - Ok(()) - } - - fn get_fscache_file(&self) -> std::io::Result { - if let Some(fscache) = self.fscache.lock().unwrap().clone() { - let f = fscache.get_file().try_clone()?; - Ok(f) - } else { - Err(einval!("fscache file not init")) - } - } -} - -impl NydusDaemon for ServiceController { - fn as_any(&self) -> &dyn Any { - self - } - - fn id(&self) -> Option { - self.id.clone() - } - - fn version(&self) -> BuildTimeInfo { - self.bti.clone() - } - - fn get_state(&self) -> DaemonState { - self.state.load(Ordering::Relaxed).into() - } - - fn set_state(&self, state: DaemonState) { - self.state.store(state as i32, Ordering::Relaxed); - } - - fn start(&self) -> Result<()> { - self.start_services() - .map_err(|e| Error::StartService(format!("{}", e))) - } - - fn umount(&self) -> Result<()> { - self.stop_services(); - Ok(()) - } - - fn wait(&self) -> Result<()> { - Ok(()) - } - - fn supervisor(&self) -> Option { - 
self.supervisor.clone() - } - - fn save(&self) -> Result<()> { - #[cfg(target_os = "linux")] - return upgrade::fscache_upgrade::save(self); - #[cfg(target_os = "macos")] - return Ok(()); - } - - fn restore(&self) -> Result<()> { - #[cfg(target_os = "linux")] - return upgrade::fscache_upgrade::restore(self); - #[cfg(target_os = "macos")] - return Ok(()); - } - - fn upgrade_mgr(&self) -> Option> { - self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) - } - - fn get_default_fs_service(&self) -> Option> { - None - } - - fn get_blob_cache_mgr(&self) -> Option> { - Some(self.blob_cache_mgr.clone()) - } - - fn delete_blob(&self, _blob_id: String) -> Result<()> { - #[cfg(target_os = "linux")] - if self.fscache_enabled.load(Ordering::Acquire) { - if let Some(fscache) = self.fscache.lock().unwrap().clone() { - return fscache - .cull_cache(_blob_id) - .map_err(|e| Error::StartService(format!("{}", e))); - } - } - Err(Error::Unsupported) - } -} - -impl DaemonStateMachineSubscriber for ServiceController { - fn on_event(&self, event: DaemonStateMachineInput) -> Result<()> { - self.request_sender - .lock() - .unwrap() - .send(event) - .map_err(Error::ChannelSend)?; - - self.result_receiver - .lock() - .expect("Not expect poisoned lock!") - .recv() - .map_err(Error::ChannelReceive)? - } -} - -#[allow(unused)] -fn is_sock_residual(sock: impl AsRef) -> bool { - if metadata(&sock).is_ok() { - return UnixStream::connect(&sock).is_err(); - } - - false -} -/// When nydusd starts, it checks that whether a previous nydusd died unexpected by: -/// 1. Checking whether /dev/cachefiles can be opened. -/// 2. Checking whether the API socket exists and the connection can established or not. -fn is_crashed(_sock: &impl AsRef) -> Result { - #[cfg(target_os = "linux")] - if let Err(_e) = OpenOptions::new() - .write(true) - .read(true) - .create(false) - .open("/dev/cachefiles") - { - warn!("cachefiles devfd can not open, the devfd may hold by supervisor or another daemon."); - if is_sock_residual(_sock) { - warn!("A previous daemon crashed! Try to failover later."); - return Ok(true); - } - warn!("another daemon is running, will exit!"); - return Err(Error::Unsupported); - } - Ok(false) -} - -/// Create and start a Nydus daemon to host fscache and fusedev services. -#[allow(clippy::too_many_arguments, unused)] -pub fn create_daemon( - id: Option, - supervisor: Option, - fscache: Option<&str>, - tag: Option<&str>, - threads: Option<&str>, - config: Option, - bti: BuildTimeInfo, - waker: Arc, - api_sock: Option>, - upgrade: bool, -) -> std::io::Result> { - let (to_sm, from_client) = channel::(); - let (to_client, from_sm) = channel::>(); - let upgrade_mgr = supervisor - .as_ref() - .map(|s| Mutex::new(UpgradeManager::new(s.to_string().into()))); - - let service_controller = ServiceController { - bti, - id, - request_sender: Arc::new(Mutex::new(to_sm)), - result_receiver: Mutex::new(from_sm), - state: AtomicI32::new(DaemonState::INIT as i32), - supervisor, - waker, - - blob_cache_mgr: Arc::new(BlobCacheMgr::new()), - upgrade_mgr, - fscache_enabled: AtomicBool::new(false), - #[cfg(target_os = "linux")] - fscache: Mutex::new(None), - }; - - service_controller.initialize_blob_cache(&config)?; - - let daemon = Arc::new(service_controller); - let machine = DaemonStateMachineContext::new(daemon.clone(), from_client, to_client); - machine.kick_state_machine()?; - - // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper - // finding a victim is not necessary. 
- if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(api_sock.as_ref().unwrap())?) - || api_sock.is_none() - { - #[cfg(target_os = "linux")] - if let Some(path) = fscache { - let threads = if let Some(threads_value) = threads { - crate::validate_threads_configuration(threads_value).map_err(|err| einval!(err))? - } else { - 1usize - }; - daemon.initialize_fscache_service(tag, threads, path, None)?; - let f = daemon.get_fscache_file()?; - if let Some(mut mgr_guard) = daemon.upgrade_mgr() { - mgr_guard.hold_file(&f).map_err(|e| { - error!("Failed to hold fscache fd, {:?}", e); - eother!(e) - })?; - mgr_guard.save_fscache_states(threads, path.to_string()); - } - } - - daemon - .on_event(DaemonStateMachineInput::Mount) - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Start) - .map_err(|e| eother!(e))?; - } - - Ok(daemon) -} - -#[cfg(all(test, target_os = "linux"))] -mod tests { - use crate::blob_cache::generate_blob_key; - - use super::*; - use mio::{Poll, Token}; - use vmm_sys_util::tempdir::TempDir; - - fn create_service_controller() -> ServiceController { - let bti = BuildTimeInfo { - package_ver: String::from("package_ver"), - git_commit: String::from("git_commit"), - build_time: String::from("build_time"), - profile: String::from("profile"), - rustc: String::from("rustc"), - }; - - let (to_sm, _) = channel::(); - let (_, from_sm) = channel::>(); - - let poller = Poll::new().expect("Failed to create poller"); - let waker = Waker::new(poller.registry(), Token(1)).expect("Failed to create waker"); - - ServiceController { - bti, - id: Some(String::from("id")), - request_sender: Arc::new(Mutex::new(to_sm)), - result_receiver: Mutex::new(from_sm), - state: Default::default(), - supervisor: Some(String::from("supervisor")), - waker: Arc::new(waker), - blob_cache_mgr: Arc::new(BlobCacheMgr::new()), - upgrade_mgr: None, - fscache_enabled: AtomicBool::new(false), - fscache: Mutex::new(None), - } - } - - #[test] - fn test_initialize_fscache_service() { - let service_controller = create_service_controller(); - - assert!(service_controller - .initialize_fscache_service(None, 1, "some path", None) - .is_err()); - - let mut p = std::env::current_dir().unwrap(); - p.push("Cargo.toml"); - assert!(service_controller - .initialize_fscache_service(None, 1, p.to_str().unwrap(), None) - .is_err()); - - let tmp_dir = TempDir::new().unwrap(); - let dir = tmp_dir.as_path().to_str().unwrap(); - assert!(service_controller - .initialize_fscache_service(None, 1, dir, None) - .is_ok()); - - assert_eq!(service_controller.id(), Some(String::from("id"))); - assert_eq!( - service_controller.version().build_time, - String::from("build_time") - ); - assert_eq!( - service_controller.supervisor(), - Some(String::from("supervisor")) - ); - } - - fn create_factory_config() -> String { - let config = r#"{ - "blobs": [{ - "type": "bootstrap", - "id": "rafs-v6", - "domain_id": "domain2", - "config_v2": { - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "fscache", - "fscache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "RAFS_V5" - } - }] - }"#; - config.to_string() - } - - #[test] - fn test_initialize_blob_cache() { - let service_controller = create_service_controller(); - let blob_cache_mgr = service_controller.get_blob_cache_mgr().unwrap(); - let content = create_factory_config(); - let key = generate_blob_key("domain2", "rafs-v6"); - - // test first if - 
assert!(service_controller.initialize_blob_cache(&None).is_ok()); - assert!(blob_cache_mgr.get_config(&key).is_none()); - - //test second if - let config = serde_json::Value::Null; - assert!(service_controller - .initialize_blob_cache(&Some(config)) - .is_ok()); - assert!(blob_cache_mgr.get_config(&key).is_none()); - - // test third if - let cfg = content.replace("blobs", "blob"); - let config: serde_json::Value = serde_json::from_str(&cfg).unwrap(); - assert!(service_controller - .initialize_blob_cache(&Some(config)) - .is_ok()); - assert!(blob_cache_mgr.get_config(&key).is_none()); - - //test fourth if - let tmp_dir = TempDir::new().unwrap(); - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = std::path::PathBuf::from(root_dir); - source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); - let cfg = content - .replace("/tmp/nydus", tmp_dir.as_path().to_str().unwrap()) - .replace("RAFS_V5", &source_path.display().to_string()); - let config: serde_json::Value = serde_json::from_str(&cfg).unwrap(); - assert!(service_controller - .initialize_blob_cache(&Some(config)) - .is_ok()); - assert!(blob_cache_mgr.get_config(&key).is_some()); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +//! Nydus daemon to host multiple services, including fscache and fusedev. + +use std::any::Any; +use std::fs::metadata; +#[cfg(target_os = "linux")] +use std::fs::{File, OpenOptions}; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::{Arc, Mutex, MutexGuard}; + +use mio::Waker; +use nydus_api::config::BlobCacheList; +use nydus_api::BuildTimeInfo; + +use crate::daemon::{ + DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, + NydusDaemon, +}; +use crate::fs_service::FsService; +#[cfg(target_os = "linux")] +use crate::upgrade; +use crate::upgrade::UpgradeManager; +use crate::{BlobCacheMgr, Error, Result}; + +#[allow(dead_code)] +pub struct ServiceController { + bti: BuildTimeInfo, + id: Option, + request_sender: Arc>>, + result_receiver: Mutex>>, + state: AtomicI32, + supervisor: Option, + waker: Arc, + + blob_cache_mgr: Arc, + upgrade_mgr: Option>, + fscache_enabled: AtomicBool, + #[cfg(target_os = "linux")] + fscache: Mutex>>, +} + +impl ServiceController { + /// Start all enabled services. + fn start_services(&self) -> std::io::Result<()> { + info!("Starting all Nydus services..."); + + #[cfg(target_os = "linux")] + if self.fscache_enabled.load(Ordering::Acquire) { + if let Some(fscache) = self.fscache.lock().unwrap().clone() { + for _ in 0..fscache.working_threads() { + let fscache2 = fscache.clone(); + let waker = self.waker.clone(); + std::thread::spawn(move || { + if let Err(e) = fscache2.run_loop() { + error!("Failed to run fscache service loop, {}", e); + } + // Notify the global service controller that one working thread is exiting. + if let Err(err) = waker.wake() { + error!("fail to exit daemon, error: {:?}", err); + } + }); + } + } + } + + Ok(()) + } + + /// Stop all enabled services. 
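    ///
    /// The fscache handler is `take()`n out of its `Mutex<Option<..>>` before
    /// `stop()` is called, so the stop request is only issued once and the
    /// controller drops its reference to the stopped instance.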
+ fn stop_services(&self) { + info!("Stopping all Nydus services..."); + + #[cfg(target_os = "linux")] + if self.fscache_enabled.load(Ordering::Acquire) { + if let Some(fscache) = self.fscache.lock().unwrap().take() { + fscache.stop(); + } + } + } + + fn initialize_blob_cache(&self, config: &Option) -> std::io::Result<()> { + // Create blob cache objects configured by the configuration file. + if let Some(config) = config { + if let Some(config1) = config.as_object() { + if config1.contains_key("blobs") { + if let Ok(v) = serde_json::from_value::(config.clone()) { + if let Err(e) = self.blob_cache_mgr.add_blob_list(&v) { + error!("Failed to add blob list: {}", e); + return Err(e); + } + } + } + } + } + + Ok(()) + } +} + +#[cfg(target_os = "linux")] +impl ServiceController { + pub fn initialize_fscache_service( + &self, + tag: Option<&str>, + threads: usize, + path: &str, + file: Option<&File>, + ) -> std::io::Result<()> { + // Validate --fscache option value is an existing directory. + let p = match std::path::Path::new(&path).canonicalize() { + Err(e) => { + error!("--fscache option needs a directory to cache files"); + return Err(e); + } + Ok(v) => { + if !v.is_dir() { + error!("--fscache options needs a directory to cache files"); + return Err(einval!("--fscache options is not a directory")); + } + v + } + }; + let p = match p.to_str() { + Some(v) => v, + None => { + error!("--fscache option contains invalid characters"); + return Err(einval!("--fscache option contains invalid characters")); + } + }; + + info!( + "Create fscache instance at {} with tag {}, {} working threads", + p, + tag.unwrap_or(""), + threads + ); + let fscache = crate::fs_cache::FsCacheHandler::new( + "/dev/cachefiles", + p, + tag, + self.blob_cache_mgr.clone(), + threads, + file, + )?; + *self.fscache.lock().unwrap() = Some(Arc::new(fscache)); + self.fscache_enabled.store(true, Ordering::Release); + + Ok(()) + } + + fn get_fscache_file(&self) -> std::io::Result { + if let Some(fscache) = self.fscache.lock().unwrap().clone() { + let f = fscache.get_file().try_clone()?; + Ok(f) + } else { + Err(einval!("fscache file not init")) + } + } +} + +impl NydusDaemon for ServiceController { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> Option { + self.id.clone() + } + + fn version(&self) -> BuildTimeInfo { + self.bti.clone() + } + + fn get_state(&self) -> DaemonState { + self.state.load(Ordering::Relaxed).into() + } + + fn set_state(&self, state: DaemonState) { + self.state.store(state as i32, Ordering::Relaxed); + } + + fn start(&self) -> Result<()> { + self.start_services() + .map_err(|e| Error::StartService(format!("{}", e))) + } + + fn umount(&self) -> Result<()> { + self.stop_services(); + Ok(()) + } + + fn wait(&self) -> Result<()> { + Ok(()) + } + + fn supervisor(&self) -> Option { + self.supervisor.clone() + } + + fn save(&self) -> Result<()> { + #[cfg(target_os = "linux")] + return upgrade::fscache_upgrade::save(self); + #[cfg(target_os = "macos")] + return Ok(()); + } + + fn restore(&self) -> Result<()> { + #[cfg(target_os = "linux")] + return upgrade::fscache_upgrade::restore(self); + #[cfg(target_os = "macos")] + return Ok(()); + } + + fn upgrade_mgr(&self) -> Option> { + self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) + } + + fn get_default_fs_service(&self) -> Option> { + None + } + + fn get_blob_cache_mgr(&self) -> Option> { + Some(self.blob_cache_mgr.clone()) + } + + fn delete_blob(&self, _blob_id: String) -> Result<()> { + #[cfg(target_os = "linux")] + if 
self.fscache_enabled.load(Ordering::Acquire) { + if let Some(fscache) = self.fscache.lock().unwrap().clone() { + return fscache + .cull_cache(_blob_id) + .map_err(|e| Error::StartService(format!("{}", e))); + } + } + Err(Error::Unsupported) + } +} + +impl DaemonStateMachineSubscriber for ServiceController { + fn on_event(&self, event: DaemonStateMachineInput) -> Result<()> { + self.request_sender + .lock() + .unwrap() + .send(event) + .map_err(Error::ChannelSend)?; + + self.result_receiver + .lock() + .expect("Not expect poisoned lock!") + .recv() + .map_err(Error::ChannelReceive)? + } +} + +#[allow(unused)] +fn is_sock_residual(sock: impl AsRef) -> bool { + if metadata(&sock).is_ok() { + return UnixStream::connect(&sock).is_err(); + } + + false +} +/// When nydusd starts, it checks that whether a previous nydusd died unexpected by: +/// 1. Checking whether /dev/cachefiles can be opened. +/// 2. Checking whether the API socket exists and the connection can established or not. +fn is_crashed(_sock: &impl AsRef) -> Result { + #[cfg(target_os = "linux")] + if let Err(_e) = OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open("/dev/cachefiles") + { + warn!("cachefiles devfd can not open, the devfd may hold by supervisor or another daemon."); + if is_sock_residual(_sock) { + warn!("A previous daemon crashed! Try to failover later."); + return Ok(true); + } + warn!("another daemon is running, will exit!"); + return Err(Error::Unsupported); + } + Ok(false) +} + +/// Create and start a Nydus daemon to host fscache and fusedev services. +#[allow(clippy::too_many_arguments, unused)] +pub fn create_daemon( + id: Option, + supervisor: Option, + fscache: Option<&str>, + tag: Option<&str>, + threads: Option<&str>, + config: Option, + bti: BuildTimeInfo, + waker: Arc, + api_sock: Option>, + upgrade: bool, +) -> std::io::Result> { + let (to_sm, from_client) = channel::(); + let (to_client, from_sm) = channel::>(); + let upgrade_mgr = supervisor + .as_ref() + .map(|s| Mutex::new(UpgradeManager::new(s.to_string().into()))); + + let service_controller = ServiceController { + bti, + id, + request_sender: Arc::new(Mutex::new(to_sm)), + result_receiver: Mutex::new(from_sm), + state: AtomicI32::new(DaemonState::INIT as i32), + supervisor, + waker, + + blob_cache_mgr: Arc::new(BlobCacheMgr::new()), + upgrade_mgr, + fscache_enabled: AtomicBool::new(false), + #[cfg(target_os = "linux")] + fscache: Mutex::new(None), + }; + + service_controller.initialize_blob_cache(&config)?; + + let daemon = Arc::new(service_controller); + let machine = DaemonStateMachineContext::new(daemon.clone(), from_client, to_client); + machine.kick_state_machine()?; + + // Without api socket, nydusd can't do neither live-upgrade nor failover, so the helper + // finding a victim is not necessary. + if (api_sock.as_ref().is_some() && !upgrade && !is_crashed(api_sock.as_ref().unwrap())?) + || api_sock.is_none() + { + #[cfg(target_os = "linux")] + if let Some(path) = fscache { + let threads = if let Some(threads_value) = threads { + crate::validate_threads_configuration(threads_value).map_err(|err| einval!(err))? 
+ } else { + 1usize + }; + daemon.initialize_fscache_service(tag, threads, path, None)?; + let f = daemon.get_fscache_file()?; + if let Some(mut mgr_guard) = daemon.upgrade_mgr() { + mgr_guard.hold_file(&f).map_err(|e| { + error!("Failed to hold fscache fd, {:?}", e); + eother!(e) + })?; + mgr_guard.save_fscache_states(threads, path.to_string()); + } + } + + daemon + .on_event(DaemonStateMachineInput::Mount) + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Start) + .map_err(|e| eother!(e))?; + } + + Ok(daemon) +} + +#[cfg(all(test, target_os = "linux"))] +mod tests { + use crate::blob_cache::generate_blob_key; + + use super::*; + use mio::{Poll, Token}; + use vmm_sys_util::tempdir::TempDir; + + fn create_service_controller() -> ServiceController { + let bti = BuildTimeInfo { + package_ver: String::from("package_ver"), + git_commit: String::from("git_commit"), + build_time: String::from("build_time"), + profile: String::from("profile"), + rustc: String::from("rustc"), + }; + + let (to_sm, _) = channel::(); + let (_, from_sm) = channel::>(); + + let poller = Poll::new().expect("Failed to create poller"); + let waker = Waker::new(poller.registry(), Token(1)).expect("Failed to create waker"); + + ServiceController { + bti, + id: Some(String::from("id")), + request_sender: Arc::new(Mutex::new(to_sm)), + result_receiver: Mutex::new(from_sm), + state: Default::default(), + supervisor: Some(String::from("supervisor")), + waker: Arc::new(waker), + blob_cache_mgr: Arc::new(BlobCacheMgr::new()), + upgrade_mgr: None, + fscache_enabled: AtomicBool::new(false), + fscache: Mutex::new(None), + } + } + + #[test] + fn test_initialize_fscache_service() { + let service_controller = create_service_controller(); + + assert!(service_controller + .initialize_fscache_service(None, 1, "some path", None) + .is_err()); + + let mut p = std::env::current_dir().unwrap(); + p.push("Cargo.toml"); + assert!(service_controller + .initialize_fscache_service(None, 1, p.to_str().unwrap(), None) + .is_err()); + + let tmp_dir = TempDir::new().unwrap(); + let dir = tmp_dir.as_path().to_str().unwrap(); + assert!(service_controller + .initialize_fscache_service(None, 1, dir, None) + .is_ok()); + + assert_eq!(service_controller.id(), Some(String::from("id"))); + assert_eq!( + service_controller.version().build_time, + String::from("build_time") + ); + assert_eq!( + service_controller.supervisor(), + Some(String::from("supervisor")) + ); + } + + fn create_factory_config() -> String { + let config = r#"{ + "blobs": [{ + "type": "bootstrap", + "id": "rafs-v6", + "domain_id": "domain2", + "config_v2": { + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "fscache", + "fscache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "RAFS_V5" + } + }] + }"#; + config.to_string() + } + + #[test] + fn test_initialize_blob_cache() { + let service_controller = create_service_controller(); + let blob_cache_mgr = service_controller.get_blob_cache_mgr().unwrap(); + let content = create_factory_config(); + let key = generate_blob_key("domain2", "rafs-v6"); + + // test first if + assert!(service_controller.initialize_blob_cache(&None).is_ok()); + assert!(blob_cache_mgr.get_config(&key).is_none()); + + //test second if + let config = serde_json::Value::Null; + assert!(service_controller + .initialize_blob_cache(&Some(config)) + .is_ok()); + assert!(blob_cache_mgr.get_config(&key).is_none()); + + // test third if + let cfg = 
content.replace("blobs", "blob"); + let config: serde_json::Value = serde_json::from_str(&cfg).unwrap(); + assert!(service_controller + .initialize_blob_cache(&Some(config)) + .is_ok()); + assert!(blob_cache_mgr.get_config(&key).is_none()); + + //test fourth if + let tmp_dir = TempDir::new().unwrap(); + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = std::path::PathBuf::from(root_dir); + source_path.push("../tests/texture/bootstrap/rafs-v6-2.2.boot"); + let cfg = content + .replace("/tmp/nydus", tmp_dir.as_path().to_str().unwrap()) + .replace("RAFS_V5", &source_path.display().to_string()); + let config: serde_json::Value = serde_json::from_str(&cfg).unwrap(); + assert!(service_controller + .initialize_blob_cache(&Some(config)) + .is_ok()); + assert!(blob_cache_mgr.get_config(&key).is_some()); + } +} diff --git a/service/src/upgrade.rs b/service/src/upgrade.rs index 821ea4031af..6f4d1341323 100644 --- a/service/src/upgrade.rs +++ b/service/src/upgrade.rs @@ -1,666 +1,666 @@ -// Copyright 2021 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Online upgrade manager for Nydus daemons and filesystems. - -use std::any::TypeId; -use std::collections::HashMap; -use std::convert::{TryFrom, TryInto}; -use std::fs::File; -use std::io; -use std::os::fd::{AsRawFd, FromRawFd}; -use std::path::PathBuf; - -use nydus_api::BlobCacheEntry; -use nydus_upgrade::backend::unix_domain_socket::UdsStorageBackend; -use nydus_upgrade::backend::{StorageBackend, StorageBackendErr}; - -use crate::fs_service::{FsBackendMountCmd, FsBackendUmountCmd}; -use crate::{Error, Result}; -use fuse_backend_rs::api::Vfs; -use versionize::{VersionMap, Versionize, VersionizeResult}; -use versionize_derive::Versionize; - -/// Error codes related to upgrade manager. -#[derive(thiserror::Error, Debug)] -pub enum UpgradeMgrError { - #[error("missing supervisor path")] - MissingSupervisorPath, - - #[error("failed to save/restore data via the backend, {0}")] - StorageBackendError(StorageBackendErr), - #[error("failed to serialize, {0}")] - Serialize(io::Error), - #[error("failed to deserialize, {0}")] - Deserialize(io::Error), - #[error("failed to clone file, {0}")] - CloneFile(io::Error), - #[error("failed to initialize fscache driver, {0}")] - InitializeFscache(io::Error), -} - -impl From for Error { - fn from(e: UpgradeMgrError) -> Self { - Error::UpgradeManager(e) - } -} - -/// FUSE fail-over policies. -#[derive(PartialEq, Eq, Debug)] -pub enum FailoverPolicy { - /// Flush pending requests. - Flush, - /// Resend pending requests. - Resend, -} - -impl TryFrom<&str> for FailoverPolicy { - type Error = std::io::Error; - - fn try_from(p: &str) -> std::result::Result { - match p { - "flush" => Ok(FailoverPolicy::Flush), - "resend" => Ok(FailoverPolicy::Resend), - x => Err(einval!(format!("invalid FUSE fail-over mode {}", x))), - } - } -} - -impl TryFrom<&String> for FailoverPolicy { - type Error = std::io::Error; - - fn try_from(p: &String) -> std::result::Result { - p.as_str().try_into() - } -} - -struct FscacheState { - blob_entry_map: HashMap, - threads: usize, - path: String, -} - -#[derive(Versionize, Clone, Debug)] -struct MountStateWrapper { - cmd: FsBackendMountCmd, - vfs_index: u8, -} - -struct FusedevState { - fs_mount_cmd_map: HashMap, - vfs_state_data: Vec, - fuse_conn_id: u64, -} - -/// Online upgrade manager. 
-pub struct UpgradeManager { - fscache_deamon_stat: FscacheState, - fuse_deamon_stat: FusedevState, - file: Option, - backend: Box, -} - -impl UpgradeManager { - /// Create a new instance of [UpgradeManager]. - pub fn new(socket_path: PathBuf) -> Self { - UpgradeManager { - fscache_deamon_stat: FscacheState { - blob_entry_map: HashMap::new(), - threads: 1, - path: "".to_string(), - }, - fuse_deamon_stat: FusedevState { - fs_mount_cmd_map: HashMap::new(), - vfs_state_data: vec![], - fuse_conn_id: 0, - }, - file: None, - backend: Box::new(UdsStorageBackend::new(socket_path)), - } - } - pub fn add_blob_entry_state(&mut self, entry: BlobCacheEntry) { - let mut blob_state_id = entry.domain_id.to_string(); - blob_state_id.push('/'); - blob_state_id.push_str(&entry.blob_id); - - self.fscache_deamon_stat - .blob_entry_map - .insert(blob_state_id, entry); - } - - pub fn remove_blob_entry_state(&mut self, domain_id: &str, blob_id: &str) { - let mut blob_state_id = domain_id.to_string(); - blob_state_id.push('/'); - // for no shared domain mode, snapshotter will call unbind without blob_id - if !blob_id.is_empty() { - blob_state_id.push_str(blob_id); - } else { - blob_state_id.push_str(domain_id); - } - - if self - .fscache_deamon_stat - .blob_entry_map - .remove(&blob_state_id) - .is_none() - { - warn!("blob {}: state was not saved before!", blob_state_id) - } - } - - pub fn save_fscache_states(&mut self, threads: usize, path: String) { - self.fscache_deamon_stat.path = path; - self.fscache_deamon_stat.threads = threads; - } - - pub fn save_fuse_cid(&mut self, fuse_conn_id: u64) { - self.fuse_deamon_stat.fuse_conn_id = fuse_conn_id; - } - - pub fn save_vfs_stat(&mut self, vfs: &Vfs) -> Result<()> { - let vfs_state_data = vfs.save_to_bytes().map_err(|e| { - let io_err = io::Error::new( - io::ErrorKind::Other, - format!("Failed to save vfs state: {:?}", e), - ); - UpgradeMgrError::Serialize(io_err) - })?; - self.fuse_deamon_stat.vfs_state_data = vfs_state_data; - Ok(()) - } - - /// Add a filesystem instance into the upgrade manager. - pub fn add_mounts_state(&mut self, cmd: FsBackendMountCmd, vfs_index: u8) { - let cmd_wrapper = MountStateWrapper { - cmd: cmd.clone(), - vfs_index, - }; - self.fuse_deamon_stat - .fs_mount_cmd_map - .insert(cmd.mountpoint, cmd_wrapper); - } - - /// Update a filesystem instance in the upgrade manager. - pub fn update_mounts_state(&mut self, cmd: FsBackendMountCmd) -> Result<()> { - match self - .fuse_deamon_stat - .fs_mount_cmd_map - .get_mut(&cmd.mountpoint) - { - Some(cmd_wrapper) => { - cmd_wrapper.cmd = cmd; - Ok(()) - } - None => Err(Error::NotFound), - } - } - - /// Remove a filesystem instance from the upgrade manager. - pub fn remove_mounts_state(&mut self, cmd: FsBackendUmountCmd) { - if self - .fuse_deamon_stat - .fs_mount_cmd_map - .remove(&cmd.mountpoint) - .is_none() - { - warn!( - "mount state for {}: state was not saved before!", - cmd.mountpoint - ) - } - } - - /// Save the fd and daemon state data for online upgrade. - fn save(&mut self, data: &[u8]) -> Result<()> { - let mut fds = Vec::new(); - if let Some(ref f) = self.file { - fds.push(f.as_raw_fd()) - } - - self.backend - .save(&fds, data) - .map_err(UpgradeMgrError::StorageBackendError)?; - Ok(()) - } - - /// Restore the fd and daemon state data for online upgrade. 
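    ///
    /// The storage backend hands back any transferred file descriptors along
    /// with the serialized state; ownership of the first descriptor is taken
    /// over and the raw state bytes are returned for deserialization.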
- fn restore(&mut self) -> Result> { - let (fds, state_data) = self - .backend - .restore() - .map_err(UpgradeMgrError::StorageBackendError)?; - if fds.len() != 1 { - warn!("Too many fds {}, we may not correctly handle it", fds.len()); - } - self.file = Some(unsafe { File::from_raw_fd(fds[0]) }); - Ok(state_data) - } - - pub fn hold_file(&mut self, fd: &File) -> Result<()> { - let f = fd.try_clone().map_err(UpgradeMgrError::CloneFile)?; - self.file = Some(f); - - Ok(()) - } - - pub fn return_file(&mut self) -> Option { - if let Some(ref f) = self.file { - // Basically, this can hardly fail. - f.try_clone() - .map_err(|e| { - error!("Clone file error, {}", e); - e - }) - .ok() - } else { - warn!("No file can be returned"); - None - } - } -} -#[cfg(target_os = "linux")] -/// Online upgrade utilities for fscache daemon. -pub mod fscache_upgrade { - use std::convert::TryFrom; - use std::str::FromStr; - - use super::*; - use crate::daemon::NydusDaemon; - use crate::singleton::ServiceController; - use nydus_upgrade::persist::Snapshotter; - use versionize::{VersionMap, Versionize, VersionizeResult}; - use versionize_derive::Versionize; - - #[derive(Versionize, Clone, Debug)] - pub struct BlobCacheEntryState { - json_str: String, - } - - #[derive(Versionize, Clone, Default, Debug)] - pub struct FscacheBackendState { - blob_entry_list: Vec<(String, BlobCacheEntryState)>, - threads: usize, - path: String, - } - - impl Snapshotter for FscacheBackendState { - fn get_versions() -> Vec> { - vec![ - // version 1 - HashMap::from([(FscacheBackendState::type_id(), 1)]), - // more versions for the future - ] - } - } - - impl TryFrom<&FscacheBackendState> for FscacheState { - type Error = std::io::Error; - fn try_from(backend_stat: &FscacheBackendState) -> std::result::Result { - let mut map = HashMap::new(); - for (id, entry_stat) in &backend_stat.blob_entry_list { - let entry = BlobCacheEntry::from_str(&entry_stat.json_str)?; - map.insert(id.to_string(), entry); - } - Ok(FscacheState { - blob_entry_map: map, - threads: backend_stat.threads, - path: backend_stat.path.clone(), - }) - } - } - - impl TryFrom<&FscacheState> for FscacheBackendState { - type Error = std::io::Error; - fn try_from(stat: &FscacheState) -> std::result::Result { - let mut list = Vec::new(); - for (id, entry) in &stat.blob_entry_map { - let entry_stat = serde_json::to_string(&entry)?; - list.push(( - id.to_string(), - BlobCacheEntryState { - json_str: entry_stat, - }, - )); - } - Ok(FscacheBackendState { - blob_entry_list: list, - threads: stat.threads, - path: stat.path.clone(), - }) - } - } - - pub fn save(daemon: &ServiceController) -> Result<()> { - if let Some(mut mgr) = daemon.upgrade_mgr() { - let backend_stat = FscacheBackendState::try_from(&mgr.fscache_deamon_stat) - .map_err(UpgradeMgrError::Serialize)?; - let stat = backend_stat.save().map_err(UpgradeMgrError::Serialize)?; - mgr.save(&stat)?; - } - Ok(()) - } - - pub fn restore(daemon: &ServiceController) -> Result<()> { - if let Some(mut mgr) = daemon.upgrade_mgr() { - if let Some(blob_mgr) = daemon.get_blob_cache_mgr() { - // restore the mgr state via the backend in the mgr - let mut state_data = mgr.restore()?; - - let backend_stat = FscacheBackendState::restore(&mut state_data) - .map_err(UpgradeMgrError::Deserialize)?; - - let stat = - FscacheState::try_from(&backend_stat).map_err(UpgradeMgrError::Deserialize)?; - // restore blob entry - stat.blob_entry_map - .iter() - .try_for_each(|(_, entry)| -> Result<()> { - blob_mgr - .add_blob_entry(entry) - 
.map_err(UpgradeMgrError::Deserialize)?; - Ok(()) - })?; - - // init fscache daemon with restored fd - if let Some(f) = mgr.return_file() { - daemon - .initialize_fscache_service(None, stat.threads, &stat.path, Some(&f)) - .map_err(UpgradeMgrError::InitializeFscache)?; - } - - //restore upgrade manager fscache stat - mgr.fscache_deamon_stat = stat; - return Ok(()); - } - } - Err(UpgradeMgrError::MissingSupervisorPath.into()) - } -} - -/// Online upgrade utilities for FUSE daemon. -pub mod fusedev_upgrade { - use std::sync::atomic::Ordering; - - use super::*; - use crate::daemon::NydusDaemon; - use crate::fusedev::{FusedevDaemon, FusedevFsService}; - use nydus_upgrade::persist::Snapshotter; - use versionize::{VersionMap, Versionize, VersionizeResult}; - use versionize_derive::Versionize; - - #[derive(Versionize, Clone, Default, Debug)] - pub struct FusedevBackendState { - fs_mount_cmd_list: Vec<(String, MountStateWrapper)>, - vfs_state_data: Vec, - fuse_conn_id: u64, - } - - impl Snapshotter for FusedevBackendState { - fn get_versions() -> Vec> { - vec![ - // version 1 - HashMap::from([(FusedevBackendState::type_id(), 1)]), - // more versions for the future - ] - } - } - - impl From<&FusedevBackendState> for FusedevState { - fn from(backend_stat: &FusedevBackendState) -> Self { - let mut map = HashMap::new(); - for (mp, mw) in &backend_stat.fs_mount_cmd_list { - map.insert(mp.to_string(), mw.clone()); - } - FusedevState { - fs_mount_cmd_map: map, - vfs_state_data: backend_stat.vfs_state_data.clone(), - fuse_conn_id: backend_stat.fuse_conn_id, - } - } - } - - impl From<&FusedevState> for FusedevBackendState { - fn from(stat: &FusedevState) -> Self { - let mut list = Vec::new(); - for (mp, mw) in &stat.fs_mount_cmd_map { - list.push((mp.to_string(), mw.clone())); - } - FusedevBackendState { - fs_mount_cmd_list: list, - vfs_state_data: stat.vfs_state_data.clone(), - fuse_conn_id: stat.fuse_conn_id, - } - } - } - - /// Save state information for a FUSE daemon. - pub fn save(daemon: &FusedevDaemon) -> Result<()> { - let svc = daemon.get_default_fs_service().ok_or(Error::NotFound)?; - if !svc.get_vfs().initialized() { - return Err(Error::NotReady); - } - - let mut mgr = svc.upgrade_mgr().unwrap(); - let backend_stat = FusedevBackendState::from(&mgr.fuse_deamon_stat); - - let state = backend_stat.save().map_err(UpgradeMgrError::Serialize)?; - mgr.save(&state)?; - - Ok(()) - } - - /// Restore state information for a FUSE daemon. 
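    ///
    /// Intended to run in the replacement daemon after the previous instance
    /// has persisted its state via `save()`; a minimal call sketch (the daemon
    /// handle is assumed to come from the caller):
    ///
    /// ```ignore
    /// fusedev_upgrade::restore(&daemon)?;
    /// ```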
- pub fn restore(daemon: &FusedevDaemon) -> Result<()> { - if daemon.supervisor.is_none() { - return Err(UpgradeMgrError::MissingSupervisorPath.into()); - } - - let svc = daemon.get_default_fs_service().ok_or(Error::NotFound)?; - - let mut mgr = svc.upgrade_mgr().unwrap(); - - // restore the mgr state via the backend in the mgr - let mut state_data = mgr.restore()?; - - let backend_state = - FusedevBackendState::restore(&mut state_data).map_err(UpgradeMgrError::Deserialize)?; - - let mut state = FusedevState::from(&backend_state); - - // restore the fuse daemon - svc.as_any() - .downcast_ref::() - .unwrap() - .conn - .store(state.fuse_conn_id, Ordering::Release); - - // restore fuse fd - if let Some(f) = mgr.return_file() { - svc.as_any() - .downcast_ref::() - .unwrap() - .session - .lock() - .unwrap() - .set_fuse_file(f); - } - - // restore vfs - svc.get_vfs() - .restore_from_bytes(&mut state.vfs_state_data)?; - state - .fs_mount_cmd_map - .iter() - .try_for_each(|(_, mount_wrapper)| -> Result<()> { - svc.restore_mount(&mount_wrapper.cmd, mount_wrapper.vfs_index)?; - // as we are in upgrade stage and obtain the lock, `unwrap` is safe here - //mgr.add_mounts_state(cmd.clone(), *vfs_idx); - Ok(()) - })?; - - //restore upgrade manager fuse stat - mgr.fuse_deamon_stat = state; - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::fs_service::{FsBackendMountCmd, FsBackendUmountCmd}; - #[cfg(target_os = "linux")] - use crate::upgrade::fscache_upgrade::FscacheBackendState; - use crate::upgrade::fusedev_upgrade::FusedevBackendState; - use crate::FsBackendType; - use nydus_upgrade::persist::Snapshotter; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_failover_policy() { - assert_eq!( - FailoverPolicy::try_from("flush").unwrap(), - FailoverPolicy::Flush - ); - assert_eq!( - FailoverPolicy::try_from("resend").unwrap(), - FailoverPolicy::Resend - ); - - let strs = vec!["flash", "Resend"]; - for s in strs.clone().into_iter() { - assert!(FailoverPolicy::try_from(s).is_err()); - } - - let str = String::from("flush"); - assert_eq!( - FailoverPolicy::try_from(&str).unwrap(), - FailoverPolicy::Flush - ); - let str = String::from("resend"); - assert_eq!( - FailoverPolicy::try_from(&str).unwrap(), - FailoverPolicy::Resend - ); - - let strings: Vec = strs.into_iter().map(|s| s.to_owned()).collect(); - for s in strings.iter() { - assert!(FailoverPolicy::try_from(s).is_err()); - } - } - - #[test] - #[cfg(target_os = "linux")] - fn test_upgrade_manager_for_fscache() { - let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into()); - - let content = r#"{ - "type": "bootstrap", - "id": "blob1", - "config": { - "id": "cache1", - "backend_type": "localfs", - "backend_config": {}, - "cache_type": "fscache", - "cache_config": {}, - "metadata_path": "/tmp/metadata1" - }, - "domain_id": "domain1" - }"#; - let entry: BlobCacheEntry = serde_json::from_str(content).unwrap(); - upgrade_mgr.save_fscache_states(4, "/tmp/fscache_dir".to_string()); - assert_eq!(upgrade_mgr.fscache_deamon_stat.threads, 4); - assert_eq!(upgrade_mgr.fscache_deamon_stat.path, "/tmp/fscache_dir"); - - upgrade_mgr.add_blob_entry_state(entry); - assert!(upgrade_mgr - .fscache_deamon_stat - .blob_entry_map - .get("domain1/blob1") - .is_some()); - - assert!(FscacheBackendState::try_from(&upgrade_mgr.fscache_deamon_stat).is_ok()); - - let backend_stat = FscacheBackendState::try_from(&upgrade_mgr.fscache_deamon_stat).unwrap(); - assert!(backend_stat.save().is_ok()); - 
assert!(FscacheState::try_from(&backend_stat).is_ok()); - let stat = FscacheState::try_from(&backend_stat).unwrap(); - assert_eq!(stat.path, upgrade_mgr.fscache_deamon_stat.path); - assert_eq!(stat.threads, upgrade_mgr.fscache_deamon_stat.threads); - assert!(stat.blob_entry_map.get("domain1/blob1").is_some()); - - upgrade_mgr.remove_blob_entry_state("domain1", "blob1"); - assert!(upgrade_mgr - .fscache_deamon_stat - .blob_entry_map - .get("domain1/blob1") - .is_none()); - } - - #[test] - fn test_upgrade_manager_for_fusedev() { - let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into()); - - let config = r#"{ - "version": 2, - "id": "factory1", - "backend": { - "type": "localfs", - "localfs": { - "dir": "/tmp/nydus" - } - }, - "cache": { - "type": "fscache", - "fscache": { - "work_dir": "/tmp/nydus" - } - }, - "metadata_path": "/tmp/nydus/bootstrap1" - }"#; - let cmd = FsBackendMountCmd { - fs_type: FsBackendType::Rafs, - config: config.to_string(), - mountpoint: "testmonutount".to_string(), - source: "testsource".to_string(), - prefetch_files: Some(vec!["testfile".to_string()]), - }; - - upgrade_mgr.save_fuse_cid(10); - assert_eq!(upgrade_mgr.fuse_deamon_stat.fuse_conn_id, 10); - upgrade_mgr.add_mounts_state(cmd.clone(), 5); - assert!(upgrade_mgr - .fuse_deamon_stat - .fs_mount_cmd_map - .get("testmonutount") - .is_some()); - assert!(upgrade_mgr.update_mounts_state(cmd).is_ok()); - - let backend_stat = FusedevBackendState::from(&upgrade_mgr.fuse_deamon_stat); - assert!(backend_stat.save().is_ok()); - - let stat = FusedevState::from(&backend_stat); - assert_eq!(stat.fuse_conn_id, upgrade_mgr.fuse_deamon_stat.fuse_conn_id); - assert!(stat.fs_mount_cmd_map.get("testmonutount").is_some()); - - let umount_cmd: FsBackendUmountCmd = FsBackendUmountCmd { - mountpoint: "testmonutount".to_string(), - }; - upgrade_mgr.remove_mounts_state(umount_cmd); - assert!(upgrade_mgr - .fuse_deamon_stat - .fs_mount_cmd_map - .get("testmonutount") - .is_none()); - } - - #[test] - fn test_upgrade_manager_hold_fd() { - let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into()); - - let temp = TempFile::new().unwrap().into_file(); - assert!(upgrade_mgr.hold_file(&temp).is_ok()); - assert!(upgrade_mgr.return_file().is_some()); - } -} +// Copyright 2021 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Online upgrade manager for Nydus daemons and filesystems. + +use std::any::TypeId; +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::fs::File; +use std::io; +use std::os::fd::{AsRawFd, FromRawFd}; +use std::path::PathBuf; + +use nydus_api::BlobCacheEntry; +use nydus_upgrade::backend::unix_domain_socket::UdsStorageBackend; +use nydus_upgrade::backend::{StorageBackend, StorageBackendErr}; + +use crate::fs_service::{FsBackendMountCmd, FsBackendUmountCmd}; +use crate::{Error, Result}; +use fuse_backend_rs::api::Vfs; +use versionize::{VersionMap, Versionize, VersionizeResult}; +use versionize_derive::Versionize; + +/// Error codes related to upgrade manager. 
+#[derive(thiserror::Error, Debug)] +pub enum UpgradeMgrError { + #[error("missing supervisor path")] + MissingSupervisorPath, + + #[error("failed to save/restore data via the backend, {0}")] + StorageBackendError(StorageBackendErr), + #[error("failed to serialize, {0}")] + Serialize(io::Error), + #[error("failed to deserialize, {0}")] + Deserialize(io::Error), + #[error("failed to clone file, {0}")] + CloneFile(io::Error), + #[error("failed to initialize fscache driver, {0}")] + InitializeFscache(io::Error), +} + +impl From for Error { + fn from(e: UpgradeMgrError) -> Self { + Error::UpgradeManager(e) + } +} + +/// FUSE fail-over policies. +#[derive(PartialEq, Eq, Debug)] +pub enum FailoverPolicy { + /// Flush pending requests. + Flush, + /// Resend pending requests. + Resend, +} + +impl TryFrom<&str> for FailoverPolicy { + type Error = std::io::Error; + + fn try_from(p: &str) -> std::result::Result { + match p { + "flush" => Ok(FailoverPolicy::Flush), + "resend" => Ok(FailoverPolicy::Resend), + x => Err(einval!(format!("invalid FUSE fail-over mode {}", x))), + } + } +} + +impl TryFrom<&String> for FailoverPolicy { + type Error = std::io::Error; + + fn try_from(p: &String) -> std::result::Result { + p.as_str().try_into() + } +} + +struct FscacheState { + blob_entry_map: HashMap, + threads: usize, + path: String, +} + +#[derive(Versionize, Clone, Debug)] +struct MountStateWrapper { + cmd: FsBackendMountCmd, + vfs_index: u8, +} + +struct FusedevState { + fs_mount_cmd_map: HashMap, + vfs_state_data: Vec, + fuse_conn_id: u64, +} + +/// Online upgrade manager. +pub struct UpgradeManager { + fscache_deamon_stat: FscacheState, + fuse_deamon_stat: FusedevState, + file: Option, + backend: Box, +} + +impl UpgradeManager { + /// Create a new instance of [UpgradeManager]. 
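    ///
    /// `socket_path` is the unix domain socket used to exchange state with the
    /// supervisor, for example (the path is illustrative):
    ///
    /// ```ignore
    /// let mgr = UpgradeManager::new("/run/nydusd-supervisor.sock".into());
    /// ```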
+ pub fn new(socket_path: PathBuf) -> Self { + UpgradeManager { + fscache_deamon_stat: FscacheState { + blob_entry_map: HashMap::new(), + threads: 1, + path: "".to_string(), + }, + fuse_deamon_stat: FusedevState { + fs_mount_cmd_map: HashMap::new(), + vfs_state_data: vec![], + fuse_conn_id: 0, + }, + file: None, + backend: Box::new(UdsStorageBackend::new(socket_path)), + } + } + pub fn add_blob_entry_state(&mut self, entry: BlobCacheEntry) { + let mut blob_state_id = entry.domain_id.to_string(); + blob_state_id.push('/'); + blob_state_id.push_str(&entry.blob_id); + + self.fscache_deamon_stat + .blob_entry_map + .insert(blob_state_id, entry); + } + + pub fn remove_blob_entry_state(&mut self, domain_id: &str, blob_id: &str) { + let mut blob_state_id = domain_id.to_string(); + blob_state_id.push('/'); + // for no shared domain mode, snapshotter will call unbind without blob_id + if !blob_id.is_empty() { + blob_state_id.push_str(blob_id); + } else { + blob_state_id.push_str(domain_id); + } + + if self + .fscache_deamon_stat + .blob_entry_map + .remove(&blob_state_id) + .is_none() + { + warn!("blob {}: state was not saved before!", blob_state_id) + } + } + + pub fn save_fscache_states(&mut self, threads: usize, path: String) { + self.fscache_deamon_stat.path = path; + self.fscache_deamon_stat.threads = threads; + } + + pub fn save_fuse_cid(&mut self, fuse_conn_id: u64) { + self.fuse_deamon_stat.fuse_conn_id = fuse_conn_id; + } + + pub fn save_vfs_stat(&mut self, vfs: &Vfs) -> Result<()> { + let vfs_state_data = vfs.save_to_bytes().map_err(|e| { + let io_err = io::Error::new( + io::ErrorKind::Other, + format!("Failed to save vfs state: {:?}", e), + ); + UpgradeMgrError::Serialize(io_err) + })?; + self.fuse_deamon_stat.vfs_state_data = vfs_state_data; + Ok(()) + } + + /// Add a filesystem instance into the upgrade manager. + pub fn add_mounts_state(&mut self, cmd: FsBackendMountCmd, vfs_index: u8) { + let cmd_wrapper = MountStateWrapper { + cmd: cmd.clone(), + vfs_index, + }; + self.fuse_deamon_stat + .fs_mount_cmd_map + .insert(cmd.mountpoint, cmd_wrapper); + } + + /// Update a filesystem instance in the upgrade manager. + pub fn update_mounts_state(&mut self, cmd: FsBackendMountCmd) -> Result<()> { + match self + .fuse_deamon_stat + .fs_mount_cmd_map + .get_mut(&cmd.mountpoint) + { + Some(cmd_wrapper) => { + cmd_wrapper.cmd = cmd; + Ok(()) + } + None => Err(Error::NotFound), + } + } + + /// Remove a filesystem instance from the upgrade manager. + pub fn remove_mounts_state(&mut self, cmd: FsBackendUmountCmd) { + if self + .fuse_deamon_stat + .fs_mount_cmd_map + .remove(&cmd.mountpoint) + .is_none() + { + warn!( + "mount state for {}: state was not saved before!", + cmd.mountpoint + ) + } + } + + /// Save the fd and daemon state data for online upgrade. + fn save(&mut self, data: &[u8]) -> Result<()> { + let mut fds = Vec::new(); + if let Some(ref f) = self.file { + fds.push(f.as_raw_fd()) + } + + self.backend + .save(&fds, data) + .map_err(UpgradeMgrError::StorageBackendError)?; + Ok(()) + } + + /// Restore the fd and daemon state data for online upgrade. 
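    ///
    /// A single file descriptor is expected from the storage backend; if more
    /// arrive, a warning is logged and only the first one is adopted.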
+    fn restore(&mut self) -> Result<Vec<u8>> {
+        let (fds, state_data) = self
+            .backend
+            .restore()
+            .map_err(UpgradeMgrError::StorageBackendError)?;
+        if fds.len() != 1 {
+            warn!("Too many fds {}, we may not correctly handle it", fds.len());
+        }
+        self.file = Some(unsafe { File::from_raw_fd(fds[0]) });
+        Ok(state_data)
+    }
+
+    pub fn hold_file(&mut self, fd: &File) -> Result<()> {
+        let f = fd.try_clone().map_err(UpgradeMgrError::CloneFile)?;
+        self.file = Some(f);
+
+        Ok(())
+    }
+
+    pub fn return_file(&mut self) -> Option<File> {
+        if let Some(ref f) = self.file {
+            // Basically, this can hardly fail.
+            f.try_clone()
+                .map_err(|e| {
+                    error!("Clone file error, {}", e);
+                    e
+                })
+                .ok()
+        } else {
+            warn!("No file can be returned");
+            None
+        }
+    }
+}
+#[cfg(target_os = "linux")]
+/// Online upgrade utilities for fscache daemon.
+pub mod fscache_upgrade {
+    use std::convert::TryFrom;
+    use std::str::FromStr;
+
+    use super::*;
+    use crate::daemon::NydusDaemon;
+    use crate::singleton::ServiceController;
+    use nydus_upgrade::persist::Snapshotter;
+    use versionize::{VersionMap, Versionize, VersionizeResult};
+    use versionize_derive::Versionize;
+
+    #[derive(Versionize, Clone, Debug)]
+    pub struct BlobCacheEntryState {
+        json_str: String,
+    }
+
+    #[derive(Versionize, Clone, Default, Debug)]
+    pub struct FscacheBackendState {
+        blob_entry_list: Vec<(String, BlobCacheEntryState)>,
+        threads: usize,
+        path: String,
+    }
+
+    impl Snapshotter for FscacheBackendState {
+        fn get_versions() -> Vec<HashMap<TypeId, u16>> {
+            vec![
+                // version 1
+                HashMap::from([(FscacheBackendState::type_id(), 1)]),
+                // more versions for the future
+            ]
+        }
+    }
+
+    impl TryFrom<&FscacheBackendState> for FscacheState {
+        type Error = std::io::Error;
+        fn try_from(backend_stat: &FscacheBackendState) -> std::result::Result<Self, Self::Error> {
+            let mut map = HashMap::new();
+            for (id, entry_stat) in &backend_stat.blob_entry_list {
+                let entry = BlobCacheEntry::from_str(&entry_stat.json_str)?;
+                map.insert(id.to_string(), entry);
+            }
+            Ok(FscacheState {
+                blob_entry_map: map,
+                threads: backend_stat.threads,
+                path: backend_stat.path.clone(),
+            })
+        }
+    }
+
+    impl TryFrom<&FscacheState> for FscacheBackendState {
+        type Error = std::io::Error;
+        fn try_from(stat: &FscacheState) -> std::result::Result<Self, Self::Error> {
+            let mut list = Vec::new();
+            for (id, entry) in &stat.blob_entry_map {
+                let entry_stat = serde_json::to_string(&entry)?;
+                list.push((
+                    id.to_string(),
+                    BlobCacheEntryState {
+                        json_str: entry_stat,
+                    },
+                ));
+            }
+            Ok(FscacheBackendState {
+                blob_entry_list: list,
+                threads: stat.threads,
+                path: stat.path.clone(),
+            })
+        }
+    }
+
+    pub fn save(daemon: &ServiceController) -> Result<()> {
+        if let Some(mut mgr) = daemon.upgrade_mgr() {
+            let backend_stat = FscacheBackendState::try_from(&mgr.fscache_deamon_stat)
+                .map_err(UpgradeMgrError::Serialize)?;
+            let stat = backend_stat.save().map_err(UpgradeMgrError::Serialize)?;
+            mgr.save(&stat)?;
+        }
+        Ok(())
+    }
+
+    pub fn restore(daemon: &ServiceController) -> Result<()> {
+        if let Some(mut mgr) = daemon.upgrade_mgr() {
+            if let Some(blob_mgr) = daemon.get_blob_cache_mgr() {
+                // restore the mgr state via the backend in the mgr
+                let mut state_data = mgr.restore()?;
+
+                let backend_stat = FscacheBackendState::restore(&mut state_data)
+                    .map_err(UpgradeMgrError::Deserialize)?;
+
+                let stat =
+                    FscacheState::try_from(&backend_stat).map_err(UpgradeMgrError::Deserialize)?;
+                // restore blob entry
+                stat.blob_entry_map
+                    .iter()
+                    .try_for_each(|(_, entry)| -> Result<()> {
+                        blob_mgr
+                            .add_blob_entry(entry)
+                            .map_err(UpgradeMgrError::Deserialize)?;
+                        Ok(())
+                    })?;
+
+                // init fscache daemon with restored fd
+                if let Some(f) = mgr.return_file() {
+                    daemon
+                        .initialize_fscache_service(None, stat.threads, &stat.path, Some(&f))
+                        .map_err(UpgradeMgrError::InitializeFscache)?;
+                }
+
+                //restore upgrade manager fscache stat
+                mgr.fscache_deamon_stat = stat;
+                return Ok(());
+            }
+        }
+        Err(UpgradeMgrError::MissingSupervisorPath.into())
+    }
+}
+
+/// Online upgrade utilities for FUSE daemon.
+pub mod fusedev_upgrade {
+    use std::sync::atomic::Ordering;
+
+    use super::*;
+    use crate::daemon::NydusDaemon;
+    use crate::fusedev::{FusedevDaemon, FusedevFsService};
+    use nydus_upgrade::persist::Snapshotter;
+    use versionize::{VersionMap, Versionize, VersionizeResult};
+    use versionize_derive::Versionize;
+
+    #[derive(Versionize, Clone, Default, Debug)]
+    pub struct FusedevBackendState {
+        fs_mount_cmd_list: Vec<(String, MountStateWrapper)>,
+        vfs_state_data: Vec<u8>,
+        fuse_conn_id: u64,
+    }
+
+    impl Snapshotter for FusedevBackendState {
+        fn get_versions() -> Vec<HashMap<TypeId, u16>> {
+            vec![
+                // version 1
+                HashMap::from([(FusedevBackendState::type_id(), 1)]),
+                // more versions for the future
+            ]
+        }
+    }
+
+    impl From<&FusedevBackendState> for FusedevState {
+        fn from(backend_stat: &FusedevBackendState) -> Self {
+            let mut map = HashMap::new();
+            for (mp, mw) in &backend_stat.fs_mount_cmd_list {
+                map.insert(mp.to_string(), mw.clone());
+            }
+            FusedevState {
+                fs_mount_cmd_map: map,
+                vfs_state_data: backend_stat.vfs_state_data.clone(),
+                fuse_conn_id: backend_stat.fuse_conn_id,
+            }
+        }
+    }
+
+    impl From<&FusedevState> for FusedevBackendState {
+        fn from(stat: &FusedevState) -> Self {
+            let mut list = Vec::new();
+            for (mp, mw) in &stat.fs_mount_cmd_map {
+                list.push((mp.to_string(), mw.clone()));
+            }
+            FusedevBackendState {
+                fs_mount_cmd_list: list,
+                vfs_state_data: stat.vfs_state_data.clone(),
+                fuse_conn_id: stat.fuse_conn_id,
+            }
+        }
+    }
+
+    /// Save state information for a FUSE daemon.
+    pub fn save(daemon: &FusedevDaemon) -> Result<()> {
+        let svc = daemon.get_default_fs_service().ok_or(Error::NotFound)?;
+        if !svc.get_vfs().initialized() {
+            return Err(Error::NotReady);
+        }
+
+        let mut mgr = svc.upgrade_mgr().unwrap();
+        let backend_stat = FusedevBackendState::from(&mgr.fuse_deamon_stat);
+
+        let state = backend_stat.save().map_err(UpgradeMgrError::Serialize)?;
+        mgr.save(&state)?;
+
+        Ok(())
+    }
+
+    /// Restore state information for a FUSE daemon.
+    pub fn restore(daemon: &FusedevDaemon) -> Result<()> {
+        if daemon.supervisor.is_none() {
+            return Err(UpgradeMgrError::MissingSupervisorPath.into());
+        }
+
+        let svc = daemon.get_default_fs_service().ok_or(Error::NotFound)?;
+
+        let mut mgr = svc.upgrade_mgr().unwrap();
+
+        // restore the mgr state via the backend in the mgr
+        let mut state_data = mgr.restore()?;
+
+        let backend_state =
+            FusedevBackendState::restore(&mut state_data).map_err(UpgradeMgrError::Deserialize)?;
+
+        let mut state = FusedevState::from(&backend_state);
+
+        // restore the fuse daemon
+        svc.as_any()
+            .downcast_ref::<FusedevFsService>()
+            .unwrap()
+            .conn
+            .store(state.fuse_conn_id, Ordering::Release);
+
+        // restore fuse fd
+        if let Some(f) = mgr.return_file() {
+            svc.as_any()
+                .downcast_ref::<FusedevFsService>()
+                .unwrap()
+                .session
+                .lock()
+                .unwrap()
+                .set_fuse_file(f);
+        }
+
+        // restore vfs
+        svc.get_vfs()
+            .restore_from_bytes(&mut state.vfs_state_data)?;
+        state
+            .fs_mount_cmd_map
+            .iter()
+            .try_for_each(|(_, mount_wrapper)| -> Result<()> {
+                svc.restore_mount(&mount_wrapper.cmd, mount_wrapper.vfs_index)?;
+                // as we are in upgrade stage and obtain the lock, `unwrap` is safe here
+                //mgr.add_mounts_state(cmd.clone(), *vfs_idx);
+                Ok(())
+            })?;
+
+        //restore upgrade manager fuse stat
+        mgr.fuse_deamon_stat = state;
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::fs_service::{FsBackendMountCmd, FsBackendUmountCmd};
+    #[cfg(target_os = "linux")]
+    use crate::upgrade::fscache_upgrade::FscacheBackendState;
+    use crate::upgrade::fusedev_upgrade::FusedevBackendState;
+    use crate::FsBackendType;
+    use nydus_upgrade::persist::Snapshotter;
+    use vmm_sys_util::tempfile::TempFile;
+
+    #[test]
+    fn test_failover_policy() {
+        assert_eq!(
+            FailoverPolicy::try_from("flush").unwrap(),
+            FailoverPolicy::Flush
+        );
+        assert_eq!(
+            FailoverPolicy::try_from("resend").unwrap(),
+            FailoverPolicy::Resend
+        );
+
+        let strs = vec!["flash", "Resend"];
+        for s in strs.clone().into_iter() {
+            assert!(FailoverPolicy::try_from(s).is_err());
+        }
+
+        let str = String::from("flush");
+        assert_eq!(
+            FailoverPolicy::try_from(&str).unwrap(),
+            FailoverPolicy::Flush
+        );
+        let str = String::from("resend");
+        assert_eq!(
+            FailoverPolicy::try_from(&str).unwrap(),
+            FailoverPolicy::Resend
+        );
+
+        let strings: Vec<String> = strs.into_iter().map(|s| s.to_owned()).collect();
+        for s in strings.iter() {
+            assert!(FailoverPolicy::try_from(s).is_err());
+        }
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_upgrade_manager_for_fscache() {
+        let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into());
+
+        let content = r#"{
+            "type": "bootstrap",
+            "id": "blob1",
+            "config": {
+                "id": "cache1",
+                "backend_type": "localfs",
+                "backend_config": {},
+                "cache_type": "fscache",
+                "cache_config": {},
+                "metadata_path": "/tmp/metadata1"
+            },
+            "domain_id": "domain1"
+        }"#;
+        let entry: BlobCacheEntry = serde_json::from_str(content).unwrap();
+        upgrade_mgr.save_fscache_states(4, "/tmp/fscache_dir".to_string());
+        assert_eq!(upgrade_mgr.fscache_deamon_stat.threads, 4);
+        assert_eq!(upgrade_mgr.fscache_deamon_stat.path, "/tmp/fscache_dir");
+
+        upgrade_mgr.add_blob_entry_state(entry);
+        assert!(upgrade_mgr
+            .fscache_deamon_stat
+            .blob_entry_map
+            .get("domain1/blob1")
+            .is_some());
+
+        assert!(FscacheBackendState::try_from(&upgrade_mgr.fscache_deamon_stat).is_ok());
+
+        let backend_stat = FscacheBackendState::try_from(&upgrade_mgr.fscache_deamon_stat).unwrap();
+        assert!(backend_stat.save().is_ok());
+
+
assert!(FscacheState::try_from(&backend_stat).is_ok()); + let stat = FscacheState::try_from(&backend_stat).unwrap(); + assert_eq!(stat.path, upgrade_mgr.fscache_deamon_stat.path); + assert_eq!(stat.threads, upgrade_mgr.fscache_deamon_stat.threads); + assert!(stat.blob_entry_map.get("domain1/blob1").is_some()); + + upgrade_mgr.remove_blob_entry_state("domain1", "blob1"); + assert!(upgrade_mgr + .fscache_deamon_stat + .blob_entry_map + .get("domain1/blob1") + .is_none()); + } + + #[test] + fn test_upgrade_manager_for_fusedev() { + let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into()); + + let config = r#"{ + "version": 2, + "id": "factory1", + "backend": { + "type": "localfs", + "localfs": { + "dir": "/tmp/nydus" + } + }, + "cache": { + "type": "fscache", + "fscache": { + "work_dir": "/tmp/nydus" + } + }, + "metadata_path": "/tmp/nydus/bootstrap1" + }"#; + let cmd = FsBackendMountCmd { + fs_type: FsBackendType::Rafs, + config: config.to_string(), + mountpoint: "testmonutount".to_string(), + source: "testsource".to_string(), + prefetch_files: Some(vec!["testfile".to_string()]), + }; + + upgrade_mgr.save_fuse_cid(10); + assert_eq!(upgrade_mgr.fuse_deamon_stat.fuse_conn_id, 10); + upgrade_mgr.add_mounts_state(cmd.clone(), 5); + assert!(upgrade_mgr + .fuse_deamon_stat + .fs_mount_cmd_map + .get("testmonutount") + .is_some()); + assert!(upgrade_mgr.update_mounts_state(cmd).is_ok()); + + let backend_stat = FusedevBackendState::from(&upgrade_mgr.fuse_deamon_stat); + assert!(backend_stat.save().is_ok()); + + let stat = FusedevState::from(&backend_stat); + assert_eq!(stat.fuse_conn_id, upgrade_mgr.fuse_deamon_stat.fuse_conn_id); + assert!(stat.fs_mount_cmd_map.get("testmonutount").is_some()); + + let umount_cmd: FsBackendUmountCmd = FsBackendUmountCmd { + mountpoint: "testmonutount".to_string(), + }; + upgrade_mgr.remove_mounts_state(umount_cmd); + assert!(upgrade_mgr + .fuse_deamon_stat + .fs_mount_cmd_map + .get("testmonutount") + .is_none()); + } + + #[test] + fn test_upgrade_manager_hold_fd() { + let mut upgrade_mgr = UpgradeManager::new("dummy_socket".into()); + + let temp = TempFile::new().unwrap().into_file(); + assert!(upgrade_mgr.hold_file(&temp).is_ok()); + assert!(upgrade_mgr.return_file().is_some()); + } +} diff --git a/smoke/.gitignore b/smoke/.gitignore index 8071ea60391..e02dde85f2a 100644 --- a/smoke/.gitignore +++ b/smoke/.gitignore @@ -1,3 +1,3 @@ -smoke.test -output -tmp +smoke.test +output +tmp diff --git a/smoke/.golangci.yml b/smoke/.golangci.yml index 734653d6721..2755646facd 100644 --- a/smoke/.golangci.yml +++ b/smoke/.golangci.yml @@ -1,21 +1,21 @@ -# https://golangci-lint.run/usage/configuration#config-file - -linters: - enable: - - staticcheck - - unconvert - - gofmt - - goimports - - revive - - ineffassign - - vet - - unused - - misspell - disable: - - errcheck - -run: - deadline: 4m - skip-dirs: - - misc - +# https://golangci-lint.run/usage/configuration#config-file + +linters: + enable: + - staticcheck + - unconvert + - gofmt + - goimports + - revive + - ineffassign + - vet + - unused + - misspell + disable: + - errcheck + +run: + deadline: 4m + skip-dirs: + - misc + diff --git a/smoke/Makefile b/smoke/Makefile index 4703117b6cb..548f844f39f 100644 --- a/smoke/Makefile +++ b/smoke/Makefile @@ -1,55 +1,55 @@ -PACKAGES ?= $(shell go list ./... 
| grep -v /vendor/) -GOPROXY ?= https://goproxy.io -TESTS ?= .* - -ifdef GOPROXY -PROXY := GOPROXY=${GOPROXY} -endif - -build: - go test -o smoke.test -c -race -v ./tests - -# WORK_DIR=/tmp \ -# NYDUS_BUILDER=/path/to/latest/nydus-image \ -# NYDUS_NYDUSD=/path/to/latest/nydusd \ -# NYDUS_NYDUSIFY=/path/to/latest/nydusify \ -# SKIP_CASES=compressor=lz4_block,fs_version=5 \ -# make test -test: build - golangci-lint run - sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=16 -test.run=$(TESTS) - -# PERFORMANCE_TEST_MODE=fs-version-5 \ -# PERFORMANCE_TEST_IMAGE=wordpress:latest \ -# make test-performance -test-performance: build - PERFORMANCE_TEST=True sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestPerformance - -# BENCHMARK_TEST_IMAGE=wordpress:6.1.1 \ -# BENCHMARK_MODE=fs-version-6 \ -# make test-benchmark -test-benchmark: build - BENCHMARK_TEST=True sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestBenchmark - -# WORK_DIR=/tmp \ -# NYDUS_STABLE_VERSION=v2.2.3 \ -# NYDUS_STABLE_VERSION_EXPORT=v2_2_3 \ -# NYDUS_BUILDER=/path/to/latest/nydus-image \ -# NYDUS_NYDUSD=/path/to/latest/nydusd \ -# NYDUS_NYDUSIFY=/path/to/latest/nydusify \ -# NYDUS_BUILDER_v0_1_0=/path/to/v0.1.0/nydus-image \ -# NYDUS_NYDUSD_v0_1_0=/path/to/v0.1.0/nydusd \ -# NYDUS_NYDUSIFY_v0_1_0=/path/to/v0.1.0/nydusify \ -# NYDUS_BUILDER_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydus-image \ -# NYDUS_NYDUSD_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydusd \ -# NYDUS_NYDUSIFY_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydusify \ -# make test TESTS=TestCompatibility -test-compatibility: build - make test TESTS=TestCompatibility - -# SNAPSHOTTER_SYSTEM_SOCK=/run/containerd-nydus/system.sock -# SNAPSHOTTER=nydus -# TAKEOVER_TEST_IMAGE=wordpress -# NEW_NYDUSD_BINARY_PATH=target/release/nydusd -test-takeover: build - TAKEOVER_TEST=true sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestTakeover +PACKAGES ?= $(shell go list ./... 
| grep -v /vendor/) +GOPROXY ?= https://goproxy.io +TESTS ?= .* + +ifdef GOPROXY +PROXY := GOPROXY=${GOPROXY} +endif + +build: + go test -o smoke.test -c -race -v ./tests + +# WORK_DIR=/tmp \ +# NYDUS_BUILDER=/path/to/latest/nydus-image \ +# NYDUS_NYDUSD=/path/to/latest/nydusd \ +# NYDUS_NYDUSIFY=/path/to/latest/nydusify \ +# SKIP_CASES=compressor=lz4_block,fs_version=5 \ +# make test +test: build + golangci-lint run + sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=16 -test.run=TestNativeLayer + +# PERFORMANCE_TEST_MODE=fs-version-5 \ +# PERFORMANCE_TEST_IMAGE=wordpress:latest \ +# make test-performance +test-performance: build + PERFORMANCE_TEST=True sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestPerformance + +# BENCHMARK_TEST_IMAGE=wordpress:6.1.1 \ +# BENCHMARK_MODE=fs-version-6 \ +# make test-benchmark +test-benchmark: build + BENCHMARK_TEST=True sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestBenchmark + +# WORK_DIR=/tmp \ +# NYDUS_STABLE_VERSION=v2.2.3 \ +# NYDUS_STABLE_VERSION_EXPORT=v2_2_3 \ +# NYDUS_BUILDER=/path/to/latest/nydus-image \ +# NYDUS_NYDUSD=/path/to/latest/nydusd \ +# NYDUS_NYDUSIFY=/path/to/latest/nydusify \ +# NYDUS_BUILDER_v0_1_0=/path/to/v0.1.0/nydus-image \ +# NYDUS_NYDUSD_v0_1_0=/path/to/v0.1.0/nydusd \ +# NYDUS_NYDUSIFY_v0_1_0=/path/to/v0.1.0/nydusify \ +# NYDUS_BUILDER_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydus-image \ +# NYDUS_NYDUSD_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydusd \ +# NYDUS_NYDUSIFY_$NYDUS_STABLE_VERSION_EXPORT=/path/to/$NYDUS_STABLE_VERSION/nydusify \ +# make test TESTS=TestCompatibility +test-compatibility: build + make test TESTS=TestCompatibility + +# SNAPSHOTTER_SYSTEM_SOCK=/run/containerd-nydus/system.sock +# SNAPSHOTTER=nydus +# TAKEOVER_TEST_IMAGE=wordpress +# NEW_NYDUSD_BINARY_PATH=target/release/nydusd +test-takeover: build + TAKEOVER_TEST=true sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestTakeover diff --git a/smoke/go.mod b/smoke/go.mod index dbf95cf434f..abf0fa9204e 100644 --- a/smoke/go.mod +++ b/smoke/go.mod @@ -1,46 +1,46 @@ -module github.com/dragonflyoss/nydus/smoke - -go 1.21 - -require ( - github.com/containerd/containerd v1.7.11 - github.com/containerd/log v0.1.0 - github.com/containerd/nydus-snapshotter v0.13.4 - github.com/google/uuid v1.5.0 - github.com/opencontainers/go-digest v1.0.0 - github.com/pkg/errors v0.9.1 - github.com/pkg/xattr v0.4.9 - github.com/stretchr/testify v1.8.4 - golang.org/x/sys v0.15.0 -) - -require ( - github.com/Microsoft/go-winio v0.6.1 // indirect - github.com/Microsoft/hcsshim v0.11.4 // indirect - github.com/containerd/cgroups v1.1.0 // indirect - github.com/containerd/continuity v0.4.3 // indirect - github.com/containerd/fifo v1.1.0 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect - github.com/google/go-cmp v0.6.0 // indirect - github.com/klauspost/compress v1.17.4 // indirect - github.com/kr/pretty v0.3.1 // indirect - github.com/moby/sys/mountinfo v0.7.1 // indirect - github.com/moby/sys/sequential v0.5.0 // indirect - github.com/opencontainers/image-spec v1.1.0-rc5 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rogpeppe/go-internal v1.12.0 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - go.opencensus.io v0.24.0 // 
indirect - golang.org/x/mod v0.14.0 // indirect - golang.org/x/sync v0.5.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.16.1 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect - google.golang.org/grpc v1.60.1 // indirect - google.golang.org/protobuf v1.32.0 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) +module github.com/dragonflyoss/nydus/smoke + +go 1.21 + +require ( + github.com/containerd/containerd v1.7.11 + github.com/containerd/log v0.1.0 + github.com/containerd/nydus-snapshotter v0.13.4 + github.com/google/uuid v1.5.0 + github.com/opencontainers/go-digest v1.0.0 + github.com/pkg/errors v0.9.1 + github.com/pkg/xattr v0.4.9 + github.com/stretchr/testify v1.8.4 + golang.org/x/sys v0.15.0 +) + +require ( + github.com/Microsoft/go-winio v0.6.1 // indirect + github.com/Microsoft/hcsshim v0.11.4 // indirect + github.com/containerd/cgroups v1.1.0 // indirect + github.com/containerd/continuity v0.4.3 // indirect + github.com/containerd/fifo v1.1.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/klauspost/compress v1.17.4 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/moby/sys/mountinfo v0.7.1 // indirect + github.com/moby/sys/sequential v0.5.0 // indirect + github.com/opencontainers/image-spec v1.1.0-rc5 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + go.opencensus.io v0.24.0 // indirect + golang.org/x/mod v0.14.0 // indirect + golang.org/x/sync v0.5.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/tools v0.16.1 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect + google.golang.org/grpc v1.60.1 // indirect + google.golang.org/protobuf v1.32.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/smoke/go.sum b/smoke/go.sum index 0fa56cfafd8..51a88792b04 100644 --- a/smoke/go.sum +++ b/smoke/go.sum @@ -1,193 +1,193 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= -github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= -github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= -github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= -github.com/containerd/containerd v1.7.11 
h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= -github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= -github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= -github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= -github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/nydus-snapshotter v0.13.4 h1:veTQCgpfRGdPD031dVNGlU+vK/W9vBhZNlMWR9oupiQ= -github.com/containerd/nydus-snapshotter v0.13.4/go.mod h1:y41TM10lXhskfHHvge7kf1VucM4CeWwsCmQ5Q51UJrc= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 
-github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= -github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= -github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= -github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= -github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= -github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= -github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 
-github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= -go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= -golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= -golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 
-golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= -golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto 
v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= -google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Microsoft/go-winio v0.6.1 
h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= +github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= +github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= +github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= +github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= +github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= +github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/nydus-snapshotter v0.13.4 h1:veTQCgpfRGdPD031dVNGlU+vK/W9vBhZNlMWR9oupiQ= +github.com/containerd/nydus-snapshotter v0.13.4/go.mod h1:y41TM10lXhskfHHvge7kf1VucM4CeWwsCmQ5Q51UJrc= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= +github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= +github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= +github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net 
v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= +golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= +golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= +google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c 
h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/smoke/tests/texture/golang/entrypoint.sh b/smoke/tests/texture/golang/entrypoint.sh index 172835887cc..47726c5b33a 100644 --- a/smoke/tests/texture/golang/entrypoint.sh +++ b/smoke/tests/texture/golang/entrypoint.sh @@ -1,2 +1,2 @@ -cd /src -go run main.go +cd /src +go run main.go diff --git a/smoke/tests/texture/java/Main.java b/smoke/tests/texture/java/Main.java index fd6e78f6641..ca25dd31a2d 100644 --- a/smoke/tests/texture/java/Main.java +++ b/smoke/tests/texture/java/Main.java @@ -1,5 +1,5 @@ -class Main { - public static void main(String[] args) { - System.out.println("hello"); - } -} +class Main { + public static void main(String[] args) { + System.out.println("hello"); + } +} diff --git a/smoke/tests/texture/java/entrypoint.sh b/smoke/tests/texture/java/entrypoint.sh index 5acfb3ca0e1..79077b50724 100644 --- a/smoke/tests/texture/java/entrypoint.sh +++ b/smoke/tests/texture/java/entrypoint.sh @@ -1,3 +1,3 @@ -cd /src -javac Main.java -java Main +cd /src +javac Main.java +java Main diff --git a/smoke/tests/texture/node/index.js b/smoke/tests/texture/node/index.js index 9dc329b2ff3..6017980fd8c 100644 --- a/smoke/tests/texture/node/index.js +++ b/smoke/tests/texture/node/index.js @@ -1,10 +1,10 @@ -// Load the http module to create an http server. -var http = require('http'); - -// Configure our HTTP server to respond with Hello World to all requests. -var server = http.createServer(function (request, response) { - response.writeHead(200, {"Content-Type": "text/plain"}); - response.end("hello\n"); -}); - -server.listen(80); +// Load the http module to create an http server. +var http = require('http'); + +// Configure our HTTP server to respond with Hello World to all requests. 
+var server = http.createServer(function (request, response) { + response.writeHead(200, {"Content-Type": "text/plain"}); + response.end("hello\n"); +}); + +server.listen(80); diff --git a/smoke/tests/texture/python/entrypoint.sh b/smoke/tests/texture/python/entrypoint.sh index a627a228590..22ad3f7a803 100644 --- a/smoke/tests/texture/python/entrypoint.sh +++ b/smoke/tests/texture/python/entrypoint.sh @@ -1 +1 @@ -python -c 'print("hello")' +python -c 'print("hello")' diff --git a/smoke/tests/texture/ruby/entrypoint.sh b/smoke/tests/texture/ruby/entrypoint.sh index 2d950064536..90ca7defa34 100644 --- a/smoke/tests/texture/ruby/entrypoint.sh +++ b/smoke/tests/texture/ruby/entrypoint.sh @@ -1 +1 @@ -ruby -e "puts \"hello\"" +ruby -e "puts \"hello\"" diff --git a/smoke/tests/tool/nydusd.go b/smoke/tests/tool/nydusd.go index 70ca9bd7a03..793b14dd2ba 100644 --- a/smoke/tests/tool/nydusd.go +++ b/smoke/tests/tool/nydusd.go @@ -708,8 +708,8 @@ func Verify(t *testing.T, ctx Context, expectedFileTree map[string]*File) { nydusd, err := NewNydusd(config) require.NoError(t, err) - // err = nydusd.Mount() - // require.NoError(t, err) + err = nydusd.Mount() + require.NoError(t, err) // defer func() { // if err := nydusd.Umount(); err != nil { // log.L.WithError(err).Errorf("umount") @@ -735,6 +735,7 @@ func Verify(t *testing.T, ctx Context, expectedFileTree map[string]*File) { config.DigestValidate = false config.AmplifyIO = ctx.Runtime.AmplifyIO err = nydusd.MountByAPI2(NydusdConfigTpl, config) + //err = nydusd.MountByAPI(config) require.NoError(t, err) defer nydusd.Umount() diff --git a/src/bin/nydus-image/deduplicate.rs b/src/bin/nydus-image/deduplicate.rs index c28130e023f..0419e4b4313 100644 --- a/src/bin/nydus-image/deduplicate.rs +++ b/src/bin/nydus-image/deduplicate.rs @@ -1,1780 +1,1780 @@ -// Copyright (C) 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Deduplicate for Chunk. -use anyhow::{Context, Result}; -use core::cmp::Ordering; -use nydus_api::ConfigV2; -use nydus_builder::BuildContext; -use nydus_builder::ConversionType; -use nydus_builder::Tree; -use nydus_builder::{ChunkdictBlobInfo, ChunkdictChunkInfo}; -use nydus_rafs::metadata::{RafsSuper, RafsVersion}; -use nydus_storage::device::BlobInfo; -use rusqlite::{params, Connection}; -use std::collections::HashSet; -use std::collections::{BTreeMap, HashMap}; -use std::convert::TryFrom; -use std::fs; -use std::path::{Path, PathBuf}; -use std::result::Result::Ok; -use std::sync::{Arc, Mutex}; - -#[derive(Debug)] -pub enum DatabaseError { - SqliteError(rusqlite::Error), - PoisonError(String), - // Add other database error variants here as needed, e.g.: - // MysqlError(mysql::Error). -} - -impl std::fmt::Display for DatabaseError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match *self { - DatabaseError::SqliteError(ref err) => err.fmt(f), - DatabaseError::PoisonError(ref err) => write!(f, "PoisonError: {}", err), - // Add other error type formatting here. - } - } -} - -impl std::error::Error for DatabaseError {} - -impl From for DatabaseError { - fn from(error: rusqlite::Error) -> Self { - DatabaseError::SqliteError(error) - } -} - -pub trait Database { - /// Creates a new chunk in the database. - fn create_chunk_table(&self) -> Result<()>; - - /// Creates a new blob in the database. - fn create_blob_table(&self) -> Result<()>; - - /// Inserts chunk information into the database. 
- fn insert_chunk(&self, chunk_info: &ChunkdictChunkInfo) -> Result<()>; - - /// Inserts blob information into the database. - fn insert_blob(&self, blob_info: &ChunkdictBlobInfo) -> Result<()>; - - /// Retrieves all chunk information from the database. - fn get_chunks(&self) -> Result>; - - /// Retrieves all chunk information from the database filtered by blob ID. - fn get_chunks_by_blob_id(&self, blob_id: &str) -> Result>; - - /// Retrieves all blob information from the database. - fn get_blobs(&self) -> Result>; - - /// Retrieves blob information from the database filtered by blob ID. - fn get_blob_by_id(&self, blob_id: &str) -> Result; -} - -pub struct SqliteDatabase { - chunk_table: ChunkTable, - blob_table: BlobTable, -} - -impl SqliteDatabase { - pub fn new(database_url: &str) -> Result { - // Connect to a database that already exists. - if let Ok(metadata) = fs::metadata(database_url) { - if metadata.is_file() { - } else { - panic!("Warning: Unable to find existing database file."); - } - } - - let chunk_table = ChunkTable::new(database_url)?; - let blob_table = BlobTable::new(database_url)?; - - Ok(Self { - chunk_table, - blob_table, - }) - } - - pub fn new_in_memory() -> Result { - let chunk_table = ChunkTable::new_in_memory()?; - let blob_table = BlobTable::new_in_memory()?; - Ok(Self { - chunk_table, - blob_table, - }) - } -} - -impl Database for SqliteDatabase { - fn create_chunk_table(&self) -> Result<()> { - ChunkTable::create(&self.chunk_table).context("Failed to create chunk table") - } - - fn create_blob_table(&self) -> Result<()> { - BlobTable::create(&self.blob_table).context("Failed to create blob table") - } - - fn insert_chunk(&self, chunk: &ChunkdictChunkInfo) -> Result<()> { - self.chunk_table - .insert(chunk) - .context("Failed to insert chunk") - } - - fn insert_blob(&self, blob: &ChunkdictBlobInfo) -> Result<()> { - self.blob_table - .insert(blob) - .context("Failed to insert blob") - } - - fn get_chunks(&self) -> Result> { - ChunkTable::list_all(&self.chunk_table).context("Failed to get chunks") - } - - fn get_chunks_by_blob_id(&self, blob_id: &str) -> Result> { - ChunkTable::list_all_by_blob_id(&self.chunk_table, blob_id).context("Failed to get chunks") - } - - fn get_blobs(&self) -> Result> { - BlobTable::list_all(&self.blob_table).context("Failed to get blobs") - } - - fn get_blob_by_id(&self, blob_id: &str) -> Result { - BlobTable::list_by_id(&self.blob_table, blob_id).context("Failed to get blob") - } -} - -/// Get fs version from bootstrap file. -fn get_fs_version(bootstrap_path: &Path) -> Result { - let (sb, _) = RafsSuper::load_from_file(bootstrap_path, Arc::new(ConfigV2::default()), false)?; - RafsVersion::try_from(sb.meta.version).context("Failed to get RAFS version number") -} - -/// Checks if all Bootstrap versions are consistent. -/// If they are inconsistent, returns an error and prints the version of each Bootstrap. 
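// Illustrative-only sketch (not part of the diff): how the Database trait defined above is
// meant to be driven through the in-memory SQLite backend. It assumes the elided generic
// return types are Vec<ChunkdictChunkInfo> / Vec<ChunkdictBlobInfo>, and every field value
// below is made up for the example.
fn seed_and_query<D: Database>(db: &D) -> anyhow::Result<()> {
    db.create_chunk_table()?;
    db.create_blob_table()?;
    db.insert_blob(&ChunkdictBlobInfo {
        blob_id: "blob-0".to_string(), // hypothetical blob ID
        blob_compressed_size: 1024,
        blob_uncompressed_size: 2048,
        blob_compressor: "zstd".to_string(),
        blob_meta_ci_compressed_size: 1024,
        blob_meta_ci_uncompressed_size: 2048,
        blob_meta_ci_offset: 0,
    })?;
    db.insert_chunk(&ChunkdictChunkInfo {
        image_reference: "example.com/redis".to_string(), // hypothetical image reference
        version: "7.0.0".to_string(),
        chunk_blob_id: "blob-0".to_string(),
        chunk_digest: "sha256:0000".to_string(), // hypothetical digest
        chunk_compressed_size: 512,
        chunk_uncompressed_size: 1024,
        chunk_compressed_offset: 0,
        chunk_uncompressed_offset: 0,
    })?;
    // Chunks come back filtered per blob; a blob record can be looked up individually.
    assert_eq!(db.get_chunks_by_blob_id("blob-0")?.len(), 1);
    assert_eq!(db.get_blob_by_id("blob-0")?.blob_compressor, "zstd");
    Ok(())
}
// Typical call site: seed_and_query(&SqliteDatabase::new_in_memory()?)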
-pub fn check_bootstrap_versions_consistency( - ctx: &mut BuildContext, - bootstrap_paths: &[PathBuf], -) -> Result<()> { - let mut versions = Vec::new(); - - for bootstrap_path in bootstrap_paths { - let version = get_fs_version(bootstrap_path)?; - versions.push((bootstrap_path.clone(), version)); - } - - if !versions.is_empty() { - let first_version = versions[0].1; - ctx.fs_version = first_version; - if versions.iter().any(|(_, v)| *v != first_version) { - for (path, version) in &versions { - println!("Bootstrap path {:?} has version {:?}", path, version); - } - return Err(anyhow!( - "Bootstrap versions are inconsistent, cannot use chunkdict." - )); - } - } - - Ok(()) -} - -// Get parent bootstrap context for chunkdict bootstrap. -pub fn update_ctx_from_parent_bootstrap( - ctx: &mut BuildContext, - bootstrap_path: &PathBuf, -) -> Result<()> { - let (sb, _) = RafsSuper::load_from_file(bootstrap_path, Arc::new(ConfigV2::default()), false)?; - - // Obtain the features of the first blob to use as the features for the blobs in chunkdict. - if let Some(first_blob) = sb.superblock.get_blob_infos().first() { - ctx.blob_features = first_blob.features(); - } - - let config = sb.meta.get_config(); - config.check_compatibility(&sb.meta)?; - - if config.is_tarfs_mode { - ctx.conversion_type = ConversionType::TarToTarfs; - } - ctx.fs_version = - RafsVersion::try_from(sb.meta.version).context("Failed to get RAFS version")?; - ctx.compressor = config.compressor; - - Ok(()) -} - -pub struct Deduplicate { - db: D, -} - -const IN_MEMORY_DB_URL: &str = ":memory:"; - -impl Deduplicate { - pub fn new(db_url: &str) -> anyhow::Result { - let db = if db_url == IN_MEMORY_DB_URL { - SqliteDatabase::new_in_memory()? - } else { - SqliteDatabase::new(db_url)? - }; - Ok(Self { db }) - } - - pub fn save_metadata( - &mut self, - bootstrap_path: &Path, - config: Arc, - image_reference: String, - version: String, - ) -> anyhow::Result>> { - let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?; - self.create_tables()?; - let blob_infos = sb.superblock.get_blob_infos(); - self.insert_blobs(&blob_infos)?; - self.insert_chunks(&blob_infos, &sb, image_reference, version)?; - Ok(blob_infos) - } - - fn create_tables(&mut self) -> anyhow::Result<()> { - self.db - .create_chunk_table() - .context("Failed to create chunk table.")?; - self.db - .create_blob_table() - .context("Failed to create blob table.")?; - Ok(()) - } - - fn insert_blobs(&mut self, blob_infos: &[Arc]) -> anyhow::Result<()> { - for blob in blob_infos { - self.db - .insert_blob(&ChunkdictBlobInfo { - blob_id: blob.blob_id().to_string(), - blob_compressed_size: blob.compressed_size(), - blob_uncompressed_size: blob.uncompressed_size(), - blob_compressor: blob.compressor().to_string(), - blob_meta_ci_compressed_size: blob.meta_ci_compressed_size(), - blob_meta_ci_uncompressed_size: blob.meta_ci_uncompressed_size(), - blob_meta_ci_offset: blob.meta_ci_offset(), - }) - .context("Failed to insert blob")?; - } - Ok(()) - } - - fn insert_chunks( - &mut self, - blob_infos: &[Arc], - sb: &RafsSuper, - image_reference: String, - version: String, - ) -> anyhow::Result<()> { - let process_chunk = &mut |t: &Tree| -> Result<()> { - let node = t.lock_node(); - for chunk in &node.chunks { - let index = chunk.inner.blob_index(); - let chunk_blob_id = blob_infos[index as usize].blob_id(); - self.db - .insert_chunk(&ChunkdictChunkInfo { - image_reference: image_reference.to_string(), - version: version.to_string(), - chunk_blob_id, - chunk_digest: 
chunk.inner.id().to_string(), - chunk_compressed_size: chunk.inner.compressed_size(), - chunk_uncompressed_size: chunk.inner.uncompressed_size(), - chunk_compressed_offset: chunk.inner.compressed_offset(), - chunk_uncompressed_offset: chunk.inner.uncompressed_offset(), - }) - .context("Failed to insert chunk")?; - } - Ok(()) - }; - let tree = Tree::from_bootstrap(sb, &mut ()) - .context("Failed to load bootstrap for deduplication.")?; - tree.walk_dfs_pre(process_chunk)?; - Ok(()) - } -} - -pub struct Algorithm { - algorithm_name: String, - db: D, -} - -// Generate deduplicated chunkdict by exponential_smoothing algorithm. -type VersionMap = HashMap>; -// Generate deduplicated chunkdict by cluster algorithm. -type ImageMap = Vec, Vec>>; - -impl Algorithm { - pub fn new(algorithm: String, db_url: &str) -> anyhow::Result { - let algorithm_name = algorithm; - let db = SqliteDatabase::new(db_url)?; - Ok(Self { algorithm_name, db }) - } - - // Call the algorithm to generate a dictionary. - pub fn chunkdict_generate( - &mut self, - ) -> anyhow::Result<(Vec, Vec, Vec)> { - let all_chunks: Vec = self.db.chunk_table.list_all()?; - let mut chunkdict_chunks: Vec = Vec::new(); - let mut chunkdict_blobs: Vec = Vec::new(); - let mut core_image = Vec::new(); - let mut noise_points = Vec::new(); - - let (chunkdict_version, chunkdict_image) = match &self.algorithm_name as &str { - "exponential_smoothing" => Self::deduplicate_version(&all_chunks)?, - _ => { - bail!("Unsupported algorithm name:, please use a valid algorithm name, such as exponential_smoothing") - } - }; - for single_clustering in chunkdict_image { - for (image_list, cluster_dictionary) in single_clustering { - core_image.extend(image_list); - chunkdict_chunks.extend(cluster_dictionary); - } - } - for (_, dictionary) in chunkdict_version { - chunkdict_chunks.extend(dictionary); - } - let mut chunkdict_size = 0; - for i in &chunkdict_chunks { - chunkdict_size += i.chunk_compressed_size; - } - info!( - "Chunkdict size is {}", - chunkdict_size as f64 / 1024 as f64 / 1024 as f64 - ); - for chunk in all_chunks { - if !core_image.contains(&chunk.image_reference) - && !noise_points.contains(&chunk.image_reference) - { - noise_points.push(chunk.image_reference.clone()); - } - } - Self::fill_chunkdict(self, &mut chunkdict_chunks, &mut chunkdict_blobs)?; - Ok((chunkdict_chunks, chunkdict_blobs, noise_points)) - } - - /// Baseed chunk list to fill chunkdict, including all chunks in the same blob and all blobs in the chunkdict. - fn fill_chunkdict( - &mut self, - chunkdict_chunks: &mut Vec, - chunkdict_blobs: &mut Vec, - ) -> Result<()> { - let mut blob_ids = std::collections::HashSet::new(); - for chunk in chunkdict_chunks.iter() { - blob_ids.insert(chunk.chunk_blob_id.clone()); - } - for blob_id in blob_ids { - let mut chunks = self.db.get_chunks_by_blob_id(&blob_id)?; - chunks = chunks - .into_iter() - .collect::>() - .into_iter() - .collect::>(); - for chunk in chunks { - if !chunkdict_chunks.contains(&chunk) { - chunkdict_chunks.push(chunk); - } - } - chunkdict_blobs.push(self.db.get_blob_by_id(&blob_id)?); - } - Ok(()) - } - - // Algorithm "exponential_smoothing" - // List all chunk and sort them by the order in chunk table. - // Score each chunk by "exponential_smoothing" formula. - // Select chunks whose score is greater than threshold and generate chunk dictionary. 
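// Illustrative-only sketch (not part of the diff) of the scoring rule described above, on
// simplified (version, digest) pairs instead of ChunkdictChunkInfo:
//   score[i] = alpha * is_duplicate + (1 - alpha) * score[i - 1]
// where is_duplicate is 1.0 if the chunk's digest already occurred in the immediately
// preceding version, chunks of the very first version are pinned at 0.0, and only chunks
// whose score exceeds the threshold are kept for the dictionary.
fn smoothed_scores(chunks: &[(String, String)], alpha: f64) -> Vec<f64> {
    use std::collections::HashSet;
    let first_version = chunks.first().map(|(v, _)| v.clone());
    let mut prev_digests: HashSet<String> = HashSet::new(); // digests of the previous version
    let mut cur_digests: HashSet<String> = HashSet::new(); // digests of the version being scanned
    let mut cur_version: Option<String> = None;
    let mut scores = Vec::with_capacity(chunks.len());

    for (i, (version, digest)) in chunks.iter().enumerate() {
        if cur_version.as_deref() != Some(version.as_str()) {
            // Version boundary: what we just collected becomes the "previous" version.
            prev_digests = std::mem::take(&mut cur_digests);
            cur_version = Some(version.clone());
        }
        cur_digests.insert(digest.clone());
        let score = if Some(version.clone()) == first_version {
            0.0 // the first version has no history to compare against
        } else {
            let is_duplicate = if prev_digests.contains(digest) { 1.0 } else { 0.0 };
            alpha * is_duplicate + (1.0 - alpha) * scores[i - 1]
        };
        scores.push(score);
    }
    scores
}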
- fn exponential_smoothing( - all_chunks: Vec, - threshold: f64, - ) -> anyhow::Result> { - let alpha = 0.5; - let mut smoothed_data = Vec::new(); - - let mut last_start_version_index = 0; - let mut start_version_index = 0; - let mut last_end_version_index = 0; - - for (chunk_index, chunk) in all_chunks.iter().enumerate() { - let mut is_duplicate: f64 = 0.0; - if chunk.version == all_chunks[0].version { - let smoothed_score: f64 = 0.0; - smoothed_data.push(smoothed_score); - } else { - if all_chunks[chunk_index - 1].version != all_chunks[chunk_index].version { - last_start_version_index = start_version_index; - start_version_index = chunk_index; - last_end_version_index = chunk_index - 1; - } - for last_chunk in all_chunks - .iter() - .take(last_end_version_index + 1) - .skip(last_start_version_index) - { - if chunk.chunk_digest == last_chunk.chunk_digest { - is_duplicate = 1.0; - break; - } - } - let smoothed_score: f64 = - alpha * is_duplicate + (1.0 - alpha) * smoothed_data[chunk_index - 1]; - smoothed_data.push(smoothed_score); - } - } - - let mut chunkdict: Vec = Vec::new(); - for i in 0..smoothed_data.len() { - let chunk = ChunkdictChunkInfo { - image_reference: all_chunks[i].image_reference.clone(), - version: all_chunks[i].version.clone(), - chunk_blob_id: all_chunks[i].chunk_blob_id.clone(), - chunk_digest: all_chunks[i].chunk_digest.clone(), - chunk_compressed_offset: all_chunks[i].chunk_compressed_offset, - chunk_uncompressed_offset: all_chunks[i].chunk_uncompressed_offset, - chunk_compressed_size: all_chunks[i].chunk_compressed_size, - chunk_uncompressed_size: all_chunks[i].chunk_uncompressed_size, - }; - if smoothed_data[i] > threshold { - chunkdict.push(chunk); - } - } - - // Deduplicate chunk dictionary. - let mut unique_chunks: BTreeMap = BTreeMap::new(); - for chunk in &chunkdict { - if !unique_chunks.contains_key(&chunk.chunk_digest) { - unique_chunks.insert(chunk.chunk_digest.clone(), chunk.clone()); - } - } - let unique_chunk_list: Vec = unique_chunks.values().cloned().collect(); - Ok(unique_chunk_list) - } - - /// Calculate the distance between two images. - fn distance( - image1: &[ChunkdictChunkInfo], - image2: &[ChunkdictChunkInfo], - ) -> anyhow::Result { - // The total size of all chunks in both images. - let mut image1_size: u64 = 0; - let mut image2_size: u64 = 0; - - for chunk1 in image1 { - image1_size += chunk1.chunk_compressed_size as u64; - } - for chunk2 in image2 { - image2_size += chunk2.chunk_compressed_size as u64; - } - - // The total size of the chunk repeated between two images. - let all_chunks: Vec<&ChunkdictChunkInfo> = image1.iter().chain(image2.iter()).collect(); - let mut compressed_size_map: std::collections::HashMap = - std::collections::HashMap::new(); - let mut processed_digests: HashSet<&String> = HashSet::new(); - - for chunk in all_chunks { - if processed_digests.contains(&chunk.chunk_digest) { - let size = compressed_size_map - .entry(chunk.chunk_digest.clone()) - .or_insert(0); - *size += chunk.chunk_compressed_size as u64; - } - processed_digests.insert(&chunk.chunk_digest); - } - - let repeat_size: u64 = compressed_size_map.values().cloned().sum(); - let distance: f64 = 1.0 - (repeat_size as f64 / ((image1_size + image2_size) as f64)); - Ok(distance) - } - - /// Divide the chunk list into sublists by image name. 
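// Illustrative-only sketch (not part of the diff) of the distance() metric defined just
// above, with chunks reduced to (digest, compressed_size) pairs:
//   distance = 1.0 - repeat_size / (size(image1) + size(image2))
// where repeat_size sums the compressed sizes of every occurrence of a digest after its
// first occurrence across both images; fully disjoint images score 1.0 and identical
// images score about 0.5.
fn image_distance(image1: &[(String, u64)], image2: &[(String, u64)]) -> f64 {
    use std::collections::HashSet;
    let size1: u64 = image1.iter().map(|(_, s)| s).sum();
    let size2: u64 = image2.iter().map(|(_, s)| s).sum();

    let mut seen: HashSet<&str> = HashSet::new();
    let mut repeat_size: u64 = 0;
    for (digest, size) in image1.iter().chain(image2.iter()) {
        // insert() returns false when the digest was already seen, i.e. a repeated chunk.
        if !seen.insert(digest.as_str()) {
            repeat_size += size;
        }
    }
    1.0 - repeat_size as f64 / (size1 + size2) as f64
}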
- fn divide_by_image(all_chunks: &[ChunkdictChunkInfo]) -> anyhow::Result> { - let mut image_chunks: std::collections::HashMap> = - std::collections::HashMap::new(); - let mut datadict: Vec = Vec::new(); - for chunk in all_chunks { - image_chunks - .entry(chunk.image_reference.clone()) - .or_insert(Vec::new()) - .push(chunk.clone()); - } - for (index, chunks) in image_chunks { - let data_point = DataPoint { - image_reference: index, - chunk_list: chunks, - visited: false, - clustered: false, - cluster_id: 0, - }; - datadict.push(data_point); - } - Ok(datadict) - } - - fn divide_set( - chunks: &[ChunkdictChunkInfo], - train_percentage: f64, - ) -> anyhow::Result<(Vec, Vec)> { - // Create a HashMap to store the list of chunks for each image_reference. - let mut image_chunks: BTreeMap> = BTreeMap::new(); - - // Group chunks into image_reference. - for chunk in chunks { - let entry = image_chunks - .entry(chunk.image_reference.clone()) - .or_insert(Vec::new()); - entry.push(chunk.clone()); - } - - // Create the final training and testing sets. - let mut train_set: Vec = Vec::new(); - let mut test_set: Vec = Vec::new(); - - // Iterate through the list of Chunks for each image_reference. - for (_, chunk_list) in image_chunks.iter_mut() { - let mut version_chunks: BTreeMap> = - BTreeMap::new(); - // Group the chunks in the image into version. - for chunk in chunk_list { - let entry = version_chunks - .entry(CustomString(chunk.version.clone())) - .or_insert(Vec::new()); - entry.push(chunk.clone()); - } - - let num_version_groups = version_chunks.len(); - let num_train_groups = (num_version_groups as f64 * train_percentage) as usize; - let version_groups = version_chunks.into_iter().collect::>(); - let (train_version_groups, test_version_groups) = - version_groups.split_at(num_train_groups); - - for (_, train_chunks) in train_version_groups { - for chunk in train_chunks { - train_set.push(chunk.clone()); - } - } - - for (_, test_chunks) in test_version_groups { - for chunk in test_chunks { - test_set.push(chunk.clone()); - } - } - } - Ok((train_set, test_set)) - } - - /// Dbscan clustering algorithm. - fn dbsacn(data_point: &mut Vec, radius: f64) -> anyhow::Result<&Vec> { - let min_points = 10; - let mut cluster_id = 1; - - for i in 0..data_point.len() { - if data_point[i].visited { - continue; - } - if data_point[i].clustered { - continue; - } - - let mut neighbors = Vec::new(); - for j in 0..data_point.len() { - let distance = - Self::distance(&data_point[i].chunk_list, &data_point[j].chunk_list)?; - if !data_point[j].visited && distance <= radius { - neighbors.push(j); - } - } - if neighbors.len() < min_points { - data_point[i].clustered = false; - } else { - Self::expand_cluster(data_point, i, cluster_id, radius, min_points)?; - cluster_id += 1; - } - } - Ok(data_point) - } - - /// Core point expansion cluster in dbscan algorithm. 
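// Illustrative-only sketch (not part of the diff): a textbook-style DBSCAN over a
// precomputed distance matrix, simplified relative to the DataPoint bookkeeping used by
// dbsacn() above and expand_cluster() below. Returns one cluster id per point, with 0
// meaning noise; `radius` and `min_points` play the same roles as in the real code.
fn dbscan(dist: &[Vec<f64>], radius: f64, min_points: usize) -> Vec<usize> {
    let n = dist.len();
    let mut cluster = vec![0usize; n]; // 0 = noise / unassigned
    let mut visited = vec![false; n];
    let mut next_id = 1usize;

    let neighbors = |i: usize| -> Vec<usize> {
        (0..n).filter(|&j| dist[i][j] <= radius).collect()
    };

    for i in 0..n {
        if visited[i] || cluster[i] != 0 {
            continue;
        }
        let seeds = neighbors(i);
        if seeds.len() < min_points {
            continue; // not a core point: stays noise unless a cluster later reaches it
        }
        cluster[i] = next_id;
        let mut stack = seeds;
        while let Some(q) = stack.pop() {
            if !visited[q] {
                visited[q] = true;
                let q_neighbors = neighbors(q);
                if q_neighbors.len() >= min_points {
                    stack.extend(q_neighbors); // q is itself a core point: keep expanding
                }
            }
            if cluster[q] == 0 {
                cluster[q] = next_id;
            }
        }
        next_id += 1;
    }
    cluster
}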
- fn expand_cluster( - data_point: &mut Vec, - i: usize, - cluster_id: i32, - radius: f64, - min_points: usize, - ) -> anyhow::Result<()> { - data_point[i].clustered = true; - data_point[i].cluster_id = cluster_id; - - let mut stack = vec![i]; - while let Some(q) = stack.pop() { - if data_point[q].visited { - continue; - } - data_point[q].visited = true; - let mut q_neighbors = Vec::new(); - for j in 0..data_point.len() { - let distance = - Self::distance(&data_point[q].chunk_list, &data_point[j].chunk_list)?; - if !data_point[j].visited && distance <= radius { - q_neighbors.push(j); - } - } - if q_neighbors.len() >= min_points { - for &r_index in &q_neighbors { - if !data_point[r_index].visited { - data_point[r_index].visited = true; - stack.push(r_index) - } - if !data_point[r_index].clustered { - data_point[r_index].clustered = true; - data_point[r_index].cluster_id = cluster_id; - } - } - } else { - data_point[i].clustered = false; - } - } - Ok(()) - } - - /// Aggregate the chunks in each cluster into a dictionary. - fn aggregate_chunk( - data_point: &[DataPoint], - ) -> anyhow::Result, Vec>> { - // Divide chunk list according to clusters. - let mut cluster_map: HashMap> = HashMap::new(); - for (index, point) in data_point.iter().enumerate() { - if point.clustered { - let cluster_id = point.cluster_id; - cluster_map - .entry(cluster_id) - .or_insert(Vec::new()) - .push(index); - } - } - - // Iterate through each cluster. - let mut dictionary: HashMap, Vec> = HashMap::new(); - for (_, cluster_points) in cluster_map.iter() { - let mut image_total_counts: HashMap<&str, usize> = HashMap::new(); - let mut image_list: Vec = Vec::new(); - // Count the total number of images in the cluster. - for &point_index in cluster_points { - let point = &data_point[point_index]; - let image_total_count = image_total_counts - .entry(&point.image_reference) - .or_insert(0); - *image_total_count += 1; - - image_list.push(point.image_reference.clone()); - } - - // Count the number of images in which chunks appear in the cluster. - let mut chunk_digest_counts: HashMap = HashMap::new(); - for &point_index in cluster_points { - let point = &data_point[point_index]; - let chunk_digest_set: HashSet = point - .chunk_list - .iter() - .map(|chunk| chunk.chunk_digest.clone()) - .collect(); - for chunk_digest in chunk_digest_set { - let count = chunk_digest_counts - .entry(chunk_digest.to_string()) - .or_insert(0); - *count += 1; - } - } - - let mut chunk_list: Vec = Vec::new(); - let mut added_chunk_digests: HashSet = HashSet::new(); - for &point_index in cluster_points { - let point = &data_point[point_index]; - for chunk in &point.chunk_list { - let chunk_digest = &chunk.chunk_digest; - if !added_chunk_digests.contains(chunk_digest) { - let count = chunk_digest_counts.get(chunk_digest).unwrap_or(&0); - if *count as f64 / image_total_counts.len() as f64 >= 0.9 { - chunk_list.push(chunk.clone()); - added_chunk_digests.insert(chunk_digest.to_string()); - } - } - } - } - dictionary.insert(image_list, chunk_list); - } - Ok(dictionary) - } - - fn deduplicate_image( - all_chunks: Vec, - ) -> anyhow::Result, Vec>>> { - let train_percentage = 0.7; - let max_cluster_count = 7; - let mut counter = 0; - let all_chunks_clone = all_chunks; - let mut data_dict: Vec, Vec>> = Vec::new(); - - let (mut train, mut test) = Self::divide_set(&all_chunks_clone, train_percentage)?; - while counter < max_cluster_count { - // Parameter settings. 
- let mut data_point = Self::divide_by_image(&train)?; - let all_train_length = data_point.len(); - let mut radius = 0.5; - let max_radius = 0.9; - let mut test_chunk_sizes = Vec::new(); - let mut min_test_size: u64 = std::u64::MAX; - let mut min_data_dict = HashMap::new(); - let mut data_cluster_length = 0; - - // Adjust the radius size to select the dictionary that tests best. - while radius <= max_radius { - let data_cluster = Self::dbsacn(&mut data_point, radius)?; - data_cluster_length = data_cluster.len(); - - let data_dict = Self::aggregate_chunk(data_cluster)?; - - let all_chunks: HashSet<&ChunkdictChunkInfo> = - data_dict.values().flat_map(|v| v.iter()).collect(); - let mut total_test_set_size: u64 = 0; - - for chunk in test.iter() { - if !all_chunks.contains(chunk) { - total_test_set_size += chunk.chunk_compressed_size as u64; - } - } - test_chunk_sizes.push((radius, total_test_set_size)); - min_test_size = total_test_set_size; - if total_test_set_size <= min_test_size { - min_test_size = total_test_set_size; - min_data_dict = data_dict; - } - radius += 0.05; - } - debug!("test set size is {}", min_test_size); - - let min_chunk_list: Vec = min_data_dict - .values() - .flat_map(|chunk_list| chunk_list.iter()) - .cloned() - .collect(); - let mut to_remove = Vec::new(); - for chunk in train.iter() { - if min_chunk_list.contains(chunk) { - to_remove.push(chunk.clone()); - } - } - for chunk in &to_remove { - train.retain(|c| c.chunk_digest != chunk.chunk_digest); - } - for chunk in &to_remove { - test.retain(|c| c.chunk_digest != chunk.chunk_digest); - } - if (data_cluster_length as f64 / all_train_length as f64) < 0.2 { - break; - } - data_dict.push(min_data_dict); - counter += 1; - } - Ok(data_dict) - } - - pub fn deduplicate_version( - all_chunks: &[ChunkdictChunkInfo], - ) -> anyhow::Result<(VersionMap, ImageMap)> { - let mut all_chunks_size = 0; - for i in all_chunks { - all_chunks_size += i.chunk_compressed_size; - } - info!( - "All chunk size is {}", - all_chunks_size as f64 / 1024 as f64 / 1024 as f64 - ); - - let train_percentage = 0.7; - let datadict = Self::deduplicate_image(all_chunks.to_owned())?; - let (train, test) = Self::divide_set(all_chunks, train_percentage)?; - let mut train_set_size = 0; - for i in &train { - train_set_size += i.chunk_compressed_size; - } - info!( - "Train set size is {}", - train_set_size as f64 / 1024 as f64 / 1024 as f64 - ); - - let mut test_set_size = 0; - for i in &test { - test_set_size += i.chunk_compressed_size; - } - info!( - "Test set size is {}", - test_set_size as f64 / 1024 as f64 / 1024 as f64 - ); - - let mut version_datadict: HashMap> = HashMap::new(); - let mut data_point = Self::divide_by_image(&train)?; - - let mut threshold = 0.5; - let max_threshold = 0.8; - - let mut test_total_size: u32 = 0; - let mut min_test_size: u32 = std::u32::MAX; - let mut min_data_dict = HashMap::new(); - - while threshold <= max_threshold { - version_datadict.clear(); - for point in data_point.iter_mut() { - for single_dictionary in &datadict { - for (key, value) in single_dictionary.iter() { - if key.contains(&point.image_reference) { - let mut to_remove = Vec::new(); - for chunk in point.chunk_list.iter() { - if value.contains(chunk) { - to_remove.push(chunk.clone()); - } - } - for chunk in to_remove { - point.chunk_list.retain(|c| c != &chunk); - } - } - } - } - let chunk_dict = Self::exponential_smoothing(point.chunk_list.clone(), threshold)?; - version_datadict.insert(point.image_reference.clone(), chunk_dict); - } - - let mut test_by_image = 
Self::divide_by_image(&test)?; - for point in test_by_image.iter_mut() { - if version_datadict.contains_key(&point.image_reference.clone()) { - let mut to_remove = Vec::new(); - let mut vec_string = Vec::new(); - let chunkdict_option = version_datadict.get(&point.image_reference); - if let Some(chunkdict) = chunkdict_option { - for i in chunkdict { - vec_string.push(i.chunk_digest.clone()); - } - } - for chunk in point.chunk_list.iter() { - if vec_string.contains(&chunk.chunk_digest) { - to_remove.push(chunk.clone()); - } - } - for chunk in to_remove { - point.chunk_list.retain(|c| c != &chunk); - } - } - for chunk in point.chunk_list.iter() { - test_total_size = test_total_size - .checked_add(chunk.chunk_compressed_size) - .unwrap_or(test_total_size); - } - } - if test_total_size <= min_test_size { - min_test_size = test_total_size; - min_data_dict = version_datadict.clone(); - } - threshold += 0.05; - } - info!( - "After deduplicating test set size is {} and deduplicating rate is {} ", - min_test_size as f64 / 1024 as f64 / 1024 as f64, - 1.0 - (min_test_size as f64) / (test_set_size as f64) - ); - Ok((min_data_dict, datadict)) - } -} - -#[allow(dead_code)] -#[derive(Debug)] -struct DataPoint { - image_reference: String, - chunk_list: Vec, - visited: bool, - clustered: bool, - cluster_id: i32, -} - -pub trait Table: Sync + Send + Sized + 'static -where - Err: std::error::Error + 'static, -{ - /// Clear table. - fn clear(&self) -> Result<(), Err>; - - /// Create table. - fn create(&self) -> Result<(), Err>; - - /// Insert data. - fn insert(&self, table: &T) -> Result<(), Err>; - - /// Select all data. - fn list_all(&self) -> Result, Err>; - - /// Select data with offset and limit. - fn list_paged(&self, offset: i64, limit: i64) -> Result, Err>; -} - -#[derive()] -pub struct ChunkTable { - conn: Arc>, -} - -impl ChunkTable { - pub fn new(database_url: &str) -> Result { - let conn = Connection::open(database_url)?; - Ok(ChunkTable { - conn: Arc::new(Mutex::new(conn)), - }) - } - - pub fn new_in_memory() -> Result { - let conn = Connection::open_in_memory()?; - Ok(ChunkTable { - conn: Arc::new(Mutex::new(conn)), - }) - } - - /// Select all data filtered by blob ID. - fn list_all_by_blob_id(&self, blob_id: &str) -> Result, DatabaseError> { - let mut offset = 0; - let limit: i64 = 100; - let mut all_chunks_by_blob_id = Vec::new(); - - loop { - let chunks = self.list_paged_by_blob_id(blob_id, offset, limit)?; - if chunks.is_empty() { - break; - } - - all_chunks_by_blob_id.extend(chunks); - offset += limit; - } - - Ok(all_chunks_by_blob_id) - } - - /// Select data with offset and limit filtered by blob ID. 
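// Illustrative-only sketch (not part of the diff) of the offset/limit pagination used by
// list_all_by_blob_id() above and list_paged_by_blob_id() below: fetch fixed-size pages
// ordered by id until an empty page comes back. It runs against a throwaway in-memory
// table holding only (id, chunk_blob_id, chunk_digest), not the real chunk schema.
fn paged_digests_by_blob_id(
    conn: &rusqlite::Connection,
    blob_id: &str,
) -> rusqlite::Result<Vec<String>> {
    let limit: i64 = 100;
    let mut offset: i64 = 0;
    let mut all = Vec::new();
    loop {
        let mut stmt = conn.prepare(
            "SELECT chunk_digest FROM chunk
             WHERE chunk_blob_id = ?1
             ORDER BY id LIMIT ?2 OFFSET ?3",
        )?;
        let page: Vec<String> = stmt
            .query_map(rusqlite::params![blob_id, limit, offset], |row| row.get(0))?
            .collect::<rusqlite::Result<_>>()?;
        if page.is_empty() {
            break;
        }
        all.extend(page);
        offset += limit;
    }
    Ok(all)
}

#[test]
fn paged_digests_walks_all_pages() -> rusqlite::Result<()> {
    let conn = rusqlite::Connection::open_in_memory()?;
    conn.execute(
        "CREATE TABLE chunk (id INTEGER PRIMARY KEY, chunk_blob_id TEXT NOT NULL, chunk_digest TEXT)",
        [],
    )?;
    for i in 0..250 {
        conn.execute(
            "INSERT INTO chunk (chunk_blob_id, chunk_digest) VALUES (?1, ?2)",
            rusqlite::params!["blob-0", format!("digest-{}", i)],
        )?;
    }
    // Three pages: 100 + 100 + 50.
    assert_eq!(paged_digests_by_blob_id(&conn, "blob-0")?.len(), 250);
    Ok(())
}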
- fn list_paged_by_blob_id( - &self, - blob_id: &str, - offset: i64, - limit: i64, - ) -> Result, DatabaseError> { - let conn_guard = self - .conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; - let mut stmt: rusqlite::Statement<'_> = conn_guard - .prepare( - "SELECT id, image_reference, version, chunk_blob_id, chunk_digest, chunk_compressed_size, - chunk_uncompressed_size, chunk_compressed_offset, chunk_uncompressed_offset from chunk - WHERE chunk_blob_id = ?1 - ORDER BY id LIMIT ?2 OFFSET ?3", - )?; - let chunk_iterator = stmt.query_map(params![blob_id, limit, offset], |row| { - Ok(ChunkdictChunkInfo { - image_reference: row.get(1)?, - version: row.get(2)?, - chunk_blob_id: row.get(3)?, - chunk_digest: row.get(4)?, - chunk_compressed_size: row.get(5)?, - chunk_uncompressed_size: row.get(6)?, - chunk_compressed_offset: row.get(7)?, - chunk_uncompressed_offset: row.get(8)?, - }) - })?; - let mut chunks = Vec::new(); - for chunk in chunk_iterator { - chunks.push(chunk.map_err(DatabaseError::SqliteError)?); - } - Ok(chunks) - } -} - -#[derive(Debug, Clone)] -struct CustomString(String); - -impl Ord for CustomString { - /// Extract the numbers in the string. - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let mut current_number = String::new(); - - // Parse numbers in strings. - let mut numbers1 = Vec::new(); - let mut numbers2 = Vec::new(); - - for ch in self.0.chars() { - if ch.is_ascii_digit() { - current_number.push(ch); - } else if !current_number.is_empty() { - if let Ok(number) = current_number.parse::() { - numbers1.push(number); - } - current_number.clear(); - } - } - if !current_number.is_empty() { - if let Ok(number) = current_number.parse::() { - numbers1.push(number); - } - } - current_number.clear(); - - for ch in other.0.chars() { - if ch.is_ascii_digit() { - current_number.push(ch); - } else if !current_number.is_empty() { - if let Ok(number) = current_number.parse::() { - numbers2.push(number); - } - current_number.clear(); - } - } - if !current_number.is_empty() { - if let Ok(number) = current_number.parse::() { - numbers2.push(number); - } - } - current_number.clear(); - numbers1.cmp(&numbers2) - } -} - -impl PartialOrd for CustomString { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for CustomString { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -impl Eq for CustomString {} - -impl Table for ChunkTable { - fn clear(&self) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? - .execute("DROP TABLE chunk", []) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn create(&self) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? - .execute( - "CREATE TABLE IF NOT EXISTS chunk ( - id INTEGER PRIMARY KEY, - image_reference TEXT, - version TEXT, - chunk_blob_id TEXT NOT NULL, - chunk_digest TEXT, - chunk_compressed_size INT, - chunk_uncompressed_size INT, - chunk_compressed_offset INT, - chunk_uncompressed_offset INT - )", - [], - ) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn insert(&self, chunk: &ChunkdictChunkInfo) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? 
- .execute( - "INSERT INTO chunk( - image_reference, - version, - chunk_blob_id, - chunk_digest, - chunk_compressed_size, - chunk_uncompressed_size, - chunk_compressed_offset, - chunk_uncompressed_offset - ) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8); - ", - rusqlite::params![ - chunk.image_reference, - chunk.version, - chunk.chunk_blob_id, - chunk.chunk_digest, - chunk.chunk_compressed_size, - chunk.chunk_uncompressed_size, - chunk.chunk_compressed_offset, - chunk.chunk_uncompressed_offset, - ], - ) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn list_all(&self) -> Result, DatabaseError> { - let mut offset = 0; - let limit: i64 = 100; - let mut all_chunks = Vec::new(); - - loop { - let chunks = self.list_paged(offset, limit)?; - if chunks.is_empty() { - break; - } - - all_chunks.extend(chunks); - offset += limit; - } - - Ok(all_chunks) - } - - fn list_paged( - &self, - offset: i64, - limit: i64, - ) -> Result, DatabaseError> { - let conn_guard = self - .conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; - let mut stmt: rusqlite::Statement<'_> = conn_guard - .prepare( - "SELECT id, image_reference, version, chunk_blob_id, chunk_digest, chunk_compressed_size, - chunk_uncompressed_size, chunk_compressed_offset, chunk_uncompressed_offset from chunk - ORDER BY id LIMIT ?1 OFFSET ?2", - )?; - let chunk_iterator = stmt.query_map(params![limit, offset], |row| { - Ok(ChunkdictChunkInfo { - image_reference: row.get(1)?, - version: row.get(2)?, - chunk_blob_id: row.get(3)?, - chunk_digest: row.get(4)?, - chunk_compressed_size: row.get(5)?, - chunk_uncompressed_size: row.get(6)?, - chunk_compressed_offset: row.get(7)?, - chunk_uncompressed_offset: row.get(8)?, - }) - })?; - let mut chunks = Vec::new(); - for chunk in chunk_iterator { - chunks.push(chunk.map_err(DatabaseError::SqliteError)?); - } - Ok(chunks) - } -} - -#[derive(Debug)] -pub struct BlobTable { - conn: Arc>, -} - -impl BlobTable { - pub fn new(database_url: &str) -> Result { - let conn = Connection::open(database_url)?; - Ok(BlobTable { - conn: Arc::new(Mutex::new(conn)), - }) - } - - pub fn new_in_memory() -> Result { - let conn = Connection::open_in_memory()?; - Ok(BlobTable { - conn: Arc::new(Mutex::new(conn)), - }) - } - - pub fn list_by_id(&self, blob_id: &str) -> Result { - let conn_guard = self - .conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; - let mut stmt = conn_guard.prepare( - "SELECT blob_id, blob_compressed_size, blob_uncompressed_size, blob_compressor, blob_meta_ci_compressed_size, blob_meta_ci_uncompressed_size, blob_meta_ci_offset FROM blob WHERE blob_id = ?1", - )?; - let mut blob_iterator = stmt.query_map([blob_id], |row| { - Ok(ChunkdictBlobInfo { - blob_id: row.get(0)?, - blob_compressed_size: row.get(1)?, - blob_uncompressed_size: row.get(2)?, - blob_compressor: row.get(3)?, - blob_meta_ci_compressed_size: row.get(4)?, - blob_meta_ci_uncompressed_size: row.get(5)?, - blob_meta_ci_offset: row.get(6)?, - }) - })?; - - if let Some(blob) = blob_iterator.next() { - blob.map_err(DatabaseError::SqliteError) - } else { - Err(DatabaseError::SqliteError( - rusqlite::Error::QueryReturnedNoRows, - )) - } - } -} - -impl Table for BlobTable { - fn clear(&self) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? 
- .execute("DROP TABLE blob", []) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn create(&self) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? - .execute( - "CREATE TABLE IF NOT EXISTS blob ( - id INTEGER PRIMARY KEY, - blob_id TEXT NOT NULL, - blob_compressed_size INT, - blob_uncompressed_size INT, - blob_compressor TEXT, - blob_meta_ci_compressed_size INT, - blob_meta_ci_uncompressed_size INT, - blob_meta_ci_offset INT - )", - [], - ) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn insert(&self, blob: &ChunkdictBlobInfo) -> Result<(), DatabaseError> { - self.conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))? - .execute( - "INSERT INTO blob ( - blob_id, - blob_compressed_size, - blob_uncompressed_size, - blob_compressor, - blob_meta_ci_compressed_size, - blob_meta_ci_uncompressed_size, - blob_meta_ci_offset - ) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7); - ", - rusqlite::params![ - blob.blob_id, - blob.blob_compressed_size, - blob.blob_uncompressed_size, - blob.blob_compressor, - blob.blob_meta_ci_compressed_size, - blob.blob_meta_ci_uncompressed_size, - blob.blob_meta_ci_offset, - ], - ) - .map_err(DatabaseError::SqliteError)?; - Ok(()) - } - - fn list_all(&self) -> Result, DatabaseError> { - let mut offset = 0; - let limit: i64 = 100; - let mut all_blobs = Vec::new(); - - loop { - let blobs = self.list_paged(offset, limit)?; - if blobs.is_empty() { - break; - } - - all_blobs.extend(blobs); - offset += limit; - } - - Ok(all_blobs) - } - - fn list_paged(&self, offset: i64, limit: i64) -> Result, DatabaseError> { - let conn_guard = self - .conn - .lock() - .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; - let mut stmt: rusqlite::Statement<'_> = conn_guard.prepare( - "SELECT blob_id, blob_compressed_size, blob_uncompressed_size, blob_compressor, blob_meta_ci_compressed_size, blob_meta_ci_uncompressed_size, blob_meta_ci_offset from blob - ORDER BY id LIMIT ?1 OFFSET ?2", - )?; - let blob_iterator = stmt.query_map(params![limit, offset], |row| { - Ok(ChunkdictBlobInfo { - blob_id: row.get(0)?, - blob_compressed_size: row.get(1)?, - blob_uncompressed_size: row.get(2)?, - blob_compressor: row.get(3)?, - blob_meta_ci_compressed_size: row.get(4)?, - blob_meta_ci_uncompressed_size: row.get(5)?, - blob_meta_ci_offset: row.get(6)?, - }) - })?; - let mut blobs = Vec::new(); - for blob in blob_iterator { - blobs.push(blob.map_err(DatabaseError::SqliteError)?); - } - Ok(blobs) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use rusqlite::Result; - - #[test] - fn test_partial_cmp() -> Result<(), Box> { - let custom_string1 = CustomString("nydus_1.2.3".to_string()); - let custom_string2 = CustomString("nydus_1.2.10".to_string()); - let custom_string3 = CustomString("nydus_2.0".to_string()); - - assert!(custom_string1 < custom_string2); - assert!(custom_string2 < custom_string3); - assert!(custom_string1 < custom_string3); - - assert!(custom_string1 <= custom_string2); - assert!(custom_string2 <= custom_string3); - assert!(custom_string1 <= custom_string3); - - assert!(custom_string2 > custom_string1); - assert!(custom_string3 > custom_string2); - assert!(custom_string3 > custom_string1); - - assert!(custom_string2 >= custom_string1); - assert!(custom_string3 >= custom_string2); - assert!(custom_string3 >= custom_string1); - - assert_eq!(custom_string1, CustomString("nydus_1.2.3".to_string())); - assert_ne!(custom_string1, custom_string2); - Ok(()) - } - - #[test] - fn 
test_blob_table() -> Result<(), Box> { - let blob_table = BlobTable::new_in_memory()?; - blob_table.create()?; - let blob = ChunkdictBlobInfo { - blob_id: "BLOB123".to_string(), - blob_compressed_size: 1024, - blob_uncompressed_size: 2048, - blob_compressor: "zstd".to_string(), - blob_meta_ci_compressed_size: 1024, - blob_meta_ci_uncompressed_size: 2048, - blob_meta_ci_offset: 0, - }; - blob_table.insert(&blob)?; - let blobs = blob_table.list_all()?; - assert_eq!(blobs.len(), 1); - assert_eq!(blobs[0].blob_id, blob.blob_id); - assert_eq!(blobs[0].blob_compressed_size, blob.blob_compressed_size); - assert_eq!(blobs[0].blob_uncompressed_size, blob.blob_uncompressed_size); - assert_eq!(blobs[0].blob_compressor, blob.blob_compressor); - assert_eq!( - blobs[0].blob_meta_ci_compressed_size, - blob.blob_meta_ci_compressed_size - ); - assert_eq!( - blobs[0].blob_meta_ci_uncompressed_size, - blob.blob_meta_ci_uncompressed_size - ); - assert_eq!(blobs[0].blob_meta_ci_offset, blob.blob_meta_ci_offset); - Ok(()) - } - - #[test] - fn test_chunk_table() -> Result<(), Box> { - let chunk_table = ChunkTable::new_in_memory()?; - chunk_table.create()?; - let chunk = ChunkdictChunkInfo { - image_reference: "REDIS".to_string(), - version: "1.0.0".to_string(), - chunk_blob_id: "BLOB123".to_string(), - chunk_digest: "DIGEST123".to_string(), - chunk_compressed_size: 512, - chunk_uncompressed_size: 1024, - chunk_compressed_offset: 0, - chunk_uncompressed_offset: 0, - }; - chunk_table.insert(&chunk)?; - let chunk2 = ChunkdictChunkInfo { - image_reference: "REDIS02".to_string(), - version: "1.0.0".to_string(), - chunk_blob_id: "BLOB456".to_string(), - chunk_digest: "DIGEST123".to_string(), - chunk_compressed_size: 512, - chunk_uncompressed_size: 1024, - chunk_compressed_offset: 0, - chunk_uncompressed_offset: 0, - }; - chunk_table.insert(&chunk2)?; - let chunks = chunk_table.list_all()?; - assert_eq!(chunks[0].image_reference, chunk.image_reference); - assert_eq!(chunks[0].version, chunk.version); - assert_eq!(chunks.len(), 2); - assert_eq!(chunks[0].chunk_blob_id, chunk.chunk_blob_id); - assert_eq!(chunks[0].chunk_digest, chunk.chunk_digest); - assert_eq!(chunks[0].chunk_compressed_size, chunk.chunk_compressed_size); - assert_eq!( - chunks[0].chunk_uncompressed_size, - chunk.chunk_uncompressed_size - ); - assert_eq!( - chunks[0].chunk_compressed_offset, - chunk.chunk_compressed_offset - ); - assert_eq!( - chunks[0].chunk_uncompressed_offset, - chunk.chunk_uncompressed_offset - ); - - let chunks = chunk_table.list_all_by_blob_id(&chunk.chunk_blob_id)?; - assert_eq!(chunks[0].chunk_blob_id, chunk.chunk_blob_id); - assert_eq!(chunks.len(), 1); - - Ok(()) - } - - #[test] - fn test_blob_table_paged() -> Result<(), Box> { - let blob_table = BlobTable::new_in_memory()?; - blob_table.create()?; - for i in 0..200 { - let blob = ChunkdictBlobInfo { - blob_id: format!("BLOB{}", i), - blob_compressed_size: i, - blob_uncompressed_size: i * 2, - blob_compressor: "zstd".to_string(), - blob_meta_ci_compressed_size: i, - blob_meta_ci_uncompressed_size: i * 2, - blob_meta_ci_offset: i * 3, - }; - blob_table.insert(&blob)?; - } - let blobs = blob_table.list_paged(100, 100)?; - assert_eq!(blobs.len(), 100); - assert_eq!(blobs[0].blob_id, "BLOB100"); - assert_eq!(blobs[0].blob_compressed_size, 100); - assert_eq!(blobs[0].blob_uncompressed_size, 200); - assert_eq!(blobs[0].blob_compressor, "zstd"); - assert_eq!(blobs[0].blob_meta_ci_compressed_size, 100); - assert_eq!(blobs[0].blob_meta_ci_uncompressed_size, 200); - 
assert_eq!(blobs[0].blob_meta_ci_offset, 300); - Ok(()) - } - - #[test] - fn test_chunk_table_paged() -> Result<(), Box> { - let chunk_table = ChunkTable::new_in_memory()?; - chunk_table.create()?; - for i in 0..200 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i), - version: format!("1.0.0{}", i), - chunk_blob_id: format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", i), - chunk_compressed_size: i, - chunk_uncompressed_size: i * 2, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - chunk_table.insert(&chunk)?; - } - let chunks = chunk_table.list_paged(100, 100)?; - assert_eq!(chunks.len(), 100); - assert_eq!(chunks[0].image_reference, "REDIS100"); - assert_eq!(chunks[0].version, "1.0.0100"); - assert_eq!(chunks[0].chunk_blob_id, "BLOB100"); - assert_eq!(chunks[0].chunk_digest, "DIGEST100"); - assert_eq!(chunks[0].chunk_compressed_size, 100); - assert_eq!(chunks[0].chunk_uncompressed_size, 200); - assert_eq!(chunks[0].chunk_compressed_offset, 300); - assert_eq!(chunks[0].chunk_uncompressed_offset, 400); - Ok(()) - } - - #[test] - fn test_algorithm_exponential_smoothing() -> Result<(), Box> { - let threshold = 0.1; - let mut all_chunk: Vec = Vec::new(); - for i in 0..199 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", 0), - version: format!("1.0.0{}", (i + 1) / 100), - chunk_blob_id: format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", (i + 1) % 2), - chunk_compressed_size: i, - chunk_uncompressed_size: i * 2, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - all_chunk.push(chunk); - } - let chunkdict = Algorithm::::exponential_smoothing(all_chunk, threshold)?; - assert_eq!(chunkdict.len(), 2); - assert_eq!(chunkdict[0].image_reference, "REDIS0"); - assert_eq!(chunkdict[0].version, "1.0.01"); - assert_eq!(chunkdict[0].chunk_blob_id, "BLOB99"); - assert_eq!(chunkdict[0].chunk_digest, "DIGEST0"); - assert_eq!(chunkdict[0].chunk_compressed_size, 99); - assert_eq!(chunkdict[0].chunk_uncompressed_size, 198); - assert_eq!(chunkdict[0].chunk_compressed_offset, 297); - assert_eq!(chunkdict[0].chunk_uncompressed_offset, 396); - Ok(()) - } - - #[test] - fn test_divide_by_image() -> Result<(), Box> { - let db_url = "./metadata.db"; - let chunk_table = ChunkTable::new(db_url)?; - chunk_table.create()?; - for i in 0..200 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i / 50), - version: format!("1.0.0{}", (i + 1) / 100), - chunk_blob_id: format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", (i + 1) % 2), - chunk_compressed_size: i, - chunk_uncompressed_size: i * 2, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - chunk_table.insert(&chunk)?; - } - let algorithm = String::from("exponential_smoothing"); - let algorithm = Algorithm::::new(algorithm, db_url)?; - let all_chunks = algorithm.db.chunk_table.list_all()?; - assert_eq!(all_chunks.len(), 200); - let datadict = Algorithm::::divide_by_image(&all_chunks)?; - assert_eq!(datadict.len(), 4); - assert_eq!(datadict[3].cluster_id, 0); - assert_eq!(datadict[3].chunk_list.len(), 50); - chunk_table.clear()?; - Ok(()) - } - - #[test] - fn test_distance() -> Result<(), Box> { - let mut all_chunks1: Vec = Vec::new(); - for i in 0..200 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", 0), - version: format!("1.0.0{}", (i + 1) / 100), - chunk_blob_id: 
format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", (i + 1) % 4), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - all_chunks1.push(chunk); - } - let mut all_chunks2: Vec = Vec::new(); - for i in 0..200 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", 1), - version: format!("1.0.0{}", (i + 1) / 100), - chunk_blob_id: format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", (i + 1) % 4), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - all_chunks2.push(chunk); - } - let datadict = Algorithm::::distance(&all_chunks1, &all_chunks2)?; - assert!( - (datadict - 0.01).abs() <= 0.0001, - "Expected {} to be approximately equal to {} with tolerance {}", - datadict, - 0.01, - 0.0001 - ); - Ok(()) - } - - #[test] - fn test_divide_set() -> Result<(), Box> { - let mut all_chunks: Vec = Vec::new(); - for i in 0..200 { - for j in 0..100 { - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i), - version: format!("1.0.0{}", j / 10), - chunk_blob_id: format!("BLOB{}", j), - chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: 1, - chunk_uncompressed_offset: 1, - }; - all_chunks.push(chunk); - } - } - assert_eq!(all_chunks.len(), 20000); - let (train, test) = Algorithm::::divide_set(&all_chunks, 0.7)?; - assert_eq!(train.len(), 14000); - assert_eq!(train[0].image_reference, "REDIS0"); - assert_eq!(train[0].version, "1.0.00"); - assert_eq!(test.len(), 6000); - assert_eq!(test[0].image_reference, "REDIS0"); - assert_eq!(test[0].version, "1.0.07"); - Ok(()) - } - - #[test] - fn test_dbscan() -> Result<(), Box> { - let mut all_chunks: Vec = Vec::new(); - let radius = 0.6; - for i in 0..200 { - for j in 0..100 { - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i), - version: format!("1.0.0{}", j / 10), - chunk_blob_id: format!("BLOB{}", j), - chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: 1, - chunk_uncompressed_offset: 1, - }; - all_chunks.push(chunk); - } - } - assert_eq!(all_chunks.len(), 20000); - let mut data_point = Algorithm::::divide_by_image(&all_chunks)?; - let datadict = Algorithm::::dbsacn(&mut data_point, radius)?; - assert_eq!(datadict.len(), 200); - if datadict[150].chunk_list[0].chunk_digest == datadict[0].chunk_list[0].chunk_digest { - assert_eq!(datadict[150].cluster_id, 1); - } else { - assert_eq!(datadict[150].cluster_id, 2); - } - assert_eq!(datadict[0].cluster_id, 1); - assert!(datadict[150].clustered); - assert!(datadict[150].visited); - assert_eq!(datadict[0].chunk_list.len(), 100); - Ok(()) - } - - #[test] - fn test_aggregate_chunk() -> Result<(), Box> { - let mut all_chunks: Vec = Vec::new(); - let radius = 0.6; - for i in 0..200 { - for j in 0..100 { - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i), - version: format!("1.0.0{}", (j + 1) / 100), - chunk_blob_id: format!("BLOB{}", j), - chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: 1, - chunk_uncompressed_offset: 1, - }; - all_chunks.push(chunk); - } - } - assert_eq!(all_chunks.len(), 20000); - let mut data_point = Algorithm::::divide_by_image(&all_chunks)?; - let 
data_cluster = Algorithm::::dbsacn(&mut data_point, radius)?; - let datadict = Algorithm::::aggregate_chunk(&data_cluster)?; - assert_eq!(datadict.len(), 2); - Ok(()) - } - - #[test] - fn test_deduplicate_image() -> Result<(), Box> { - let mut all_chunks: Vec = Vec::new(); - for i in 0..200 { - for j in 0..100 { - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", i), - version: format!("1.0.0{}", j / 10), - chunk_blob_id: format!("BLOB{}", j), - chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), - chunk_compressed_size: 1, - chunk_uncompressed_size: 1, - chunk_compressed_offset: 1, - chunk_uncompressed_offset: 1, - }; - all_chunks.push(chunk); - } - } - assert_eq!(all_chunks.len(), 20000); - let datadict = Algorithm::::deduplicate_image(all_chunks)?; - for i in datadict.clone() { - for (_, b) in i { - if !b.is_empty() { - assert_eq!(b.len(), 70); - } - } - } - assert_eq!(datadict[0].len(), 2); - assert_eq!(datadict[0].values().len(), 2); - assert_eq!(datadict[1].len(), 0); - assert_eq!(datadict[1].values().len(), 0); - assert_eq!(datadict.len(), 7); - Ok(()) - } - - #[test] - fn test_deduplicate_version() -> Result<(), Box> { - let mut all_chunks: Vec = Vec::new(); - let mut chunkdict: Vec = Vec::new(); - for i in 0..200 { - let i64 = i as u64; - let chunk = ChunkdictChunkInfo { - image_reference: format!("REDIS{}", 0), - version: format!("1.0.0{}", (i + 1) / 20), - chunk_blob_id: format!("BLOB{}", i), - chunk_digest: format!("DIGEST{}", (i + 1) % 2), - chunk_compressed_size: i, - chunk_uncompressed_size: i * 2, - chunk_compressed_offset: i64 * 3, - chunk_uncompressed_offset: i64 * 4, - }; - all_chunks.push(chunk); - } - let (chunkdict_version, chunkdict_image) = - Algorithm::::deduplicate_version(&all_chunks)?; - for (_, dictionary) in chunkdict_version { - chunkdict.extend(dictionary); - } - - assert_eq!(chunkdict[0].image_reference, "REDIS0"); - assert_eq!(chunkdict[0].chunk_compressed_size, 21); - assert_eq!(chunkdict.len(), 2); - - for single_clustering in chunkdict_image { - for (_, cluster_dictionary) in single_clustering { - chunkdict.extend(cluster_dictionary); - } - } - assert_eq!(chunkdict.len(), 2); - Ok(()) - } -} +// Copyright (C) 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Deduplicate for Chunk. +use anyhow::{Context, Result}; +use core::cmp::Ordering; +use nydus_api::ConfigV2; +use nydus_builder::BuildContext; +use nydus_builder::ConversionType; +use nydus_builder::Tree; +use nydus_builder::{ChunkdictBlobInfo, ChunkdictChunkInfo}; +use nydus_rafs::metadata::{RafsSuper, RafsVersion}; +use nydus_storage::device::BlobInfo; +use rusqlite::{params, Connection}; +use std::collections::HashSet; +use std::collections::{BTreeMap, HashMap}; +use std::convert::TryFrom; +use std::fs; +use std::path::{Path, PathBuf}; +use std::result::Result::Ok; +use std::sync::{Arc, Mutex}; + +#[derive(Debug)] +pub enum DatabaseError { + SqliteError(rusqlite::Error), + PoisonError(String), + // Add other database error variants here as needed, e.g.: + // MysqlError(mysql::Error). +} + +impl std::fmt::Display for DatabaseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + DatabaseError::SqliteError(ref err) => err.fmt(f), + DatabaseError::PoisonError(ref err) => write!(f, "PoisonError: {}", err), + // Add other error type formatting here. 
+ } + } +} + +impl std::error::Error for DatabaseError {} + +impl From for DatabaseError { + fn from(error: rusqlite::Error) -> Self { + DatabaseError::SqliteError(error) + } +} + +pub trait Database { + /// Creates a new chunk in the database. + fn create_chunk_table(&self) -> Result<()>; + + /// Creates a new blob in the database. + fn create_blob_table(&self) -> Result<()>; + + /// Inserts chunk information into the database. + fn insert_chunk(&self, chunk_info: &ChunkdictChunkInfo) -> Result<()>; + + /// Inserts blob information into the database. + fn insert_blob(&self, blob_info: &ChunkdictBlobInfo) -> Result<()>; + + /// Retrieves all chunk information from the database. + fn get_chunks(&self) -> Result>; + + /// Retrieves all chunk information from the database filtered by blob ID. + fn get_chunks_by_blob_id(&self, blob_id: &str) -> Result>; + + /// Retrieves all blob information from the database. + fn get_blobs(&self) -> Result>; + + /// Retrieves blob information from the database filtered by blob ID. + fn get_blob_by_id(&self, blob_id: &str) -> Result; +} + +pub struct SqliteDatabase { + chunk_table: ChunkTable, + blob_table: BlobTable, +} + +impl SqliteDatabase { + pub fn new(database_url: &str) -> Result { + // Connect to a database that already exists. + if let Ok(metadata) = fs::metadata(database_url) { + if metadata.is_file() { + } else { + panic!("Warning: Unable to find existing database file."); + } + } + + let chunk_table = ChunkTable::new(database_url)?; + let blob_table = BlobTable::new(database_url)?; + + Ok(Self { + chunk_table, + blob_table, + }) + } + + pub fn new_in_memory() -> Result { + let chunk_table = ChunkTable::new_in_memory()?; + let blob_table = BlobTable::new_in_memory()?; + Ok(Self { + chunk_table, + blob_table, + }) + } +} + +impl Database for SqliteDatabase { + fn create_chunk_table(&self) -> Result<()> { + ChunkTable::create(&self.chunk_table).context("Failed to create chunk table") + } + + fn create_blob_table(&self) -> Result<()> { + BlobTable::create(&self.blob_table).context("Failed to create blob table") + } + + fn insert_chunk(&self, chunk: &ChunkdictChunkInfo) -> Result<()> { + self.chunk_table + .insert(chunk) + .context("Failed to insert chunk") + } + + fn insert_blob(&self, blob: &ChunkdictBlobInfo) -> Result<()> { + self.blob_table + .insert(blob) + .context("Failed to insert blob") + } + + fn get_chunks(&self) -> Result> { + ChunkTable::list_all(&self.chunk_table).context("Failed to get chunks") + } + + fn get_chunks_by_blob_id(&self, blob_id: &str) -> Result> { + ChunkTable::list_all_by_blob_id(&self.chunk_table, blob_id).context("Failed to get chunks") + } + + fn get_blobs(&self) -> Result> { + BlobTable::list_all(&self.blob_table).context("Failed to get blobs") + } + + fn get_blob_by_id(&self, blob_id: &str) -> Result { + BlobTable::list_by_id(&self.blob_table, blob_id).context("Failed to get blob") + } +} + +/// Get fs version from bootstrap file. +fn get_fs_version(bootstrap_path: &Path) -> Result { + let (sb, _) = RafsSuper::load_from_file(bootstrap_path, Arc::new(ConfigV2::default()), false)?; + RafsVersion::try_from(sb.meta.version).context("Failed to get RAFS version number") +} + +/// Checks if all Bootstrap versions are consistent. +/// If they are inconsistent, returns an error and prints the version of each Bootstrap. 
+pub fn check_bootstrap_versions_consistency( + ctx: &mut BuildContext, + bootstrap_paths: &[PathBuf], +) -> Result<()> { + let mut versions = Vec::new(); + + for bootstrap_path in bootstrap_paths { + let version = get_fs_version(bootstrap_path)?; + versions.push((bootstrap_path.clone(), version)); + } + + if !versions.is_empty() { + let first_version = versions[0].1; + ctx.fs_version = first_version; + if versions.iter().any(|(_, v)| *v != first_version) { + for (path, version) in &versions { + println!("Bootstrap path {:?} has version {:?}", path, version); + } + return Err(anyhow!( + "Bootstrap versions are inconsistent, cannot use chunkdict." + )); + } + } + + Ok(()) +} + +// Get parent bootstrap context for chunkdict bootstrap. +pub fn update_ctx_from_parent_bootstrap( + ctx: &mut BuildContext, + bootstrap_path: &PathBuf, +) -> Result<()> { + let (sb, _) = RafsSuper::load_from_file(bootstrap_path, Arc::new(ConfigV2::default()), false)?; + + // Obtain the features of the first blob to use as the features for the blobs in chunkdict. + if let Some(first_blob) = sb.superblock.get_blob_infos().first() { + ctx.blob_features = first_blob.features(); + } + + let config = sb.meta.get_config(); + config.check_compatibility(&sb.meta)?; + + if config.is_tarfs_mode { + ctx.conversion_type = ConversionType::TarToTarfs; + } + ctx.fs_version = + RafsVersion::try_from(sb.meta.version).context("Failed to get RAFS version")?; + ctx.compressor = config.compressor; + + Ok(()) +} + +pub struct Deduplicate { + db: D, +} + +const IN_MEMORY_DB_URL: &str = ":memory:"; + +impl Deduplicate { + pub fn new(db_url: &str) -> anyhow::Result { + let db = if db_url == IN_MEMORY_DB_URL { + SqliteDatabase::new_in_memory()? + } else { + SqliteDatabase::new(db_url)? + }; + Ok(Self { db }) + } + + pub fn save_metadata( + &mut self, + bootstrap_path: &Path, + config: Arc, + image_reference: String, + version: String, + ) -> anyhow::Result>> { + let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?; + self.create_tables()?; + let blob_infos = sb.superblock.get_blob_infos(); + self.insert_blobs(&blob_infos)?; + self.insert_chunks(&blob_infos, &sb, image_reference, version)?; + Ok(blob_infos) + } + + fn create_tables(&mut self) -> anyhow::Result<()> { + self.db + .create_chunk_table() + .context("Failed to create chunk table.")?; + self.db + .create_blob_table() + .context("Failed to create blob table.")?; + Ok(()) + } + + fn insert_blobs(&mut self, blob_infos: &[Arc]) -> anyhow::Result<()> { + for blob in blob_infos { + self.db + .insert_blob(&ChunkdictBlobInfo { + blob_id: blob.blob_id().to_string(), + blob_compressed_size: blob.compressed_size(), + blob_uncompressed_size: blob.uncompressed_size(), + blob_compressor: blob.compressor().to_string(), + blob_meta_ci_compressed_size: blob.meta_ci_compressed_size(), + blob_meta_ci_uncompressed_size: blob.meta_ci_uncompressed_size(), + blob_meta_ci_offset: blob.meta_ci_offset(), + }) + .context("Failed to insert blob")?; + } + Ok(()) + } + + fn insert_chunks( + &mut self, + blob_infos: &[Arc], + sb: &RafsSuper, + image_reference: String, + version: String, + ) -> anyhow::Result<()> { + let process_chunk = &mut |t: &Tree| -> Result<()> { + let node = t.lock_node(); + for chunk in &node.chunks { + let index = chunk.inner.blob_index(); + let chunk_blob_id = blob_infos[index as usize].blob_id(); + self.db + .insert_chunk(&ChunkdictChunkInfo { + image_reference: image_reference.to_string(), + version: version.to_string(), + chunk_blob_id, + chunk_digest: 
chunk.inner.id().to_string(), + chunk_compressed_size: chunk.inner.compressed_size(), + chunk_uncompressed_size: chunk.inner.uncompressed_size(), + chunk_compressed_offset: chunk.inner.compressed_offset(), + chunk_uncompressed_offset: chunk.inner.uncompressed_offset(), + }) + .context("Failed to insert chunk")?; + } + Ok(()) + }; + let tree = Tree::from_bootstrap(sb, &mut ()) + .context("Failed to load bootstrap for deduplication.")?; + tree.walk_dfs_pre(process_chunk)?; + Ok(()) + } +} + +pub struct Algorithm { + algorithm_name: String, + db: D, +} + +// Generate deduplicated chunkdict by exponential_smoothing algorithm. +type VersionMap = HashMap>; +// Generate deduplicated chunkdict by cluster algorithm. +type ImageMap = Vec, Vec>>; + +impl Algorithm { + pub fn new(algorithm: String, db_url: &str) -> anyhow::Result { + let algorithm_name = algorithm; + let db = SqliteDatabase::new(db_url)?; + Ok(Self { algorithm_name, db }) + } + + // Call the algorithm to generate a dictionary. + pub fn chunkdict_generate( + &mut self, + ) -> anyhow::Result<(Vec, Vec, Vec)> { + let all_chunks: Vec = self.db.chunk_table.list_all()?; + let mut chunkdict_chunks: Vec = Vec::new(); + let mut chunkdict_blobs: Vec = Vec::new(); + let mut core_image = Vec::new(); + let mut noise_points = Vec::new(); + + let (chunkdict_version, chunkdict_image) = match &self.algorithm_name as &str { + "exponential_smoothing" => Self::deduplicate_version(&all_chunks)?, + _ => { + bail!("Unsupported algorithm name:, please use a valid algorithm name, such as exponential_smoothing") + } + }; + for single_clustering in chunkdict_image { + for (image_list, cluster_dictionary) in single_clustering { + core_image.extend(image_list); + chunkdict_chunks.extend(cluster_dictionary); + } + } + for (_, dictionary) in chunkdict_version { + chunkdict_chunks.extend(dictionary); + } + let mut chunkdict_size = 0; + for i in &chunkdict_chunks { + chunkdict_size += i.chunk_compressed_size; + } + info!( + "Chunkdict size is {}", + chunkdict_size as f64 / 1024 as f64 / 1024 as f64 + ); + for chunk in all_chunks { + if !core_image.contains(&chunk.image_reference) + && !noise_points.contains(&chunk.image_reference) + { + noise_points.push(chunk.image_reference.clone()); + } + } + Self::fill_chunkdict(self, &mut chunkdict_chunks, &mut chunkdict_blobs)?; + Ok((chunkdict_chunks, chunkdict_blobs, noise_points)) + } + + /// Baseed chunk list to fill chunkdict, including all chunks in the same blob and all blobs in the chunkdict. + fn fill_chunkdict( + &mut self, + chunkdict_chunks: &mut Vec, + chunkdict_blobs: &mut Vec, + ) -> Result<()> { + let mut blob_ids = std::collections::HashSet::new(); + for chunk in chunkdict_chunks.iter() { + blob_ids.insert(chunk.chunk_blob_id.clone()); + } + for blob_id in blob_ids { + let mut chunks = self.db.get_chunks_by_blob_id(&blob_id)?; + chunks = chunks + .into_iter() + .collect::>() + .into_iter() + .collect::>(); + for chunk in chunks { + if !chunkdict_chunks.contains(&chunk) { + chunkdict_chunks.push(chunk); + } + } + chunkdict_blobs.push(self.db.get_blob_by_id(&blob_id)?); + } + Ok(()) + } + + // Algorithm "exponential_smoothing" + // List all chunk and sort them by the order in chunk table. + // Score each chunk by "exponential_smoothing" formula. + // Select chunks whose score is greater than threshold and generate chunk dictionary. 
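// Worked example of the scoring recurrence (illustrative; the chunk sequence below is
// hypothetical): with alpha = 0.5 the i-th chunk is scored as
//     score[i] = 0.5 * is_duplicate[i] + 0.5 * score[i - 1]
// where is_duplicate[i] is 1.0 when the chunk's digest also occurs among the chunks of
// the previous version of the same image, and every chunk of the first version scores 0.0.
// For versions v1 = [A, B], v2 = [A, C], v3 = [A, B]:
//     v1: A -> 0.0, B -> 0.0
//     v2: A -> 0.5   (digest A appears in v1), C -> 0.25
//     v3: A -> 0.625 (digest A appears in v2), B -> 0.3125
// With threshold = 0.4 only the two occurrences of A pass, and after the digest
// deduplication step the resulting dictionary is just [A].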
+ fn exponential_smoothing( + all_chunks: Vec, + threshold: f64, + ) -> anyhow::Result> { + let alpha = 0.5; + let mut smoothed_data = Vec::new(); + + let mut last_start_version_index = 0; + let mut start_version_index = 0; + let mut last_end_version_index = 0; + + for (chunk_index, chunk) in all_chunks.iter().enumerate() { + let mut is_duplicate: f64 = 0.0; + if chunk.version == all_chunks[0].version { + let smoothed_score: f64 = 0.0; + smoothed_data.push(smoothed_score); + } else { + if all_chunks[chunk_index - 1].version != all_chunks[chunk_index].version { + last_start_version_index = start_version_index; + start_version_index = chunk_index; + last_end_version_index = chunk_index - 1; + } + for last_chunk in all_chunks + .iter() + .take(last_end_version_index + 1) + .skip(last_start_version_index) + { + if chunk.chunk_digest == last_chunk.chunk_digest { + is_duplicate = 1.0; + break; + } + } + let smoothed_score: f64 = + alpha * is_duplicate + (1.0 - alpha) * smoothed_data[chunk_index - 1]; + smoothed_data.push(smoothed_score); + } + } + + let mut chunkdict: Vec = Vec::new(); + for i in 0..smoothed_data.len() { + let chunk = ChunkdictChunkInfo { + image_reference: all_chunks[i].image_reference.clone(), + version: all_chunks[i].version.clone(), + chunk_blob_id: all_chunks[i].chunk_blob_id.clone(), + chunk_digest: all_chunks[i].chunk_digest.clone(), + chunk_compressed_offset: all_chunks[i].chunk_compressed_offset, + chunk_uncompressed_offset: all_chunks[i].chunk_uncompressed_offset, + chunk_compressed_size: all_chunks[i].chunk_compressed_size, + chunk_uncompressed_size: all_chunks[i].chunk_uncompressed_size, + }; + if smoothed_data[i] > threshold { + chunkdict.push(chunk); + } + } + + // Deduplicate chunk dictionary. + let mut unique_chunks: BTreeMap = BTreeMap::new(); + for chunk in &chunkdict { + if !unique_chunks.contains_key(&chunk.chunk_digest) { + unique_chunks.insert(chunk.chunk_digest.clone(), chunk.clone()); + } + } + let unique_chunk_list: Vec = unique_chunks.values().cloned().collect(); + Ok(unique_chunk_list) + } + + /// Calculate the distance between two images. + fn distance( + image1: &[ChunkdictChunkInfo], + image2: &[ChunkdictChunkInfo], + ) -> anyhow::Result { + // The total size of all chunks in both images. + let mut image1_size: u64 = 0; + let mut image2_size: u64 = 0; + + for chunk1 in image1 { + image1_size += chunk1.chunk_compressed_size as u64; + } + for chunk2 in image2 { + image2_size += chunk2.chunk_compressed_size as u64; + } + + // The total size of the chunk repeated between two images. + let all_chunks: Vec<&ChunkdictChunkInfo> = image1.iter().chain(image2.iter()).collect(); + let mut compressed_size_map: std::collections::HashMap = + std::collections::HashMap::new(); + let mut processed_digests: HashSet<&String> = HashSet::new(); + + for chunk in all_chunks { + if processed_digests.contains(&chunk.chunk_digest) { + let size = compressed_size_map + .entry(chunk.chunk_digest.clone()) + .or_insert(0); + *size += chunk.chunk_compressed_size as u64; + } + processed_digests.insert(&chunk.chunk_digest); + } + + let repeat_size: u64 = compressed_size_map.values().cloned().sum(); + let distance: f64 = 1.0 - (repeat_size as f64 / ((image1_size + image2_size) as f64)); + Ok(distance) + } + + /// Divide the chunk list into sublists by image name. 
+ fn divide_by_image(all_chunks: &[ChunkdictChunkInfo]) -> anyhow::Result> { + let mut image_chunks: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut datadict: Vec = Vec::new(); + for chunk in all_chunks { + image_chunks + .entry(chunk.image_reference.clone()) + .or_insert(Vec::new()) + .push(chunk.clone()); + } + for (index, chunks) in image_chunks { + let data_point = DataPoint { + image_reference: index, + chunk_list: chunks, + visited: false, + clustered: false, + cluster_id: 0, + }; + datadict.push(data_point); + } + Ok(datadict) + } + + fn divide_set( + chunks: &[ChunkdictChunkInfo], + train_percentage: f64, + ) -> anyhow::Result<(Vec, Vec)> { + // Create a HashMap to store the list of chunks for each image_reference. + let mut image_chunks: BTreeMap> = BTreeMap::new(); + + // Group chunks into image_reference. + for chunk in chunks { + let entry = image_chunks + .entry(chunk.image_reference.clone()) + .or_insert(Vec::new()); + entry.push(chunk.clone()); + } + + // Create the final training and testing sets. + let mut train_set: Vec = Vec::new(); + let mut test_set: Vec = Vec::new(); + + // Iterate through the list of Chunks for each image_reference. + for (_, chunk_list) in image_chunks.iter_mut() { + let mut version_chunks: BTreeMap> = + BTreeMap::new(); + // Group the chunks in the image into version. + for chunk in chunk_list { + let entry = version_chunks + .entry(CustomString(chunk.version.clone())) + .or_insert(Vec::new()); + entry.push(chunk.clone()); + } + + let num_version_groups = version_chunks.len(); + let num_train_groups = (num_version_groups as f64 * train_percentage) as usize; + let version_groups = version_chunks.into_iter().collect::>(); + let (train_version_groups, test_version_groups) = + version_groups.split_at(num_train_groups); + + for (_, train_chunks) in train_version_groups { + for chunk in train_chunks { + train_set.push(chunk.clone()); + } + } + + for (_, test_chunks) in test_version_groups { + for chunk in test_chunks { + test_set.push(chunk.clone()); + } + } + } + Ok((train_set, test_set)) + } + + /// Dbscan clustering algorithm. + fn dbsacn(data_point: &mut Vec, radius: f64) -> anyhow::Result<&Vec> { + let min_points = 10; + let mut cluster_id = 1; + + for i in 0..data_point.len() { + if data_point[i].visited { + continue; + } + if data_point[i].clustered { + continue; + } + + let mut neighbors = Vec::new(); + for j in 0..data_point.len() { + let distance = + Self::distance(&data_point[i].chunk_list, &data_point[j].chunk_list)?; + if !data_point[j].visited && distance <= radius { + neighbors.push(j); + } + } + if neighbors.len() < min_points { + data_point[i].clustered = false; + } else { + Self::expand_cluster(data_point, i, cluster_id, radius, min_points)?; + cluster_id += 1; + } + } + Ok(data_point) + } + + /// Core point expansion cluster in dbscan algorithm. 
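// Clustering notes (illustrative summary of the routines above and below):
// `distance` treats each image as the list of its chunks and computes
//     distance = 1.0 - repeat_size / (size(image1) + size(image2))
// where repeat_size is the compressed size of every chunk occurrence whose digest was
// already seen earlier in the combined list. For example, two images of 200 unit-size
// chunks each, drawn from the same 4 distinct digests, give
//     distance = 1.0 - 396 / 400 = 0.01;
// the more compressed bytes a pair shares (or repeats), the smaller the distance, and
// fully disjoint images with internally unique chunks score 1.0.
// `dbsacn` then clusters images: roughly, an image becomes a cluster core only when at
// least `min_points` (10) images lie within the chosen `radius`, and `expand_cluster`
// below grows the cluster from such cores. `deduplicate_image` sweeps `radius` from 0.5
// to 0.9 in steps of 0.05, while `aggregate_chunk` keeps a chunk for a cluster only when
// its digest appears in at least 90% of the images of that cluster.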
+ fn expand_cluster( + data_point: &mut Vec, + i: usize, + cluster_id: i32, + radius: f64, + min_points: usize, + ) -> anyhow::Result<()> { + data_point[i].clustered = true; + data_point[i].cluster_id = cluster_id; + + let mut stack = vec![i]; + while let Some(q) = stack.pop() { + if data_point[q].visited { + continue; + } + data_point[q].visited = true; + let mut q_neighbors = Vec::new(); + for j in 0..data_point.len() { + let distance = + Self::distance(&data_point[q].chunk_list, &data_point[j].chunk_list)?; + if !data_point[j].visited && distance <= radius { + q_neighbors.push(j); + } + } + if q_neighbors.len() >= min_points { + for &r_index in &q_neighbors { + if !data_point[r_index].visited { + data_point[r_index].visited = true; + stack.push(r_index) + } + if !data_point[r_index].clustered { + data_point[r_index].clustered = true; + data_point[r_index].cluster_id = cluster_id; + } + } + } else { + data_point[i].clustered = false; + } + } + Ok(()) + } + + /// Aggregate the chunks in each cluster into a dictionary. + fn aggregate_chunk( + data_point: &[DataPoint], + ) -> anyhow::Result, Vec>> { + // Divide chunk list according to clusters. + let mut cluster_map: HashMap> = HashMap::new(); + for (index, point) in data_point.iter().enumerate() { + if point.clustered { + let cluster_id = point.cluster_id; + cluster_map + .entry(cluster_id) + .or_insert(Vec::new()) + .push(index); + } + } + + // Iterate through each cluster. + let mut dictionary: HashMap, Vec> = HashMap::new(); + for (_, cluster_points) in cluster_map.iter() { + let mut image_total_counts: HashMap<&str, usize> = HashMap::new(); + let mut image_list: Vec = Vec::new(); + // Count the total number of images in the cluster. + for &point_index in cluster_points { + let point = &data_point[point_index]; + let image_total_count = image_total_counts + .entry(&point.image_reference) + .or_insert(0); + *image_total_count += 1; + + image_list.push(point.image_reference.clone()); + } + + // Count the number of images in which chunks appear in the cluster. + let mut chunk_digest_counts: HashMap = HashMap::new(); + for &point_index in cluster_points { + let point = &data_point[point_index]; + let chunk_digest_set: HashSet = point + .chunk_list + .iter() + .map(|chunk| chunk.chunk_digest.clone()) + .collect(); + for chunk_digest in chunk_digest_set { + let count = chunk_digest_counts + .entry(chunk_digest.to_string()) + .or_insert(0); + *count += 1; + } + } + + let mut chunk_list: Vec = Vec::new(); + let mut added_chunk_digests: HashSet = HashSet::new(); + for &point_index in cluster_points { + let point = &data_point[point_index]; + for chunk in &point.chunk_list { + let chunk_digest = &chunk.chunk_digest; + if !added_chunk_digests.contains(chunk_digest) { + let count = chunk_digest_counts.get(chunk_digest).unwrap_or(&0); + if *count as f64 / image_total_counts.len() as f64 >= 0.9 { + chunk_list.push(chunk.clone()); + added_chunk_digests.insert(chunk_digest.to_string()); + } + } + } + } + dictionary.insert(image_list, chunk_list); + } + Ok(dictionary) + } + + fn deduplicate_image( + all_chunks: Vec, + ) -> anyhow::Result, Vec>>> { + let train_percentage = 0.7; + let max_cluster_count = 7; + let mut counter = 0; + let all_chunks_clone = all_chunks; + let mut data_dict: Vec, Vec>> = Vec::new(); + + let (mut train, mut test) = Self::divide_set(&all_chunks_clone, train_percentage)?; + while counter < max_cluster_count { + // Parameter settings. 
+ let mut data_point = Self::divide_by_image(&train)?; + let all_train_length = data_point.len(); + let mut radius = 0.5; + let max_radius = 0.9; + let mut test_chunk_sizes = Vec::new(); + let mut min_test_size: u64 = std::u64::MAX; + let mut min_data_dict = HashMap::new(); + let mut data_cluster_length = 0; + + // Adjust the radius size to select the dictionary that tests best. + while radius <= max_radius { + let data_cluster = Self::dbsacn(&mut data_point, radius)?; + data_cluster_length = data_cluster.len(); + + let data_dict = Self::aggregate_chunk(data_cluster)?; + + let all_chunks: HashSet<&ChunkdictChunkInfo> = + data_dict.values().flat_map(|v| v.iter()).collect(); + let mut total_test_set_size: u64 = 0; + + for chunk in test.iter() { + if !all_chunks.contains(chunk) { + total_test_set_size += chunk.chunk_compressed_size as u64; + } + } + test_chunk_sizes.push((radius, total_test_set_size)); + min_test_size = total_test_set_size; + if total_test_set_size <= min_test_size { + min_test_size = total_test_set_size; + min_data_dict = data_dict; + } + radius += 0.05; + } + debug!("test set size is {}", min_test_size); + + let min_chunk_list: Vec = min_data_dict + .values() + .flat_map(|chunk_list| chunk_list.iter()) + .cloned() + .collect(); + let mut to_remove = Vec::new(); + for chunk in train.iter() { + if min_chunk_list.contains(chunk) { + to_remove.push(chunk.clone()); + } + } + for chunk in &to_remove { + train.retain(|c| c.chunk_digest != chunk.chunk_digest); + } + for chunk in &to_remove { + test.retain(|c| c.chunk_digest != chunk.chunk_digest); + } + if (data_cluster_length as f64 / all_train_length as f64) < 0.2 { + break; + } + data_dict.push(min_data_dict); + counter += 1; + } + Ok(data_dict) + } + + pub fn deduplicate_version( + all_chunks: &[ChunkdictChunkInfo], + ) -> anyhow::Result<(VersionMap, ImageMap)> { + let mut all_chunks_size = 0; + for i in all_chunks { + all_chunks_size += i.chunk_compressed_size; + } + info!( + "All chunk size is {}", + all_chunks_size as f64 / 1024 as f64 / 1024 as f64 + ); + + let train_percentage = 0.7; + let datadict = Self::deduplicate_image(all_chunks.to_owned())?; + let (train, test) = Self::divide_set(all_chunks, train_percentage)?; + let mut train_set_size = 0; + for i in &train { + train_set_size += i.chunk_compressed_size; + } + info!( + "Train set size is {}", + train_set_size as f64 / 1024 as f64 / 1024 as f64 + ); + + let mut test_set_size = 0; + for i in &test { + test_set_size += i.chunk_compressed_size; + } + info!( + "Test set size is {}", + test_set_size as f64 / 1024 as f64 / 1024 as f64 + ); + + let mut version_datadict: HashMap> = HashMap::new(); + let mut data_point = Self::divide_by_image(&train)?; + + let mut threshold = 0.5; + let max_threshold = 0.8; + + let mut test_total_size: u32 = 0; + let mut min_test_size: u32 = std::u32::MAX; + let mut min_data_dict = HashMap::new(); + + while threshold <= max_threshold { + version_datadict.clear(); + for point in data_point.iter_mut() { + for single_dictionary in &datadict { + for (key, value) in single_dictionary.iter() { + if key.contains(&point.image_reference) { + let mut to_remove = Vec::new(); + for chunk in point.chunk_list.iter() { + if value.contains(chunk) { + to_remove.push(chunk.clone()); + } + } + for chunk in to_remove { + point.chunk_list.retain(|c| c != &chunk); + } + } + } + } + let chunk_dict = Self::exponential_smoothing(point.chunk_list.clone(), threshold)?; + version_datadict.insert(point.image_reference.clone(), chunk_dict); + } + + let mut test_by_image = 
Self::divide_by_image(&test)?; + for point in test_by_image.iter_mut() { + if version_datadict.contains_key(&point.image_reference.clone()) { + let mut to_remove = Vec::new(); + let mut vec_string = Vec::new(); + let chunkdict_option = version_datadict.get(&point.image_reference); + if let Some(chunkdict) = chunkdict_option { + for i in chunkdict { + vec_string.push(i.chunk_digest.clone()); + } + } + for chunk in point.chunk_list.iter() { + if vec_string.contains(&chunk.chunk_digest) { + to_remove.push(chunk.clone()); + } + } + for chunk in to_remove { + point.chunk_list.retain(|c| c != &chunk); + } + } + for chunk in point.chunk_list.iter() { + test_total_size = test_total_size + .checked_add(chunk.chunk_compressed_size) + .unwrap_or(test_total_size); + } + } + if test_total_size <= min_test_size { + min_test_size = test_total_size; + min_data_dict = version_datadict.clone(); + } + threshold += 0.05; + } + info!( + "After deduplicating test set size is {} and deduplicating rate is {} ", + min_test_size as f64 / 1024 as f64 / 1024 as f64, + 1.0 - (min_test_size as f64) / (test_set_size as f64) + ); + Ok((min_data_dict, datadict)) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +struct DataPoint { + image_reference: String, + chunk_list: Vec, + visited: bool, + clustered: bool, + cluster_id: i32, +} + +pub trait Table: Sync + Send + Sized + 'static +where + Err: std::error::Error + 'static, +{ + /// Clear table. + fn clear(&self) -> Result<(), Err>; + + /// Create table. + fn create(&self) -> Result<(), Err>; + + /// Insert data. + fn insert(&self, table: &T) -> Result<(), Err>; + + /// Select all data. + fn list_all(&self) -> Result, Err>; + + /// Select data with offset and limit. + fn list_paged(&self, offset: i64, limit: i64) -> Result, Err>; +} + +#[derive()] +pub struct ChunkTable { + conn: Arc>, +} + +impl ChunkTable { + pub fn new(database_url: &str) -> Result { + let conn = Connection::open(database_url)?; + Ok(ChunkTable { + conn: Arc::new(Mutex::new(conn)), + }) + } + + pub fn new_in_memory() -> Result { + let conn = Connection::open_in_memory()?; + Ok(ChunkTable { + conn: Arc::new(Mutex::new(conn)), + }) + } + + /// Select all data filtered by blob ID. + fn list_all_by_blob_id(&self, blob_id: &str) -> Result, DatabaseError> { + let mut offset = 0; + let limit: i64 = 100; + let mut all_chunks_by_blob_id = Vec::new(); + + loop { + let chunks = self.list_paged_by_blob_id(blob_id, offset, limit)?; + if chunks.is_empty() { + break; + } + + all_chunks_by_blob_id.extend(chunks); + offset += limit; + } + + Ok(all_chunks_by_blob_id) + } + + /// Select data with offset and limit filtered by blob ID. 
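// Paging contract (illustrative note; the blob id below is a placeholder):
// `list_all_by_blob_id` above walks the table in fixed pages of 100 rows, advancing
// `offset` by `limit` until an empty page is returned. A single call such as
//     list_paged_by_blob_id("blob-0", 200, 100)
// binds to `... WHERE chunk_blob_id = 'blob-0' ORDER BY id LIMIT 100 OFFSET 200`,
// i.e. it returns at most the 201st through 300th matching rows in primary-key order.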
+ fn list_paged_by_blob_id( + &self, + blob_id: &str, + offset: i64, + limit: i64, + ) -> Result, DatabaseError> { + let conn_guard = self + .conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; + let mut stmt: rusqlite::Statement<'_> = conn_guard + .prepare( + "SELECT id, image_reference, version, chunk_blob_id, chunk_digest, chunk_compressed_size, + chunk_uncompressed_size, chunk_compressed_offset, chunk_uncompressed_offset from chunk + WHERE chunk_blob_id = ?1 + ORDER BY id LIMIT ?2 OFFSET ?3", + )?; + let chunk_iterator = stmt.query_map(params![blob_id, limit, offset], |row| { + Ok(ChunkdictChunkInfo { + image_reference: row.get(1)?, + version: row.get(2)?, + chunk_blob_id: row.get(3)?, + chunk_digest: row.get(4)?, + chunk_compressed_size: row.get(5)?, + chunk_uncompressed_size: row.get(6)?, + chunk_compressed_offset: row.get(7)?, + chunk_uncompressed_offset: row.get(8)?, + }) + })?; + let mut chunks = Vec::new(); + for chunk in chunk_iterator { + chunks.push(chunk.map_err(DatabaseError::SqliteError)?); + } + Ok(chunks) + } +} + +#[derive(Debug, Clone)] +struct CustomString(String); + +impl Ord for CustomString { + /// Extract the numbers in the string. + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let mut current_number = String::new(); + + // Parse numbers in strings. + let mut numbers1 = Vec::new(); + let mut numbers2 = Vec::new(); + + for ch in self.0.chars() { + if ch.is_ascii_digit() { + current_number.push(ch); + } else if !current_number.is_empty() { + if let Ok(number) = current_number.parse::() { + numbers1.push(number); + } + current_number.clear(); + } + } + if !current_number.is_empty() { + if let Ok(number) = current_number.parse::() { + numbers1.push(number); + } + } + current_number.clear(); + + for ch in other.0.chars() { + if ch.is_ascii_digit() { + current_number.push(ch); + } else if !current_number.is_empty() { + if let Ok(number) = current_number.parse::() { + numbers2.push(number); + } + current_number.clear(); + } + } + if !current_number.is_empty() { + if let Ok(number) = current_number.parse::() { + numbers2.push(number); + } + } + current_number.clear(); + numbers1.cmp(&numbers2) + } +} + +impl PartialOrd for CustomString { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for CustomString { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for CustomString {} + +impl Table for ChunkTable { + fn clear(&self) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? + .execute("DROP TABLE chunk", []) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn create(&self) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? + .execute( + "CREATE TABLE IF NOT EXISTS chunk ( + id INTEGER PRIMARY KEY, + image_reference TEXT, + version TEXT, + chunk_blob_id TEXT NOT NULL, + chunk_digest TEXT, + chunk_compressed_size INT, + chunk_uncompressed_size INT, + chunk_compressed_offset INT, + chunk_uncompressed_offset INT + )", + [], + ) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn insert(&self, chunk: &ChunkdictChunkInfo) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? 
+ .execute( + "INSERT INTO chunk( + image_reference, + version, + chunk_blob_id, + chunk_digest, + chunk_compressed_size, + chunk_uncompressed_size, + chunk_compressed_offset, + chunk_uncompressed_offset + ) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8); + ", + rusqlite::params![ + chunk.image_reference, + chunk.version, + chunk.chunk_blob_id, + chunk.chunk_digest, + chunk.chunk_compressed_size, + chunk.chunk_uncompressed_size, + chunk.chunk_compressed_offset, + chunk.chunk_uncompressed_offset, + ], + ) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn list_all(&self) -> Result, DatabaseError> { + let mut offset = 0; + let limit: i64 = 100; + let mut all_chunks = Vec::new(); + + loop { + let chunks = self.list_paged(offset, limit)?; + if chunks.is_empty() { + break; + } + + all_chunks.extend(chunks); + offset += limit; + } + + Ok(all_chunks) + } + + fn list_paged( + &self, + offset: i64, + limit: i64, + ) -> Result, DatabaseError> { + let conn_guard = self + .conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; + let mut stmt: rusqlite::Statement<'_> = conn_guard + .prepare( + "SELECT id, image_reference, version, chunk_blob_id, chunk_digest, chunk_compressed_size, + chunk_uncompressed_size, chunk_compressed_offset, chunk_uncompressed_offset from chunk + ORDER BY id LIMIT ?1 OFFSET ?2", + )?; + let chunk_iterator = stmt.query_map(params![limit, offset], |row| { + Ok(ChunkdictChunkInfo { + image_reference: row.get(1)?, + version: row.get(2)?, + chunk_blob_id: row.get(3)?, + chunk_digest: row.get(4)?, + chunk_compressed_size: row.get(5)?, + chunk_uncompressed_size: row.get(6)?, + chunk_compressed_offset: row.get(7)?, + chunk_uncompressed_offset: row.get(8)?, + }) + })?; + let mut chunks = Vec::new(); + for chunk in chunk_iterator { + chunks.push(chunk.map_err(DatabaseError::SqliteError)?); + } + Ok(chunks) + } +} + +#[derive(Debug)] +pub struct BlobTable { + conn: Arc>, +} + +impl BlobTable { + pub fn new(database_url: &str) -> Result { + let conn = Connection::open(database_url)?; + Ok(BlobTable { + conn: Arc::new(Mutex::new(conn)), + }) + } + + pub fn new_in_memory() -> Result { + let conn = Connection::open_in_memory()?; + Ok(BlobTable { + conn: Arc::new(Mutex::new(conn)), + }) + } + + pub fn list_by_id(&self, blob_id: &str) -> Result { + let conn_guard = self + .conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; + let mut stmt = conn_guard.prepare( + "SELECT blob_id, blob_compressed_size, blob_uncompressed_size, blob_compressor, blob_meta_ci_compressed_size, blob_meta_ci_uncompressed_size, blob_meta_ci_offset FROM blob WHERE blob_id = ?1", + )?; + let mut blob_iterator = stmt.query_map([blob_id], |row| { + Ok(ChunkdictBlobInfo { + blob_id: row.get(0)?, + blob_compressed_size: row.get(1)?, + blob_uncompressed_size: row.get(2)?, + blob_compressor: row.get(3)?, + blob_meta_ci_compressed_size: row.get(4)?, + blob_meta_ci_uncompressed_size: row.get(5)?, + blob_meta_ci_offset: row.get(6)?, + }) + })?; + + if let Some(blob) = blob_iterator.next() { + blob.map_err(DatabaseError::SqliteError) + } else { + Err(DatabaseError::SqliteError( + rusqlite::Error::QueryReturnedNoRows, + )) + } + } +} + +impl Table for BlobTable { + fn clear(&self) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? 
+ .execute("DROP TABLE blob", []) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn create(&self) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? + .execute( + "CREATE TABLE IF NOT EXISTS blob ( + id INTEGER PRIMARY KEY, + blob_id TEXT NOT NULL, + blob_compressed_size INT, + blob_uncompressed_size INT, + blob_compressor TEXT, + blob_meta_ci_compressed_size INT, + blob_meta_ci_uncompressed_size INT, + blob_meta_ci_offset INT + )", + [], + ) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn insert(&self, blob: &ChunkdictBlobInfo) -> Result<(), DatabaseError> { + self.conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))? + .execute( + "INSERT INTO blob ( + blob_id, + blob_compressed_size, + blob_uncompressed_size, + blob_compressor, + blob_meta_ci_compressed_size, + blob_meta_ci_uncompressed_size, + blob_meta_ci_offset + ) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7); + ", + rusqlite::params![ + blob.blob_id, + blob.blob_compressed_size, + blob.blob_uncompressed_size, + blob.blob_compressor, + blob.blob_meta_ci_compressed_size, + blob.blob_meta_ci_uncompressed_size, + blob.blob_meta_ci_offset, + ], + ) + .map_err(DatabaseError::SqliteError)?; + Ok(()) + } + + fn list_all(&self) -> Result, DatabaseError> { + let mut offset = 0; + let limit: i64 = 100; + let mut all_blobs = Vec::new(); + + loop { + let blobs = self.list_paged(offset, limit)?; + if blobs.is_empty() { + break; + } + + all_blobs.extend(blobs); + offset += limit; + } + + Ok(all_blobs) + } + + fn list_paged(&self, offset: i64, limit: i64) -> Result, DatabaseError> { + let conn_guard = self + .conn + .lock() + .map_err(|e| DatabaseError::PoisonError(e.to_string()))?; + let mut stmt: rusqlite::Statement<'_> = conn_guard.prepare( + "SELECT blob_id, blob_compressed_size, blob_uncompressed_size, blob_compressor, blob_meta_ci_compressed_size, blob_meta_ci_uncompressed_size, blob_meta_ci_offset from blob + ORDER BY id LIMIT ?1 OFFSET ?2", + )?; + let blob_iterator = stmt.query_map(params![limit, offset], |row| { + Ok(ChunkdictBlobInfo { + blob_id: row.get(0)?, + blob_compressed_size: row.get(1)?, + blob_uncompressed_size: row.get(2)?, + blob_compressor: row.get(3)?, + blob_meta_ci_compressed_size: row.get(4)?, + blob_meta_ci_uncompressed_size: row.get(5)?, + blob_meta_ci_offset: row.get(6)?, + }) + })?; + let mut blobs = Vec::new(); + for blob in blob_iterator { + blobs.push(blob.map_err(DatabaseError::SqliteError)?); + } + Ok(blobs) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rusqlite::Result; + + #[test] + fn test_partial_cmp() -> Result<(), Box> { + let custom_string1 = CustomString("nydus_1.2.3".to_string()); + let custom_string2 = CustomString("nydus_1.2.10".to_string()); + let custom_string3 = CustomString("nydus_2.0".to_string()); + + assert!(custom_string1 < custom_string2); + assert!(custom_string2 < custom_string3); + assert!(custom_string1 < custom_string3); + + assert!(custom_string1 <= custom_string2); + assert!(custom_string2 <= custom_string3); + assert!(custom_string1 <= custom_string3); + + assert!(custom_string2 > custom_string1); + assert!(custom_string3 > custom_string2); + assert!(custom_string3 > custom_string1); + + assert!(custom_string2 >= custom_string1); + assert!(custom_string3 >= custom_string2); + assert!(custom_string3 >= custom_string1); + + assert_eq!(custom_string1, CustomString("nydus_1.2.3".to_string())); + assert_ne!(custom_string1, custom_string2); + Ok(()) + } + + #[test] + fn 
test_blob_table() -> Result<(), Box> { + let blob_table = BlobTable::new_in_memory()?; + blob_table.create()?; + let blob = ChunkdictBlobInfo { + blob_id: "BLOB123".to_string(), + blob_compressed_size: 1024, + blob_uncompressed_size: 2048, + blob_compressor: "zstd".to_string(), + blob_meta_ci_compressed_size: 1024, + blob_meta_ci_uncompressed_size: 2048, + blob_meta_ci_offset: 0, + }; + blob_table.insert(&blob)?; + let blobs = blob_table.list_all()?; + assert_eq!(blobs.len(), 1); + assert_eq!(blobs[0].blob_id, blob.blob_id); + assert_eq!(blobs[0].blob_compressed_size, blob.blob_compressed_size); + assert_eq!(blobs[0].blob_uncompressed_size, blob.blob_uncompressed_size); + assert_eq!(blobs[0].blob_compressor, blob.blob_compressor); + assert_eq!( + blobs[0].blob_meta_ci_compressed_size, + blob.blob_meta_ci_compressed_size + ); + assert_eq!( + blobs[0].blob_meta_ci_uncompressed_size, + blob.blob_meta_ci_uncompressed_size + ); + assert_eq!(blobs[0].blob_meta_ci_offset, blob.blob_meta_ci_offset); + Ok(()) + } + + #[test] + fn test_chunk_table() -> Result<(), Box> { + let chunk_table = ChunkTable::new_in_memory()?; + chunk_table.create()?; + let chunk = ChunkdictChunkInfo { + image_reference: "REDIS".to_string(), + version: "1.0.0".to_string(), + chunk_blob_id: "BLOB123".to_string(), + chunk_digest: "DIGEST123".to_string(), + chunk_compressed_size: 512, + chunk_uncompressed_size: 1024, + chunk_compressed_offset: 0, + chunk_uncompressed_offset: 0, + }; + chunk_table.insert(&chunk)?; + let chunk2 = ChunkdictChunkInfo { + image_reference: "REDIS02".to_string(), + version: "1.0.0".to_string(), + chunk_blob_id: "BLOB456".to_string(), + chunk_digest: "DIGEST123".to_string(), + chunk_compressed_size: 512, + chunk_uncompressed_size: 1024, + chunk_compressed_offset: 0, + chunk_uncompressed_offset: 0, + }; + chunk_table.insert(&chunk2)?; + let chunks = chunk_table.list_all()?; + assert_eq!(chunks[0].image_reference, chunk.image_reference); + assert_eq!(chunks[0].version, chunk.version); + assert_eq!(chunks.len(), 2); + assert_eq!(chunks[0].chunk_blob_id, chunk.chunk_blob_id); + assert_eq!(chunks[0].chunk_digest, chunk.chunk_digest); + assert_eq!(chunks[0].chunk_compressed_size, chunk.chunk_compressed_size); + assert_eq!( + chunks[0].chunk_uncompressed_size, + chunk.chunk_uncompressed_size + ); + assert_eq!( + chunks[0].chunk_compressed_offset, + chunk.chunk_compressed_offset + ); + assert_eq!( + chunks[0].chunk_uncompressed_offset, + chunk.chunk_uncompressed_offset + ); + + let chunks = chunk_table.list_all_by_blob_id(&chunk.chunk_blob_id)?; + assert_eq!(chunks[0].chunk_blob_id, chunk.chunk_blob_id); + assert_eq!(chunks.len(), 1); + + Ok(()) + } + + #[test] + fn test_blob_table_paged() -> Result<(), Box> { + let blob_table = BlobTable::new_in_memory()?; + blob_table.create()?; + for i in 0..200 { + let blob = ChunkdictBlobInfo { + blob_id: format!("BLOB{}", i), + blob_compressed_size: i, + blob_uncompressed_size: i * 2, + blob_compressor: "zstd".to_string(), + blob_meta_ci_compressed_size: i, + blob_meta_ci_uncompressed_size: i * 2, + blob_meta_ci_offset: i * 3, + }; + blob_table.insert(&blob)?; + } + let blobs = blob_table.list_paged(100, 100)?; + assert_eq!(blobs.len(), 100); + assert_eq!(blobs[0].blob_id, "BLOB100"); + assert_eq!(blobs[0].blob_compressed_size, 100); + assert_eq!(blobs[0].blob_uncompressed_size, 200); + assert_eq!(blobs[0].blob_compressor, "zstd"); + assert_eq!(blobs[0].blob_meta_ci_compressed_size, 100); + assert_eq!(blobs[0].blob_meta_ci_uncompressed_size, 200); + 
assert_eq!(blobs[0].blob_meta_ci_offset, 300); + Ok(()) + } + + #[test] + fn test_chunk_table_paged() -> Result<(), Box> { + let chunk_table = ChunkTable::new_in_memory()?; + chunk_table.create()?; + for i in 0..200 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i), + version: format!("1.0.0{}", i), + chunk_blob_id: format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", i), + chunk_compressed_size: i, + chunk_uncompressed_size: i * 2, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + chunk_table.insert(&chunk)?; + } + let chunks = chunk_table.list_paged(100, 100)?; + assert_eq!(chunks.len(), 100); + assert_eq!(chunks[0].image_reference, "REDIS100"); + assert_eq!(chunks[0].version, "1.0.0100"); + assert_eq!(chunks[0].chunk_blob_id, "BLOB100"); + assert_eq!(chunks[0].chunk_digest, "DIGEST100"); + assert_eq!(chunks[0].chunk_compressed_size, 100); + assert_eq!(chunks[0].chunk_uncompressed_size, 200); + assert_eq!(chunks[0].chunk_compressed_offset, 300); + assert_eq!(chunks[0].chunk_uncompressed_offset, 400); + Ok(()) + } + + #[test] + fn test_algorithm_exponential_smoothing() -> Result<(), Box> { + let threshold = 0.1; + let mut all_chunk: Vec = Vec::new(); + for i in 0..199 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", 0), + version: format!("1.0.0{}", (i + 1) / 100), + chunk_blob_id: format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", (i + 1) % 2), + chunk_compressed_size: i, + chunk_uncompressed_size: i * 2, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + all_chunk.push(chunk); + } + let chunkdict = Algorithm::::exponential_smoothing(all_chunk, threshold)?; + assert_eq!(chunkdict.len(), 2); + assert_eq!(chunkdict[0].image_reference, "REDIS0"); + assert_eq!(chunkdict[0].version, "1.0.01"); + assert_eq!(chunkdict[0].chunk_blob_id, "BLOB99"); + assert_eq!(chunkdict[0].chunk_digest, "DIGEST0"); + assert_eq!(chunkdict[0].chunk_compressed_size, 99); + assert_eq!(chunkdict[0].chunk_uncompressed_size, 198); + assert_eq!(chunkdict[0].chunk_compressed_offset, 297); + assert_eq!(chunkdict[0].chunk_uncompressed_offset, 396); + Ok(()) + } + + #[test] + fn test_divide_by_image() -> Result<(), Box> { + let db_url = "./metadata.db"; + let chunk_table = ChunkTable::new(db_url)?; + chunk_table.create()?; + for i in 0..200 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i / 50), + version: format!("1.0.0{}", (i + 1) / 100), + chunk_blob_id: format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", (i + 1) % 2), + chunk_compressed_size: i, + chunk_uncompressed_size: i * 2, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + chunk_table.insert(&chunk)?; + } + let algorithm = String::from("exponential_smoothing"); + let algorithm = Algorithm::::new(algorithm, db_url)?; + let all_chunks = algorithm.db.chunk_table.list_all()?; + assert_eq!(all_chunks.len(), 200); + let datadict = Algorithm::::divide_by_image(&all_chunks)?; + assert_eq!(datadict.len(), 4); + assert_eq!(datadict[3].cluster_id, 0); + assert_eq!(datadict[3].chunk_list.len(), 50); + chunk_table.clear()?; + Ok(()) + } + + #[test] + fn test_distance() -> Result<(), Box> { + let mut all_chunks1: Vec = Vec::new(); + for i in 0..200 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", 0), + version: format!("1.0.0{}", (i + 1) / 100), + chunk_blob_id: 
format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", (i + 1) % 4), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + all_chunks1.push(chunk); + } + let mut all_chunks2: Vec = Vec::new(); + for i in 0..200 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", 1), + version: format!("1.0.0{}", (i + 1) / 100), + chunk_blob_id: format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", (i + 1) % 4), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + all_chunks2.push(chunk); + } + let datadict = Algorithm::::distance(&all_chunks1, &all_chunks2)?; + assert!( + (datadict - 0.01).abs() <= 0.0001, + "Expected {} to be approximately equal to {} with tolerance {}", + datadict, + 0.01, + 0.0001 + ); + Ok(()) + } + + #[test] + fn test_divide_set() -> Result<(), Box> { + let mut all_chunks: Vec = Vec::new(); + for i in 0..200 { + for j in 0..100 { + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i), + version: format!("1.0.0{}", j / 10), + chunk_blob_id: format!("BLOB{}", j), + chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: 1, + chunk_uncompressed_offset: 1, + }; + all_chunks.push(chunk); + } + } + assert_eq!(all_chunks.len(), 20000); + let (train, test) = Algorithm::::divide_set(&all_chunks, 0.7)?; + assert_eq!(train.len(), 14000); + assert_eq!(train[0].image_reference, "REDIS0"); + assert_eq!(train[0].version, "1.0.00"); + assert_eq!(test.len(), 6000); + assert_eq!(test[0].image_reference, "REDIS0"); + assert_eq!(test[0].version, "1.0.07"); + Ok(()) + } + + #[test] + fn test_dbscan() -> Result<(), Box> { + let mut all_chunks: Vec = Vec::new(); + let radius = 0.6; + for i in 0..200 { + for j in 0..100 { + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i), + version: format!("1.0.0{}", j / 10), + chunk_blob_id: format!("BLOB{}", j), + chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: 1, + chunk_uncompressed_offset: 1, + }; + all_chunks.push(chunk); + } + } + assert_eq!(all_chunks.len(), 20000); + let mut data_point = Algorithm::::divide_by_image(&all_chunks)?; + let datadict = Algorithm::::dbsacn(&mut data_point, radius)?; + assert_eq!(datadict.len(), 200); + if datadict[150].chunk_list[0].chunk_digest == datadict[0].chunk_list[0].chunk_digest { + assert_eq!(datadict[150].cluster_id, 1); + } else { + assert_eq!(datadict[150].cluster_id, 2); + } + assert_eq!(datadict[0].cluster_id, 1); + assert!(datadict[150].clustered); + assert!(datadict[150].visited); + assert_eq!(datadict[0].chunk_list.len(), 100); + Ok(()) + } + + #[test] + fn test_aggregate_chunk() -> Result<(), Box> { + let mut all_chunks: Vec = Vec::new(); + let radius = 0.6; + for i in 0..200 { + for j in 0..100 { + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i), + version: format!("1.0.0{}", (j + 1) / 100), + chunk_blob_id: format!("BLOB{}", j), + chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: 1, + chunk_uncompressed_offset: 1, + }; + all_chunks.push(chunk); + } + } + assert_eq!(all_chunks.len(), 20000); + let mut data_point = Algorithm::::divide_by_image(&all_chunks)?; + let 
data_cluster = Algorithm::::dbsacn(&mut data_point, radius)?; + let datadict = Algorithm::::aggregate_chunk(&data_cluster)?; + assert_eq!(datadict.len(), 2); + Ok(()) + } + + #[test] + fn test_deduplicate_image() -> Result<(), Box> { + let mut all_chunks: Vec = Vec::new(); + for i in 0..200 { + for j in 0..100 { + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", i), + version: format!("1.0.0{}", j / 10), + chunk_blob_id: format!("BLOB{}", j), + chunk_digest: format!("DIGEST{}", j + (i / 100) * 100), + chunk_compressed_size: 1, + chunk_uncompressed_size: 1, + chunk_compressed_offset: 1, + chunk_uncompressed_offset: 1, + }; + all_chunks.push(chunk); + } + } + assert_eq!(all_chunks.len(), 20000); + let datadict = Algorithm::::deduplicate_image(all_chunks)?; + for i in datadict.clone() { + for (_, b) in i { + if !b.is_empty() { + assert_eq!(b.len(), 70); + } + } + } + assert_eq!(datadict[0].len(), 2); + assert_eq!(datadict[0].values().len(), 2); + assert_eq!(datadict[1].len(), 0); + assert_eq!(datadict[1].values().len(), 0); + assert_eq!(datadict.len(), 7); + Ok(()) + } + + #[test] + fn test_deduplicate_version() -> Result<(), Box> { + let mut all_chunks: Vec = Vec::new(); + let mut chunkdict: Vec = Vec::new(); + for i in 0..200 { + let i64 = i as u64; + let chunk = ChunkdictChunkInfo { + image_reference: format!("REDIS{}", 0), + version: format!("1.0.0{}", (i + 1) / 20), + chunk_blob_id: format!("BLOB{}", i), + chunk_digest: format!("DIGEST{}", (i + 1) % 2), + chunk_compressed_size: i, + chunk_uncompressed_size: i * 2, + chunk_compressed_offset: i64 * 3, + chunk_uncompressed_offset: i64 * 4, + }; + all_chunks.push(chunk); + } + let (chunkdict_version, chunkdict_image) = + Algorithm::::deduplicate_version(&all_chunks)?; + for (_, dictionary) in chunkdict_version { + chunkdict.extend(dictionary); + } + + assert_eq!(chunkdict[0].image_reference, "REDIS0"); + assert_eq!(chunkdict[0].chunk_compressed_size, 21); + assert_eq!(chunkdict.len(), 2); + + for single_clustering in chunkdict_image { + for (_, cluster_dictionary) in single_clustering { + chunkdict.extend(cluster_dictionary); + } + } + assert_eq!(chunkdict.len(), 2); + Ok(()) + } +} diff --git a/src/bin/nydus-image/inspect.rs b/src/bin/nydus-image/inspect.rs index 0a0e720f72d..8d344e8f39c 100644 --- a/src/bin/nydus-image/inspect.rs +++ b/src/bin/nydus-image/inspect.rs @@ -1,774 +1,774 @@ -// Copyright 2020 Ant Group. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::{ - collections::BTreeMap, - ffi::OsString, - fs::Permissions, - io::{Error, ErrorKind, Write}, - ops::DerefMut, - os::unix::prelude::PermissionsExt, - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use nydus_api::ConfigV2; -use nydus_rafs::metadata::{RafsInode, RafsInodeExt, RafsInodeWalkAction, RafsSuper}; -use nydus_rafs::RafsIoReader; -use nydus_storage::device::BlobChunkInfo; -use serde_json::Value; - -pub(crate) struct RafsInspector { - request_mode: bool, - // Rafs Meta Data - rafs_meta: RafsSuper, - // Bootstrap - bootstrap: Arc>, - // The inode number of current directory - cur_dir_ino: u64, - // Inode numbers of parent directories - parent_inodes: Vec, - // Inode of parent directory for rafs v6 files - file_parents: BTreeMap>, -} - -impl RafsInspector { - // create the RafsInspector - pub fn new( - bootstrap_path: &Path, - request_mode: bool, - config: Arc, - ) -> Result { - let (rafs_meta, f) = RafsSuper::load_from_file(bootstrap_path, config, false)?; - let root_ino = rafs_meta.superblock.root_ino(); - - Ok(RafsInspector { - request_mode, - rafs_meta, - bootstrap: Arc::new(Mutex::new(f)), - cur_dir_ino: root_ino, - parent_inodes: Vec::new(), - file_parents: BTreeMap::new(), - }) - } - - // Generate the files parent inode BTreeMap for rafs v6 - fn generate_file_parents(&mut self) -> anyhow::Result<()> { - let mut file_parents = BTreeMap::new(); - self.walk_dir( - self.rafs_meta.superblock.root_ino(), - None, - None, - &mut |parent, inode, _| { - if !inode.is_dir() { - if let Some(parent) = parent { - file_parents - .entry(inode.ino()) - .or_insert_with(Vec::new) - .push(parent.ino()); - } - } - Ok(()) - }, - )?; - self.file_parents = file_parents; - Ok(()) - } - - // Implement command "stats"" - // Print information of "RafsSuperMeta" - fn cmd_stats(&mut self) -> Result, anyhow::Error> { - let o = if self.request_mode { - Some(json!({"inodes_count": self.rafs_meta.meta.inodes_count})) - } else { - println!( - r#" - Version: {version} - Inodes Count: {inodes_count} - Chunk Size: {chunk_size}KB - Root Inode: {root_inode} - Flags: {flags} - Blob table offset: 0x{blob_tbl_offset:x} - Blob table size: 0x{blob_tbl_size:x} - Prefetch table offset: 0x{prefetch_tbl_offset:x} - Prefetch table entries: 0x{prefetch_tbl_entries:x} - Chunk table offset: 0x{chunk_tbl_offset:x} - Chunk table size: 0x{chunk_tbl_size:x} - "#, - version = self.rafs_meta.meta.version >> 8, - inodes_count = self.rafs_meta.meta.inodes_count, - chunk_size = self.rafs_meta.meta.chunk_size / 1024, - flags = self.rafs_meta.meta.flags, - root_inode = self.rafs_meta.superblock.root_ino(), - blob_tbl_offset = self.rafs_meta.meta.blob_table_offset, - blob_tbl_size = self.rafs_meta.meta.blob_table_size, - prefetch_tbl_offset = self.rafs_meta.meta.prefetch_table_offset, - prefetch_tbl_entries = self.rafs_meta.meta.prefetch_table_entries, - chunk_tbl_offset = self.rafs_meta.meta.chunk_table_offset, - chunk_tbl_size = self.rafs_meta.meta.chunk_table_size, - ); - None - }; - Ok(o) - } - - // Implement command "ls" - // Walk_children_inodes with handler defined - fn cmd_list_dir(&mut self) -> Result, anyhow::Error> { - let dir_inode = self.rafs_meta.get_inode(self.cur_dir_ino, false)?; - - // Entry_offset: 0, and skip 0 - dir_inode.walk_children_inodes(0, &mut |_inode, f, ino, _offset| { - trace!("inode {:?}, name: {:?}", ino, f); - - if f == "." || f == ".." 
{ - return Ok(RafsInodeWalkAction::Continue); - } - - let child_inode = self.rafs_meta.get_inode(ino, false)?; - let sign = if child_inode.is_reg() { - "-" - } else if child_inode.is_dir() { - "d" - } else if child_inode.is_symlink() { - "l" - } else { - " " - }; - - println!( - r#"{} {inode_number:<8} {name:?}"#, - sign, - name = f, - inode_number = ino, - ); - - Ok(RafsInodeWalkAction::Continue) - })?; - - Ok(None) - } - - // Implement command "cd" - // Change_dir to address relative to current directory - fn cmd_change_dir(&mut self, dir_name: &str) -> Result, anyhow::Error> { - // Special path - if dir_name == "." { - return Ok(None); - } - if dir_name == ".." { - // Parent_inodes is empty only when current directory is root, - // so we do not have to handle the error case - if let Some(parent_ino) = self.parent_inodes.pop() { - self.cur_dir_ino = parent_ino; - } - return Ok(None); - } - - // Walk through children inodes of current directory - let mut new_dir_ino = None; - let mut err = ""; - let dir_inodes = self.rafs_meta.get_inode(self.cur_dir_ino, false)?; - dir_inodes.walk_children_inodes(0, &mut |_inode, child_name, child_ino, _offset| { - let child_inode = self.rafs_meta.get_inode(child_ino, false)?; - if child_name != dir_name { - Ok(RafsInodeWalkAction::Continue) - } else { - if child_inode.is_dir() { - new_dir_ino = Some(child_ino); - } else { - err = "not a directory"; - } - Ok(RafsInodeWalkAction::Break) - } - })?; - - if let Some(n) = new_dir_ino { - self.parent_inodes.push(self.cur_dir_ino); - self.cur_dir_ino = n; - } else { - println!("{} is {}", dir_name, err); - } - - Ok(None) - } - - // Implement command "stat" - fn cmd_stat_file(&self, file_name: &str) -> Result, anyhow::Error> { - // Stat current directory - if file_name == "." { - let inode = self.rafs_meta.get_extended_inode(self.cur_dir_ino, false)?; - let inode_parent = self.rafs_meta.get_extended_inode(inode.parent(), false)?; - return self.stat_single_file(Some(inode_parent.as_ref()), inode.as_inode()); - } - - // Walk through children inodes to find the file - // Print its basic information and all chunk information - let dir_inode = self.rafs_meta.get_extended_inode(self.cur_dir_ino, false)?; - dir_inode.walk_children_inodes(0, &mut |_inode, child_name, child_ino, _offset| { - if child_name == file_name { - // Print file information - let child_inode = self.rafs_meta.get_inode(child_ino, false)?; - if let Err(e) = - self.stat_single_file(Some(dir_inode.as_ref()), child_inode.as_ref()) - { - return Err(Error::new(ErrorKind::Other, e)); - } - - let child_inode = dir_inode.get_child_by_name(&child_name)?; - // only reg_file can get and print chunk info - if !child_inode.is_reg() { - return Ok(RafsInodeWalkAction::Break); - } - - let mut chunks = Vec::>::new(); - let chunk_count = child_inode.get_chunk_count(); - for idx in 0..chunk_count { - let cur_chunk = child_inode.get_chunk_info(idx)?; - chunks.push(cur_chunk); - } - - println!(" Chunk list:"); - for (i, c) in chunks.iter().enumerate() { - let blob_id = if let Ok(id) = self.get_blob_id_by_index(c.blob_index()) { - id.to_owned() - } else { - error!( - "Blob index is {}. 
But no blob entry associate with it", - c.blob_index() - ); - return Ok(RafsInodeWalkAction::Break); - }; - - // file_offset = chunk_index * chunk_size - let file_offset = i * self.rafs_meta.meta.chunk_size as usize; - - println!( - r#" {} -> - file offset: {file_offset}, chunk index: {chunk_index} - compressed size: {compressed_size}, decompressed size: {decompressed_size} - compressed offset: {compressed_offset}, decompressed offset: {decompressed_offset} - blob id: {blob_id} - chunk id: {chunk_id} - "#, - i, - chunk_index = c.id(), - file_offset = file_offset, - compressed_size = c.compressed_size(), - decompressed_size = c.uncompressed_size(), - decompressed_offset = c.uncompressed_offset(), - compressed_offset = c.compressed_offset(), - blob_id = blob_id, - chunk_id = c.chunk_id() - ); - } - Ok(RafsInodeWalkAction::Break) - } else { - Ok(RafsInodeWalkAction::Continue) - } - })?; - - Ok(None) - } - - // Implement command "blobs" - fn cmd_list_blobs(&self) -> Result, anyhow::Error> { - let blob_infos = self.rafs_meta.superblock.get_blob_infos(); - let extra_infos = self - .rafs_meta - .superblock - .get_blob_extra_infos() - .unwrap_or_default(); - - let mut value = json!([]); - for blob_info in blob_infos.iter() { - if self.request_mode { - let v = json!({"blob_id": blob_info.blob_id(), - "readahead_offset": blob_info.prefetch_offset(), - "readahead_size": blob_info.prefetch_size(), - "decompressed_size": blob_info.uncompressed_size(), - "compressed_size": blob_info.compressed_size(),}); - value.as_array_mut().unwrap().push(v); - } else { - let mapped_blkaddr = extra_infos - .get(&blob_info.blob_id()) - .map(|v| v.mapped_blkaddr) - .unwrap_or_default(); - print!( - r#" -Blob Index: {blob_index} -Blob ID: {blob_id} -Raw Blob ID: {raw_blob_id} -Blob Size: {blob_size} -Compressed Data Size: {compressed_size} -Uncompressed Data Size: {uncompressed_size} -Mapped Block Address: {mapped_blkaddr} -Features: {features:?} -Compressor: {compressor} -Digester: {digester} -Cipher: {cipher} -Chunk Size: 0x{chunk_size:x} -Chunk Count: {chunk_count} -Prefetch Table Offset: {prefetch_tbl_offset} -Prefetch Table Size: {prefetch_tbl_size} -Meta Compressor: {meta_compressor} -Meta Offset: {meta_offset} -Meta Compressed Size: {meta_comp_size} -Meta Uncompressed Size: {meta_uncomp_size} -ToC Digest: {toc_digest} -ToC Size: {toc_size} -RAFS Blob Digest: {rafs_digest} -RAFS Blob Size: {rafs_size} -"#, - blob_index = blob_info.blob_index(), - blob_id = blob_info.blob_id(), - raw_blob_id = blob_info.raw_blob_id(), - features = blob_info.features(), - uncompressed_size = blob_info.uncompressed_size(), - blob_size = blob_info.compressed_size(), - compressed_size = blob_info.compressed_data_size(), - chunk_size = blob_info.chunk_size(), - chunk_count = blob_info.chunk_count(), - compressor = blob_info.compressor(), - digester = blob_info.digester(), - cipher = blob_info.cipher(), - prefetch_tbl_offset = blob_info.prefetch_offset(), - prefetch_tbl_size = blob_info.prefetch_size(), - meta_compressor = blob_info.meta_ci_compressor(), - meta_offset = blob_info.meta_ci_offset(), - meta_comp_size = blob_info.meta_ci_compressed_size(), - meta_uncomp_size = blob_info.meta_ci_uncompressed_size(), - toc_digest = hex::encode(blob_info.blob_toc_digest()), - toc_size = blob_info.blob_toc_size(), - rafs_digest = hex::encode(blob_info.blob_meta_digest()), - rafs_size = blob_info.blob_meta_size(), - ); - } - } - - if self.request_mode { - return Ok(Some(value)); - } - - Ok(None) - } - - // Convert an inode number to a file path. 
- // For rafs v6, it will return all paths of the hard link file. - fn path_from_ino(&mut self, ino: u64) -> Result, anyhow::Error> { - let inode = self.rafs_meta.superblock.get_inode(ino, false)?; - let mut file_paths = Vec::new(); - if ino == self.rafs_meta.superblock.root_ino() { - file_paths.push(PathBuf::from( - self.rafs_meta - .superblock - .get_extended_inode(ino, false)? - .name(), - )); - return Ok(file_paths); - } - - if self.rafs_meta.meta.is_v6() && !inode.is_dir() { - if self.file_parents.is_empty() { - self.generate_file_parents()?; - } - - if let Some(parents) = self.file_parents.get(&ino) { - for parent in parents { - let parent_inode = self - .rafs_meta - .superblock - .get_extended_inode(*parent, false)?; - let parent_path = self.rafs_meta.path_from_ino(*parent)?; - let child_count = parent_inode.get_child_count(); - for idx in 0..child_count { - let child = parent_inode.get_child_by_index(idx)?; - if child.ino() == ino { - file_paths.push(parent_path.join(child.name())); - break; - } - } - } - } - } else { - let file_path = self.rafs_meta.path_from_ino(ino as u64)?; - file_paths.push(file_path); - }; - Ok(file_paths) - } - - // Implement command "prefetch" - fn cmd_list_prefetch(&mut self) -> Result, anyhow::Error> { - let mut guard = self.bootstrap.lock().unwrap(); - let bootstrap = guard.deref_mut(); - let prefetch_inos = self.rafs_meta.get_prefetched_inos(bootstrap)?; - drop(guard); - - let o = if self.request_mode { - let mut value = json!([]); - for ino in prefetch_inos { - let path = self.path_from_ino(ino as u64)?; - let v = json!({"inode": ino, "path": path}); - value.as_array_mut().unwrap().push(v); - } - Some(value) - } else { - println!( - "Total Prefetching Files: {}", - self.rafs_meta.meta.prefetch_table_entries - ); - for ino in prefetch_inos { - let path_string: Vec = self - .path_from_ino(ino as u64)? 
- .iter() - .map(|x| String::from(x.to_string_lossy())) - .collect(); - - println!( - r#"Inode Number:{inode_number:10} | Path: {path:?} "#, - path = path_string.join(" "), - inode_number = ino, - ); - } - None - }; - - Ok(o) - } - - // Implement command "chunk" - fn cmd_show_chunk(&self, offset_in_blob: u64) -> Result, anyhow::Error> { - self.rafs_meta.walk_directory::( - self.rafs_meta.superblock.root_ino(), - None, - &mut |inode: Arc, _path: &Path| -> anyhow::Result<()> { - // only regular file has data chunks - if !inode.is_reg() { - return Ok(()); - } - - // walk through chunks of current file - let chunk_count = inode.get_chunk_count(); - for idx in 0..chunk_count { - let cur_chunk = inode.get_chunk_info(idx)?; - if cur_chunk.compressed_offset() == offset_in_blob { - let path = self.rafs_meta.path_from_ino(inode.parent()).unwrap(); - let block_id = if let Ok(blob_id) = - self.get_blob_id_by_index(cur_chunk.blob_index()) - { - blob_id.to_owned() - } else { - return Err(anyhow!( - "Can't find blob by its index, index={:?}", - cur_chunk.blob_index() - )); - }; - - println!( - r#" -File: {:width$} Parent Path: {:width$} -Compressed Offset: {}, Compressed Size: {} -Decompressed Offset: {}, Decompressed Size: {} -Chunk ID: {:50}, -Blob ID: {} -"#, - inode.name().to_string_lossy(), - path.to_string_lossy(), - cur_chunk.compressed_offset(), - cur_chunk.compressed_size(), - cur_chunk.uncompressed_offset(), - cur_chunk.uncompressed_size(), - cur_chunk.chunk_id(), - block_id, - width = 32 - ); - } - } - Ok(()) - }, - )?; - - Ok(None) - } - - #[allow(clippy::type_complexity)] - /// Walkthrough the file tree rooted at ino, calling cb for each file or directory - /// in the tree by DFS order, including ino, please ensure ino is a directory. - fn walk_dir( - &self, - ino: u64, - parent: Option<&PathBuf>, - parent_inode_ext: Option<&dyn RafsInodeExt>, - cb: &mut dyn FnMut(Option<&dyn RafsInodeExt>, &dyn RafsInode, &Path) -> anyhow::Result<()>, - ) -> anyhow::Result<()> { - let inode = self.rafs_meta.superblock.get_extended_inode(ino, false)?; - if !inode.is_dir() { - bail!("inode {} is not a directory", ino); - } - self.walk_dir_inner(inode.as_ref(), parent, parent_inode_ext, cb) - } - - #[allow(clippy::only_used_in_recursion, clippy::type_complexity)] - fn walk_dir_inner( - &self, - inode: &dyn RafsInodeExt, - parent: Option<&PathBuf>, - parent_inode_ext: Option<&dyn RafsInodeExt>, - cb: &mut dyn FnMut(Option<&dyn RafsInodeExt>, &dyn RafsInode, &Path) -> anyhow::Result<()>, - ) -> anyhow::Result<()> { - let path = if let Some(parent) = parent { - parent.join(inode.name()) - } else { - PathBuf::from("/") - }; - cb(parent_inode_ext, inode.as_inode(), &path)?; - if !inode.is_dir() { - return Ok(()); - } - let child_count = inode.get_child_count(); - for idx in 0..child_count { - let child = inode.get_child_by_index(idx)?; - self.walk_dir_inner(child.as_ref(), Some(&path), Some(inode), cb)?; - } - Ok(()) - } - - // Implement command "icheck" - fn cmd_check_inode(&mut self, ino: u64) -> Result, anyhow::Error> { - let current_inode = self.rafs_meta.superblock.get_inode(ino, false)?; - if self.rafs_meta.meta.is_v6() && !current_inode.is_dir() { - if self.file_parents.is_empty() { - self.generate_file_parents()?; - } - - if let Some(parents) = self.file_parents.get(&ino) { - for parent in parents { - let parent_inode = self - .rafs_meta - .superblock - .get_extended_inode(*parent, false)?; - let parent_path = self.rafs_meta.path_from_ino(*parent)?; - let child_count = parent_inode.get_child_count(); - for 
idx in 0..child_count { - let child = parent_inode.get_child_by_index(idx)?; - if child.ino() == ino { - let path = parent_path.join(child.name()); - println!(r#"{}"#, path.to_string_lossy(),); - self.stat_single_file( - Some(parent_inode.as_ref()), - current_inode.as_ref(), - )?; - break; - } - } - } - } - } else { - self.walk_dir( - self.rafs_meta.superblock.root_ino(), - None, - None, - &mut |parent, inode, path| { - if inode.ino() == ino { - println!(r#"{}"#, path.to_string_lossy(),); - self.stat_single_file(parent, inode)?; - } - Ok(()) - }, - )?; - } - - Ok(None) - } -} - -impl RafsInspector { - /// Get file name of the inode, the rafs v6 file is handled separately. - fn get_file_name(&self, parent_inode: &dyn RafsInodeExt, inode: &dyn RafsInode) -> OsString { - let mut filename = OsString::from(""); - if self.rafs_meta.meta.is_v6() && !inode.is_dir() { - parent_inode - .walk_children_inodes( - 0, - &mut |_inode: Option>, name: OsString, cur_ino, _offset| { - if cur_ino == inode.ino() { - filename = name; - Ok(RafsInodeWalkAction::Break) - } else { - Ok(RafsInodeWalkAction::Continue) - } - }, - ) - .unwrap(); - } else if let Ok(inode) = self - .rafs_meta - .superblock - .get_extended_inode(inode.ino(), false) - { - filename = inode.name(); - } - filename - } - - // print information of single file - fn stat_single_file( - &self, - parent_inode: Option<&dyn RafsInodeExt>, - inode: &dyn RafsInode, - ) -> Result, anyhow::Error> { - let inode_attr = inode.get_attr(); - - if let Some(parent) = parent_inode { - println!( - r#" -Inode Number: {inode_number} -Name: {name:?} -Size: {size} -Parent: {parent} -Mode: 0x{mode:X} -Permissions: {permissions:o} -Nlink: {nlink} -UID: {uid} -GID: {gid} -Mtime: {mtime} -MtimeNsec: {mtime_nsec} -Blocks: {blocks}"#, - inode_number = inode.ino(), - name = self.get_file_name(parent, inode), - size = inode.size(), - parent = parent.ino(), - mode = inode_attr.mode, - permissions = Permissions::from_mode(inode_attr.mode).mode(), - nlink = inode_attr.nlink, - uid = inode_attr.uid, - gid = inode_attr.gid, - mtime = inode_attr.mtime, - mtime_nsec = inode_attr.mtimensec, - blocks = inode_attr.blocks, - ); - } - - Ok(None) - } - - // Match blobinfo by using blob index - fn get_blob_id_by_index(&self, blob_index: u32) -> Result { - let blob_infos = self.rafs_meta.superblock.get_blob_infos(); - for b in blob_infos.iter() { - if b.blob_index() == blob_index { - return Ok(b.blob_id()); - } - } - Err(anyhow!("can not find blob by index: {}", blob_index)) - } -} - -#[derive(Debug)] -pub(crate) enum ExecuteError { - HelpCommand, - IllegalCommand, - ArgumentParse, - Exit, - ExecError(anyhow::Error), -} - -pub(crate) struct Executor {} - -impl Executor { - pub fn execute( - inspector: &mut RafsInspector, - input: String, - ) -> Result, ExecuteError> { - let mut raw = input - .strip_suffix('\n') - .unwrap_or(&input) - .split_ascii_whitespace(); - let cmd = match raw.next() { - Some(c) => c, - None => return Ok(None), - }; - let args = raw.next().map(|a| a.trim()); - debug!("execute {:?} {:?}", cmd, args); - - let output = match (cmd, args) { - ("help", _) => { - Self::usage(); - return Err(ExecuteError::HelpCommand); - } - ("exit", _) | ("q", _) => return Err(ExecuteError::Exit), - ("stats", None) => inspector.cmd_stats(), - ("ls", None) => inspector.cmd_list_dir(), - ("cd", Some(dir)) => inspector.cmd_change_dir(dir), - ("stat", Some(file_name)) => inspector.cmd_stat_file(file_name), - ("blobs", None) => inspector.cmd_list_blobs(), - ("prefetch", None) => 
inspector.cmd_list_prefetch(), - ("chunk", Some(argument)) => { - let offset: u64 = argument.parse().unwrap(); - inspector.cmd_show_chunk(offset) - } - ("icheck", Some(argument)) => { - let ino: u64 = argument.parse().map_err(|_| { - println!("Wrong INODE is specified. Is it a inode number?"); - ExecuteError::ArgumentParse - })?; - inspector.cmd_check_inode(ino) - } - (cmd, _) => { - println!("Unsupported command: {}", cmd); - { - Self::usage(); - return Err(ExecuteError::IllegalCommand); - }; - } - } - .map_err(ExecuteError::ExecError)?; - - Ok(output) - } - - pub(crate) fn usage() { - println!( - r#" - stats: Display RAFS filesystesm metadata - ls: Show files in current directory - cd DIR: Change current directory - stat FILE_NAME: Show particular information of RAFS file - blobs: Show blob table - prefetch: Show prefetch table - chunk OFFSET: List basic info of a single chunk together with a list of files that share it - icheck INODE: Show path of the inode and basic information - exit: Exit - "# - ); - } -} - -pub(crate) struct Prompt {} - -impl Prompt { - pub(crate) fn run(mut inspector: RafsInspector) { - loop { - print!("Inspecting RAFS :> "); - std::io::stdout().flush().unwrap(); - - let mut input = String::new(); - std::io::stdin().read_line(&mut input).unwrap(); - - match Executor::execute(&mut inspector, input) { - Err(ExecuteError::Exit) => break, - Err(ExecuteError::IllegalCommand) => continue, - Err(ExecuteError::HelpCommand) => continue, - Err(ExecuteError::ExecError(e)) => { - println!("Failed to execute command, {:?}", e); - continue; - } - Ok(Some(o)) => { - serde_json::to_writer(std::io::stdout(), &o) - .unwrap_or_else(|e| error!("Failed to serialize message, {:?}", e)); - } - _ => continue, - } - } - } -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::{ + collections::BTreeMap, + ffi::OsString, + fs::Permissions, + io::{Error, ErrorKind, Write}, + ops::DerefMut, + os::unix::prelude::PermissionsExt, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use nydus_api::ConfigV2; +use nydus_rafs::metadata::{RafsInode, RafsInodeExt, RafsInodeWalkAction, RafsSuper}; +use nydus_rafs::RafsIoReader; +use nydus_storage::device::BlobChunkInfo; +use serde_json::Value; + +pub(crate) struct RafsInspector { + request_mode: bool, + // Rafs Meta Data + rafs_meta: RafsSuper, + // Bootstrap + bootstrap: Arc>, + // The inode number of current directory + cur_dir_ino: u64, + // Inode numbers of parent directories + parent_inodes: Vec, + // Inode of parent directory for rafs v6 files + file_parents: BTreeMap>, +} + +impl RafsInspector { + // create the RafsInspector + pub fn new( + bootstrap_path: &Path, + request_mode: bool, + config: Arc, + ) -> Result { + let (rafs_meta, f) = RafsSuper::load_from_file(bootstrap_path, config, false)?; + let root_ino = rafs_meta.superblock.root_ino(); + + Ok(RafsInspector { + request_mode, + rafs_meta, + bootstrap: Arc::new(Mutex::new(f)), + cur_dir_ino: root_ino, + parent_inodes: Vec::new(), + file_parents: BTreeMap::new(), + }) + } + + // Generate the files parent inode BTreeMap for rafs v6 + fn generate_file_parents(&mut self) -> anyhow::Result<()> { + let mut file_parents = BTreeMap::new(); + self.walk_dir( + self.rafs_meta.superblock.root_ino(), + None, + None, + &mut |parent, inode, _| { + if !inode.is_dir() { + if let Some(parent) = parent { + file_parents + .entry(inode.ino()) + .or_insert_with(Vec::new) + .push(parent.ino()); + } + } + Ok(()) + }, + )?; + self.file_parents = file_parents; + Ok(()) + } + + // Implement command "stats"" + // Print information of "RafsSuperMeta" + fn cmd_stats(&mut self) -> Result, anyhow::Error> { + let o = if self.request_mode { + Some(json!({"inodes_count": self.rafs_meta.meta.inodes_count})) + } else { + println!( + r#" + Version: {version} + Inodes Count: {inodes_count} + Chunk Size: {chunk_size}KB + Root Inode: {root_inode} + Flags: {flags} + Blob table offset: 0x{blob_tbl_offset:x} + Blob table size: 0x{blob_tbl_size:x} + Prefetch table offset: 0x{prefetch_tbl_offset:x} + Prefetch table entries: 0x{prefetch_tbl_entries:x} + Chunk table offset: 0x{chunk_tbl_offset:x} + Chunk table size: 0x{chunk_tbl_size:x} + "#, + version = self.rafs_meta.meta.version >> 8, + inodes_count = self.rafs_meta.meta.inodes_count, + chunk_size = self.rafs_meta.meta.chunk_size / 1024, + flags = self.rafs_meta.meta.flags, + root_inode = self.rafs_meta.superblock.root_ino(), + blob_tbl_offset = self.rafs_meta.meta.blob_table_offset, + blob_tbl_size = self.rafs_meta.meta.blob_table_size, + prefetch_tbl_offset = self.rafs_meta.meta.prefetch_table_offset, + prefetch_tbl_entries = self.rafs_meta.meta.prefetch_table_entries, + chunk_tbl_offset = self.rafs_meta.meta.chunk_table_offset, + chunk_tbl_size = self.rafs_meta.meta.chunk_table_size, + ); + None + }; + Ok(o) + } + + // Implement command "ls" + // Walk_children_inodes with handler defined + fn cmd_list_dir(&mut self) -> Result, anyhow::Error> { + let dir_inode = self.rafs_meta.get_inode(self.cur_dir_ino, false)?; + + // Entry_offset: 0, and skip 0 + dir_inode.walk_children_inodes(0, &mut |_inode, f, ino, _offset| { + trace!("inode {:?}, name: {:?}", ino, f); + + if f == "." || f == ".." 
{ + return Ok(RafsInodeWalkAction::Continue); + } + + let child_inode = self.rafs_meta.get_inode(ino, false)?; + let sign = if child_inode.is_reg() { + "-" + } else if child_inode.is_dir() { + "d" + } else if child_inode.is_symlink() { + "l" + } else { + " " + }; + + println!( + r#"{} {inode_number:<8} {name:?}"#, + sign, + name = f, + inode_number = ino, + ); + + Ok(RafsInodeWalkAction::Continue) + })?; + + Ok(None) + } + + // Implement command "cd" + // Change_dir to address relative to current directory + fn cmd_change_dir(&mut self, dir_name: &str) -> Result, anyhow::Error> { + // Special path + if dir_name == "." { + return Ok(None); + } + if dir_name == ".." { + // Parent_inodes is empty only when current directory is root, + // so we do not have to handle the error case + if let Some(parent_ino) = self.parent_inodes.pop() { + self.cur_dir_ino = parent_ino; + } + return Ok(None); + } + + // Walk through children inodes of current directory + let mut new_dir_ino = None; + let mut err = ""; + let dir_inodes = self.rafs_meta.get_inode(self.cur_dir_ino, false)?; + dir_inodes.walk_children_inodes(0, &mut |_inode, child_name, child_ino, _offset| { + let child_inode = self.rafs_meta.get_inode(child_ino, false)?; + if child_name != dir_name { + Ok(RafsInodeWalkAction::Continue) + } else { + if child_inode.is_dir() { + new_dir_ino = Some(child_ino); + } else { + err = "not a directory"; + } + Ok(RafsInodeWalkAction::Break) + } + })?; + + if let Some(n) = new_dir_ino { + self.parent_inodes.push(self.cur_dir_ino); + self.cur_dir_ino = n; + } else { + println!("{} is {}", dir_name, err); + } + + Ok(None) + } + + // Implement command "stat" + fn cmd_stat_file(&self, file_name: &str) -> Result, anyhow::Error> { + // Stat current directory + if file_name == "." { + let inode = self.rafs_meta.get_extended_inode(self.cur_dir_ino, false)?; + let inode_parent = self.rafs_meta.get_extended_inode(inode.parent(), false)?; + return self.stat_single_file(Some(inode_parent.as_ref()), inode.as_inode()); + } + + // Walk through children inodes to find the file + // Print its basic information and all chunk information + let dir_inode = self.rafs_meta.get_extended_inode(self.cur_dir_ino, false)?; + dir_inode.walk_children_inodes(0, &mut |_inode, child_name, child_ino, _offset| { + if child_name == file_name { + // Print file information + let child_inode = self.rafs_meta.get_inode(child_ino, false)?; + if let Err(e) = + self.stat_single_file(Some(dir_inode.as_ref()), child_inode.as_ref()) + { + return Err(Error::new(ErrorKind::Other, e)); + } + + let child_inode = dir_inode.get_child_by_name(&child_name)?; + // only reg_file can get and print chunk info + if !child_inode.is_reg() { + return Ok(RafsInodeWalkAction::Break); + } + + let mut chunks = Vec::>::new(); + let chunk_count = child_inode.get_chunk_count(); + for idx in 0..chunk_count { + let cur_chunk = child_inode.get_chunk_info(idx)?; + chunks.push(cur_chunk); + } + + println!(" Chunk list:"); + for (i, c) in chunks.iter().enumerate() { + let blob_id = if let Ok(id) = self.get_blob_id_by_index(c.blob_index()) { + id.to_owned() + } else { + error!( + "Blob index is {}. 
But no blob entry associate with it", + c.blob_index() + ); + return Ok(RafsInodeWalkAction::Break); + }; + + // file_offset = chunk_index * chunk_size + let file_offset = i * self.rafs_meta.meta.chunk_size as usize; + + println!( + r#" {} -> + file offset: {file_offset}, chunk index: {chunk_index} + compressed size: {compressed_size}, decompressed size: {decompressed_size} + compressed offset: {compressed_offset}, decompressed offset: {decompressed_offset} + blob id: {blob_id} + chunk id: {chunk_id} + "#, + i, + chunk_index = c.id(), + file_offset = file_offset, + compressed_size = c.compressed_size(), + decompressed_size = c.uncompressed_size(), + decompressed_offset = c.uncompressed_offset(), + compressed_offset = c.compressed_offset(), + blob_id = blob_id, + chunk_id = c.chunk_id() + ); + } + Ok(RafsInodeWalkAction::Break) + } else { + Ok(RafsInodeWalkAction::Continue) + } + })?; + + Ok(None) + } + + // Implement command "blobs" + fn cmd_list_blobs(&self) -> Result, anyhow::Error> { + let blob_infos = self.rafs_meta.superblock.get_blob_infos(); + let extra_infos = self + .rafs_meta + .superblock + .get_blob_extra_infos() + .unwrap_or_default(); + + let mut value = json!([]); + for blob_info in blob_infos.iter() { + if self.request_mode { + let v = json!({"blob_id": blob_info.blob_id(), + "readahead_offset": blob_info.prefetch_offset(), + "readahead_size": blob_info.prefetch_size(), + "decompressed_size": blob_info.uncompressed_size(), + "compressed_size": blob_info.compressed_size(),}); + value.as_array_mut().unwrap().push(v); + } else { + let mapped_blkaddr = extra_infos + .get(&blob_info.blob_id()) + .map(|v| v.mapped_blkaddr) + .unwrap_or_default(); + print!( + r#" +Blob Index: {blob_index} +Blob ID: {blob_id} +Raw Blob ID: {raw_blob_id} +Blob Size: {blob_size} +Compressed Data Size: {compressed_size} +Uncompressed Data Size: {uncompressed_size} +Mapped Block Address: {mapped_blkaddr} +Features: {features:?} +Compressor: {compressor} +Digester: {digester} +Cipher: {cipher} +Chunk Size: 0x{chunk_size:x} +Chunk Count: {chunk_count} +Prefetch Table Offset: {prefetch_tbl_offset} +Prefetch Table Size: {prefetch_tbl_size} +Meta Compressor: {meta_compressor} +Meta Offset: {meta_offset} +Meta Compressed Size: {meta_comp_size} +Meta Uncompressed Size: {meta_uncomp_size} +ToC Digest: {toc_digest} +ToC Size: {toc_size} +RAFS Blob Digest: {rafs_digest} +RAFS Blob Size: {rafs_size} +"#, + blob_index = blob_info.blob_index(), + blob_id = blob_info.blob_id(), + raw_blob_id = blob_info.raw_blob_id(), + features = blob_info.features(), + uncompressed_size = blob_info.uncompressed_size(), + blob_size = blob_info.compressed_size(), + compressed_size = blob_info.compressed_data_size(), + chunk_size = blob_info.chunk_size(), + chunk_count = blob_info.chunk_count(), + compressor = blob_info.compressor(), + digester = blob_info.digester(), + cipher = blob_info.cipher(), + prefetch_tbl_offset = blob_info.prefetch_offset(), + prefetch_tbl_size = blob_info.prefetch_size(), + meta_compressor = blob_info.meta_ci_compressor(), + meta_offset = blob_info.meta_ci_offset(), + meta_comp_size = blob_info.meta_ci_compressed_size(), + meta_uncomp_size = blob_info.meta_ci_uncompressed_size(), + toc_digest = hex::encode(blob_info.blob_toc_digest()), + toc_size = blob_info.blob_toc_size(), + rafs_digest = hex::encode(blob_info.blob_meta_digest()), + rafs_size = blob_info.blob_meta_size(), + ); + } + } + + if self.request_mode { + return Ok(Some(value)); + } + + Ok(None) + } + + // Convert an inode number to a file path. 
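The `file offset` printed in the chunk list above is simply the chunk index multiplied by the filesystem chunk size. A small standalone sketch of that arithmetic, assuming a 1 MiB chunk size purely for illustration (the real value comes from `rafs_meta.meta.chunk_size`):

```rust
fn main() {
    // Illustration only: assume a 1 MiB chunk size.
    let chunk_size: usize = 0x10_0000;

    // The chunk list printed by `stat` maps the i-th chunk of a regular file
    // to the byte offset i * chunk_size inside that file.
    for i in 0..4usize {
        let file_offset = i * chunk_size;
        println!("chunk {} -> file offset {:#x}", i, file_offset);
    }
}
```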
+ // For rafs v6, it will return all paths of the hard link file. + fn path_from_ino(&mut self, ino: u64) -> Result, anyhow::Error> { + let inode = self.rafs_meta.superblock.get_inode(ino, false)?; + let mut file_paths = Vec::new(); + if ino == self.rafs_meta.superblock.root_ino() { + file_paths.push(PathBuf::from( + self.rafs_meta + .superblock + .get_extended_inode(ino, false)? + .name(), + )); + return Ok(file_paths); + } + + if self.rafs_meta.meta.is_v6() && !inode.is_dir() { + if self.file_parents.is_empty() { + self.generate_file_parents()?; + } + + if let Some(parents) = self.file_parents.get(&ino) { + for parent in parents { + let parent_inode = self + .rafs_meta + .superblock + .get_extended_inode(*parent, false)?; + let parent_path = self.rafs_meta.path_from_ino(*parent)?; + let child_count = parent_inode.get_child_count(); + for idx in 0..child_count { + let child = parent_inode.get_child_by_index(idx)?; + if child.ino() == ino { + file_paths.push(parent_path.join(child.name())); + break; + } + } + } + } + } else { + let file_path = self.rafs_meta.path_from_ino(ino as u64)?; + file_paths.push(file_path); + }; + Ok(file_paths) + } + + // Implement command "prefetch" + fn cmd_list_prefetch(&mut self) -> Result, anyhow::Error> { + let mut guard = self.bootstrap.lock().unwrap(); + let bootstrap = guard.deref_mut(); + let prefetch_inos = self.rafs_meta.get_prefetched_inos(bootstrap)?; + drop(guard); + + let o = if self.request_mode { + let mut value = json!([]); + for ino in prefetch_inos { + let path = self.path_from_ino(ino as u64)?; + let v = json!({"inode": ino, "path": path}); + value.as_array_mut().unwrap().push(v); + } + Some(value) + } else { + println!( + "Total Prefetching Files: {}", + self.rafs_meta.meta.prefetch_table_entries + ); + for ino in prefetch_inos { + let path_string: Vec = self + .path_from_ino(ino as u64)? 
+ .iter() + .map(|x| String::from(x.to_string_lossy())) + .collect(); + + println!( + r#"Inode Number:{inode_number:10} | Path: {path:?} "#, + path = path_string.join(" "), + inode_number = ino, + ); + } + None + }; + + Ok(o) + } + + // Implement command "chunk" + fn cmd_show_chunk(&self, offset_in_blob: u64) -> Result, anyhow::Error> { + self.rafs_meta.walk_directory::( + self.rafs_meta.superblock.root_ino(), + None, + &mut |inode: Arc, _path: &Path| -> anyhow::Result<()> { + // only regular file has data chunks + if !inode.is_reg() { + return Ok(()); + } + + // walk through chunks of current file + let chunk_count = inode.get_chunk_count(); + for idx in 0..chunk_count { + let cur_chunk = inode.get_chunk_info(idx)?; + if cur_chunk.compressed_offset() == offset_in_blob { + let path = self.rafs_meta.path_from_ino(inode.parent()).unwrap(); + let block_id = if let Ok(blob_id) = + self.get_blob_id_by_index(cur_chunk.blob_index()) + { + blob_id.to_owned() + } else { + return Err(anyhow!( + "Can't find blob by its index, index={:?}", + cur_chunk.blob_index() + )); + }; + + println!( + r#" +File: {:width$} Parent Path: {:width$} +Compressed Offset: {}, Compressed Size: {} +Decompressed Offset: {}, Decompressed Size: {} +Chunk ID: {:50}, +Blob ID: {} +"#, + inode.name().to_string_lossy(), + path.to_string_lossy(), + cur_chunk.compressed_offset(), + cur_chunk.compressed_size(), + cur_chunk.uncompressed_offset(), + cur_chunk.uncompressed_size(), + cur_chunk.chunk_id(), + block_id, + width = 32 + ); + } + } + Ok(()) + }, + )?; + + Ok(None) + } + + #[allow(clippy::type_complexity)] + /// Walkthrough the file tree rooted at ino, calling cb for each file or directory + /// in the tree by DFS order, including ino, please ensure ino is a directory. + fn walk_dir( + &self, + ino: u64, + parent: Option<&PathBuf>, + parent_inode_ext: Option<&dyn RafsInodeExt>, + cb: &mut dyn FnMut(Option<&dyn RafsInodeExt>, &dyn RafsInode, &Path) -> anyhow::Result<()>, + ) -> anyhow::Result<()> { + let inode = self.rafs_meta.superblock.get_extended_inode(ino, false)?; + if !inode.is_dir() { + bail!("inode {} is not a directory", ino); + } + self.walk_dir_inner(inode.as_ref(), parent, parent_inode_ext, cb) + } + + #[allow(clippy::only_used_in_recursion, clippy::type_complexity)] + fn walk_dir_inner( + &self, + inode: &dyn RafsInodeExt, + parent: Option<&PathBuf>, + parent_inode_ext: Option<&dyn RafsInodeExt>, + cb: &mut dyn FnMut(Option<&dyn RafsInodeExt>, &dyn RafsInode, &Path) -> anyhow::Result<()>, + ) -> anyhow::Result<()> { + let path = if let Some(parent) = parent { + parent.join(inode.name()) + } else { + PathBuf::from("/") + }; + cb(parent_inode_ext, inode.as_inode(), &path)?; + if !inode.is_dir() { + return Ok(()); + } + let child_count = inode.get_child_count(); + for idx in 0..child_count { + let child = inode.get_child_by_index(idx)?; + self.walk_dir_inner(child.as_ref(), Some(&path), Some(inode), cb)?; + } + Ok(()) + } + + // Implement command "icheck" + fn cmd_check_inode(&mut self, ino: u64) -> Result, anyhow::Error> { + let current_inode = self.rafs_meta.superblock.get_inode(ino, false)?; + if self.rafs_meta.meta.is_v6() && !current_inode.is_dir() { + if self.file_parents.is_empty() { + self.generate_file_parents()?; + } + + if let Some(parents) = self.file_parents.get(&ino) { + for parent in parents { + let parent_inode = self + .rafs_meta + .superblock + .get_extended_inode(*parent, false)?; + let parent_path = self.rafs_meta.path_from_ino(*parent)?; + let child_count = parent_inode.get_child_count(); + for 
idx in 0..child_count { + let child = parent_inode.get_child_by_index(idx)?; + if child.ino() == ino { + let path = parent_path.join(child.name()); + println!(r#"{}"#, path.to_string_lossy(),); + self.stat_single_file( + Some(parent_inode.as_ref()), + current_inode.as_ref(), + )?; + break; + } + } + } + } + } else { + self.walk_dir( + self.rafs_meta.superblock.root_ino(), + None, + None, + &mut |parent, inode, path| { + if inode.ino() == ino { + println!(r#"{}"#, path.to_string_lossy(),); + self.stat_single_file(parent, inode)?; + } + Ok(()) + }, + )?; + } + + Ok(None) + } +} + +impl RafsInspector { + /// Get file name of the inode, the rafs v6 file is handled separately. + fn get_file_name(&self, parent_inode: &dyn RafsInodeExt, inode: &dyn RafsInode) -> OsString { + let mut filename = OsString::from(""); + if self.rafs_meta.meta.is_v6() && !inode.is_dir() { + parent_inode + .walk_children_inodes( + 0, + &mut |_inode: Option>, name: OsString, cur_ino, _offset| { + if cur_ino == inode.ino() { + filename = name; + Ok(RafsInodeWalkAction::Break) + } else { + Ok(RafsInodeWalkAction::Continue) + } + }, + ) + .unwrap(); + } else if let Ok(inode) = self + .rafs_meta + .superblock + .get_extended_inode(inode.ino(), false) + { + filename = inode.name(); + } + filename + } + + // print information of single file + fn stat_single_file( + &self, + parent_inode: Option<&dyn RafsInodeExt>, + inode: &dyn RafsInode, + ) -> Result, anyhow::Error> { + let inode_attr = inode.get_attr(); + + if let Some(parent) = parent_inode { + println!( + r#" +Inode Number: {inode_number} +Name: {name:?} +Size: {size} +Parent: {parent} +Mode: 0x{mode:X} +Permissions: {permissions:o} +Nlink: {nlink} +UID: {uid} +GID: {gid} +Mtime: {mtime} +MtimeNsec: {mtime_nsec} +Blocks: {blocks}"#, + inode_number = inode.ino(), + name = self.get_file_name(parent, inode), + size = inode.size(), + parent = parent.ino(), + mode = inode_attr.mode, + permissions = Permissions::from_mode(inode_attr.mode).mode(), + nlink = inode_attr.nlink, + uid = inode_attr.uid, + gid = inode_attr.gid, + mtime = inode_attr.mtime, + mtime_nsec = inode_attr.mtimensec, + blocks = inode_attr.blocks, + ); + } + + Ok(None) + } + + // Match blobinfo by using blob index + fn get_blob_id_by_index(&self, blob_index: u32) -> Result { + let blob_infos = self.rafs_meta.superblock.get_blob_infos(); + for b in blob_infos.iter() { + if b.blob_index() == blob_index { + return Ok(b.blob_id()); + } + } + Err(anyhow!("can not find blob by index: {}", blob_index)) + } +} + +#[derive(Debug)] +pub(crate) enum ExecuteError { + HelpCommand, + IllegalCommand, + ArgumentParse, + Exit, + ExecError(anyhow::Error), +} + +pub(crate) struct Executor {} + +impl Executor { + pub fn execute( + inspector: &mut RafsInspector, + input: String, + ) -> Result, ExecuteError> { + let mut raw = input + .strip_suffix('\n') + .unwrap_or(&input) + .split_ascii_whitespace(); + let cmd = match raw.next() { + Some(c) => c, + None => return Ok(None), + }; + let args = raw.next().map(|a| a.trim()); + debug!("execute {:?} {:?}", cmd, args); + + let output = match (cmd, args) { + ("help", _) => { + Self::usage(); + return Err(ExecuteError::HelpCommand); + } + ("exit", _) | ("q", _) => return Err(ExecuteError::Exit), + ("stats", None) => inspector.cmd_stats(), + ("ls", None) => inspector.cmd_list_dir(), + ("cd", Some(dir)) => inspector.cmd_change_dir(dir), + ("stat", Some(file_name)) => inspector.cmd_stat_file(file_name), + ("blobs", None) => inspector.cmd_list_blobs(), + ("prefetch", None) => 
inspector.cmd_list_prefetch(), + ("chunk", Some(argument)) => { + let offset: u64 = argument.parse().unwrap(); + inspector.cmd_show_chunk(offset) + } + ("icheck", Some(argument)) => { + let ino: u64 = argument.parse().map_err(|_| { + println!("Wrong INODE is specified. Is it a inode number?"); + ExecuteError::ArgumentParse + })?; + inspector.cmd_check_inode(ino) + } + (cmd, _) => { + println!("Unsupported command: {}", cmd); + { + Self::usage(); + return Err(ExecuteError::IllegalCommand); + }; + } + } + .map_err(ExecuteError::ExecError)?; + + Ok(output) + } + + pub(crate) fn usage() { + println!( + r#" + stats: Display RAFS filesystesm metadata + ls: Show files in current directory + cd DIR: Change current directory + stat FILE_NAME: Show particular information of RAFS file + blobs: Show blob table + prefetch: Show prefetch table + chunk OFFSET: List basic info of a single chunk together with a list of files that share it + icheck INODE: Show path of the inode and basic information + exit: Exit + "# + ); + } +} + +pub(crate) struct Prompt {} + +impl Prompt { + pub(crate) fn run(mut inspector: RafsInspector) { + loop { + print!("Inspecting RAFS :> "); + std::io::stdout().flush().unwrap(); + + let mut input = String::new(); + std::io::stdin().read_line(&mut input).unwrap(); + + match Executor::execute(&mut inspector, input) { + Err(ExecuteError::Exit) => break, + Err(ExecuteError::IllegalCommand) => continue, + Err(ExecuteError::HelpCommand) => continue, + Err(ExecuteError::ExecError(e)) => { + println!("Failed to execute command, {:?}", e); + continue; + } + Ok(Some(o)) => { + serde_json::to_writer(std::io::stdout(), &o) + .unwrap_or_else(|e| error!("Failed to serialize message, {:?}", e)); + } + _ => continue, + } + } + } +} diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index 5fa3a3a3c10..7d951cc460b 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -1,2039 +1,2039 @@ -// Copyright 2020 Ant Group. All rights reserved. 
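For context, the interactive executor above splits each input line on ASCII whitespace, takes the first token as the command and at most one following token as the argument; any further tokens are dropped. A standalone sketch of that tokenization (the `parse` helper below is hypothetical, not part of nydus-image):

```rust
// Sketch of the prompt's tokenization: first whitespace-separated token is the
// command, the next one (if any) is the single argument, the rest is ignored.
fn parse(input: &str) -> Option<(&str, Option<&str>)> {
    let mut raw = input
        .strip_suffix('\n')
        .unwrap_or(input)
        .split_ascii_whitespace();
    let cmd = raw.next()?;
    let args = raw.next().map(|a| a.trim());
    Some((cmd, args))
}

fn main() {
    assert_eq!(parse("chunk 4096\n"), Some(("chunk", Some("4096"))));
    assert_eq!(parse("ls\n"), Some(("ls", None)));
    // Extra tokens are silently dropped, mirroring the executor above.
    assert_eq!(parse("cd a b\n"), Some(("cd", Some("a"))));
}
```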
-// -// SPDX-License-Identifier: Apache-2.0 - -#![deny(warnings)] -#[macro_use(crate_authors)] -extern crate clap; -#[macro_use] -extern crate anyhow; -#[macro_use] -extern crate log; -#[macro_use] -extern crate serde_json; -#[macro_use] -extern crate lazy_static; -use crate::deduplicate::{ - check_bootstrap_versions_consistency, update_ctx_from_parent_bootstrap, Deduplicate, - SqliteDatabase, -}; -use std::convert::TryFrom; -use std::fs::{self, metadata, DirEntry, File, OpenOptions}; -use std::os::unix::fs::FileTypeExt; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; - -use anyhow::{bail, Context, Result}; -use clap::parser::ValueSource; -use clap::{Arg, ArgAction, ArgMatches, Command as App}; -use nix::unistd::{getegid, geteuid}; -use nydus::{get_build_time_info, setup_logging}; -use nydus_api::{BuildTimeInfo, ConfigV2, LocalFsConfig}; -use nydus_builder::{ - parse_chunk_dict_arg, ArtifactStorage, BlobCacheGenerator, BlobCompactor, BlobManager, - BootstrapManager, BuildContext, BuildOutput, Builder, ChunkdictBlobInfo, ChunkdictChunkInfo, - ConversionType, DirectoryBuilder, Feature, Features, Generator, HashChunkDict, Merger, - Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, WhiteoutSpec, -}; -use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion}; -use nydus_storage::backend::localfs::LocalFs; -use nydus_storage::backend::BlobBackend; -use nydus_storage::device::BlobFeatures; -use nydus_storage::factory::BlobFactory; -use nydus_storage::meta::{format_blob_features, BatchContextGenerator}; -use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; -use nydus_utils::trace::{EventTracerClass, TimingTracerClass, TraceClass}; -use nydus_utils::{ - compress, digest, event_tracer, lazy_drop, register_tracer, root_tracer, timing_tracer, -}; -use serde::{Deserialize, Serialize}; - -use crate::unpack::{OCIUnpacker, Unpacker}; -use crate::validator::Validator; - -#[cfg(target_os = "linux")] -use nydus_service::ServiceArgs; -#[cfg(target_os = "linux")] -use std::str::FromStr; - -mod deduplicate; -mod inspect; -mod stat; -mod unpack; -mod validator; - -const BLOB_ID_MAXIMUM_LENGTH: usize = 255; - -#[derive(Serialize, Deserialize, Default)] -pub struct OutputSerializer { - /// The binary version of builder (nydus-image). - version: String, - /// RAFS meta data file path. - bootstrap: String, - /// Represents all blob in blob table ordered by blob index, this field - /// only include the layer that does have a blob, and should be deprecated - /// in future, use `artifacts` field to replace. - blobs: Vec, - /// Performance trace info for current build. - trace: serde_json::Map, - /// RAFS filesystem version (5 or 6). - fs_version: String, - /// Chunk compression algorithm. 
- compressor: String, -} - -impl OutputSerializer { - fn dump( - matches: &ArgMatches, - build_output: BuildOutput, - build_info: &BuildTimeInfo, - compressor: compress::Algorithm, - fs_version: RafsVersion, - ) -> Result<()> { - let output_json: Option = matches - .get_one::("output-json") - .map(|o| o.to_string().into()); - - if let Some(ref f) = output_json { - let w = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .open(f) - .with_context(|| format!("can not open output file {}", f.display()))?; - let trace = root_tracer!().dump_summary_map().unwrap_or_default(); - let version = format!("{}-{}", build_info.package_ver, build_info.git_commit); - let output = Self { - version, - bootstrap: build_output.bootstrap_path.unwrap_or_default(), - blobs: build_output.blobs, - trace, - fs_version: fs_version.to_string(), - compressor: compressor.to_string(), - }; - - serde_json::to_writer_pretty(w, &output) - .context("failed to write result to output file")?; - } - - Ok(()) - } - - fn dump_for_check( - matches: &ArgMatches, - build_info: &BuildTimeInfo, - blob_ids: Vec, - bootstrap: &Path, - compressor: compress::Algorithm, - fs_version: RafsVersion, - ) -> Result<()> { - let output_json: Option = matches - .get_one::("output-json") - .map(|o| o.to_string().into()); - - if let Some(ref f) = output_json { - let w = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .open(f) - .with_context(|| format!("can not open output file {}", f.display()))?; - let trace = root_tracer!().dump_summary_map().unwrap_or_default(); - let version = format!("{}-{}", build_info.package_ver, build_info.git_commit); - let output = Self { - version, - bootstrap: bootstrap.display().to_string(), - blobs: blob_ids, - trace, - fs_version: fs_version.to_string(), - compressor: compressor.to_string(), - }; - - serde_json::to_writer(w, &output).context("failed to write result to output file")?; - } - - Ok(()) - } -} - -fn prepare_cmd_args(bti_string: &'static str) -> App { - let arg_chunk_dict = Arg::new("chunk-dict") - .long("chunk-dict") - .help("File path of chunk dictionary for data deduplication"); - let arg_prefetch_policy = Arg::new("prefetch-policy") - .long("prefetch-policy") - .help("Set data prefetch policy") - .required(false) - .default_value("none") - .value_parser(["fs", "blob", "none"]); - let arg_output_json = Arg::new("output-json") - .long("output-json") - .short('J') - .help("File path to save operation result in JSON format"); - let arg_config = Arg::new("config") - .long("config") - .short('C') - .help("Configuration file for storage backend, cache and RAFS FUSE filesystem.") - .required(false); - - let app = App::new("") - .version(bti_string) - .author(crate_authors!()) - .about("Build, analyze, inspect or validate RAFS filesystems/Nydus accelerated container images") - .arg( - Arg::new("log-file") - .long("log-file") - .short('L') - .help("Log file path") - .required(false) - .global(true), - ) - .arg( - Arg::new("log-level") - .long("log-level") - .short('l') - .help("Log level:") - .default_value("info") - .value_parser(["trace", "debug", "info", "warn", "error"]) - .required(false) - .global(true), - ); - - let app = app.subcommand( - App::new("create") - .about("Create RAFS filesystems from directories, tar files or OCI images") - .arg( - Arg::new("SOURCE") - .help("source from which to build the RAFS filesystem") - .required(true) - .num_args(1), - ) - .arg( - Arg::new("type") - .long("type") - .short('t') - .alias("source-type") - .help("Conversion 
type:") - .default_value("dir-rafs") - .value_parser([ - "directory", - "dir-rafs", - "estargz-rafs", - "estargz-ref", - "estargztoc-ref", - "tar-rafs", - "tar-tarfs", - "targz-rafs", - "targz-ref", - "stargz_index", - ]) - ) - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("File path to save the generated RAFS metadata blob") - .required_unless_present_any(["blob-dir", "blob-inline-meta"]) - .conflicts_with("blob-inline-meta"), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .help("Directory path to save generated RAFS metadata and data blobs"), - ) - .arg( - Arg::new("blob") - .long("blob") - .short('b') - .help("File path to save the generated RAFS data blob") - .required_unless_present_any(["type", "blob-dir"]), - ) - .arg( - Arg::new("blob-inline-meta") - .long("blob-inline-meta") - .alias("inline-bootstrap") - .help("Inline RAFS metadata and blob metadata into the data blob") - .action(ArgAction::SetTrue) - .conflicts_with("blob-id") - .required(false), - ) - .arg( - Arg::new("blob-id") - .long("blob-id") - .required_if_eq_any([("type", "estargztoc-ref"), ("type", "stargz_index")]) - .help("OSS object id for the generated RAFS data blob") - ) - .arg( - Arg::new("blob-data-size") - .long("blob-data-size") - .help("Set data blob size for 'estargztoc-ref' conversion"), - ) - .arg( - Arg::new("blob-offset") - .long("blob-offset") - .help("File offset to store RAFS data, to support storing data blobs into tar files") - .hide(true) - .default_value("0"), - ) - .arg( - Arg::new("chunk-size") - .long("chunk-size") - .help("Set the size of data chunks, must be power of two and between 0x1000-0x1000000:") - .required(false), - ) - .arg( - Arg::new("batch-size") - .long("batch-size") - .help("Set the batch size to merge small chunks, must be power of two, between 0x1000-0x1000000 or be zero:") - .required(false) - .default_value("0"), - ) - .arg( - Arg::new("compressor") - .long("compressor") - .help("Algorithm to compress data chunks:") - .required(false) - .default_value("zstd") - .value_parser(["none", "lz4_block", "zstd"]), - ) - .arg( - Arg::new("digester") - .long("digester") - .help("Algorithm to digest data chunks:") - .required(false) - .default_value("blake3") - .value_parser(["blake3", "sha256"]), - ) - .arg( arg_config.clone() ) - .arg( - Arg::new("fs-version") - .long("fs-version") - .short('v') - .help("Set RAFS format version number:") - .default_value("6") - .value_parser(["5", "6"]), - ) - .arg( - Arg::new("features") - .long("features") - .value_parser(["blob-toc"]) - .help("Enable/disable features") - ) - .arg( - arg_chunk_dict.clone(), - ) - .arg( - Arg::new("parent-bootstrap") - .long("parent-bootstrap") - .help("File path of the parent/referenced RAFS metadata blob (optional)") - .required(false), - ) - .arg( - Arg::new("aligned-chunk") - .long("aligned-chunk") - .help("Align uncompressed data chunks to 4K, only for RAFS V5") - .action(ArgAction::SetTrue) - ) - .arg( - Arg::new("repeatable") - .long("repeatable") - .help("Generate reproducible RAFS metadata") - .action(ArgAction::SetTrue) - .required(false), - ) - .arg( - Arg::new("disable-check") - .long("disable-check") - .help("Disable RAFS metadata validation after build") - .hide(true) - .action(ArgAction::SetTrue) - .required(false) - ) - .arg( - Arg::new("whiteout-spec") - .long("whiteout-spec") - .help("Set the type of whiteout specification:") - .default_value("oci") - .value_parser(["oci", "overlayfs", "none"]) - ) - .arg( - arg_prefetch_policy.clone(), - ) - .arg( 
- arg_output_json.clone(), - ) - .arg( - Arg::new("encrypt") - .long("encrypt") - .short('E') - .help("Encrypt the generated RAFS metadata and data blobs") - .action(ArgAction::SetTrue) - .required(false) - ) - .arg( - Arg::new("blob-cache-dir") - .long("blob-cache-dir") - .help("Directory path to generate blob cache files ($id.blob.meta and $id.blob.data)") - .value_parser(clap::value_parser!(PathBuf)) - .conflicts_with("blob-inline-meta") - .conflicts_with("blob") - .conflicts_with("blob-dir") - .conflicts_with("compressor") - .required(false) - ) - ); - - let app = app.subcommand( - App::new("chunkdict") - .about("deduplicate RAFS filesystem metadata") - .subcommand( - App::new("generate") - .about("generate chunk dictionary based on database") - .arg( - Arg::new("database") - .long("database") - .help("Database connection address for assisting chunk dictionary generation, e.g. /path/database.db") - .default_value("sqlite:///home/runner/output/database.db") - .required(false), - ) - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Output path of nydus overlaid bootstrap"), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .help("Directory path to save generated RAFS metadata and data blobs"), - ) - .arg(arg_prefetch_policy.clone()) - .arg(arg_output_json.clone()) - .arg(arg_config.clone()) - .arg( - Arg::new("SOURCE") - .help("bootstrap paths (allow one or more)") - .required(true) - .num_args(1..), - ) - .arg( - Arg::new("verbose") - .long("verbose") - .short('v') - .help("Output message in verbose mode") - .action(ArgAction::SetTrue) - .required(false), - ) - ) - ); - - let app = app.subcommand( - App::new("merge") - .about("Merge multiple bootstraps into a overlaid bootstrap") - .arg( - Arg::new("parent-bootstrap") - .long("parent-bootstrap") - .help("File path of the parent/referenced RAFS metadata blob (optional)") - .required(false), - ) - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Output path of nydus overlaid bootstrap"), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .help("Directory path to save generated RAFS metadata and data blobs"), - ) - .arg(arg_chunk_dict.clone()) - .arg(arg_prefetch_policy) - .arg(arg_output_json.clone()) - .arg( - Arg::new("blob-digests") - .long("blob-digests") - .required(false) - .help("RAFS blob digest list separated by comma"), - ) - .arg( - Arg::new("original-blob-ids") - .long("original-blob-ids") - .required(false) - .help("original blob id list separated by comma, it may usually be a sha256 hex string"), - ) - .arg( - Arg::new("blob-sizes") - .long("blob-sizes") - .required(false) - .help("RAFS blob size list separated by comma"), - ) - .arg( - Arg::new("blob-toc-digests") - .long("blob-toc-digests") - .required(false) - .help("RAFS blob toc digest list separated by comma"), - ) - .arg( - Arg::new("blob-toc-sizes") - .long("blob-toc-sizes") - .required(false) - .help("RAFS blob toc size list separated by comma"), - ) - .arg(arg_config.clone()) - .arg( - Arg::new("SOURCE") - .help("bootstrap paths (allow one or more)") - .required(true) - .num_args(1..), - ), - ); - - let app = app.subcommand( - App::new("check") - .about("Validate RAFS filesystem metadata") - .arg( - Arg::new("BOOTSTRAP") - .help("File path of RAFS metadata") - .required_unless_present("bootstrap"), - ) - .arg( - Arg::new("bootstrap") - .short('B') - .long("bootstrap") - .help("[Deprecated] File path of RAFS meta blob/bootstrap") - .conflicts_with("BOOTSTRAP") - 
.required(false), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .conflicts_with("config") - .help( - "Directory for localfs storage backend, hosting data blobs and cache files", - ), - ) - .arg(arg_config.clone()) - .arg( - Arg::new("verbose") - .long("verbose") - .short('v') - .help("Output message in verbose mode") - .action(ArgAction::SetTrue) - .required(false), - ) - .arg(arg_output_json.clone()), - ); - - #[cfg(target_os = "linux")] - let app = app.subcommand( - App::new("export") - .about("Export RAFS filesystems as raw block disk images or tar files") - .arg( - Arg::new("block") - .long("block") - .action(ArgAction::SetTrue) - .required(true) - .help("Export RAFS filesystems as raw block disk images") - ) - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Bootstrap of the RAFS filesystem to be exported") - .requires("localfs-dir") - ) - .arg(Arg::new("config") - .long("config") - .short('C') - .help("Configuration file containing a `BlobCacheEntry` object") - .required(false)) - .arg( - Arg::new("localfs-dir") - .long("localfs-dir") - .short('D') - .help( - "Path to the `localfs` working directory, which also enables the `localfs` storage backend" - ) - .requires("bootstrap") - .conflicts_with("config"), - ) - .arg( - Arg::new("threads") - .long("threads") - .default_value("4") - .help("Number of worker threads to execute export operation, valid values: [1-32]") - .value_parser(Command::thread_validator) - .required(false), - ) - .arg( - Arg::new("output") - .long("output") - .short('O') - .help("File path for saving the exported content") - .required_unless_present("localfs-dir") - ) - .arg( - Arg::new("verity") - .long("verity") - .help("Generate dm-verity data for block device") - .action(ArgAction::SetTrue) - .required(false) - .requires("block") - ) - ); - - let app = app.subcommand( - App::new("inspect") - .about("Inspect RAFS filesystem metadata in interactive or request mode") - .arg( - Arg::new("BOOTSTRAP") - .help("File path of RAFS metadata") - .required_unless_present("bootstrap"), - ) - .arg( - Arg::new("bootstrap") - .short('B') - .long("bootstrap") - .help("[Deprecated] File path of RAFS meta blob/bootstrap") - .conflicts_with("BOOTSTRAP") - .required(false), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .conflicts_with("config") - .help( - "Directory for localfs storage backend, hosting data blobs and cache files", - ), - ) - .arg(arg_config.clone()) - .arg( - Arg::new("request") - .long("request") - .short('R') - .help("Inspect RAFS filesystem metadata in request mode") - .required(false), - ), - ); - - let app = app.subcommand( - App::new("stat") - .about("Generate statistics information for RAFS filesystems") - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Generate statistics information for the RAFS filesystem") - .required(false), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .help("Generate statistics information for all RAFS filesystems in the directory") - .required(false), - ) - .arg( - Arg::new("target") - .long("target") - .short('T') - .help("Generate statistics information for the RAFS filesystem after applying chunk deduplication") - .required(false), - ) - .arg(arg_config.clone()) - .arg( - Arg::new("digester") - .long("digester") - .help("Algorithm to digest data chunks:") - .required(false) - .default_value("blake3") - .value_parser(["blake3", "sha256"]), - ) - .arg( - arg_output_json.clone(), - ) - ); - - let app = 
app.subcommand( - App::new("compact") - .about("(experimental)Compact specific nydus image, remove unused chunks in blobs, merge small blobs") - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("bootstrap to compact") - .required(true), - ) - .arg( - Arg::new("config") - .long("config") - .short('C') - .help("config to compactor") - .required(true), - ) - .arg( - Arg::new("backend-config") - .long("backend-config") - .help("config file of backend") - .required(true), - ) - .arg( arg_chunk_dict ) - .arg( - Arg::new("output-bootstrap") - .long("output-bootstrap") - .short('O') - .help("bootstrap to output, default is source bootstrap add suffix .compact"), - ) - .arg( - arg_output_json, - ) - ); - - app.subcommand( - App::new("unpack") - .about("Unpack a RAFS filesystem to a tar file") - .arg( - Arg::new("BOOTSTRAP") - .help("File path of RAFS metadata") - .required_unless_present("bootstrap"), - ) - .arg( - Arg::new("backend-config") - .long("backend-config") - .help("config file of backend") - .required(false), - ) - .arg( - Arg::new("bootstrap") - .short('B') - .long("bootstrap") - .help("[Deprecated] File path of RAFS meta blob/bootstrap") - .conflicts_with("BOOTSTRAP") - .required(false), - ) - .arg( - Arg::new("blob") - .long("blob") - .short('b') - .help("path to RAFS data blob file") - .required(false), - ) - .arg( - Arg::new("blob-dir") - .long("blob-dir") - .short('D') - .conflicts_with("config") - .help( - "Directory for localfs storage backend, hosting data blobs and cache files", - ), - ) - .arg(arg_config) - .arg( - Arg::new("output") - .long("output") - .help("path for output tar file") - .required(true), - ), - ) -} - -fn init_log(matches: &ArgMatches) -> Result<()> { - let mut log_file = None; - if let Some(file) = matches.get_one::("log-file") { - let path = PathBuf::from(file); - log_file = Some(path); - } - - // Safe to unwrap because it has a default value and possible values are defined. - let level = matches - .get_one::("log-level") - .unwrap() - .parse() - .unwrap(); - - setup_logging(log_file, level, 0).context("failed to setup logging") -} - -lazy_static! 
{ - static ref BTI_STRING: String = get_build_time_info().0; - static ref BTI: BuildTimeInfo = get_build_time_info().1; -} - -fn main() -> Result<()> { - let build_info = BTI.to_owned(); - let mut app = prepare_cmd_args(BTI_STRING.as_str()); - let usage = app.render_usage(); - let cmd = app.get_matches(); - - init_log(&cmd)?; - - register_tracer!(TraceClass::Timing, TimingTracerClass); - register_tracer!(TraceClass::Event, EventTracerClass); - - if let Some(matches) = cmd.subcommand_matches("create") { - Command::create(matches, &build_info) - } else if let Some(matches) = cmd.subcommand_matches("chunkdict") { - match matches.subcommand_name() { - Some("generate") => Command::chunkdict_generate( - matches.subcommand_matches("generate").unwrap(), - &build_info, - ), - _ => { - println!("{}", usage); - Ok(()) - } - } - } else if let Some(matches) = cmd.subcommand_matches("merge") { - let result = Command::merge(matches, &build_info); - if let Err(ref err) = result { - if let Some(MergeError::InconsistentFilesystem(_)) = err.downcast_ref::() { - error!("message:{}", err); - std::process::exit(2); - } - } - result - } else if let Some(matches) = cmd.subcommand_matches("check") { - Command::check(matches, &build_info) - } else if let Some(matches) = cmd.subcommand_matches("inspect") { - Command::inspect(matches) - } else if let Some(matches) = cmd.subcommand_matches("stat") { - Command::stat(matches) - } else if let Some(matches) = cmd.subcommand_matches("compact") { - Command::compact(matches, &build_info) - } else if let Some(matches) = cmd.subcommand_matches("unpack") { - Command::unpack(matches) - } else { - #[cfg(target_os = "linux")] - if let Some(matches) = cmd.subcommand_matches("export") { - Command::export(&cmd, matches, &build_info) - } else { - println!("{}", usage); - Ok(()) - } - #[cfg(not(target_os = "linux"))] - { - println!("{}", usage); - Ok(()) - } - } -} - -struct Command {} - -impl Command { - fn create(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let blob_id = Self::get_blob_id(matches)?; - let blob_offset = Self::get_blob_offset(matches)?; - let parent_path = Self::get_parent_bootstrap(matches)?; - let prefetch = Self::get_prefetch(matches)?; - let source_path = PathBuf::from(matches.get_one::("SOURCE").unwrap()); - let conversion_type: ConversionType = matches.get_one::("type").unwrap().parse()?; - let blob_inline_meta = matches.get_flag("blob-inline-meta"); - let repeatable = matches.get_flag("repeatable"); - let version = Self::get_fs_version(matches)?; - let chunk_size = Self::get_chunk_size(matches, conversion_type)?; - let batch_size = Self::get_batch_size(matches, version, conversion_type, chunk_size)?; - let blob_cache_storage = Self::get_blob_cache_storage(matches, conversion_type)?; - // blob-cache-dir and blob-dir/blob are a set of mutually exclusive functions, - // the former is used to generate blob cache, nydusd is directly started through blob cache, - // the latter is to generate nydus blob, as nydusd backend to start - let blob_storage = if blob_cache_storage.is_some() { - None - } else { - Self::get_blob_storage(matches, conversion_type)? - }; - - let aligned_chunk = if version.is_v6() && conversion_type != ConversionType::TarToTarfs { - true - } else { - // get_fs_version makes sure it's either v6 or v5. 
- matches.get_flag("aligned-chunk") - }; - let whiteout_spec: WhiteoutSpec = matches - .get_one::("whiteout-spec") - .map(|s| s.as_str()) - .unwrap_or_default() - .parse()?; - let mut compressor = matches - .get_one::("compressor") - .map(|s| s.as_str()) - .unwrap_or_default() - .parse()?; - let mut digester = matches - .get_one::("digester") - .map(|s| s.as_str()) - .unwrap_or_default() - .parse()?; - let blob_data_size = Self::get_blob_size(matches, conversion_type)?; - let features = Features::try_from( - matches - .get_one::("features") - .map(|s| s.as_str()) - .unwrap_or_default(), - )?; - let encrypt = matches.get_flag("encrypt"); - match conversion_type { - ConversionType::DirectoryToRafs => { - Self::ensure_directory(&source_path)?; - if blob_storage.is_none() && blob_cache_storage.is_none() { - bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); - } - } - ConversionType::EStargzToRafs - | ConversionType::TargzToRafs - | ConversionType::TarToRafs => { - Self::ensure_file(&source_path)?; - if blob_storage.is_none() && blob_cache_storage.is_none() { - bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); - } - } - ConversionType::TarToRef - | ConversionType::TargzToRef - | ConversionType::EStargzToRef => { - Self::ensure_file(&source_path)?; - if matches.value_source("compressor") != Some(ValueSource::DefaultValue) - && compressor != compress::Algorithm::GZip - { - info!( - "only GZip is supported for conversion type {}, use GZip instead of {}", - conversion_type, compressor - ); - } - if matches.value_source("digester") != Some(ValueSource::DefaultValue) - && digester != digest::Algorithm::Sha256 - { - info!( - "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", - conversion_type, compressor - ); - } - compressor = compress::Algorithm::GZip; - digester = digest::Algorithm::Sha256; - if blob_storage.is_none() && blob_cache_storage.is_none() { - bail!("all of --blob, --blob-dir and --blob-cache-dir are missing"); - } else if !prefetch.disabled && prefetch.policy == PrefetchPolicy::Blob { - bail!( - "conversion type {} conflicts with '--prefetch-policy blob'", - conversion_type - ); - } - if version != RafsVersion::V6 { - bail!( - "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", - conversion_type - ); - } - if blob_id.trim() != "" { - bail!( - "conversion type '{}' conflicts with '--blob-id'", - conversion_type - ); - } - if encrypt { - bail!( - "conversion type '{}' conflicts with '--encrypt'", - conversion_type - ) - } - } - ConversionType::TarToTarfs => { - Self::ensure_file(&source_path)?; - if matches.value_source("compressor") != Some(ValueSource::DefaultValue) - && compressor != compress::Algorithm::None - { - info!( - "only compressor `None` is supported for conversion type {}, use `None` instead of {}", - conversion_type, compressor - ); - } - if matches.value_source("digester") != Some(ValueSource::DefaultValue) - && digester != digest::Algorithm::Sha256 - { - info!( - "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", - conversion_type, compressor - ); - } - compressor = compress::Algorithm::None; - digester = digest::Algorithm::Sha256; - if blob_storage.is_none() && blob_cache_storage.is_none() { - bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); - } else if !prefetch.disabled && prefetch.policy == PrefetchPolicy::Blob { - bail!( - "conversion type {} conflicts with '--prefetch-policy blob'", - conversion_type - ); - } - if version != 
RafsVersion::V6 { - bail!( - "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", - conversion_type - ); - } - if matches.get_one::("chunk-dict").is_some() { - bail!( - "conversion type '{}' conflicts with '--chunk-dict'", - conversion_type - ); - } - if parent_path.is_some() { - bail!( - "conversion type '{}' conflicts with '--parent-bootstrap'", - conversion_type - ); - } - if blob_inline_meta { - bail!( - "conversion type '{}' conflicts with '--blob-inline-meta'", - conversion_type - ); - } - if features.is_enabled(Feature::BlobToc) { - bail!( - "conversion type '{}' conflicts with '--features blob-toc'", - conversion_type - ); - } - if aligned_chunk { - bail!( - "conversion type '{}' conflicts with '--aligned-chunk'", - conversion_type - ); - } - if encrypt { - bail!( - "conversion type '{}' conflicts with '--encrypt'", - conversion_type - ) - } - } - ConversionType::EStargzIndexToRef => { - Self::ensure_file(&source_path)?; - if matches.value_source("compressor") != Some(ValueSource::DefaultValue) - && compressor != compress::Algorithm::GZip - { - info!( - "only GZip is supported for conversion type {}, use GZip instead of {}", - conversion_type, compressor - ); - } - if matches.value_source("digester") != Some(ValueSource::DefaultValue) - && digester != digest::Algorithm::Sha256 - { - info!( - "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", - conversion_type, compressor - ); - } - compressor = compress::Algorithm::GZip; - digester = digest::Algorithm::Sha256; - if blob_storage.is_some() || blob_cache_storage.is_some() { - bail!( - "conversion type '{}' conflicts with '--blob' and '--blob-cache-dir'", - conversion_type - ); - } - if version != RafsVersion::V6 { - bail!( - "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", - conversion_type - ); - } - if blob_id.trim() == "" { - bail!("'--blob-id' is missing for '--type stargz_index'"); - } - if encrypt { - bail!( - "conversion type '{}' conflicts with '--encrypt'", - conversion_type - ) - } - } - ConversionType::DirectoryToStargz - | ConversionType::TargzToStargz - | ConversionType::TarToStargz => { - unimplemented!() - } - ConversionType::DirectoryToTargz => { - unimplemented!() - } - } - - if features.is_enabled(Feature::BlobToc) && version == RafsVersion::V5 { - bail!("`--features blob-toc` can't be used with `--version 5` "); - } - - if blob_cache_storage.is_some() { - // In blob cache mode, we don't need to do any compression for the original data - compressor = compress::Algorithm::None; - } - - let mut build_ctx = BuildContext::new( - blob_id, - aligned_chunk, - blob_offset, - compressor, - digester, - !repeatable, - whiteout_spec, - conversion_type, - source_path, - prefetch, - blob_storage, - blob_inline_meta, - features, - encrypt, - ); - build_ctx.set_fs_version(version); - build_ctx.set_chunk_size(chunk_size); - build_ctx.set_batch_size(batch_size); - - let blob_cache_generator = match blob_cache_storage { - Some(storage) => Some(BlobCacheGenerator::new(storage)?), - None => None, - }; - build_ctx.blob_cache_generator = blob_cache_generator; - - let mut config = Self::get_configuration(matches)?; - if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { - cache.cache_validate = true; - } - config.internal.set_blob_accessible(true); - build_ctx.set_configuration(config.clone()); - - let mut blob_mgr = BlobManager::new(digester); - if let Some(chunk_dict_arg) = matches.get_one::("chunk-dict") { - let config = RafsSuperConfig { - 
version, - compressor, - digester, - chunk_size, - batch_size, - explicit_uidgid: !repeatable, - is_tarfs_mode: false, - }; - let rafs_config = Arc::new(build_ctx.configuration.as_ref().clone()); - // The separate chunk dict bootstrap doesn't support blob accessible. - rafs_config.internal.set_blob_accessible(false); - blob_mgr.set_chunk_dict(timing_tracer!( - { HashChunkDict::from_commandline_arg(chunk_dict_arg, rafs_config, &config,) }, - "import_chunk_dict" - )?); - } - - let mut bootstrap_mgr = if blob_inline_meta { - BootstrapManager::new(None, parent_path) - } else { - let bootstrap_path = Self::get_bootstrap_storage(matches)?; - BootstrapManager::new(Some(bootstrap_path), parent_path) - }; - - // Legality has been checked and filtered by `get_batch_size()`. - if build_ctx.batch_size > 0 { - let generator = BatchContextGenerator::new(build_ctx.batch_size)?; - build_ctx.blob_batch_generator = Some(Mutex::new(generator)); - build_ctx.blob_features.insert(BlobFeatures::BATCH); - build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); - } - - let mut builder: Box = match conversion_type { - ConversionType::DirectoryToRafs => { - if encrypt { - build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); - build_ctx.blob_features.insert(BlobFeatures::ENCRYPTED); - } - Box::new(DirectoryBuilder::new()) - } - ConversionType::EStargzIndexToRef => { - Box::new(StargzBuilder::new(blob_data_size, &build_ctx)) - } - ConversionType::EStargzToRafs - | ConversionType::TargzToRafs - | ConversionType::TarToRafs => { - if encrypt { - build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); - build_ctx.blob_features.insert(BlobFeatures::ENCRYPTED); - } - Box::new(TarballBuilder::new(conversion_type)) - } - ConversionType::EStargzToRef - | ConversionType::TargzToRef - | ConversionType::TarToRef => { - if version.is_v5() { - bail!("conversion type {} conflicts with RAFS v5", conversion_type); - } - build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); - build_ctx.blob_features.insert(BlobFeatures::SEPARATE); - Box::new(TarballBuilder::new(conversion_type)) - } - ConversionType::TarToTarfs => { - if version.is_v5() { - bail!("conversion type {} conflicts with RAFS v5", conversion_type); - } - Box::new(TarballBuilder::new(conversion_type)) - } - ConversionType::DirectoryToStargz - | ConversionType::DirectoryToTargz - | ConversionType::TarToStargz - | ConversionType::TargzToStargz => unimplemented!(), - }; - let build_output = timing_tracer!( - { - builder - .build(&mut build_ctx, &mut bootstrap_mgr, &mut blob_mgr) - .context("build failed") - }, - "total_build" - )?; - - lazy_drop(build_ctx); - - // Some operations like listing xattr pairs of certain namespace need the process - // to be privileged. Therefore, trace what euid and egid are. - event_tracer!("euid", "{}", geteuid()); - event_tracer!("egid", "{}", getegid()); - info!("successfully built RAFS filesystem: \n{}", build_output); - OutputSerializer::dump(matches, build_output, build_info, compressor, version) - } - - fn chunkdict_generate(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let mut build_ctx = BuildContext { - prefetch: Self::get_prefetch(matches)?, - ..Default::default() - }; - let db_url: &String = matches.get_one::("database").unwrap(); - // Save chunk and blob info to database. 
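The `chunkdict generate` code in this hunk expects `--database` to look like `sqlite:///path/to/chunkdict.db` and rejects URLs whose scheme is missing or whose path is not absolute. A minimal, standard-library-only sketch of that split; the function name and error type are illustrative and not part of the patch:

```rust
/// Split a database URL such as "sqlite:///path/to/chunkdict.db" into
/// (scheme, path); illustrative helper, not part of the patch.
fn split_db_url(url: &str) -> Result<(&str, &str), String> {
    let parts: Vec<&str> = url.split("://").collect();
    if parts.len() != 2 {
        return Err(format!("invalid database URL: {}", url));
    }
    let (scheme, path) = (parts[0], parts[1]);
    // Only absolute paths (or ':'-prefixed ones) are accepted here,
    // mirroring the check in the surrounding hunk.
    if !path.starts_with('/') && !path.starts_with(':') {
        return Err(format!("invalid database path: {}", path));
    }
    Ok((scheme, path))
}

fn main() {
    assert_eq!(
        split_db_url("sqlite:///tmp/chunkdict.db"),
        Ok(("sqlite", "/tmp/chunkdict.db"))
    );
    assert!(split_db_url("sqlite:./relative.db").is_err());
}
```

Running it, `sqlite:///tmp/chunkdict.db` yields `("sqlite", "/tmp/chunkdict.db")`, while a URL without `://` is rejected, matching the `bail!` paths in the hunk.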
- let source_bootstrap_paths: Vec = matches - .get_many::("SOURCE") - .map(|paths| paths.map(PathBuf::from).collect()) - .unwrap(); - - check_bootstrap_versions_consistency(&mut build_ctx, &source_bootstrap_paths)?; - update_ctx_from_parent_bootstrap(&mut build_ctx, &source_bootstrap_paths[0])?; - - for (_, bootstrap_path) in source_bootstrap_paths.iter().enumerate() { - let path_name = bootstrap_path.as_path(); - - // Extract the image name and version name from the bootstrap directory. - let bootstrap_dir = match path_name - .parent() - .and_then(|p| p.file_name().and_then(|f| f.to_str())) - { - Some(dir_str) => dir_str.to_string(), - None => bail!("Invalid Bootstrap directory name"), - }; - let full_image_name: Vec<&str> = bootstrap_dir.split(':').collect(); - let image_name = match full_image_name.get(full_image_name.len() - 2) { - Some(&second_last) => second_last.to_string(), - None => bail!( - "Invalid image name {:?}", - full_image_name.get(full_image_name.len() - 2) - ), - }; - let image_tag = match full_image_name.last() { - Some(&last) => last.to_string(), - None => bail!("Invalid version name {:?}", full_image_name.last()), - }; - // For backward compatibility with v2.1. - let config = Self::get_configuration(matches)?; - config - .internal - .set_blob_accessible(matches.get_one::("bootstrap").is_none()); - let db_strs: Vec<&str> = db_url.split("://").collect(); - if db_strs.len() != 2 || (!db_strs[1].starts_with('/') && !db_strs[1].starts_with(':')) - { - bail!("Invalid database URL: {}", db_url); - } - match db_strs[0] { - "sqlite" => { - let mut deduplicate: Deduplicate = - Deduplicate::::new(db_strs[1])?; - deduplicate.save_metadata(bootstrap_path, config, image_name, image_tag)? - } - _ => { - bail!("Unsupported database type: {}, please use a valid database URI, such as 'sqlite:///path/to/chunkdict.db'.", db_strs[0]) - } - }; - } - info!("Chunkdict metadata is saved at: {:?}", db_url); - - // Connecting database and generating chunk dictionary by algorithm "exponential_smoothing". - let db_strs: Vec<&str> = db_url.split("://").collect(); - if db_strs.len() != 2 || (!db_strs[1].starts_with('/') && !db_strs[1].starts_with(':')) { - bail!("Invalid database URL: {}", db_url); - } - let algorithm = String::from("exponential_smoothing"); - let _source_bootstrap_paths: Vec = matches - .get_many::("SOURCE") - .map(|paths| paths.map(PathBuf::from).collect()) - .unwrap(); - - let (chunkdict_chunks, chunkdict_blobs, noise_points): ( - Vec, - Vec, - Vec, - ); - - match db_strs[0] { - "sqlite" => { - let mut algorithm: deduplicate::Algorithm = - deduplicate::Algorithm::::new(algorithm, db_strs[1])?; - let result = algorithm.chunkdict_generate()?; - chunkdict_chunks = result.0; - chunkdict_blobs = result.1; - noise_points = result.2; - } - _ => { - bail!("Unsupported database type: {}, please use a valid database URI, such as 'sqlite:///path/to/chunkdict.db'.", db_strs[0]) - } - }; - - // Output noise point in DBSCAN clustering algorithm. - info!( - "The length of chunkdict is {}", - Vec::::len(&chunkdict_chunks) - ); - info!("It is not recommended to use image deduplication"); - for image_name in noise_points { - info!("{}", image_name); - } - - // Dump chunkdict to bootstrap. 
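The loop above derives the image name and tag from the bootstrap's parent directory, which is assumed to be named `<image>:<tag>`. A self-contained sketch of that extraction; the directory layout in `main` is hypothetical:

```rust
use std::path::Path;

/// Derive (image, tag) from a bootstrap path whose parent directory is
/// named "<image>:<tag>"; illustrative helper, not part of the patch.
fn image_name_and_tag(bootstrap: &Path) -> Option<(String, String)> {
    let dir = bootstrap.parent()?.file_name()?.to_str()?;
    let parts: Vec<&str> = dir.split(':').collect();
    if parts.len() < 2 {
        return None;
    }
    // Same indexing as the hunk above: second-to-last is the name, last is the tag.
    Some((
        parts[parts.len() - 2].to_string(),
        parts[parts.len() - 1].to_string(),
    ))
}

fn main() {
    // Hypothetical layout: <output-dir>/<image>:<tag>/bootstrap
    let p = Path::new("/output/nginx:1.25/bootstrap");
    assert_eq!(
        image_name_and_tag(p),
        Some(("nginx".to_string(), "1.25".to_string()))
    );
}
```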
- let chunkdict_bootstrap_path = Self::get_bootstrap_storage(matches)?; - let config = - Self::get_configuration(matches).context("failed to get configuration information")?; - config - .internal - .set_blob_accessible(matches.get_one::("config").is_some()); - build_ctx.configuration = config; - build_ctx.blob_storage = Some(chunkdict_bootstrap_path); - build_ctx - .blob_features - .insert(BlobFeatures::IS_CHUNKDICT_GENERATED); - build_ctx.is_chunkdict_generated = true; - - let mut blob_mgr = BlobManager::new(build_ctx.digester); - - let bootstrap_path = Self::get_bootstrap_storage(matches)?; - let mut bootstrap_mgr = BootstrapManager::new(Some(bootstrap_path), None); - - let output = Generator::generate( - &mut build_ctx, - &mut bootstrap_mgr, - &mut blob_mgr, - chunkdict_chunks, - chunkdict_blobs, - )?; - OutputSerializer::dump( - matches, - output, - build_info, - build_ctx.compressor, - build_ctx.fs_version, - ) - .unwrap(); - info!( - "Chunkdict metadata is saved at: {:?}", - matches - .get_one::("bootstrap") - .map(|s| s.as_str()) - .unwrap_or_default(), - ); - - Ok(()) - } - - fn merge(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let source_bootstrap_paths: Vec = matches - .get_many::("SOURCE") - .map(|paths| paths.map(PathBuf::from).collect()) - .unwrap(); - let blob_sizes: Option> = matches.get_one::("blob-sizes").map(|list| { - list.split(',') - .map(|item| { - item.trim() - .parse::() - .expect("invalid number in --blob-sizes option") - }) - .collect() - }); - let blob_digests: Option> = - matches.get_one::("blob-digests").map(|list| { - list.split(',') - .map(|item| item.trim().to_string()) - .collect() - }); - let original_blob_ids: Option> = - matches.get_one::("original-blob-ids").map(|list| { - list.split(',') - .map(|item| item.trim().to_string()) - .collect() - }); - let blob_toc_sizes: Option> = - matches.get_one::("blob-toc-sizes").map(|list| { - list.split(',') - .map(|item| { - item.trim() - .parse::() - .expect("invalid number in --blob-toc-sizes option") - }) - .collect() - }); - let blob_toc_digests: Option> = - matches.get_one::("blob-toc-digests").map(|list| { - list.split(',') - .map(|item| item.trim().to_string()) - .collect() - }); - let target_bootstrap_path = Self::get_bootstrap_storage(matches)?; - let chunk_dict_path = if let Some(arg) = matches.get_one::("chunk-dict") { - Some(parse_chunk_dict_arg(arg)?) - } else { - None - }; - let config = - Self::get_configuration(matches).context("failed to get configuration information")?; - config - .internal - .set_blob_accessible(matches.get_one::("config").is_some()); - let mut ctx = BuildContext { - prefetch: Self::get_prefetch(matches)?, - ..Default::default() - }; - ctx.configuration = config.clone(); - - let parent_bootstrap_path = Self::get_parent_bootstrap(matches)?; - let meta = RafsSuper::load_from_file(&source_bootstrap_paths[0], config.clone(), false)? 
- .0 - .meta; - - let output = Merger::merge( - &mut ctx, - parent_bootstrap_path, - source_bootstrap_paths, - blob_digests, - original_blob_ids, - blob_sizes, - blob_toc_digests, - blob_toc_sizes, - target_bootstrap_path, - chunk_dict_path, - config, - )?; - OutputSerializer::dump( - matches, - output, - build_info, - meta.get_compressor(), - meta.version.try_into().unwrap(), - ) - } - - fn compact(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let config = - Self::get_configuration(matches).context("failed to get configuration information")?; - config - .internal - .set_blob_accessible(matches.get_one::("config").is_some()); - let bootstrap_path = PathBuf::from(Self::get_bootstrap(matches)?); - let dst_bootstrap = match matches.get_one::("output-bootstrap") { - None => bootstrap_path.with_extension("bootstrap.compact"), - Some(s) => PathBuf::from(s), - }; - - let (rs, _) = RafsSuper::load_from_file(&bootstrap_path, config.clone(), false)?; - info!("load bootstrap {:?} successfully", bootstrap_path); - let chunk_dict = match matches.get_one::("chunk-dict") { - None => None, - Some(args) => Some(HashChunkDict::from_commandline_arg( - args, - config, - &rs.meta.get_config(), - )?), - }; - - let backend = Self::get_backend(matches, "compactor")?; - - let config_file_path = matches.get_one::("config").unwrap(); - let file = File::open(config_file_path) - .with_context(|| format!("failed to open config file {}", config_file_path))?; - let config = serde_json::from_reader(file) - .with_context(|| format!("invalid config file {}", config_file_path))?; - - let version = rs.meta.version.try_into().unwrap(); - let compressor = rs.meta.get_compressor(); - if let Some(build_output) = - BlobCompactor::compact(rs, dst_bootstrap, chunk_dict, backend, &config)? - { - OutputSerializer::dump(matches, build_output, build_info, compressor, version)?; - } - Ok(()) - } - - fn unpack(matches: &ArgMatches) -> Result<()> { - let bootstrap = Self::get_bootstrap(matches)?; - let config = Self::get_configuration(matches)?; - config - .internal - .set_blob_accessible(matches.get_one::("config").is_some()); - let output = matches.get_one::("output").expect("pass in output"); - if output.is_empty() { - return Err(anyhow!("invalid empty --output option")); - } - - let blob = matches.get_one::("blob").map(|s| s.as_str()); - let backend: Option> = match blob { - Some(blob_path) => { - let blob_path = PathBuf::from(blob_path); - let local_fs_conf = LocalFsConfig { - blob_file: blob_path.to_str().unwrap().to_owned(), - dir: Default::default(), - alt_dirs: Default::default(), - }; - let local_fs = LocalFs::new(&local_fs_conf, Some("unpacker")) - .with_context(|| format!("fail to create local backend for {:?}", blob_path))?; - - Some(Arc::new(local_fs)) - } - None => { - if let Some(backend) = &config.backend { - Some(BlobFactory::new_backend(&backend, "unpacker")?) - } else { - match Self::get_backend(matches, "unpacker") { - Ok(backend) => Some(backend), - Err(_) => bail!("one of `--blob`, `--blob-dir` and `--backend-config` must be specified"), - } - } - } - }; - - OCIUnpacker::new(bootstrap, backend, output) - .with_context(|| "fail to create unpacker")? 
- .unpack(config) - .with_context(|| "fail to unpack") - } - - fn check(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let bootstrap_path = Self::get_bootstrap(matches)?; - let verbose = matches.get_flag("verbose"); - let config = Self::get_configuration(matches)?; - // For backward compatibility with v2.1 - config - .internal - .set_blob_accessible(matches.get_one::("bootstrap").is_none()); - - let mut validator = Validator::new(bootstrap_path, config)?; - let (blobs, compressor, fs_version) = validator - .check(verbose) - .with_context(|| format!("failed to check bootstrap {:?}", bootstrap_path))?; - - println!("RAFS filesystem metadata is valid, referenced data blobs: "); - let mut blob_ids = Vec::new(); - for (idx, blob) in blobs.iter().enumerate() { - println!( - "\t {}: {}, compressed data size 0x{:x}, compressed file size 0x{:x}, uncompressed file size 0x{:x}, chunks: 0x{:x}, features: {}", - idx, - blob.blob_id(), - blob.compressed_data_size(), - blob.compressed_size(), - blob.uncompressed_size(), - blob.chunk_count(), - format_blob_features(blob.features()), - ); - blob_ids.push(blob.blob_id().to_string()); - } - - OutputSerializer::dump_for_check( - matches, - build_info, - blob_ids, - bootstrap_path, - compressor, - fs_version, - )?; - - Ok(()) - } - - fn inspect(matches: &ArgMatches) -> Result<()> { - let bootstrap_path = Self::get_bootstrap(matches)?; - let mut config = Self::get_configuration(matches)?; - // For backward compatibility with v2.1 - config - .internal - .set_blob_accessible(matches.get_one::("bootstrap").is_none()); - if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { - cache.cache_validate = true; - } - - let cmd = matches.get_one::("request"); - let mut inspector = inspect::RafsInspector::new(bootstrap_path, cmd.is_some(), config) - .map_err(|e| { - error!("failed to create inspector, {:?}", e); - e - })?; - - if let Some(c) = cmd { - let o = inspect::Executor::execute(&mut inspector, c.to_string()).unwrap(); - serde_json::to_writer(std::io::stdout(), &o) - .unwrap_or_else(|e| error!("Failed to serialize result, {:?}", e)); - } else { - inspect::Prompt::run(inspector); - } - - Ok(()) - } - - fn stat(matches: &ArgMatches) -> Result<()> { - let digester = matches - .get_one::("digester") - .map(|s| s.as_str()) - .unwrap_or_default() - .parse()?; - let mut stat = stat::ImageStat::new(digester); - let target = matches - .get_one::("target") - .map(Path::new) - .unwrap_or_else(|| Path::new("")); - let mut config = Self::get_configuration(matches)?; - if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { - cache.cache_validate = true; - } - config - .internal - .set_blob_accessible(matches.get_one::("config").is_some()); - - if let Some(blob) = matches.get_one::("bootstrap").map(PathBuf::from) { - stat.stat(&blob, true, config.clone())?; - } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { - Self::ensure_directory(d.clone())?; - - stat.dedup_enabled = true; - - let children = fs::read_dir(d.as_path()) - .with_context(|| format!("failed to read dir {:?}", d.as_path()))?; - let children = children.collect::, std::io::Error>>()?; - for child in children { - let path = child.path(); - if path.is_file() && path != target && path.extension().is_none() { - if let Err(e) = stat.stat(&path, true, config.clone()) { - debug!( - "failed to process {}, {}", - path.to_str().unwrap_or_default(), - e - ); - }; - } - } - } else { - bail!("one of `--bootstrap` and `--blob-dir` must be specified"); - 
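For the `--blob-dir` branch of `stat` above: it walks the directory and feeds every regular file without an extension, other than the `--target` file, into the statistics collector. A rough standard-library equivalent of that filter; the closure stands in for the real `stat.stat(...)` call and the paths are hypothetical:

```rust
use std::fs;
use std::io;
use std::path::Path;

/// Visit every regular, extension-less file under `dir`, skipping `target`;
/// `visit` stands in for the real per-bootstrap statistics call.
fn for_each_bootstrap<F>(dir: &Path, target: &Path, mut visit: F) -> io::Result<()>
where
    F: FnMut(&Path),
{
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_file() && path.as_path() != target && path.extension().is_none() {
            visit(&path);
        }
    }
    Ok(())
}

fn main() -> io::Result<()> {
    // Hypothetical paths; point them at a real blob directory to try this out.
    for_each_bootstrap(Path::new("/tmp/blobs"), Path::new("/tmp/blobs/target"), |p| {
        println!("would stat {}", p.display());
    })
}
```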
} - - if let Some(blob) = matches.get_one::("target").map(PathBuf::from) { - stat.target_enabled = true; - stat.stat(&blob, false, config)?; - } - - stat.finalize(); - - if let Some(path) = matches.get_one::("output-json").map(PathBuf::from) { - stat.dump_json(&path)?; - } else { - stat.dump(); - } - - Ok(()) - } - - fn get_bootstrap(matches: &ArgMatches) -> Result<&Path> { - match matches.get_one::("bootstrap") { - Some(s) => Ok(Path::new(s)), - None => match matches.get_one::("BOOTSTRAP") { - Some(s) => Ok(Path::new(s)), - None => bail!("missing parameter `bootstrap` or `BOOTSTRAP`"), - }, - } - } - - fn get_bootstrap_storage(matches: &ArgMatches) -> Result { - if let Some(s) = matches.get_one::("bootstrap") { - Ok(ArtifactStorage::SingleFile(s.into())) - } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { - if !d.exists() { - bail!("Directory to store blobs does not exist") - } - Ok(ArtifactStorage::FileDir(d)) - } else { - bail!("both --bootstrap and --blob-dir are missing, please specify one to store the generated metadata blob file"); - } - } - - fn get_blob_cache_storage( - matches: &ArgMatches, - conversion_type: ConversionType, - ) -> Result> { - if let Some(p) = matches.get_one::("blob-cache-dir") { - if conversion_type == ConversionType::TarToTarfs - || conversion_type == ConversionType::EStargzIndexToRef - || conversion_type == ConversionType::EStargzToRafs - || conversion_type == ConversionType::EStargzToRef - { - bail!( - "conversion type `{}` conflicts with `--blob-cache-dir`", - conversion_type - ); - } - - if !p.exists() { - bail!("directory to store blob cache does not exist") - } - Ok(Some(ArtifactStorage::FileDir(p.to_owned()))) - } else { - Ok(None) - } - } - - // Must specify a path to blob file. - // For cli/binary interface compatibility sake, keep option `backend-config`, but - // it only receives "localfs" backend type and it will be REMOVED in the future - fn get_blob_storage( - matches: &ArgMatches, - conversion_type: ConversionType, - ) -> Result> { - // Must specify a path to blob file. - // For cli/binary interface compatibility sake, keep option `backend-config`, but - // it only receives "localfs" backend type and it will be REMOVED in the future - if conversion_type == ConversionType::EStargzIndexToRef { - Ok(None) - } else if let Some(p) = matches - .get_one::("blob") - .map(|b| ArtifactStorage::SingleFile(b.into())) - { - if conversion_type == ConversionType::TarToTarfs { - bail!( - "conversion type `{}` conflicts with `--blob`", - conversion_type - ); - } - Ok(Some(p)) - } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { - if !d.exists() { - bail!("directory to store blobs does not exist") - } - Ok(Some(ArtifactStorage::FileDir(d))) - } else if let Some(config_json) = matches.get_one::("backend-config") { - let config: serde_json::Value = serde_json::from_str(config_json).unwrap(); - warn!("using --backend-type=localfs is DEPRECATED. Use --blob-dir instead."); - if let Some(bf) = config.get("blob_file") { - // Even unwrap, it is caused by invalid json. Image creation just can't start. - let b: PathBuf = bf - .as_str() - .ok_or_else(|| anyhow!("backend-config is invalid"))? 
- .to_string() - .into(); - Ok(Some(ArtifactStorage::SingleFile(b))) - } else { - error!("Wrong backend config input!"); - Err(anyhow!("invalid backend config")) - } - } else { - bail!("both --blob and --blob-dir are missing, please specify one to store the generated data blob file"); - } - } - - fn get_parent_bootstrap(matches: &ArgMatches) -> Result> { - let mut parent_bootstrap_path = String::new(); - if let Some(_parent_bootstrap_path) = matches.get_one::("parent-bootstrap") { - parent_bootstrap_path = _parent_bootstrap_path.to_string(); - } - - if !parent_bootstrap_path.is_empty() { - Ok(Some(parent_bootstrap_path)) - } else { - Ok(None) - } - } - - fn get_configuration(matches: &ArgMatches) -> Result> { - let config = if let Some(config_file) = matches.get_one::("config") { - ConfigV2::from_file(config_file)? - } else if let Some(dir) = matches.get_one::("blob-dir") { - ConfigV2::new_localfs("", dir)? - } else { - ConfigV2::default() - }; - if !config.validate() { - return Err(anyhow!("invalid configuration: {:?}", config)); - } - - Ok(Arc::new(config)) - } - - fn get_backend( - matches: &ArgMatches, - blob_id: &str, - ) -> Result> { - let cfg_file = matches - .get_one::("backend-config") - .context("missing backend-config argument")?; - let cfg = ConfigV2::from_file(cfg_file)?; - let backend_cfg = cfg.get_backend_config()?; - let backend = BlobFactory::new_backend(backend_cfg, blob_id)?; - - Ok(backend) - } - - fn get_blob_id(matches: &ArgMatches) -> Result { - let mut blob_id = String::new(); - - if let Some(p_blob_id) = matches.get_one::("blob-id") { - blob_id = String::from(p_blob_id); - if blob_id.len() > BLOB_ID_MAXIMUM_LENGTH { - bail!("blob id is limited to length {}", BLOB_ID_MAXIMUM_LENGTH); - } - } - - Ok(blob_id) - } - - fn get_blob_size(matches: &ArgMatches, ty: ConversionType) -> Result { - if ty != ConversionType::EStargzIndexToRef { - return Ok(0); - } - - match matches.get_one::("blob-data-size") { - None => bail!("no value specified for '--blob-data-size'"), - Some(v) => { - let param = v.trim_start_matches("0x").trim_start_matches("0X"); - let size = u64::from_str_radix(param, 16) - .context(format!("invalid blob data size {}", v))?; - Ok(size) - } - } - } - - fn get_chunk_size(matches: &ArgMatches, ty: ConversionType) -> Result { - match matches.get_one::("chunk-size") { - None => { - if ty == ConversionType::EStargzIndexToRef { - Ok(0x400000u32) - } else { - Ok(RAFS_DEFAULT_CHUNK_SIZE as u32) - } - } - Some(v) => { - let chunk_size = if v.starts_with("0x") || v.starts_with("0X") { - u32::from_str_radix(&v[2..], 16).context(format!("invalid chunk size {}", v))? - } else { - v.parse::() - .context(format!("invalid chunk size {}", v))? - }; - if chunk_size as u64 > RAFS_MAX_CHUNK_SIZE - || chunk_size < 0x1000 - || !chunk_size.is_power_of_two() - { - bail!("invalid chunk size: {}", chunk_size); - } - Ok(chunk_size) - } - } - } - - fn get_batch_size( - matches: &ArgMatches, - version: RafsVersion, - ty: ConversionType, - chunk_size: u32, - ) -> Result { - match matches.get_one::("batch-size") { - None => Ok(0), - Some(v) => { - let batch_size = if v.starts_with("0x") || v.starts_with("0X") { - u32::from_str_radix(&v[2..], 16).context(format!("invalid batch size {}", v))? - } else { - v.parse::() - .context(format!("invalid batch size {}", v))? 
- }; - if batch_size > 0 { - if version.is_v5() { - bail!("`--batch-size` with non-zero value conflicts with `--fs-version 5`"); - } - match ty { - ConversionType::DirectoryToRafs - | ConversionType::EStargzToRafs - | ConversionType::TargzToRafs - | ConversionType::TarToRafs => { - if batch_size as u64 > RAFS_MAX_CHUNK_SIZE - || batch_size < 0x1000 - || !batch_size.is_power_of_two() - { - bail!("invalid batch size: {}", batch_size); - } - if batch_size > chunk_size { - bail!( - "batch size 0x{:x} is bigger than chunk size 0x{:x}", - batch_size, - chunk_size - ); - } - } - _ => bail!("unsupported ConversionType for batch chunk: {}", ty), - } - } - Ok(batch_size) - } - } - } - - fn get_prefetch(matches: &ArgMatches) -> Result { - let prefetch_policy = matches - .get_one::("prefetch-policy") - .map(|s| s.as_str()) - .unwrap_or_default() - .parse()?; - Prefetch::new(prefetch_policy) - } - - fn get_blob_offset(matches: &ArgMatches) -> Result { - match matches.get_one::("blob-offset") { - None => Ok(0), - Some(v) => v - .parse::() - .context(format!("invalid blob offset {}", v)), - } - } - - fn get_fs_version(matches: &ArgMatches) -> Result { - match matches.get_one::("fs-version") { - None => Ok(RafsVersion::V6), - Some(v) => { - let version: u32 = v.parse().context(format!("invalid fs-version: {}", v))?; - if version == 5 { - Ok(RafsVersion::V5) - } else if version == 6 { - Ok(RafsVersion::V6) - } else { - bail!("invalid fs-version: {}", v); - } - } - } - } - - fn ensure_file>(path: P) -> Result<()> { - let file_type = metadata(path.as_ref()) - .context(format!("failed to access path {:?}", path.as_ref()))? - .file_type(); - // The SOURCE can be a regular file, FIFO file, or /dev/stdin char device, etc.. - ensure!( - file_type.is_file() || file_type.is_fifo() || file_type.is_char_device(), - "specified path must be a regular/fifo/char_device file: {:?}", - path.as_ref() - ); - Ok(()) - } - - fn ensure_directory>(path: P) -> Result<()> { - let dir = metadata(path.as_ref()) - .context(format!("failed to access path {:?}", path.as_ref()))?; - ensure!( - dir.is_dir(), - "specified path must be a directory: {:?}", - path.as_ref() - ); - Ok(()) - } -} - -#[cfg(target_os = "linux")] -impl Command { - fn export(args: &ArgMatches, subargs: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let subargs = nydus::SubCmdArgs::new(args, subargs); - if subargs.is_present("block") { - Self::export_block(&subargs, build_info)?; - } else { - bail!("unknown export type"); - } - Ok(()) - } - - fn export_block(subargs: &nydus::SubCmdArgs, _bti: &BuildTimeInfo) -> Result<()> { - let mut localfs_dir = None; - let mut entry = if let Some(dir) = subargs.value_of("localfs-dir") { - // Safe to unwrap because `--block` requires `--bootstrap`. - let bootstrap = subargs.value_of("bootstrap").unwrap(); - let config = format!( - r#" - {{ - "type": "bootstrap", - "id": "disk-default", - "domain_id": "block-nbd", - "config_v2": {{ - "version": 2, - "id": "block-nbd-factory", - "backend": {{ - "type": "localfs", - "localfs": {{ - "dir": "{}" - }} - }}, - "cache": {{ - "type": "filecache", - "filecache": {{ - "work_dir": "{}" - }} - }}, - "metadata_path": "{}" - }} - }}"#, - dir, dir, bootstrap - ); - localfs_dir = Some(dir.to_string()); - nydus_api::BlobCacheEntry::from_str(&config)? - } else if let Some(v) = subargs.value_of("config") { - nydus_api::BlobCacheEntry::from_file(v)? 
- } else { - bail!("both option `-C/--config` and `-D/--localfs-dir` are missing"); - }; - if !entry.prepare_configuration_info() { - bail!("invalid blob cache entry configuration information"); - } - if !entry.validate() { - bail!("invalid blob cache entry configuration information"); - } - - let threads: u32 = subargs - .value_of("threads") - .map(|n| n.parse().unwrap_or(1)) - .unwrap_or(1); - let output = subargs.value_of("output").map(|v| v.to_string()); - let verity = subargs.is_present("verity"); - - nydus_service::block_device::BlockDevice::export( - entry, - output, - localfs_dir, - threads, - verity, - ) - .context("failed to export RAFS filesystem as raw block device image") - } - - fn thread_validator(v: &str) -> std::result::Result { - nydus_service::validate_threads_configuration(v).map(|s| s.to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::Command; - #[test] - fn test_ensure_file() { - Command::ensure_file("/dev/stdin").unwrap(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#![deny(warnings)] +#[macro_use(crate_authors)] +extern crate clap; +#[macro_use] +extern crate anyhow; +#[macro_use] +extern crate log; +#[macro_use] +extern crate serde_json; +#[macro_use] +extern crate lazy_static; +use crate::deduplicate::{ + check_bootstrap_versions_consistency, update_ctx_from_parent_bootstrap, Deduplicate, + SqliteDatabase, +}; +use std::convert::TryFrom; +use std::fs::{self, metadata, DirEntry, File, OpenOptions}; +use std::os::unix::fs::FileTypeExt; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use clap::parser::ValueSource; +use clap::{Arg, ArgAction, ArgMatches, Command as App}; +use nix::unistd::{getegid, geteuid}; +use nydus::{get_build_time_info, setup_logging}; +use nydus_api::{BuildTimeInfo, ConfigV2, LocalFsConfig}; +use nydus_builder::{ + parse_chunk_dict_arg, ArtifactStorage, BlobCacheGenerator, BlobCompactor, BlobManager, + BootstrapManager, BuildContext, BuildOutput, Builder, ChunkdictBlobInfo, ChunkdictChunkInfo, + ConversionType, DirectoryBuilder, Feature, Features, Generator, HashChunkDict, Merger, + Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, WhiteoutSpec, +}; +use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion}; +use nydus_storage::backend::localfs::LocalFs; +use nydus_storage::backend::BlobBackend; +use nydus_storage::device::BlobFeatures; +use nydus_storage::factory::BlobFactory; +use nydus_storage::meta::{format_blob_features, BatchContextGenerator}; +use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE}; +use nydus_utils::trace::{EventTracerClass, TimingTracerClass, TraceClass}; +use nydus_utils::{ + compress, digest, event_tracer, lazy_drop, register_tracer, root_tracer, timing_tracer, +}; +use serde::{Deserialize, Serialize}; + +use crate::unpack::{OCIUnpacker, Unpacker}; +use crate::validator::Validator; + +#[cfg(target_os = "linux")] +use nydus_service::ServiceArgs; +#[cfg(target_os = "linux")] +use std::str::FromStr; + +mod deduplicate; +mod inspect; +mod stat; +mod unpack; +mod validator; + +const BLOB_ID_MAXIMUM_LENGTH: usize = 255; + +#[derive(Serialize, Deserialize, Default)] +pub struct OutputSerializer { + /// The binary version of builder (nydus-image). + version: String, + /// RAFS meta data file path. 
+ bootstrap: String, + /// Represents all blob in blob table ordered by blob index, this field + /// only include the layer that does have a blob, and should be deprecated + /// in future, use `artifacts` field to replace. + blobs: Vec, + /// Performance trace info for current build. + trace: serde_json::Map, + /// RAFS filesystem version (5 or 6). + fs_version: String, + /// Chunk compression algorithm. + compressor: String, +} + +impl OutputSerializer { + fn dump( + matches: &ArgMatches, + build_output: BuildOutput, + build_info: &BuildTimeInfo, + compressor: compress::Algorithm, + fs_version: RafsVersion, + ) -> Result<()> { + let output_json: Option = matches + .get_one::("output-json") + .map(|o| o.to_string().into()); + + if let Some(ref f) = output_json { + let w = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .open(f) + .with_context(|| format!("can not open output file {}", f.display()))?; + let trace = root_tracer!().dump_summary_map().unwrap_or_default(); + let version = format!("{}-{}", build_info.package_ver, build_info.git_commit); + let output = Self { + version, + bootstrap: build_output.bootstrap_path.unwrap_or_default(), + blobs: build_output.blobs, + trace, + fs_version: fs_version.to_string(), + compressor: compressor.to_string(), + }; + + serde_json::to_writer_pretty(w, &output) + .context("failed to write result to output file")?; + } + + Ok(()) + } + + fn dump_for_check( + matches: &ArgMatches, + build_info: &BuildTimeInfo, + blob_ids: Vec, + bootstrap: &Path, + compressor: compress::Algorithm, + fs_version: RafsVersion, + ) -> Result<()> { + let output_json: Option = matches + .get_one::("output-json") + .map(|o| o.to_string().into()); + + if let Some(ref f) = output_json { + let w = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .open(f) + .with_context(|| format!("can not open output file {}", f.display()))?; + let trace = root_tracer!().dump_summary_map().unwrap_or_default(); + let version = format!("{}-{}", build_info.package_ver, build_info.git_commit); + let output = Self { + version, + bootstrap: bootstrap.display().to_string(), + blobs: blob_ids, + trace, + fs_version: fs_version.to_string(), + compressor: compressor.to_string(), + }; + + serde_json::to_writer(w, &output).context("failed to write result to output file")?; + } + + Ok(()) + } +} + +fn prepare_cmd_args(bti_string: &'static str) -> App { + let arg_chunk_dict = Arg::new("chunk-dict") + .long("chunk-dict") + .help("File path of chunk dictionary for data deduplication"); + let arg_prefetch_policy = Arg::new("prefetch-policy") + .long("prefetch-policy") + .help("Set data prefetch policy") + .required(false) + .default_value("none") + .value_parser(["fs", "blob", "none"]); + let arg_output_json = Arg::new("output-json") + .long("output-json") + .short('J') + .help("File path to save operation result in JSON format"); + let arg_config = Arg::new("config") + .long("config") + .short('C') + .help("Configuration file for storage backend, cache and RAFS FUSE filesystem.") + .required(false); + + let app = App::new("") + .version(bti_string) + .author(crate_authors!()) + .about("Build, analyze, inspect or validate RAFS filesystems/Nydus accelerated container images") + .arg( + Arg::new("log-file") + .long("log-file") + .short('L') + .help("Log file path") + .required(false) + .global(true), + ) + .arg( + Arg::new("log-level") + .long("log-level") + .short('l') + .help("Log level:") + .default_value("info") + .value_parser(["trace", "debug", "info", "warn", 
"error"]) + .required(false) + .global(true), + ); + + let app = app.subcommand( + App::new("create") + .about("Create RAFS filesystems from directories, tar files or OCI images") + .arg( + Arg::new("SOURCE") + .help("source from which to build the RAFS filesystem") + .required(true) + .num_args(1), + ) + .arg( + Arg::new("type") + .long("type") + .short('t') + .alias("source-type") + .help("Conversion type:") + .default_value("dir-rafs") + .value_parser([ + "directory", + "dir-rafs", + "estargz-rafs", + "estargz-ref", + "estargztoc-ref", + "tar-rafs", + "tar-tarfs", + "targz-rafs", + "targz-ref", + "stargz_index", + ]) + ) + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("File path to save the generated RAFS metadata blob") + .required_unless_present_any(["blob-dir", "blob-inline-meta"]) + .conflicts_with("blob-inline-meta"), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .help("Directory path to save generated RAFS metadata and data blobs"), + ) + .arg( + Arg::new("blob") + .long("blob") + .short('b') + .help("File path to save the generated RAFS data blob") + .required_unless_present_any(["type", "blob-dir"]), + ) + .arg( + Arg::new("blob-inline-meta") + .long("blob-inline-meta") + .alias("inline-bootstrap") + .help("Inline RAFS metadata and blob metadata into the data blob") + .action(ArgAction::SetTrue) + .conflicts_with("blob-id") + .required(false), + ) + .arg( + Arg::new("blob-id") + .long("blob-id") + .required_if_eq_any([("type", "estargztoc-ref"), ("type", "stargz_index")]) + .help("OSS object id for the generated RAFS data blob") + ) + .arg( + Arg::new("blob-data-size") + .long("blob-data-size") + .help("Set data blob size for 'estargztoc-ref' conversion"), + ) + .arg( + Arg::new("blob-offset") + .long("blob-offset") + .help("File offset to store RAFS data, to support storing data blobs into tar files") + .hide(true) + .default_value("0"), + ) + .arg( + Arg::new("chunk-size") + .long("chunk-size") + .help("Set the size of data chunks, must be power of two and between 0x1000-0x1000000:") + .required(false), + ) + .arg( + Arg::new("batch-size") + .long("batch-size") + .help("Set the batch size to merge small chunks, must be power of two, between 0x1000-0x1000000 or be zero:") + .required(false) + .default_value("0"), + ) + .arg( + Arg::new("compressor") + .long("compressor") + .help("Algorithm to compress data chunks:") + .required(false) + .default_value("zstd") + .value_parser(["none", "lz4_block", "zstd"]), + ) + .arg( + Arg::new("digester") + .long("digester") + .help("Algorithm to digest data chunks:") + .required(false) + .default_value("blake3") + .value_parser(["blake3", "sha256"]), + ) + .arg( arg_config.clone() ) + .arg( + Arg::new("fs-version") + .long("fs-version") + .short('v') + .help("Set RAFS format version number:") + .default_value("6") + .value_parser(["5", "6"]), + ) + .arg( + Arg::new("features") + .long("features") + .value_parser(["blob-toc"]) + .help("Enable/disable features") + ) + .arg( + arg_chunk_dict.clone(), + ) + .arg( + Arg::new("parent-bootstrap") + .long("parent-bootstrap") + .help("File path of the parent/referenced RAFS metadata blob (optional)") + .required(false), + ) + .arg( + Arg::new("aligned-chunk") + .long("aligned-chunk") + .help("Align uncompressed data chunks to 4K, only for RAFS V5") + .action(ArgAction::SetTrue) + ) + .arg( + Arg::new("repeatable") + .long("repeatable") + .help("Generate reproducible RAFS metadata") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + 
Arg::new("disable-check") + .long("disable-check") + .help("Disable RAFS metadata validation after build") + .hide(true) + .action(ArgAction::SetTrue) + .required(false) + ) + .arg( + Arg::new("whiteout-spec") + .long("whiteout-spec") + .help("Set the type of whiteout specification:") + .default_value("oci") + .value_parser(["oci", "overlayfs", "none"]) + ) + .arg( + arg_prefetch_policy.clone(), + ) + .arg( + arg_output_json.clone(), + ) + .arg( + Arg::new("encrypt") + .long("encrypt") + .short('E') + .help("Encrypt the generated RAFS metadata and data blobs") + .action(ArgAction::SetTrue) + .required(false) + ) + .arg( + Arg::new("blob-cache-dir") + .long("blob-cache-dir") + .help("Directory path to generate blob cache files ($id.blob.meta and $id.blob.data)") + .value_parser(clap::value_parser!(PathBuf)) + .conflicts_with("blob-inline-meta") + .conflicts_with("blob") + .conflicts_with("blob-dir") + .conflicts_with("compressor") + .required(false) + ) + ); + + let app = app.subcommand( + App::new("chunkdict") + .about("deduplicate RAFS filesystem metadata") + .subcommand( + App::new("generate") + .about("generate chunk dictionary based on database") + .arg( + Arg::new("database") + .long("database") + .help("Database connection address for assisting chunk dictionary generation, e.g. /path/database.db") + .default_value("sqlite:///home/runner/output/database.db") + .required(false), + ) + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Output path of nydus overlaid bootstrap"), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .help("Directory path to save generated RAFS metadata and data blobs"), + ) + .arg(arg_prefetch_policy.clone()) + .arg(arg_output_json.clone()) + .arg(arg_config.clone()) + .arg( + Arg::new("SOURCE") + .help("bootstrap paths (allow one or more)") + .required(true) + .num_args(1..), + ) + .arg( + Arg::new("verbose") + .long("verbose") + .short('v') + .help("Output message in verbose mode") + .action(ArgAction::SetTrue) + .required(false), + ) + ) + ); + + let app = app.subcommand( + App::new("merge") + .about("Merge multiple bootstraps into a overlaid bootstrap") + .arg( + Arg::new("parent-bootstrap") + .long("parent-bootstrap") + .help("File path of the parent/referenced RAFS metadata blob (optional)") + .required(false), + ) + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Output path of nydus overlaid bootstrap"), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .help("Directory path to save generated RAFS metadata and data blobs"), + ) + .arg(arg_chunk_dict.clone()) + .arg(arg_prefetch_policy) + .arg(arg_output_json.clone()) + .arg( + Arg::new("blob-digests") + .long("blob-digests") + .required(false) + .help("RAFS blob digest list separated by comma"), + ) + .arg( + Arg::new("original-blob-ids") + .long("original-blob-ids") + .required(false) + .help("original blob id list separated by comma, it may usually be a sha256 hex string"), + ) + .arg( + Arg::new("blob-sizes") + .long("blob-sizes") + .required(false) + .help("RAFS blob size list separated by comma"), + ) + .arg( + Arg::new("blob-toc-digests") + .long("blob-toc-digests") + .required(false) + .help("RAFS blob toc digest list separated by comma"), + ) + .arg( + Arg::new("blob-toc-sizes") + .long("blob-toc-sizes") + .required(false) + .help("RAFS blob toc size list separated by comma"), + ) + .arg(arg_config.clone()) + .arg( + Arg::new("SOURCE") + .help("bootstrap paths (allow one or more)") + .required(true) + 
.num_args(1..), + ), + ); + + let app = app.subcommand( + App::new("check") + .about("Validate RAFS filesystem metadata") + .arg( + Arg::new("BOOTSTRAP") + .help("File path of RAFS metadata") + .required_unless_present("bootstrap"), + ) + .arg( + Arg::new("bootstrap") + .short('B') + .long("bootstrap") + .help("[Deprecated] File path of RAFS meta blob/bootstrap") + .conflicts_with("BOOTSTRAP") + .required(false), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .conflicts_with("config") + .help( + "Directory for localfs storage backend, hosting data blobs and cache files", + ), + ) + .arg(arg_config.clone()) + .arg( + Arg::new("verbose") + .long("verbose") + .short('v') + .help("Output message in verbose mode") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg(arg_output_json.clone()), + ); + + #[cfg(target_os = "linux")] + let app = app.subcommand( + App::new("export") + .about("Export RAFS filesystems as raw block disk images or tar files") + .arg( + Arg::new("block") + .long("block") + .action(ArgAction::SetTrue) + .required(true) + .help("Export RAFS filesystems as raw block disk images") + ) + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Bootstrap of the RAFS filesystem to be exported") + .requires("localfs-dir") + ) + .arg(Arg::new("config") + .long("config") + .short('C') + .help("Configuration file containing a `BlobCacheEntry` object") + .required(false)) + .arg( + Arg::new("localfs-dir") + .long("localfs-dir") + .short('D') + .help( + "Path to the `localfs` working directory, which also enables the `localfs` storage backend" + ) + .requires("bootstrap") + .conflicts_with("config"), + ) + .arg( + Arg::new("threads") + .long("threads") + .default_value("4") + .help("Number of worker threads to execute export operation, valid values: [1-32]") + .value_parser(Command::thread_validator) + .required(false), + ) + .arg( + Arg::new("output") + .long("output") + .short('O') + .help("File path for saving the exported content") + .required_unless_present("localfs-dir") + ) + .arg( + Arg::new("verity") + .long("verity") + .help("Generate dm-verity data for block device") + .action(ArgAction::SetTrue) + .required(false) + .requires("block") + ) + ); + + let app = app.subcommand( + App::new("inspect") + .about("Inspect RAFS filesystem metadata in interactive or request mode") + .arg( + Arg::new("BOOTSTRAP") + .help("File path of RAFS metadata") + .required_unless_present("bootstrap"), + ) + .arg( + Arg::new("bootstrap") + .short('B') + .long("bootstrap") + .help("[Deprecated] File path of RAFS meta blob/bootstrap") + .conflicts_with("BOOTSTRAP") + .required(false), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .conflicts_with("config") + .help( + "Directory for localfs storage backend, hosting data blobs and cache files", + ), + ) + .arg(arg_config.clone()) + .arg( + Arg::new("request") + .long("request") + .short('R') + .help("Inspect RAFS filesystem metadata in request mode") + .required(false), + ), + ); + + let app = app.subcommand( + App::new("stat") + .about("Generate statistics information for RAFS filesystems") + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Generate statistics information for the RAFS filesystem") + .required(false), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .help("Generate statistics information for all RAFS filesystems in the directory") + .required(false), + ) + .arg( + Arg::new("target") + .long("target") + .short('T') + 
.help("Generate statistics information for the RAFS filesystem after applying chunk deduplication") + .required(false), + ) + .arg(arg_config.clone()) + .arg( + Arg::new("digester") + .long("digester") + .help("Algorithm to digest data chunks:") + .required(false) + .default_value("blake3") + .value_parser(["blake3", "sha256"]), + ) + .arg( + arg_output_json.clone(), + ) + ); + + let app = app.subcommand( + App::new("compact") + .about("(experimental)Compact specific nydus image, remove unused chunks in blobs, merge small blobs") + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("bootstrap to compact") + .required(true), + ) + .arg( + Arg::new("config") + .long("config") + .short('C') + .help("config to compactor") + .required(true), + ) + .arg( + Arg::new("backend-config") + .long("backend-config") + .help("config file of backend") + .required(true), + ) + .arg( arg_chunk_dict ) + .arg( + Arg::new("output-bootstrap") + .long("output-bootstrap") + .short('O') + .help("bootstrap to output, default is source bootstrap add suffix .compact"), + ) + .arg( + arg_output_json, + ) + ); + + app.subcommand( + App::new("unpack") + .about("Unpack a RAFS filesystem to a tar file") + .arg( + Arg::new("BOOTSTRAP") + .help("File path of RAFS metadata") + .required_unless_present("bootstrap"), + ) + .arg( + Arg::new("backend-config") + .long("backend-config") + .help("config file of backend") + .required(false), + ) + .arg( + Arg::new("bootstrap") + .short('B') + .long("bootstrap") + .help("[Deprecated] File path of RAFS meta blob/bootstrap") + .conflicts_with("BOOTSTRAP") + .required(false), + ) + .arg( + Arg::new("blob") + .long("blob") + .short('b') + .help("path to RAFS data blob file") + .required(false), + ) + .arg( + Arg::new("blob-dir") + .long("blob-dir") + .short('D') + .conflicts_with("config") + .help( + "Directory for localfs storage backend, hosting data blobs and cache files", + ), + ) + .arg(arg_config) + .arg( + Arg::new("output") + .long("output") + .help("path for output tar file") + .required(true), + ), + ) +} + +fn init_log(matches: &ArgMatches) -> Result<()> { + let mut log_file = None; + if let Some(file) = matches.get_one::("log-file") { + let path = PathBuf::from(file); + log_file = Some(path); + } + + // Safe to unwrap because it has a default value and possible values are defined. + let level = matches + .get_one::("log-level") + .unwrap() + .parse() + .unwrap(); + + setup_logging(log_file, level, 0).context("failed to setup logging") +} + +lazy_static! 
{ + static ref BTI_STRING: String = get_build_time_info().0; + static ref BTI: BuildTimeInfo = get_build_time_info().1; +} + +fn main() -> Result<()> { + let build_info = BTI.to_owned(); + let mut app = prepare_cmd_args(BTI_STRING.as_str()); + let usage = app.render_usage(); + let cmd = app.get_matches(); + + init_log(&cmd)?; + + register_tracer!(TraceClass::Timing, TimingTracerClass); + register_tracer!(TraceClass::Event, EventTracerClass); + + if let Some(matches) = cmd.subcommand_matches("create") { + Command::create(matches, &build_info) + } else if let Some(matches) = cmd.subcommand_matches("chunkdict") { + match matches.subcommand_name() { + Some("generate") => Command::chunkdict_generate( + matches.subcommand_matches("generate").unwrap(), + &build_info, + ), + _ => { + println!("{}", usage); + Ok(()) + } + } + } else if let Some(matches) = cmd.subcommand_matches("merge") { + let result = Command::merge(matches, &build_info); + if let Err(ref err) = result { + if let Some(MergeError::InconsistentFilesystem(_)) = err.downcast_ref::() { + error!("message:{}", err); + std::process::exit(2); + } + } + result + } else if let Some(matches) = cmd.subcommand_matches("check") { + Command::check(matches, &build_info) + } else if let Some(matches) = cmd.subcommand_matches("inspect") { + Command::inspect(matches) + } else if let Some(matches) = cmd.subcommand_matches("stat") { + Command::stat(matches) + } else if let Some(matches) = cmd.subcommand_matches("compact") { + Command::compact(matches, &build_info) + } else if let Some(matches) = cmd.subcommand_matches("unpack") { + Command::unpack(matches) + } else { + #[cfg(target_os = "linux")] + if let Some(matches) = cmd.subcommand_matches("export") { + Command::export(&cmd, matches, &build_info) + } else { + println!("{}", usage); + Ok(()) + } + #[cfg(not(target_os = "linux"))] + { + println!("{}", usage); + Ok(()) + } + } +} + +struct Command {} + +impl Command { + fn create(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let blob_id = Self::get_blob_id(matches)?; + let blob_offset = Self::get_blob_offset(matches)?; + let parent_path = Self::get_parent_bootstrap(matches)?; + let prefetch = Self::get_prefetch(matches)?; + let source_path = PathBuf::from(matches.get_one::("SOURCE").unwrap()); + let conversion_type: ConversionType = matches.get_one::("type").unwrap().parse()?; + let blob_inline_meta = matches.get_flag("blob-inline-meta"); + let repeatable = matches.get_flag("repeatable"); + let version = Self::get_fs_version(matches)?; + let chunk_size = Self::get_chunk_size(matches, conversion_type)?; + let batch_size = Self::get_batch_size(matches, version, conversion_type, chunk_size)?; + let blob_cache_storage = Self::get_blob_cache_storage(matches, conversion_type)?; + // blob-cache-dir and blob-dir/blob are a set of mutually exclusive functions, + // the former is used to generate blob cache, nydusd is directly started through blob cache, + // the latter is to generate nydus blob, as nydusd backend to start + let blob_storage = if blob_cache_storage.is_some() { + None + } else { + Self::get_blob_storage(matches, conversion_type)? + }; + + let aligned_chunk = if version.is_v6() && conversion_type != ConversionType::TarToTarfs { + true + } else { + // get_fs_version makes sure it's either v6 or v5. 
+ matches.get_flag("aligned-chunk") + }; + let whiteout_spec: WhiteoutSpec = matches + .get_one::("whiteout-spec") + .map(|s| s.as_str()) + .unwrap_or_default() + .parse()?; + let mut compressor = matches + .get_one::("compressor") + .map(|s| s.as_str()) + .unwrap_or_default() + .parse()?; + let mut digester = matches + .get_one::("digester") + .map(|s| s.as_str()) + .unwrap_or_default() + .parse()?; + let blob_data_size = Self::get_blob_size(matches, conversion_type)?; + let features = Features::try_from( + matches + .get_one::("features") + .map(|s| s.as_str()) + .unwrap_or_default(), + )?; + let encrypt = matches.get_flag("encrypt"); + match conversion_type { + ConversionType::DirectoryToRafs => { + Self::ensure_directory(&source_path)?; + if blob_storage.is_none() && blob_cache_storage.is_none() { + bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); + } + } + ConversionType::EStargzToRafs + | ConversionType::TargzToRafs + | ConversionType::TarToRafs => { + Self::ensure_file(&source_path)?; + if blob_storage.is_none() && blob_cache_storage.is_none() { + bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); + } + } + ConversionType::TarToRef + | ConversionType::TargzToRef + | ConversionType::EStargzToRef => { + Self::ensure_file(&source_path)?; + if matches.value_source("compressor") != Some(ValueSource::DefaultValue) + && compressor != compress::Algorithm::GZip + { + info!( + "only GZip is supported for conversion type {}, use GZip instead of {}", + conversion_type, compressor + ); + } + if matches.value_source("digester") != Some(ValueSource::DefaultValue) + && digester != digest::Algorithm::Sha256 + { + info!( + "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", + conversion_type, compressor + ); + } + compressor = compress::Algorithm::GZip; + digester = digest::Algorithm::Sha256; + if blob_storage.is_none() && blob_cache_storage.is_none() { + bail!("all of --blob, --blob-dir and --blob-cache-dir are missing"); + } else if !prefetch.disabled && prefetch.policy == PrefetchPolicy::Blob { + bail!( + "conversion type {} conflicts with '--prefetch-policy blob'", + conversion_type + ); + } + if version != RafsVersion::V6 { + bail!( + "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", + conversion_type + ); + } + if blob_id.trim() != "" { + bail!( + "conversion type '{}' conflicts with '--blob-id'", + conversion_type + ); + } + if encrypt { + bail!( + "conversion type '{}' conflicts with '--encrypt'", + conversion_type + ) + } + } + ConversionType::TarToTarfs => { + Self::ensure_file(&source_path)?; + if matches.value_source("compressor") != Some(ValueSource::DefaultValue) + && compressor != compress::Algorithm::None + { + info!( + "only compressor `None` is supported for conversion type {}, use `None` instead of {}", + conversion_type, compressor + ); + } + if matches.value_source("digester") != Some(ValueSource::DefaultValue) + && digester != digest::Algorithm::Sha256 + { + info!( + "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", + conversion_type, compressor + ); + } + compressor = compress::Algorithm::None; + digester = digest::Algorithm::Sha256; + if blob_storage.is_none() && blob_cache_storage.is_none() { + bail!("both --blob and --blob-dir or --blob-cache-dir are missing"); + } else if !prefetch.disabled && prefetch.policy == PrefetchPolicy::Blob { + bail!( + "conversion type {} conflicts with '--prefetch-policy blob'", + conversion_type + ); + } + if version != 
RafsVersion::V6 { + bail!( + "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", + conversion_type + ); + } + if matches.get_one::("chunk-dict").is_some() { + bail!( + "conversion type '{}' conflicts with '--chunk-dict'", + conversion_type + ); + } + if parent_path.is_some() { + bail!( + "conversion type '{}' conflicts with '--parent-bootstrap'", + conversion_type + ); + } + if blob_inline_meta { + bail!( + "conversion type '{}' conflicts with '--blob-inline-meta'", + conversion_type + ); + } + if features.is_enabled(Feature::BlobToc) { + bail!( + "conversion type '{}' conflicts with '--features blob-toc'", + conversion_type + ); + } + if aligned_chunk { + bail!( + "conversion type '{}' conflicts with '--aligned-chunk'", + conversion_type + ); + } + if encrypt { + bail!( + "conversion type '{}' conflicts with '--encrypt'", + conversion_type + ) + } + } + ConversionType::EStargzIndexToRef => { + Self::ensure_file(&source_path)?; + if matches.value_source("compressor") != Some(ValueSource::DefaultValue) + && compressor != compress::Algorithm::GZip + { + info!( + "only GZip is supported for conversion type {}, use GZip instead of {}", + conversion_type, compressor + ); + } + if matches.value_source("digester") != Some(ValueSource::DefaultValue) + && digester != digest::Algorithm::Sha256 + { + info!( + "only SHA256 is supported for conversion type {}, use SHA256 instead of {}", + conversion_type, compressor + ); + } + compressor = compress::Algorithm::GZip; + digester = digest::Algorithm::Sha256; + if blob_storage.is_some() || blob_cache_storage.is_some() { + bail!( + "conversion type '{}' conflicts with '--blob' and '--blob-cache-dir'", + conversion_type + ); + } + if version != RafsVersion::V6 { + bail!( + "'--fs-version 5' conflicts with conversion type '{}', only V6 is supported", + conversion_type + ); + } + if blob_id.trim() == "" { + bail!("'--blob-id' is missing for '--type stargz_index'"); + } + if encrypt { + bail!( + "conversion type '{}' conflicts with '--encrypt'", + conversion_type + ) + } + } + ConversionType::DirectoryToStargz + | ConversionType::TargzToStargz + | ConversionType::TarToStargz => { + unimplemented!() + } + ConversionType::DirectoryToTargz => { + unimplemented!() + } + } + + if features.is_enabled(Feature::BlobToc) && version == RafsVersion::V5 { + bail!("`--features blob-toc` can't be used with `--version 5` "); + } + + if blob_cache_storage.is_some() { + // In blob cache mode, we don't need to do any compression for the original data + compressor = compress::Algorithm::None; + } + + let mut build_ctx = BuildContext::new( + blob_id, + aligned_chunk, + blob_offset, + compressor, + digester, + !repeatable, + whiteout_spec, + conversion_type, + source_path, + prefetch, + blob_storage, + blob_inline_meta, + features, + encrypt, + ); + build_ctx.set_fs_version(version); + build_ctx.set_chunk_size(chunk_size); + build_ctx.set_batch_size(batch_size); + + let blob_cache_generator = match blob_cache_storage { + Some(storage) => Some(BlobCacheGenerator::new(storage)?), + None => None, + }; + build_ctx.blob_cache_generator = blob_cache_generator; + + let mut config = Self::get_configuration(matches)?; + if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { + cache.cache_validate = true; + } + config.internal.set_blob_accessible(true); + build_ctx.set_configuration(config.clone()); + + let mut blob_mgr = BlobManager::new(digester); + if let Some(chunk_dict_arg) = matches.get_one::("chunk-dict") { + let config = RafsSuperConfig { + 
version, + compressor, + digester, + chunk_size, + batch_size, + explicit_uidgid: !repeatable, + is_tarfs_mode: false, + }; + let rafs_config = Arc::new(build_ctx.configuration.as_ref().clone()); + // The separate chunk dict bootstrap doesn't support blob accessible. + rafs_config.internal.set_blob_accessible(false); + blob_mgr.set_chunk_dict(timing_tracer!( + { HashChunkDict::from_commandline_arg(chunk_dict_arg, rafs_config, &config,) }, + "import_chunk_dict" + )?); + } + + let mut bootstrap_mgr = if blob_inline_meta { + BootstrapManager::new(None, parent_path) + } else { + let bootstrap_path = Self::get_bootstrap_storage(matches)?; + BootstrapManager::new(Some(bootstrap_path), parent_path) + }; + + // Legality has been checked and filtered by `get_batch_size()`. + if build_ctx.batch_size > 0 { + let generator = BatchContextGenerator::new(build_ctx.batch_size)?; + build_ctx.blob_batch_generator = Some(Mutex::new(generator)); + build_ctx.blob_features.insert(BlobFeatures::BATCH); + build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); + } + + let mut builder: Box = match conversion_type { + ConversionType::DirectoryToRafs => { + if encrypt { + build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); + build_ctx.blob_features.insert(BlobFeatures::ENCRYPTED); + } + Box::new(DirectoryBuilder::new()) + } + ConversionType::EStargzIndexToRef => { + Box::new(StargzBuilder::new(blob_data_size, &build_ctx)) + } + ConversionType::EStargzToRafs + | ConversionType::TargzToRafs + | ConversionType::TarToRafs => { + if encrypt { + build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); + build_ctx.blob_features.insert(BlobFeatures::ENCRYPTED); + } + Box::new(TarballBuilder::new(conversion_type)) + } + ConversionType::EStargzToRef + | ConversionType::TargzToRef + | ConversionType::TarToRef => { + if version.is_v5() { + bail!("conversion type {} conflicts with RAFS v5", conversion_type); + } + build_ctx.blob_features.insert(BlobFeatures::CHUNK_INFO_V2); + build_ctx.blob_features.insert(BlobFeatures::SEPARATE); + Box::new(TarballBuilder::new(conversion_type)) + } + ConversionType::TarToTarfs => { + if version.is_v5() { + bail!("conversion type {} conflicts with RAFS v5", conversion_type); + } + Box::new(TarballBuilder::new(conversion_type)) + } + ConversionType::DirectoryToStargz + | ConversionType::DirectoryToTargz + | ConversionType::TarToStargz + | ConversionType::TargzToStargz => unimplemented!(), + }; + let build_output = timing_tracer!( + { + builder + .build(&mut build_ctx, &mut bootstrap_mgr, &mut blob_mgr) + .context("build failed") + }, + "total_build" + )?; + + lazy_drop(build_ctx); + + // Some operations like listing xattr pairs of certain namespace need the process + // to be privileged. Therefore, trace what euid and egid are. + event_tracer!("euid", "{}", geteuid()); + event_tracer!("egid", "{}", getegid()); + info!("successfully built RAFS filesystem: \n{}", build_output); + OutputSerializer::dump(matches, build_output, build_info, compressor, version) + } + + fn chunkdict_generate(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let mut build_ctx = BuildContext { + prefetch: Self::get_prefetch(matches)?, + ..Default::default() + }; + let db_url: &String = matches.get_one::("database").unwrap(); + // Save chunk and blob info to database. 
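The comment above notes that batch-size legality is already enforced by `get_batch_size()`; both it and `get_chunk_size()` accept decimal or `0x`-prefixed hex and require a power of two in the `0x1000`-`0x1000000` range quoted in the `--chunk-size` help (the batch size must additionally not exceed the chunk size). A compact sketch of that parsing and validation, assuming those bounds; it is not the crate's implementation:

```rust
/// Parse a size given as decimal ("4096") or 0x-prefixed hex ("0x100000"),
/// then apply the power-of-two and range checks used for --chunk-size.
/// Bounds follow the help text; illustrative helper, not part of the patch.
fn parse_size(v: &str) -> Result<u32, String> {
    let size = if let Some(hex) = v.strip_prefix("0x").or_else(|| v.strip_prefix("0X")) {
        u32::from_str_radix(hex, 16).map_err(|e| format!("invalid size {}: {}", v, e))?
    } else {
        v.parse::<u32>()
            .map_err(|e| format!("invalid size {}: {}", v, e))?
    };
    if !(0x1000..=0x100_0000).contains(&size) || !size.is_power_of_two() {
        return Err(format!("invalid size: 0x{:x}", size));
    }
    Ok(size)
}

fn main() {
    assert_eq!(parse_size("0x100000"), Ok(0x10_0000));
    assert!(parse_size("12345").is_err()); // in range but not a power of two
}
```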
+ let source_bootstrap_paths: Vec = matches + .get_many::("SOURCE") + .map(|paths| paths.map(PathBuf::from).collect()) + .unwrap(); + + check_bootstrap_versions_consistency(&mut build_ctx, &source_bootstrap_paths)?; + update_ctx_from_parent_bootstrap(&mut build_ctx, &source_bootstrap_paths[0])?; + + for (_, bootstrap_path) in source_bootstrap_paths.iter().enumerate() { + let path_name = bootstrap_path.as_path(); + + // Extract the image name and version name from the bootstrap directory. + let bootstrap_dir = match path_name + .parent() + .and_then(|p| p.file_name().and_then(|f| f.to_str())) + { + Some(dir_str) => dir_str.to_string(), + None => bail!("Invalid Bootstrap directory name"), + }; + let full_image_name: Vec<&str> = bootstrap_dir.split(':').collect(); + let image_name = match full_image_name.get(full_image_name.len() - 2) { + Some(&second_last) => second_last.to_string(), + None => bail!( + "Invalid image name {:?}", + full_image_name.get(full_image_name.len() - 2) + ), + }; + let image_tag = match full_image_name.last() { + Some(&last) => last.to_string(), + None => bail!("Invalid version name {:?}", full_image_name.last()), + }; + // For backward compatibility with v2.1. + let config = Self::get_configuration(matches)?; + config + .internal + .set_blob_accessible(matches.get_one::("bootstrap").is_none()); + let db_strs: Vec<&str> = db_url.split("://").collect(); + if db_strs.len() != 2 || (!db_strs[1].starts_with('/') && !db_strs[1].starts_with(':')) + { + bail!("Invalid database URL: {}", db_url); + } + match db_strs[0] { + "sqlite" => { + let mut deduplicate: Deduplicate = + Deduplicate::::new(db_strs[1])?; + deduplicate.save_metadata(bootstrap_path, config, image_name, image_tag)? + } + _ => { + bail!("Unsupported database type: {}, please use a valid database URI, such as 'sqlite:///path/to/chunkdict.db'.", db_strs[0]) + } + }; + } + info!("Chunkdict metadata is saved at: {:?}", db_url); + + // Connecting database and generating chunk dictionary by algorithm "exponential_smoothing". + let db_strs: Vec<&str> = db_url.split("://").collect(); + if db_strs.len() != 2 || (!db_strs[1].starts_with('/') && !db_strs[1].starts_with(':')) { + bail!("Invalid database URL: {}", db_url); + } + let algorithm = String::from("exponential_smoothing"); + let _source_bootstrap_paths: Vec = matches + .get_many::("SOURCE") + .map(|paths| paths.map(PathBuf::from).collect()) + .unwrap(); + + let (chunkdict_chunks, chunkdict_blobs, noise_points): ( + Vec, + Vec, + Vec, + ); + + match db_strs[0] { + "sqlite" => { + let mut algorithm: deduplicate::Algorithm = + deduplicate::Algorithm::::new(algorithm, db_strs[1])?; + let result = algorithm.chunkdict_generate()?; + chunkdict_chunks = result.0; + chunkdict_blobs = result.1; + noise_points = result.2; + } + _ => { + bail!("Unsupported database type: {}, please use a valid database URI, such as 'sqlite:///path/to/chunkdict.db'.", db_strs[0]) + } + }; + + // Output noise point in DBSCAN clustering algorithm. + info!( + "The length of chunkdict is {}", + Vec::::len(&chunkdict_chunks) + ); + info!("It is not recommended to use image deduplication"); + for image_name in noise_points { + info!("{}", image_name); + } + + // Dump chunkdict to bootstrap. 
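+        // The dictionary is persisted as an ordinary RAFS bootstrap with the
+        // IS_CHUNKDICT_GENERATED blob feature set, the intent being that it can later be
+        // consumed through `--chunk-dict` when building new images.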
+ let chunkdict_bootstrap_path = Self::get_bootstrap_storage(matches)?; + let config = + Self::get_configuration(matches).context("failed to get configuration information")?; + config + .internal + .set_blob_accessible(matches.get_one::("config").is_some()); + build_ctx.configuration = config; + build_ctx.blob_storage = Some(chunkdict_bootstrap_path); + build_ctx + .blob_features + .insert(BlobFeatures::IS_CHUNKDICT_GENERATED); + build_ctx.is_chunkdict_generated = true; + + let mut blob_mgr = BlobManager::new(build_ctx.digester); + + let bootstrap_path = Self::get_bootstrap_storage(matches)?; + let mut bootstrap_mgr = BootstrapManager::new(Some(bootstrap_path), None); + + let output = Generator::generate( + &mut build_ctx, + &mut bootstrap_mgr, + &mut blob_mgr, + chunkdict_chunks, + chunkdict_blobs, + )?; + OutputSerializer::dump( + matches, + output, + build_info, + build_ctx.compressor, + build_ctx.fs_version, + ) + .unwrap(); + info!( + "Chunkdict metadata is saved at: {:?}", + matches + .get_one::("bootstrap") + .map(|s| s.as_str()) + .unwrap_or_default(), + ); + + Ok(()) + } + + fn merge(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let source_bootstrap_paths: Vec = matches + .get_many::("SOURCE") + .map(|paths| paths.map(PathBuf::from).collect()) + .unwrap(); + let blob_sizes: Option> = matches.get_one::("blob-sizes").map(|list| { + list.split(',') + .map(|item| { + item.trim() + .parse::() + .expect("invalid number in --blob-sizes option") + }) + .collect() + }); + let blob_digests: Option> = + matches.get_one::("blob-digests").map(|list| { + list.split(',') + .map(|item| item.trim().to_string()) + .collect() + }); + let original_blob_ids: Option> = + matches.get_one::("original-blob-ids").map(|list| { + list.split(',') + .map(|item| item.trim().to_string()) + .collect() + }); + let blob_toc_sizes: Option> = + matches.get_one::("blob-toc-sizes").map(|list| { + list.split(',') + .map(|item| { + item.trim() + .parse::() + .expect("invalid number in --blob-toc-sizes option") + }) + .collect() + }); + let blob_toc_digests: Option> = + matches.get_one::("blob-toc-digests").map(|list| { + list.split(',') + .map(|item| item.trim().to_string()) + .collect() + }); + let target_bootstrap_path = Self::get_bootstrap_storage(matches)?; + let chunk_dict_path = if let Some(arg) = matches.get_one::("chunk-dict") { + Some(parse_chunk_dict_arg(arg)?) + } else { + None + }; + let config = + Self::get_configuration(matches).context("failed to get configuration information")?; + config + .internal + .set_blob_accessible(matches.get_one::("config").is_some()); + let mut ctx = BuildContext { + prefetch: Self::get_prefetch(matches)?, + ..Default::default() + }; + ctx.configuration = config.clone(); + + let parent_bootstrap_path = Self::get_parent_bootstrap(matches)?; + let meta = RafsSuper::load_from_file(&source_bootstrap_paths[0], config.clone(), false)? 
+ .0 + .meta; + + let output = Merger::merge( + &mut ctx, + parent_bootstrap_path, + source_bootstrap_paths, + blob_digests, + original_blob_ids, + blob_sizes, + blob_toc_digests, + blob_toc_sizes, + target_bootstrap_path, + chunk_dict_path, + config, + )?; + OutputSerializer::dump( + matches, + output, + build_info, + meta.get_compressor(), + meta.version.try_into().unwrap(), + ) + } + + fn compact(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let config = + Self::get_configuration(matches).context("failed to get configuration information")?; + config + .internal + .set_blob_accessible(matches.get_one::("config").is_some()); + let bootstrap_path = PathBuf::from(Self::get_bootstrap(matches)?); + let dst_bootstrap = match matches.get_one::("output-bootstrap") { + None => bootstrap_path.with_extension("bootstrap.compact"), + Some(s) => PathBuf::from(s), + }; + + let (rs, _) = RafsSuper::load_from_file(&bootstrap_path, config.clone(), false)?; + info!("load bootstrap {:?} successfully", bootstrap_path); + let chunk_dict = match matches.get_one::("chunk-dict") { + None => None, + Some(args) => Some(HashChunkDict::from_commandline_arg( + args, + config, + &rs.meta.get_config(), + )?), + }; + + let backend = Self::get_backend(matches, "compactor")?; + + let config_file_path = matches.get_one::("config").unwrap(); + let file = File::open(config_file_path) + .with_context(|| format!("failed to open config file {}", config_file_path))?; + let config = serde_json::from_reader(file) + .with_context(|| format!("invalid config file {}", config_file_path))?; + + let version = rs.meta.version.try_into().unwrap(); + let compressor = rs.meta.get_compressor(); + if let Some(build_output) = + BlobCompactor::compact(rs, dst_bootstrap, chunk_dict, backend, &config)? + { + OutputSerializer::dump(matches, build_output, build_info, compressor, version)?; + } + Ok(()) + } + + fn unpack(matches: &ArgMatches) -> Result<()> { + let bootstrap = Self::get_bootstrap(matches)?; + let config = Self::get_configuration(matches)?; + config + .internal + .set_blob_accessible(matches.get_one::("config").is_some()); + let output = matches.get_one::("output").expect("pass in output"); + if output.is_empty() { + return Err(anyhow!("invalid empty --output option")); + } + + let blob = matches.get_one::("blob").map(|s| s.as_str()); + let backend: Option> = match blob { + Some(blob_path) => { + let blob_path = PathBuf::from(blob_path); + let local_fs_conf = LocalFsConfig { + blob_file: blob_path.to_str().unwrap().to_owned(), + dir: Default::default(), + alt_dirs: Default::default(), + }; + let local_fs = LocalFs::new(&local_fs_conf, Some("unpacker")) + .with_context(|| format!("fail to create local backend for {:?}", blob_path))?; + + Some(Arc::new(local_fs)) + } + None => { + if let Some(backend) = &config.backend { + Some(BlobFactory::new_backend(&backend, "unpacker")?) + } else { + match Self::get_backend(matches, "unpacker") { + Ok(backend) => Some(backend), + Err(_) => bail!("one of `--blob`, `--blob-dir` and `--backend-config` must be specified"), + } + } + } + }; + + OCIUnpacker::new(bootstrap, backend, output) + .with_context(|| "fail to create unpacker")? 
+ .unpack(config) + .with_context(|| "fail to unpack") + } + + fn check(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let bootstrap_path = Self::get_bootstrap(matches)?; + let verbose = matches.get_flag("verbose"); + let config = Self::get_configuration(matches)?; + // For backward compatibility with v2.1 + config + .internal + .set_blob_accessible(matches.get_one::("bootstrap").is_none()); + + let mut validator = Validator::new(bootstrap_path, config)?; + let (blobs, compressor, fs_version) = validator + .check(verbose) + .with_context(|| format!("failed to check bootstrap {:?}", bootstrap_path))?; + + println!("RAFS filesystem metadata is valid, referenced data blobs: "); + let mut blob_ids = Vec::new(); + for (idx, blob) in blobs.iter().enumerate() { + println!( + "\t {}: {}, compressed data size 0x{:x}, compressed file size 0x{:x}, uncompressed file size 0x{:x}, chunks: 0x{:x}, features: {}", + idx, + blob.blob_id(), + blob.compressed_data_size(), + blob.compressed_size(), + blob.uncompressed_size(), + blob.chunk_count(), + format_blob_features(blob.features()), + ); + blob_ids.push(blob.blob_id().to_string()); + } + + OutputSerializer::dump_for_check( + matches, + build_info, + blob_ids, + bootstrap_path, + compressor, + fs_version, + )?; + + Ok(()) + } + + fn inspect(matches: &ArgMatches) -> Result<()> { + let bootstrap_path = Self::get_bootstrap(matches)?; + let mut config = Self::get_configuration(matches)?; + // For backward compatibility with v2.1 + config + .internal + .set_blob_accessible(matches.get_one::("bootstrap").is_none()); + if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { + cache.cache_validate = true; + } + + let cmd = matches.get_one::("request"); + let mut inspector = inspect::RafsInspector::new(bootstrap_path, cmd.is_some(), config) + .map_err(|e| { + error!("failed to create inspector, {:?}", e); + e + })?; + + if let Some(c) = cmd { + let o = inspect::Executor::execute(&mut inspector, c.to_string()).unwrap(); + serde_json::to_writer(std::io::stdout(), &o) + .unwrap_or_else(|e| error!("Failed to serialize result, {:?}", e)); + } else { + inspect::Prompt::run(inspector); + } + + Ok(()) + } + + fn stat(matches: &ArgMatches) -> Result<()> { + let digester = matches + .get_one::("digester") + .map(|s| s.as_str()) + .unwrap_or_default() + .parse()?; + let mut stat = stat::ImageStat::new(digester); + let target = matches + .get_one::("target") + .map(Path::new) + .unwrap_or_else(|| Path::new("")); + let mut config = Self::get_configuration(matches)?; + if let Some(cache) = Arc::get_mut(&mut config).unwrap().cache.as_mut() { + cache.cache_validate = true; + } + config + .internal + .set_blob_accessible(matches.get_one::("config").is_some()); + + if let Some(blob) = matches.get_one::("bootstrap").map(PathBuf::from) { + stat.stat(&blob, true, config.clone())?; + } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { + Self::ensure_directory(d.clone())?; + + stat.dedup_enabled = true; + + let children = fs::read_dir(d.as_path()) + .with_context(|| format!("failed to read dir {:?}", d.as_path()))?; + let children = children.collect::, std::io::Error>>()?; + for child in children { + let path = child.path(); + if path.is_file() && path != target && path.extension().is_none() { + if let Err(e) = stat.stat(&path, true, config.clone()) { + debug!( + "failed to process {}, {}", + path.to_str().unwrap_or_default(), + e + ); + }; + } + } + } else { + bail!("one of `--bootstrap` and `--blob-dir` must be specified"); + 
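+            // Note on the `--blob-dir` branch above: only extension-less regular files other
+            // than the `--target` path are sampled as base images, and a failure on an
+            // individual file is logged at debug level instead of aborting the scan.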
} + + if let Some(blob) = matches.get_one::("target").map(PathBuf::from) { + stat.target_enabled = true; + stat.stat(&blob, false, config)?; + } + + stat.finalize(); + + if let Some(path) = matches.get_one::("output-json").map(PathBuf::from) { + stat.dump_json(&path)?; + } else { + stat.dump(); + } + + Ok(()) + } + + fn get_bootstrap(matches: &ArgMatches) -> Result<&Path> { + match matches.get_one::("bootstrap") { + Some(s) => Ok(Path::new(s)), + None => match matches.get_one::("BOOTSTRAP") { + Some(s) => Ok(Path::new(s)), + None => bail!("missing parameter `bootstrap` or `BOOTSTRAP`"), + }, + } + } + + fn get_bootstrap_storage(matches: &ArgMatches) -> Result { + if let Some(s) = matches.get_one::("bootstrap") { + Ok(ArtifactStorage::SingleFile(s.into())) + } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { + if !d.exists() { + bail!("Directory to store blobs does not exist") + } + Ok(ArtifactStorage::FileDir(d)) + } else { + bail!("both --bootstrap and --blob-dir are missing, please specify one to store the generated metadata blob file"); + } + } + + fn get_blob_cache_storage( + matches: &ArgMatches, + conversion_type: ConversionType, + ) -> Result> { + if let Some(p) = matches.get_one::("blob-cache-dir") { + if conversion_type == ConversionType::TarToTarfs + || conversion_type == ConversionType::EStargzIndexToRef + || conversion_type == ConversionType::EStargzToRafs + || conversion_type == ConversionType::EStargzToRef + { + bail!( + "conversion type `{}` conflicts with `--blob-cache-dir`", + conversion_type + ); + } + + if !p.exists() { + bail!("directory to store blob cache does not exist") + } + Ok(Some(ArtifactStorage::FileDir(p.to_owned()))) + } else { + Ok(None) + } + } + + // Must specify a path to blob file. + // For cli/binary interface compatibility sake, keep option `backend-config`, but + // it only receives "localfs" backend type and it will be REMOVED in the future + fn get_blob_storage( + matches: &ArgMatches, + conversion_type: ConversionType, + ) -> Result> { + // Must specify a path to blob file. + // For cli/binary interface compatibility sake, keep option `backend-config`, but + // it only receives "localfs" backend type and it will be REMOVED in the future + if conversion_type == ConversionType::EStargzIndexToRef { + Ok(None) + } else if let Some(p) = matches + .get_one::("blob") + .map(|b| ArtifactStorage::SingleFile(b.into())) + { + if conversion_type == ConversionType::TarToTarfs { + bail!( + "conversion type `{}` conflicts with `--blob`", + conversion_type + ); + } + Ok(Some(p)) + } else if let Some(d) = matches.get_one::("blob-dir").map(PathBuf::from) { + if !d.exists() { + bail!("directory to store blobs does not exist") + } + Ok(Some(ArtifactStorage::FileDir(d))) + } else if let Some(config_json) = matches.get_one::("backend-config") { + let config: serde_json::Value = serde_json::from_str(config_json).unwrap(); + warn!("using --backend-type=localfs is DEPRECATED. Use --blob-dir instead."); + if let Some(bf) = config.get("blob_file") { + // Even unwrap, it is caused by invalid json. Image creation just can't start. + let b: PathBuf = bf + .as_str() + .ok_or_else(|| anyhow!("backend-config is invalid"))? 
+ .to_string() + .into(); + Ok(Some(ArtifactStorage::SingleFile(b))) + } else { + error!("Wrong backend config input!"); + Err(anyhow!("invalid backend config")) + } + } else { + bail!("both --blob and --blob-dir are missing, please specify one to store the generated data blob file"); + } + } + + fn get_parent_bootstrap(matches: &ArgMatches) -> Result> { + let mut parent_bootstrap_path = String::new(); + if let Some(_parent_bootstrap_path) = matches.get_one::("parent-bootstrap") { + parent_bootstrap_path = _parent_bootstrap_path.to_string(); + } + + if !parent_bootstrap_path.is_empty() { + Ok(Some(parent_bootstrap_path)) + } else { + Ok(None) + } + } + + fn get_configuration(matches: &ArgMatches) -> Result> { + let config = if let Some(config_file) = matches.get_one::("config") { + ConfigV2::from_file(config_file)? + } else if let Some(dir) = matches.get_one::("blob-dir") { + ConfigV2::new_localfs("", dir)? + } else { + ConfigV2::default() + }; + if !config.validate() { + return Err(anyhow!("invalid configuration: {:?}", config)); + } + + Ok(Arc::new(config)) + } + + fn get_backend( + matches: &ArgMatches, + blob_id: &str, + ) -> Result> { + let cfg_file = matches + .get_one::("backend-config") + .context("missing backend-config argument")?; + let cfg = ConfigV2::from_file(cfg_file)?; + let backend_cfg = cfg.get_backend_config()?; + let backend = BlobFactory::new_backend(backend_cfg, blob_id)?; + + Ok(backend) + } + + fn get_blob_id(matches: &ArgMatches) -> Result { + let mut blob_id = String::new(); + + if let Some(p_blob_id) = matches.get_one::("blob-id") { + blob_id = String::from(p_blob_id); + if blob_id.len() > BLOB_ID_MAXIMUM_LENGTH { + bail!("blob id is limited to length {}", BLOB_ID_MAXIMUM_LENGTH); + } + } + + Ok(blob_id) + } + + fn get_blob_size(matches: &ArgMatches, ty: ConversionType) -> Result { + if ty != ConversionType::EStargzIndexToRef { + return Ok(0); + } + + match matches.get_one::("blob-data-size") { + None => bail!("no value specified for '--blob-data-size'"), + Some(v) => { + let param = v.trim_start_matches("0x").trim_start_matches("0X"); + let size = u64::from_str_radix(param, 16) + .context(format!("invalid blob data size {}", v))?; + Ok(size) + } + } + } + + fn get_chunk_size(matches: &ArgMatches, ty: ConversionType) -> Result { + match matches.get_one::("chunk-size") { + None => { + if ty == ConversionType::EStargzIndexToRef { + Ok(0x400000u32) + } else { + Ok(RAFS_DEFAULT_CHUNK_SIZE as u32) + } + } + Some(v) => { + let chunk_size = if v.starts_with("0x") || v.starts_with("0X") { + u32::from_str_radix(&v[2..], 16).context(format!("invalid chunk size {}", v))? + } else { + v.parse::() + .context(format!("invalid chunk size {}", v))? + }; + if chunk_size as u64 > RAFS_MAX_CHUNK_SIZE + || chunk_size < 0x1000 + || !chunk_size.is_power_of_two() + { + bail!("invalid chunk size: {}", chunk_size); + } + Ok(chunk_size) + } + } + } + + fn get_batch_size( + matches: &ArgMatches, + version: RafsVersion, + ty: ConversionType, + chunk_size: u32, + ) -> Result { + match matches.get_one::("batch-size") { + None => Ok(0), + Some(v) => { + let batch_size = if v.starts_with("0x") || v.starts_with("0X") { + u32::from_str_radix(&v[2..], 16).context(format!("invalid batch size {}", v))? + } else { + v.parse::() + .context(format!("invalid batch size {}", v))? 
+ }; + if batch_size > 0 { + if version.is_v5() { + bail!("`--batch-size` with non-zero value conflicts with `--fs-version 5`"); + } + match ty { + ConversionType::DirectoryToRafs + | ConversionType::EStargzToRafs + | ConversionType::TargzToRafs + | ConversionType::TarToRafs => { + if batch_size as u64 > RAFS_MAX_CHUNK_SIZE + || batch_size < 0x1000 + || !batch_size.is_power_of_two() + { + bail!("invalid batch size: {}", batch_size); + } + if batch_size > chunk_size { + bail!( + "batch size 0x{:x} is bigger than chunk size 0x{:x}", + batch_size, + chunk_size + ); + } + } + _ => bail!("unsupported ConversionType for batch chunk: {}", ty), + } + } + Ok(batch_size) + } + } + } + + fn get_prefetch(matches: &ArgMatches) -> Result { + let prefetch_policy = matches + .get_one::("prefetch-policy") + .map(|s| s.as_str()) + .unwrap_or_default() + .parse()?; + Prefetch::new(prefetch_policy) + } + + fn get_blob_offset(matches: &ArgMatches) -> Result { + match matches.get_one::("blob-offset") { + None => Ok(0), + Some(v) => v + .parse::() + .context(format!("invalid blob offset {}", v)), + } + } + + fn get_fs_version(matches: &ArgMatches) -> Result { + match matches.get_one::("fs-version") { + None => Ok(RafsVersion::V6), + Some(v) => { + let version: u32 = v.parse().context(format!("invalid fs-version: {}", v))?; + if version == 5 { + Ok(RafsVersion::V5) + } else if version == 6 { + Ok(RafsVersion::V6) + } else { + bail!("invalid fs-version: {}", v); + } + } + } + } + + fn ensure_file>(path: P) -> Result<()> { + let file_type = metadata(path.as_ref()) + .context(format!("failed to access path {:?}", path.as_ref()))? + .file_type(); + // The SOURCE can be a regular file, FIFO file, or /dev/stdin char device, etc.. + ensure!( + file_type.is_file() || file_type.is_fifo() || file_type.is_char_device(), + "specified path must be a regular/fifo/char_device file: {:?}", + path.as_ref() + ); + Ok(()) + } + + fn ensure_directory>(path: P) -> Result<()> { + let dir = metadata(path.as_ref()) + .context(format!("failed to access path {:?}", path.as_ref()))?; + ensure!( + dir.is_dir(), + "specified path must be a directory: {:?}", + path.as_ref() + ); + Ok(()) + } +} + +#[cfg(target_os = "linux")] +impl Command { + fn export(args: &ArgMatches, subargs: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { + let subargs = nydus::SubCmdArgs::new(args, subargs); + if subargs.is_present("block") { + Self::export_block(&subargs, build_info)?; + } else { + bail!("unknown export type"); + } + Ok(()) + } + + fn export_block(subargs: &nydus::SubCmdArgs, _bti: &BuildTimeInfo) -> Result<()> { + let mut localfs_dir = None; + let mut entry = if let Some(dir) = subargs.value_of("localfs-dir") { + // Safe to unwrap because `--block` requires `--bootstrap`. + let bootstrap = subargs.value_of("bootstrap").unwrap(); + let config = format!( + r#" + {{ + "type": "bootstrap", + "id": "disk-default", + "domain_id": "block-nbd", + "config_v2": {{ + "version": 2, + "id": "block-nbd-factory", + "backend": {{ + "type": "localfs", + "localfs": {{ + "dir": "{}" + }} + }}, + "cache": {{ + "type": "filecache", + "filecache": {{ + "work_dir": "{}" + }} + }}, + "metadata_path": "{}" + }} + }}"#, + dir, dir, bootstrap + ); + localfs_dir = Some(dir.to_string()); + nydus_api::BlobCacheEntry::from_str(&config)? + } else if let Some(v) = subargs.value_of("config") { + nydus_api::BlobCacheEntry::from_file(v)? 
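+            // `-C/--config` must point at a complete BlobCacheEntry JSON, while the
+            // `-D/--localfs-dir` branch above synthesizes an equivalent entry on the fly:
+            // both the localfs backend and the filecache working directory are rooted at the
+            // given directory and `--bootstrap` supplies the metadata_path.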
+ } else { + bail!("both option `-C/--config` and `-D/--localfs-dir` are missing"); + }; + if !entry.prepare_configuration_info() { + bail!("invalid blob cache entry configuration information"); + } + if !entry.validate() { + bail!("invalid blob cache entry configuration information"); + } + + let threads: u32 = subargs + .value_of("threads") + .map(|n| n.parse().unwrap_or(1)) + .unwrap_or(1); + let output = subargs.value_of("output").map(|v| v.to_string()); + let verity = subargs.is_present("verity"); + + nydus_service::block_device::BlockDevice::export( + entry, + output, + localfs_dir, + threads, + verity, + ) + .context("failed to export RAFS filesystem as raw block device image") + } + + fn thread_validator(v: &str) -> std::result::Result { + nydus_service::validate_threads_configuration(v).map(|s| s.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::Command; + #[test] + fn test_ensure_file() { + Command::ensure_file("/dev/stdin").unwrap(); + } +} diff --git a/src/bin/nydus-image/stat.rs b/src/bin/nydus-image/stat.rs index 2ee8c796d97..4eaae1649a3 100644 --- a/src/bin/nydus-image/stat.rs +++ b/src/bin/nydus-image/stat.rs @@ -1,323 +1,323 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashSet; -use std::fs::OpenOptions; -use std::path::Path; -use std::sync::atomic::Ordering; -use std::sync::Arc; - -use anyhow::{Context, Result}; -use nydus_api::ConfigV2; -use nydus_builder::{ChunkDict, HashChunkDict, Tree}; -use nydus_rafs::metadata::RafsSuper; -use nydus_utils::digest; -use serde::Serialize; - -#[derive(Copy, Clone, Default, Serialize)] -struct DedupInfo { - raw_chunks: u64, - dedup_chunks: u64, - comp_content_size: u64, - comp_base_size: u64, - comp_image_size: u64, - uncomp_content_size: u64, - uncomp_base_size: u64, - uncomp_image_size: u64, -} - -#[derive(Serialize)] -struct ImageInfo { - dirs: u32, - files: u32, - symlinks: u32, - chunks: u32, - file_size: u64, - comp_size: u64, - uncomp_size: u64, - padding_size: u64, - chunk_sizes: [u32; 9], - file_sizes: Vec, - - // Number of chunks in the base image after chunk deduplication. - dedup_chunks: u64, - // Sum of compressed size of all dedup chunks. - dedup_comp_size: u64, - // Sum of uncompressed size of all dedup chunks. - dedup_uncomp_size: u64, - // Base Image: number of chunks from all source images - // Target Image: How many chunks are self-contained, not referring to base image. - own_chunks: u64, - // Sum of compressed size of all owned chunks. - own_comp_size: u64, - // Sum of uncompressed size of all owned chunks. - own_uncomp_size: u64, - // How many chunks of the target image are referring to the base image. - ref_chunks: u64, - // Sum of compressed size of all reference chunks. - ref_comp_size: u64, - // Sum of uncompressed size of all reference chunks. 
- ref_uncomp_size: u64, -} - -impl ImageInfo { - fn new() -> Self { - ImageInfo { - dirs: 0, - files: 0, - symlinks: 0, - chunks: 0, - file_size: 0, - padding_size: 0, - comp_size: 0, - uncomp_size: 0, - chunk_sizes: [0; 9], - file_sizes: vec![0; 45], - dedup_chunks: 0, - dedup_comp_size: 0, - dedup_uncomp_size: 0, - own_chunks: 0, - own_comp_size: 0, - own_uncomp_size: 0, - ref_chunks: 0, - ref_comp_size: 0, - ref_uncomp_size: 0, - } - } - - fn dump(&self) { - println!( - r#" -Directories: {dirs} -Files: {files} -Symlinks: {symlinks} -Chunks: {chunks} -File Size: {file_size} -Padding Size: {padding_size} -Uncompressed Size: {uncomp_size} -Compressed Size: {comp_size}"#, - dirs = self.dirs, - files = self.files, - symlinks = self.symlinks, - chunks = self.chunks, - file_size = self.file_size, - padding_size = self.padding_size, - uncomp_size = self.uncomp_size, - comp_size = self.comp_size, - ); - - println!("\nFile Size Bits:\t\tFile Count:"); - for sz in 0..=44 { - println!("{}:\t\t\t{}", sz, self.file_sizes[sz]); - } - - println!("\nChunk Size Bits:\tChunk Count:"); - for sz in 12..=20 { - println!("{}:\t\t\t{}", sz, self.chunk_sizes[sz - 12]); - } - - println!("\nRaw Content Size:\t{}", self.file_size); - println!("Comp Content Size:\t{}", self.comp_size); - println!("Raw Chunk Count:\t{}", self.chunks); - println!("Dedup Comp Size:\t{}", self.dedup_comp_size); - println!("Dedup Uncomp Size:\t{}", self.dedup_uncomp_size); - println!("Dedup Chunk Count:\t{}", self.dedup_chunks); - println!("Owned Comp Size:\t{}", self.own_comp_size); - println!("Owned Uncomp Size:\t{}", self.own_uncomp_size); - println!("Owned Chunk Count:\t{}", self.own_chunks); - println!("Referenced Comp Size:\t{}", self.ref_comp_size); - println!("Referenced Uncomp Size:\t{}", self.ref_uncomp_size); - println!("Referenced Chunk Count:\t{}", self.ref_chunks); - } -} - -#[derive(Serialize)] -pub(crate) struct ImageStat { - pub dedup_enabled: bool, - pub target_enabled: bool, - - base_image: ImageInfo, - target_image: ImageInfo, - #[serde(skip)] - dedup_dict: HashChunkDict, - #[serde(skip)] - dedup_info: [DedupInfo; 20], -} - -impl ImageStat { - pub fn new(digester: digest::Algorithm) -> Self { - ImageStat { - dedup_enabled: false, - target_enabled: false, - - base_image: ImageInfo::new(), - target_image: ImageInfo::new(), - dedup_dict: HashChunkDict::new(digester), - dedup_info: [Default::default(); 20], - } - } - - pub fn stat(&mut self, path: &Path, is_base: bool, config: Arc) -> Result<()> { - let (rs, _) = RafsSuper::load_from_file(path, config, false)?; - let mut dict = HashChunkDict::new(rs.meta.get_digester()); - let mut hardlinks = HashSet::new(); - let tree = - Tree::from_bootstrap(&rs, &mut dict).context("failed to load bootstrap for stats")?; - let image = if is_base { - &mut self.base_image - } else { - &mut self.target_image - }; - - let pre = &mut |t: &Tree| -> Result<()> { - let node = t.lock_node(); - if node.is_reg() { - image.files += 1; - if node.is_hardlink() { - if hardlinks.contains(&node.inode.ino()) { - return Ok(()); - } - hardlinks.insert(node.inode.ino()); - } - let file_size = node.inode.size(); - let idx = std::cmp::min((64 - file_size.leading_zeros()) as usize, 44); - image.file_sizes[idx] += 1; - image.file_size += file_size; - image.padding_size += ((file_size + 0xfff) & !0xfff) - file_size; - - image.chunks += node.chunks.len() as u32; - for chunk in node.chunks.iter() { - image.comp_size += chunk.inner.compressed_size() as u64; - image.uncomp_size += chunk.inner.uncompressed_size() as 
u64; - } - - for sz in 12..=20 { - match node.chunk_count(1 << sz) { - Ok(v) => image.chunk_sizes[sz - 12] += v, - Err(e) => error!("failed to get chunk size of inode, {}", e), - } - } - } else if node.is_dir() { - image.dirs += 1; - } else if node.is_symlink() { - image.symlinks += 1; - } - Ok(()) - }; - tree.walk_dfs_pre(pre)?; - - if is_base { - for entry in dict.hashmap().values() { - image.own_chunks += 1; - image.own_comp_size += entry.0.compressed_size() as u64; - image.own_uncomp_size += entry.0.uncompressed_size() as u64; - self.dedup_dict - .add_chunk(entry.0.clone(), rs.meta.get_digester()); - } - } else { - for entry in dict.hashmap().values() { - if self - .dedup_dict - .get_chunk(entry.0.id(), entry.0.uncompressed_size()) - .is_some() - { - image.ref_chunks += 1; - image.ref_comp_size += entry.0.compressed_size() as u64; - image.ref_uncomp_size += entry.0.uncompressed_size() as u64; - } else { - image.own_chunks += 1; - image.own_comp_size += entry.0.compressed_size() as u64; - image.own_uncomp_size += entry.0.uncompressed_size() as u64; - } - } - } - - Ok(()) - } - - pub fn finalize(&mut self) { - self.base_image.uncomp_size += self.base_image.padding_size; - - if self.target_enabled { - self.target_image.uncomp_size += self.target_image.padding_size; - } - - if self.dedup_enabled { - for entry in self.dedup_dict.hashmap().values() { - let count = entry.1.load(Ordering::Relaxed); - let thresh = std::cmp::min(self.dedup_info.len(), count as usize); - for idx in 0..thresh { - let info = &mut self.dedup_info[idx]; - info.raw_chunks += count as u64; - info.dedup_chunks += 1; - info.uncomp_content_size += count as u64 * entry.0.uncompressed_size() as u64; - info.comp_content_size += count as u64 * entry.0.compressed_size() as u64; - info.uncomp_base_size += entry.0.uncompressed_size() as u64; - info.comp_base_size += entry.0.compressed_size() as u64; - } - if thresh < self.dedup_info.len() { - for idx in thresh..self.dedup_info.len() { - let info = &mut self.dedup_info[idx]; - info.raw_chunks += count as u64; - info.dedup_chunks += count as u64; - info.uncomp_content_size += - count as u64 * entry.0.uncompressed_size() as u64; - info.comp_content_size += count as u64 * entry.0.compressed_size() as u64; - info.uncomp_image_size += count as u64 * entry.0.uncompressed_size() as u64; - info.comp_image_size += count as u64 * entry.0.compressed_size() as u64; - } - } - - self.base_image.dedup_chunks += 1; - self.base_image.dedup_comp_size += entry.0.compressed_size() as u64; - self.base_image.dedup_uncomp_size += entry.0.uncompressed_size() as u64; - } - } - } - - pub fn dump_json(&self, path: &Path) -> Result<()> { - let w = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .open(path) - .with_context(|| format!("Output file {:?} can't be opened", path))?; - - serde_json::to_writer(w, self).context("Write output file failed")?; - - Ok(()) - } - - pub fn dump(&self) { - if self.target_enabled { - println!("Target Image Statistics:"); - self.target_image.dump(); - } - - println!("\n\nBase Image Statistics:"); - self.base_image.dump(); - - if self.dedup_enabled { - println!("\n\nChunk Deduplication Statistics:"); - println!("Global Dedup Thresh:\tRaw Chunks:\tDedup Chunks:\tComp Content Size:\tComp Base Size:\tComp Image Size:\tUncomp Content Size:\tUncomp Base Size\tUncomp Image Size"); - for (idx, info) in self.dedup_info.iter().enumerate() { - if info.dedup_chunks == 0 { - break; - } - println!( - 
"{:<24}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}", - idx + 1, - info.raw_chunks, - info.dedup_chunks, - info.comp_content_size, - info.comp_base_size, - info.comp_image_size, - info.uncomp_content_size, - info.uncomp_base_size, - info.uncomp_image_size, - ); - } - } - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashSet; +use std::fs::OpenOptions; +use std::path::Path; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use nydus_api::ConfigV2; +use nydus_builder::{ChunkDict, HashChunkDict, Tree}; +use nydus_rafs::metadata::RafsSuper; +use nydus_utils::digest; +use serde::Serialize; + +#[derive(Copy, Clone, Default, Serialize)] +struct DedupInfo { + raw_chunks: u64, + dedup_chunks: u64, + comp_content_size: u64, + comp_base_size: u64, + comp_image_size: u64, + uncomp_content_size: u64, + uncomp_base_size: u64, + uncomp_image_size: u64, +} + +#[derive(Serialize)] +struct ImageInfo { + dirs: u32, + files: u32, + symlinks: u32, + chunks: u32, + file_size: u64, + comp_size: u64, + uncomp_size: u64, + padding_size: u64, + chunk_sizes: [u32; 9], + file_sizes: Vec, + + // Number of chunks in the base image after chunk deduplication. + dedup_chunks: u64, + // Sum of compressed size of all dedup chunks. + dedup_comp_size: u64, + // Sum of uncompressed size of all dedup chunks. + dedup_uncomp_size: u64, + // Base Image: number of chunks from all source images + // Target Image: How many chunks are self-contained, not referring to base image. + own_chunks: u64, + // Sum of compressed size of all owned chunks. + own_comp_size: u64, + // Sum of uncompressed size of all owned chunks. + own_uncomp_size: u64, + // How many chunks of the target image are referring to the base image. + ref_chunks: u64, + // Sum of compressed size of all reference chunks. + ref_comp_size: u64, + // Sum of uncompressed size of all reference chunks. 
+ ref_uncomp_size: u64, +} + +impl ImageInfo { + fn new() -> Self { + ImageInfo { + dirs: 0, + files: 0, + symlinks: 0, + chunks: 0, + file_size: 0, + padding_size: 0, + comp_size: 0, + uncomp_size: 0, + chunk_sizes: [0; 9], + file_sizes: vec![0; 45], + dedup_chunks: 0, + dedup_comp_size: 0, + dedup_uncomp_size: 0, + own_chunks: 0, + own_comp_size: 0, + own_uncomp_size: 0, + ref_chunks: 0, + ref_comp_size: 0, + ref_uncomp_size: 0, + } + } + + fn dump(&self) { + println!( + r#" +Directories: {dirs} +Files: {files} +Symlinks: {symlinks} +Chunks: {chunks} +File Size: {file_size} +Padding Size: {padding_size} +Uncompressed Size: {uncomp_size} +Compressed Size: {comp_size}"#, + dirs = self.dirs, + files = self.files, + symlinks = self.symlinks, + chunks = self.chunks, + file_size = self.file_size, + padding_size = self.padding_size, + uncomp_size = self.uncomp_size, + comp_size = self.comp_size, + ); + + println!("\nFile Size Bits:\t\tFile Count:"); + for sz in 0..=44 { + println!("{}:\t\t\t{}", sz, self.file_sizes[sz]); + } + + println!("\nChunk Size Bits:\tChunk Count:"); + for sz in 12..=20 { + println!("{}:\t\t\t{}", sz, self.chunk_sizes[sz - 12]); + } + + println!("\nRaw Content Size:\t{}", self.file_size); + println!("Comp Content Size:\t{}", self.comp_size); + println!("Raw Chunk Count:\t{}", self.chunks); + println!("Dedup Comp Size:\t{}", self.dedup_comp_size); + println!("Dedup Uncomp Size:\t{}", self.dedup_uncomp_size); + println!("Dedup Chunk Count:\t{}", self.dedup_chunks); + println!("Owned Comp Size:\t{}", self.own_comp_size); + println!("Owned Uncomp Size:\t{}", self.own_uncomp_size); + println!("Owned Chunk Count:\t{}", self.own_chunks); + println!("Referenced Comp Size:\t{}", self.ref_comp_size); + println!("Referenced Uncomp Size:\t{}", self.ref_uncomp_size); + println!("Referenced Chunk Count:\t{}", self.ref_chunks); + } +} + +#[derive(Serialize)] +pub(crate) struct ImageStat { + pub dedup_enabled: bool, + pub target_enabled: bool, + + base_image: ImageInfo, + target_image: ImageInfo, + #[serde(skip)] + dedup_dict: HashChunkDict, + #[serde(skip)] + dedup_info: [DedupInfo; 20], +} + +impl ImageStat { + pub fn new(digester: digest::Algorithm) -> Self { + ImageStat { + dedup_enabled: false, + target_enabled: false, + + base_image: ImageInfo::new(), + target_image: ImageInfo::new(), + dedup_dict: HashChunkDict::new(digester), + dedup_info: [Default::default(); 20], + } + } + + pub fn stat(&mut self, path: &Path, is_base: bool, config: Arc) -> Result<()> { + let (rs, _) = RafsSuper::load_from_file(path, config, false)?; + let mut dict = HashChunkDict::new(rs.meta.get_digester()); + let mut hardlinks = HashSet::new(); + let tree = + Tree::from_bootstrap(&rs, &mut dict).context("failed to load bootstrap for stats")?; + let image = if is_base { + &mut self.base_image + } else { + &mut self.target_image + }; + + let pre = &mut |t: &Tree| -> Result<()> { + let node = t.lock_node(); + if node.is_reg() { + image.files += 1; + if node.is_hardlink() { + if hardlinks.contains(&node.inode.ino()) { + return Ok(()); + } + hardlinks.insert(node.inode.ino()); + } + let file_size = node.inode.size(); + let idx = std::cmp::min((64 - file_size.leading_zeros()) as usize, 44); + image.file_sizes[idx] += 1; + image.file_size += file_size; + image.padding_size += ((file_size + 0xfff) & !0xfff) - file_size; + + image.chunks += node.chunks.len() as u32; + for chunk in node.chunks.iter() { + image.comp_size += chunk.inner.compressed_size() as u64; + image.uncomp_size += chunk.inner.uncompressed_size() as 
u64; + } + + for sz in 12..=20 { + match node.chunk_count(1 << sz) { + Ok(v) => image.chunk_sizes[sz - 12] += v, + Err(e) => error!("failed to get chunk size of inode, {}", e), + } + } + } else if node.is_dir() { + image.dirs += 1; + } else if node.is_symlink() { + image.symlinks += 1; + } + Ok(()) + }; + tree.walk_dfs_pre(pre)?; + + if is_base { + for entry in dict.hashmap().values() { + image.own_chunks += 1; + image.own_comp_size += entry.0.compressed_size() as u64; + image.own_uncomp_size += entry.0.uncompressed_size() as u64; + self.dedup_dict + .add_chunk(entry.0.clone(), rs.meta.get_digester()); + } + } else { + for entry in dict.hashmap().values() { + if self + .dedup_dict + .get_chunk(entry.0.id(), entry.0.uncompressed_size()) + .is_some() + { + image.ref_chunks += 1; + image.ref_comp_size += entry.0.compressed_size() as u64; + image.ref_uncomp_size += entry.0.uncompressed_size() as u64; + } else { + image.own_chunks += 1; + image.own_comp_size += entry.0.compressed_size() as u64; + image.own_uncomp_size += entry.0.uncompressed_size() as u64; + } + } + } + + Ok(()) + } + + pub fn finalize(&mut self) { + self.base_image.uncomp_size += self.base_image.padding_size; + + if self.target_enabled { + self.target_image.uncomp_size += self.target_image.padding_size; + } + + if self.dedup_enabled { + for entry in self.dedup_dict.hashmap().values() { + let count = entry.1.load(Ordering::Relaxed); + let thresh = std::cmp::min(self.dedup_info.len(), count as usize); + for idx in 0..thresh { + let info = &mut self.dedup_info[idx]; + info.raw_chunks += count as u64; + info.dedup_chunks += 1; + info.uncomp_content_size += count as u64 * entry.0.uncompressed_size() as u64; + info.comp_content_size += count as u64 * entry.0.compressed_size() as u64; + info.uncomp_base_size += entry.0.uncompressed_size() as u64; + info.comp_base_size += entry.0.compressed_size() as u64; + } + if thresh < self.dedup_info.len() { + for idx in thresh..self.dedup_info.len() { + let info = &mut self.dedup_info[idx]; + info.raw_chunks += count as u64; + info.dedup_chunks += count as u64; + info.uncomp_content_size += + count as u64 * entry.0.uncompressed_size() as u64; + info.comp_content_size += count as u64 * entry.0.compressed_size() as u64; + info.uncomp_image_size += count as u64 * entry.0.uncompressed_size() as u64; + info.comp_image_size += count as u64 * entry.0.compressed_size() as u64; + } + } + + self.base_image.dedup_chunks += 1; + self.base_image.dedup_comp_size += entry.0.compressed_size() as u64; + self.base_image.dedup_uncomp_size += entry.0.uncompressed_size() as u64; + } + } + } + + pub fn dump_json(&self, path: &Path) -> Result<()> { + let w = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .open(path) + .with_context(|| format!("Output file {:?} can't be opened", path))?; + + serde_json::to_writer(w, self).context("Write output file failed")?; + + Ok(()) + } + + pub fn dump(&self) { + if self.target_enabled { + println!("Target Image Statistics:"); + self.target_image.dump(); + } + + println!("\n\nBase Image Statistics:"); + self.base_image.dump(); + + if self.dedup_enabled { + println!("\n\nChunk Deduplication Statistics:"); + println!("Global Dedup Thresh:\tRaw Chunks:\tDedup Chunks:\tComp Content Size:\tComp Base Size:\tComp Image Size:\tUncomp Content Size:\tUncomp Base Size\tUncomp Image Size"); + for (idx, info) in self.dedup_info.iter().enumerate() { + if info.dedup_chunks == 0 { + break; + } + println!( + 
"{:<24}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}0x{:<14x}", + idx + 1, + info.raw_chunks, + info.dedup_chunks, + info.comp_content_size, + info.comp_base_size, + info.comp_image_size, + info.uncomp_content_size, + info.uncomp_base_size, + info.uncomp_image_size, + ); + } + } + } +} diff --git a/src/bin/nydus-image/unpack/mod.rs b/src/bin/nydus-image/unpack/mod.rs index 81587172c7c..da630880715 100644 --- a/src/bin/nydus-image/unpack/mod.rs +++ b/src/bin/nydus-image/unpack/mod.rs @@ -1,232 +1,232 @@ -// Copyright 2022 Ant Group. All rights reserved. -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 -use std::collections::HashMap; -use std::fs::{File, OpenOptions}; -use std::io::Read; -use std::path::{Path, PathBuf}; -use std::rc::Rc; -use std::str; -use std::sync::Arc; - -use anyhow::{Context, Result}; -use nydus_api::ConfigV2; -use nydus_rafs::{ - metadata::{RafsInodeExt, RafsSuper}, - RafsIterator, -}; -use nydus_storage::backend::BlobBackend; -use nydus_storage::device::BlobInfo; -use tar::{Builder, Header}; - -use self::pax::{ - OCIBlockBuilder, OCICharBuilder, OCIDirBuilder, OCIFifoBuilder, OCILinkBuilder, OCIRegBuilder, - OCISocketBuilder, OCISymlinkBuilder, PAXExtensionSectionBuilder, PAXLinkBuilder, - PAXSpecialSectionBuilder, -}; - -mod pax; - -pub trait Unpacker { - fn unpack(&self, config: Arc) -> Result<()>; -} - -/// A unpacker with the ability to convert bootstrap file and blob file to tar -pub struct OCIUnpacker { - bootstrap: PathBuf, - blob_backend: Option>, - output: PathBuf, - - builder_factory: OCITarBuilderFactory, -} - -impl OCIUnpacker { - pub fn new( - bootstrap: &Path, - blob_backend: Option>, - output: &str, - ) -> Result { - let bootstrap = bootstrap.to_path_buf(); - let output = PathBuf::from(output); - - let builder_factory = OCITarBuilderFactory::new(); - - Ok(OCIUnpacker { - builder_factory, - bootstrap, - blob_backend, - output, - }) - } - - fn load_rafs(&self, config: Arc) -> Result { - let (rs, _) = RafsSuper::load_from_file(self.bootstrap.as_path(), config, false)?; - Ok(rs) - } -} - -impl Unpacker for OCIUnpacker { - fn unpack(&self, config: Arc) -> Result<()> { - debug!( - "oci unpacker, bootstrap file: {:?}, output file: {:?}", - self.bootstrap, self.output - ); - - let rafs = self.load_rafs(config)?; - - let mut builder = self - .builder_factory - .create(&rafs, &self.blob_backend, &self.output)?; - - for (node, path) in RafsIterator::new(&rafs) { - builder.append(node, &path)?; - } - - Ok(()) - } -} - -trait TarBuilder { - fn append(&mut self, node: Arc, path: &Path) -> Result<()>; -} - -struct TarSection { - header: Header, - data: Box, -} - -trait SectionBuilder { - fn can_handle(&mut self, inode: Arc, path: &Path) -> bool; - fn build(&self, inode: Arc, path: &Path) -> Result>; -} - -struct OCITarBuilderFactory {} - -impl OCITarBuilderFactory { - fn new() -> Self { - OCITarBuilderFactory {} - } - - fn create( - &self, - meta: &RafsSuper, - blob_backend: &Option>, - output_path: &Path, - ) -> Result> { - let writer = self.create_writer(output_path)?; - - let builders = self.create_builders(meta, blob_backend)?; - - let builder = OCITarBuilder::new(builders, writer); - - Ok(Box::new(builder) as Box) - } - - fn create_writer(&self, output_path: &Path) -> Result> { - let builder = Builder::new( - OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .read(false) - .open(output_path) - .with_context(|| format!("fail to open output file {:?}", output_path))?, - ); - 
- Ok(builder) - } - - fn create_builders( - &self, - meta: &RafsSuper, - blob_backend: &Option>, - ) -> Result>> { - // PAX basic builders - let ext_builder = Rc::new(PAXExtensionSectionBuilder::new()); - let link_builder = Rc::new(PAXLinkBuilder::new(ext_builder.clone())); - let special_builder = Rc::new(PAXSpecialSectionBuilder::new(ext_builder.clone())); - - // OCI builders - let sock_builder = OCISocketBuilder::new(); - let hard_link_builder = OCILinkBuilder::new(link_builder.clone()); - let symlink_builder = OCISymlinkBuilder::new(link_builder); - let dir_builder = OCIDirBuilder::new(ext_builder); - let fifo_builder = OCIFifoBuilder::new(special_builder.clone()); - let char_builder = OCICharBuilder::new(special_builder.clone()); - let block_builder = OCIBlockBuilder::new(special_builder); - let blobs = meta.superblock.get_blob_infos(); - let reg_builder = self.create_reg_builder(blobs, blob_backend)?; - - // The order counts. - let builders = vec![ - Box::new(sock_builder) as Box, - Box::new(hard_link_builder), - Box::new(dir_builder), - Box::new(reg_builder), - Box::new(symlink_builder), - Box::new(fifo_builder), - Box::new(char_builder), - Box::new(block_builder), - ]; - - Ok(builders) - } - - fn create_reg_builder( - &self, - blobs: Vec>, - blob_backend: &Option>, - ) -> Result { - let mut readers = HashMap::new(); - let mut compressors = HashMap::new(); - for blob in blobs { - let blob_backend = blob_backend - .as_deref() - .with_context(|| "both blob path or blob backend config are not specified")?; - let reader = blob_backend - .get_reader(blob.blob_id().as_str()) - .map_err(|err| anyhow!("fail to get reader, error {:?}", err))?; - - let compressor = blob.compressor(); - readers.insert(blob.blob_index(), reader); - compressors.insert(blob.blob_index(), compressor); - } - - Ok(OCIRegBuilder::new( - Rc::new(PAXExtensionSectionBuilder::new()), - readers, - compressors, - )) - } -} - -struct OCITarBuilder { - writer: Builder, - builders: Vec>, -} - -impl OCITarBuilder { - fn new(builders: Vec>, writer: Builder) -> Self { - Self { builders, writer } - } -} - -impl TarBuilder for OCITarBuilder { - fn append(&mut self, inode: Arc, path: &Path) -> Result<()> { - for builder in &mut self.builders { - // Useless one, just go !!!!! - if !builder.can_handle(inode.clone(), path) { - continue; - } - - for sect in builder.build(inode.clone(), path)? { - self.writer.append(§.header, sect.data)?; - } - - return Ok(()); - } - - bail!("node {:?} can not be unpacked", path) - } -} +// Copyright 2022 Ant Group. All rights reserved. +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 +use std::collections::HashMap; +use std::fs::{File, OpenOptions}; +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::rc::Rc; +use std::str; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use nydus_api::ConfigV2; +use nydus_rafs::{ + metadata::{RafsInodeExt, RafsSuper}, + RafsIterator, +}; +use nydus_storage::backend::BlobBackend; +use nydus_storage::device::BlobInfo; +use tar::{Builder, Header}; + +use self::pax::{ + OCIBlockBuilder, OCICharBuilder, OCIDirBuilder, OCIFifoBuilder, OCILinkBuilder, OCIRegBuilder, + OCISocketBuilder, OCISymlinkBuilder, PAXExtensionSectionBuilder, PAXLinkBuilder, + PAXSpecialSectionBuilder, +}; + +mod pax; + +pub trait Unpacker { + fn unpack(&self, config: Arc) -> Result<()>; +} + +/// A unpacker with the ability to convert bootstrap file and blob file to tar +pub struct OCIUnpacker { + bootstrap: PathBuf, + blob_backend: Option>, + output: PathBuf, + + builder_factory: OCITarBuilderFactory, +} + +impl OCIUnpacker { + pub fn new( + bootstrap: &Path, + blob_backend: Option>, + output: &str, + ) -> Result { + let bootstrap = bootstrap.to_path_buf(); + let output = PathBuf::from(output); + + let builder_factory = OCITarBuilderFactory::new(); + + Ok(OCIUnpacker { + builder_factory, + bootstrap, + blob_backend, + output, + }) + } + + fn load_rafs(&self, config: Arc) -> Result { + let (rs, _) = RafsSuper::load_from_file(self.bootstrap.as_path(), config, false)?; + Ok(rs) + } +} + +impl Unpacker for OCIUnpacker { + fn unpack(&self, config: Arc) -> Result<()> { + debug!( + "oci unpacker, bootstrap file: {:?}, output file: {:?}", + self.bootstrap, self.output + ); + + let rafs = self.load_rafs(config)?; + + let mut builder = self + .builder_factory + .create(&rafs, &self.blob_backend, &self.output)?; + + for (node, path) in RafsIterator::new(&rafs) { + builder.append(node, &path)?; + } + + Ok(()) + } +} + +trait TarBuilder { + fn append(&mut self, node: Arc, path: &Path) -> Result<()>; +} + +struct TarSection { + header: Header, + data: Box, +} + +trait SectionBuilder { + fn can_handle(&mut self, inode: Arc, path: &Path) -> bool; + fn build(&self, inode: Arc, path: &Path) -> Result>; +} + +struct OCITarBuilderFactory {} + +impl OCITarBuilderFactory { + fn new() -> Self { + OCITarBuilderFactory {} + } + + fn create( + &self, + meta: &RafsSuper, + blob_backend: &Option>, + output_path: &Path, + ) -> Result> { + let writer = self.create_writer(output_path)?; + + let builders = self.create_builders(meta, blob_backend)?; + + let builder = OCITarBuilder::new(builders, writer); + + Ok(Box::new(builder) as Box) + } + + fn create_writer(&self, output_path: &Path) -> Result> { + let builder = Builder::new( + OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .read(false) + .open(output_path) + .with_context(|| format!("fail to open output file {:?}", output_path))?, + ); + + Ok(builder) + } + + fn create_builders( + &self, + meta: &RafsSuper, + blob_backend: &Option>, + ) -> Result>> { + // PAX basic builders + let ext_builder = Rc::new(PAXExtensionSectionBuilder::new()); + let link_builder = Rc::new(PAXLinkBuilder::new(ext_builder.clone())); + let special_builder = Rc::new(PAXSpecialSectionBuilder::new(ext_builder.clone())); + + // OCI builders + let sock_builder = OCISocketBuilder::new(); + let hard_link_builder = OCILinkBuilder::new(link_builder.clone()); + let symlink_builder = OCISymlinkBuilder::new(link_builder); + let dir_builder = OCIDirBuilder::new(ext_builder); + let fifo_builder = 
OCIFifoBuilder::new(special_builder.clone()); + let char_builder = OCICharBuilder::new(special_builder.clone()); + let block_builder = OCIBlockBuilder::new(special_builder); + let blobs = meta.superblock.get_blob_infos(); + let reg_builder = self.create_reg_builder(blobs, blob_backend)?; + + // The order counts. + let builders = vec![ + Box::new(sock_builder) as Box, + Box::new(hard_link_builder), + Box::new(dir_builder), + Box::new(reg_builder), + Box::new(symlink_builder), + Box::new(fifo_builder), + Box::new(char_builder), + Box::new(block_builder), + ]; + + Ok(builders) + } + + fn create_reg_builder( + &self, + blobs: Vec>, + blob_backend: &Option>, + ) -> Result { + let mut readers = HashMap::new(); + let mut compressors = HashMap::new(); + for blob in blobs { + let blob_backend = blob_backend + .as_deref() + .with_context(|| "both blob path or blob backend config are not specified")?; + let reader = blob_backend + .get_reader(blob.blob_id().as_str()) + .map_err(|err| anyhow!("fail to get reader, error {:?}", err))?; + + let compressor = blob.compressor(); + readers.insert(blob.blob_index(), reader); + compressors.insert(blob.blob_index(), compressor); + } + + Ok(OCIRegBuilder::new( + Rc::new(PAXExtensionSectionBuilder::new()), + readers, + compressors, + )) + } +} + +struct OCITarBuilder { + writer: Builder, + builders: Vec>, +} + +impl OCITarBuilder { + fn new(builders: Vec>, writer: Builder) -> Self { + Self { builders, writer } + } +} + +impl TarBuilder for OCITarBuilder { + fn append(&mut self, inode: Arc, path: &Path) -> Result<()> { + for builder in &mut self.builders { + // Useless one, just go !!!!! + if !builder.can_handle(inode.clone(), path) { + continue; + } + + for sect in builder.build(inode.clone(), path)? { + self.writer.append(§.header, sect.data)?; + } + + return Ok(()); + } + + bail!("node {:?} can not be unpacked", path) + } +} diff --git a/src/bin/nydus-image/unpack/pax.rs b/src/bin/nydus-image/unpack/pax.rs index b35a411c4b8..71ac4f12731 100644 --- a/src/bin/nydus-image/unpack/pax.rs +++ b/src/bin/nydus-image/unpack/pax.rs @@ -1,790 +1,790 @@ -use nix::unistd::{Gid, Group, Uid, User}; -use std::ops::Deref; -use std::{ - collections::HashMap, - ffi::OsStr, - io::{self, Cursor, Error, ErrorKind, Read}, - iter::{self, repeat}, - os::unix::prelude::{OsStrExt, OsStringExt}, - path::{Path, PathBuf}, - rc::Rc, - str, - sync::Arc, - vec::IntoIter, -}; - -use anyhow::{Context, Result}; -use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::RafsInodeExt; -use nydus_storage::{backend::BlobReader, device::BlobChunkInfo, utils::alloc_buf}; -use nydus_utils::compress::{self, Algorithm}; -use tar::{EntryType, Header}; - -use super::{SectionBuilder, TarSection}; - -static PAX_SEP1: &[u8; 1] = b" "; -static PAX_SEP2: &[u8; 1] = b"="; -static PAX_PREFIX: &[u8; 13] = b"SCHILY.xattr."; -static PAX_DELIMITER: &[u8; 1] = b"\n"; - -pub struct OCISocketBuilder {} - -impl OCISocketBuilder { - pub fn new() -> Self { - OCISocketBuilder {} - } -} - -impl SectionBuilder for OCISocketBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - InodeWrapper::from_inode_info(node).is_sock() - } - - fn build(&self, _: Arc, _: &Path) -> Result> { - Ok(Vec::new()) - } -} - -pub struct OCILinkBuilder { - links: HashMap, - pax_link_builder: Rc, -} - -impl OCILinkBuilder { - pub fn new(pax_link_builder: Rc) -> Self { - OCILinkBuilder { - links: HashMap::new(), - pax_link_builder, - } - } -} - -impl SectionBuilder for OCILinkBuilder { - fn can_handle(&mut self, node: Arc, 
path: &Path) -> bool { - if !node.is_hardlink() || node.is_dir() { - return false; - } - - let is_appeared = self.links.contains_key(&node.ino()); - if !is_appeared { - self.links.insert(node.ino(), path.to_path_buf()); - } - - is_appeared - } - - fn build(&self, node: Arc, path: &Path) -> Result> { - let link = self.links.get(&node.ino()).unwrap(); - - self.pax_link_builder - .build(EntryType::hard_link(), node, path, link) - } -} - -pub struct OCIDirBuilder { - ext_builder: Rc, -} - -impl OCIDirBuilder { - pub fn new(ext_builder: Rc) -> Self { - OCIDirBuilder { ext_builder } - } - - fn is_root(&self, path: &Path) -> bool { - path.is_absolute() && path.file_name().is_none() - } -} - -fn set_header_by_inode(inode: Arc, header: &mut Header) -> Result<()> { - let inode = InodeWrapper::from_inode_info(inode); - header.set_size(inode.size()); - header.set_mtime(inode.mtime()); - header.set_uid(inode.uid() as u64); - header.set_gid(inode.gid() as u64); - - // To make the unpacked tar consistent with the OCI-formatted tar before the pack, - // we need to backfill the username and groupname in the tar header, which may - // break the repeatable build when unpacking in different hosts, but actually has - // little effect. - let username = User::from_uid(Uid::from_raw(inode.uid())) - .unwrap_or(None) - .map(|user| user.name) - .unwrap_or_default(); - header.set_username(&username)?; - let groupname = Group::from_gid(Gid::from_raw(inode.gid())) - .unwrap_or(None) - .map(|group| group.name) - .unwrap_or_default(); - header.set_groupname(&groupname)?; - - header.set_mode(Util::mask_mode(inode.mode())); - - Ok(()) -} - -impl SectionBuilder for OCIDirBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - node.is_dir() - } - - fn build(&self, inode: Arc, path: &Path) -> Result> { - if self.is_root(path) { - return Ok(Vec::new()); - } - - let mut header = Header::new_ustar(); - header.set_entry_type(EntryType::dir()); - header.set_device_major(0).unwrap(); - header.set_device_minor(0).unwrap(); - - set_header_by_inode(inode.clone(), &mut header)?; - header.set_size(0); - - let mut extensions = Vec::with_capacity(2); - if let Some(extension) = PAXUtil::set_path(&mut header, path)? { - extensions.push(extension); - } - if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { - extensions.extend(extension); - } - - Util::set_cksum(&mut header); - - let mut sections = Vec::with_capacity(2); - if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? 
{ - sections.push(ext_sect); - } - - let main_header = TarSection { - header, - data: Box::new(io::empty()), - }; - sections.push(main_header); - - Ok(sections) - } -} - -pub struct OCIRegBuilder { - ext_builder: Rc, - readers: HashMap>, - compressors: HashMap, -} - -impl OCIRegBuilder { - pub fn new( - ext_builder: Rc, - readers: HashMap>, - compressors: HashMap, - ) -> Self { - OCIRegBuilder { - ext_builder, - readers, - compressors, - } - } - - fn build_data(&self, inode: &dyn RafsInodeExt) -> Box { - let chunks = (0..inode.get_chunk_count()) - .map(|i| inode.get_chunk_info(i).unwrap()) - .collect(); - - let mut compressors = HashMap::new(); - compressors.clone_from(&self.compressors); - - let mut readers = HashMap::new(); - readers.clone_from(&self.readers); - let reader = ChunkReader::new(compressors, readers, chunks); - - Box::new(reader) - } -} - -impl SectionBuilder for OCIRegBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - node.is_reg() - } - - fn build(&self, inode: Arc, path: &Path) -> Result> { - let mut header = Header::new_ustar(); - header.set_entry_type(EntryType::file()); - header.set_device_major(0).unwrap(); - header.set_device_minor(0).unwrap(); - set_header_by_inode(inode.clone(), &mut header)?; - - let mut extensions = Vec::with_capacity(2); - if let Some(extension) = PAXUtil::set_path(&mut header, path)? { - extensions.push(extension); - } - if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { - extensions.extend(extension); - } - - Util::set_cksum(&mut header); - - let mut sections = Vec::with_capacity(2); - if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? { - sections.push(ext_sect); - } - - let main_header = TarSection { - header, - data: Box::new(self.build_data(inode.deref())), - }; - sections.push(main_header); - - Ok(sections) - } -} - -pub struct OCISymlinkBuilder { - pax_link_builder: Rc, -} - -impl OCISymlinkBuilder { - pub fn new(pax_link_builder: Rc) -> Self { - OCISymlinkBuilder { pax_link_builder } - } -} - -impl SectionBuilder for OCISymlinkBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - node.is_symlink() - } - - fn build(&self, node: Arc, path: &Path) -> Result> { - let link = node.get_symlink().unwrap(); - - self.pax_link_builder - .build(EntryType::symlink(), node, path, &PathBuf::from(link)) - } -} - -pub struct OCIFifoBuilder { - pax_special_builder: Rc, -} - -impl OCIFifoBuilder { - pub fn new(pax_special_builder: Rc) -> Self { - OCIFifoBuilder { - pax_special_builder, - } - } -} - -impl SectionBuilder for OCIFifoBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - InodeWrapper::from_inode_info(node).is_fifo() - } - - fn build(&self, inode: Arc, path: &Path) -> Result> { - self.pax_special_builder - .build(EntryType::fifo(), inode, path) - } -} - -pub struct OCICharBuilder { - pax_special_builder: Rc, -} - -impl OCICharBuilder { - pub fn new(pax_special_builder: Rc) -> Self { - OCICharBuilder { - pax_special_builder, - } - } -} - -impl SectionBuilder for OCICharBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - InodeWrapper::from_inode_info(node).is_chrdev() - } - - fn build(&self, inode: Arc, path: &Path) -> Result> { - self.pax_special_builder - .build(EntryType::character_special(), inode, path) - } -} - -pub struct OCIBlockBuilder { - pax_special_builder: Rc, -} - -impl OCIBlockBuilder { - pub fn new(pax_special_builder: Rc) -> Self { - OCIBlockBuilder { - pax_special_builder, - } - } -} - -impl SectionBuilder for 
OCIBlockBuilder { - fn can_handle(&mut self, node: Arc, _: &Path) -> bool { - InodeWrapper::from_inode_info(node).is_blkdev() - } - - fn build(&self, inode: Arc, path: &Path) -> Result> { - self.pax_special_builder - .build(EntryType::block_special(), inode, path) - } -} - -pub struct PAXSpecialSectionBuilder { - ext_builder: Rc, -} - -impl PAXSpecialSectionBuilder { - pub fn new(ext_builder: Rc) -> Self { - PAXSpecialSectionBuilder { ext_builder } - } - - fn build( - &self, - entry_type: EntryType, - inode: Arc, - path: &Path, - ) -> Result> { - let mut header = Header::new_ustar(); - header.set_entry_type(entry_type); - set_header_by_inode(inode.clone(), &mut header)?; - - let dev_id = self.cal_dev(inode.rdev() as u64); - header.set_device_major(dev_id.0)?; - header.set_device_minor(dev_id.1)?; - - let mut extensions = Vec::with_capacity(2); - if let Some(extension) = PAXUtil::set_path(&mut header, path)? { - extensions.push(extension); - } - if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { - extensions.extend(extension); - } - - Util::set_cksum(&mut header); - - let mut sections = Vec::with_capacity(2); - if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? { - sections.push(ext_sect); - } - - let main_header = TarSection { - header, - data: Box::new(io::empty()), - }; - sections.push(main_header); - - Ok(sections) - } - - fn cal_dev(&self, dev_id: u64) -> (u32, u32) { - let major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff); - let minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff); - - (major as u32, minor as u32) - } -} - -struct PAXRecord { - k: Vec, - v: Vec, -} - -pub struct PAXExtensionSectionBuilder {} - -impl PAXExtensionSectionBuilder { - pub fn new() -> Self { - PAXExtensionSectionBuilder {} - } - - fn build(&self, header: &Header, extensions: Vec) -> Result> { - if extensions.is_empty() { - return Ok(None); - } - - let path = header.path().unwrap().into_owned(); - - let mut header = Header::new_ustar(); - header.set_entry_type(EntryType::XHeader); - header.set_mode(0o644); - header.set_uid(0); - header.set_gid(0); - header.set_mtime(0); - - let data = self.build_data(extensions); - header.set_size(data.len() as u64); - - header - .set_path(&self.build_pax_name(&path, header.as_old().name.len())?) 
- .with_context(|| "fail to set path for pax section")?; - - Util::set_cksum(&mut header); - - Ok(Some(TarSection { - header, - data: Box::new(Cursor::new(data)), - })) - } - - fn build_data(&self, mut extensions: Vec) -> Vec { - extensions.sort_by(|r1, r2| { - let k1 = str::from_utf8(&r1.k).unwrap(); - let k2 = str::from_utf8(&r2.k).unwrap(); - k1.cmp(k2) - }); - - extensions - .into_iter() - .flat_map(|r| self.build_pax_record(&r.k, &r.v)) - .collect() - } - - fn build_pax_name(&self, path: &Path, max_len: usize) -> Result { - let filename = path.file_name().unwrap().to_owned(); - - let mut path = path.to_path_buf(); - path.set_file_name("PaxHeaders.0"); - let mut path = path.join(filename); - - if path.as_os_str().len() > max_len { - path = Util::truncate_path(&path, max_len)?; - } - - Ok(path) - } - - fn build_pax_record(&self, k: &[u8], v: &[u8]) -> Vec { - fn pax(buf: &mut Vec, size: usize, k: &[u8], v: &[u8]) { - buf.extend_from_slice(size.to_string().as_bytes()); - buf.extend_from_slice(PAX_SEP1); - buf.extend_from_slice(k); - buf.extend_from_slice(PAX_SEP2); - buf.extend_from_slice(v); - buf.extend_from_slice(PAX_DELIMITER); - } - - let mut size = k.len() + v.len() + PAX_SEP1.len() + PAX_SEP2.len() + PAX_DELIMITER.len(); - size += size.to_string().as_bytes().len(); - - let mut record = Vec::with_capacity(size); - pax(&mut record, size, k, v); - - if record.len() != size { - size = record.len(); - record.clear(); - pax(&mut record, size, k, v); - } - - record - } -} - -pub struct PAXLinkBuilder { - ext_builder: Rc, -} - -impl PAXLinkBuilder { - pub fn new(ext_builder: Rc) -> Self { - PAXLinkBuilder { ext_builder } - } - - fn build( - &self, - entry_type: EntryType, - inode: Arc, - path: &Path, - link: &Path, - ) -> Result> { - let mut header = Header::new_ustar(); - set_header_by_inode(inode.clone(), &mut header)?; - header.set_entry_type(entry_type); - header.set_size(0); - header.set_device_major(0).unwrap(); - header.set_device_minor(0).unwrap(); - - let mut extensions = Vec::with_capacity(3); - if let Some(extension) = PAXUtil::set_path(&mut header, path)? { - extensions.push(extension); - } - if let Some(extension) = PAXUtil::set_link(&mut header, link)? { - extensions.push(extension); - } - if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { - extensions.extend(extension); - } - - Util::set_cksum(&mut header); - - let mut sections = Vec::with_capacity(2); - if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? 
{ - sections.push(ext_sect); - } - - let main_header = TarSection { - header, - data: Box::new(io::empty()), - }; - sections.push(main_header); - - Ok(sections) - } -} - -struct PAXUtil {} - -impl PAXUtil { - fn get_xattr_as_extensions(inode: &dyn RafsInodeExt) -> Option> { - if !inode.has_xattr() { - return None; - } - - let keys = inode.get_xattrs().unwrap(); - let mut extensions = Vec::with_capacity(keys.len()); - - for key in keys { - let value = inode - .get_xattr(OsStr::from_bytes(&key)) - .unwrap() - .unwrap_or_default(); - - let key = Vec::from(PAX_PREFIX.to_owned()) - .into_iter() - .chain(key.into_iter()) - .collect(); - extensions.push(PAXRecord { k: key, v: value }); - } - - Some(extensions) - } - - fn set_link(header: &mut Header, path: &Path) -> Result> { - let max_len = header.as_old().linkname.len(); - if path.as_os_str().len() <= max_len { - return header - .set_link_name(path) - .with_context(|| "fail to set short link for pax header") - .map(|_| None); - } - - let extension = PAXRecord { - k: "linkpath".to_owned().into_bytes(), - v: path.to_owned().into_os_string().into_vec(), - }; - - let path = Util::truncate_path(path, max_len) - .with_context(|| "fail to truncate link for pax header")?; - - header - .set_link_name(&path) - .with_context(|| format!("fail to set header link again for {:?}", path))?; - - Ok(Some(extension)) - } - - fn set_path(header: &mut Header, path: &Path) -> Result> { - let path = Util::normalize_path(path).with_context(|| "fail to normalize path")?; - - let max_len = header.as_old().name.len(); - if path.as_os_str().len() <= max_len { - return header - .set_path(path) - .with_context(|| "fail to set short path for pax header") - .map(|_| None); - } - - let extension = PAXRecord { - k: "path".to_owned().into_bytes(), - v: path.to_owned().into_os_string().into_vec(), - }; - - let path = Util::truncate_path(&path, max_len) - .with_context(|| "fail to truncate path for pax header")?; - - header - .set_path(&path) - .with_context(|| format!("fail to set header path again for {:?}", path))?; - - Ok(Some(extension)) - } -} - -pub struct Util {} - -impl Util { - fn normalize_path(path: &Path) -> Result { - fn end_with_slash(p: &Path) -> bool { - p.as_os_str().as_bytes().last() == Some(&b'/') - } - - let mut normalized = if path.has_root() { - path.strip_prefix("/") - .with_context(|| "fail to strip prefix /")? - .to_path_buf() - } else { - path.to_path_buf() - }; - - if end_with_slash(&normalized) { - let name = normalized.file_name().unwrap().to_owned(); - normalized.set_file_name(name); - } - - Ok(normalized) - } - - // path is required longer than max_len - fn truncate_path(path: &Path, max_len: usize) -> Result { - let path = path.as_os_str().as_bytes(); - if path.len() < max_len { - bail!("path is shorter than limit") - } - - let path = match str::from_utf8(&path[..max_len]) { - Ok(s) => Ok(s), - Err(err) => str::from_utf8(&path[..err.valid_up_to()]) - .with_context(|| "fail to convert bytes to utf8 str"), - }?; - - Ok(PathBuf::from(path)) - } - - // Common Unix mode constants; these are not defined in any common tar standard. - // - // c_ISDIR = 040000 // Directory - // c_ISFIFO = 010000 // FIFO - // c_ISREG = 0100000 // Regular file - // c_ISLNK = 0120000 // Symbolic link - // c_ISBLK = 060000 // Block special file - // c_ISCHR = 020000 // Character special file - // c_ISSOCK = 0140000 // Socket - // - // Although many readers bear it, such as Go standard library and tar tool in ubuntu - // Truncate to last four bytes. 
The four consists of below: - // - // c_ISUID = 04000 // Set uid - // c_ISGID = 02000 // Set gid - // c_ISVTX = 01000 // Sticky bit - // MODE_PERM = 0777 // Owner:Group:Other R/W - fn mask_mode(st_mode: u32) -> u32 { - st_mode & 0o7777 - } - - // The checksum is calculated by taking the sum of the unsigned byte values of - // the header record with the eight checksum bytes taken to be ASCII spaces (decimal value 32). - // It is stored as a six digit octal number with leading zeroes followed by a NUL and then a space. - // The wiki and Go standard library adhere to this format. Stay with them~~~. - fn set_cksum(header: &mut Header) { - let old = header.as_old(); - let start = old as *const _ as usize; - let cksum_start = old.cksum.as_ptr() as *const _ as usize; - let offset = cksum_start - start; - let len = old.cksum.len(); - - let bs = header.as_bytes(); - let sum = bs[0..offset] - .iter() - .chain(iter::repeat(&b' ').take(len)) - .chain(&bs[offset + len..]) - .fold(0, |a, b| a + (*b as u32)); - - let bs = &mut header.as_old_mut().cksum; - bs[bs.len() - 1] = b' '; - bs[bs.len() - 2] = 0o0; - - let o = format!("{:o}", sum); - let value = o.bytes().rev().chain(repeat(b'0')); - for (slot, value) in bs.iter_mut().rev().skip(2).zip(value) { - *slot = value; - } - } -} - -struct ChunkReader { - compressors: HashMap, - readers: HashMap>, - - chunks: IntoIter>, - chunk: Cursor>, -} - -impl ChunkReader { - fn new( - compressors: HashMap, - readers: HashMap>, - chunks: Vec>, - ) -> Self { - Self { - compressors, - readers, - chunks: chunks.into_iter(), - chunk: Cursor::new(Vec::new()), - } - } - - fn load_chunk(&mut self, chunk: &dyn BlobChunkInfo) -> Result<()> { - let mut buf = alloc_buf(chunk.compressed_size() as usize); - - let reader = self - .readers - .get(&chunk.blob_index()) - .expect("No valid reader") - .clone(); - reader - .read(buf.as_mut_slice(), chunk.compressed_offset()) - .map_err(|err| { - error!("fail to read chunk, error: {:?}", err); - anyhow!("fail to read chunk, error: {:?}", err) - })?; - - if !chunk.is_compressed() { - self.chunk = Cursor::new(buf); - return Ok(()); - } - - let compressor = *self - .compressors - .get(&chunk.blob_index()) - .expect("No valid compressor"); - - let mut data = vec![0u8; chunk.uncompressed_size() as usize]; - compress::decompress(buf.as_mut_slice(), data.as_mut_slice(), compressor) - .with_context(|| "fail to decompress")?; - - self.chunk = Cursor::new(data); - - Ok(()) - } - - fn is_chunk_empty(&self) -> bool { - self.chunk.position() >= self.chunk.get_ref().len() as u64 - } -} - -impl Read for ChunkReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let mut size = 0; - - loop { - if self.is_chunk_empty() { - match self.chunks.next() { - None => break, - Some(chunk) => self.load_chunk(chunk.as_ref()).map_err(|err| { - Error::new( - ErrorKind::InvalidData, - format!("fail to load chunk, error: {}", err), - ) - })?, - } - } - - size += Read::read(&mut self.chunk, &mut buf[size..])?; - if size == buf.len() { - break; - } - } - - Ok(size) - } -} - -#[cfg(test)] -mod test; +use nix::unistd::{Gid, Group, Uid, User}; +use std::ops::Deref; +use std::{ + collections::HashMap, + ffi::OsStr, + io::{self, Cursor, Error, ErrorKind, Read}, + iter::{self, repeat}, + os::unix::prelude::{OsStrExt, OsStringExt}, + path::{Path, PathBuf}, + rc::Rc, + str, + sync::Arc, + vec::IntoIter, +}; + +use anyhow::{Context, Result}; +use nydus_rafs::metadata::inode::InodeWrapper; +use nydus_rafs::metadata::RafsInodeExt; +use nydus_storage::{backend::BlobReader, 
device::BlobChunkInfo, utils::alloc_buf}; +use nydus_utils::compress::{self, Algorithm}; +use tar::{EntryType, Header}; + +use super::{SectionBuilder, TarSection}; + +static PAX_SEP1: &[u8; 1] = b" "; +static PAX_SEP2: &[u8; 1] = b"="; +static PAX_PREFIX: &[u8; 13] = b"SCHILY.xattr."; +static PAX_DELIMITER: &[u8; 1] = b"\n"; + +pub struct OCISocketBuilder {} + +impl OCISocketBuilder { + pub fn new() -> Self { + OCISocketBuilder {} + } +} + +impl SectionBuilder for OCISocketBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + InodeWrapper::from_inode_info(node).is_sock() + } + + fn build(&self, _: Arc, _: &Path) -> Result> { + Ok(Vec::new()) + } +} + +pub struct OCILinkBuilder { + links: HashMap, + pax_link_builder: Rc, +} + +impl OCILinkBuilder { + pub fn new(pax_link_builder: Rc) -> Self { + OCILinkBuilder { + links: HashMap::new(), + pax_link_builder, + } + } +} + +impl SectionBuilder for OCILinkBuilder { + fn can_handle(&mut self, node: Arc, path: &Path) -> bool { + if !node.is_hardlink() || node.is_dir() { + return false; + } + + let is_appeared = self.links.contains_key(&node.ino()); + if !is_appeared { + self.links.insert(node.ino(), path.to_path_buf()); + } + + is_appeared + } + + fn build(&self, node: Arc, path: &Path) -> Result> { + let link = self.links.get(&node.ino()).unwrap(); + + self.pax_link_builder + .build(EntryType::hard_link(), node, path, link) + } +} + +pub struct OCIDirBuilder { + ext_builder: Rc, +} + +impl OCIDirBuilder { + pub fn new(ext_builder: Rc) -> Self { + OCIDirBuilder { ext_builder } + } + + fn is_root(&self, path: &Path) -> bool { + path.is_absolute() && path.file_name().is_none() + } +} + +fn set_header_by_inode(inode: Arc, header: &mut Header) -> Result<()> { + let inode = InodeWrapper::from_inode_info(inode); + header.set_size(inode.size()); + header.set_mtime(inode.mtime()); + header.set_uid(inode.uid() as u64); + header.set_gid(inode.gid() as u64); + + // To make the unpacked tar consistent with the OCI-formatted tar before the pack, + // we need to backfill the username and groupname in the tar header, which may + // break the repeatable build when unpacking in different hosts, but actually has + // little effect. + let username = User::from_uid(Uid::from_raw(inode.uid())) + .unwrap_or(None) + .map(|user| user.name) + .unwrap_or_default(); + header.set_username(&username)?; + let groupname = Group::from_gid(Gid::from_raw(inode.gid())) + .unwrap_or(None) + .map(|group| group.name) + .unwrap_or_default(); + header.set_groupname(&groupname)?; + + header.set_mode(Util::mask_mode(inode.mode())); + + Ok(()) +} + +impl SectionBuilder for OCIDirBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + node.is_dir() + } + + fn build(&self, inode: Arc, path: &Path) -> Result> { + if self.is_root(path) { + return Ok(Vec::new()); + } + + let mut header = Header::new_ustar(); + header.set_entry_type(EntryType::dir()); + header.set_device_major(0).unwrap(); + header.set_device_minor(0).unwrap(); + + set_header_by_inode(inode.clone(), &mut header)?; + header.set_size(0); + + let mut extensions = Vec::with_capacity(2); + if let Some(extension) = PAXUtil::set_path(&mut header, path)? { + extensions.push(extension); + } + if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { + extensions.extend(extension); + } + + Util::set_cksum(&mut header); + + let mut sections = Vec::with_capacity(2); + if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? 
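// Illustrative sketch of the hardlink de-duplication in OCILinkBuilder::can_handle:
// the first time an inode number is seen it is recorded and declined, so the
// regular-file builder emits the real data; every later occurrence is claimed
// and emitted as a hardlink entry pointing at the recorded path. `LinkTracker`
// and `claim` are hypothetical names, not the crate's API.
use std::collections::HashMap;
use std::path::{Path, PathBuf};

struct LinkTracker {
    links: HashMap<u64, PathBuf>, // inode number -> first path seen
}

impl LinkTracker {
    fn new() -> Self {
        Self { links: HashMap::new() }
    }

    /// Returns Some(target) when `path` should be written as a hardlink.
    fn claim(&mut self, ino: u64, path: &Path) -> Option<PathBuf> {
        match self.links.get(&ino) {
            Some(first) => Some(first.clone()),
            None => {
                self.links.insert(ino, path.to_path_buf());
                None
            }
        }
    }
}

fn main() {
    let mut tracker = LinkTracker::new();
    // First occurrence is declined: the real file content gets written.
    assert_eq!(tracker.claim(42, Path::new("a/data.bin")), None);
    // Second occurrence becomes a hardlink entry pointing at the first path.
    assert_eq!(
        tracker.claim(42, Path::new("b/alias.bin")),
        Some(PathBuf::from("a/data.bin"))
    );
}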
{ + sections.push(ext_sect); + } + + let main_header = TarSection { + header, + data: Box::new(io::empty()), + }; + sections.push(main_header); + + Ok(sections) + } +} + +pub struct OCIRegBuilder { + ext_builder: Rc, + readers: HashMap>, + compressors: HashMap, +} + +impl OCIRegBuilder { + pub fn new( + ext_builder: Rc, + readers: HashMap>, + compressors: HashMap, + ) -> Self { + OCIRegBuilder { + ext_builder, + readers, + compressors, + } + } + + fn build_data(&self, inode: &dyn RafsInodeExt) -> Box { + let chunks = (0..inode.get_chunk_count()) + .map(|i| inode.get_chunk_info(i).unwrap()) + .collect(); + + let mut compressors = HashMap::new(); + compressors.clone_from(&self.compressors); + + let mut readers = HashMap::new(); + readers.clone_from(&self.readers); + let reader = ChunkReader::new(compressors, readers, chunks); + + Box::new(reader) + } +} + +impl SectionBuilder for OCIRegBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + node.is_reg() + } + + fn build(&self, inode: Arc, path: &Path) -> Result> { + let mut header = Header::new_ustar(); + header.set_entry_type(EntryType::file()); + header.set_device_major(0).unwrap(); + header.set_device_minor(0).unwrap(); + set_header_by_inode(inode.clone(), &mut header)?; + + let mut extensions = Vec::with_capacity(2); + if let Some(extension) = PAXUtil::set_path(&mut header, path)? { + extensions.push(extension); + } + if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { + extensions.extend(extension); + } + + Util::set_cksum(&mut header); + + let mut sections = Vec::with_capacity(2); + if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? { + sections.push(ext_sect); + } + + let main_header = TarSection { + header, + data: Box::new(self.build_data(inode.deref())), + }; + sections.push(main_header); + + Ok(sections) + } +} + +pub struct OCISymlinkBuilder { + pax_link_builder: Rc, +} + +impl OCISymlinkBuilder { + pub fn new(pax_link_builder: Rc) -> Self { + OCISymlinkBuilder { pax_link_builder } + } +} + +impl SectionBuilder for OCISymlinkBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + node.is_symlink() + } + + fn build(&self, node: Arc, path: &Path) -> Result> { + let link = node.get_symlink().unwrap(); + + self.pax_link_builder + .build(EntryType::symlink(), node, path, &PathBuf::from(link)) + } +} + +pub struct OCIFifoBuilder { + pax_special_builder: Rc, +} + +impl OCIFifoBuilder { + pub fn new(pax_special_builder: Rc) -> Self { + OCIFifoBuilder { + pax_special_builder, + } + } +} + +impl SectionBuilder for OCIFifoBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + InodeWrapper::from_inode_info(node).is_fifo() + } + + fn build(&self, inode: Arc, path: &Path) -> Result> { + self.pax_special_builder + .build(EntryType::fifo(), inode, path) + } +} + +pub struct OCICharBuilder { + pax_special_builder: Rc, +} + +impl OCICharBuilder { + pub fn new(pax_special_builder: Rc) -> Self { + OCICharBuilder { + pax_special_builder, + } + } +} + +impl SectionBuilder for OCICharBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + InodeWrapper::from_inode_info(node).is_chrdev() + } + + fn build(&self, inode: Arc, path: &Path) -> Result> { + self.pax_special_builder + .build(EntryType::character_special(), inode, path) + } +} + +pub struct OCIBlockBuilder { + pax_special_builder: Rc, +} + +impl OCIBlockBuilder { + pub fn new(pax_special_builder: Rc) -> Self { + OCIBlockBuilder { + pax_special_builder, + } + } +} + +impl SectionBuilder for 
OCIBlockBuilder { + fn can_handle(&mut self, node: Arc, _: &Path) -> bool { + InodeWrapper::from_inode_info(node).is_blkdev() + } + + fn build(&self, inode: Arc, path: &Path) -> Result> { + self.pax_special_builder + .build(EntryType::block_special(), inode, path) + } +} + +pub struct PAXSpecialSectionBuilder { + ext_builder: Rc, +} + +impl PAXSpecialSectionBuilder { + pub fn new(ext_builder: Rc) -> Self { + PAXSpecialSectionBuilder { ext_builder } + } + + fn build( + &self, + entry_type: EntryType, + inode: Arc, + path: &Path, + ) -> Result> { + let mut header = Header::new_ustar(); + header.set_entry_type(entry_type); + set_header_by_inode(inode.clone(), &mut header)?; + + let dev_id = self.cal_dev(inode.rdev() as u64); + header.set_device_major(dev_id.0)?; + header.set_device_minor(dev_id.1)?; + + let mut extensions = Vec::with_capacity(2); + if let Some(extension) = PAXUtil::set_path(&mut header, path)? { + extensions.push(extension); + } + if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { + extensions.extend(extension); + } + + Util::set_cksum(&mut header); + + let mut sections = Vec::with_capacity(2); + if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? { + sections.push(ext_sect); + } + + let main_header = TarSection { + header, + data: Box::new(io::empty()), + }; + sections.push(main_header); + + Ok(sections) + } + + fn cal_dev(&self, dev_id: u64) -> (u32, u32) { + let major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff); + let minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff); + + (major as u32, minor as u32) + } +} + +struct PAXRecord { + k: Vec, + v: Vec, +} + +pub struct PAXExtensionSectionBuilder {} + +impl PAXExtensionSectionBuilder { + pub fn new() -> Self { + PAXExtensionSectionBuilder {} + } + + fn build(&self, header: &Header, extensions: Vec) -> Result> { + if extensions.is_empty() { + return Ok(None); + } + + let path = header.path().unwrap().into_owned(); + + let mut header = Header::new_ustar(); + header.set_entry_type(EntryType::XHeader); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + + let data = self.build_data(extensions); + header.set_size(data.len() as u64); + + header + .set_path(&self.build_pax_name(&path, header.as_old().name.len())?) 
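// Illustrative sketch of how cal_dev splits a Linux 64-bit dev_t back into
// (major, minor): the kernel/glibc encoding keeps the low 8 bits of minor and
// the low 12 bits of major in the bottom 20 bits, with the high bits stored
// further up. `makedev` and `split_dev` are hypothetical helper names used
// only to show the bit layout round-tripping.
fn makedev(major: u64, minor: u64) -> u64 {
    ((major & 0xfff) << 8)
        | (minor & 0xff)
        | ((minor & !0xff) << 12)
        | ((major & !0xfff) << 32)
}

fn split_dev(dev_id: u64) -> (u32, u32) {
    // Same masks as PAXSpecialSectionBuilder::cal_dev.
    let major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
    let minor = ((dev_id >> 12) & 0xffff_ff00) | (dev_id & 0x0000_00ff);
    (major as u32, minor as u32)
}

fn main() {
    // /dev/sda1 is conventionally (8, 1); both small and large numbers round-trip.
    assert_eq!(split_dev(makedev(8, 1)), (8, 1));
    assert_eq!(split_dev(makedev(259, 131_072)), (259, 131_072));
}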
+ .with_context(|| "fail to set path for pax section")?; + + Util::set_cksum(&mut header); + + Ok(Some(TarSection { + header, + data: Box::new(Cursor::new(data)), + })) + } + + fn build_data(&self, mut extensions: Vec) -> Vec { + extensions.sort_by(|r1, r2| { + let k1 = str::from_utf8(&r1.k).unwrap(); + let k2 = str::from_utf8(&r2.k).unwrap(); + k1.cmp(k2) + }); + + extensions + .into_iter() + .flat_map(|r| self.build_pax_record(&r.k, &r.v)) + .collect() + } + + fn build_pax_name(&self, path: &Path, max_len: usize) -> Result { + let filename = path.file_name().unwrap().to_owned(); + + let mut path = path.to_path_buf(); + path.set_file_name("PaxHeaders.0"); + let mut path = path.join(filename); + + if path.as_os_str().len() > max_len { + path = Util::truncate_path(&path, max_len)?; + } + + Ok(path) + } + + fn build_pax_record(&self, k: &[u8], v: &[u8]) -> Vec { + fn pax(buf: &mut Vec, size: usize, k: &[u8], v: &[u8]) { + buf.extend_from_slice(size.to_string().as_bytes()); + buf.extend_from_slice(PAX_SEP1); + buf.extend_from_slice(k); + buf.extend_from_slice(PAX_SEP2); + buf.extend_from_slice(v); + buf.extend_from_slice(PAX_DELIMITER); + } + + let mut size = k.len() + v.len() + PAX_SEP1.len() + PAX_SEP2.len() + PAX_DELIMITER.len(); + size += size.to_string().as_bytes().len(); + + let mut record = Vec::with_capacity(size); + pax(&mut record, size, k, v); + + if record.len() != size { + size = record.len(); + record.clear(); + pax(&mut record, size, k, v); + } + + record + } +} + +pub struct PAXLinkBuilder { + ext_builder: Rc, +} + +impl PAXLinkBuilder { + pub fn new(ext_builder: Rc) -> Self { + PAXLinkBuilder { ext_builder } + } + + fn build( + &self, + entry_type: EntryType, + inode: Arc, + path: &Path, + link: &Path, + ) -> Result> { + let mut header = Header::new_ustar(); + set_header_by_inode(inode.clone(), &mut header)?; + header.set_entry_type(entry_type); + header.set_size(0); + header.set_device_major(0).unwrap(); + header.set_device_minor(0).unwrap(); + + let mut extensions = Vec::with_capacity(3); + if let Some(extension) = PAXUtil::set_path(&mut header, path)? { + extensions.push(extension); + } + if let Some(extension) = PAXUtil::set_link(&mut header, link)? { + extensions.push(extension); + } + if let Some(extension) = PAXUtil::get_xattr_as_extensions(inode.deref()) { + extensions.extend(extension); + } + + Util::set_cksum(&mut header); + + let mut sections = Vec::with_capacity(2); + if let Some(ext_sect) = self.ext_builder.build(&header, extensions)? 
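// Illustrative sketch of the PAX record layout produced by build_pax_record:
// each record is "<len> <key>=<value>\n", where <len> is the decimal length of
// the whole record including the digits of <len> itself, so the length is
// guessed, rendered, and corrected once if the digit count changed. (Xattr
// keys are stored as such records under the "SCHILY.xattr." prefix, and an
// over-long path keeps a truncated copy in the ustar name field while the
// full path goes into a "path" record.)
fn pax_record(key: &[u8], value: &[u8]) -> Vec<u8> {
    let render = |len: usize| {
        let mut buf = Vec::new();
        buf.extend_from_slice(len.to_string().as_bytes());
        buf.push(b' ');
        buf.extend_from_slice(key);
        buf.push(b'=');
        buf.extend_from_slice(value);
        buf.push(b'\n');
        buf
    };

    // First guess: payload length plus the digits of that guess.
    let payload = key.len() + value.len() + 3; // ' ', '=', '\n'
    let mut size = payload + payload.to_string().len();
    let mut record = render(size);
    if record.len() != size {
        // Adding the length digits crossed a power of ten; render once more.
        size = record.len();
        record = render(size);
    }
    record
}

fn main() {
    // key "path" with a 4-byte value: the record is 13 bytes, length included.
    assert_eq!(pax_record(b"path", b"test"), b"13 path=test\n".to_vec());
}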
{ + sections.push(ext_sect); + } + + let main_header = TarSection { + header, + data: Box::new(io::empty()), + }; + sections.push(main_header); + + Ok(sections) + } +} + +struct PAXUtil {} + +impl PAXUtil { + fn get_xattr_as_extensions(inode: &dyn RafsInodeExt) -> Option> { + if !inode.has_xattr() { + return None; + } + + let keys = inode.get_xattrs().unwrap(); + let mut extensions = Vec::with_capacity(keys.len()); + + for key in keys { + let value = inode + .get_xattr(OsStr::from_bytes(&key)) + .unwrap() + .unwrap_or_default(); + + let key = Vec::from(PAX_PREFIX.to_owned()) + .into_iter() + .chain(key.into_iter()) + .collect(); + extensions.push(PAXRecord { k: key, v: value }); + } + + Some(extensions) + } + + fn set_link(header: &mut Header, path: &Path) -> Result> { + let max_len = header.as_old().linkname.len(); + if path.as_os_str().len() <= max_len { + return header + .set_link_name(path) + .with_context(|| "fail to set short link for pax header") + .map(|_| None); + } + + let extension = PAXRecord { + k: "linkpath".to_owned().into_bytes(), + v: path.to_owned().into_os_string().into_vec(), + }; + + let path = Util::truncate_path(path, max_len) + .with_context(|| "fail to truncate link for pax header")?; + + header + .set_link_name(&path) + .with_context(|| format!("fail to set header link again for {:?}", path))?; + + Ok(Some(extension)) + } + + fn set_path(header: &mut Header, path: &Path) -> Result> { + let path = Util::normalize_path(path).with_context(|| "fail to normalize path")?; + + let max_len = header.as_old().name.len(); + if path.as_os_str().len() <= max_len { + return header + .set_path(path) + .with_context(|| "fail to set short path for pax header") + .map(|_| None); + } + + let extension = PAXRecord { + k: "path".to_owned().into_bytes(), + v: path.to_owned().into_os_string().into_vec(), + }; + + let path = Util::truncate_path(&path, max_len) + .with_context(|| "fail to truncate path for pax header")?; + + header + .set_path(&path) + .with_context(|| format!("fail to set header path again for {:?}", path))?; + + Ok(Some(extension)) + } +} + +pub struct Util {} + +impl Util { + fn normalize_path(path: &Path) -> Result { + fn end_with_slash(p: &Path) -> bool { + p.as_os_str().as_bytes().last() == Some(&b'/') + } + + let mut normalized = if path.has_root() { + path.strip_prefix("/") + .with_context(|| "fail to strip prefix /")? + .to_path_buf() + } else { + path.to_path_buf() + }; + + if end_with_slash(&normalized) { + let name = normalized.file_name().unwrap().to_owned(); + normalized.set_file_name(name); + } + + Ok(normalized) + } + + // path is required longer than max_len + fn truncate_path(path: &Path, max_len: usize) -> Result { + let path = path.as_os_str().as_bytes(); + if path.len() < max_len { + bail!("path is shorter than limit") + } + + let path = match str::from_utf8(&path[..max_len]) { + Ok(s) => Ok(s), + Err(err) => str::from_utf8(&path[..err.valid_up_to()]) + .with_context(|| "fail to convert bytes to utf8 str"), + }?; + + Ok(PathBuf::from(path)) + } + + // Common Unix mode constants; these are not defined in any common tar standard. + // + // c_ISDIR = 040000 // Directory + // c_ISFIFO = 010000 // FIFO + // c_ISREG = 0100000 // Regular file + // c_ISLNK = 0120000 // Symbolic link + // c_ISBLK = 060000 // Block special file + // c_ISCHR = 020000 // Character special file + // c_ISSOCK = 0140000 // Socket + // + // Although many readers bear it, such as Go standard library and tar tool in ubuntu + // Truncate to last four bytes. 
The four consists of below: + // + // c_ISUID = 04000 // Set uid + // c_ISGID = 02000 // Set gid + // c_ISVTX = 01000 // Sticky bit + // MODE_PERM = 0777 // Owner:Group:Other R/W + fn mask_mode(st_mode: u32) -> u32 { + st_mode & 0o7777 + } + + // The checksum is calculated by taking the sum of the unsigned byte values of + // the header record with the eight checksum bytes taken to be ASCII spaces (decimal value 32). + // It is stored as a six digit octal number with leading zeroes followed by a NUL and then a space. + // The wiki and Go standard library adhere to this format. Stay with them~~~. + fn set_cksum(header: &mut Header) { + let old = header.as_old(); + let start = old as *const _ as usize; + let cksum_start = old.cksum.as_ptr() as *const _ as usize; + let offset = cksum_start - start; + let len = old.cksum.len(); + + let bs = header.as_bytes(); + let sum = bs[0..offset] + .iter() + .chain(iter::repeat(&b' ').take(len)) + .chain(&bs[offset + len..]) + .fold(0, |a, b| a + (*b as u32)); + + let bs = &mut header.as_old_mut().cksum; + bs[bs.len() - 1] = b' '; + bs[bs.len() - 2] = 0o0; + + let o = format!("{:o}", sum); + let value = o.bytes().rev().chain(repeat(b'0')); + for (slot, value) in bs.iter_mut().rev().skip(2).zip(value) { + *slot = value; + } + } +} + +struct ChunkReader { + compressors: HashMap, + readers: HashMap>, + + chunks: IntoIter>, + chunk: Cursor>, +} + +impl ChunkReader { + fn new( + compressors: HashMap, + readers: HashMap>, + chunks: Vec>, + ) -> Self { + Self { + compressors, + readers, + chunks: chunks.into_iter(), + chunk: Cursor::new(Vec::new()), + } + } + + fn load_chunk(&mut self, chunk: &dyn BlobChunkInfo) -> Result<()> { + let mut buf = alloc_buf(chunk.compressed_size() as usize); + + let reader = self + .readers + .get(&chunk.blob_index()) + .expect("No valid reader") + .clone(); + reader + .read(buf.as_mut_slice(), chunk.compressed_offset()) + .map_err(|err| { + error!("fail to read chunk, error: {:?}", err); + anyhow!("fail to read chunk, error: {:?}", err) + })?; + + if !chunk.is_compressed() { + self.chunk = Cursor::new(buf); + return Ok(()); + } + + let compressor = *self + .compressors + .get(&chunk.blob_index()) + .expect("No valid compressor"); + + let mut data = vec![0u8; chunk.uncompressed_size() as usize]; + compress::decompress(buf.as_mut_slice(), data.as_mut_slice(), compressor) + .with_context(|| "fail to decompress")?; + + self.chunk = Cursor::new(data); + + Ok(()) + } + + fn is_chunk_empty(&self) -> bool { + self.chunk.position() >= self.chunk.get_ref().len() as u64 + } +} + +impl Read for ChunkReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut size = 0; + + loop { + if self.is_chunk_empty() { + match self.chunks.next() { + None => break, + Some(chunk) => self.load_chunk(chunk.as_ref()).map_err(|err| { + Error::new( + ErrorKind::InvalidData, + format!("fail to load chunk, error: {}", err), + ) + })?, + } + } + + size += Read::read(&mut self.chunk, &mut buf[size..])?; + if size == buf.len() { + break; + } + } + + Ok(size) + } +} + +#[cfg(test)] +mod test; diff --git a/src/bin/nydus-image/unpack/pax/test.rs b/src/bin/nydus-image/unpack/pax/test.rs index 7e8a447c7b9..e5a67711ae2 100644 --- a/src/bin/nydus-image/unpack/pax/test.rs +++ b/src/bin/nydus-image/unpack/pax/test.rs @@ -1,253 +1,253 @@ -use std::collections::HashMap; -use std::{io::Read, sync::Arc}; - -use nydus_storage::backend::{BackendResult, BlobReader}; -use nydus_storage::device::BlobChunkInfo; -use nydus_utils::compress::{self, Algorithm}; -use 
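// Illustrative sketch of the tar header checksum rule that Util::set_cksum
// implements: sum all 512 header bytes with the 8-byte checksum field counted
// as ASCII spaces, then store the sum as a six-digit, zero-padded octal number
// followed by NUL and a space. The field offset 148 is the standard ustar
// layout (name 100 + mode 8 + uid 8 + gid 8 + size 12 + mtime 12); this helper
// is a stand-alone illustration, not the crate's in-place implementation.
fn tar_checksum(header: &[u8; 512]) -> [u8; 8] {
    const CKSUM_OFF: usize = 148;
    const CKSUM_LEN: usize = 8;

    let sum: u32 = header
        .iter()
        .enumerate()
        .map(|(i, b)| {
            if (CKSUM_OFF..CKSUM_OFF + CKSUM_LEN).contains(&i) {
                b' ' as u32 // checksum bytes are treated as spaces while summing
            } else {
                *b as u32
            }
        })
        .sum();

    let mut field = [0u8; 8];
    field[..6].copy_from_slice(format!("{:06o}", sum).as_bytes());
    field[6] = 0; // NUL terminator
    field[7] = b' ';
    field
}

fn main() {
    // An all-zero header sums to eight spaces: 8 * 32 = 256 = 0o400.
    let header = [0u8; 512];
    assert_eq!(&tar_checksum(&header), b"000400\0 ");
}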
nydus_utils::metrics::BackendMetrics; - -use super::ChunkReader; - -struct MockBlobReader { - data: Vec, - metrics: Arc, -} - -impl MockBlobReader { - fn new(data: Vec) -> Self { - Self { - data, - metrics: Default::default(), - } - } -} - -impl BlobReader for MockBlobReader { - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - let offset = offset as usize; - if offset >= self.data.len() { - return Ok(0_usize); - } - - let end = self.data.len().min(offset as usize + buf.len()); - buf.clone_from_slice(&self.data[offset..end]); - - Ok(end - offset) - } - - fn metrics(&self) -> &BackendMetrics { - self.metrics.as_ref() - } - - fn blob_size(&self) -> BackendResult { - todo!(); - } -} - -struct MockChunkInfo { - compress_offset: u64, - compress_size: u32, - uncompress_offset: u64, - uncompress_size: u32, - is_compressed: bool, -} - -impl MockChunkInfo { - fn new( - compress_offset: u64, - compress_size: u32, - uncompress_offset: u64, - uncompress_size: u32, - is_compressed: bool, - ) -> Self { - Self { - compress_offset, - compress_size, - uncompress_offset, - uncompress_size, - is_compressed, - } - } -} - -impl BlobChunkInfo for MockChunkInfo { - fn chunk_id(&self) -> &nydus_utils::digest::RafsDigest { - todo!(); - } - - fn id(&self) -> u32 { - todo!(); - } - - fn blob_index(&self) -> u32 { - 0 - } - - fn compressed_offset(&self) -> u64 { - self.compress_offset - } - - fn compressed_size(&self) -> u32 { - self.compress_size - } - - fn uncompressed_offset(&self) -> u64 { - self.uncompress_offset - } - - fn uncompressed_size(&self) -> u32 { - self.uncompress_size - } - - fn is_batch(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.is_compressed - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn std::any::Any { - todo!(); - } -} - -#[test] -fn test_read_chunk() { - let mut reader = create_default_chunk_reader(); - let mut buf = [0u8; 256]; - - assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [1u8; 256]); - - assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [2u8; 256]); - - assert_eq!(0, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [2u8; 256]); -} - -#[test] -fn test_read_chunk_smaller_buffer() { - let mut reader = create_default_chunk_reader(); - let mut buf = [0u8; 255]; - - assert_eq!(255, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [1u8; 255]); - - assert_eq!(255, reader.read(&mut buf).unwrap()); - assert_eq!(buf[0], 1u8); - assert_eq!(buf[1..255], [2u8; 254]); - - assert_eq!(2, reader.read(&mut buf).unwrap()); - assert_eq!(buf[0..2], [2u8; 2]); - - assert_eq!(0, reader.read(&mut buf).unwrap()); -} - -#[test] -fn test_read_chunk_larger_buffer() { - let mut reader = create_default_chunk_reader(); - let mut buf = [0u8; 257]; - - assert_eq!(257, reader.read(&mut buf).unwrap()); - assert_eq!(buf[..256], [1u8; 256]); - assert_eq!(buf[256], 2u8); - - assert_eq!(255, reader.read(&mut buf).unwrap()); - assert_eq!(buf[..255], [2u8; 255]); - - assert_eq!(0, reader.read(&mut buf).unwrap()); -} - -#[test] -fn test_read_chunk_zero_buffer() { - let mut reader = create_default_chunk_reader(); - let mut buf = [0u8; 0]; - - assert_eq!(0, reader.read(&mut buf).unwrap()); - assert_eq!(0, reader.read(&mut buf).unwrap()); - assert_eq!(0, reader.read(&mut buf).unwrap()); -} - -#[test] -fn test_read_chunk_compress() { - let mut reader = create_compress_chunk_reader(); - let mut buf = [0u8; 256]; - - assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [1u8; 256]); - - 
assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [2u8; 256]); - - assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [3u8; 256]); - - assert_eq!(256, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [4u8; 256]); - - assert_eq!(0, reader.read(&mut buf).unwrap()); - assert_eq!(buf, [4u8; 256]); -} - -fn create_compress_chunk_reader() -> ChunkReader { - let chunk = [[1u8; 256], [2u8; 256], [3u8; 256], [4u8; 256]].concat(); - - let (compressed_chunk, is_compressed) = compress::compress(&chunk, Algorithm::GZip).unwrap(); - assert!(is_compressed, "expect compressed chunk"); - - let meta = Arc::new(MockChunkInfo::new( - 0, - compressed_chunk.len() as u32, - 0, - chunk.len() as u32, - true, - )); - - let blob_reader = Arc::new(MockBlobReader::new(compressed_chunk.into_owned())); - - let mut readers: HashMap> = HashMap::new(); - readers.insert(meta.blob_index(), blob_reader); - - let mut compressors: HashMap = HashMap::new(); - compressors.insert(meta.blob_index(), Algorithm::GZip); - - ChunkReader::new(compressors, readers, vec![meta]) -} - -fn create_default_chunk_reader() -> ChunkReader { - let chunk1 = [1u8; 256]; - let chunk2 = [2u8; 256]; - - let chunk_meta1 = Arc::new(MockChunkInfo::new( - 0, - chunk1.len() as u32, - 0, - chunk1.len() as u32, - false, - )); - let chunk_meta2 = Arc::new(MockChunkInfo::new( - chunk1.len() as u64, - chunk2.len() as u32, - chunk1.len() as u64, - chunk2.len() as u32, - false, - )); - - let blob_reader = Arc::new(MockBlobReader::new([chunk1, chunk2].concat())); - - let mut readers: HashMap> = HashMap::new(); - readers.insert(chunk_meta1.blob_index(), blob_reader.clone()); - readers.insert(chunk_meta2.blob_index(), blob_reader); - - let mut compressors: HashMap = HashMap::new(); - compressors.insert(chunk_meta1.blob_index(), Algorithm::None); - compressors.insert(chunk_meta2.blob_index(), Algorithm::None); - - ChunkReader::new(compressors, readers, vec![chunk_meta1, chunk_meta2]) -} +use std::collections::HashMap; +use std::{io::Read, sync::Arc}; + +use nydus_storage::backend::{BackendResult, BlobReader}; +use nydus_storage::device::BlobChunkInfo; +use nydus_utils::compress::{self, Algorithm}; +use nydus_utils::metrics::BackendMetrics; + +use super::ChunkReader; + +struct MockBlobReader { + data: Vec, + metrics: Arc, +} + +impl MockBlobReader { + fn new(data: Vec) -> Self { + Self { + data, + metrics: Default::default(), + } + } +} + +impl BlobReader for MockBlobReader { + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + let offset = offset as usize; + if offset >= self.data.len() { + return Ok(0_usize); + } + + let end = self.data.len().min(offset as usize + buf.len()); + buf.clone_from_slice(&self.data[offset..end]); + + Ok(end - offset) + } + + fn metrics(&self) -> &BackendMetrics { + self.metrics.as_ref() + } + + fn blob_size(&self) -> BackendResult { + todo!(); + } +} + +struct MockChunkInfo { + compress_offset: u64, + compress_size: u32, + uncompress_offset: u64, + uncompress_size: u32, + is_compressed: bool, +} + +impl MockChunkInfo { + fn new( + compress_offset: u64, + compress_size: u32, + uncompress_offset: u64, + uncompress_size: u32, + is_compressed: bool, + ) -> Self { + Self { + compress_offset, + compress_size, + uncompress_offset, + uncompress_size, + is_compressed, + } + } +} + +impl BlobChunkInfo for MockChunkInfo { + fn chunk_id(&self) -> &nydus_utils::digest::RafsDigest { + todo!(); + } + + fn id(&self) -> u32 { + todo!(); + } + + fn blob_index(&self) -> u32 { + 0 + } + + fn 
compressed_offset(&self) -> u64 { + self.compress_offset + } + + fn compressed_size(&self) -> u32 { + self.compress_size + } + + fn uncompressed_offset(&self) -> u64 { + self.uncompress_offset + } + + fn uncompressed_size(&self) -> u32 { + self.uncompress_size + } + + fn is_batch(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.is_compressed + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn std::any::Any { + todo!(); + } +} + +#[test] +fn test_read_chunk() { + let mut reader = create_default_chunk_reader(); + let mut buf = [0u8; 256]; + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [1u8; 256]); + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [2u8; 256]); + + assert_eq!(0, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [2u8; 256]); +} + +#[test] +fn test_read_chunk_smaller_buffer() { + let mut reader = create_default_chunk_reader(); + let mut buf = [0u8; 255]; + + assert_eq!(255, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [1u8; 255]); + + assert_eq!(255, reader.read(&mut buf).unwrap()); + assert_eq!(buf[0], 1u8); + assert_eq!(buf[1..255], [2u8; 254]); + + assert_eq!(2, reader.read(&mut buf).unwrap()); + assert_eq!(buf[0..2], [2u8; 2]); + + assert_eq!(0, reader.read(&mut buf).unwrap()); +} + +#[test] +fn test_read_chunk_larger_buffer() { + let mut reader = create_default_chunk_reader(); + let mut buf = [0u8; 257]; + + assert_eq!(257, reader.read(&mut buf).unwrap()); + assert_eq!(buf[..256], [1u8; 256]); + assert_eq!(buf[256], 2u8); + + assert_eq!(255, reader.read(&mut buf).unwrap()); + assert_eq!(buf[..255], [2u8; 255]); + + assert_eq!(0, reader.read(&mut buf).unwrap()); +} + +#[test] +fn test_read_chunk_zero_buffer() { + let mut reader = create_default_chunk_reader(); + let mut buf = [0u8; 0]; + + assert_eq!(0, reader.read(&mut buf).unwrap()); + assert_eq!(0, reader.read(&mut buf).unwrap()); + assert_eq!(0, reader.read(&mut buf).unwrap()); +} + +#[test] +fn test_read_chunk_compress() { + let mut reader = create_compress_chunk_reader(); + let mut buf = [0u8; 256]; + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [1u8; 256]); + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [2u8; 256]); + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [3u8; 256]); + + assert_eq!(256, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [4u8; 256]); + + assert_eq!(0, reader.read(&mut buf).unwrap()); + assert_eq!(buf, [4u8; 256]); +} + +fn create_compress_chunk_reader() -> ChunkReader { + let chunk = [[1u8; 256], [2u8; 256], [3u8; 256], [4u8; 256]].concat(); + + let (compressed_chunk, is_compressed) = compress::compress(&chunk, Algorithm::GZip).unwrap(); + assert!(is_compressed, "expect compressed chunk"); + + let meta = Arc::new(MockChunkInfo::new( + 0, + compressed_chunk.len() as u32, + 0, + chunk.len() as u32, + true, + )); + + let blob_reader = Arc::new(MockBlobReader::new(compressed_chunk.into_owned())); + + let mut readers: HashMap> = HashMap::new(); + readers.insert(meta.blob_index(), blob_reader); + + let mut compressors: HashMap = HashMap::new(); + compressors.insert(meta.blob_index(), Algorithm::GZip); + + ChunkReader::new(compressors, readers, vec![meta]) +} + +fn create_default_chunk_reader() -> ChunkReader { + let chunk1 = [1u8; 256]; + let chunk2 = [2u8; 256]; + + let chunk_meta1 = Arc::new(MockChunkInfo::new( + 0, + chunk1.len() as u32, + 0, + chunk1.len() as u32, + false, + )); + let chunk_meta2 = 
Arc::new(MockChunkInfo::new( + chunk1.len() as u64, + chunk2.len() as u32, + chunk1.len() as u64, + chunk2.len() as u32, + false, + )); + + let blob_reader = Arc::new(MockBlobReader::new([chunk1, chunk2].concat())); + + let mut readers: HashMap> = HashMap::new(); + readers.insert(chunk_meta1.blob_index(), blob_reader.clone()); + readers.insert(chunk_meta2.blob_index(), blob_reader); + + let mut compressors: HashMap = HashMap::new(); + compressors.insert(chunk_meta1.blob_index(), Algorithm::None); + compressors.insert(chunk_meta2.blob_index(), Algorithm::None); + + ChunkReader::new(compressors, readers, vec![chunk_meta1, chunk_meta2]) +} diff --git a/src/bin/nydus-image/validator.rs b/src/bin/nydus-image/validator.rs index 95e8355c277..c51d3baa518 100644 --- a/src/bin/nydus-image/validator.rs +++ b/src/bin/nydus-image/validator.rs @@ -1,55 +1,55 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Validator for RAFS format - -use std::path::Path; -use std::sync::Arc; - -use anyhow::{Context, Result}; -use nydus_api::ConfigV2; -use nydus_builder::Tree; -use nydus_rafs::metadata::{RafsSuper, RafsVersion}; -use nydus_storage::device::BlobInfo; -use nydus_utils::compress; - -pub struct Validator { - sb: RafsSuper, -} - -impl Validator { - pub fn new(bootstrap_path: &Path, config: Arc) -> Result { - let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?; - - Ok(Self { sb }) - } - - pub fn check( - &mut self, - verbosity: bool, - ) -> Result<(Vec>, compress::Algorithm, RafsVersion)> { - let err = "failed to load bootstrap for validator"; - let tree = Tree::from_bootstrap(&self.sb, &mut ()).context(err)?; - - let pre = &mut |t: &Tree| -> Result<()> { - let node = t.lock_node(); - if verbosity { - println!("inode: {}", node); - for chunk in &node.chunks { - println!("\t chunk: {}", chunk); - } - } - Ok(()) - }; - tree.walk_dfs_pre(pre)?; - let compressor = self.sb.meta.get_compressor(); - let rafs_version: RafsVersion = self.sb.meta.version.try_into().unwrap(); - - Ok(( - self.sb.superblock.get_blob_infos(), - compressor, - rafs_version, - )) - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! 
Validator for RAFS format + +use std::path::Path; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use nydus_api::ConfigV2; +use nydus_builder::Tree; +use nydus_rafs::metadata::{RafsSuper, RafsVersion}; +use nydus_storage::device::BlobInfo; +use nydus_utils::compress; + +pub struct Validator { + sb: RafsSuper, +} + +impl Validator { + pub fn new(bootstrap_path: &Path, config: Arc) -> Result { + let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?; + + Ok(Self { sb }) + } + + pub fn check( + &mut self, + verbosity: bool, + ) -> Result<(Vec>, compress::Algorithm, RafsVersion)> { + let err = "failed to load bootstrap for validator"; + let tree = Tree::from_bootstrap(&self.sb, &mut ()).context(err)?; + + let pre = &mut |t: &Tree| -> Result<()> { + let node = t.lock_node(); + if verbosity { + println!("inode: {}", node); + for chunk in &node.chunks { + println!("\t chunk: {}", chunk); + } + } + Ok(()) + }; + tree.walk_dfs_pre(pre)?; + let compressor = self.sb.meta.get_compressor(); + let rafs_version: RafsVersion = self.sb.meta.version.try_into().unwrap(); + + Ok(( + self.sb.superblock.get_blob_infos(), + compressor, + rafs_version, + )) + } +} diff --git a/src/bin/nydusctl/client.rs b/src/bin/nydusctl/client.rs index a8e3597966a..8c4c1bd9c76 100644 --- a/src/bin/nydusctl/client.rs +++ b/src/bin/nydusctl/client.rs @@ -1,146 +1,146 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::path::PathBuf; - -use anyhow::Result; -use hyper::{header, Body, Client, Method, Request, Uri as HyperUri}; -use hyperlocal::{UnixClientExt, Uri}; -use serde_json::{self, Value}; - -pub struct NydusdClient { - sock_path: PathBuf, -} - -impl NydusdClient { - pub fn new(sock: &str) -> Self { - Self { - sock_path: sock.to_string().into(), - } - } - - fn build_uri(&self, path: &str, query: Option>) -> HyperUri { - let mut endpoint = format!("/api/{}", path); - - if let Some(q) = query { - let mut params = String::new(); - for p in q { - params.push_str(&format!("{}={}", p.0, p.1)) - } - - endpoint.push_str(&format!("?{}", params)); - } - - Uri::new(&self.sock_path, endpoint.as_str()).into() - } - - pub async fn get(&self, path: &str) -> Result { - let client = Client::unix(); - let uri = self.build_uri(path, None); - let response = client.get(uri).await?; - let sc = response.status().as_u16(); - let buf = hyper::body::to_bytes(response).await?; - let b = serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; - - if sc >= 400 { - bail!("Request failed. {:?}", b); - } - - Ok(b) - } - - pub async fn put(&self, path: &str, data: Option) -> Result<()> { - let client = Client::unix(); - let uri = self.build_uri(path, None); - let (body, _) = if let Some(d) = data { - let l = d.len(); - (d.into(), l) - } else { - (Body::empty(), 0) - }; - - let req = Request::builder() - .method(Method::PUT) - .header(header::USER_AGENT, "nydusctl") - .uri(uri) - .body(body)?; - let response = client.request(req).await?; - let sc = response.status().as_u16(); - let buf = hyper::body::to_bytes(response).await?; - - if sc >= 400 { - let b: serde_json::Value = - serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; - bail!("Request failed. 
{:?}", b); - } - - Ok(()) - } - - pub async fn post( - &self, - path: &str, - data: Option, - query: Option>, - ) -> Result<()> { - let client = Client::unix(); - let uri = self.build_uri(path, query); - let (body, _) = if let Some(d) = data { - let l = d.len(); - (d.into(), l) - } else { - (Body::empty(), 0) - }; - - let req = Request::builder() - .method(Method::POST) - .header(header::USER_AGENT, "nydusctl") - .uri(uri) - .body(body)?; - let response = client.request(req).await?; - let sc = response.status().as_u16(); - let buf = hyper::body::to_bytes(response).await?; - - if sc >= 400 { - let b: serde_json::Value = - serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; - bail!("Request failed. {:?}", b); - } - - Ok(()) - } - - pub async fn delete( - &self, - path: &str, - data: Option, - query: Option>, - ) -> Result<()> { - let client = Client::unix(); - let uri = self.build_uri(path, query); - let (body, _) = if let Some(d) = data { - let l = d.len(); - (d.into(), l) - } else { - (Body::empty(), 0) - }; - - let req = Request::builder() - .method(Method::DELETE) - .header(header::USER_AGENT, "nydusctl") - .uri(uri) - .body(body)?; - let response = client.request(req).await?; - let sc = response.status().as_u16(); - let buf = hyper::body::to_bytes(response).await?; - - if sc >= 400 { - let b: serde_json::Value = - serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; - bail!("Request failed. {:?}", b); - } - - Ok(()) - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::path::PathBuf; + +use anyhow::Result; +use hyper::{header, Body, Client, Method, Request, Uri as HyperUri}; +use hyperlocal::{UnixClientExt, Uri}; +use serde_json::{self, Value}; + +pub struct NydusdClient { + sock_path: PathBuf, +} + +impl NydusdClient { + pub fn new(sock: &str) -> Self { + Self { + sock_path: sock.to_string().into(), + } + } + + fn build_uri(&self, path: &str, query: Option>) -> HyperUri { + let mut endpoint = format!("/api/{}", path); + + if let Some(q) = query { + let mut params = String::new(); + for p in q { + params.push_str(&format!("{}={}", p.0, p.1)) + } + + endpoint.push_str(&format!("?{}", params)); + } + + Uri::new(&self.sock_path, endpoint.as_str()).into() + } + + pub async fn get(&self, path: &str) -> Result { + let client = Client::unix(); + let uri = self.build_uri(path, None); + let response = client.get(uri).await?; + let sc = response.status().as_u16(); + let buf = hyper::body::to_bytes(response).await?; + let b = serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; + + if sc >= 400 { + bail!("Request failed. {:?}", b); + } + + Ok(b) + } + + pub async fn put(&self, path: &str, data: Option) -> Result<()> { + let client = Client::unix(); + let uri = self.build_uri(path, None); + let (body, _) = if let Some(d) = data { + let l = d.len(); + (d.into(), l) + } else { + (Body::empty(), 0) + }; + + let req = Request::builder() + .method(Method::PUT) + .header(header::USER_AGENT, "nydusctl") + .uri(uri) + .body(body)?; + let response = client.request(req).await?; + let sc = response.status().as_u16(); + let buf = hyper::body::to_bytes(response).await?; + + if sc >= 400 { + let b: serde_json::Value = + serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; + bail!("Request failed. 
{:?}", b); + } + + Ok(()) + } + + pub async fn post( + &self, + path: &str, + data: Option, + query: Option>, + ) -> Result<()> { + let client = Client::unix(); + let uri = self.build_uri(path, query); + let (body, _) = if let Some(d) = data { + let l = d.len(); + (d.into(), l) + } else { + (Body::empty(), 0) + }; + + let req = Request::builder() + .method(Method::POST) + .header(header::USER_AGENT, "nydusctl") + .uri(uri) + .body(body)?; + let response = client.request(req).await?; + let sc = response.status().as_u16(); + let buf = hyper::body::to_bytes(response).await?; + + if sc >= 400 { + let b: serde_json::Value = + serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; + bail!("Request failed. {:?}", b); + } + + Ok(()) + } + + pub async fn delete( + &self, + path: &str, + data: Option, + query: Option>, + ) -> Result<()> { + let client = Client::unix(); + let uri = self.build_uri(path, query); + let (body, _) = if let Some(d) = data { + let l = d.len(); + (d.into(), l) + } else { + (Body::empty(), 0) + }; + + let req = Request::builder() + .method(Method::DELETE) + .header(header::USER_AGENT, "nydusctl") + .uri(uri) + .body(body)?; + let response = client.request(req).await?; + let sc = response.status().as_u16(); + let buf = hyper::body::to_bytes(response).await?; + + if sc >= 400 { + let b: serde_json::Value = + serde_json::from_slice(&buf).map_err(|e| anyhow!("deserialize: {}", e))?; + bail!("Request failed. {:?}", b); + } + + Ok(()) + } +} diff --git a/src/bin/nydusctl/commands.rs b/src/bin/nydusctl/commands.rs index 73ca54de546..7b82b09a345 100644 --- a/src/bin/nydusctl/commands.rs +++ b/src/bin/nydusctl/commands.rs @@ -1,487 +1,487 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::thread::sleep; -use std::time::Duration; - -use anyhow::Result; -use nydus::{FsBackendDescriptor, FsBackendType}; - -use crate::client::NydusdClient; - -type CommandParams = HashMap; - -fn load_param_interval(params: &Option) -> Result> { - if let Some(p) = params { - if let Some(interval) = p.get("interval") { - interval - .parse() - .map(Some) - .map_err(|e| anyhow!("Invalid interval input. {}", e)) - } else { - Ok(None) - } - } else { - Ok(None) - } -} - -pub(crate) struct CommandCache {} - -macro_rules! items_map( - { $($key:expr => $value:expr),+ } => { - { - let mut m: HashMap = HashMap::new(); - $( - m.insert($key.to_string(), $value.to_string()); - )+ - m - } - }; -); - -lazy_static! 
{ - pub static ref CONFIGURE_ITEMS_MAP: HashMap = - items_map!("log-level" => "log_level"); -} - -impl CommandCache { - pub async fn execute( - &self, - raw: bool, - client: &NydusdClient, - _params: Option, - ) -> Result<()> { - let metrics = client.get("v1/metrics/blobcache").await?; - let m = metrics.as_object().unwrap(); - - let prefetch_duration = m["prefetch_end_time_secs"].as_f64().unwrap() - + m["prefetch_end_time_millis"].as_f64().unwrap() / 1000.0 - - (m["prefetch_begin_time_secs"].as_f64().unwrap() - + m["prefetch_begin_time_millis"].as_f64().unwrap() / 1000.0); - - let prefetch_data_amount = m["prefetch_data_amount"].as_f64().unwrap(); - - if raw { - println!("{}", metrics); - } else { - print!( - r#" -Partial Hits: {partial_hits} -Whole Hits: {whole_hits} -Total Read: {total_read} -Directory: {directory} -Files: {files} -Persister Buffer: {buffered} - -Prefetch Workers: {workers} -Prefetch Amount: {prefetch_amount} = {prefetch_amount_kb} KB -Prefetch Requests: {requests} -Prefetch Average Size: {avg_prefetch_size} Bytes -Prefetch Duration: {prefetch_duration} Seconds -Prefetch Bandwidth: {prefetch_bandwidth} MB/S -Prefetch Request Latency: {prefetch_request_latency} Seconds -Prefetch Unmerged: {unmerged_blocks} -"#, - partial_hits = m["partial_hits"], - whole_hits = m["whole_hits"], - total_read = m["total"], - prefetch_amount = prefetch_data_amount, - prefetch_amount_kb = prefetch_data_amount / 1024.0, - files = m["underlying_files"], - directory = m["store_path"], - requests = m["prefetch_requests_count"], - avg_prefetch_size = m["prefetch_data_amount"] - .as_u64() - .unwrap() - .checked_div(m["prefetch_requests_count"].as_u64().unwrap()) - .unwrap_or_default(), - workers = m["prefetch_workers"], - unmerged_blocks = m["prefetch_unmerged_chunks"], - buffered = m["buffered_backend_size"], - prefetch_duration = prefetch_duration, - prefetch_bandwidth = prefetch_data_amount / 1024.0 / 1024.0 / prefetch_duration, - prefetch_request_latency = m["prefetch_cumulative_time_millis"].as_f64().unwrap() - / m["prefetch_requests_count"].as_f64().unwrap() - / 1000.0 - ); - } - - Ok(()) - } -} - -fn metric_delta(old: &serde_json::Value, new: &serde_json::Value, label: &str) -> u64 { - new[label].as_u64().unwrap() - old[label].as_u64().unwrap() -} - -fn metric_vec_delta(old: &serde_json::Value, new: &serde_json::Value, label: &str) -> Vec { - let new_array = new[label].as_array().unwrap(); - let old_array = old[label].as_array().unwrap(); - assert_eq!(new_array.len(), old_array.len()); - let mut r = Vec::new(); - - for i in 0..new_array.len() { - r.push(new_array[i].as_u64().unwrap() - old_array[i].as_u64().unwrap()); - } - - r -} - -pub(crate) struct CommandBackend {} - -impl CommandBackend { - pub async fn execute( - &self, - raw: bool, - client: &NydusdClient, - params: Option, - ) -> Result<()> { - let metrics = client.get("v1/metrics/backend").await?; - - let interval = load_param_interval(¶ms)?; - if let Some(i) = interval { - let mut last = metrics; - loop { - sleep(Duration::from_secs(i as u64)); - let current = client.get("v1/metrics/backend").await?; - - let delta_data = metric_delta(&last, ¤t, "read_amount_total"); - let delta_requests = metric_delta(&last, ¤t, "read_count"); - let delta_latency = - metric_delta(&last, ¤t, "read_cumulative_latency_millis_total"); - // Block size separated counters. 
- // 1K; 4K; 16K; 64K, 128K, 512K, 1M - // <=1ms, <=20ms, <=50ms, <=100ms, <=500ms, <=1s, <=2s, >2s - - // TODO: Also add 256k - let latency_cumulative_dist = - metric_vec_delta(&last, ¤t, "read_cumulative_latency_millis_dist"); - let latency_block_hits = - metric_vec_delta(&last, ¤t, "read_count_block_size_dist"); - - let sizes = ["<1K", "1K~", "4K~", "16K~", "64K~", "128K~", "512K~", "1M~"]; - - print!( - r#" ->>> >>> >>> >>> >>> -Backend Read Bandwidth: {} KB/S -Backend Average IO Size: {} Bytes -Backend Average Latency: {} millis - -Block Sizes/millis: -{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8} -"#, - delta_data.checked_div(i as u64 * 1024).unwrap_or_default(), - delta_data.checked_div(delta_requests).unwrap_or_default(), - delta_latency - .checked_div(delta_requests) - .unwrap_or_default(), - sizes[0], - sizes[1], - sizes[2], - sizes[3], - sizes[4], - sizes[5], - sizes[6], - sizes[7] - ); - - for (i, _) in sizes.iter().enumerate() { - print!( - "{:<8}", - latency_cumulative_dist[i] - .checked_div(latency_block_hits[i]) - .unwrap_or_default() - ); - } - - println!(); - println!("<<< <<< <<< <<< <<<"); - - last = current; - } - } - - if raw { - println!("{}", metrics); - } else { - let sizes = ["<1K", "1K~", "4K~", "16K~", "64K~", "128K~", "512K~", "1M~"]; - let m = metrics.as_object().unwrap(); - print!( - r#" -Backend Type: {backend_type} -Read Amount: {read_amount} Bytes ({read_count_mb} MB) -Read Count: {read_count} -Read Errors: {read_errors} -"#, - backend_type = m["backend_type"], - read_amount = m["read_amount_total"], - read_count = m["read_count"], - read_count_mb = m["read_amount_total"].as_f64().unwrap() / 1024.0 / 1024.0, - read_errors = m["read_errors"], - ); - - println!( - r#" -{:<25}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, - "Block Sizes:", - sizes[0], - sizes[1], - sizes[2], - sizes[3], - sizes[4], - sizes[5], - sizes[6], - sizes[7], - ); - - let latency_cumulative_dist = - m["read_cumulative_latency_millis_dist"].as_array().unwrap(); - let latency_block_hits = m["read_count_block_size_dist"].as_array().unwrap(); - - print!("{:<25}", "Average Latency(millis):"); - - for (i, _) in sizes.iter().enumerate() { - print!( - "{:<8}", - latency_cumulative_dist[i] - .as_u64() - .unwrap() - .checked_div(latency_block_hits[i].as_u64().unwrap()) - .unwrap_or_default() - ); - } - - println!(); - - println!( - r#" -{:<25}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, - "Block Sizes:", - sizes[0], - sizes[1], - sizes[2], - sizes[3], - sizes[4], - sizes[5], - sizes[6], - sizes[7] - ); - - print!("{:<25}", "Request Count:"); - - for (i, _) in sizes.iter().enumerate() { - print!("{:<8}", latency_block_hits[i].as_u64().unwrap()); - } - - println!(); - } - - Ok(()) - } -} - -pub(crate) struct CommandFsStats {} - -impl CommandFsStats { - pub async fn execute( - &self, - raw: bool, - client: &NydusdClient, - _params: Option, - ) -> Result<()> { - let metrics = client.get("v1/metrics").await?; - let m = metrics.as_object().unwrap(); - let fop_counter = m["fop_hits"].as_array().unwrap(); - let fop_errors = m["fop_errors"].as_array().unwrap(); - if raw { - println!("{}", metrics); - } else { - let periods = [ - "<1ms", "~20ms", "~50ms", "~100ms", "~500ms", "~1s", "~2s", "2s~", - ]; - let latency_dist = m["read_latency_dist"].as_array().unwrap(); - println!( - r#" -{:<16}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, - "Read Latency:", - periods[0], - periods[1], - periods[2], - periods[3], - periods[4], - periods[5], - periods[6], - periods[7], - ); - - print!("{:<16}", "Reads Count:"); - for d in 
latency_dist { - print!("{:<8}", d.as_u64().unwrap()); - } - - println!("\n"); - - println!("Read Errors: {}", fop_errors[4]); - let data_read = m["data_read"].as_u64().unwrap(); - println!( - "Read Data: {}Bytes ({}MB)", - data_read, - data_read / 1024 / 1024 - ); - - print!( - r#" -FOP Counters: -Getattr({}) Readlink({}) Open({}) Release({}) Read({}) Statfs({}) Getxattr({}) Listxattr({}) -Opendir({}) Lookup({}) Readdir({}) Readdirplus({}) Access({}) Forget({}) BatchForget({}) -"#, - fop_counter[0], - fop_counter[1], - fop_counter[2], - fop_counter[3], - fop_counter[4], - fop_counter[5], - fop_counter[6], - fop_counter[7], - fop_counter[8], - fop_counter[9], - fop_counter[10], - fop_counter[11], - fop_counter[12], - fop_counter[13], - fop_counter[14], - ); - } - - Ok(()) - } -} - -pub(crate) struct CommandDaemon {} - -impl CommandDaemon { - pub async fn execute( - &self, - raw: bool, - client: &NydusdClient, - params: Option, - ) -> Result<()> { - if let Some(p) = params { - let mut real = HashMap::::new(); - - // Map user provided configured item key to the one nydusd accepts. - for (k, v) in p.into_iter() { - real.insert( - CONFIGURE_ITEMS_MAP - .get(&k) - .ok_or_else(|| anyhow!("illegal item input"))? - .clone(), - v, - ); - } - - let data = serde_json::to_string(&real)?; - client.put("v1/daemon", Some(data)).await?; - } else { - let info = client.get("v1/daemon").await?; - let i = info.as_object().unwrap(); - - if raw { - println!("{}", info); - } else { - let version_info = &i["version"]; - print!( - r#" -Version: {version} -Status: {state} -Profile: {profile} -Commit: {git_commit} -"#, - version = version_info["package_ver"], - state = i["state"], - profile = version_info["profile"], - git_commit = version_info["git_commit"], - ); - - if let Some(b) = i.get("backend_collection") { - if let Some(fs_backends) = b.as_object() { - if !fs_backends.is_empty() { - println!("Instances:") - } - - for (mount_point, backend_obj) in fs_backends { - let backend: FsBackendDescriptor = - serde_json::from_value(backend_obj.clone()).unwrap(); - println!("\tInstance Mountpoint: {}", mount_point); - println!("\tType: {}", backend.backend_type); - println!("\tMounted Time: {}", backend.mounted_time); - match backend.backend_type { - FsBackendType::PassthroughFs => {} - FsBackendType::Rafs => { - let cfg = backend.config.unwrap(); - let cache_cfg = cfg.get_cache_config()?; - let rafs_cfg = cfg.get_rafs_config()?; - println!("\tMode: {}", rafs_cfg.mode); - println!("\tPrefetch: {}", cache_cfg.prefetch.enable); - println!( - "\tPrefetch Merging Size: {}", - cache_cfg.prefetch.batch_size - ); - println!(); - } - } - } - } - } - } - } - - Ok(()) - } -} - -pub(crate) struct CommandMount {} - -impl CommandMount { - pub async fn execute( - &self, - _raw: bool, - client: &NydusdClient, - params: Option, - ) -> Result<()> { - let p = params.unwrap(); - let (source, mountpoint, fs_type) = (&p["source"], &p["mountpoint"], &p["type"]); - let config = std::fs::read_to_string(&p["config"]).unwrap(); - let cmd = json!({"source": source, "fs_type": fs_type, "config": config}).to_string(); - - client - .post( - "v1/mount", - Some(cmd), - Some(vec![("mountpoint", mountpoint)]), - ) - .await - } -} - -pub(crate) struct CommandUmount {} - -impl CommandUmount { - pub async fn execute( - &self, - _raw: bool, - client: &NydusdClient, - params: Option, - ) -> Result<()> { - let p = params.unwrap(); - let mountpoint = &p["mountpoint"]; - - client - .delete("v1/mount", None, Some(vec![("mountpoint", mountpoint)])) - .await - } 
-} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::thread::sleep; +use std::time::Duration; + +use anyhow::Result; +use nydus::{FsBackendDescriptor, FsBackendType}; + +use crate::client::NydusdClient; + +type CommandParams = HashMap; + +fn load_param_interval(params: &Option) -> Result> { + if let Some(p) = params { + if let Some(interval) = p.get("interval") { + interval + .parse() + .map(Some) + .map_err(|e| anyhow!("Invalid interval input. {}", e)) + } else { + Ok(None) + } + } else { + Ok(None) + } +} + +pub(crate) struct CommandCache {} + +macro_rules! items_map( + { $($key:expr => $value:expr),+ } => { + { + let mut m: HashMap = HashMap::new(); + $( + m.insert($key.to_string(), $value.to_string()); + )+ + m + } + }; +); + +lazy_static! { + pub static ref CONFIGURE_ITEMS_MAP: HashMap = + items_map!("log-level" => "log_level"); +} + +impl CommandCache { + pub async fn execute( + &self, + raw: bool, + client: &NydusdClient, + _params: Option, + ) -> Result<()> { + let metrics = client.get("v1/metrics/blobcache").await?; + let m = metrics.as_object().unwrap(); + + let prefetch_duration = m["prefetch_end_time_secs"].as_f64().unwrap() + + m["prefetch_end_time_millis"].as_f64().unwrap() / 1000.0 + - (m["prefetch_begin_time_secs"].as_f64().unwrap() + + m["prefetch_begin_time_millis"].as_f64().unwrap() / 1000.0); + + let prefetch_data_amount = m["prefetch_data_amount"].as_f64().unwrap(); + + if raw { + println!("{}", metrics); + } else { + print!( + r#" +Partial Hits: {partial_hits} +Whole Hits: {whole_hits} +Total Read: {total_read} +Directory: {directory} +Files: {files} +Persister Buffer: {buffered} + +Prefetch Workers: {workers} +Prefetch Amount: {prefetch_amount} = {prefetch_amount_kb} KB +Prefetch Requests: {requests} +Prefetch Average Size: {avg_prefetch_size} Bytes +Prefetch Duration: {prefetch_duration} Seconds +Prefetch Bandwidth: {prefetch_bandwidth} MB/S +Prefetch Request Latency: {prefetch_request_latency} Seconds +Prefetch Unmerged: {unmerged_blocks} +"#, + partial_hits = m["partial_hits"], + whole_hits = m["whole_hits"], + total_read = m["total"], + prefetch_amount = prefetch_data_amount, + prefetch_amount_kb = prefetch_data_amount / 1024.0, + files = m["underlying_files"], + directory = m["store_path"], + requests = m["prefetch_requests_count"], + avg_prefetch_size = m["prefetch_data_amount"] + .as_u64() + .unwrap() + .checked_div(m["prefetch_requests_count"].as_u64().unwrap()) + .unwrap_or_default(), + workers = m["prefetch_workers"], + unmerged_blocks = m["prefetch_unmerged_chunks"], + buffered = m["buffered_backend_size"], + prefetch_duration = prefetch_duration, + prefetch_bandwidth = prefetch_data_amount / 1024.0 / 1024.0 / prefetch_duration, + prefetch_request_latency = m["prefetch_cumulative_time_millis"].as_f64().unwrap() + / m["prefetch_requests_count"].as_f64().unwrap() + / 1000.0 + ); + } + + Ok(()) + } +} + +fn metric_delta(old: &serde_json::Value, new: &serde_json::Value, label: &str) -> u64 { + new[label].as_u64().unwrap() - old[label].as_u64().unwrap() +} + +fn metric_vec_delta(old: &serde_json::Value, new: &serde_json::Value, label: &str) -> Vec { + let new_array = new[label].as_array().unwrap(); + let old_array = old[label].as_array().unwrap(); + assert_eq!(new_array.len(), old_array.len()); + let mut r = Vec::new(); + + for i in 0..new_array.len() { + r.push(new_array[i].as_u64().unwrap() - old_array[i].as_u64().unwrap()); + } + + r +} + +pub(crate) struct 
CommandBackend {} + +impl CommandBackend { + pub async fn execute( + &self, + raw: bool, + client: &NydusdClient, + params: Option, + ) -> Result<()> { + let metrics = client.get("v1/metrics/backend").await?; + + let interval = load_param_interval(¶ms)?; + if let Some(i) = interval { + let mut last = metrics; + loop { + sleep(Duration::from_secs(i as u64)); + let current = client.get("v1/metrics/backend").await?; + + let delta_data = metric_delta(&last, ¤t, "read_amount_total"); + let delta_requests = metric_delta(&last, ¤t, "read_count"); + let delta_latency = + metric_delta(&last, ¤t, "read_cumulative_latency_millis_total"); + // Block size separated counters. + // 1K; 4K; 16K; 64K, 128K, 512K, 1M + // <=1ms, <=20ms, <=50ms, <=100ms, <=500ms, <=1s, <=2s, >2s + + // TODO: Also add 256k + let latency_cumulative_dist = + metric_vec_delta(&last, ¤t, "read_cumulative_latency_millis_dist"); + let latency_block_hits = + metric_vec_delta(&last, ¤t, "read_count_block_size_dist"); + + let sizes = ["<1K", "1K~", "4K~", "16K~", "64K~", "128K~", "512K~", "1M~"]; + + print!( + r#" +>>> >>> >>> >>> >>> +Backend Read Bandwidth: {} KB/S +Backend Average IO Size: {} Bytes +Backend Average Latency: {} millis + +Block Sizes/millis: +{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8} +"#, + delta_data.checked_div(i as u64 * 1024).unwrap_or_default(), + delta_data.checked_div(delta_requests).unwrap_or_default(), + delta_latency + .checked_div(delta_requests) + .unwrap_or_default(), + sizes[0], + sizes[1], + sizes[2], + sizes[3], + sizes[4], + sizes[5], + sizes[6], + sizes[7] + ); + + for (i, _) in sizes.iter().enumerate() { + print!( + "{:<8}", + latency_cumulative_dist[i] + .checked_div(latency_block_hits[i]) + .unwrap_or_default() + ); + } + + println!(); + println!("<<< <<< <<< <<< <<<"); + + last = current; + } + } + + if raw { + println!("{}", metrics); + } else { + let sizes = ["<1K", "1K~", "4K~", "16K~", "64K~", "128K~", "512K~", "1M~"]; + let m = metrics.as_object().unwrap(); + print!( + r#" +Backend Type: {backend_type} +Read Amount: {read_amount} Bytes ({read_count_mb} MB) +Read Count: {read_count} +Read Errors: {read_errors} +"#, + backend_type = m["backend_type"], + read_amount = m["read_amount_total"], + read_count = m["read_count"], + read_count_mb = m["read_amount_total"].as_f64().unwrap() / 1024.0 / 1024.0, + read_errors = m["read_errors"], + ); + + println!( + r#" +{:<25}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, + "Block Sizes:", + sizes[0], + sizes[1], + sizes[2], + sizes[3], + sizes[4], + sizes[5], + sizes[6], + sizes[7], + ); + + let latency_cumulative_dist = + m["read_cumulative_latency_millis_dist"].as_array().unwrap(); + let latency_block_hits = m["read_count_block_size_dist"].as_array().unwrap(); + + print!("{:<25}", "Average Latency(millis):"); + + for (i, _) in sizes.iter().enumerate() { + print!( + "{:<8}", + latency_cumulative_dist[i] + .as_u64() + .unwrap() + .checked_div(latency_block_hits[i].as_u64().unwrap()) + .unwrap_or_default() + ); + } + + println!(); + + println!( + r#" +{:<25}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, + "Block Sizes:", + sizes[0], + sizes[1], + sizes[2], + sizes[3], + sizes[4], + sizes[5], + sizes[6], + sizes[7] + ); + + print!("{:<25}", "Request Count:"); + + for (i, _) in sizes.iter().enumerate() { + print!("{:<8}", latency_block_hits[i].as_u64().unwrap()); + } + + println!(); + } + + Ok(()) + } +} + +pub(crate) struct CommandFsStats {} + +impl CommandFsStats { + pub async fn execute( + &self, + raw: bool, + client: &NydusdClient, + _params: Option, + ) -> 
Result<()> { + let metrics = client.get("v1/metrics").await?; + let m = metrics.as_object().unwrap(); + let fop_counter = m["fop_hits"].as_array().unwrap(); + let fop_errors = m["fop_errors"].as_array().unwrap(); + if raw { + println!("{}", metrics); + } else { + let periods = [ + "<1ms", "~20ms", "~50ms", "~100ms", "~500ms", "~1s", "~2s", "2s~", + ]; + let latency_dist = m["read_latency_dist"].as_array().unwrap(); + println!( + r#" +{:<16}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}{:<8}"#, + "Read Latency:", + periods[0], + periods[1], + periods[2], + periods[3], + periods[4], + periods[5], + periods[6], + periods[7], + ); + + print!("{:<16}", "Reads Count:"); + for d in latency_dist { + print!("{:<8}", d.as_u64().unwrap()); + } + + println!("\n"); + + println!("Read Errors: {}", fop_errors[4]); + let data_read = m["data_read"].as_u64().unwrap(); + println!( + "Read Data: {}Bytes ({}MB)", + data_read, + data_read / 1024 / 1024 + ); + + print!( + r#" +FOP Counters: +Getattr({}) Readlink({}) Open({}) Release({}) Read({}) Statfs({}) Getxattr({}) Listxattr({}) +Opendir({}) Lookup({}) Readdir({}) Readdirplus({}) Access({}) Forget({}) BatchForget({}) +"#, + fop_counter[0], + fop_counter[1], + fop_counter[2], + fop_counter[3], + fop_counter[4], + fop_counter[5], + fop_counter[6], + fop_counter[7], + fop_counter[8], + fop_counter[9], + fop_counter[10], + fop_counter[11], + fop_counter[12], + fop_counter[13], + fop_counter[14], + ); + } + + Ok(()) + } +} + +pub(crate) struct CommandDaemon {} + +impl CommandDaemon { + pub async fn execute( + &self, + raw: bool, + client: &NydusdClient, + params: Option, + ) -> Result<()> { + if let Some(p) = params { + let mut real = HashMap::::new(); + + // Map user provided configured item key to the one nydusd accepts. + for (k, v) in p.into_iter() { + real.insert( + CONFIGURE_ITEMS_MAP + .get(&k) + .ok_or_else(|| anyhow!("illegal item input"))? 
+ .clone(), + v, + ); + } + + let data = serde_json::to_string(&real)?; + client.put("v1/daemon", Some(data)).await?; + } else { + let info = client.get("v1/daemon").await?; + let i = info.as_object().unwrap(); + + if raw { + println!("{}", info); + } else { + let version_info = &i["version"]; + print!( + r#" +Version: {version} +Status: {state} +Profile: {profile} +Commit: {git_commit} +"#, + version = version_info["package_ver"], + state = i["state"], + profile = version_info["profile"], + git_commit = version_info["git_commit"], + ); + + if let Some(b) = i.get("backend_collection") { + if let Some(fs_backends) = b.as_object() { + if !fs_backends.is_empty() { + println!("Instances:") + } + + for (mount_point, backend_obj) in fs_backends { + let backend: FsBackendDescriptor = + serde_json::from_value(backend_obj.clone()).unwrap(); + println!("\tInstance Mountpoint: {}", mount_point); + println!("\tType: {}", backend.backend_type); + println!("\tMounted Time: {}", backend.mounted_time); + match backend.backend_type { + FsBackendType::PassthroughFs => {} + FsBackendType::Rafs => { + let cfg = backend.config.unwrap(); + let cache_cfg = cfg.get_cache_config()?; + let rafs_cfg = cfg.get_rafs_config()?; + println!("\tMode: {}", rafs_cfg.mode); + println!("\tPrefetch: {}", cache_cfg.prefetch.enable); + println!( + "\tPrefetch Merging Size: {}", + cache_cfg.prefetch.batch_size + ); + println!(); + } + } + } + } + } + } + } + + Ok(()) + } +} + +pub(crate) struct CommandMount {} + +impl CommandMount { + pub async fn execute( + &self, + _raw: bool, + client: &NydusdClient, + params: Option, + ) -> Result<()> { + let p = params.unwrap(); + let (source, mountpoint, fs_type) = (&p["source"], &p["mountpoint"], &p["type"]); + let config = std::fs::read_to_string(&p["config"]).unwrap(); + let cmd = json!({"source": source, "fs_type": fs_type, "config": config}).to_string(); + + client + .post( + "v1/mount", + Some(cmd), + Some(vec![("mountpoint", mountpoint)]), + ) + .await + } +} + +pub(crate) struct CommandUmount {} + +impl CommandUmount { + pub async fn execute( + &self, + _raw: bool, + client: &NydusdClient, + params: Option, + ) -> Result<()> { + let p = params.unwrap(); + let mountpoint = &p["mountpoint"]; + + client + .delete("v1/mount", None, Some(vec![("mountpoint", mountpoint)])) + .await + } +} diff --git a/src/bin/nydusctl/main.rs b/src/bin/nydusctl/main.rs index 0d0741fc631..a76939b98be 100644 --- a/src/bin/nydusctl/main.rs +++ b/src/bin/nydusctl/main.rs @@ -1,221 +1,221 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -#![deny(warnings)] - -#[macro_use(crate_authors)] -extern crate clap; -#[macro_use] -extern crate anyhow; -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate serde_json; -extern crate nydus_rafs as rafs; - -use std::collections::HashMap; - -use anyhow::Result; -use clap::{Arg, ArgAction, Command}; - -mod client; -mod commands; - -use commands::{ - CommandBackend, CommandCache, CommandDaemon, CommandFsStats, CommandMount, CommandUmount, -}; -use nydus::get_build_time_info; -use nydus_api::BuildTimeInfo; - -lazy_static! 
{ - static ref BTI: BuildTimeInfo = get_build_time_info().1; -} - -#[tokio::main] -async fn main() -> Result<()> { - let app = Command::new("A client to query and configure the nydusd daemon\n") - .version(BTI.package_ver.as_str()) - .author(crate_authors!()) - .arg( - Arg::new("sock") - .help("Sets file path for the nydusd API socket") - .short('S') - .long("sock") - .required(true) - .global(false), - ) - .arg( - Arg::new("raw") - .help("Outputs messages in plain json mode") - .short('r') - .long("raw") - .action(ArgAction::SetTrue) - .global(true), - ) - .subcommand(Command::new("info").about("Gets information about the nydusd daemon")) - .subcommand( - Command::new("set") - .about("Configures parameters for the nydusd daemon") - .override_help( - r#"Configurable parameters: - : - log-level: trace, debug, info, warn, error"#, - ) - .arg( - Arg::new("KIND") - .help("the parameter to configure") - .required(true) - .value_parser(["log-level"]) - .index(1), - ) - .arg( - Arg::new("VALUE") - .help("the configuration value") - .required(true) - .index(2), - ), - ) - .subcommand( - Command::new("metrics") - .about("Gets runtime metrics about backend, cache and filesystems") - .arg( - Arg::new("category") - .help("the metrics category to fetch") - .required(true) - .value_parser(["backend", "cache", "fsstats"]) - .index(1), - ) - .arg( - Arg::new("interval") - .help("interval to refresh the metrics") - .short('I') - .long("interval") - .required(false), - ), - ) - .subcommand( - Command::new("mount") - .about("Mounts a new filesystem instance") - .arg( - Arg::new("source") - .help("Storage backend for the filesystem instance") - .short('s') - .long("source") - .required(true), - ) - .arg( - Arg::new("config") - .help("Configuration file for the new filesystem instance") - .short('c') - .long("config") - .required(true), - ) - .arg( - Arg::new("mountpoint") - .help("Mountpoint for the new filesystem instance") - .short('m') - .long("mountpoint") - .required(true), - ) - .arg( - Arg::new("type") - .help("Type of the new filesystem instance") - .short('t') - .long("type") - .required(true) - .value_parser(["rafs", "passthrough_fs"]), - ), - ) - .subcommand( - Command::new("umount") - .about("Umounts a filesystem instance") - .arg( - Arg::new("mountpoint") - .help("Mountpoint of the filesystem instance") - .short('m') - .required(true), - ), - ); - - let cmd = app.get_matches(); - - // Safe to unwrap because it is required by Clap - let sock = cmd.get_one::("sock").map(|s| s.as_str()).unwrap(); - let raw = cmd.get_flag("raw"); - let client = client::NydusdClient::new(sock); - - if let Some(_matches) = cmd.subcommand_matches("info") { - let cmd = CommandDaemon {}; - cmd.execute(raw, &client, None).await?; - } else if let Some(matches) = cmd.subcommand_matches("set") { - // Safe to unwrap since the below two arguments are required by clap. 
- let kind = matches.get_one::("KIND").unwrap().to_owned(); - let value = matches.get_one::("VALUE").unwrap().to_owned(); - let mut items = HashMap::new(); - items.insert(kind, value); - - let cmd = CommandDaemon {}; - cmd.execute(raw, &client, Some(items)).await?; - } else if let Some(matches) = cmd.subcommand_matches("metrics") { - // Safe to unwrap as it is required by clap - let category = matches - .get_one::("category") - .map(|s| s.as_str()) - .unwrap(); - let mut context = HashMap::new(); - matches - .get_one::("interval") - .map(|i| context.insert("interval".to_string(), i.to_string())); - - match category { - "backend" => { - let cmd = CommandBackend {}; - cmd.execute(raw, &client, Some(context)).await? - } - "cache" => { - let cmd = CommandCache {}; - cmd.execute(raw, &client, None).await? - } - "fsstats" => { - let cmd = CommandFsStats {}; - cmd.execute(raw, &client, None).await? - } - _ => println!("Illegal category"), - } - } else if let Some(matches) = cmd.subcommand_matches("mount") { - // Safe to unwrap as it is required by clap - let mut context = HashMap::new(); - context.insert( - "source".to_string(), - matches.get_one::("source").unwrap().to_string(), - ); - context.insert( - "mountpoint".to_string(), - matches.get_one::("mountpoint").unwrap().to_string(), - ); - context.insert( - "config".to_string(), - matches.get_one::("config").unwrap().to_string(), - ); - context.insert( - "type".to_string(), - matches.get_one::("type").unwrap().to_string(), - ); - - let cmd = CommandMount {}; - cmd.execute(raw, &client, Some(context)).await? - } else if let Some(matches) = cmd.subcommand_matches("umount") { - // Safe to unwrap as it is required by clap - let mut context = HashMap::new(); - context.insert( - "mountpoint".to_string(), - matches.get_one::("mountpoint").unwrap().to_string(), - ); - - let cmd = CommandUmount {}; - cmd.execute(raw, &client, Some(context)).await? - } - - Ok(()) -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#![deny(warnings)] + +#[macro_use(crate_authors)] +extern crate clap; +#[macro_use] +extern crate anyhow; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate serde_json; +extern crate nydus_rafs as rafs; + +use std::collections::HashMap; + +use anyhow::Result; +use clap::{Arg, ArgAction, Command}; + +mod client; +mod commands; + +use commands::{ + CommandBackend, CommandCache, CommandDaemon, CommandFsStats, CommandMount, CommandUmount, +}; +use nydus::get_build_time_info; +use nydus_api::BuildTimeInfo; + +lazy_static! 
{ + static ref BTI: BuildTimeInfo = get_build_time_info().1; +} + +#[tokio::main] +async fn main() -> Result<()> { + let app = Command::new("A client to query and configure the nydusd daemon\n") + .version(BTI.package_ver.as_str()) + .author(crate_authors!()) + .arg( + Arg::new("sock") + .help("Sets file path for the nydusd API socket") + .short('S') + .long("sock") + .required(true) + .global(false), + ) + .arg( + Arg::new("raw") + .help("Outputs messages in plain json mode") + .short('r') + .long("raw") + .action(ArgAction::SetTrue) + .global(true), + ) + .subcommand(Command::new("info").about("Gets information about the nydusd daemon")) + .subcommand( + Command::new("set") + .about("Configures parameters for the nydusd daemon") + .override_help( + r#"Configurable parameters: + : + log-level: trace, debug, info, warn, error"#, + ) + .arg( + Arg::new("KIND") + .help("the parameter to configure") + .required(true) + .value_parser(["log-level"]) + .index(1), + ) + .arg( + Arg::new("VALUE") + .help("the configuration value") + .required(true) + .index(2), + ), + ) + .subcommand( + Command::new("metrics") + .about("Gets runtime metrics about backend, cache and filesystems") + .arg( + Arg::new("category") + .help("the metrics category to fetch") + .required(true) + .value_parser(["backend", "cache", "fsstats"]) + .index(1), + ) + .arg( + Arg::new("interval") + .help("interval to refresh the metrics") + .short('I') + .long("interval") + .required(false), + ), + ) + .subcommand( + Command::new("mount") + .about("Mounts a new filesystem instance") + .arg( + Arg::new("source") + .help("Storage backend for the filesystem instance") + .short('s') + .long("source") + .required(true), + ) + .arg( + Arg::new("config") + .help("Configuration file for the new filesystem instance") + .short('c') + .long("config") + .required(true), + ) + .arg( + Arg::new("mountpoint") + .help("Mountpoint for the new filesystem instance") + .short('m') + .long("mountpoint") + .required(true), + ) + .arg( + Arg::new("type") + .help("Type of the new filesystem instance") + .short('t') + .long("type") + .required(true) + .value_parser(["rafs", "passthrough_fs"]), + ), + ) + .subcommand( + Command::new("umount") + .about("Umounts a filesystem instance") + .arg( + Arg::new("mountpoint") + .help("Mountpoint of the filesystem instance") + .short('m') + .required(true), + ), + ); + + let cmd = app.get_matches(); + + // Safe to unwrap because it is required by Clap + let sock = cmd.get_one::("sock").map(|s| s.as_str()).unwrap(); + let raw = cmd.get_flag("raw"); + let client = client::NydusdClient::new(sock); + + if let Some(_matches) = cmd.subcommand_matches("info") { + let cmd = CommandDaemon {}; + cmd.execute(raw, &client, None).await?; + } else if let Some(matches) = cmd.subcommand_matches("set") { + // Safe to unwrap since the below two arguments are required by clap. 
+ let kind = matches.get_one::("KIND").unwrap().to_owned(); + let value = matches.get_one::("VALUE").unwrap().to_owned(); + let mut items = HashMap::new(); + items.insert(kind, value); + + let cmd = CommandDaemon {}; + cmd.execute(raw, &client, Some(items)).await?; + } else if let Some(matches) = cmd.subcommand_matches("metrics") { + // Safe to unwrap as it is required by clap + let category = matches + .get_one::("category") + .map(|s| s.as_str()) + .unwrap(); + let mut context = HashMap::new(); + matches + .get_one::("interval") + .map(|i| context.insert("interval".to_string(), i.to_string())); + + match category { + "backend" => { + let cmd = CommandBackend {}; + cmd.execute(raw, &client, Some(context)).await? + } + "cache" => { + let cmd = CommandCache {}; + cmd.execute(raw, &client, None).await? + } + "fsstats" => { + let cmd = CommandFsStats {}; + cmd.execute(raw, &client, None).await? + } + _ => println!("Illegal category"), + } + } else if let Some(matches) = cmd.subcommand_matches("mount") { + // Safe to unwrap as it is required by clap + let mut context = HashMap::new(); + context.insert( + "source".to_string(), + matches.get_one::("source").unwrap().to_string(), + ); + context.insert( + "mountpoint".to_string(), + matches.get_one::("mountpoint").unwrap().to_string(), + ); + context.insert( + "config".to_string(), + matches.get_one::("config").unwrap().to_string(), + ); + context.insert( + "type".to_string(), + matches.get_one::("type").unwrap().to_string(), + ); + + let cmd = CommandMount {}; + cmd.execute(raw, &client, Some(context)).await? + } else if let Some(matches) = cmd.subcommand_matches("umount") { + // Safe to unwrap as it is required by clap + let mut context = HashMap::new(); + context.insert( + "mountpoint".to_string(), + matches.get_one::("mountpoint").unwrap().to_string(), + ); + + let cmd = CommandUmount {}; + cmd.execute(raw, &client, Some(context)).await? + } + + Ok(()) +} diff --git a/src/bin/nydusd/api_server_glue.rs b/src/bin/nydusd/api_server_glue.rs index bd1b6510dfa..165446d2d34 100644 --- a/src/bin/nydusd/api_server_glue.rs +++ b/src/bin/nydusd/api_server_glue.rs @@ -1,440 +1,440 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -use std::io::Result; -use std::str::FromStr; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::Arc; -use std::thread::JoinHandle; - -use mio::Waker; -use nix::sys::signal::{kill, SIGTERM}; -use nix::unistd::Pid; - -use nydus::daemon::NydusDaemon; -use nydus::{FsBackendMountCmd, FsBackendType, FsBackendUmountCmd, FsService}; -use nydus_api::{ - start_http_thread, ApiError, ApiMountCmd, ApiRequest, ApiResponse, ApiResponsePayload, - ApiResult, BlobCacheEntry, BlobCacheObjectId, DaemonConf, DaemonErrorKind, MetricsErrorKind, -}; -use nydus_utils::metrics; - -use crate::DAEMON_CONTROLLER; - -struct ApiServer { - to_http: Sender, -} - -impl ApiServer { - fn new(to_http: Sender) -> Result { - Ok(ApiServer { to_http }) - } - - fn process_request(&self, request: ApiRequest) -> Result<()> { - let resp = match request { - // Common (v1/v2) - ApiRequest::ConfigureDaemon(conf) => self.configure_daemon(conf), - ApiRequest::GetDaemonInfo => self.daemon_info(true), - ApiRequest::GetEvents => Self::events(), - ApiRequest::Exit => self.do_exit(), - ApiRequest::Start => self.do_start(), - ApiRequest::SendFuseFd => self.send_fuse_fd(), - ApiRequest::TakeoverFuseFd => self.do_takeover(), - ApiRequest::Mount(mountpoint, info) => self.do_mount(mountpoint, info), - ApiRequest::Remount(mountpoint, info) => self.do_remount(mountpoint, info), - ApiRequest::Umount(mountpoint) => self.do_umount(mountpoint), - ApiRequest::ExportBackendMetrics(id) => Self::export_backend_metrics(id), - ApiRequest::ExportBlobcacheMetrics(id) => Self::export_blobcache_metrics(id), - - // Nydus API v1 - ApiRequest::ExportFsGlobalMetrics(id) => Self::export_global_metrics(id), - ApiRequest::ExportFsFilesMetrics(id, latest_read_files) => { - Self::export_files_metrics(id, latest_read_files) - } - ApiRequest::ExportFsAccessPatterns(id) => Self::export_access_patterns(id), - ApiRequest::ExportFsBackendInfo(mountpoint) => self.backend_info(&mountpoint), - ApiRequest::ExportFsInflightMetrics => self.export_inflight_metrics(), - - // Nydus API v2 - ApiRequest::GetDaemonInfoV2 => self.daemon_info(false), - ApiRequest::GetBlobObject(_param) => todo!(), - ApiRequest::CreateBlobObject(entry) => self.create_blob_cache_entry(&entry), - ApiRequest::DeleteBlobObject(param) => self.remove_blob_cache_entry(¶m), - ApiRequest::DeleteBlobFile(blob_id) => self.blob_cache_gc(blob_id), - }; - - self.respond(resp); - - Ok(()) - } - - fn respond(&self, resp: ApiResult) { - if let Err(e) = self.to_http.send(resp) { - error!("send API response failed {}", e); - } - } - - fn configure_daemon(&self, conf: DaemonConf) -> ApiResponse { - conf.log_level - .parse::() - .map_err(|e| { - error!("Invalid log level passed, {}", e); - ApiError::ResponsePayloadType - }) - .map(|v| { - log::set_max_level(v); - ApiResponsePayload::Empty - }) - } - - fn daemon_info(&self, include_fs_info: bool) -> ApiResponse { - self.get_daemon_object()? - .export_info(include_fs_info) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into()))) - .map(ApiResponsePayload::DaemonInfo) - } - - /// External supervisor wants this instance to exit. But it can't just die leave - /// some pending or in-flight fuse messages un-handled. So this method guarantees - /// all fuse messages read from kernel are handled and replies are sent back. - /// Before http response are sent back, this must can ensure that current process - /// has absolutely stopped. 
Otherwise, multiple processes might read from single - /// fuse session simultaneously. - fn do_exit(&self) -> ApiResponse { - let d = self.get_daemon_object()?; - d.trigger_exit() - .map(|_| { - info!("exit daemon by http request"); - ApiResponsePayload::Empty - }) - .map_err(|e| ApiError::DaemonAbnormal(e.into()))?; - - // Should be reliable since this Api server works under event manager. - kill(Pid::this(), SIGTERM).unwrap_or_else(|e| error!("Send signal error. {}", e)); - - Ok(ApiResponsePayload::Empty) - } - - /// External supervisor wants this instance to fetch `/dev/fuse` fd. Before - /// invoking this method, supervisor should already listen on a Unix socket and - /// waits for connection from this instance. Then supervisor should send the *fd* - /// back. Note, the http response does not mean this process already finishes Takeover - /// procedure. Supervisor has to continuously query the state of Nydusd until it gets - /// to *RUNNING*, which means new Nydusd has successfully served as a fuse server. - fn do_takeover(&self) -> ApiResponse { - let d = self.get_daemon_object()?; - d.trigger_takeover() - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::DaemonAbnormal(e.into())) - } - - fn events() -> ApiResponse { - let events = metrics::export_events().map_err(|e| ApiError::Events(format!("{:?}", e)))?; - Ok(ApiResponsePayload::Events(events)) - } - - fn export_global_metrics(id: Option) -> ApiResponse { - metrics::export_global_stats(&id) - .map(ApiResponsePayload::FsGlobalMetrics) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) - } - - fn export_files_metrics(id: Option, latest_read_files: bool) -> ApiResponse { - // TODO: Use mount point name to refer to per rafs metrics. - metrics::export_files_stats(&id, latest_read_files) - .map(ApiResponsePayload::FsFilesMetrics) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) - } - - fn export_access_patterns(id: Option) -> ApiResponse { - metrics::export_files_access_pattern(&id) - .map(ApiResponsePayload::FsFilesPatterns) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) - } - - fn export_backend_metrics(id: Option) -> ApiResponse { - metrics::export_backend_metrics(&id) - .map(ApiResponsePayload::BackendMetrics) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) - } - - fn export_blobcache_metrics(id: Option) -> ApiResponse { - metrics::export_blobcache_metrics(&id) - .map(ApiResponsePayload::BlobcacheMetrics) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) - } - - #[inline] - fn get_daemon_object(&self) -> std::result::Result, ApiError> { - Ok(DAEMON_CONTROLLER.get_daemon()) - } - - fn backend_info(&self, mountpoint: &str) -> ApiResponse { - let info = self - .get_default_fs_service()? - .export_backend_info(mountpoint) - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into())))?; - Ok(ApiResponsePayload::FsBackendInfo(info)) - } - - /// Detect if there is fop being hang. - /// `ApiResponsePayload::Empty` will be converted to http status code 204, which means - /// there is no requests being processed right now. 
- /// Otherwise, json body within http response is provided, - /// ```json - /// [ - /// { - /// "inode": 72057594037929010, - /// "opcode": 44, - /// "unique": 22728, - /// "timestamp_secs": 1612245570 - /// }, - /// { - /// "inode": 72057594037928480, - /// "opcode": 15, - /// "unique": 22656, - /// "timestamp_secs": 1612245570 - /// }, - /// { - /// "inode": 72057594037928940, - /// "opcode": 15, - /// "unique": 22700, - /// "timestamp_secs": 1612245570 - /// } - /// ] - /// It means 3 threads are processing inflight requests. - fn export_inflight_metrics(&self) -> ApiResponse { - // TODO: Implement automatic error conversion between DaemonError and ApiError. - let fs = self.get_default_fs_service()?; - if let Some(ops) = fs - .export_inflight_ops() - .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into())))? - { - Ok(ApiResponsePayload::FsInflightMetrics(ops)) - } else { - Ok(ApiResponsePayload::Empty) - } - } - - fn do_mount(&self, mountpoint: String, cmd: ApiMountCmd) -> ApiResponse { - let fs_type = FsBackendType::from_str(&cmd.fs_type) - .map_err(|e| ApiError::MountFilesystem(e.into()))?; - let fs = self.get_default_fs_service()?; - fs.mount(FsBackendMountCmd { - fs_type, - mountpoint, - config: cmd.config, - source: cmd.source, - prefetch_files: cmd.prefetch_files, - }) - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::MountFilesystem(e.into())) - } - - fn do_remount(&self, mountpoint: String, cmd: ApiMountCmd) -> ApiResponse { - let fs_type = FsBackendType::from_str(&cmd.fs_type) - .map_err(|e| ApiError::MountFilesystem(e.into()))?; - self.get_default_fs_service()? - .remount(FsBackendMountCmd { - fs_type, - mountpoint, - config: cmd.config, - source: cmd.source, - prefetch_files: cmd.prefetch_files, - }) - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::MountFilesystem(e.into())) - } - - fn do_umount(&self, mountpoint: String) -> ApiResponse { - self.get_default_fs_service()? 
- .umount(FsBackendUmountCmd { mountpoint }) - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::MountFilesystem(e.into())) - } - - fn send_fuse_fd(&self) -> ApiResponse { - let d = self.get_daemon_object()?; - - d.save() - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::DaemonAbnormal(e.into())) - } - - fn get_default_fs_service(&self) -> std::result::Result, ApiError> { - DAEMON_CONTROLLER - .get_fs_service() - .ok_or(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)) - } - - // HTTP API v2 - fn create_blob_cache_entry(&self, entry: &BlobCacheEntry) -> ApiResponse { - match DAEMON_CONTROLLER.get_blob_cache_mgr() { - None => Err(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)), - Some(mgr) => { - if let Err(e) = mgr.add_blob_entry(entry) { - Err(ApiError::DaemonAbnormal(DaemonErrorKind::Other(format!( - "{}", - e - )))) - } else { - if let Some(mut mgr_guard) = self.get_daemon_object()?.upgrade_mgr() { - // if started with supervisor, save the blob entry state - mgr_guard.add_blob_entry_state(entry.clone()); - } - Ok(ApiResponsePayload::Empty) - } - } - } - } - - fn remove_blob_cache_entry(&self, param: &BlobCacheObjectId) -> ApiResponse { - match DAEMON_CONTROLLER.get_blob_cache_mgr() { - None => Err(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)), - Some(mgr) => { - if let Err(e) = mgr.remove_blob_entry(param) { - Err(ApiError::DaemonAbnormal(DaemonErrorKind::Other(format!( - "{}", - e - )))) - } else { - if let Some(mut mgr_guard) = self.get_daemon_object()?.upgrade_mgr() { - mgr_guard.remove_blob_entry_state(¶m.domain_id, ¶m.blob_id); - } - Ok(ApiResponsePayload::Empty) - } - } - } - } - - fn blob_cache_gc(&self, blob_id: String) -> ApiResponse { - self.get_daemon_object()? - .delete_blob(blob_id) - .map_err(|e| ApiError::DaemonAbnormal(e.into())) - .map(|_| ApiResponsePayload::Empty) - } - - fn do_start(&self) -> ApiResponse { - let d = self.get_daemon_object()?; - d.trigger_start() - .map(|_| ApiResponsePayload::Empty) - .map_err(|e| ApiError::DaemonAbnormal(e.into())) - } -} - -struct ApiServerHandler { - server: ApiServer, - api_receiver: Receiver>, -} - -impl ApiServerHandler { - fn new(server: ApiServer, api_receiver: Receiver>) -> Result { - Ok(Self { - server, - api_receiver, - }) - } - - fn handle_requests_from_router(&self) { - loop { - match self.api_receiver.recv() { - Ok(request) => { - if let Some(req) = request { - self.server.process_request(req).unwrap_or_else(|e| { - error!("HTTP handler failed to process request, {}", e) - }); - } else { - debug!("Received exit notification from the HTTP router"); - return; - } - } - Err(_e) => { - error!("Failed to receive request from the HTTP router"); - return; - } - } - } - } -} - -/// HTTP API server to serve the administration socket. -pub struct ApiServerController { - http_handler_thread: Option>>, - http_router_thread: Option>>, - sock: Option, - waker: Option>, -} - -impl ApiServerController { - /// Create a new instance of `ApiServerController`. - pub fn new(sock: Option<&str>) -> Self { - ApiServerController { - sock: sock.map(|v| v.to_string()), - http_handler_thread: None, - http_router_thread: None, - waker: None, - } - } - - /// Try to start the HTTP working thread. - pub fn start(&mut self) -> Result<()> { - if self.sock.is_none() { - return Ok(()); - } - - // Safe to unwrap() because self.sock is valid. 
- let apisock = self.sock.as_ref().unwrap(); - let (to_handler, from_router) = channel(); - let (to_router, from_handler) = channel(); - let api_server = ApiServer::new(to_router)?; - let api_handler = ApiServerHandler::new(api_server, from_router)?; - let (router_thread, waker) = start_http_thread(apisock, to_handler, from_handler)?; - let daemon_waker = DAEMON_CONTROLLER.alloc_waker(); - - info!("HTTP API server running at {}", apisock); - let handler_thread = std::thread::Builder::new() - .name("api-server".to_string()) - .spawn(move || { - api_handler.handle_requests_from_router(); - info!("HTTP api-server handler thread exits"); - let _ = daemon_waker.wake(); - Ok(()) - }) - .map_err(|_e| einval!("Failed to start work thread for HTTP handler"))?; - - self.waker = Some(waker); - self.http_handler_thread = Some(handler_thread); - self.http_router_thread = Some(router_thread); - - Ok(()) - } - - /// Stop the HTTP working thread. - pub fn stop(&mut self) { - // Signal the HTTP router thread to exit, which will then notify the HTTP handler thread. - if let Some(waker) = self.waker.take() { - let _ = waker.wake(); - } - if let Some(t) = self.http_handler_thread.take() { - if let Err(e) = t.join() { - error!( - "Failed to join the HTTP handler thread, execution error. {:?}", - e - ); - } - } - if let Some(t) = self.http_router_thread.take() { - if let Err(e) = t.join() { - error!( - "Failed to join the HTTP router thread, execution error. {:?}", - e - ); - } - } - if let Some(apisock) = self.sock.as_ref() { - std::fs::remove_file(apisock).unwrap_or_default(); - } - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +use std::io::Result; +use std::str::FromStr; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::Arc; +use std::thread::JoinHandle; + +use mio::Waker; +use nix::sys::signal::{kill, SIGTERM}; +use nix::unistd::Pid; + +use nydus::daemon::NydusDaemon; +use nydus::{FsBackendMountCmd, FsBackendType, FsBackendUmountCmd, FsService}; +use nydus_api::{ + start_http_thread, ApiError, ApiMountCmd, ApiRequest, ApiResponse, ApiResponsePayload, + ApiResult, BlobCacheEntry, BlobCacheObjectId, DaemonConf, DaemonErrorKind, MetricsErrorKind, +}; +use nydus_utils::metrics; + +use crate::DAEMON_CONTROLLER; + +struct ApiServer { + to_http: Sender, +} + +impl ApiServer { + fn new(to_http: Sender) -> Result { + Ok(ApiServer { to_http }) + } + + fn process_request(&self, request: ApiRequest) -> Result<()> { + let resp = match request { + // Common (v1/v2) + ApiRequest::ConfigureDaemon(conf) => self.configure_daemon(conf), + ApiRequest::GetDaemonInfo => self.daemon_info(true), + ApiRequest::GetEvents => Self::events(), + ApiRequest::Exit => self.do_exit(), + ApiRequest::Start => self.do_start(), + ApiRequest::SendFuseFd => self.send_fuse_fd(), + ApiRequest::TakeoverFuseFd => self.do_takeover(), + ApiRequest::Mount(mountpoint, info) => self.do_mount(mountpoint, info), + ApiRequest::Remount(mountpoint, info) => self.do_remount(mountpoint, info), + ApiRequest::Umount(mountpoint) => self.do_umount(mountpoint), + ApiRequest::ExportBackendMetrics(id) => Self::export_backend_metrics(id), + ApiRequest::ExportBlobcacheMetrics(id) => Self::export_blobcache_metrics(id), + + // Nydus API v1 + ApiRequest::ExportFsGlobalMetrics(id) => Self::export_global_metrics(id), + ApiRequest::ExportFsFilesMetrics(id, latest_read_files) => { + Self::export_files_metrics(id, 
latest_read_files) + } + ApiRequest::ExportFsAccessPatterns(id) => Self::export_access_patterns(id), + ApiRequest::ExportFsBackendInfo(mountpoint) => self.backend_info(&mountpoint), + ApiRequest::ExportFsInflightMetrics => self.export_inflight_metrics(), + + // Nydus API v2 + ApiRequest::GetDaemonInfoV2 => self.daemon_info(false), + ApiRequest::GetBlobObject(_param) => todo!(), + ApiRequest::CreateBlobObject(entry) => self.create_blob_cache_entry(&entry), + ApiRequest::DeleteBlobObject(param) => self.remove_blob_cache_entry(¶m), + ApiRequest::DeleteBlobFile(blob_id) => self.blob_cache_gc(blob_id), + }; + + self.respond(resp); + + Ok(()) + } + + fn respond(&self, resp: ApiResult) { + if let Err(e) = self.to_http.send(resp) { + error!("send API response failed {}", e); + } + } + + fn configure_daemon(&self, conf: DaemonConf) -> ApiResponse { + conf.log_level + .parse::() + .map_err(|e| { + error!("Invalid log level passed, {}", e); + ApiError::ResponsePayloadType + }) + .map(|v| { + log::set_max_level(v); + ApiResponsePayload::Empty + }) + } + + fn daemon_info(&self, include_fs_info: bool) -> ApiResponse { + self.get_daemon_object()? + .export_info(include_fs_info) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into()))) + .map(ApiResponsePayload::DaemonInfo) + } + + /// External supervisor wants this instance to exit. But it can't just die leave + /// some pending or in-flight fuse messages un-handled. So this method guarantees + /// all fuse messages read from kernel are handled and replies are sent back. + /// Before http response are sent back, this must can ensure that current process + /// has absolutely stopped. Otherwise, multiple processes might read from single + /// fuse session simultaneously. + fn do_exit(&self) -> ApiResponse { + let d = self.get_daemon_object()?; + d.trigger_exit() + .map(|_| { + info!("exit daemon by http request"); + ApiResponsePayload::Empty + }) + .map_err(|e| ApiError::DaemonAbnormal(e.into()))?; + + // Should be reliable since this Api server works under event manager. + kill(Pid::this(), SIGTERM).unwrap_or_else(|e| error!("Send signal error. {}", e)); + + Ok(ApiResponsePayload::Empty) + } + + /// External supervisor wants this instance to fetch `/dev/fuse` fd. Before + /// invoking this method, supervisor should already listen on a Unix socket and + /// waits for connection from this instance. Then supervisor should send the *fd* + /// back. Note, the http response does not mean this process already finishes Takeover + /// procedure. Supervisor has to continuously query the state of Nydusd until it gets + /// to *RUNNING*, which means new Nydusd has successfully served as a fuse server. + fn do_takeover(&self) -> ApiResponse { + let d = self.get_daemon_object()?; + d.trigger_takeover() + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::DaemonAbnormal(e.into())) + } + + fn events() -> ApiResponse { + let events = metrics::export_events().map_err(|e| ApiError::Events(format!("{:?}", e)))?; + Ok(ApiResponsePayload::Events(events)) + } + + fn export_global_metrics(id: Option) -> ApiResponse { + metrics::export_global_stats(&id) + .map(ApiResponsePayload::FsGlobalMetrics) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) + } + + fn export_files_metrics(id: Option, latest_read_files: bool) -> ApiResponse { + // TODO: Use mount point name to refer to per rafs metrics. 
+ metrics::export_files_stats(&id, latest_read_files) + .map(ApiResponsePayload::FsFilesMetrics) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) + } + + fn export_access_patterns(id: Option) -> ApiResponse { + metrics::export_files_access_pattern(&id) + .map(ApiResponsePayload::FsFilesPatterns) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) + } + + fn export_backend_metrics(id: Option) -> ApiResponse { + metrics::export_backend_metrics(&id) + .map(ApiResponsePayload::BackendMetrics) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) + } + + fn export_blobcache_metrics(id: Option) -> ApiResponse { + metrics::export_blobcache_metrics(&id) + .map(ApiResponsePayload::BlobcacheMetrics) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Stats(e))) + } + + #[inline] + fn get_daemon_object(&self) -> std::result::Result, ApiError> { + Ok(DAEMON_CONTROLLER.get_daemon()) + } + + fn backend_info(&self, mountpoint: &str) -> ApiResponse { + let info = self + .get_default_fs_service()? + .export_backend_info(mountpoint) + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into())))?; + Ok(ApiResponsePayload::FsBackendInfo(info)) + } + + /// Detect if there is fop being hang. + /// `ApiResponsePayload::Empty` will be converted to http status code 204, which means + /// there is no requests being processed right now. + /// Otherwise, json body within http response is provided, + /// ```json + /// [ + /// { + /// "inode": 72057594037929010, + /// "opcode": 44, + /// "unique": 22728, + /// "timestamp_secs": 1612245570 + /// }, + /// { + /// "inode": 72057594037928480, + /// "opcode": 15, + /// "unique": 22656, + /// "timestamp_secs": 1612245570 + /// }, + /// { + /// "inode": 72057594037928940, + /// "opcode": 15, + /// "unique": 22700, + /// "timestamp_secs": 1612245570 + /// } + /// ] + /// It means 3 threads are processing inflight requests. + fn export_inflight_metrics(&self) -> ApiResponse { + // TODO: Implement automatic error conversion between DaemonError and ApiError. + let fs = self.get_default_fs_service()?; + if let Some(ops) = fs + .export_inflight_ops() + .map_err(|e| ApiError::Metrics(MetricsErrorKind::Daemon(e.into())))? + { + Ok(ApiResponsePayload::FsInflightMetrics(ops)) + } else { + Ok(ApiResponsePayload::Empty) + } + } + + fn do_mount(&self, mountpoint: String, cmd: ApiMountCmd) -> ApiResponse { + let fs_type = FsBackendType::from_str(&cmd.fs_type) + .map_err(|e| ApiError::MountFilesystem(e.into()))?; + let fs = self.get_default_fs_service()?; + fs.mount(FsBackendMountCmd { + fs_type, + mountpoint, + config: cmd.config, + source: cmd.source, + prefetch_files: cmd.prefetch_files, + }) + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::MountFilesystem(e.into())) + } + + fn do_remount(&self, mountpoint: String, cmd: ApiMountCmd) -> ApiResponse { + let fs_type = FsBackendType::from_str(&cmd.fs_type) + .map_err(|e| ApiError::MountFilesystem(e.into()))?; + self.get_default_fs_service()? + .remount(FsBackendMountCmd { + fs_type, + mountpoint, + config: cmd.config, + source: cmd.source, + prefetch_files: cmd.prefetch_files, + }) + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::MountFilesystem(e.into())) + } + + fn do_umount(&self, mountpoint: String) -> ApiResponse { + self.get_default_fs_service()? 
+ .umount(FsBackendUmountCmd { mountpoint }) + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::MountFilesystem(e.into())) + } + + fn send_fuse_fd(&self) -> ApiResponse { + let d = self.get_daemon_object()?; + + d.save() + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::DaemonAbnormal(e.into())) + } + + fn get_default_fs_service(&self) -> std::result::Result, ApiError> { + DAEMON_CONTROLLER + .get_fs_service() + .ok_or(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)) + } + + // HTTP API v2 + fn create_blob_cache_entry(&self, entry: &BlobCacheEntry) -> ApiResponse { + match DAEMON_CONTROLLER.get_blob_cache_mgr() { + None => Err(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)), + Some(mgr) => { + if let Err(e) = mgr.add_blob_entry(entry) { + Err(ApiError::DaemonAbnormal(DaemonErrorKind::Other(format!( + "{}", + e + )))) + } else { + if let Some(mut mgr_guard) = self.get_daemon_object()?.upgrade_mgr() { + // if started with supervisor, save the blob entry state + mgr_guard.add_blob_entry_state(entry.clone()); + } + Ok(ApiResponsePayload::Empty) + } + } + } + } + + fn remove_blob_cache_entry(&self, param: &BlobCacheObjectId) -> ApiResponse { + match DAEMON_CONTROLLER.get_blob_cache_mgr() { + None => Err(ApiError::DaemonAbnormal(DaemonErrorKind::Unsupported)), + Some(mgr) => { + if let Err(e) = mgr.remove_blob_entry(param) { + Err(ApiError::DaemonAbnormal(DaemonErrorKind::Other(format!( + "{}", + e + )))) + } else { + if let Some(mut mgr_guard) = self.get_daemon_object()?.upgrade_mgr() { + mgr_guard.remove_blob_entry_state(¶m.domain_id, ¶m.blob_id); + } + Ok(ApiResponsePayload::Empty) + } + } + } + } + + fn blob_cache_gc(&self, blob_id: String) -> ApiResponse { + self.get_daemon_object()? + .delete_blob(blob_id) + .map_err(|e| ApiError::DaemonAbnormal(e.into())) + .map(|_| ApiResponsePayload::Empty) + } + + fn do_start(&self) -> ApiResponse { + let d = self.get_daemon_object()?; + d.trigger_start() + .map(|_| ApiResponsePayload::Empty) + .map_err(|e| ApiError::DaemonAbnormal(e.into())) + } +} + +struct ApiServerHandler { + server: ApiServer, + api_receiver: Receiver>, +} + +impl ApiServerHandler { + fn new(server: ApiServer, api_receiver: Receiver>) -> Result { + Ok(Self { + server, + api_receiver, + }) + } + + fn handle_requests_from_router(&self) { + loop { + match self.api_receiver.recv() { + Ok(request) => { + if let Some(req) = request { + self.server.process_request(req).unwrap_or_else(|e| { + error!("HTTP handler failed to process request, {}", e) + }); + } else { + debug!("Received exit notification from the HTTP router"); + return; + } + } + Err(_e) => { + error!("Failed to receive request from the HTTP router"); + return; + } + } + } + } +} + +/// HTTP API server to serve the administration socket. +pub struct ApiServerController { + http_handler_thread: Option>>, + http_router_thread: Option>>, + sock: Option, + waker: Option>, +} + +impl ApiServerController { + /// Create a new instance of `ApiServerController`. + pub fn new(sock: Option<&str>) -> Self { + ApiServerController { + sock: sock.map(|v| v.to_string()), + http_handler_thread: None, + http_router_thread: None, + waker: None, + } + } + + /// Try to start the HTTP working thread. + pub fn start(&mut self) -> Result<()> { + if self.sock.is_none() { + return Ok(()); + } + + // Safe to unwrap() because self.sock is valid. 
+ let apisock = self.sock.as_ref().unwrap(); + let (to_handler, from_router) = channel(); + let (to_router, from_handler) = channel(); + let api_server = ApiServer::new(to_router)?; + let api_handler = ApiServerHandler::new(api_server, from_router)?; + let (router_thread, waker) = start_http_thread(apisock, to_handler, from_handler)?; + let daemon_waker = DAEMON_CONTROLLER.alloc_waker(); + + info!("HTTP API server running at {}", apisock); + let handler_thread = std::thread::Builder::new() + .name("api-server".to_string()) + .spawn(move || { + api_handler.handle_requests_from_router(); + info!("HTTP api-server handler thread exits"); + let _ = daemon_waker.wake(); + Ok(()) + }) + .map_err(|_e| einval!("Failed to start work thread for HTTP handler"))?; + + self.waker = Some(waker); + self.http_handler_thread = Some(handler_thread); + self.http_router_thread = Some(router_thread); + + Ok(()) + } + + /// Stop the HTTP working thread. + pub fn stop(&mut self) { + // Signal the HTTP router thread to exit, which will then notify the HTTP handler thread. + if let Some(waker) = self.waker.take() { + let _ = waker.wake(); + } + if let Some(t) = self.http_handler_thread.take() { + if let Err(e) = t.join() { + error!( + "Failed to join the HTTP handler thread, execution error. {:?}", + e + ); + } + } + if let Some(t) = self.http_router_thread.take() { + if let Err(e) = t.join() { + error!( + "Failed to join the HTTP router thread, execution error. {:?}", + e + ); + } + } + if let Some(apisock) = self.sock.as_ref() { + std::fs::remove_file(apisock).unwrap_or_default(); + } + } +} diff --git a/src/bin/nydusd/main.rs b/src/bin/nydusd/main.rs index fc5e4b7a6b8..0067087d870 100644 --- a/src/bin/nydusd/main.rs +++ b/src/bin/nydusd/main.rs @@ -1,806 +1,806 @@ -// Copyright 2022 Alibaba Cloud. All rights reserved. -// Copyright 2020 Ant Group. All rights reserved. -// Copyright 2019 Intel Corporation. All Rights Reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) -#![deny(warnings)] - -#[macro_use] -extern crate log; -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate nydus_api; - -use std::convert::TryInto; -use std::io::{Error, ErrorKind, Result}; - -use clap::{Arg, ArgAction, ArgMatches, Command}; -use nix::sys::signal; -use rlimit::Resource; - -use nydus::{dump_program_info, get_build_time_info, setup_logging, SubCmdArgs}; -use nydus_api::{BuildTimeInfo, ConfigV2}; -use nydus_service::daemon::DaemonController; -use nydus_service::{ - create_daemon, create_fuse_daemon, create_vfs_backend, validate_threads_configuration, - Error as NydusError, FsBackendMountCmd, FsBackendType, ServiceArgs, -}; - -use crate::api_server_glue::ApiServerController; - -#[cfg(feature = "virtiofs")] -mod virtiofs; - -mod api_server_glue; - -/// Minimal number of file descriptors reserved for system. -const RLIMIT_NOFILE_RESERVED: u64 = 16384; -/// Default number of file descriptors. -const RLIMIT_NOFILE_MAX: u64 = 1_000_000; - -lazy_static! 
{ - static ref DAEMON_CONTROLLER: DaemonController = DaemonController::new(); - static ref BTI_STRING: String = get_build_time_info().0; - static ref BTI: BuildTimeInfo = get_build_time_info().1; -} - -fn thread_validator(v: &str) -> std::result::Result { - validate_threads_configuration(v).map(|s| s.to_string()) -} - -fn append_fs_options(app: Command) -> Command { - app.arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Path to the RAFS filesystem metadata file") - .conflicts_with("shared-dir"), - ) - .arg( - Arg::new("localfs-dir") - .long("localfs-dir") - .short('D') - .help( - "Path to the `localfs` working directory, which also enables the `localfs` storage backend" - ) - .conflicts_with("config"), - ) - .arg( - Arg::new("shared-dir") - .long("shared-dir") - .short('s') - .help("Path to the directory to be shared via the `passthroughfs` FUSE driver") - ) - .arg( - Arg::new("prefetch-files") - .long("prefetch-files") - .help("Path to the prefetch configuration file containing a list of directories/files separated by newlines") - .required(false) - .requires("bootstrap") - .num_args(1), - ) - .arg( - Arg::new("virtual-mountpoint") - .long("virtual-mountpoint") - .short('m') - .help("Mountpoint within the FUSE/virtiofs device to mount the RAFS/passthroughfs filesystem") - .default_value("/") - .required(false), - ) -} - -fn append_fuse_options(app: Command) -> Command { - app.arg( - Arg::new("mountpoint") - .long("mountpoint") - .short('M') - .help("Mountpoint for the FUSE filesystem, target for `mount.fuse`") - .required(true), - ) - .arg( - Arg::new("failover-policy") - .long("failover-policy") - .default_value("resend") - .help("FUSE server failover policy") - .value_parser(["resend", "flush"]) - .required(false), - ) - .arg( - Arg::new("fuse-threads") - .long("fuse-threads") - .alias("thread-num") - .default_value("4") - .help("Number of worker threads to serve FUSE I/O requests") - .value_parser(thread_validator) - .required(false), - ) - .arg( - Arg::new("writable") - .long("writable") - .action(ArgAction::SetTrue) - .help("Mounts FUSE filesystem in rw mode"), - ) -} - -fn append_fuse_subcmd_options(cmd: Command) -> Command { - let subcmd = Command::new("fuse").about("Run the Nydus daemon as a dedicated FUSE server"); - let subcmd = append_fuse_options(subcmd); - let subcmd = append_fs_options(subcmd); - cmd.subcommand(subcmd) -} - -#[cfg(feature = "virtiofs")] -fn append_virtiofs_options(cmd: Command) -> Command { - cmd.arg( - Arg::new("hybrid-mode") - .long("hybrid-mode") - .help("Enables both `RAFS` and `passthroughfs` filesystem drivers") - .action(ArgAction::SetFalse) - .required(false), - ) - .arg( - Arg::new("sock") - .long("sock") - .help("Path to the vhost-user API socket") - .required(true), - ) -} - -#[cfg(feature = "virtiofs")] -fn append_virtiofs_subcmd_options(cmd: Command) -> Command { - let subcmd = - Command::new("virtiofs").about("Run the Nydus daemon as a dedicated virtio-fs server"); - let subcmd = append_virtiofs_options(subcmd); - let subcmd = append_fs_options(subcmd); - cmd.subcommand(subcmd) -} - -fn append_fscache_options(app: Command) -> Command { - app.arg( - Arg::new("fscache") - .long("fscache") - .short('F') - .help("Working directory for Linux fscache driver to store cache files"), - ) - .arg( - Arg::new("fscache-tag") - .long("fscache-tag") - .help("Tag to identify the fscache daemon instance") - .requires("fscache"), - ) - .arg( - Arg::new("fscache-threads") - .long("fscache-threads") - .default_value("4") - .help("Number of 
working threads to serve fscache requests") - .required(false) - .value_parser(thread_validator), - ) -} - -fn append_singleton_subcmd_options(cmd: Command) -> Command { - let subcmd = Command::new("singleton") - .about("Run the Nydus daemon to host multiple blobcache/fscache/fuse/virtio-fs services"); - let subcmd = append_fscache_options(subcmd); - - // TODO: enable support of fuse service - /* - let subcmd = subcmd.arg( - Arg::new("fuse") - .long("fuse") - .short("f") - .help("Run as a shared FUSE server"), - ); - let subcmd = append_fuse_options(subcmd); - let subcmd = append_fs_options(subcmd); - */ - - cmd.subcommand(subcmd) -} - -fn prepare_commandline_options() -> Command { - let cmdline = Command::new("nydusd") - .about("Nydus daemon to provide BlobCache, FsCache, FUSE, Virtio-fs and container image services") - .arg( - Arg::new("apisock") - .long("apisock") - .short('A') - .help("Path to the Nydus daemon administration API socket") - .required(false) - .global(true), - ) - .arg( - Arg::new("config") - .long("config") - .short('C') - .help("Path to the Nydus daemon configuration file") - .required(false) - .global(true), - ) - .arg( - Arg::new("id") - .long("id") - .help("Service instance identifier") - .required(false) - .requires("supervisor") - .global(true), - ) - .arg( - Arg::new("log-level") - .long("log-level") - .short('l') - .help("Log level:") - .default_value("info") - .value_parser(["trace", "debug", "info", "warn", "error"]) - .required(false) - .global(true), - ) - .arg( - Arg::new("log-file") - .long("log-file") - .short('L') - .help("Log messages to the file. Default extension \".log\" will be used if no extension specified.") - .required(false) - .global(true), - ) - .arg( - Arg::new("log-rotation-size") - .long("log-rotation-size") - .help("Specify log rotation size(MB), 0 to disable") - .default_value("0") - .required(false) - .global(true), - ) - .arg( - Arg::new("rlimit-nofile") - .long("rlimit-nofile") - .default_value("1000000") - .help("Set rlimit for maximum file descriptor number (0 leaves it unchanged)") - .required(false) - .global(true), - ) - .arg( - Arg::new("supervisor") - .long("supervisor") - .help("Path to the Nydus daemon supervisor API socket") - .required(false) - .requires("id") - .global(true), - ) - .arg( - Arg::new("upgrade") - .long("upgrade") - .help("Start Nydus daemon in upgrade mode") - .action(ArgAction::SetTrue) - .required(false) - .global(true), - ) - .args_conflicts_with_subcommands(true); - - let cmdline = append_fuse_options(cmdline); - let cmdline = append_fs_options(cmdline); - let cmdline = append_fuse_subcmd_options(cmdline); - #[cfg(feature = "virtiofs")] - let cmdline = append_virtiofs_subcmd_options(cmdline); - #[cfg(feature = "block-nbd")] - let cmdline = self::nbd::append_nbd_subcmd_options(cmdline); - append_singleton_subcmd_options(cmdline) -} - -#[cfg(target_os = "macos")] -fn get_max_rlimit_nofile() -> Result { - let mut mib = [nix::libc::CTL_KERN, nix::libc::KERN_MAXFILES]; - let mut file_max: u64 = 0; - let mut size = std::mem::size_of::(); - // Safe because the arguments are valid and we have checked the result. 
- let res = unsafe { - nix::libc::sysctl( - mib.as_mut_ptr(), - 2, - (&mut file_max) as *mut u64 as *mut nix::libc::c_void, - &mut size, - std::ptr::null_mut(), - 0, - ) - }; - nix::errno::Errno::result(res)?; - Ok(file_max) -} - -#[cfg(target_os = "linux")] -fn get_max_rlimit_nofile() -> Result { - let file_max = std::fs::read_to_string("/proc/sys/fs/file-max")?; - file_max - .trim() - .parse::() - .map_err(|_| eother!("invalid content from fs.file-max")) -} - -/// Handle command line option to tune rlimit for maximum file descriptor number. -fn handle_rlimit_nofile_option(args: &ArgMatches, option_name: &str) -> Result<()> { - // `rlimit-nofile` has a default value, so safe to unwrap(). - let rlimit_nofile: u64 = args - .get_one::(option_name) - .unwrap() - .parse() - .map_err(|_e| { - Error::new( - ErrorKind::InvalidInput, - "invalid value for option `rlimit-nofile`", - ) - })?; - - if rlimit_nofile != 0 { - // Ensures there are fds available for other processes so we don't cause resource exhaustion. - let rlimit_nofile_max = get_max_rlimit_nofile()?; - if rlimit_nofile_max < 2 * RLIMIT_NOFILE_RESERVED { - return Err(eother!( - "The fs.file-max sysctl is too low to allow a reasonable number of open files." - )); - } - - // Reduce max_fds below the system-wide maximum, if necessary. - let rlimit_nofile_max = std::cmp::min( - rlimit_nofile_max - RLIMIT_NOFILE_RESERVED, - RLIMIT_NOFILE_MAX, - ); - let rlimit_nofile_max = Resource::NOFILE.get().map(|(curr, _)| { - if curr >= rlimit_nofile_max { - curr - } else { - rlimit_nofile_max - } - })?; - let rlimit_nofile = std::cmp::min(rlimit_nofile, rlimit_nofile_max); - info!( - "Set rlimit-nofile to {}, maximum {}", - rlimit_nofile, rlimit_nofile_max - ); - Resource::NOFILE.set(rlimit_nofile, rlimit_nofile)?; - } - - Ok(()) -} - -fn process_fs_service( - args: SubCmdArgs, - bti: BuildTimeInfo, - apisock: Option<&str>, - is_fuse: bool, -) -> Result<()> { - // shared-dir means fs passthrough - let shared_dir = args.value_of("shared-dir"); - // bootstrap means rafs only - let bootstrap = args.value_of("bootstrap"); - // safe as virtual_mountpoint default to "/" - let virtual_mnt = args.value_of("virtual-mountpoint").unwrap(); - - let mut fs_type = FsBackendType::PassthroughFs; - let mount_cmd = if let Some(shared_dir) = shared_dir { - let cmd = FsBackendMountCmd { - fs_type: FsBackendType::PassthroughFs, - source: shared_dir.to_string(), - config: "".to_string(), - mountpoint: virtual_mnt.to_string(), - prefetch_files: None, - }; - - Some(cmd) - } else if let Some(b) = bootstrap { - let config = match args.value_of("localfs-dir") { - Some(v) => { - format!( - r###" - {{ - "device": {{ - "backend": {{ - "type": "localfs", - "config": {{ - "dir": {:?}, - "readahead": true - }} - }}, - "cache": {{ - "type": "blobcache", - "config": {{ - "compressed": false, - "work_dir": {:?} - }} - }} - }}, - "mode": "direct", - "digest_validate": false, - "iostats_files": false - }} - "###, - v, v - ) - } - None => match args.value_of("config") { - Some(v) => { - let auth = std::env::var("IMAGE_PULL_AUTH").ok(); - if auth.is_some() { - let mut config = ConfigV2::from_file(v)?; - config.update_registry_auth_info(&auth); - serde_json::to_string(&config)? - } else { - std::fs::read_to_string(v)? 
- } - } - None => { - let e = NydusError::InvalidArguments( - "both --config and --localfs-dir are missing".to_string(), - ); - return Err(e.into()); - } - }, - }; - - // read the prefetch list of files from prefetch-files - let prefetch_files: Option> = match args.value_of("prefetch-files") { - Some(v) => { - let content = match std::fs::read_to_string(v) { - Ok(v) => v, - Err(_) => { - let e = NydusError::InvalidArguments( - "the prefetch-files arg is not a file path".to_string(), - ); - return Err(e.into()); - } - }; - let mut prefetch_files: Vec = Vec::new(); - for line in content.lines() { - if line.is_empty() || line.trim().is_empty() { - continue; - } - prefetch_files.push(line.trim().to_string()); - } - Some(prefetch_files) - } - None => None, - }; - - let cmd = FsBackendMountCmd { - fs_type: FsBackendType::Rafs, - source: b.to_string(), - config, - mountpoint: virtual_mnt.to_string(), - prefetch_files, - }; - - fs_type = FsBackendType::Rafs; - - Some(cmd) - } else { - None - }; - - let vfs = create_vfs_backend(fs_type, is_fuse, args.is_present("hybrid-mode"))?; - // Basically, below two arguments are essential for live-upgrade/failover/ and external management. - let daemon_id = args.value_of("id").map(|id| id.to_string()); - let supervisor = args.value_of("supervisor").map(|s| s.to_string()); - - if is_fuse { - // threads means number of fuse service threads - let threads: u32 = args - .value_of("fuse-threads") - .map(|n| n.parse().unwrap_or(1)) - .unwrap_or(1); - - let p = args - .value_of("failover-policy") - .unwrap_or(&"flush".to_string()) - .try_into() - .map_err(|e| { - error!("Invalid failover policy"); - e - })?; - - // mountpoint means fuse device only - let mountpoint = args.value_of("mountpoint").ok_or_else(|| { - NydusError::InvalidArguments("Mountpoint must be provided for FUSE server!".to_string()) - })?; - - let daemon = { - create_fuse_daemon( - mountpoint, - vfs, - supervisor, - daemon_id, - threads, - DAEMON_CONTROLLER.alloc_waker(), - apisock, - args.is_present("upgrade"), - !args.is_present("writable"), - p, - mount_cmd, - bti, - ) - .map(|d| { - info!("Fuse daemon started!"); - d - }) - .map_err(|e| { - error!("Failed in starting daemon: {}", e); - e - })? 
- }; - DAEMON_CONTROLLER.set_daemon(daemon); - } else { - #[cfg(feature = "virtiofs")] - { - let vu_sock = args.value_of("sock").ok_or_else(|| { - NydusError::InvalidArguments("vhost socket must be provided!".to_string()) - })?; - let _ = apisock.as_ref(); - DAEMON_CONTROLLER.set_daemon(virtiofs::create_virtiofs_daemon( - daemon_id, supervisor, vu_sock, vfs, mount_cmd, bti, - )?); - } - } - - Ok(()) -} - -fn process_singleton_arguments( - subargs: &SubCmdArgs, - apisock: Option<&str>, - bti: BuildTimeInfo, -) -> Result<()> { - let id = subargs.value_of("id").map(|id| id.to_string()); - let supervisor = subargs.value_of("supervisor").map(|s| s.to_string()); - let config = match subargs.value_of("config") { - None => None, - Some(path) => { - let config = std::fs::read_to_string(path)?; - let config: serde_json::Value = serde_json::from_str(&config) - .map_err(|_e| einval!("invalid configuration file"))?; - Some(config) - } - }; - let fscache = subargs.value_of("fscache").map(|s| s.as_str()); - let tag = subargs.value_of("fscache-tag").map(|s| s.as_str()); - let threads = subargs.value_of("fscache-threads").map(|s| s.as_str()); - info!("Start Nydus daemon in singleton mode!"); - let daemon = create_daemon( - id, - supervisor, - fscache, - tag, - threads, - config, - bti, - DAEMON_CONTROLLER.alloc_waker(), - apisock, - subargs.is_present("upgrade"), - ) - .map_err(|e| { - error!("Failed to start singleton daemon: {}", e); - e - })?; - DAEMON_CONTROLLER.set_singleton_mode(true); - if let Some(blob_mgr) = daemon.get_blob_cache_mgr() { - DAEMON_CONTROLLER.set_blob_cache_mgr(blob_mgr); - } - DAEMON_CONTROLLER.set_daemon(daemon); - Ok(()) -} - -#[cfg(feature = "block-nbd")] -mod nbd { - use super::*; - use nydus_api::BlobCacheEntry; - use nydus_service::block_nbd::create_nbd_daemon; - use std::str::FromStr; - - pub(super) fn append_nbd_subcmd_options(cmd: Command) -> Command { - let subcmd = Command::new("nbd") - .about("Export a RAFS v6 image as a block device through NBD (Experiment)"); - let subcmd = subcmd - .arg( - Arg::new("DEVICE") - .help("NBD device node to attach the block device") - .required(true) - .num_args(1), - ) - .arg( - Arg::new("bootstrap") - .long("bootstrap") - .short('B') - .help("Path to the RAFS filesystem metadata file") - .requires("localfs-dir") - .conflicts_with("config"), - ) - .arg( - Arg::new("localfs-dir") - .long("localfs-dir") - .requires("bootstrap") - .short('D') - .help( - "Path to the `localfs` working directory, which also enables the `localfs` storage backend" - ) - .conflicts_with("config"), - ) - .arg( - Arg::new("threads") - .long("threads") - .default_value("4") - .help("Number of worker threads to serve NBD requests") - .value_parser(thread_validator) - .required(false), - ); - cmd.subcommand(subcmd) - } - - pub(super) fn process_nbd_service( - args: SubCmdArgs, - bti: BuildTimeInfo, - _apisock: Option<&str>, - ) -> Result<()> { - let mut entry = if let Some(bootstrap) = args.value_of("bootstrap") { - let dir = args.value_of("localfs-dir").ok_or_else(|| { - einval!("option `-D/--localfs-dir` is required by `--boootstrap`") - })?; - let config = r#" - { - "type": "bootstrap", - "id": "disk-default", - "domain_id": "block-nbd", - "config_v2": { - "version": 2, - "id": "block-nbd-factory", - "backend": { - "type": "localfs", - "localfs": { - "dir": "LOCAL_FS_DIR" - } - }, - "cache": { - "type": "filecache", - "filecache": { - "work_dir": "LOCAL_FS_DIR" - } - }, - "metadata_path": "META_FILE_PATH" - } - }"#; - let config = config - .replace("LOCAL_FS_DIR", 
dir) - .replace("META_FILE_PATH", bootstrap); - BlobCacheEntry::from_str(&config)? - } else if let Some(v) = args.value_of("config") { - BlobCacheEntry::from_file(v)? - } else { - return Err(einval!( - "both option `-C/--config` and `-B/--bootstrap` are missing" - )); - }; - if !entry.prepare_configuration_info() { - return Err(einval!( - "invalid blob cache entry configuration information" - )); - } - if entry.validate() == false { - return Err(einval!( - "invalid blob cache entry configuration information" - )); - } - - // Safe to unwrap because `DEVICE` is mandatory option. - let device = args.value_of("DEVICE").unwrap().to_string(); - let id = args.value_of("id").map(|id| id.to_string()); - let supervisor = args.value_of("supervisor").map(|s| s.to_string()); - let threads: u32 = args - .value_of("threads") - .map(|n| n.parse().unwrap_or(1)) - .unwrap_or(1); - - let daemon = create_nbd_daemon( - device, - threads, - entry, - bti, - id, - supervisor, - DAEMON_CONTROLLER.alloc_waker(), - ) - .map(|d| { - info!("NBD daemon started!"); - d - }) - .map_err(|e| { - error!("Failed in starting NBD daemon: {}", e); - e - })?; - DAEMON_CONTROLLER.set_daemon(daemon); - - Ok(()) - } -} - -extern "C" fn sig_exit(_sig: std::os::raw::c_int) { - DAEMON_CONTROLLER.notify_shutdown(); -} - -fn main() -> Result<()> { - let bti = BTI.to_owned(); - let cmd_options = prepare_commandline_options().version(BTI_STRING.as_str()); - let args = cmd_options.get_matches(); - let logging_file = args.get_one::("log-file").map(|l| l.into()); - // Safe to unwrap because it has default value and possible values are defined - let level = args - .get_one::("log-level") - .unwrap() - .parse() - .unwrap(); - let apisock = args.get_one::("apisock").map(|s| s.as_str()); - let rotation_size = args - .get_one::("log-rotation-size") - .unwrap() - .parse::() - .map_err(|e| einval!(format!("Invalid log rotation size: {}", e)))?; - - setup_logging(logging_file, level, rotation_size)?; - - // Initialize and run the daemon controller event loop. - nydus::register_signal_handler(signal::SIGINT, sig_exit); - nydus::register_signal_handler(signal::SIGTERM, sig_exit); - - dump_program_info(); - handle_rlimit_nofile_option(&args, "rlimit-nofile")?; - - match args.subcommand_name() { - Some("singleton") => { - // Safe to unwrap because the subcommand is `singleton`. - let subargs = args.subcommand_matches("singleton").unwrap(); - let subargs = SubCmdArgs::new(&args, subargs); - process_singleton_arguments(&subargs, apisock, bti)?; - } - Some("fuse") => { - // Safe to unwrap because the subcommand is `fuse`. - let subargs = args.subcommand_matches("fuse").unwrap(); - let subargs = SubCmdArgs::new(&args, subargs); - process_fs_service(subargs, bti, apisock, true)?; - } - Some("virtiofs") => { - // Safe to unwrap because the subcommand is `virtiofs`. - let subargs = args.subcommand_matches("virtiofs").unwrap(); - let subargs = SubCmdArgs::new(&args, subargs); - process_fs_service(subargs, bti, apisock, false)?; - } - #[cfg(feature = "block-nbd")] - Some("nbd") => { - // Safe to unwrap because the subcommand is `nbd`. 
- let subargs = args.subcommand_matches("nbd").unwrap(); - let subargs = SubCmdArgs::new(&args, subargs); - self::nbd::process_nbd_service(subargs, bti, apisock)?; - } - _ => { - let subargs = SubCmdArgs::new(&args, &args); - process_fs_service(subargs, bti, apisock, true)?; - } - } - - let daemon = DAEMON_CONTROLLER.get_daemon(); - if let Some(fs) = daemon.get_default_fs_service() { - DAEMON_CONTROLLER.set_fs_service(fs); - } - - // Start the HTTP Administration API server - let mut api_controller = ApiServerController::new(apisock); - api_controller.start()?; - - // Run the main event loop - if DAEMON_CONTROLLER.is_active() { - DAEMON_CONTROLLER.run_loop(); - } - - // Gracefully shutdown system. - info!("nydusd quits"); - api_controller.stop(); - DAEMON_CONTROLLER.set_singleton_mode(false); - DAEMON_CONTROLLER.shutdown(); - - Ok(()) -} +// Copyright 2022 Alibaba Cloud. All rights reserved. +// Copyright 2020 Ant Group. All rights reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) +#![deny(warnings)] + +#[macro_use] +extern crate log; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate nydus_api; + +use std::convert::TryInto; +use std::io::{Error, ErrorKind, Result}; + +use clap::{Arg, ArgAction, ArgMatches, Command}; +use nix::sys::signal; +use rlimit::Resource; + +use nydus::{dump_program_info, get_build_time_info, setup_logging, SubCmdArgs}; +use nydus_api::{BuildTimeInfo, ConfigV2}; +use nydus_service::daemon::DaemonController; +use nydus_service::{ + create_daemon, create_fuse_daemon, create_vfs_backend, validate_threads_configuration, + Error as NydusError, FsBackendMountCmd, FsBackendType, ServiceArgs, +}; + +use crate::api_server_glue::ApiServerController; + +#[cfg(feature = "virtiofs")] +mod virtiofs; + +mod api_server_glue; + +/// Minimal number of file descriptors reserved for system. +const RLIMIT_NOFILE_RESERVED: u64 = 16384; +/// Default number of file descriptors. +const RLIMIT_NOFILE_MAX: u64 = 1_000_000; + +lazy_static! 
{ + static ref DAEMON_CONTROLLER: DaemonController = DaemonController::new(); + static ref BTI_STRING: String = get_build_time_info().0; + static ref BTI: BuildTimeInfo = get_build_time_info().1; +} + +fn thread_validator(v: &str) -> std::result::Result { + validate_threads_configuration(v).map(|s| s.to_string()) +} + +fn append_fs_options(app: Command) -> Command { + app.arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Path to the RAFS filesystem metadata file") + .conflicts_with("shared-dir"), + ) + .arg( + Arg::new("localfs-dir") + .long("localfs-dir") + .short('D') + .help( + "Path to the `localfs` working directory, which also enables the `localfs` storage backend" + ) + .conflicts_with("config"), + ) + .arg( + Arg::new("shared-dir") + .long("shared-dir") + .short('s') + .help("Path to the directory to be shared via the `passthroughfs` FUSE driver") + ) + .arg( + Arg::new("prefetch-files") + .long("prefetch-files") + .help("Path to the prefetch configuration file containing a list of directories/files separated by newlines") + .required(false) + .requires("bootstrap") + .num_args(1), + ) + .arg( + Arg::new("virtual-mountpoint") + .long("virtual-mountpoint") + .short('m') + .help("Mountpoint within the FUSE/virtiofs device to mount the RAFS/passthroughfs filesystem") + .default_value("/") + .required(false), + ) +} + +fn append_fuse_options(app: Command) -> Command { + app.arg( + Arg::new("mountpoint") + .long("mountpoint") + .short('M') + .help("Mountpoint for the FUSE filesystem, target for `mount.fuse`") + .required(true), + ) + .arg( + Arg::new("failover-policy") + .long("failover-policy") + .default_value("resend") + .help("FUSE server failover policy") + .value_parser(["resend", "flush"]) + .required(false), + ) + .arg( + Arg::new("fuse-threads") + .long("fuse-threads") + .alias("thread-num") + .default_value("4") + .help("Number of worker threads to serve FUSE I/O requests") + .value_parser(thread_validator) + .required(false), + ) + .arg( + Arg::new("writable") + .long("writable") + .action(ArgAction::SetTrue) + .help("Mounts FUSE filesystem in rw mode"), + ) +} + +fn append_fuse_subcmd_options(cmd: Command) -> Command { + let subcmd = Command::new("fuse").about("Run the Nydus daemon as a dedicated FUSE server"); + let subcmd = append_fuse_options(subcmd); + let subcmd = append_fs_options(subcmd); + cmd.subcommand(subcmd) +} + +#[cfg(feature = "virtiofs")] +fn append_virtiofs_options(cmd: Command) -> Command { + cmd.arg( + Arg::new("hybrid-mode") + .long("hybrid-mode") + .help("Enables both `RAFS` and `passthroughfs` filesystem drivers") + .action(ArgAction::SetFalse) + .required(false), + ) + .arg( + Arg::new("sock") + .long("sock") + .help("Path to the vhost-user API socket") + .required(true), + ) +} + +#[cfg(feature = "virtiofs")] +fn append_virtiofs_subcmd_options(cmd: Command) -> Command { + let subcmd = + Command::new("virtiofs").about("Run the Nydus daemon as a dedicated virtio-fs server"); + let subcmd = append_virtiofs_options(subcmd); + let subcmd = append_fs_options(subcmd); + cmd.subcommand(subcmd) +} + +fn append_fscache_options(app: Command) -> Command { + app.arg( + Arg::new("fscache") + .long("fscache") + .short('F') + .help("Working directory for Linux fscache driver to store cache files"), + ) + .arg( + Arg::new("fscache-tag") + .long("fscache-tag") + .help("Tag to identify the fscache daemon instance") + .requires("fscache"), + ) + .arg( + Arg::new("fscache-threads") + .long("fscache-threads") + .default_value("4") + .help("Number of 
working threads to serve fscache requests") + .required(false) + .value_parser(thread_validator), + ) +} + +fn append_singleton_subcmd_options(cmd: Command) -> Command { + let subcmd = Command::new("singleton") + .about("Run the Nydus daemon to host multiple blobcache/fscache/fuse/virtio-fs services"); + let subcmd = append_fscache_options(subcmd); + + // TODO: enable support of fuse service + /* + let subcmd = subcmd.arg( + Arg::new("fuse") + .long("fuse") + .short("f") + .help("Run as a shared FUSE server"), + ); + let subcmd = append_fuse_options(subcmd); + let subcmd = append_fs_options(subcmd); + */ + + cmd.subcommand(subcmd) +} + +fn prepare_commandline_options() -> Command { + let cmdline = Command::new("nydusd") + .about("Nydus daemon to provide BlobCache, FsCache, FUSE, Virtio-fs and container image services") + .arg( + Arg::new("apisock") + .long("apisock") + .short('A') + .help("Path to the Nydus daemon administration API socket") + .required(false) + .global(true), + ) + .arg( + Arg::new("config") + .long("config") + .short('C') + .help("Path to the Nydus daemon configuration file") + .required(false) + .global(true), + ) + .arg( + Arg::new("id") + .long("id") + .help("Service instance identifier") + .required(false) + .requires("supervisor") + .global(true), + ) + .arg( + Arg::new("log-level") + .long("log-level") + .short('l') + .help("Log level:") + .default_value("info") + .value_parser(["trace", "debug", "info", "warn", "error"]) + .required(false) + .global(true), + ) + .arg( + Arg::new("log-file") + .long("log-file") + .short('L') + .help("Log messages to the file. Default extension \".log\" will be used if no extension specified.") + .required(false) + .global(true), + ) + .arg( + Arg::new("log-rotation-size") + .long("log-rotation-size") + .help("Specify log rotation size(MB), 0 to disable") + .default_value("0") + .required(false) + .global(true), + ) + .arg( + Arg::new("rlimit-nofile") + .long("rlimit-nofile") + .default_value("1000000") + .help("Set rlimit for maximum file descriptor number (0 leaves it unchanged)") + .required(false) + .global(true), + ) + .arg( + Arg::new("supervisor") + .long("supervisor") + .help("Path to the Nydus daemon supervisor API socket") + .required(false) + .requires("id") + .global(true), + ) + .arg( + Arg::new("upgrade") + .long("upgrade") + .help("Start Nydus daemon in upgrade mode") + .action(ArgAction::SetTrue) + .required(false) + .global(true), + ) + .args_conflicts_with_subcommands(true); + + let cmdline = append_fuse_options(cmdline); + let cmdline = append_fs_options(cmdline); + let cmdline = append_fuse_subcmd_options(cmdline); + #[cfg(feature = "virtiofs")] + let cmdline = append_virtiofs_subcmd_options(cmdline); + #[cfg(feature = "block-nbd")] + let cmdline = self::nbd::append_nbd_subcmd_options(cmdline); + append_singleton_subcmd_options(cmdline) +} + +#[cfg(target_os = "macos")] +fn get_max_rlimit_nofile() -> Result { + let mut mib = [nix::libc::CTL_KERN, nix::libc::KERN_MAXFILES]; + let mut file_max: u64 = 0; + let mut size = std::mem::size_of::(); + // Safe because the arguments are valid and we have checked the result. 
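+    // The raw sysctl below reads `kern.maxfiles`, the macOS counterpart of the
+    // fs.file-max value consulted by the Linux variant of this function.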
+ let res = unsafe { + nix::libc::sysctl( + mib.as_mut_ptr(), + 2, + (&mut file_max) as *mut u64 as *mut nix::libc::c_void, + &mut size, + std::ptr::null_mut(), + 0, + ) + }; + nix::errno::Errno::result(res)?; + Ok(file_max) +} + +#[cfg(target_os = "linux")] +fn get_max_rlimit_nofile() -> Result { + let file_max = std::fs::read_to_string("/proc/sys/fs/file-max")?; + file_max + .trim() + .parse::() + .map_err(|_| eother!("invalid content from fs.file-max")) +} + +/// Handle command line option to tune rlimit for maximum file descriptor number. +fn handle_rlimit_nofile_option(args: &ArgMatches, option_name: &str) -> Result<()> { + // `rlimit-nofile` has a default value, so safe to unwrap(). + let rlimit_nofile: u64 = args + .get_one::(option_name) + .unwrap() + .parse() + .map_err(|_e| { + Error::new( + ErrorKind::InvalidInput, + "invalid value for option `rlimit-nofile`", + ) + })?; + + if rlimit_nofile != 0 { + // Ensures there are fds available for other processes so we don't cause resource exhaustion. + let rlimit_nofile_max = get_max_rlimit_nofile()?; + if rlimit_nofile_max < 2 * RLIMIT_NOFILE_RESERVED { + return Err(eother!( + "The fs.file-max sysctl is too low to allow a reasonable number of open files." + )); + } + + // Reduce max_fds below the system-wide maximum, if necessary. + let rlimit_nofile_max = std::cmp::min( + rlimit_nofile_max - RLIMIT_NOFILE_RESERVED, + RLIMIT_NOFILE_MAX, + ); + let rlimit_nofile_max = Resource::NOFILE.get().map(|(curr, _)| { + if curr >= rlimit_nofile_max { + curr + } else { + rlimit_nofile_max + } + })?; + let rlimit_nofile = std::cmp::min(rlimit_nofile, rlimit_nofile_max); + info!( + "Set rlimit-nofile to {}, maximum {}", + rlimit_nofile, rlimit_nofile_max + ); + Resource::NOFILE.set(rlimit_nofile, rlimit_nofile)?; + } + + Ok(()) +} + +fn process_fs_service( + args: SubCmdArgs, + bti: BuildTimeInfo, + apisock: Option<&str>, + is_fuse: bool, +) -> Result<()> { + // shared-dir means fs passthrough + let shared_dir = args.value_of("shared-dir"); + // bootstrap means rafs only + let bootstrap = args.value_of("bootstrap"); + // safe as virtual_mountpoint default to "/" + let virtual_mnt = args.value_of("virtual-mountpoint").unwrap(); + + let mut fs_type = FsBackendType::PassthroughFs; + let mount_cmd = if let Some(shared_dir) = shared_dir { + let cmd = FsBackendMountCmd { + fs_type: FsBackendType::PassthroughFs, + source: shared_dir.to_string(), + config: "".to_string(), + mountpoint: virtual_mnt.to_string(), + prefetch_files: None, + }; + + Some(cmd) + } else if let Some(b) = bootstrap { + let config = match args.value_of("localfs-dir") { + Some(v) => { + format!( + r###" + {{ + "device": {{ + "backend": {{ + "type": "localfs", + "config": {{ + "dir": {:?}, + "readahead": true + }} + }}, + "cache": {{ + "type": "blobcache", + "config": {{ + "compressed": false, + "work_dir": {:?} + }} + }} + }}, + "mode": "direct", + "digest_validate": false, + "iostats_files": false + }} + "###, + v, v + ) + } + None => match args.value_of("config") { + Some(v) => { + let auth = std::env::var("IMAGE_PULL_AUTH").ok(); + if auth.is_some() { + let mut config = ConfigV2::from_file(v)?; + config.update_registry_auth_info(&auth); + serde_json::to_string(&config)? + } else { + std::fs::read_to_string(v)? 
+ } + } + None => { + let e = NydusError::InvalidArguments( + "both --config and --localfs-dir are missing".to_string(), + ); + return Err(e.into()); + } + }, + }; + + // read the prefetch list of files from prefetch-files + let prefetch_files: Option> = match args.value_of("prefetch-files") { + Some(v) => { + let content = match std::fs::read_to_string(v) { + Ok(v) => v, + Err(_) => { + let e = NydusError::InvalidArguments( + "the prefetch-files arg is not a file path".to_string(), + ); + return Err(e.into()); + } + }; + let mut prefetch_files: Vec = Vec::new(); + for line in content.lines() { + if line.is_empty() || line.trim().is_empty() { + continue; + } + prefetch_files.push(line.trim().to_string()); + } + Some(prefetch_files) + } + None => None, + }; + + let cmd = FsBackendMountCmd { + fs_type: FsBackendType::Rafs, + source: b.to_string(), + config, + mountpoint: virtual_mnt.to_string(), + prefetch_files, + }; + + fs_type = FsBackendType::Rafs; + + Some(cmd) + } else { + None + }; + + let vfs = create_vfs_backend(fs_type, is_fuse, args.is_present("hybrid-mode"))?; + // Basically, below two arguments are essential for live-upgrade/failover/ and external management. + let daemon_id = args.value_of("id").map(|id| id.to_string()); + let supervisor = args.value_of("supervisor").map(|s| s.to_string()); + + if is_fuse { + // threads means number of fuse service threads + let threads: u32 = args + .value_of("fuse-threads") + .map(|n| n.parse().unwrap_or(1)) + .unwrap_or(1); + + let p = args + .value_of("failover-policy") + .unwrap_or(&"flush".to_string()) + .try_into() + .map_err(|e| { + error!("Invalid failover policy"); + e + })?; + + // mountpoint means fuse device only + let mountpoint = args.value_of("mountpoint").ok_or_else(|| { + NydusError::InvalidArguments("Mountpoint must be provided for FUSE server!".to_string()) + })?; + + let daemon = { + create_fuse_daemon( + mountpoint, + vfs, + supervisor, + daemon_id, + threads, + DAEMON_CONTROLLER.alloc_waker(), + apisock, + args.is_present("upgrade"), + !args.is_present("writable"), + p, + mount_cmd, + bti, + ) + .map(|d| { + info!("Fuse daemon started!"); + d + }) + .map_err(|e| { + error!("Failed in starting daemon: {}", e); + e + })? 
+ }; + DAEMON_CONTROLLER.set_daemon(daemon); + } else { + #[cfg(feature = "virtiofs")] + { + let vu_sock = args.value_of("sock").ok_or_else(|| { + NydusError::InvalidArguments("vhost socket must be provided!".to_string()) + })?; + let _ = apisock.as_ref(); + DAEMON_CONTROLLER.set_daemon(virtiofs::create_virtiofs_daemon( + daemon_id, supervisor, vu_sock, vfs, mount_cmd, bti, + )?); + } + } + + Ok(()) +} + +fn process_singleton_arguments( + subargs: &SubCmdArgs, + apisock: Option<&str>, + bti: BuildTimeInfo, +) -> Result<()> { + let id = subargs.value_of("id").map(|id| id.to_string()); + let supervisor = subargs.value_of("supervisor").map(|s| s.to_string()); + let config = match subargs.value_of("config") { + None => None, + Some(path) => { + let config = std::fs::read_to_string(path)?; + let config: serde_json::Value = serde_json::from_str(&config) + .map_err(|_e| einval!("invalid configuration file"))?; + Some(config) + } + }; + let fscache = subargs.value_of("fscache").map(|s| s.as_str()); + let tag = subargs.value_of("fscache-tag").map(|s| s.as_str()); + let threads = subargs.value_of("fscache-threads").map(|s| s.as_str()); + info!("Start Nydus daemon in singleton mode!"); + let daemon = create_daemon( + id, + supervisor, + fscache, + tag, + threads, + config, + bti, + DAEMON_CONTROLLER.alloc_waker(), + apisock, + subargs.is_present("upgrade"), + ) + .map_err(|e| { + error!("Failed to start singleton daemon: {}", e); + e + })?; + DAEMON_CONTROLLER.set_singleton_mode(true); + if let Some(blob_mgr) = daemon.get_blob_cache_mgr() { + DAEMON_CONTROLLER.set_blob_cache_mgr(blob_mgr); + } + DAEMON_CONTROLLER.set_daemon(daemon); + Ok(()) +} + +#[cfg(feature = "block-nbd")] +mod nbd { + use super::*; + use nydus_api::BlobCacheEntry; + use nydus_service::block_nbd::create_nbd_daemon; + use std::str::FromStr; + + pub(super) fn append_nbd_subcmd_options(cmd: Command) -> Command { + let subcmd = Command::new("nbd") + .about("Export a RAFS v6 image as a block device through NBD (Experiment)"); + let subcmd = subcmd + .arg( + Arg::new("DEVICE") + .help("NBD device node to attach the block device") + .required(true) + .num_args(1), + ) + .arg( + Arg::new("bootstrap") + .long("bootstrap") + .short('B') + .help("Path to the RAFS filesystem metadata file") + .requires("localfs-dir") + .conflicts_with("config"), + ) + .arg( + Arg::new("localfs-dir") + .long("localfs-dir") + .requires("bootstrap") + .short('D') + .help( + "Path to the `localfs` working directory, which also enables the `localfs` storage backend" + ) + .conflicts_with("config"), + ) + .arg( + Arg::new("threads") + .long("threads") + .default_value("4") + .help("Number of worker threads to serve NBD requests") + .value_parser(thread_validator) + .required(false), + ); + cmd.subcommand(subcmd) + } + + pub(super) fn process_nbd_service( + args: SubCmdArgs, + bti: BuildTimeInfo, + _apisock: Option<&str>, + ) -> Result<()> { + let mut entry = if let Some(bootstrap) = args.value_of("bootstrap") { + let dir = args.value_of("localfs-dir").ok_or_else(|| { + einval!("option `-D/--localfs-dir` is required by `--boootstrap`") + })?; + let config = r#" + { + "type": "bootstrap", + "id": "disk-default", + "domain_id": "block-nbd", + "config_v2": { + "version": 2, + "id": "block-nbd-factory", + "backend": { + "type": "localfs", + "localfs": { + "dir": "LOCAL_FS_DIR" + } + }, + "cache": { + "type": "filecache", + "filecache": { + "work_dir": "LOCAL_FS_DIR" + } + }, + "metadata_path": "META_FILE_PATH" + } + }"#; + let config = config + .replace("LOCAL_FS_DIR", 
dir) + .replace("META_FILE_PATH", bootstrap); + BlobCacheEntry::from_str(&config)? + } else if let Some(v) = args.value_of("config") { + BlobCacheEntry::from_file(v)? + } else { + return Err(einval!( + "both option `-C/--config` and `-B/--bootstrap` are missing" + )); + }; + if !entry.prepare_configuration_info() { + return Err(einval!( + "invalid blob cache entry configuration information" + )); + } + if entry.validate() == false { + return Err(einval!( + "invalid blob cache entry configuration information" + )); + } + + // Safe to unwrap because `DEVICE` is mandatory option. + let device = args.value_of("DEVICE").unwrap().to_string(); + let id = args.value_of("id").map(|id| id.to_string()); + let supervisor = args.value_of("supervisor").map(|s| s.to_string()); + let threads: u32 = args + .value_of("threads") + .map(|n| n.parse().unwrap_or(1)) + .unwrap_or(1); + + let daemon = create_nbd_daemon( + device, + threads, + entry, + bti, + id, + supervisor, + DAEMON_CONTROLLER.alloc_waker(), + ) + .map(|d| { + info!("NBD daemon started!"); + d + }) + .map_err(|e| { + error!("Failed in starting NBD daemon: {}", e); + e + })?; + DAEMON_CONTROLLER.set_daemon(daemon); + + Ok(()) + } +} + +extern "C" fn sig_exit(_sig: std::os::raw::c_int) { + DAEMON_CONTROLLER.notify_shutdown(); +} + +fn main() -> Result<()> { + let bti = BTI.to_owned(); + let cmd_options = prepare_commandline_options().version(BTI_STRING.as_str()); + let args = cmd_options.get_matches(); + let logging_file = args.get_one::("log-file").map(|l| l.into()); + // Safe to unwrap because it has default value and possible values are defined + let level = args + .get_one::("log-level") + .unwrap() + .parse() + .unwrap(); + let apisock = args.get_one::("apisock").map(|s| s.as_str()); + let rotation_size = args + .get_one::("log-rotation-size") + .unwrap() + .parse::() + .map_err(|e| einval!(format!("Invalid log rotation size: {}", e)))?; + + setup_logging(logging_file, level, rotation_size)?; + + // Initialize and run the daemon controller event loop. + nydus::register_signal_handler(signal::SIGINT, sig_exit); + nydus::register_signal_handler(signal::SIGTERM, sig_exit); + + dump_program_info(); + handle_rlimit_nofile_option(&args, "rlimit-nofile")?; + + match args.subcommand_name() { + Some("singleton") => { + // Safe to unwrap because the subcommand is `singleton`. + let subargs = args.subcommand_matches("singleton").unwrap(); + let subargs = SubCmdArgs::new(&args, subargs); + process_singleton_arguments(&subargs, apisock, bti)?; + } + Some("fuse") => { + // Safe to unwrap because the subcommand is `fuse`. + let subargs = args.subcommand_matches("fuse").unwrap(); + let subargs = SubCmdArgs::new(&args, subargs); + process_fs_service(subargs, bti, apisock, true)?; + } + Some("virtiofs") => { + // Safe to unwrap because the subcommand is `virtiofs`. + let subargs = args.subcommand_matches("virtiofs").unwrap(); + let subargs = SubCmdArgs::new(&args, subargs); + process_fs_service(subargs, bti, apisock, false)?; + } + #[cfg(feature = "block-nbd")] + Some("nbd") => { + // Safe to unwrap because the subcommand is `nbd`. 
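+            // The NBD service exposes a RAFS v6 image as a block device. A typical
+            // invocation (with hypothetical paths) looks like:
+            //   nydusd nbd /dev/nbd0 -B /path/to/bootstrap -D /path/to/localfs-dir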
+ let subargs = args.subcommand_matches("nbd").unwrap(); + let subargs = SubCmdArgs::new(&args, subargs); + self::nbd::process_nbd_service(subargs, bti, apisock)?; + } + _ => { + let subargs = SubCmdArgs::new(&args, &args); + process_fs_service(subargs, bti, apisock, true)?; + } + } + + let daemon = DAEMON_CONTROLLER.get_daemon(); + if let Some(fs) = daemon.get_default_fs_service() { + DAEMON_CONTROLLER.set_fs_service(fs); + } + + // Start the HTTP Administration API server + let mut api_controller = ApiServerController::new(apisock); + api_controller.start()?; + + // Run the main event loop + if DAEMON_CONTROLLER.is_active() { + DAEMON_CONTROLLER.run_loop(); + } + + // Gracefully shutdown system. + info!("nydusd quits"); + api_controller.stop(); + DAEMON_CONTROLLER.set_singleton_mode(false); + DAEMON_CONTROLLER.shutdown(); + + Ok(()) +} diff --git a/src/bin/nydusd/virtiofs.rs b/src/bin/nydusd/virtiofs.rs index f51e0b30993..cb5c1ed17be 100644 --- a/src/bin/nydusd/virtiofs.rs +++ b/src/bin/nydusd/virtiofs.rs @@ -1,417 +1,417 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// Copyright 2019 Intel Corporation. All Rights Reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -use std::any::Any; -use std::sync::atomic::{AtomicI32, Ordering}; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::{Arc, Mutex, MutexGuard, RwLock}; -use std::thread; - -use fuse_backend_rs::api::{server::Server, Vfs}; -use fuse_backend_rs::transport::{FsCacheReqHandler, Reader, VirtioFsWriter}; -use vhost::vhost_user::{message::*, Listener, SlaveFsCacheReq}; -use vhost_user_backend::{ - VhostUserBackend, VhostUserBackendMut, VhostUserDaemon, VringMutex, VringState, VringT, -}; -use virtio_bindings::bindings::virtio_ring::{ - VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, -}; -use virtio_queue::DescriptorChain; -use virtio_queue::QueueOwnedT; -use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; - -use nydus::daemon::{ - DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, - NydusDaemon, -}; -use nydus::upgrade::UpgradeManager; -use nydus::{Error, FsBackendCollection, FsBackendMountCmd, FsService, Result}; -use nydus_api::BuildTimeInfo; - -const VIRTIO_F_VERSION_1: u32 = 32; -const QUEUE_SIZE: usize = 1024; -const NUM_QUEUES: usize = 2; - -// The guest queued an available buffer for the high priority queue. -const HIPRIO_QUEUE_EVENT: u16 = 0; -// The guest queued an available buffer for the request queue. -const REQ_QUEUE_EVENT: u16 = 1; -// The device has been dropped. -// const KILL_EVENT: u16 = 2; - -type VhostUserBackendResult = std::io::Result; - -struct VhostUserFsBackend { - event_idx: bool, - kill_evt: EventFd, - mem: Option>, - server: Arc>>, - // handle request from slave to master - vu_req: Option, -} - -impl VhostUserFsBackend { - // There's no way to recover if error happens during processing a virtq, let the caller - // to handle it. - fn process_queue(&mut self, vring_state: &mut MutexGuard) -> std::io::Result { - let mut used_any = false; - let guest_mem = match &self.mem { - Some(m) => m, - None => return Err(Error::QueueMemoryUnset.into()), - }; - - let avail_chains: Vec>> = vring_state - .get_queue_mut() - .iter(guest_mem.memory()) - .map_err(|_| Error::IterateQueue)? 
- .collect(); - - for chain in avail_chains { - used_any = true; - - let head_index = chain.head_index(); - let mem = chain.memory(); - - let reader = Reader::from_descriptor_chain(mem, chain.clone()) - .map_err(Error::InvalidDescriptorChain)?; - let writer = VirtioFsWriter::new(mem, chain.clone()) - .map(|w| w.into()) - .map_err(Error::InvalidDescriptorChain)?; - - self.server - .handle_message( - reader, - writer, - self.vu_req - .as_mut() - .map(|x| x as &mut dyn FsCacheReqHandler), - None, - ) - .map_err(Error::ProcessQueue)?; - - if self.event_idx { - if vring_state.add_used(head_index, 0).is_err() { - warn!("Couldn't return used descriptors to the ring"); - } - - match vring_state.needs_notification() { - Err(_) => { - warn!("Couldn't check if queue needs to be notified"); - vring_state.signal_used_queue().unwrap(); - } - Ok(needs_notification) => { - if needs_notification { - vring_state.signal_used_queue().unwrap(); - } - } - } - } else { - if vring_state.add_used(head_index, 0).is_err() { - warn!("Couldn't return used descriptors to the ring"); - } - vring_state.signal_used_queue().unwrap(); - } - } - - Ok(used_any) - } -} - -struct VhostUserFsBackendHandler { - backend: Mutex, -} - -impl VhostUserFsBackendHandler { - fn new(vfs: Arc) -> std::io::Result { - let backend = VhostUserFsBackend { - event_idx: false, - kill_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(Error::Epoll)?, - mem: None, - server: Arc::new(Server::new(vfs)), - vu_req: None, - }; - - Ok(VhostUserFsBackendHandler { - backend: Mutex::new(backend), - }) - } -} - -impl VhostUserBackendMut for VhostUserFsBackendHandler { - fn num_queues(&self) -> usize { - NUM_QUEUES - } - - fn max_queue_size(&self) -> usize { - QUEUE_SIZE - } - - fn features(&self) -> u64 { - 1 << VIRTIO_F_VERSION_1 - | 1 << VIRTIO_RING_F_INDIRECT_DESC - | 1 << VIRTIO_RING_F_EVENT_IDX - | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() - } - - fn protocol_features(&self) -> VhostUserProtocolFeatures { - VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ - } - - fn set_event_idx(&mut self, _enabled: bool) { - self.backend.lock().unwrap().event_idx = true - } - - fn update_memory( - &mut self, - mem: GuestMemoryAtomic, - ) -> VhostUserBackendResult<()> { - self.backend.lock().unwrap().mem = Some(mem); - Ok(()) - } - - fn set_slave_req_fd(&mut self, vu_req: SlaveFsCacheReq) { - self.backend.lock().unwrap().vu_req = Some(vu_req); - } - - fn exit_event(&self, _thread_index: usize) -> Option { - // FIXME: need to patch vhost-user-backend to return KILL_EVENT - // so that daemon stop event gets popped up. - Some(self.backend.lock().unwrap().kill_evt.try_clone().unwrap()) - } - - fn handle_event( - &mut self, - device_event: u16, - evset: EventSet, - vrings: &[VringMutex], - _thread_id: usize, - ) -> VhostUserBackendResult { - if evset != EventSet::IN { - return Err(Error::HandleEventNotEpollIn.into()); - } - - let mut vring_state = match device_event { - HIPRIO_QUEUE_EVENT => { - debug!("HIPRIO_QUEUE_EVENT"); - vrings[0].get_mut() - } - REQ_QUEUE_EVENT => { - debug!("QUEUE_EVENT"); - vrings[1].get_mut() - } - _ => return Err(Error::HandleEventUnknownEvent.into()), - }; - - if self.backend.lock().unwrap().event_idx { - // vm-virtio's Queue implementation only checks avail_index - // once, so to properly support EVENT_IDX we need to keep - // calling process_queue() until it stops finding new - // requests on the queue. 
- loop { - vring_state.disable_notification().unwrap(); - self.backend - .lock() - .unwrap() - .process_queue(&mut vring_state)?; - if !vring_state.enable_notification().unwrap() { - break; - } - } - } else { - // Without EVENT_IDX, a single call is enough. - self.backend - .lock() - .unwrap() - .process_queue(&mut vring_state)?; - } - - Ok(false) - } -} - -pub struct VirtioFsService { - vfs: Arc, - upgrade_mgr: Option>, - backend_collection: Mutex, -} - -impl VirtioFsService { - fn new(vfs: Arc) -> Self { - VirtioFsService { - vfs, - upgrade_mgr: None, - backend_collection: Default::default(), - } - } -} - -impl FsService for VirtioFsService { - fn get_vfs(&self) -> &Vfs { - &self.vfs - } - - fn upgrade_mgr(&self) -> Option> { - self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) - } - - fn backend_collection(&self) -> MutexGuard { - self.backend_collection.lock().unwrap() - } - - fn export_inflight_ops(&self) -> Result> { - Err(Error::Unsupported) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -struct VirtiofsDaemon + Clone> { - bti: BuildTimeInfo, - id: Option, - request_sender: Arc>>, - result_receiver: Mutex>>, - service: Arc, - state: AtomicI32, - supervisor: Option, - - daemon: Arc>>, - sock: String, -} - -impl + Clone> NydusDaemon for VirtiofsDaemon { - fn as_any(&self) -> &dyn Any { - self - } - - fn id(&self) -> Option { - self.id.clone() - } - - fn get_state(&self) -> DaemonState { - self.state.load(Ordering::Relaxed).into() - } - - fn set_state(&self, state: DaemonState) { - self.state.store(state as i32, Ordering::Relaxed); - } - - fn version(&self) -> BuildTimeInfo { - self.bti.clone() - } - - fn start(&self) -> Result<()> { - let listener = - Listener::new(&self.sock, true).map_err(|e| Error::StartService(format!("{}", e)))?; - let vu_daemon = self.daemon.clone(); - let _ = thread::Builder::new() - .name("vhost_user_listener".to_string()) - .spawn(move || { - vu_daemon - .lock() - .unwrap() - .start(listener) - .unwrap_or_else(|e| error!("{:?}", e)); - }) - .map_err(Error::ThreadSpawn)?; - - Ok(()) - } - - fn umount(&self) -> Result<()> { - Ok(()) - } - - fn wait(&self) -> Result<()> { - self.daemon - .lock() - .unwrap() - .wait() - .map_err(|e| Error::WaitDaemon(eother!(e))) - } - - fn supervisor(&self) -> Option { - self.supervisor.clone() - } - - fn save(&self) -> Result<()> { - Err(Error::Unsupported) - } - - fn restore(&self) -> Result<()> { - Err(Error::Unsupported) - } - - fn get_default_fs_service(&self) -> Option> { - Some(self.service.clone()) - } -} - -impl + Clone> DaemonStateMachineSubscriber - for VirtiofsDaemon -{ - fn on_event(&self, event: DaemonStateMachineInput) -> Result<()> { - self.request_sender - .lock() - .unwrap() - .send(event) - .map_err(Error::ChannelSend)?; - - self.result_receiver - .lock() - .expect("Not expect poisoned lock!") - .recv() - .map_err(Error::ChannelReceive)? 
- } -} - -pub fn create_virtiofs_daemon( - id: Option, - supervisor: Option, - sock: &str, - vfs: Arc, - mount_cmd: Option, - bti: BuildTimeInfo, -) -> std::io::Result> { - let vu_daemon = VhostUserDaemon::new( - String::from("vhost-user-fs-backend"), - Arc::new(RwLock::new(VhostUserFsBackendHandler::new(vfs.clone())?)), - GuestMemoryAtomic::new(GuestMemoryMmap::new()), - ) - .map_err(|e| Error::VhostUser(format!("{:?}", e)))?; - let (trigger, events_rx) = channel::(); - let (result_sender, result_receiver) = channel::>(); - let service = VirtioFsService::new(vfs); - let daemon = Arc::new(VirtiofsDaemon { - bti, - id, - request_sender: Arc::new(Mutex::new(trigger)), - result_receiver: Mutex::new(result_receiver), - service: Arc::new(service), - state: AtomicI32::new(DaemonState::INIT as i32), - supervisor, - - daemon: Arc::new(Mutex::new(vu_daemon)), - sock: sock.to_string(), - }); - let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); - - machine.kick_state_machine()?; - if let Some(cmd) = mount_cmd { - daemon.service.mount(cmd)?; - } - daemon - .on_event(DaemonStateMachineInput::Mount) - .map_err(|e| eother!(e))?; - daemon - .on_event(DaemonStateMachineInput::Start) - .map_err(|e| eother!(e))?; - - Ok(daemon) -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) + +use std::any::Any; +use std::sync::atomic::{AtomicI32, Ordering}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::{Arc, Mutex, MutexGuard, RwLock}; +use std::thread; + +use fuse_backend_rs::api::{server::Server, Vfs}; +use fuse_backend_rs::transport::{FsCacheReqHandler, Reader, VirtioFsWriter}; +use vhost::vhost_user::{message::*, Listener, SlaveFsCacheReq}; +use vhost_user_backend::{ + VhostUserBackend, VhostUserBackendMut, VhostUserDaemon, VringMutex, VringState, VringT, +}; +use virtio_bindings::bindings::virtio_ring::{ + VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, +}; +use virtio_queue::DescriptorChain; +use virtio_queue::QueueOwnedT; +use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use nydus::daemon::{ + DaemonState, DaemonStateMachineContext, DaemonStateMachineInput, DaemonStateMachineSubscriber, + NydusDaemon, +}; +use nydus::upgrade::UpgradeManager; +use nydus::{Error, FsBackendCollection, FsBackendMountCmd, FsService, Result}; +use nydus_api::BuildTimeInfo; + +const VIRTIO_F_VERSION_1: u32 = 32; +const QUEUE_SIZE: usize = 1024; +const NUM_QUEUES: usize = 2; + +// The guest queued an available buffer for the high priority queue. +const HIPRIO_QUEUE_EVENT: u16 = 0; +// The guest queued an available buffer for the request queue. +const REQ_QUEUE_EVENT: u16 = 1; +// The device has been dropped. +// const KILL_EVENT: u16 = 2; + +type VhostUserBackendResult = std::io::Result; + +struct VhostUserFsBackend { + event_idx: bool, + kill_evt: EventFd, + mem: Option>, + server: Arc>>, + // handle request from slave to master + vu_req: Option, +} + +impl VhostUserFsBackend { + // There's no way to recover if error happens during processing a virtq, let the caller + // to handle it. 
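+    // process_queue() drains every available descriptor chain from the vring, wraps each
+    // chain in a Reader/VirtioFsWriter over guest memory, dispatches it to the
+    // fuse-backend-rs Server, and returns whether any request was handled.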
+ fn process_queue(&mut self, vring_state: &mut MutexGuard) -> std::io::Result { + let mut used_any = false; + let guest_mem = match &self.mem { + Some(m) => m, + None => return Err(Error::QueueMemoryUnset.into()), + }; + + let avail_chains: Vec>> = vring_state + .get_queue_mut() + .iter(guest_mem.memory()) + .map_err(|_| Error::IterateQueue)? + .collect(); + + for chain in avail_chains { + used_any = true; + + let head_index = chain.head_index(); + let mem = chain.memory(); + + let reader = Reader::from_descriptor_chain(mem, chain.clone()) + .map_err(Error::InvalidDescriptorChain)?; + let writer = VirtioFsWriter::new(mem, chain.clone()) + .map(|w| w.into()) + .map_err(Error::InvalidDescriptorChain)?; + + self.server + .handle_message( + reader, + writer, + self.vu_req + .as_mut() + .map(|x| x as &mut dyn FsCacheReqHandler), + None, + ) + .map_err(Error::ProcessQueue)?; + + if self.event_idx { + if vring_state.add_used(head_index, 0).is_err() { + warn!("Couldn't return used descriptors to the ring"); + } + + match vring_state.needs_notification() { + Err(_) => { + warn!("Couldn't check if queue needs to be notified"); + vring_state.signal_used_queue().unwrap(); + } + Ok(needs_notification) => { + if needs_notification { + vring_state.signal_used_queue().unwrap(); + } + } + } + } else { + if vring_state.add_used(head_index, 0).is_err() { + warn!("Couldn't return used descriptors to the ring"); + } + vring_state.signal_used_queue().unwrap(); + } + } + + Ok(used_any) + } +} + +struct VhostUserFsBackendHandler { + backend: Mutex, +} + +impl VhostUserFsBackendHandler { + fn new(vfs: Arc) -> std::io::Result { + let backend = VhostUserFsBackend { + event_idx: false, + kill_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(Error::Epoll)?, + mem: None, + server: Arc::new(Server::new(vfs)), + vu_req: None, + }; + + Ok(VhostUserFsBackendHandler { + backend: Mutex::new(backend), + }) + } +} + +impl VhostUserBackendMut for VhostUserFsBackendHandler { + fn num_queues(&self) -> usize { + NUM_QUEUES + } + + fn max_queue_size(&self) -> usize { + QUEUE_SIZE + } + + fn features(&self) -> u64 { + 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ + } + + fn set_event_idx(&mut self, _enabled: bool) { + self.backend.lock().unwrap().event_idx = true + } + + fn update_memory( + &mut self, + mem: GuestMemoryAtomic, + ) -> VhostUserBackendResult<()> { + self.backend.lock().unwrap().mem = Some(mem); + Ok(()) + } + + fn set_slave_req_fd(&mut self, vu_req: SlaveFsCacheReq) { + self.backend.lock().unwrap().vu_req = Some(vu_req); + } + + fn exit_event(&self, _thread_index: usize) -> Option { + // FIXME: need to patch vhost-user-backend to return KILL_EVENT + // so that daemon stop event gets popped up. 
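+        // The returned clone of kill_evt is registered by vhost-user-backend; signalling
+        // it is how the worker thread is asked to exit.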
+ Some(self.backend.lock().unwrap().kill_evt.try_clone().unwrap()) + } + + fn handle_event( + &mut self, + device_event: u16, + evset: EventSet, + vrings: &[VringMutex], + _thread_id: usize, + ) -> VhostUserBackendResult { + if evset != EventSet::IN { + return Err(Error::HandleEventNotEpollIn.into()); + } + + let mut vring_state = match device_event { + HIPRIO_QUEUE_EVENT => { + debug!("HIPRIO_QUEUE_EVENT"); + vrings[0].get_mut() + } + REQ_QUEUE_EVENT => { + debug!("QUEUE_EVENT"); + vrings[1].get_mut() + } + _ => return Err(Error::HandleEventUnknownEvent.into()), + }; + + if self.backend.lock().unwrap().event_idx { + // vm-virtio's Queue implementation only checks avail_index + // once, so to properly support EVENT_IDX we need to keep + // calling process_queue() until it stops finding new + // requests on the queue. + loop { + vring_state.disable_notification().unwrap(); + self.backend + .lock() + .unwrap() + .process_queue(&mut vring_state)?; + if !vring_state.enable_notification().unwrap() { + break; + } + } + } else { + // Without EVENT_IDX, a single call is enough. + self.backend + .lock() + .unwrap() + .process_queue(&mut vring_state)?; + } + + Ok(false) + } +} + +pub struct VirtioFsService { + vfs: Arc, + upgrade_mgr: Option>, + backend_collection: Mutex, +} + +impl VirtioFsService { + fn new(vfs: Arc) -> Self { + VirtioFsService { + vfs, + upgrade_mgr: None, + backend_collection: Default::default(), + } + } +} + +impl FsService for VirtioFsService { + fn get_vfs(&self) -> &Vfs { + &self.vfs + } + + fn upgrade_mgr(&self) -> Option> { + self.upgrade_mgr.as_ref().map(|mgr| mgr.lock().unwrap()) + } + + fn backend_collection(&self) -> MutexGuard { + self.backend_collection.lock().unwrap() + } + + fn export_inflight_ops(&self) -> Result> { + Err(Error::Unsupported) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +struct VirtiofsDaemon + Clone> { + bti: BuildTimeInfo, + id: Option, + request_sender: Arc>>, + result_receiver: Mutex>>, + service: Arc, + state: AtomicI32, + supervisor: Option, + + daemon: Arc>>, + sock: String, +} + +impl + Clone> NydusDaemon for VirtiofsDaemon { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> Option { + self.id.clone() + } + + fn get_state(&self) -> DaemonState { + self.state.load(Ordering::Relaxed).into() + } + + fn set_state(&self, state: DaemonState) { + self.state.store(state as i32, Ordering::Relaxed); + } + + fn version(&self) -> BuildTimeInfo { + self.bti.clone() + } + + fn start(&self) -> Result<()> { + let listener = + Listener::new(&self.sock, true).map_err(|e| Error::StartService(format!("{}", e)))?; + let vu_daemon = self.daemon.clone(); + let _ = thread::Builder::new() + .name("vhost_user_listener".to_string()) + .spawn(move || { + vu_daemon + .lock() + .unwrap() + .start(listener) + .unwrap_or_else(|e| error!("{:?}", e)); + }) + .map_err(Error::ThreadSpawn)?; + + Ok(()) + } + + fn umount(&self) -> Result<()> { + Ok(()) + } + + fn wait(&self) -> Result<()> { + self.daemon + .lock() + .unwrap() + .wait() + .map_err(|e| Error::WaitDaemon(eother!(e))) + } + + fn supervisor(&self) -> Option { + self.supervisor.clone() + } + + fn save(&self) -> Result<()> { + Err(Error::Unsupported) + } + + fn restore(&self) -> Result<()> { + Err(Error::Unsupported) + } + + fn get_default_fs_service(&self) -> Option> { + Some(self.service.clone()) + } +} + +impl + Clone> DaemonStateMachineSubscriber + for VirtiofsDaemon +{ + fn on_event(&self, event: DaemonStateMachineInput) -> Result<()> { + self.request_sender + .lock() + .unwrap() + 
.send(event) + .map_err(Error::ChannelSend)?; + + self.result_receiver + .lock() + .expect("Not expect poisoned lock!") + .recv() + .map_err(Error::ChannelReceive)? + } +} + +pub fn create_virtiofs_daemon( + id: Option, + supervisor: Option, + sock: &str, + vfs: Arc, + mount_cmd: Option, + bti: BuildTimeInfo, +) -> std::io::Result> { + let vu_daemon = VhostUserDaemon::new( + String::from("vhost-user-fs-backend"), + Arc::new(RwLock::new(VhostUserFsBackendHandler::new(vfs.clone())?)), + GuestMemoryAtomic::new(GuestMemoryMmap::new()), + ) + .map_err(|e| Error::VhostUser(format!("{:?}", e)))?; + let (trigger, events_rx) = channel::(); + let (result_sender, result_receiver) = channel::>(); + let service = VirtioFsService::new(vfs); + let daemon = Arc::new(VirtiofsDaemon { + bti, + id, + request_sender: Arc::new(Mutex::new(trigger)), + result_receiver: Mutex::new(result_receiver), + service: Arc::new(service), + state: AtomicI32::new(DaemonState::INIT as i32), + supervisor, + + daemon: Arc::new(Mutex::new(vu_daemon)), + sock: sock.to_string(), + }); + let machine = DaemonStateMachineContext::new(daemon.clone(), events_rx, result_sender); + + machine.kick_state_machine()?; + if let Some(cmd) = mount_cmd { + daemon.service.mount(cmd)?; + } + daemon + .on_event(DaemonStateMachineInput::Mount) + .map_err(|e| eother!(e))?; + daemon + .on_event(DaemonStateMachineInput::Start) + .map_err(|e| eother!(e))?; + + Ok(daemon) +} diff --git a/src/lib.rs b/src/lib.rs index 2a3df9f389e..6d6df6f5e95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,97 +1,97 @@ -// Copyright 2021 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -#[macro_use] -extern crate log; -#[macro_use] -extern crate nydus_api; - -use clap::parser::ValuesRef; -use clap::ArgMatches; -use nydus_api::BuildTimeInfo; - -pub use logger::{log_level_to_verbosity, setup_logging}; -pub use nydus_service::*; -pub use signal::register_signal_handler; - -mod logger; -mod signal; - -/// Helper to access commandline options. -pub struct SubCmdArgs<'a> { - args: &'a ArgMatches, - subargs: &'a ArgMatches, -} - -impl<'a> SubCmdArgs<'a> { - /// Create a new instance of [SubCmdArgs]. - pub fn new(args: &'a ArgMatches, subargs: &'a ArgMatches) -> Self { - SubCmdArgs { args, subargs } - } - - /// Get reference to commandline option `key`. - pub fn values_of(&self, key: &str) -> Option> { - if let Some(v) = self.subargs.get_many::(key) { - Some(v) - } else { - self.args.get_many::(key) - } - } -} - -impl<'a> ServiceArgs for SubCmdArgs<'a> { - fn value_of(&self, key: &str) -> Option<&String> { - if let Some(v) = self.subargs.get_one::(key) { - Some(v) - } else { - self.args.try_get_one::(key).unwrap_or_default() - } - } - - fn is_present(&self, key: &str) -> bool { - matches!(self.subargs.try_get_one::(key), Ok(Some(true))) - || matches!(self.args.try_get_one::(key), Ok(Some(true))) - } -} - -pub mod built_info { - pub const PROFILE: &str = env!("PROFILE"); - pub const RUSTC_VERSION: &str = env!("RUSTC_VERSION"); - pub const BUILT_TIME_UTC: &str = env!("BUILT_TIME_UTC"); - pub const GIT_COMMIT_VERSION: &str = env!("GIT_COMMIT_VERSION"); - pub const GIT_COMMIT_HASH: &str = env!("GIT_COMMIT_HASH"); -} - -/// Dump program build and version information. 
-pub fn dump_program_info() { - info!( - "Program Version: {}, Git Commit: {:?}, Build Time: {:?}, Profile: {:?}, Rustc Version: {:?}", - built_info::GIT_COMMIT_VERSION, - built_info::GIT_COMMIT_HASH, - built_info::BUILT_TIME_UTC, - built_info::PROFILE, - built_info::RUSTC_VERSION, - ); -} - -pub fn get_build_time_info() -> (String, BuildTimeInfo) { - let info_string = format!( - "\rVersion: \t{}\nGit Commit: \t{}\nBuild Time: \t{}\nProfile: \t{}\nRustc: \t\t{}\n", - built_info::GIT_COMMIT_VERSION, - built_info::GIT_COMMIT_HASH, - built_info::BUILT_TIME_UTC, - built_info::PROFILE, - built_info::RUSTC_VERSION, - ); - - let info = BuildTimeInfo { - package_ver: built_info::GIT_COMMIT_VERSION.to_string(), - git_commit: built_info::GIT_COMMIT_HASH.to_string(), - build_time: built_info::BUILT_TIME_UTC.to_string(), - profile: built_info::PROFILE.to_string(), - rustc: built_info::RUSTC_VERSION.to_string(), - }; - - (info_string, info) -} +// Copyright 2021 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#[macro_use] +extern crate log; +#[macro_use] +extern crate nydus_api; + +use clap::parser::ValuesRef; +use clap::ArgMatches; +use nydus_api::BuildTimeInfo; + +pub use logger::{log_level_to_verbosity, setup_logging}; +pub use nydus_service::*; +pub use signal::register_signal_handler; + +mod logger; +mod signal; + +/// Helper to access commandline options. +pub struct SubCmdArgs<'a> { + args: &'a ArgMatches, + subargs: &'a ArgMatches, +} + +impl<'a> SubCmdArgs<'a> { + /// Create a new instance of [SubCmdArgs]. + pub fn new(args: &'a ArgMatches, subargs: &'a ArgMatches) -> Self { + SubCmdArgs { args, subargs } + } + + /// Get reference to commandline option `key`. + pub fn values_of(&self, key: &str) -> Option> { + if let Some(v) = self.subargs.get_many::(key) { + Some(v) + } else { + self.args.get_many::(key) + } + } +} + +impl<'a> ServiceArgs for SubCmdArgs<'a> { + fn value_of(&self, key: &str) -> Option<&String> { + if let Some(v) = self.subargs.get_one::(key) { + Some(v) + } else { + self.args.try_get_one::(key).unwrap_or_default() + } + } + + fn is_present(&self, key: &str) -> bool { + matches!(self.subargs.try_get_one::(key), Ok(Some(true))) + || matches!(self.args.try_get_one::(key), Ok(Some(true))) + } +} + +pub mod built_info { + pub const PROFILE: &str = env!("PROFILE"); + pub const RUSTC_VERSION: &str = env!("RUSTC_VERSION"); + pub const BUILT_TIME_UTC: &str = env!("BUILT_TIME_UTC"); + pub const GIT_COMMIT_VERSION: &str = env!("GIT_COMMIT_VERSION"); + pub const GIT_COMMIT_HASH: &str = env!("GIT_COMMIT_HASH"); +} + +/// Dump program build and version information. 
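// A small standalone sketch of the compile-time build-info pattern used by the
// `built_info` module above and consumed by `dump_program_info` below. The real module
// relies on PROFILE, RUSTC_VERSION, etc. being exported by the crate's build script;
// `option_env!` is used here instead of `env!` so the sketch compiles even when those
// variables are not set.
fn build_banner() -> String {
    let profile = option_env!("PROFILE").unwrap_or("unknown");
    let rustc = option_env!("RUSTC_VERSION").unwrap_or("unknown");
    format!("Profile: {}, Rustc: {}", profile, rustc)
}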
+pub fn dump_program_info() { + info!( + "Program Version: {}, Git Commit: {:?}, Build Time: {:?}, Profile: {:?}, Rustc Version: {:?}", + built_info::GIT_COMMIT_VERSION, + built_info::GIT_COMMIT_HASH, + built_info::BUILT_TIME_UTC, + built_info::PROFILE, + built_info::RUSTC_VERSION, + ); +} + +pub fn get_build_time_info() -> (String, BuildTimeInfo) { + let info_string = format!( + "\rVersion: \t{}\nGit Commit: \t{}\nBuild Time: \t{}\nProfile: \t{}\nRustc: \t\t{}\n", + built_info::GIT_COMMIT_VERSION, + built_info::GIT_COMMIT_HASH, + built_info::BUILT_TIME_UTC, + built_info::PROFILE, + built_info::RUSTC_VERSION, + ); + + let info = BuildTimeInfo { + package_ver: built_info::GIT_COMMIT_VERSION.to_string(), + git_commit: built_info::GIT_COMMIT_HASH.to_string(), + build_time: built_info::BUILT_TIME_UTC.to_string(), + profile: built_info::PROFILE.to_string(), + rustc: built_info::RUSTC_VERSION.to_string(), + }; + + (info_string, info) +} diff --git a/src/logger.rs b/src/logger.rs index b82dc686b6d..6af041496f9 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -1,204 +1,204 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::env::current_dir; -use std::io::Result; -use std::path::PathBuf; - -use flexi_logger::{ - self, style, Cleanup, Criterion, DeferredNow, FileSpec, Logger, Naming, - TS_DASHES_BLANK_COLONS_DOT_BLANK, -}; -use log::{Level, LevelFilter, Record}; - -pub fn log_level_to_verbosity(level: log::LevelFilter) -> usize { - if level == log::LevelFilter::Off { - 0 - } else { - level as usize - 1 - } -} - -fn get_file_name<'a>(record: &'a Record) -> Option<&'a str> { - record.file().map(|v| match v.rfind("/src/") { - None => v, - Some(pos) => match v[..pos].rfind('/') { - None => &v[pos..], - Some(p) => &v[p..], - }, - }) -} - -fn opt_format( - w: &mut dyn std::io::Write, - now: &mut DeferredNow, - record: &Record, -) -> std::result::Result<(), std::io::Error> { - let level = record.level(); - if level == Level::Info { - write!( - w, - "[{}] {} {}", - now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK), - record.level(), - &record.args() - ) - } else { - write!( - w, - "[{}] {} [{}:{}] {}", - now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK), - record.level(), - get_file_name(record).unwrap_or(""), - record.line().unwrap_or(0), - &record.args() - ) - } -} - -fn colored_opt_format( - w: &mut dyn std::io::Write, - now: &mut DeferredNow, - record: &Record, -) -> std::result::Result<(), std::io::Error> { - let level = record.level(); - if level == Level::Info { - write!( - w, - "[{}] {} {}", - style(level).paint(now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK).to_string()), - style(level).paint(level.to_string()), - style(level).paint(&record.args().to_string()) - ) - } else { - write!( - w, - "[{}] {} [{}:{}] {}", - style(level).paint(now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK).to_string()), - style(level).paint(level.to_string()), - get_file_name(record).unwrap_or(""), - record.line().unwrap_or(0), - style(level).paint(&record.args().to_string()) - ) - } -} - -/// Setup logging infrastructure for application. -/// -/// `log_file_path` is an absolute path to logging files or relative path from current working -/// directory to logging file. -/// Flexi logger always appends a suffix to file name whose default value is ".log" -/// unless we set it intentionally. I don't like this passion. 
When the basename of `log_file_path` -/// is "bar", the newly created log file will be "bar.log" -pub fn setup_logging( - log_file_path: Option, - level: LevelFilter, - rotation_size: u64, -) -> Result<()> { - if let Some(ref path) = log_file_path { - // Do not try to canonicalize the path since the file may not exist yet. - let mut spec = FileSpec::default().suppress_timestamp(); - - // Parse log file to get the `basename` and `suffix`(extension) because `flexi_logger` - // will automatically add `.log` suffix if we don't set explicitly, see: - // https://github.com/emabee/flexi_logger/issues/74 - let basename = path - .file_stem() - .ok_or_else(|| { - eprintln!("invalid file name input {:?}", path); - einval!() - })? - .to_str() - .ok_or_else(|| { - eprintln!("invalid file name input {:?}", path); - einval!() - })?; - spec = spec.basename(basename); - - // `flexi_logger` automatically add `.log` suffix if the file name has no extension. - if let Some(suffix) = path.extension() { - let suffix = suffix.to_str().ok_or_else(|| { - eprintln!("invalid file extension {:?}", suffix); - einval!() - })?; - spec = spec.suffix(suffix); - } - - // Set log directory - let parent_dir = path.parent(); - if let Some(p) = parent_dir { - let cwd = current_dir()?; - let dir = if !p.has_root() { - cwd.join(p) - } else { - p.to_path_buf() - }; - spec = spec.directory(dir); - } - - // We rely on rust `log` macro to limit current log level rather than `flexi_logger` - // So we set `flexi_logger` log level to "trace" which is High enough. Otherwise, we - // can't change log level to a higher level than what is passed to `flexi_logger`. - let mut logger = Logger::try_with_env_or_str("trace") - .map_err(|_e| enosys!())? - .log_to_file(spec) - .append() - .format(opt_format); - - // Set log rotation - if rotation_size > 0 { - let log_rotation_size_byte: u64 = rotation_size * 1024 * 1024; - logger = logger.rotate( - Criterion::Size(log_rotation_size_byte), - Naming::Timestamps, - Cleanup::KeepCompressedFiles(10), - ); - } - - logger.start().map_err(|e| { - eprintln!("{:?}", e); - eother!(e) - })?; - } else { - // We rely on rust `log` macro to limit current log level rather than `flexi_logger` - // So we set `flexi_logger` log level to "trace" which is High enough. Otherwise, we - // can't change log level to a higher level than what is passed to `flexi_logger`. - Logger::try_with_env_or_str("trace") - .map_err(|_e| enosys!())? - .format(colored_opt_format) - .start() - .map_err(|e| eother!(e))?; - } - - log::set_max_level(level); - - // Dump panic info and backtrace to logger. - log_panics::Config::new() - .backtrace_mode(log_panics::BacktraceMode::Resolved) - .install_panic_hook(); - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_log_level_to_verbosity() { - assert_eq!(log_level_to_verbosity(log::LevelFilter::Off), 0); - assert_eq!(log_level_to_verbosity(log::LevelFilter::Error), 0); - assert_eq!(log_level_to_verbosity(log::LevelFilter::Warn), 1); - } - - #[test] - fn test_log_rotation() { - let log_file = Some(PathBuf::from("test_log_rotation")); - let level = LevelFilter::Info; - let rotation_size = 1; // 1MB - - assert!(setup_logging(log_file, level, rotation_size).is_ok()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
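// An illustrative sketch, mirroring the source-path trimming done by the `get_file_name`
// helper further below: log records keep only the crate directory plus the path under
// `src/`, instead of the full absolute path recorded by rustc. The example path is
// hypothetical.
fn trim_source_path(path: &str) -> &str {
    match path.rfind("/src/") {
        None => path,
        Some(pos) => match path[..pos].rfind('/') {
            None => &path[pos..],
            Some(p) => &path[p..],
        },
    }
}

#[test]
fn trims_to_crate_directory() {
    assert_eq!(
        trim_source_path("/home/dev/nydus/storage/src/backend/connection.rs"),
        "/storage/src/backend/connection.rs"
    );
}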
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::env::current_dir; +use std::io::Result; +use std::path::PathBuf; + +use flexi_logger::{ + self, style, Cleanup, Criterion, DeferredNow, FileSpec, Logger, Naming, + TS_DASHES_BLANK_COLONS_DOT_BLANK, +}; +use log::{Level, LevelFilter, Record}; + +pub fn log_level_to_verbosity(level: log::LevelFilter) -> usize { + if level == log::LevelFilter::Off { + 0 + } else { + level as usize - 1 + } +} + +fn get_file_name<'a>(record: &'a Record) -> Option<&'a str> { + record.file().map(|v| match v.rfind("/src/") { + None => v, + Some(pos) => match v[..pos].rfind('/') { + None => &v[pos..], + Some(p) => &v[p..], + }, + }) +} + +fn opt_format( + w: &mut dyn std::io::Write, + now: &mut DeferredNow, + record: &Record, +) -> std::result::Result<(), std::io::Error> { + let level = record.level(); + if level == Level::Info { + write!( + w, + "[{}] {} {}", + now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK), + record.level(), + &record.args() + ) + } else { + write!( + w, + "[{}] {} [{}:{}] {}", + now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK), + record.level(), + get_file_name(record).unwrap_or(""), + record.line().unwrap_or(0), + &record.args() + ) + } +} + +fn colored_opt_format( + w: &mut dyn std::io::Write, + now: &mut DeferredNow, + record: &Record, +) -> std::result::Result<(), std::io::Error> { + let level = record.level(); + if level == Level::Info { + write!( + w, + "[{}] {} {}", + style(level).paint(now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK).to_string()), + style(level).paint(level.to_string()), + style(level).paint(&record.args().to_string()) + ) + } else { + write!( + w, + "[{}] {} [{}:{}] {}", + style(level).paint(now.format(TS_DASHES_BLANK_COLONS_DOT_BLANK).to_string()), + style(level).paint(level.to_string()), + get_file_name(record).unwrap_or(""), + record.line().unwrap_or(0), + style(level).paint(&record.args().to_string()) + ) + } +} + +/// Setup logging infrastructure for application. +/// +/// `log_file_path` is an absolute path to logging files or relative path from current working +/// directory to logging file. +/// Flexi logger always appends a suffix to file name whose default value is ".log" +/// unless we set it intentionally. I don't like this passion. When the basename of `log_file_path` +/// is "bar", the newly created log file will be "bar.log" +pub fn setup_logging( + log_file_path: Option, + level: LevelFilter, + rotation_size: u64, +) -> Result<()> { + if let Some(ref path) = log_file_path { + // Do not try to canonicalize the path since the file may not exist yet. + let mut spec = FileSpec::default().suppress_timestamp(); + + // Parse log file to get the `basename` and `suffix`(extension) because `flexi_logger` + // will automatically add `.log` suffix if we don't set explicitly, see: + // https://github.com/emabee/flexi_logger/issues/74 + let basename = path + .file_stem() + .ok_or_else(|| { + eprintln!("invalid file name input {:?}", path); + einval!() + })? + .to_str() + .ok_or_else(|| { + eprintln!("invalid file name input {:?}", path); + einval!() + })?; + spec = spec.basename(basename); + + // `flexi_logger` automatically add `.log` suffix if the file name has no extension. 
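// A standalone sketch of the stem/suffix split performed here before handing the name to
// flexi_logger. As the comments above note, flexi_logger appends ".log" when no suffix is
// set explicitly, so a configured "nydusd.log" is split into basename "nydusd" and suffix
// "log" rather than ending up as "nydusd.log.log".
use std::path::Path;

fn split_log_name(path: &Path) -> Option<(&str, Option<&str>)> {
    let stem = path.file_stem()?.to_str()?;
    let suffix = path.extension().and_then(|s| s.to_str());
    Some((stem, suffix))
}

#[test]
fn splits_name_and_suffix() {
    assert_eq!(
        split_log_name(Path::new("/var/log/nydusd.log")),
        Some(("nydusd", Some("log")))
    );
}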
+ if let Some(suffix) = path.extension() { + let suffix = suffix.to_str().ok_or_else(|| { + eprintln!("invalid file extension {:?}", suffix); + einval!() + })?; + spec = spec.suffix(suffix); + } + + // Set log directory + let parent_dir = path.parent(); + if let Some(p) = parent_dir { + let cwd = current_dir()?; + let dir = if !p.has_root() { + cwd.join(p) + } else { + p.to_path_buf() + }; + spec = spec.directory(dir); + } + + // We rely on rust `log` macro to limit current log level rather than `flexi_logger` + // So we set `flexi_logger` log level to "trace" which is High enough. Otherwise, we + // can't change log level to a higher level than what is passed to `flexi_logger`. + let mut logger = Logger::try_with_env_or_str("trace") + .map_err(|_e| enosys!())? + .log_to_file(spec) + .append() + .format(opt_format); + + // Set log rotation + if rotation_size > 0 { + let log_rotation_size_byte: u64 = rotation_size * 1024 * 1024; + logger = logger.rotate( + Criterion::Size(log_rotation_size_byte), + Naming::Timestamps, + Cleanup::KeepCompressedFiles(10), + ); + } + + logger.start().map_err(|e| { + eprintln!("{:?}", e); + eother!(e) + })?; + } else { + // We rely on rust `log` macro to limit current log level rather than `flexi_logger` + // So we set `flexi_logger` log level to "trace" which is High enough. Otherwise, we + // can't change log level to a higher level than what is passed to `flexi_logger`. + Logger::try_with_env_or_str("trace") + .map_err(|_e| enosys!())? + .format(colored_opt_format) + .start() + .map_err(|e| eother!(e))?; + } + + log::set_max_level(level); + + // Dump panic info and backtrace to logger. + log_panics::Config::new() + .backtrace_mode(log_panics::BacktraceMode::Resolved) + .install_panic_hook(); + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_log_level_to_verbosity() { + assert_eq!(log_level_to_verbosity(log::LevelFilter::Off), 0); + assert_eq!(log_level_to_verbosity(log::LevelFilter::Error), 0); + assert_eq!(log_level_to_verbosity(log::LevelFilter::Warn), 1); + } + + #[test] + fn test_log_rotation() { + let log_file = Some(PathBuf::from("test_log_rotation")); + let level = LevelFilter::Info; + let rotation_size = 1; // 1MB + + assert!(setup_logging(log_file, level, rotation_size).is_ok()); + } +} diff --git a/src/signal.rs b/src/signal.rs index 44dd92a2e56..67bf745390a 100644 --- a/src/signal.rs +++ b/src/signal.rs @@ -1,20 +1,20 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use nix::sys::signal; - -/// Register signal handler for a signal. -pub fn register_signal_handler(sig: signal::Signal, handler: extern "C" fn(libc::c_int)) { - let sa = signal::SigAction::new( - signal::SigHandler::Handler(handler), - signal::SaFlags::empty(), - signal::SigSet::empty(), - ); - - unsafe { - // Signal registration fails, just panic since nydusd won't work properly. - signal::sigaction(sig, &sa).unwrap(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use nix::sys::signal; + +/// Register signal handler for a signal. 
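// A usage sketch, assuming the nix and libc dependencies already used by this module, for
// the `register_signal_handler` function defined below. The handler runs in signal
// context, so it only touches an atomic flag; the handler name and flag are hypothetical.
use std::sync::atomic::{AtomicBool, Ordering};

static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);

extern "C" fn handle_sigterm(_sig: libc::c_int) {
    // Only async-signal-safe work belongs in a signal handler.
    SHUTDOWN_REQUESTED.store(true, Ordering::SeqCst);
}

fn install_handlers() {
    register_signal_handler(nix::sys::signal::Signal::SIGTERM, handle_sigterm);
}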
+pub fn register_signal_handler(sig: signal::Signal, handler: extern "C" fn(libc::c_int)) { + let sa = signal::SigAction::new( + signal::SigHandler::Handler(handler), + signal::SaFlags::empty(), + signal::SigSet::empty(), + ); + + unsafe { + // Signal registration fails, just panic since nydusd won't work properly. + signal::sigaction(sig, &sa).unwrap(); + } +} diff --git a/storage/Cargo.toml b/storage/Cargo.toml index a45ba0a1fe1..9025d06849d 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -1,78 +1,78 @@ -[package] -name = "nydus-storage" -version = "0.6.4" -description = "Storage subsystem for Nydus Image Service" -authors = ["The Nydus Developers"] -license = "Apache-2.0 OR BSD-3-Clause" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -arc-swap = "1.5" -base64 = { version = "0.21", optional = true } -bitflags = "1.2.1" -hex = "0.4.3" -hmac = { version = "0.12.1", optional = true } -http = { version = "0.2.8", optional = true } -httpdate = { version = "1.0", optional = true } -hyper = { version = "0.14.11", optional = true } -hyperlocal = { version = "0.8.0", optional = true } -lazy_static = "1.4.0" -leaky-bucket = { version = "0.12.1", optional = true } -libc = "0.2" -log = "0.4.8" -nix = "0.24" -reqwest = { version = "0.11.14", features = ["blocking", "json"], optional = true } -rusqlite = { version = "0.30", features = ["bundled"], optional = true } -r2d2 = { version = "0.8", optional = true } -r2d2_sqlite = { version = "0.23", optional = true } -serde = { version = "1.0.110", features = ["serde_derive", "rc"] } -serde_json = "1.0.53" -sha1 = { version = "0.10.5", optional = true } -sha2 = { version = "0.10.2", optional = true } -tar = "0.4.40" -time = { version = "0.3.14", features = ["formatting"], optional = true } -tokio = { version = "1.19.0", features = [ - "macros", - "rt", - "rt-multi-thread", - "sync", - "time", -] } -url = { version = "2.1.1", optional = true } -vm-memory = "0.10" -fuse-backend-rs = "^0.12.0" -gpt = { version = "3.1.0", optional = true } - -nydus-api = { version = "0.3", path = "../api" } -nydus-utils = { version = "0.4", path = "../utils", features = [ - "encryption", - "zran", -] } - -[dev-dependencies] -vmm-sys-util = "0.11" -tar = "0.4.40" -regex = "1.7.0" -toml = "0.5" - -[features] -default = ["dedup"] -backend-localdisk = [] -backend-localdisk-gpt = ["gpt", "backend-localdisk"] -backend-localfs = [] -backend-oss = ["base64", "httpdate", "hmac", "sha1", "reqwest", "url"] -backend-registry = ["base64", "reqwest", "url"] -backend-s3 = ["base64", "hmac", "http", "reqwest", "sha2", "time", "url"] -backend-http-proxy = ["hyper", "hyperlocal", "http", "reqwest", "url"] -dedup = ["rusqlite", "r2d2", "r2d2_sqlite"] -prefetch-rate-limit = ["leaky-bucket"] - -[package.metadata.docs.rs] -all-features = true -targets = [ - "x86_64-unknown-linux-gnu", - "aarch64-unknown-linux-gnu", - "aarch64-apple-darwin", -] +[package] +name = "nydus-storage" +version = "0.6.4" +description = "Storage subsystem for Nydus Image Service" +authors = ["The Nydus Developers"] +license = "Apache-2.0 OR BSD-3-Clause" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +arc-swap = "1.5" +base64 = { version = "0.21", optional = true } +bitflags = "1.2.1" +hex = "0.4.3" +hmac = { version = "0.12.1", optional = true } +http = { version = "0.2.8", optional = true } +httpdate = { version = "1.0", optional = true } +hyper = { version = 
"0.14.11", optional = true } +hyperlocal = { version = "0.8.0", optional = true } +lazy_static = "1.4.0" +leaky-bucket = { version = "0.12.1", optional = true } +libc = "0.2" +log = "0.4.8" +nix = "0.24" +reqwest = { version = "0.11.14", features = ["blocking", "json"], optional = true } +rusqlite = { version = "0.30", features = ["bundled"], optional = true } +r2d2 = { version = "0.8", optional = true } +r2d2_sqlite = { version = "0.23", optional = true } +serde = { version = "1.0.110", features = ["serde_derive", "rc"] } +serde_json = "1.0.53" +sha1 = { version = "0.10.5", optional = true } +sha2 = { version = "0.10.2", optional = true } +tar = "0.4.40" +time = { version = "0.3.14", features = ["formatting"], optional = true } +tokio = { version = "1.19.0", features = [ + "macros", + "rt", + "rt-multi-thread", + "sync", + "time", +] } +url = { version = "2.1.1", optional = true } +vm-memory = "0.10" +fuse-backend-rs = "^0.12.0" +gpt = { version = "3.1.0", optional = true } + +nydus-api = { version = "0.3", path = "../api" } +nydus-utils = { version = "0.4", path = "../utils", features = [ + "encryption", + "zran", +] } + +[dev-dependencies] +vmm-sys-util = "0.11" +tar = "0.4.40" +regex = "1.7.0" +toml = "0.5" + +[features] +default = ["dedup"] +backend-localdisk = [] +backend-localdisk-gpt = ["gpt", "backend-localdisk"] +backend-localfs = [] +backend-oss = ["base64", "httpdate", "hmac", "sha1", "reqwest", "url"] +backend-registry = ["base64", "reqwest", "url"] +backend-s3 = ["base64", "hmac", "http", "reqwest", "sha2", "time", "url"] +backend-http-proxy = ["hyper", "hyperlocal", "http", "reqwest", "url"] +dedup = ["rusqlite", "r2d2", "r2d2_sqlite"] +prefetch-rate-limit = ["leaky-bucket"] + +[package.metadata.docs.rs] +all-features = true +targets = [ + "x86_64-unknown-linux-gnu", + "aarch64-unknown-linux-gnu", + "aarch64-apple-darwin", +] diff --git a/storage/README.md b/storage/README.md index ff96ca4b3a9..4001a77f047 100644 --- a/storage/README.md +++ b/storage/README.md @@ -1,20 +1,20 @@ -# nydus-storage - -The core storage subsystem for [Nydus Image Service](https://nydus.dev/) to: -- Fetch blob objects from storage backend such as Registry, OSS, S3, local disk and file systems etc. -- Load data from storage backend on demand. -- Cache blob objects on local storage. - -## Support - -**Platforms**: -- x86_64 -- aarch64 - -**Operating Systems**: -- Linux -- MacOS - -## License - -This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). +# nydus-storage + +The core storage subsystem for [Nydus Image Service](https://nydus.dev/) to: +- Fetch blob objects from storage backend such as Registry, OSS, S3, local disk and file systems etc. +- Load data from storage backend on demand. +- Cache blob objects on local storage. + +## Support + +**Platforms**: +- x86_64 +- aarch64 + +**Operating Systems**: +- Linux +- MacOS + +## License + +This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). diff --git a/storage/src/backend/connection.rs b/storage/src/backend/connection.rs index 75de84a3640..ff4747e96dd 100644 --- a/storage/src/backend/connection.rs +++ b/storage/src/backend/connection.rs @@ -1,792 +1,792 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Help library to manage network connections. 
-use std::cell::RefCell; -use std::collections::HashMap; -use std::io::{Read, Result}; -use std::str::FromStr; -use std::sync::atomic::{AtomicBool, AtomicI16, AtomicU64, AtomicU8, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use std::{fmt, thread}; - -use log::{max_level, Level}; - -use reqwest::header::{HeaderName, HeaderValue}; -use reqwest::{ - self, - blocking::{Body, Client, Response}, - header::HeaderMap, - redirect::Policy, - Method, StatusCode, Url, -}; - -use nydus_api::{HttpProxyConfig, MirrorConfig, OssConfig, ProxyConfig, RegistryConfig, S3Config}; -use url::ParseError; - -const HEADER_AUTHORIZATION: &str = "Authorization"; - -const RATE_LIMITED_LOG_TIME: u8 = 2; - -thread_local! { - pub static LAST_FALLBACK_AT: RefCell = RefCell::new(UNIX_EPOCH); -} - -/// Error codes related to network communication. -#[derive(Debug)] -pub enum ConnectionError { - Disconnected, - ErrorWithMsg(String), - Common(reqwest::Error), - Format(reqwest::Error), - Url(String, ParseError), - Scheme(String), - MirrorHost, - MirrorPort, -} - -impl fmt::Display for ConnectionError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ConnectionError::Disconnected => write!(f, "network connection disconnected"), - ConnectionError::ErrorWithMsg(s) => write!(f, "network error, {}", s), - ConnectionError::Common(e) => write!(f, "network error, {}", e), - ConnectionError::Format(e) => write!(f, "{}", e), - ConnectionError::Url(s, e) => write!(f, "failed to parse URL {}, {}", s, e), - ConnectionError::Scheme(s) => write!(f, "invalid scheme {}", s), - ConnectionError::MirrorHost => write!(f, "invalid mirror host"), - ConnectionError::MirrorPort => write!(f, "invalid mirror port"), - } - } -} - -/// Specialized `Result` for network communication. -type ConnectionResult = std::result::Result; - -/// Generic configuration for storage backends. -#[derive(Debug, Clone)] -pub(crate) struct ConnectionConfig { - pub proxy: ProxyConfig, - pub mirrors: Vec, - pub skip_verify: bool, - pub timeout: u32, - pub connect_timeout: u32, - pub retry_limit: u8, -} - -impl Default for ConnectionConfig { - fn default() -> Self { - Self { - proxy: ProxyConfig::default(), - mirrors: Vec::::new(), - skip_verify: false, - timeout: 5, - connect_timeout: 5, - retry_limit: 0, - } - } -} - -impl From for ConnectionConfig { - fn from(c: OssConfig) -> ConnectionConfig { - ConnectionConfig { - proxy: c.proxy, - mirrors: c.mirrors, - skip_verify: c.skip_verify, - timeout: c.timeout, - connect_timeout: c.connect_timeout, - retry_limit: c.retry_limit, - } - } -} - -impl From for ConnectionConfig { - fn from(c: S3Config) -> ConnectionConfig { - ConnectionConfig { - proxy: c.proxy, - mirrors: c.mirrors, - skip_verify: c.skip_verify, - timeout: c.timeout, - connect_timeout: c.connect_timeout, - retry_limit: c.retry_limit, - } - } -} - -impl From for ConnectionConfig { - fn from(c: RegistryConfig) -> ConnectionConfig { - ConnectionConfig { - proxy: c.proxy, - mirrors: c.mirrors, - skip_verify: c.skip_verify, - timeout: c.timeout, - connect_timeout: c.connect_timeout, - retry_limit: c.retry_limit, - } - } -} - -impl From for ConnectionConfig { - fn from(c: HttpProxyConfig) -> ConnectionConfig { - ConnectionConfig { - proxy: c.proxy, - mirrors: c.mirrors, - skip_verify: c.skip_verify, - timeout: c.timeout, - connect_timeout: c.connect_timeout, - retry_limit: c.retry_limit, - } - } -} - -/// HTTP request data with progress callback. 
-#[derive(Clone)] -pub struct Progress { - inner: R, - current: usize, - total: usize, - callback: fn((usize, usize)), -} - -impl Progress { - /// Create a new `Progress` object. - pub fn new(r: R, total: usize, callback: fn((usize, usize))) -> Progress { - Progress { - inner: r, - current: 0, - total, - callback, - } - } -} - -impl Read for Progress { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.inner.read(buf).map(|count| { - self.current += count as usize; - (self.callback)((self.current, self.total)); - count - }) - } -} - -/// HTTP request data to send to server. -#[derive(Clone)] -pub enum ReqBody { - Read(Progress, usize), - Buf(Vec), - Form(HashMap), -} - -#[derive(Debug)] -struct ProxyHealth { - status: AtomicBool, - ping_url: Option, - check_interval: Duration, - check_pause_elapsed: u64, -} - -impl ProxyHealth { - fn new(check_interval: u64, check_pause_elapsed: u64, ping_url: Option) -> Self { - ProxyHealth { - status: AtomicBool::from(true), - ping_url, - check_interval: Duration::from_secs(check_interval), - check_pause_elapsed, - } - } - - fn ok(&self) -> bool { - self.status.load(Ordering::Relaxed) - } - - fn set(&self, health: bool) { - self.status.store(health, Ordering::Relaxed); - } -} - -const SCHEME_REVERSION_CACHE_UNSET: i16 = 0; -const SCHEME_REVERSION_CACHE_REPLACE: i16 = 1; -const SCHEME_REVERSION_CACHE_RETAIN: i16 = 2; - -#[derive(Debug)] -struct Proxy { - client: Client, - health: ProxyHealth, - fallback: bool, - use_http: bool, - // Cache whether should try to replace scheme for proxy url. - replace_scheme: AtomicI16, -} - -impl Proxy { - fn try_use_http(&self, url: &str) -> Option { - if self.replace_scheme.load(Ordering::Relaxed) == SCHEME_REVERSION_CACHE_REPLACE { - Some(url.replacen("https", "http", 1)) - } else if self.replace_scheme.load(Ordering::Relaxed) == SCHEME_REVERSION_CACHE_UNSET { - if url.starts_with("https:") { - self.replace_scheme - .store(SCHEME_REVERSION_CACHE_REPLACE, Ordering::Relaxed); - info!("Will replace backend's URL's scheme with http"); - Some(url.replacen("https", "http", 1)) - } else if url.starts_with("http:") { - self.replace_scheme - .store(SCHEME_REVERSION_CACHE_RETAIN, Ordering::Relaxed); - None - } else { - warn!("Can't replace http scheme, url {}", url); - None - } - } else { - None - } - } -} - -/// Check whether the HTTP status code is a success result. -pub(crate) fn is_success_status(status: StatusCode) -> bool { - status >= StatusCode::OK && status < StatusCode::BAD_REQUEST -} - -/// Convert a HTTP `Response` into an `Result`. -pub(crate) fn respond(resp: Response, catch_status: bool) -> ConnectionResult { - if !catch_status || is_success_status(resp.status()) { - Ok(resp) - } else { - let msg = resp.text().map_err(ConnectionError::Format)?; - Err(ConnectionError::ErrorWithMsg(msg)) - } -} - -/// A network connection to communicate with remote server. -#[derive(Debug)] -pub(crate) struct Connection { - client: Client, - proxy: Option>, - pub mirrors: Vec>, - pub shutdown: AtomicBool, - /// Timestamp of connection's last active request, represents as duration since UNIX_EPOCH in seconds. - last_active: Arc, -} - -#[derive(Debug)] -pub(crate) struct Mirror { - /// Information for mirror from configuration file. - pub config: MirrorConfig, - /// Mirror status, it will be set to false by atomic operation when mirror is not work. - status: AtomicBool, - /// Failed times requesting mirror, the status will be marked as false when failed_times = failure_limit. 
- failed_times: AtomicU8, - /// Failure count for which mirror is considered unavailable. - failure_limit: u8, -} - -impl Mirror { - /// Convert original URL to mirror URL. - fn mirror_url(&self, url: &str) -> ConnectionResult { - let mirror_host = Url::parse(&self.config.host) - .map_err(|e| ConnectionError::Url(self.config.host.clone(), e))?; - let mut current_url = - Url::parse(url).map_err(|e| ConnectionError::Url(url.to_string(), e))?; - - current_url - .set_scheme(mirror_host.scheme()) - .map_err(|_| ConnectionError::Scheme(mirror_host.scheme().to_string()))?; - current_url - .set_host(mirror_host.host_str()) - .map_err(|_| ConnectionError::MirrorHost)?; - current_url - .set_port(mirror_host.port()) - .map_err(|_| ConnectionError::MirrorPort)?; - Ok(current_url) - } -} - -impl Connection { - /// Create a new connection according to the configuration. - pub fn new(config: &ConnectionConfig) -> Result> { - info!("backend config: {:?}", config); - let client = Self::build_connection("", config)?; - - let proxy = if !config.proxy.url.is_empty() { - let ping_url = if !config.proxy.ping_url.is_empty() { - Some(Url::from_str(&config.proxy.ping_url).map_err(|e| einval!(e))?) - } else { - None - }; - Some(Arc::new(Proxy { - client: Self::build_connection(&config.proxy.url, config)?, - health: ProxyHealth::new( - config.proxy.check_interval, - config.proxy.check_pause_elapsed, - ping_url, - ), - fallback: config.proxy.fallback, - use_http: config.proxy.use_http, - replace_scheme: AtomicI16::new(SCHEME_REVERSION_CACHE_UNSET), - })) - } else { - None - }; - - let mut mirrors = Vec::new(); - for mirror_config in config.mirrors.iter() { - if !mirror_config.host.is_empty() { - mirrors.push(Arc::new(Mirror { - config: mirror_config.clone(), - status: AtomicBool::from(true), - failed_times: AtomicU8::from(0), - failure_limit: mirror_config.failure_limit, - })); - } - } - - let connection = Arc::new(Connection { - client, - proxy, - mirrors, - shutdown: AtomicBool::new(false), - last_active: Arc::new(AtomicU64::new( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(), - )), - }); - - // Start proxy's health checking thread. - connection.start_proxy_health_thread(config.connect_timeout as u64); - - // Start mirrors' health checking thread. - connection.start_mirrors_health_thread(config.timeout as u64); - - Ok(connection) - } - - fn start_proxy_health_thread(&self, connect_timeout: u64) { - if let Some(proxy) = self.proxy.as_ref() { - if proxy.health.ping_url.is_some() { - let proxy = proxy.clone(); - let last_active = Arc::clone(&self.last_active); - - // Spawn thread to update the health status of proxy server. - thread::spawn(move || { - let ping_url = proxy.health.ping_url.as_ref().unwrap(); - let mut last_success = true; - - loop { - let elapsed = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() - - last_active.load(Ordering::Relaxed); - // If the connection is not active for a set time, skip proxy health check. 
- if elapsed <= proxy.health.check_pause_elapsed { - let client = Client::new(); - let _ = client - .get(ping_url.clone()) - .timeout(Duration::from_secs(connect_timeout as u64)) - .send() - .map(|resp| { - let success = is_success_status(resp.status()); - if last_success && !success { - warn!( - "Detected proxy unhealthy when pinging proxy, response status {}", - resp.status() - ); - } else if !last_success && success { - info!("Backend proxy recovered") - } - last_success = success; - proxy.health.set(success); - }) - .map_err(|e| { - if last_success { - warn!("Detected proxy unhealthy when ping proxy, {}", e); - } - last_success = false; - proxy.health.set(false) - }); - } - - thread::sleep(proxy.health.check_interval); - } - }); - } - } - } - - fn start_mirrors_health_thread(&self, timeout: u64) { - for mirror in self.mirrors.iter() { - let mirror_cloned = mirror.clone(); - let last_active = Arc::clone(&self.last_active); - - // Spawn thread to update the health status of mirror server. - thread::spawn(move || { - let mirror_health_url = if mirror_cloned.config.ping_url.is_empty() { - format!("{}/v2", mirror_cloned.config.host) - } else { - mirror_cloned.config.ping_url.clone() - }; - info!( - "[mirror] start health check, ping url: {}", - mirror_health_url - ); - - let client = Client::new(); - loop { - let elapsed = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() - - last_active.load(Ordering::Relaxed); - // If the connection is not active for a set time, skip mirror health check. - if elapsed <= mirror_cloned.config.health_check_pause_elapsed { - // Try to recover the mirror server when it is unavailable. - if !mirror_cloned.status.load(Ordering::Relaxed) { - info!( - "[mirror] server unhealthy, try to recover: {}", - mirror_cloned.config.host - ); - - let _ = client - .get(mirror_health_url.as_str()) - .timeout(Duration::from_secs(timeout as u64)) - .send() - .map(|resp| { - // If the response status is less than StatusCode::INTERNAL_SERVER_ERROR, - // the mirror server is recovered. - if resp.status() < StatusCode::INTERNAL_SERVER_ERROR { - info!( - "[mirror] server recovered: {}", - mirror_cloned.config.host - ); - mirror_cloned.failed_times.store(0, Ordering::Relaxed); - mirror_cloned.status.store(true, Ordering::Relaxed); - } - }) - .map_err(|e| { - warn!( - "[mirror] failed to recover server: {}, {}", - mirror_cloned.config.host, e - ); - }); - } - } - - thread::sleep(Duration::from_secs( - mirror_cloned.config.health_check_interval, - )); - } - }); - } - } - - /// Shutdown the connection. 
- pub fn shutdown(&self) { - self.shutdown.store(true, Ordering::Release); - } - - #[allow(clippy::too_many_arguments)] - pub fn call( - &self, - method: Method, - url: &str, - query: Option<&[(&str, &str)]>, - data: Option>, - headers: &mut HeaderMap, - catch_status: bool, - ) -> ConnectionResult { - if self.shutdown.load(Ordering::Acquire) { - return Err(ConnectionError::Disconnected); - } - self.last_active.store( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(), - Ordering::Relaxed, - ); - - if let Some(proxy) = &self.proxy { - if proxy.health.ok() { - let data_cloned = data.as_ref().cloned(); - - let http_url: Option; - let mut replaced_url = url; - - if proxy.use_http { - http_url = proxy.try_use_http(url); - if let Some(ref r) = http_url { - replaced_url = r.as_str(); - } - } - - let result = self.call_inner( - &proxy.client, - method.clone(), - replaced_url, - &query, - data_cloned, - headers, - catch_status, - true, - ); - - match result { - Ok(resp) => { - if !proxy.fallback || resp.status() < StatusCode::INTERNAL_SERVER_ERROR { - return Ok(resp); - } - } - Err(err) => { - if !proxy.fallback { - return Err(err); - } - } - } - // If proxy server responds invalid status code or http connection failed, we need to - // fallback to origin server, the policy only applicable to non-upload operation - warn!("Request proxy server failed, fallback to original server"); - } else { - LAST_FALLBACK_AT.with(|f| { - let current = SystemTime::now(); - if current.duration_since(*f.borrow()).unwrap().as_secs() - >= RATE_LIMITED_LOG_TIME as u64 - { - warn!("Proxy server is not healthy, fallback to original server"); - f.replace(current); - } - }) - } - } - - let mut mirror_enabled = false; - if !self.mirrors.is_empty() { - mirror_enabled = true; - for mirror in self.mirrors.iter() { - if mirror.status.load(Ordering::Relaxed) { - let data_cloned = data.as_ref().cloned(); - - for (key, value) in mirror.config.headers.iter() { - headers.insert( - HeaderName::from_str(key).unwrap(), - HeaderValue::from_str(value).unwrap(), - ); - } - - let current_url = mirror.mirror_url(url)?; - debug!("[mirror] replace to: {}", current_url); - - let result = self.call_inner( - &self.client, - method.clone(), - current_url.as_str(), - &query, - data_cloned, - headers, - catch_status, - false, - ); - - match result { - Ok(resp) => { - // If the response status >= INTERNAL_SERVER_ERROR, move to the next mirror server. - if resp.status() < StatusCode::INTERNAL_SERVER_ERROR { - return Ok(resp); - } - } - Err(err) => { - warn!( - "[mirror] request failed, server: {:?}, {:?}", - mirror.config.host, err - ); - mirror.failed_times.fetch_add(1, Ordering::Relaxed); - - if mirror.failed_times.load(Ordering::Relaxed) >= mirror.failure_limit { - warn!( - "[mirror] exceed failure limit {}, server disabled: {:?}", - mirror.failure_limit, mirror - ); - mirror.status.store(false, Ordering::Relaxed); - } - } - } - } - // Remove mirror-related headers to avoid sending them to the next mirror server and original registry. 
- for (key, _) in mirror.config.headers.iter() { - headers.remove(HeaderName::from_str(key).unwrap()); - } - } - } - - if mirror_enabled { - warn!("[mirror] request all servers failed, fallback to original server."); - } - - self.call_inner( - &self.client, - method, - url, - &query, - data, - headers, - catch_status, - false, - ) - } - - fn build_connection(proxy: &str, config: &ConnectionConfig) -> Result { - let connect_timeout = if config.connect_timeout != 0 { - Some(Duration::from_secs(config.connect_timeout as u64)) - } else { - None - }; - let timeout = if config.timeout != 0 { - Some(Duration::from_secs(config.timeout as u64)) - } else { - None - }; - - let mut cb = Client::builder() - .timeout(timeout) - .connect_timeout(connect_timeout) - .redirect(Policy::none()); - - if config.skip_verify { - cb = cb.danger_accept_invalid_certs(true); - } - - if !proxy.is_empty() { - cb = cb.proxy(reqwest::Proxy::all(proxy).map_err(|e| einval!(e))?) - } - - cb.build().map_err(|e| einval!(e)) - } - - #[allow(clippy::too_many_arguments)] - fn call_inner( - &self, - client: &Client, - method: Method, - url: &str, - query: &Option<&[(&str, &str)]>, - data: Option>, - headers: &HeaderMap, - catch_status: bool, - proxy: bool, - ) -> ConnectionResult { - // Only clone header when debugging to reduce potential overhead. - let display_headers = if max_level() >= Level::Debug { - let mut display_headers = headers.clone(); - display_headers.remove(HEADER_AUTHORIZATION); - Some(display_headers) - } else { - None - }; - let has_data = data.is_some(); - let start = Instant::now(); - - let mut rb = client.request(method.clone(), url).headers(headers.clone()); - if let Some(q) = query.as_ref() { - rb = rb.query(q); - } - - let ret; - if let Some(data) = data { - match data { - ReqBody::Read(body, total) => { - let body = Body::sized(body, total as u64); - ret = rb.body(body).send(); - } - ReqBody::Buf(buf) => { - ret = rb.body(buf).send(); - } - ReqBody::Form(form) => { - ret = rb.form(&form).send(); - } - } - } else { - ret = rb.body("").send(); - } - - debug!( - "{} Request: {} {} headers: {:?}, proxy: {}, data: {}, duration: {}ms", - std::thread::current().name().unwrap_or_default(), - method, - url, - display_headers, - proxy, - has_data, - Instant::now().duration_since(start).as_millis(), - ); - - match ret { - Err(err) => Err(ConnectionError::Common(err)), - Ok(resp) => respond(resp, catch_status), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Cursor; - - #[test] - fn test_progress() { - let buf = vec![0x1u8, 2, 3, 4, 5]; - let mut progress = Progress::new(Cursor::new(buf), 5, |(curr, total)| { - assert!(curr == 2 || curr == 4); - assert_eq!(total, 5); - }); - - let mut buf1 = [0x0u8; 2]; - assert_eq!(progress.read(&mut buf1).unwrap(), 2); - assert_eq!(buf1[0], 1); - assert_eq!(buf1[1], 2); - - assert_eq!(progress.read(&mut buf1).unwrap(), 2); - assert_eq!(buf1[0], 3); - assert_eq!(buf1[1], 4); - } - - #[test] - fn test_proxy_health() { - let checker = ProxyHealth::new(5, 300, None); - - assert!(checker.ok()); - assert!(checker.ok()); - checker.set(false); - assert!(!checker.ok()); - assert!(!checker.ok()); - checker.set(true); - assert!(checker.ok()); - assert!(checker.ok()); - } - - #[test] - fn test_is_success_status() { - assert!(!is_success_status(StatusCode::CONTINUE)); - assert!(is_success_status(StatusCode::OK)); - assert!(is_success_status(StatusCode::PERMANENT_REDIRECT)); - assert!(!is_success_status(StatusCode::BAD_REQUEST)); - } - - #[test] - fn 
test_connection_config_default() { - let config = ConnectionConfig::default(); - - assert_eq!(config.timeout, 5); - assert_eq!(config.connect_timeout, 5); - assert_eq!(config.retry_limit, 0); - assert_eq!(config.proxy.check_interval, 5); - assert_eq!(config.proxy.check_pause_elapsed, 300); - assert!(config.proxy.fallback); - assert_eq!(config.proxy.ping_url, ""); - assert_eq!(config.proxy.url, ""); - assert!(config.mirrors.is_empty()); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Help library to manage network connections. +use std::cell::RefCell; +use std::collections::HashMap; +use std::io::{Read, Result}; +use std::str::FromStr; +use std::sync::atomic::{AtomicBool, AtomicI16, AtomicU64, AtomicU8, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use std::{fmt, thread}; + +use log::{max_level, Level}; + +use reqwest::header::{HeaderName, HeaderValue}; +use reqwest::{ + self, + blocking::{Body, Client, Response}, + header::HeaderMap, + redirect::Policy, + Method, StatusCode, Url, +}; + +use nydus_api::{HttpProxyConfig, MirrorConfig, OssConfig, ProxyConfig, RegistryConfig, S3Config}; +use url::ParseError; + +const HEADER_AUTHORIZATION: &str = "Authorization"; + +const RATE_LIMITED_LOG_TIME: u8 = 2; + +thread_local! { + pub static LAST_FALLBACK_AT: RefCell = RefCell::new(UNIX_EPOCH); +} + +/// Error codes related to network communication. +#[derive(Debug)] +pub enum ConnectionError { + Disconnected, + ErrorWithMsg(String), + Common(reqwest::Error), + Format(reqwest::Error), + Url(String, ParseError), + Scheme(String), + MirrorHost, + MirrorPort, +} + +impl fmt::Display for ConnectionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConnectionError::Disconnected => write!(f, "network connection disconnected"), + ConnectionError::ErrorWithMsg(s) => write!(f, "network error, {}", s), + ConnectionError::Common(e) => write!(f, "network error, {}", e), + ConnectionError::Format(e) => write!(f, "{}", e), + ConnectionError::Url(s, e) => write!(f, "failed to parse URL {}, {}", s, e), + ConnectionError::Scheme(s) => write!(f, "invalid scheme {}", s), + ConnectionError::MirrorHost => write!(f, "invalid mirror host"), + ConnectionError::MirrorPort => write!(f, "invalid mirror port"), + } + } +} + +/// Specialized `Result` for network communication. +type ConnectionResult = std::result::Result; + +/// Generic configuration for storage backends. 
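// A standalone sketch of the rate-limited-warning pattern built on LAST_FALLBACK_AT and
// RATE_LIMITED_LOG_TIME above: the fallback warning is emitted at most once per interval
// per thread, so an unhealthy proxy does not flood the log. The helper name is
// hypothetical; the real code uses warn!() instead of eprintln!().
use std::cell::RefCell;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

thread_local! {
    static LAST_WARNED_AT: RefCell<SystemTime> = RefCell::new(UNIX_EPOCH);
}

fn warn_rate_limited(min_interval: Duration, msg: &str) {
    LAST_WARNED_AT.with(|last| {
        let now = SystemTime::now();
        let elapsed = now
            .duration_since(*last.borrow())
            .unwrap_or(Duration::ZERO);
        if elapsed >= min_interval {
            eprintln!("{}", msg);
            last.replace(now);
        }
    });
}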
+#[derive(Debug, Clone)] +pub(crate) struct ConnectionConfig { + pub proxy: ProxyConfig, + pub mirrors: Vec, + pub skip_verify: bool, + pub timeout: u32, + pub connect_timeout: u32, + pub retry_limit: u8, +} + +impl Default for ConnectionConfig { + fn default() -> Self { + Self { + proxy: ProxyConfig::default(), + mirrors: Vec::::new(), + skip_verify: false, + timeout: 5, + connect_timeout: 5, + retry_limit: 0, + } + } +} + +impl From for ConnectionConfig { + fn from(c: OssConfig) -> ConnectionConfig { + ConnectionConfig { + proxy: c.proxy, + mirrors: c.mirrors, + skip_verify: c.skip_verify, + timeout: c.timeout, + connect_timeout: c.connect_timeout, + retry_limit: c.retry_limit, + } + } +} + +impl From for ConnectionConfig { + fn from(c: S3Config) -> ConnectionConfig { + ConnectionConfig { + proxy: c.proxy, + mirrors: c.mirrors, + skip_verify: c.skip_verify, + timeout: c.timeout, + connect_timeout: c.connect_timeout, + retry_limit: c.retry_limit, + } + } +} + +impl From for ConnectionConfig { + fn from(c: RegistryConfig) -> ConnectionConfig { + ConnectionConfig { + proxy: c.proxy, + mirrors: c.mirrors, + skip_verify: c.skip_verify, + timeout: c.timeout, + connect_timeout: c.connect_timeout, + retry_limit: c.retry_limit, + } + } +} + +impl From for ConnectionConfig { + fn from(c: HttpProxyConfig) -> ConnectionConfig { + ConnectionConfig { + proxy: c.proxy, + mirrors: c.mirrors, + skip_verify: c.skip_verify, + timeout: c.timeout, + connect_timeout: c.connect_timeout, + retry_limit: c.retry_limit, + } + } +} + +/// HTTP request data with progress callback. +#[derive(Clone)] +pub struct Progress { + inner: R, + current: usize, + total: usize, + callback: fn((usize, usize)), +} + +impl Progress { + /// Create a new `Progress` object. + pub fn new(r: R, total: usize, callback: fn((usize, usize))) -> Progress { + Progress { + inner: r, + current: 0, + total, + callback, + } + } +} + +impl Read for Progress { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.read(buf).map(|count| { + self.current += count as usize; + (self.callback)((self.current, self.total)); + count + }) + } +} + +/// HTTP request data to send to server. +#[derive(Clone)] +pub enum ReqBody { + Read(Progress, usize), + Buf(Vec), + Form(HashMap), +} + +#[derive(Debug)] +struct ProxyHealth { + status: AtomicBool, + ping_url: Option, + check_interval: Duration, + check_pause_elapsed: u64, +} + +impl ProxyHealth { + fn new(check_interval: u64, check_pause_elapsed: u64, ping_url: Option) -> Self { + ProxyHealth { + status: AtomicBool::from(true), + ping_url, + check_interval: Duration::from_secs(check_interval), + check_pause_elapsed, + } + } + + fn ok(&self) -> bool { + self.status.load(Ordering::Relaxed) + } + + fn set(&self, health: bool) { + self.status.store(health, Ordering::Relaxed); + } +} + +const SCHEME_REVERSION_CACHE_UNSET: i16 = 0; +const SCHEME_REVERSION_CACHE_REPLACE: i16 = 1; +const SCHEME_REVERSION_CACHE_RETAIN: i16 = 2; + +#[derive(Debug)] +struct Proxy { + client: Client, + health: ProxyHealth, + fallback: bool, + use_http: bool, + // Cache whether should try to replace scheme for proxy url. 
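// A self-contained sketch of the tri-state scheme cache implemented by `try_use_http`
// below: the first URL seen decides whether the proxy path should rewrite https to http,
// and that decision is cached in an atomic so later requests skip the string inspection.
// The type and constant names here are local to the sketch.
use std::sync::atomic::{AtomicI16, Ordering};

const UNSET: i16 = 0;
const REPLACE: i16 = 1;
const RETAIN: i16 = 2;

struct SchemeCache(AtomicI16);

impl SchemeCache {
    fn rewrite(&self, url: &str) -> Option<String> {
        match self.0.load(Ordering::Relaxed) {
            REPLACE => Some(url.replacen("https", "http", 1)),
            UNSET => {
                if url.starts_with("https:") {
                    self.0.store(REPLACE, Ordering::Relaxed);
                    Some(url.replacen("https", "http", 1))
                } else if url.starts_with("http:") {
                    self.0.store(RETAIN, Ordering::Relaxed);
                    None
                } else {
                    None // unknown scheme: leave the cache unset
                }
            }
            _ => None, // RETAIN: keep the original scheme
        }
    }
}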
+ replace_scheme: AtomicI16, +} + +impl Proxy { + fn try_use_http(&self, url: &str) -> Option { + if self.replace_scheme.load(Ordering::Relaxed) == SCHEME_REVERSION_CACHE_REPLACE { + Some(url.replacen("https", "http", 1)) + } else if self.replace_scheme.load(Ordering::Relaxed) == SCHEME_REVERSION_CACHE_UNSET { + if url.starts_with("https:") { + self.replace_scheme + .store(SCHEME_REVERSION_CACHE_REPLACE, Ordering::Relaxed); + info!("Will replace backend's URL's scheme with http"); + Some(url.replacen("https", "http", 1)) + } else if url.starts_with("http:") { + self.replace_scheme + .store(SCHEME_REVERSION_CACHE_RETAIN, Ordering::Relaxed); + None + } else { + warn!("Can't replace http scheme, url {}", url); + None + } + } else { + None + } + } +} + +/// Check whether the HTTP status code is a success result. +pub(crate) fn is_success_status(status: StatusCode) -> bool { + status >= StatusCode::OK && status < StatusCode::BAD_REQUEST +} + +/// Convert a HTTP `Response` into an `Result`. +pub(crate) fn respond(resp: Response, catch_status: bool) -> ConnectionResult { + if !catch_status || is_success_status(resp.status()) { + Ok(resp) + } else { + let msg = resp.text().map_err(ConnectionError::Format)?; + Err(ConnectionError::ErrorWithMsg(msg)) + } +} + +/// A network connection to communicate with remote server. +#[derive(Debug)] +pub(crate) struct Connection { + client: Client, + proxy: Option>, + pub mirrors: Vec>, + pub shutdown: AtomicBool, + /// Timestamp of connection's last active request, represents as duration since UNIX_EPOCH in seconds. + last_active: Arc, +} + +#[derive(Debug)] +pub(crate) struct Mirror { + /// Information for mirror from configuration file. + pub config: MirrorConfig, + /// Mirror status, it will be set to false by atomic operation when mirror is not work. + status: AtomicBool, + /// Failed times requesting mirror, the status will be marked as false when failed_times = failure_limit. + failed_times: AtomicU8, + /// Failure count for which mirror is considered unavailable. + failure_limit: u8, +} + +impl Mirror { + /// Convert original URL to mirror URL. + fn mirror_url(&self, url: &str) -> ConnectionResult { + let mirror_host = Url::parse(&self.config.host) + .map_err(|e| ConnectionError::Url(self.config.host.clone(), e))?; + let mut current_url = + Url::parse(url).map_err(|e| ConnectionError::Url(url.to_string(), e))?; + + current_url + .set_scheme(mirror_host.scheme()) + .map_err(|_| ConnectionError::Scheme(mirror_host.scheme().to_string()))?; + current_url + .set_host(mirror_host.host_str()) + .map_err(|_| ConnectionError::MirrorHost)?; + current_url + .set_port(mirror_host.port()) + .map_err(|_| ConnectionError::MirrorPort)?; + Ok(current_url) + } +} + +impl Connection { + /// Create a new connection according to the configuration. + pub fn new(config: &ConnectionConfig) -> Result> { + info!("backend config: {:?}", config); + let client = Self::build_connection("", config)?; + + let proxy = if !config.proxy.url.is_empty() { + let ping_url = if !config.proxy.ping_url.is_empty() { + Some(Url::from_str(&config.proxy.ping_url).map_err(|e| einval!(e))?) 
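// A usage sketch, assuming the `url` crate this module already depends on, of the URL
// rewriting done by `Mirror::mirror_url` above: only the scheme, host and port come from
// the mirror, while the path and query of the original request are kept. The example
// hosts are hypothetical.
fn rewrite_to_mirror(original: &str, mirror: &str) -> Result<url::Url, Box<dyn std::error::Error>> {
    let mirror = url::Url::parse(mirror)?;
    let mut rewritten = url::Url::parse(original)?;
    rewritten
        .set_scheme(mirror.scheme())
        .map_err(|_| "unsupported scheme")?;
    rewritten.set_host(mirror.host_str())?;
    rewritten.set_port(mirror.port()).map_err(|_| "bad port")?;
    // e.g. https://registry.example.com/v2/foo/blobs/sha256:abc
    //  ->  http://mirror.local:5000/v2/foo/blobs/sha256:abc
    Ok(rewritten)
}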
+ } else { + None + }; + Some(Arc::new(Proxy { + client: Self::build_connection(&config.proxy.url, config)?, + health: ProxyHealth::new( + config.proxy.check_interval, + config.proxy.check_pause_elapsed, + ping_url, + ), + fallback: config.proxy.fallback, + use_http: config.proxy.use_http, + replace_scheme: AtomicI16::new(SCHEME_REVERSION_CACHE_UNSET), + })) + } else { + None + }; + + let mut mirrors = Vec::new(); + for mirror_config in config.mirrors.iter() { + if !mirror_config.host.is_empty() { + mirrors.push(Arc::new(Mirror { + config: mirror_config.clone(), + status: AtomicBool::from(true), + failed_times: AtomicU8::from(0), + failure_limit: mirror_config.failure_limit, + })); + } + } + + let connection = Arc::new(Connection { + client, + proxy, + mirrors, + shutdown: AtomicBool::new(false), + last_active: Arc::new(AtomicU64::new( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(), + )), + }); + + // Start proxy's health checking thread. + connection.start_proxy_health_thread(config.connect_timeout as u64); + + // Start mirrors' health checking thread. + connection.start_mirrors_health_thread(config.timeout as u64); + + Ok(connection) + } + + fn start_proxy_health_thread(&self, connect_timeout: u64) { + if let Some(proxy) = self.proxy.as_ref() { + if proxy.health.ping_url.is_some() { + let proxy = proxy.clone(); + let last_active = Arc::clone(&self.last_active); + + // Spawn thread to update the health status of proxy server. + thread::spawn(move || { + let ping_url = proxy.health.ping_url.as_ref().unwrap(); + let mut last_success = true; + + loop { + let elapsed = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + - last_active.load(Ordering::Relaxed); + // If the connection is not active for a set time, skip proxy health check. + if elapsed <= proxy.health.check_pause_elapsed { + let client = Client::new(); + let _ = client + .get(ping_url.clone()) + .timeout(Duration::from_secs(connect_timeout as u64)) + .send() + .map(|resp| { + let success = is_success_status(resp.status()); + if last_success && !success { + warn!( + "Detected proxy unhealthy when pinging proxy, response status {}", + resp.status() + ); + } else if !last_success && success { + info!("Backend proxy recovered") + } + last_success = success; + proxy.health.set(success); + }) + .map_err(|e| { + if last_success { + warn!("Detected proxy unhealthy when ping proxy, {}", e); + } + last_success = false; + proxy.health.set(false) + }); + } + + thread::sleep(proxy.health.check_interval); + } + }); + } + } + } + + fn start_mirrors_health_thread(&self, timeout: u64) { + for mirror in self.mirrors.iter() { + let mirror_cloned = mirror.clone(); + let last_active = Arc::clone(&self.last_active); + + // Spawn thread to update the health status of mirror server. + thread::spawn(move || { + let mirror_health_url = if mirror_cloned.config.ping_url.is_empty() { + format!("{}/v2", mirror_cloned.config.host) + } else { + mirror_cloned.config.ping_url.clone() + }; + info!( + "[mirror] start health check, ping url: {}", + mirror_health_url + ); + + let client = Client::new(); + loop { + let elapsed = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + - last_active.load(Ordering::Relaxed); + // If the connection is not active for a set time, skip mirror health check. + if elapsed <= mirror_cloned.config.health_check_pause_elapsed { + // Try to recover the mirror server when it is unavailable. 
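// A standalone sketch of the idle gate used by the proxy and mirror health-check loops
// above: `last_active` holds the epoch seconds of the last real request, and probing is
// skipped once the connection has been idle for longer than the configured pause
// threshold. The function name is local to the sketch.
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

fn should_probe(last_active: &AtomicU64, pause_after_idle_secs: u64) -> bool {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let idle = now.saturating_sub(last_active.load(Ordering::Relaxed));
    idle <= pause_after_idle_secs
}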
+ if !mirror_cloned.status.load(Ordering::Relaxed) { + info!( + "[mirror] server unhealthy, try to recover: {}", + mirror_cloned.config.host + ); + + let _ = client + .get(mirror_health_url.as_str()) + .timeout(Duration::from_secs(timeout as u64)) + .send() + .map(|resp| { + // If the response status is less than StatusCode::INTERNAL_SERVER_ERROR, + // the mirror server is recovered. + if resp.status() < StatusCode::INTERNAL_SERVER_ERROR { + info!( + "[mirror] server recovered: {}", + mirror_cloned.config.host + ); + mirror_cloned.failed_times.store(0, Ordering::Relaxed); + mirror_cloned.status.store(true, Ordering::Relaxed); + } + }) + .map_err(|e| { + warn!( + "[mirror] failed to recover server: {}, {}", + mirror_cloned.config.host, e + ); + }); + } + } + + thread::sleep(Duration::from_secs( + mirror_cloned.config.health_check_interval, + )); + } + }); + } + } + + /// Shutdown the connection. + pub fn shutdown(&self) { + self.shutdown.store(true, Ordering::Release); + } + + #[allow(clippy::too_many_arguments)] + pub fn call( + &self, + method: Method, + url: &str, + query: Option<&[(&str, &str)]>, + data: Option>, + headers: &mut HeaderMap, + catch_status: bool, + ) -> ConnectionResult { + if self.shutdown.load(Ordering::Acquire) { + return Err(ConnectionError::Disconnected); + } + self.last_active.store( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(), + Ordering::Relaxed, + ); + + if let Some(proxy) = &self.proxy { + if proxy.health.ok() { + let data_cloned = data.as_ref().cloned(); + + let http_url: Option; + let mut replaced_url = url; + + if proxy.use_http { + http_url = proxy.try_use_http(url); + if let Some(ref r) = http_url { + replaced_url = r.as_str(); + } + } + + let result = self.call_inner( + &proxy.client, + method.clone(), + replaced_url, + &query, + data_cloned, + headers, + catch_status, + true, + ); + + match result { + Ok(resp) => { + if !proxy.fallback || resp.status() < StatusCode::INTERNAL_SERVER_ERROR { + return Ok(resp); + } + } + Err(err) => { + if !proxy.fallback { + return Err(err); + } + } + } + // If proxy server responds invalid status code or http connection failed, we need to + // fallback to origin server, the policy only applicable to non-upload operation + warn!("Request proxy server failed, fallback to original server"); + } else { + LAST_FALLBACK_AT.with(|f| { + let current = SystemTime::now(); + if current.duration_since(*f.borrow()).unwrap().as_secs() + >= RATE_LIMITED_LOG_TIME as u64 + { + warn!("Proxy server is not healthy, fallback to original server"); + f.replace(current); + } + }) + } + } + + let mut mirror_enabled = false; + if !self.mirrors.is_empty() { + mirror_enabled = true; + for mirror in self.mirrors.iter() { + if mirror.status.load(Ordering::Relaxed) { + let data_cloned = data.as_ref().cloned(); + + for (key, value) in mirror.config.headers.iter() { + headers.insert( + HeaderName::from_str(key).unwrap(), + HeaderValue::from_str(value).unwrap(), + ); + } + + let current_url = mirror.mirror_url(url)?; + debug!("[mirror] replace to: {}", current_url); + + let result = self.call_inner( + &self.client, + method.clone(), + current_url.as_str(), + &query, + data_cloned, + headers, + catch_status, + false, + ); + + match result { + Ok(resp) => { + // If the response status >= INTERNAL_SERVER_ERROR, move to the next mirror server. 
+ if resp.status() < StatusCode::INTERNAL_SERVER_ERROR { + return Ok(resp); + } + } + Err(err) => { + warn!( + "[mirror] request failed, server: {:?}, {:?}", + mirror.config.host, err + ); + mirror.failed_times.fetch_add(1, Ordering::Relaxed); + + if mirror.failed_times.load(Ordering::Relaxed) >= mirror.failure_limit { + warn!( + "[mirror] exceed failure limit {}, server disabled: {:?}", + mirror.failure_limit, mirror + ); + mirror.status.store(false, Ordering::Relaxed); + } + } + } + } + // Remove mirror-related headers to avoid sending them to the next mirror server and original registry. + for (key, _) in mirror.config.headers.iter() { + headers.remove(HeaderName::from_str(key).unwrap()); + } + } + } + + if mirror_enabled { + warn!("[mirror] request all servers failed, fallback to original server."); + } + + self.call_inner( + &self.client, + method, + url, + &query, + data, + headers, + catch_status, + false, + ) + } + + fn build_connection(proxy: &str, config: &ConnectionConfig) -> Result { + let connect_timeout = if config.connect_timeout != 0 { + Some(Duration::from_secs(config.connect_timeout as u64)) + } else { + None + }; + let timeout = if config.timeout != 0 { + Some(Duration::from_secs(config.timeout as u64)) + } else { + None + }; + + let mut cb = Client::builder() + .timeout(timeout) + .connect_timeout(connect_timeout) + .redirect(Policy::none()); + + if config.skip_verify { + cb = cb.danger_accept_invalid_certs(true); + } + + if !proxy.is_empty() { + cb = cb.proxy(reqwest::Proxy::all(proxy).map_err(|e| einval!(e))?) + } + + cb.build().map_err(|e| einval!(e)) + } + + #[allow(clippy::too_many_arguments)] + fn call_inner( + &self, + client: &Client, + method: Method, + url: &str, + query: &Option<&[(&str, &str)]>, + data: Option>, + headers: &HeaderMap, + catch_status: bool, + proxy: bool, + ) -> ConnectionResult { + // Only clone header when debugging to reduce potential overhead. 
+ let display_headers = if max_level() >= Level::Debug { + let mut display_headers = headers.clone(); + display_headers.remove(HEADER_AUTHORIZATION); + Some(display_headers) + } else { + None + }; + let has_data = data.is_some(); + let start = Instant::now(); + + let mut rb = client.request(method.clone(), url).headers(headers.clone()); + if let Some(q) = query.as_ref() { + rb = rb.query(q); + } + + let ret; + if let Some(data) = data { + match data { + ReqBody::Read(body, total) => { + let body = Body::sized(body, total as u64); + ret = rb.body(body).send(); + } + ReqBody::Buf(buf) => { + ret = rb.body(buf).send(); + } + ReqBody::Form(form) => { + ret = rb.form(&form).send(); + } + } + } else { + ret = rb.body("").send(); + } + + debug!( + "{} Request: {} {} headers: {:?}, proxy: {}, data: {}, duration: {}ms", + std::thread::current().name().unwrap_or_default(), + method, + url, + display_headers, + proxy, + has_data, + Instant::now().duration_since(start).as_millis(), + ); + + match ret { + Err(err) => Err(ConnectionError::Common(err)), + Ok(resp) => respond(resp, catch_status), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + fn test_progress() { + let buf = vec![0x1u8, 2, 3, 4, 5]; + let mut progress = Progress::new(Cursor::new(buf), 5, |(curr, total)| { + assert!(curr == 2 || curr == 4); + assert_eq!(total, 5); + }); + + let mut buf1 = [0x0u8; 2]; + assert_eq!(progress.read(&mut buf1).unwrap(), 2); + assert_eq!(buf1[0], 1); + assert_eq!(buf1[1], 2); + + assert_eq!(progress.read(&mut buf1).unwrap(), 2); + assert_eq!(buf1[0], 3); + assert_eq!(buf1[1], 4); + } + + #[test] + fn test_proxy_health() { + let checker = ProxyHealth::new(5, 300, None); + + assert!(checker.ok()); + assert!(checker.ok()); + checker.set(false); + assert!(!checker.ok()); + assert!(!checker.ok()); + checker.set(true); + assert!(checker.ok()); + assert!(checker.ok()); + } + + #[test] + fn test_is_success_status() { + assert!(!is_success_status(StatusCode::CONTINUE)); + assert!(is_success_status(StatusCode::OK)); + assert!(is_success_status(StatusCode::PERMANENT_REDIRECT)); + assert!(!is_success_status(StatusCode::BAD_REQUEST)); + } + + #[test] + fn test_connection_config_default() { + let config = ConnectionConfig::default(); + + assert_eq!(config.timeout, 5); + assert_eq!(config.connect_timeout, 5); + assert_eq!(config.retry_limit, 0); + assert_eq!(config.proxy.check_interval, 5); + assert_eq!(config.proxy.check_pause_elapsed, 300); + assert!(config.proxy.fallback); + assert_eq!(config.proxy.ping_url, ""); + assert_eq!(config.proxy.url, ""); + assert!(config.mirrors.is_empty()); + } +} diff --git a/storage/src/backend/http_proxy.rs b/storage/src/backend/http_proxy.rs index c1324fbef78..f4529a60ffd 100644 --- a/storage/src/backend/http_proxy.rs +++ b/storage/src/backend/http_proxy.rs @@ -1,530 +1,530 @@ -// Copyright 2023 Ant Group. All rights reserved. - -// SPDX-License-Identifier: Apache-2.0 - -// ! Storage backend driver to access the blobs through a http proxy. 
- -use http::{HeaderMap, HeaderValue, Method, Request}; -use hyper::Client as HyperClient; -use hyper::{body, Body, Response}; -use hyperlocal::Uri as HyperLocalUri; -use hyperlocal::{UnixClientExt, UnixConnector}; -use nydus_api::HttpProxyConfig; -use nydus_utils::metrics::BackendMetrics; -use reqwest; -use tokio::runtime::Runtime; - -use super::connection::{Connection, ConnectionConfig, ConnectionError}; -use super::{BackendError, BackendResult, BlobBackend, BlobReader}; -use std::path::Path; -use std::{ - fmt, - io::{Error, Result}, - num::ParseIntError, - str::{self}, - sync::Arc, -}; - -const HYPER_LOCAL_CLIENT_RUNTIME_THREAD_NUM: usize = 1; - -#[derive(Debug)] -pub enum HttpProxyError { - /// Failed to parse string to integer. - ParseStringToInteger(ParseIntError), - ParseContentLengthFromHeader(http::header::ToStrError), - /// Failed to get response from the local http server. - LocalRequest(hyper::Error), - /// Failed to get response from the remote http server. - RemoteRequest(ConnectionError), - /// Failed to build the tokio runtime. - BuildTokioRuntime(Error), - /// Failed to build local http request. - BuildHttpRequest(http::Error), - /// Failed to read the response body. - ReadResponseBody(hyper::Error), - /// Failed to transport the remote response body. - Transport(reqwest::Error), - /// Failed to copy the buffer. - CopyBuffer(Error), - /// Invalid path. - InvalidPath, - /// Failed to build request header. - ConstructHeader(String), -} - -impl fmt::Display for HttpProxyError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - HttpProxyError::ParseStringToInteger(e) => { - write!(f, "failed to parse string to integer, {}", e) - } - HttpProxyError::ParseContentLengthFromHeader(e) => { - write!(f, "failed to parse content length from header, {}", e) - } - HttpProxyError::LocalRequest(e) => write!(f, "failed to get response, {}", e), - HttpProxyError::RemoteRequest(e) => write!(f, "failed to get response, {}", e), - HttpProxyError::BuildTokioRuntime(e) => { - write!(f, "failed to build tokio runtime, {}", e) - } - HttpProxyError::BuildHttpRequest(e) => { - write!(f, "failed to build http request, {}", e) - } - HttpProxyError::Transport(e) => { - write!(f, "failed to transport remote response body, {}", e) - } - HttpProxyError::ReadResponseBody(e) => { - write!(f, "failed to read response body, {}", e) - } - HttpProxyError::CopyBuffer(e) => write!(f, "failed to copy buffer, {}", e), - HttpProxyError::InvalidPath => write!(f, "invalid path"), - HttpProxyError::ConstructHeader(e) => { - write!(f, "failed to construct request header, {}", e) - } - } - } -} - -impl From for BackendError { - fn from(error: HttpProxyError) -> Self { - BackendError::HttpProxy(error) - } -} - -/// A storage backend driver to access blobs through a http proxy server. -/// The http proxy server may be local (using unix socket) or be remote (using `http://` or `https://`). -/// -/// `HttpProxy` uses two API endpoints to access the blobs: -/// - `HEAD /path/to/blob` to get the blob size -/// - `GET /path/to/blob` to read the blob -/// -/// The http proxy server should respect [the `Range` header](https://www.rfc-editor.org/rfc/rfc9110.html#name-range) to support range reading. -pub struct HttpProxy { - addr: String, - path: String, - client: Client, - metrics: Option>, -} - -/// HttpProxyReader is a BlobReader to implement the HttpProxy backend driver. 
-pub struct HttpProxyReader { - client: Client, - uri: Uri, - metrics: Arc, -} - -#[derive(Clone)] -struct LocalClient { - client: Arc>, - runtime: Arc, -} - -#[derive(Clone)] -enum Client { - Local(LocalClient), - Remote(Arc), -} - -enum Uri { - Local(Arc), - Remote(String), -} - -fn range_str_for_header(offset: u64, len: Option) -> String { - match len { - Some(len) => format!("bytes={}-{}", offset, offset + len as u64 - 1), - None => format!("bytes={}-", offset), - } -} - -fn build_tokio_runtime(name: &str, thread_num: usize) -> Result { - let runtime = tokio::runtime::Builder::new_multi_thread() - .thread_name(name) - .worker_threads(thread_num) - .enable_all() - .build()?; - Ok(runtime) -} - -impl LocalClient { - async fn do_req( - &self, - uri: Arc, - only_head: bool, - offset: u64, - len: Option, - ) -> BackendResult> { - let method = if only_head { Method::HEAD } else { Method::GET }; - let req = Request::builder() - .method(method) - .uri(uri.as_ref()) - .header(http::header::RANGE, range_str_for_header(offset, len)) - .body(Body::default()) - .map_err(HttpProxyError::BuildHttpRequest)?; - let resp = self - .client - .request(req) - .await - .map_err(HttpProxyError::LocalRequest)?; - Ok(resp) - } - - fn get_headers(&self, uri: Arc) -> BackendResult> { - let headers = self - .runtime - .block_on(self.do_req(uri, true, 0, None))? - .headers() - .to_owned(); - Ok(headers) - } - - fn try_read(&self, uri: Arc, offset: u64, len: usize) -> BackendResult> { - self.runtime.block_on(async { - let resp = self.do_req(uri, false, offset, Some(len)).await; - match resp { - Ok(resp) => body::to_bytes(resp) - .await - .map_err(|e| HttpProxyError::ReadResponseBody(e).into()) - .map(|bytes| bytes.to_vec()), - Err(e) => Err(e), - } - }) - } -} - -impl BlobReader for HttpProxyReader { - fn blob_size(&self) -> super::BackendResult { - let headers = match &self.client { - Client::Local(client) => { - let uri = match self.uri { - Uri::Local(ref uri) => uri.clone(), - Uri::Remote(_) => unreachable!(), - }; - client.get_headers(uri) - } - Client::Remote(connection) => { - let uri = match self.uri { - Uri::Local(_) => unreachable!(), - Uri::Remote(ref uri) => uri.clone(), - }; - connection - .call::<&[u8]>( - Method::HEAD, - uri.as_str(), - None, - None, - &mut HeaderMap::new(), - true, - ) - .map(|resp| resp.headers().to_owned()) - .map_err(|e| HttpProxyError::RemoteRequest(e).into()) - } - }; - let content_length = headers?[http::header::CONTENT_LENGTH] - .to_str() - .map_err(HttpProxyError::ParseContentLengthFromHeader)? 
- .parse::() - .map_err(HttpProxyError::ParseStringToInteger)?; - Ok(content_length) - } - - fn try_read(&self, mut buf: &mut [u8], offset: u64) -> BackendResult { - match &self.client { - Client::Local(client) => { - let uri = match self.uri { - Uri::Local(ref uri) => uri.clone(), - Uri::Remote(_) => unreachable!(), - }; - let content = client.try_read(uri, offset, buf.len())?; - let copied_size = std::io::copy(&mut content.as_slice(), &mut buf) - .map_err(HttpProxyError::CopyBuffer)?; - Ok(copied_size as usize) - } - Client::Remote(connection) => { - let uri = match self.uri { - Uri::Local(_) => unreachable!(), - Uri::Remote(ref uri) => uri.clone(), - }; - let mut headers = HeaderMap::new(); - let range = range_str_for_header(offset, Some(buf.len())); - headers.insert( - http::header::RANGE, - range - .as_str() - .parse() - .map_err(|e| HttpProxyError::ConstructHeader(format!("{}", e)))?, - ); - let mut resp = connection - .call::<&[u8]>(Method::GET, uri.as_str(), None, None, &mut headers, true) - .map_err(HttpProxyError::RemoteRequest)?; - - Ok(resp - .copy_to(&mut buf) - .map_err(HttpProxyError::Transport) - .map(|size| size as usize)?) - } - } - } - - fn metrics(&self) -> &nydus_utils::metrics::BackendMetrics { - &self.metrics - } -} - -impl HttpProxy { - pub fn new(config: &HttpProxyConfig, id: Option<&str>) -> Result { - let client = if config.addr.starts_with("http://") || config.addr.starts_with("https://") { - let conn_cfg: ConnectionConfig = config.clone().into(); - let conn = Connection::new(&conn_cfg)?; - Client::Remote(conn) - } else { - let client = HyperClient::unix(); - let runtime = build_tokio_runtime("http-proxy", HYPER_LOCAL_CLIENT_RUNTIME_THREAD_NUM)?; - let local_client = LocalClient { - client: Arc::new(client), - runtime: Arc::new(runtime), - }; - Client::Local(local_client) - }; - Ok(HttpProxy { - addr: config.addr.to_string(), - path: config.path.to_string(), - client, - metrics: id.map(|i| BackendMetrics::new(i, "http-proxy")), - }) - } -} - -impl BlobBackend for HttpProxy { - fn shutdown(&self) { - match &self.client { - Client::Local(_) => { - // do nothing - } - Client::Remote(remote_client) => { - remote_client.shutdown(); - } - } - } - - fn metrics(&self) -> &nydus_utils::metrics::BackendMetrics { - // `metrics()` is only used for nydusd, which will always provide valid `blob_id`, thus - // `self.metrics` has valid value. 
- self.metrics.as_ref().unwrap() - } - - fn get_reader( - &self, - blob_id: &str, - ) -> super::BackendResult> { - let path = Path::new(&self.path).join(blob_id); - let path = path.to_str().ok_or(HttpProxyError::InvalidPath)?; - let uri = match &self.client { - Client::Local(_) => { - let uri: Arc = - Arc::new(HyperLocalUri::new(self.addr.clone(), "/").into()); - Uri::Local(uri) - } - Client::Remote(_) => { - let uri = format!("{}{}", self.addr, path); - Uri::Remote(uri) - } - }; - let reader = Arc::new(HttpProxyReader { - client: self.client.clone(), - uri, - metrics: self.metrics.as_ref().unwrap().clone(), - }); - Ok(reader) - } -} - -impl Drop for HttpProxy { - fn drop(&mut self) { - self.shutdown(); - if let Some(metrics) = self.metrics.as_ref() { - metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } - } -} - -#[cfg(test)] -mod tests { - - use crate::{ - backend::{http_proxy::HttpProxy, BlobBackend}, - utils::alloc_buf, - }; - - use http::{status, Request}; - use hyper::{ - service::{make_service_fn, service_fn}, - Body, Response, Server, - }; - use hyperlocal::UnixServerExt; - use nydus_api::HttpProxyConfig; - use std::{ - cmp, - fs::{self}, - net::{IpAddr, Ipv4Addr, SocketAddr}, - path::Path, - thread, - time::Duration, - }; - - use super::build_tokio_runtime; - - const CONTENT: &str = "some content for test"; - const SOCKET_PATH: &str = "/tmp/nydus-test-local-http-proxy.sock"; - - fn parse_range_header(range_str: &str) -> (u64, Option) { - let range_str = range_str.trim_start_matches("bytes="); - let range: Vec<&str> = range_str.split('-').collect(); - let start = range[0].parse::().unwrap(); - let end = match range[1] { - "" => None, - _ => Some(cmp::min( - range[1].parse::().unwrap(), - (CONTENT.len() - 1) as u64, - )), - }; - (start, end) - } - - async fn server_handler(req: Request) -> Result, hyper::Error> { - return match *req.method() { - hyper::Method::HEAD => Ok::<_, hyper::Error>( - Response::builder() - .status(200) - .header(http::header::CONTENT_LENGTH, CONTENT.len()) - .body(Body::empty()) - .unwrap(), - ), - hyper::Method::GET => { - let range = req.headers()[http::header::RANGE].to_str().unwrap(); - println!("range: {}", range); - let (start, end) = parse_range_header(range); - let length = match end { - Some(e) => e - start + 1, - None => CONTENT.len() as u64, - }; - println!("start: {}, end: {:?}, length: {}", start, end, length); - let end = match end { - Some(e) => e, - None => (CONTENT.len() - 1) as u64, - }; - let content = CONTENT.as_bytes()[start as usize..(end + 1) as usize].to_vec(); - Ok::<_, hyper::Error>( - Response::builder() - .status(200) - .header(http::header::CONTENT_LENGTH, length) - .body(Body::from(content)) - .unwrap(), - ) - } - _ => Ok::<_, hyper::Error>( - Response::builder() - .status(status::StatusCode::METHOD_NOT_ALLOWED) - .body(Body::empty()) - .unwrap(), - ), - }; - } - - #[test] - fn test_head_and_get() { - thread::spawn(|| { - let rt = build_tokio_runtime("test-local-http-proxy-server", 1).unwrap(); - rt.block_on(async { - println!("\nstarting local http proxy server......"); - let path = Path::new(SOCKET_PATH); - if path.exists() { - fs::remove_file(path).unwrap(); - } - Server::bind_unix(path) - .unwrap() - .serve(make_service_fn(|_| async { - Ok::<_, hyper::Error>(service_fn(server_handler)) - })) - .await - .unwrap(); - }); - }); - - thread::spawn(|| { - let rt = build_tokio_runtime("test-remote-http-proxy-server", 1).unwrap(); - rt.block_on(async { - println!("\nstarting remote http proxy server......"); - 
Server::bind(&SocketAddr::new( - IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), - 9977, - )) - .serve(make_service_fn(|_| async { - Ok::<_, hyper::Error>(service_fn(server_handler)) - })) - .await - .unwrap(); - }); - }); - - // wait for server to start - thread::sleep(Duration::from_secs(5)); - - // start the client and test - let test_list: Vec<(String, String)> = vec![ - ( - format!( - "{{\"addr\":\"{}\",\"path\":\"/namespace//blobs\"}}", - SOCKET_PATH, - ), - "test-local-http-proxy".to_string(), - ), - ( - "{\"addr\":\"http://127.0.0.1:9977\",\"path\":\"/namespace//blobs\"}" - .to_string(), - "test-remote-http-proxy".to_string(), - ), - ]; - for test_case in test_list.iter() { - let config: HttpProxyConfig = serde_json::from_str(test_case.0.as_str()).unwrap(); - let backend = HttpProxy::new(&config, Some(test_case.1.as_str())).unwrap(); - let reader = backend.get_reader("blob_id").unwrap(); - - println!(); - println!("testing blob_size()......"); - let blob_size = reader - .blob_size() - .map_err(|e| { - println!("blob_size() failed: {}", e); - e - }) - .unwrap(); - assert_eq!(blob_size, CONTENT.len() as u64); - - println!(); - println!("testing read() range......"); - let mut buf = alloc_buf(3); - let size = reader - .try_read(&mut buf, 0) - .map_err(|e| { - println!("read() range failed: {}", e); - e - }) - .unwrap(); - assert_eq!(size, 3); - assert_eq!(buf, CONTENT.as_bytes()[0..3]); - - println!(); - println!("testing read() full......"); - let mut buf = alloc_buf(80); - let size = reader - .try_read(&mut buf, 0) - .map_err(|e| { - println!("read() range failed: {}", e); - e - }) - .unwrap(); - assert_eq!(size, CONTENT.len() as usize); - assert_eq!(&buf[0..CONTENT.len()], CONTENT.as_bytes()); - } - } -} +// Copyright 2023 Ant Group. All rights reserved. + +// SPDX-License-Identifier: Apache-2.0 + +// ! Storage backend driver to access the blobs through a http proxy. + +use http::{HeaderMap, HeaderValue, Method, Request}; +use hyper::Client as HyperClient; +use hyper::{body, Body, Response}; +use hyperlocal::Uri as HyperLocalUri; +use hyperlocal::{UnixClientExt, UnixConnector}; +use nydus_api::HttpProxyConfig; +use nydus_utils::metrics::BackendMetrics; +use reqwest; +use tokio::runtime::Runtime; + +use super::connection::{Connection, ConnectionConfig, ConnectionError}; +use super::{BackendError, BackendResult, BlobBackend, BlobReader}; +use std::path::Path; +use std::{ + fmt, + io::{Error, Result}, + num::ParseIntError, + str::{self}, + sync::Arc, +}; + +const HYPER_LOCAL_CLIENT_RUNTIME_THREAD_NUM: usize = 1; + +#[derive(Debug)] +pub enum HttpProxyError { + /// Failed to parse string to integer. + ParseStringToInteger(ParseIntError), + ParseContentLengthFromHeader(http::header::ToStrError), + /// Failed to get response from the local http server. + LocalRequest(hyper::Error), + /// Failed to get response from the remote http server. + RemoteRequest(ConnectionError), + /// Failed to build the tokio runtime. + BuildTokioRuntime(Error), + /// Failed to build local http request. + BuildHttpRequest(http::Error), + /// Failed to read the response body. + ReadResponseBody(hyper::Error), + /// Failed to transport the remote response body. + Transport(reqwest::Error), + /// Failed to copy the buffer. + CopyBuffer(Error), + /// Invalid path. + InvalidPath, + /// Failed to build request header. 
+ ConstructHeader(String), +} + +impl fmt::Display for HttpProxyError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + HttpProxyError::ParseStringToInteger(e) => { + write!(f, "failed to parse string to integer, {}", e) + } + HttpProxyError::ParseContentLengthFromHeader(e) => { + write!(f, "failed to parse content length from header, {}", e) + } + HttpProxyError::LocalRequest(e) => write!(f, "failed to get response, {}", e), + HttpProxyError::RemoteRequest(e) => write!(f, "failed to get response, {}", e), + HttpProxyError::BuildTokioRuntime(e) => { + write!(f, "failed to build tokio runtime, {}", e) + } + HttpProxyError::BuildHttpRequest(e) => { + write!(f, "failed to build http request, {}", e) + } + HttpProxyError::Transport(e) => { + write!(f, "failed to transport remote response body, {}", e) + } + HttpProxyError::ReadResponseBody(e) => { + write!(f, "failed to read response body, {}", e) + } + HttpProxyError::CopyBuffer(e) => write!(f, "failed to copy buffer, {}", e), + HttpProxyError::InvalidPath => write!(f, "invalid path"), + HttpProxyError::ConstructHeader(e) => { + write!(f, "failed to construct request header, {}", e) + } + } + } +} + +impl From for BackendError { + fn from(error: HttpProxyError) -> Self { + BackendError::HttpProxy(error) + } +} + +/// A storage backend driver to access blobs through a http proxy server. +/// The http proxy server may be local (using unix socket) or be remote (using `http://` or `https://`). +/// +/// `HttpProxy` uses two API endpoints to access the blobs: +/// - `HEAD /path/to/blob` to get the blob size +/// - `GET /path/to/blob` to read the blob +/// +/// The http proxy server should respect [the `Range` header](https://www.rfc-editor.org/rfc/rfc9110.html#name-range) to support range reading. +pub struct HttpProxy { + addr: String, + path: String, + client: Client, + metrics: Option>, +} + +/// HttpProxyReader is a BlobReader to implement the HttpProxy backend driver. +pub struct HttpProxyReader { + client: Client, + uri: Uri, + metrics: Arc, +} + +#[derive(Clone)] +struct LocalClient { + client: Arc>, + runtime: Arc, +} + +#[derive(Clone)] +enum Client { + Local(LocalClient), + Remote(Arc), +} + +enum Uri { + Local(Arc), + Remote(String), +} + +fn range_str_for_header(offset: u64, len: Option) -> String { + match len { + Some(len) => format!("bytes={}-{}", offset, offset + len as u64 - 1), + None => format!("bytes={}-", offset), + } +} + +fn build_tokio_runtime(name: &str, thread_num: usize) -> Result { + let runtime = tokio::runtime::Builder::new_multi_thread() + .thread_name(name) + .worker_threads(thread_num) + .enable_all() + .build()?; + Ok(runtime) +} + +impl LocalClient { + async fn do_req( + &self, + uri: Arc, + only_head: bool, + offset: u64, + len: Option, + ) -> BackendResult> { + let method = if only_head { Method::HEAD } else { Method::GET }; + let req = Request::builder() + .method(method) + .uri(uri.as_ref()) + .header(http::header::RANGE, range_str_for_header(offset, len)) + .body(Body::default()) + .map_err(HttpProxyError::BuildHttpRequest)?; + let resp = self + .client + .request(req) + .await + .map_err(HttpProxyError::LocalRequest)?; + Ok(resp) + } + + fn get_headers(&self, uri: Arc) -> BackendResult> { + let headers = self + .runtime + .block_on(self.do_req(uri, true, 0, None))? 
+ .headers() + .to_owned(); + Ok(headers) + } + + fn try_read(&self, uri: Arc, offset: u64, len: usize) -> BackendResult> { + self.runtime.block_on(async { + let resp = self.do_req(uri, false, offset, Some(len)).await; + match resp { + Ok(resp) => body::to_bytes(resp) + .await + .map_err(|e| HttpProxyError::ReadResponseBody(e).into()) + .map(|bytes| bytes.to_vec()), + Err(e) => Err(e), + } + }) + } +} + +impl BlobReader for HttpProxyReader { + fn blob_size(&self) -> super::BackendResult { + let headers = match &self.client { + Client::Local(client) => { + let uri = match self.uri { + Uri::Local(ref uri) => uri.clone(), + Uri::Remote(_) => unreachable!(), + }; + client.get_headers(uri) + } + Client::Remote(connection) => { + let uri = match self.uri { + Uri::Local(_) => unreachable!(), + Uri::Remote(ref uri) => uri.clone(), + }; + connection + .call::<&[u8]>( + Method::HEAD, + uri.as_str(), + None, + None, + &mut HeaderMap::new(), + true, + ) + .map(|resp| resp.headers().to_owned()) + .map_err(|e| HttpProxyError::RemoteRequest(e).into()) + } + }; + let content_length = headers?[http::header::CONTENT_LENGTH] + .to_str() + .map_err(HttpProxyError::ParseContentLengthFromHeader)? + .parse::() + .map_err(HttpProxyError::ParseStringToInteger)?; + Ok(content_length) + } + + fn try_read(&self, mut buf: &mut [u8], offset: u64) -> BackendResult { + match &self.client { + Client::Local(client) => { + let uri = match self.uri { + Uri::Local(ref uri) => uri.clone(), + Uri::Remote(_) => unreachable!(), + }; + let content = client.try_read(uri, offset, buf.len())?; + let copied_size = std::io::copy(&mut content.as_slice(), &mut buf) + .map_err(HttpProxyError::CopyBuffer)?; + Ok(copied_size as usize) + } + Client::Remote(connection) => { + let uri = match self.uri { + Uri::Local(_) => unreachable!(), + Uri::Remote(ref uri) => uri.clone(), + }; + let mut headers = HeaderMap::new(); + let range = range_str_for_header(offset, Some(buf.len())); + headers.insert( + http::header::RANGE, + range + .as_str() + .parse() + .map_err(|e| HttpProxyError::ConstructHeader(format!("{}", e)))?, + ); + let mut resp = connection + .call::<&[u8]>(Method::GET, uri.as_str(), None, None, &mut headers, true) + .map_err(HttpProxyError::RemoteRequest)?; + + Ok(resp + .copy_to(&mut buf) + .map_err(HttpProxyError::Transport) + .map(|size| size as usize)?) 
+ } + } + } + + fn metrics(&self) -> &nydus_utils::metrics::BackendMetrics { + &self.metrics + } +} + +impl HttpProxy { + pub fn new(config: &HttpProxyConfig, id: Option<&str>) -> Result { + let client = if config.addr.starts_with("http://") || config.addr.starts_with("https://") { + let conn_cfg: ConnectionConfig = config.clone().into(); + let conn = Connection::new(&conn_cfg)?; + Client::Remote(conn) + } else { + let client = HyperClient::unix(); + let runtime = build_tokio_runtime("http-proxy", HYPER_LOCAL_CLIENT_RUNTIME_THREAD_NUM)?; + let local_client = LocalClient { + client: Arc::new(client), + runtime: Arc::new(runtime), + }; + Client::Local(local_client) + }; + Ok(HttpProxy { + addr: config.addr.to_string(), + path: config.path.to_string(), + client, + metrics: id.map(|i| BackendMetrics::new(i, "http-proxy")), + }) + } +} + +impl BlobBackend for HttpProxy { + fn shutdown(&self) { + match &self.client { + Client::Local(_) => { + // do nothing + } + Client::Remote(remote_client) => { + remote_client.shutdown(); + } + } + } + + fn metrics(&self) -> &nydus_utils::metrics::BackendMetrics { + // `metrics()` is only used for nydusd, which will always provide valid `blob_id`, thus + // `self.metrics` has valid value. + self.metrics.as_ref().unwrap() + } + + fn get_reader( + &self, + blob_id: &str, + ) -> super::BackendResult> { + let path = Path::new(&self.path).join(blob_id); + let path = path.to_str().ok_or(HttpProxyError::InvalidPath)?; + let uri = match &self.client { + Client::Local(_) => { + let uri: Arc = + Arc::new(HyperLocalUri::new(self.addr.clone(), "/").into()); + Uri::Local(uri) + } + Client::Remote(_) => { + let uri = format!("{}{}", self.addr, path); + Uri::Remote(uri) + } + }; + let reader = Arc::new(HttpProxyReader { + client: self.client.clone(), + uri, + metrics: self.metrics.as_ref().unwrap().clone(), + }); + Ok(reader) + } +} + +impl Drop for HttpProxy { + fn drop(&mut self) { + self.shutdown(); + if let Some(metrics) = self.metrics.as_ref() { + metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } + } +} + +#[cfg(test)] +mod tests { + + use crate::{ + backend::{http_proxy::HttpProxy, BlobBackend}, + utils::alloc_buf, + }; + + use http::{status, Request}; + use hyper::{ + service::{make_service_fn, service_fn}, + Body, Response, Server, + }; + use hyperlocal::UnixServerExt; + use nydus_api::HttpProxyConfig; + use std::{ + cmp, + fs::{self}, + net::{IpAddr, Ipv4Addr, SocketAddr}, + path::Path, + thread, + time::Duration, + }; + + use super::build_tokio_runtime; + + const CONTENT: &str = "some content for test"; + const SOCKET_PATH: &str = "/tmp/nydus-test-local-http-proxy.sock"; + + fn parse_range_header(range_str: &str) -> (u64, Option) { + let range_str = range_str.trim_start_matches("bytes="); + let range: Vec<&str> = range_str.split('-').collect(); + let start = range[0].parse::().unwrap(); + let end = match range[1] { + "" => None, + _ => Some(cmp::min( + range[1].parse::().unwrap(), + (CONTENT.len() - 1) as u64, + )), + }; + (start, end) + } + + async fn server_handler(req: Request) -> Result, hyper::Error> { + return match *req.method() { + hyper::Method::HEAD => Ok::<_, hyper::Error>( + Response::builder() + .status(200) + .header(http::header::CONTENT_LENGTH, CONTENT.len()) + .body(Body::empty()) + .unwrap(), + ), + hyper::Method::GET => { + let range = req.headers()[http::header::RANGE].to_str().unwrap(); + println!("range: {}", range); + let (start, end) = parse_range_header(range); + let length = match end { + Some(e) => e - start + 1, + None => 
CONTENT.len() as u64, + }; + println!("start: {}, end: {:?}, length: {}", start, end, length); + let end = match end { + Some(e) => e, + None => (CONTENT.len() - 1) as u64, + }; + let content = CONTENT.as_bytes()[start as usize..(end + 1) as usize].to_vec(); + Ok::<_, hyper::Error>( + Response::builder() + .status(200) + .header(http::header::CONTENT_LENGTH, length) + .body(Body::from(content)) + .unwrap(), + ) + } + _ => Ok::<_, hyper::Error>( + Response::builder() + .status(status::StatusCode::METHOD_NOT_ALLOWED) + .body(Body::empty()) + .unwrap(), + ), + }; + } + + #[test] + fn test_head_and_get() { + thread::spawn(|| { + let rt = build_tokio_runtime("test-local-http-proxy-server", 1).unwrap(); + rt.block_on(async { + println!("\nstarting local http proxy server......"); + let path = Path::new(SOCKET_PATH); + if path.exists() { + fs::remove_file(path).unwrap(); + } + Server::bind_unix(path) + .unwrap() + .serve(make_service_fn(|_| async { + Ok::<_, hyper::Error>(service_fn(server_handler)) + })) + .await + .unwrap(); + }); + }); + + thread::spawn(|| { + let rt = build_tokio_runtime("test-remote-http-proxy-server", 1).unwrap(); + rt.block_on(async { + println!("\nstarting remote http proxy server......"); + Server::bind(&SocketAddr::new( + IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), + 9977, + )) + .serve(make_service_fn(|_| async { + Ok::<_, hyper::Error>(service_fn(server_handler)) + })) + .await + .unwrap(); + }); + }); + + // wait for server to start + thread::sleep(Duration::from_secs(5)); + + // start the client and test + let test_list: Vec<(String, String)> = vec![ + ( + format!( + "{{\"addr\":\"{}\",\"path\":\"/namespace//blobs\"}}", + SOCKET_PATH, + ), + "test-local-http-proxy".to_string(), + ), + ( + "{\"addr\":\"http://127.0.0.1:9977\",\"path\":\"/namespace//blobs\"}" + .to_string(), + "test-remote-http-proxy".to_string(), + ), + ]; + for test_case in test_list.iter() { + let config: HttpProxyConfig = serde_json::from_str(test_case.0.as_str()).unwrap(); + let backend = HttpProxy::new(&config, Some(test_case.1.as_str())).unwrap(); + let reader = backend.get_reader("blob_id").unwrap(); + + println!(); + println!("testing blob_size()......"); + let blob_size = reader + .blob_size() + .map_err(|e| { + println!("blob_size() failed: {}", e); + e + }) + .unwrap(); + assert_eq!(blob_size, CONTENT.len() as u64); + + println!(); + println!("testing read() range......"); + let mut buf = alloc_buf(3); + let size = reader + .try_read(&mut buf, 0) + .map_err(|e| { + println!("read() range failed: {}", e); + e + }) + .unwrap(); + assert_eq!(size, 3); + assert_eq!(buf, CONTENT.as_bytes()[0..3]); + + println!(); + println!("testing read() full......"); + let mut buf = alloc_buf(80); + let size = reader + .try_read(&mut buf, 0) + .map_err(|e| { + println!("read() range failed: {}", e); + e + }) + .unwrap(); + assert_eq!(size, CONTENT.len() as usize); + assert_eq!(&buf[0..CONTENT.len()], CONTENT.as_bytes()); + } + } +} diff --git a/storage/src/backend/localdisk.rs b/storage/src/backend/localdisk.rs index 4475797d5ad..e47c53f643f 100644 --- a/storage/src/backend/localdisk.rs +++ b/storage/src/backend/localdisk.rs @@ -1,432 +1,432 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Storage backend driver to access blobs on local disks. 
- -use std::collections::HashMap; -use std::fmt; -use std::fs::{File, OpenOptions}; -use std::io::Result; -use std::os::unix::io::AsRawFd; -use std::path::Path; -use std::sync::{Arc, RwLock}; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nix::sys::uio; -use nydus_api::LocalDiskConfig; -use nydus_utils::metrics::BackendMetrics; - -use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; -use crate::utils::{readv, MemSliceCursor}; - -type LocalDiskResult = std::result::Result; - -/// Error codes related to localdisk storage backend. -#[derive(Debug)] -pub enum LocalDiskError { - BlobFile(String), - ReadBlob(String), -} - -impl fmt::Display for LocalDiskError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - LocalDiskError::BlobFile(s) => write!(f, "{}", s), - LocalDiskError::ReadBlob(s) => write!(f, "{}", s), - } - } -} - -impl From for BackendError { - fn from(error: LocalDiskError) -> Self { - BackendError::LocalDisk(error) - } -} - -#[derive(Debug)] -struct LocalDiskBlob { - // The file descriptor of the disk - device_file: File, - // Start offset of the partition - blob_offset: u64, - // Length of the partition - blob_length: u64, - // The identifier for the corresponding blob. - blob_id: String, - // Metrics collector. - metrics: Arc, -} - -impl BlobReader for LocalDiskBlob { - fn blob_size(&self) -> BackendResult { - Ok(self.blob_length) - } - - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - let msg = format!( - "localdisk: invalid offset 0x{:x}, base 0x{:x}, length 0x{:x}", - offset, self.blob_offset, self.blob_length - ); - if offset >= self.blob_length { - return Ok(0); - } - let actual_offset = self - .blob_offset - .checked_add(offset) - .ok_or(LocalDiskError::ReadBlob(msg))?; - let len = std::cmp::min(self.blob_length - offset, buf.len() as u64) as usize; - - uio::pread( - self.device_file.as_raw_fd(), - &mut buf[..len], - actual_offset as i64, - ) - .map_err(|e| { - let msg = format!( - "localdisk: failed to read data from blob {}, {}", - self.blob_id, e - ); - LocalDiskError::ReadBlob(msg).into() - }) - } - - fn readv( - &self, - bufs: &[FileVolatileSlice], - offset: u64, - max_size: usize, - ) -> BackendResult { - let msg = format!( - "localdisk: invalid offset 0x{:x}, base 0x{:x}, length 0x{:x}", - offset, self.blob_offset, self.blob_length - ); - if offset >= self.blob_length { - return Ok(0); - } - let actual_offset = self - .blob_offset - .checked_add(offset) - .ok_or(LocalDiskError::ReadBlob(msg.clone()))?; - - let mut c = MemSliceCursor::new(bufs); - let mut iovec = c.consume(max_size); - let mut len = 0; - for buf in bufs { - len += buf.len(); - } - - // Guarantees that reads do not exceed the size of the blob - if offset.checked_add(len as u64).is_none() || offset + len as u64 > self.blob_length { - return Err(LocalDiskError::ReadBlob(msg).into()); - } - - readv(self.device_file.as_raw_fd(), &mut iovec, actual_offset).map_err(|e| { - let msg = format!( - "localdisk: failed to read data from blob {}, {}", - self.blob_id, e - ); - LocalDiskError::ReadBlob(msg).into() - }) - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } -} - -/// Storage backend based on local disk. -pub struct LocalDisk { - // A reference to an open device - device_file: File, - // The disk device path specified by the user - device_path: String, - // Size of the block device. - device_capacity: u64, - // Blobs are discovered by scanning GPT or not. - is_gpt_mode: bool, - // Metrics collector. 
- metrics: Arc, - // Hashmap to map blob id to disk entry. - entries: RwLock>>, -} - -impl LocalDisk { - pub fn new(config: &LocalDiskConfig, id: Option<&str>) -> Result { - let id = id.ok_or_else(|| einval!("localdisk: argument `id` is empty"))?; - let path = &config.device_path; - let path_buf = Path::new(path).to_path_buf().canonicalize().map_err(|e| { - einval!(format!( - "localdisk: invalid disk device path {}, {}", - path, e - )) - })?; - let device_file = OpenOptions::new().read(true).open(path_buf).map_err(|e| { - einval!(format!( - "localdisk: can not open disk device at {}, {}", - path, e - )) - })?; - let md = device_file.metadata().map_err(|e| { - eio!(format!( - "localdisk: can not get file meta data about disk device {}, {}", - path, e - )) - })?; - let mut local_disk = LocalDisk { - device_file, - device_path: path.clone(), - device_capacity: md.len(), - is_gpt_mode: false, - metrics: BackendMetrics::new(id, "localdisk"), - entries: RwLock::new(HashMap::new()), - }; - - if !config.disable_gpt { - local_disk.scan_blobs_by_gpt()?; - } - - Ok(local_disk) - } - - pub fn add_blob(&self, blob_id: &str, offset: u64, length: u64) -> LocalDiskResult<()> { - if self.is_gpt_mode { - let msg = format!( - "localdisk: device {} is in legacy gpt mode", - self.device_path - ); - return Err(LocalDiskError::BlobFile(msg)); - } - if offset.checked_add(length).is_none() || offset + length > self.device_capacity { - let msg = format!( - "localdisk: add blob {} with invalid offset 0x{:x} and length 0x{:x}, device size 0x{:x}", - blob_id, offset, length, self.device_capacity - ); - return Err(LocalDiskError::BlobFile(msg)); - }; - - let device_file = self.device_file.try_clone().map_err(|e| { - LocalDiskError::BlobFile(format!("localdisk: can not duplicate file, {}", e)) - })?; - let blob = Arc::new(LocalDiskBlob { - blob_id: blob_id.to_string(), - device_file, - blob_offset: offset, - blob_length: length, - metrics: self.metrics.clone(), - }); - - let mut table_guard = self.entries.write().unwrap(); - if table_guard.contains_key(blob_id) { - let msg = format!("localdisk: blob {} already exists", blob_id); - return Err(LocalDiskError::BlobFile(msg)); - } - table_guard.insert(blob_id.to_string(), blob); - - Ok(()) - } - - fn get_blob(&self, blob_id: &str) -> LocalDiskResult> { - // Don't expect poisoned lock here. - if let Some(entry) = self.entries.read().unwrap().get(blob_id) { - Ok(entry.clone()) - } else { - self.get_blob_from_gpt(blob_id) - } - } -} - -#[cfg(feature = "backend-localdisk-gpt")] -impl LocalDisk { - // Disk names in GPT tables cannot store full 64-byte blob ids, so we should truncate them to 32 bytes. - fn truncate_blob_id(blob_id: &str) -> Option<&str> { - const LOCALDISK_BLOB_ID_LEN: usize = 32; - if blob_id.len() >= LOCALDISK_BLOB_ID_LEN { - let new_blob_id = &blob_id[0..LOCALDISK_BLOB_ID_LEN]; - Some(new_blob_id) - } else { - None - } - } - - fn get_blob_from_gpt(&self, blob_id: &str) -> LocalDiskResult> { - if self.is_gpt_mode { - if let Some(localdisk_blob_id) = LocalDisk::truncate_blob_id(blob_id) { - // Don't expect poisoned lock here. - if let Some(entry) = self.entries.read().unwrap().get(localdisk_blob_id) { - return Ok(entry.clone()); - } - } - } - - let msg = format!("localdisk: can not find such blob: {}", blob_id); - Err(LocalDiskError::ReadBlob(msg)) - } - - fn scan_blobs_by_gpt(&mut self) -> Result<()> { - // Open disk image. 
- let cfg = gpt::GptConfig::new().writable(false); - let disk = cfg.open(&self.device_path)?; - let partitions = disk.partitions(); - let sector_size = gpt::disk::DEFAULT_SECTOR_SIZE; - info!( - "Localdisk initializing storage backend for device {} with {} partitions, GUID: {}", - self.device_path, - partitions.len(), - disk.guid() - ); - - let mut table_guard = self.entries.write().unwrap(); - for (k, v) in partitions { - let length = v.bytes_len(sector_size)?; - let base_offset = v.bytes_start(sector_size)?; - if base_offset.checked_add(length).is_none() - || base_offset + length > self.device_capacity - { - let msg = format!( - "localdisk: partition {} with invalid offset and length", - v.part_guid - ); - return Err(einval!(msg)); - }; - let guid = v.part_guid; - let mut is_gpt_mode = false; - let name = if v.part_type_guid == gpt::partition_types::BASIC { - is_gpt_mode = true; - // Compatible with old versions of localdisk image - v.name.clone() - } else { - // The 64-byte blob_id is stored in two parts - v.name.clone() + guid.simple().to_string().as_str() - }; - - if name.is_empty() { - let msg = format!("localdisk: partition {} has empty blob id", v.part_guid); - return Err(einval!(msg)); - } - if table_guard.contains_key(&name) { - let msg = format!("localdisk: blob {} already exists", name); - return Err(einval!(msg)); - } - - let device_file = self.device_file.try_clone()?; - let partition = Arc::new(LocalDiskBlob { - blob_id: name.clone(), - device_file, - blob_offset: base_offset, - blob_length: length, - metrics: self.metrics.clone(), - }); - - debug!( - "Localdisk partition {} initialized, blob id: {}, offset {}, length {}", - k, partition.blob_id, partition.blob_offset, partition.blob_length - ); - table_guard.insert(name, partition); - if is_gpt_mode { - self.is_gpt_mode = true; - } - } - - Ok(()) - } -} - -#[cfg(not(feature = "backend-localdisk-gpt"))] -impl LocalDisk { - fn get_blob_from_gpt(&self, blob_id: &str) -> LocalDiskResult> { - Err(LocalDiskError::ReadBlob(format!( - "can not find such blob: {}, this image might be corrupted", - blob_id - ))) - } - - fn scan_blobs_by_gpt(&mut self) -> Result<()> { - Ok(()) - } -} - -impl BlobBackend for LocalDisk { - fn shutdown(&self) {} - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - - fn get_reader(&self, blob_id: &str) -> BackendResult> { - self.get_blob(blob_id).map_err(|e| e.into()) - } -} - -impl Drop for LocalDisk { - fn drop(&mut self) { - self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_invalid_localdisk_new() { - let config = LocalDiskConfig { - device_path: "".to_string(), - disable_gpt: true, - }; - assert!(LocalDisk::new(&config, Some("test")).is_err()); - - let config = LocalDiskConfig { - device_path: "/a/b/c".to_string(), - disable_gpt: true, - }; - assert!(LocalDisk::new(&config, None).is_err()); - } - - #[test] - fn test_add_disk_blob() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let root_dir = Path::new(root_dir).join("../tests/texture/blobs/"); - - let config = LocalDiskConfig { - device_path: root_dir.join("nonexist_blob_file").display().to_string(), - disable_gpt: true, - }; - assert!(LocalDisk::new(&config, Some("test")).is_err()); - - let blob_id = "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"; - let config = LocalDiskConfig { - device_path: root_dir.join(blob_id).display().to_string(), - disable_gpt: true, - }; - let disk = 
LocalDisk::new(&config, Some("test")).unwrap(); - - assert!(disk.add_blob(blob_id, u64::MAX, 1).is_err()); - assert!(disk.add_blob(blob_id, 14553, 2).is_err()); - assert!(disk.add_blob(blob_id, 14554, 1).is_err()); - assert!(disk.add_blob(blob_id, 0, 4096).is_ok()); - assert!(disk.add_blob(blob_id, 0, 4096).is_err()); - let blob = disk.get_blob(blob_id).unwrap(); - assert_eq!(blob.blob_size().unwrap(), 4096); - - let mut buf = vec![0u8; 4096]; - let sz = blob.read(&mut buf, 0).unwrap(); - assert_eq!(sz, 4096); - let sz = blob.read(&mut buf, 4095).unwrap(); - assert_eq!(sz, 1); - let sz = blob.read(&mut buf, 4096).unwrap(); - assert_eq!(sz, 0); - let sz = blob.read(&mut buf, 4097).unwrap(); - assert_eq!(sz, 0); - } - - #[cfg(feature = "backend-localdisk-gpt")] - #[test] - fn test_truncate_blob_id() { - let guid = "50ad3c8243e0a08ecdebde0ef8afcc6f2abca44498ad15491acbe58c83acb66f"; - let guid_truncated = "50ad3c8243e0a08ecdebde0ef8afcc6f"; - - let result = LocalDisk::truncate_blob_id(guid).unwrap(); - assert_eq!(result, guid_truncated) - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Storage backend driver to access blobs on local disks. + +use std::collections::HashMap; +use std::fmt; +use std::fs::{File, OpenOptions}; +use std::io::Result; +use std::os::unix::io::AsRawFd; +use std::path::Path; +use std::sync::{Arc, RwLock}; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nix::sys::uio; +use nydus_api::LocalDiskConfig; +use nydus_utils::metrics::BackendMetrics; + +use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; +use crate::utils::{readv, MemSliceCursor}; + +type LocalDiskResult = std::result::Result; + +/// Error codes related to localdisk storage backend. +#[derive(Debug)] +pub enum LocalDiskError { + BlobFile(String), + ReadBlob(String), +} + +impl fmt::Display for LocalDiskError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LocalDiskError::BlobFile(s) => write!(f, "{}", s), + LocalDiskError::ReadBlob(s) => write!(f, "{}", s), + } + } +} + +impl From for BackendError { + fn from(error: LocalDiskError) -> Self { + BackendError::LocalDisk(error) + } +} + +#[derive(Debug)] +struct LocalDiskBlob { + // The file descriptor of the disk + device_file: File, + // Start offset of the partition + blob_offset: u64, + // Length of the partition + blob_length: u64, + // The identifier for the corresponding blob. + blob_id: String, + // Metrics collector. 
+ metrics: Arc, +} + +impl BlobReader for LocalDiskBlob { + fn blob_size(&self) -> BackendResult { + Ok(self.blob_length) + } + + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + let msg = format!( + "localdisk: invalid offset 0x{:x}, base 0x{:x}, length 0x{:x}", + offset, self.blob_offset, self.blob_length + ); + if offset >= self.blob_length { + return Ok(0); + } + let actual_offset = self + .blob_offset + .checked_add(offset) + .ok_or(LocalDiskError::ReadBlob(msg))?; + let len = std::cmp::min(self.blob_length - offset, buf.len() as u64) as usize; + + uio::pread( + self.device_file.as_raw_fd(), + &mut buf[..len], + actual_offset as i64, + ) + .map_err(|e| { + let msg = format!( + "localdisk: failed to read data from blob {}, {}", + self.blob_id, e + ); + LocalDiskError::ReadBlob(msg).into() + }) + } + + fn readv( + &self, + bufs: &[FileVolatileSlice], + offset: u64, + max_size: usize, + ) -> BackendResult { + let msg = format!( + "localdisk: invalid offset 0x{:x}, base 0x{:x}, length 0x{:x}", + offset, self.blob_offset, self.blob_length + ); + if offset >= self.blob_length { + return Ok(0); + } + let actual_offset = self + .blob_offset + .checked_add(offset) + .ok_or(LocalDiskError::ReadBlob(msg.clone()))?; + + let mut c = MemSliceCursor::new(bufs); + let mut iovec = c.consume(max_size); + let mut len = 0; + for buf in bufs { + len += buf.len(); + } + + // Guarantees that reads do not exceed the size of the blob + if offset.checked_add(len as u64).is_none() || offset + len as u64 > self.blob_length { + return Err(LocalDiskError::ReadBlob(msg).into()); + } + + readv(self.device_file.as_raw_fd(), &mut iovec, actual_offset).map_err(|e| { + let msg = format!( + "localdisk: failed to read data from blob {}, {}", + self.blob_id, e + ); + LocalDiskError::ReadBlob(msg).into() + }) + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } +} + +/// Storage backend based on local disk. +pub struct LocalDisk { + // A reference to an open device + device_file: File, + // The disk device path specified by the user + device_path: String, + // Size of the block device. + device_capacity: u64, + // Blobs are discovered by scanning GPT or not. + is_gpt_mode: bool, + // Metrics collector. + metrics: Arc, + // Hashmap to map blob id to disk entry. 
+ entries: RwLock>>, +} + +impl LocalDisk { + pub fn new(config: &LocalDiskConfig, id: Option<&str>) -> Result { + let id = id.ok_or_else(|| einval!("localdisk: argument `id` is empty"))?; + let path = &config.device_path; + let path_buf = Path::new(path).to_path_buf().canonicalize().map_err(|e| { + einval!(format!( + "localdisk: invalid disk device path {}, {}", + path, e + )) + })?; + let device_file = OpenOptions::new().read(true).open(path_buf).map_err(|e| { + einval!(format!( + "localdisk: can not open disk device at {}, {}", + path, e + )) + })?; + let md = device_file.metadata().map_err(|e| { + eio!(format!( + "localdisk: can not get file meta data about disk device {}, {}", + path, e + )) + })?; + let mut local_disk = LocalDisk { + device_file, + device_path: path.clone(), + device_capacity: md.len(), + is_gpt_mode: false, + metrics: BackendMetrics::new(id, "localdisk"), + entries: RwLock::new(HashMap::new()), + }; + + if !config.disable_gpt { + local_disk.scan_blobs_by_gpt()?; + } + + Ok(local_disk) + } + + pub fn add_blob(&self, blob_id: &str, offset: u64, length: u64) -> LocalDiskResult<()> { + if self.is_gpt_mode { + let msg = format!( + "localdisk: device {} is in legacy gpt mode", + self.device_path + ); + return Err(LocalDiskError::BlobFile(msg)); + } + if offset.checked_add(length).is_none() || offset + length > self.device_capacity { + let msg = format!( + "localdisk: add blob {} with invalid offset 0x{:x} and length 0x{:x}, device size 0x{:x}", + blob_id, offset, length, self.device_capacity + ); + return Err(LocalDiskError::BlobFile(msg)); + }; + + let device_file = self.device_file.try_clone().map_err(|e| { + LocalDiskError::BlobFile(format!("localdisk: can not duplicate file, {}", e)) + })?; + let blob = Arc::new(LocalDiskBlob { + blob_id: blob_id.to_string(), + device_file, + blob_offset: offset, + blob_length: length, + metrics: self.metrics.clone(), + }); + + let mut table_guard = self.entries.write().unwrap(); + if table_guard.contains_key(blob_id) { + let msg = format!("localdisk: blob {} already exists", blob_id); + return Err(LocalDiskError::BlobFile(msg)); + } + table_guard.insert(blob_id.to_string(), blob); + + Ok(()) + } + + fn get_blob(&self, blob_id: &str) -> LocalDiskResult> { + // Don't expect poisoned lock here. + if let Some(entry) = self.entries.read().unwrap().get(blob_id) { + Ok(entry.clone()) + } else { + self.get_blob_from_gpt(blob_id) + } + } +} + +#[cfg(feature = "backend-localdisk-gpt")] +impl LocalDisk { + // Disk names in GPT tables cannot store full 64-byte blob ids, so we should truncate them to 32 bytes. + fn truncate_blob_id(blob_id: &str) -> Option<&str> { + const LOCALDISK_BLOB_ID_LEN: usize = 32; + if blob_id.len() >= LOCALDISK_BLOB_ID_LEN { + let new_blob_id = &blob_id[0..LOCALDISK_BLOB_ID_LEN]; + Some(new_blob_id) + } else { + None + } + } + + fn get_blob_from_gpt(&self, blob_id: &str) -> LocalDiskResult> { + if self.is_gpt_mode { + if let Some(localdisk_blob_id) = LocalDisk::truncate_blob_id(blob_id) { + // Don't expect poisoned lock here. + if let Some(entry) = self.entries.read().unwrap().get(localdisk_blob_id) { + return Ok(entry.clone()); + } + } + } + + let msg = format!("localdisk: can not find such blob: {}", blob_id); + Err(LocalDiskError::ReadBlob(msg)) + } + + fn scan_blobs_by_gpt(&mut self) -> Result<()> { + // Open disk image. 
+ let cfg = gpt::GptConfig::new().writable(false); + let disk = cfg.open(&self.device_path)?; + let partitions = disk.partitions(); + let sector_size = gpt::disk::DEFAULT_SECTOR_SIZE; + info!( + "Localdisk initializing storage backend for device {} with {} partitions, GUID: {}", + self.device_path, + partitions.len(), + disk.guid() + ); + + let mut table_guard = self.entries.write().unwrap(); + for (k, v) in partitions { + let length = v.bytes_len(sector_size)?; + let base_offset = v.bytes_start(sector_size)?; + if base_offset.checked_add(length).is_none() + || base_offset + length > self.device_capacity + { + let msg = format!( + "localdisk: partition {} with invalid offset and length", + v.part_guid + ); + return Err(einval!(msg)); + }; + let guid = v.part_guid; + let mut is_gpt_mode = false; + let name = if v.part_type_guid == gpt::partition_types::BASIC { + is_gpt_mode = true; + // Compatible with old versions of localdisk image + v.name.clone() + } else { + // The 64-byte blob_id is stored in two parts + v.name.clone() + guid.simple().to_string().as_str() + }; + + if name.is_empty() { + let msg = format!("localdisk: partition {} has empty blob id", v.part_guid); + return Err(einval!(msg)); + } + if table_guard.contains_key(&name) { + let msg = format!("localdisk: blob {} already exists", name); + return Err(einval!(msg)); + } + + let device_file = self.device_file.try_clone()?; + let partition = Arc::new(LocalDiskBlob { + blob_id: name.clone(), + device_file, + blob_offset: base_offset, + blob_length: length, + metrics: self.metrics.clone(), + }); + + debug!( + "Localdisk partition {} initialized, blob id: {}, offset {}, length {}", + k, partition.blob_id, partition.blob_offset, partition.blob_length + ); + table_guard.insert(name, partition); + if is_gpt_mode { + self.is_gpt_mode = true; + } + } + + Ok(()) + } +} + +#[cfg(not(feature = "backend-localdisk-gpt"))] +impl LocalDisk { + fn get_blob_from_gpt(&self, blob_id: &str) -> LocalDiskResult> { + Err(LocalDiskError::ReadBlob(format!( + "can not find such blob: {}, this image might be corrupted", + blob_id + ))) + } + + fn scan_blobs_by_gpt(&mut self) -> Result<()> { + Ok(()) + } +} + +impl BlobBackend for LocalDisk { + fn shutdown(&self) {} + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + + fn get_reader(&self, blob_id: &str) -> BackendResult> { + self.get_blob(blob_id).map_err(|e| e.into()) + } +} + +impl Drop for LocalDisk { + fn drop(&mut self) { + self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_invalid_localdisk_new() { + let config = LocalDiskConfig { + device_path: "".to_string(), + disable_gpt: true, + }; + assert!(LocalDisk::new(&config, Some("test")).is_err()); + + let config = LocalDiskConfig { + device_path: "/a/b/c".to_string(), + disable_gpt: true, + }; + assert!(LocalDisk::new(&config, None).is_err()); + } + + #[test] + fn test_add_disk_blob() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let root_dir = Path::new(root_dir).join("../tests/texture/blobs/"); + + let config = LocalDiskConfig { + device_path: root_dir.join("nonexist_blob_file").display().to_string(), + disable_gpt: true, + }; + assert!(LocalDisk::new(&config, Some("test")).is_err()); + + let blob_id = "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"; + let config = LocalDiskConfig { + device_path: root_dir.join(blob_id).display().to_string(), + disable_gpt: true, + }; + let disk = 
LocalDisk::new(&config, Some("test")).unwrap(); + + assert!(disk.add_blob(blob_id, u64::MAX, 1).is_err()); + assert!(disk.add_blob(blob_id, 14553, 2).is_err()); + assert!(disk.add_blob(blob_id, 14554, 1).is_err()); + assert!(disk.add_blob(blob_id, 0, 4096).is_ok()); + assert!(disk.add_blob(blob_id, 0, 4096).is_err()); + let blob = disk.get_blob(blob_id).unwrap(); + assert_eq!(blob.blob_size().unwrap(), 4096); + + let mut buf = vec![0u8; 4096]; + let sz = blob.read(&mut buf, 0).unwrap(); + assert_eq!(sz, 4096); + let sz = blob.read(&mut buf, 4095).unwrap(); + assert_eq!(sz, 1); + let sz = blob.read(&mut buf, 4096).unwrap(); + assert_eq!(sz, 0); + let sz = blob.read(&mut buf, 4097).unwrap(); + assert_eq!(sz, 0); + } + + #[cfg(feature = "backend-localdisk-gpt")] + #[test] + fn test_truncate_blob_id() { + let guid = "50ad3c8243e0a08ecdebde0ef8afcc6f2abca44498ad15491acbe58c83acb66f"; + let guid_truncated = "50ad3c8243e0a08ecdebde0ef8afcc6f"; + + let result = LocalDisk::truncate_blob_id(guid).unwrap(); + assert_eq!(result, guid_truncated) + } +} diff --git a/storage/src/backend/localfs.rs b/storage/src/backend/localfs.rs index 6168a1903da..7c44d3470fe 100644 --- a/storage/src/backend/localfs.rs +++ b/storage/src/backend/localfs.rs @@ -1,339 +1,339 @@ -// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Storage backend driver to access blobs on local filesystems. - -use std::collections::HashMap; -use std::fmt; -use std::fs::{File, OpenOptions}; -use std::io::Result; -use std::os::unix::io::AsRawFd; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, RwLock}; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nix::sys::uio; - -use nydus_api::LocalFsConfig; -use nydus_utils::metrics::BackendMetrics; - -use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; -use crate::utils::{readv, MemSliceCursor}; - -type LocalFsResult = std::result::Result; - -/// Error codes related to localfs storage backend. -#[derive(Debug)] -pub enum LocalFsError { - BlobFile(String), - ReadBlob(String), -} - -impl fmt::Display for LocalFsError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - LocalFsError::BlobFile(s) => write!(f, "{}", s), - LocalFsError::ReadBlob(s) => write!(f, "{}", s), - } - } -} - -impl From for BackendError { - fn from(error: LocalFsError) -> Self { - BackendError::LocalFs(error) - } -} - -struct LocalFsEntry { - id: String, - file: File, - metrics: Arc, -} - -impl BlobReader for LocalFsEntry { - fn blob_size(&self) -> BackendResult { - self.file.metadata().map(|v| v.len()).map_err(|e| { - let msg = format!("failed to get size of localfs blob {}, {}", self.id, e); - LocalFsError::BlobFile(msg).into() - }) - } - - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - uio::pread(self.file.as_raw_fd(), buf, offset as i64).map_err(|e| { - let msg = format!("failed to read data from blob {}, {}", self.id, e); - LocalFsError::ReadBlob(msg).into() - }) - } - - fn readv( - &self, - bufs: &[FileVolatileSlice], - offset: u64, - max_size: usize, - ) -> BackendResult { - let mut c = MemSliceCursor::new(bufs); - let mut iovec = c.consume(max_size); - - readv(self.file.as_raw_fd(), &mut iovec, offset).map_err(|e| { - let msg = format!("failed to read data from blob {}, {}", self.id, e); - LocalFsError::ReadBlob(msg).into() - }) - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } -} - -/// Storage backend based on local filesystem. 
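// Editorial sketch (not part of this diff): `LocalFsEntry::try_read` above issues a
// positioned read (`nix::sys::uio::pread`), so many readers can share one `File`
// without coordinating a seek position. The std-only equivalent below uses
// `FileExt::read_at`; the file name and sizes are illustrative.
use std::fs::File;
use std::io::Result;
use std::os::unix::fs::FileExt;

fn read_chunk(file: &File, offset: u64, len: usize) -> Result<Vec<u8>> {
    let mut buf = vec![0u8; len];
    // Reads at an absolute offset; the file cursor is left untouched, which is what
    // makes a shared, lock-free `File` safe to read from concurrently.
    let n = file.read_at(&mut buf, offset)?;
    buf.truncate(n);
    Ok(buf)
}

fn main() -> Result<()> {
    let file = File::open("/etc/hostname")?; // any readable file works here
    let head = read_chunk(&file, 0, 16)?;
    println!("first {} bytes: {:?}", head.len(), head);
    Ok(())
}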
-#[derive(Default)] -pub struct LocalFs { - // The blob file specified by the user. - blob_file: String, - // Directory to store blob files. If `blob_file` is not specified, `dir`/`blob_id` will be used - // as the blob file name. - dir: String, - // Alternative directories to store blob files - alt_dirs: Vec, - // Metrics collector. - metrics: Arc, - // Hashmap to map blob id to blob file. - entries: RwLock>>, -} - -impl LocalFs { - pub fn new(config: &LocalFsConfig, id: Option<&str>) -> Result { - let id = id.ok_or_else(|| einval!("LocalFs requires blob_id"))?; - - if config.blob_file.is_empty() && config.dir.is_empty() { - return Err(einval!("blob file or dir is required")); - } - - Ok(LocalFs { - blob_file: config.blob_file.clone(), - dir: config.dir.clone(), - alt_dirs: config.alt_dirs.clone(), - metrics: BackendMetrics::new(id, "localfs"), - entries: RwLock::new(HashMap::new()), - }) - } - - // Use the user specified blob file name if available, otherwise generate the file name by - // concatenating `dir` and `blob_id`. - fn get_blob_path(&self, blob_id: &str) -> LocalFsResult { - let path = if !self.blob_file.is_empty() { - Path::new(&self.blob_file).to_path_buf() - } else { - // Search blob file in dir and additionally in alt_dirs - let is_valid = |dir: &PathBuf| -> bool { - let blob = Path::new(&dir).join(blob_id); - if let Ok(meta) = std::fs::metadata(blob) { - meta.len() != 0 - } else { - false - } - }; - - let blob = Path::new(&self.dir).join(blob_id); - if is_valid(&blob) || self.alt_dirs.is_empty() { - blob - } else { - let mut file = PathBuf::new(); - for dir in &self.alt_dirs { - file = Path::new(dir).join(blob_id); - if is_valid(&file) { - break; - } - } - file - } - }; - - path.canonicalize().map_err(|e| { - LocalFsError::BlobFile(format!("invalid file path {}, {}", path.display(), e)) - }) - } - - #[allow(clippy::mutex_atomic)] - fn get_blob(&self, blob_id: &str) -> LocalFsResult> { - // Don't expect poisoned lock here. - if let Some(entry) = self.entries.read().unwrap().get(blob_id) { - return Ok(entry.clone()); - } - - let blob_file_path = self.get_blob_path(blob_id)?; - let file = OpenOptions::new() - .read(true) - .open(&blob_file_path) - .map_err(|e| { - let msg = format!( - "failed to open blob file {}, {}", - blob_file_path.display(), - e - ); - LocalFsError::BlobFile(msg) - })?; - // Don't expect poisoned lock here. 
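// Editorial sketch (not part of this diff): the caching pattern `LocalFs::get_blob`
// uses around this point. The first lookup takes the read lock; on a miss the entry
// is built outside any lock, and the write lock re-checks the map so two racing
// callers end up sharing a single cached entry. Types are simplified here.
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

fn get_or_insert(cache: &RwLock<HashMap<String, Arc<String>>>, key: &str) -> Arc<String> {
    // Fast path: shared read lock.
    if let Some(v) = cache.read().unwrap().get(key) {
        return v.clone();
    }
    // Slow path: build the value, then re-check under the write lock so a
    // concurrent winner's entry is reused instead of overwritten.
    let candidate = Arc::new(format!("entry for {}", key));
    let mut guard = cache.write().unwrap();
    if let Some(v) = guard.get(key) {
        v.clone()
    } else {
        guard.insert(key.to_string(), candidate.clone());
        candidate
    }
}

fn main() {
    let cache = RwLock::new(HashMap::new());
    let a = get_or_insert(&cache, "blob-1");
    let b = get_or_insert(&cache, "blob-1");
    assert!(Arc::ptr_eq(&a, &b));
}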
- let mut table_guard = self.entries.write().unwrap(); - if let Some(entry) = table_guard.get(blob_id) { - Ok(entry.clone()) - } else { - let entry = Arc::new(LocalFsEntry { - id: blob_id.to_owned(), - file, - metrics: self.metrics.clone(), - }); - table_guard.insert(blob_id.to_string(), entry.clone()); - Ok(entry) - } - } -} - -impl BlobBackend for LocalFs { - fn shutdown(&self) {} - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - - fn get_reader(&self, blob_id: &str) -> BackendResult> { - self.get_blob(blob_id).map_err(|e| e.into()) - } -} - -impl Drop for LocalFs { - fn drop(&mut self) { - self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - use std::os::unix::io::{FromRawFd, IntoRawFd}; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_invalid_localfs_new() { - let config = LocalFsConfig { - blob_file: "".to_string(), - dir: "".to_string(), - alt_dirs: Vec::new(), - }; - assert!(LocalFs::new(&config, Some("test")).is_err()); - - let config = LocalFsConfig { - blob_file: "/a/b/c".to_string(), - dir: "/a/b".to_string(), - alt_dirs: Vec::new(), - }; - assert!(LocalFs::new(&config, None).is_err()); - } - - #[test] - fn test_localfs_get_blob_path() { - let config = LocalFsConfig { - blob_file: "/a/b/cxxxxxxxxxxxxxxxxxxxxxxx".to_string(), - dir: "/a/b".to_string(), - alt_dirs: Vec::new(), - }; - let fs = LocalFs::new(&config, Some("test")).unwrap(); - assert!(fs.get_blob_path("test").is_err()); - - let tempfile = TempFile::new().unwrap(); - let path = tempfile.as_path(); - let filename = path.file_name().unwrap().to_str().unwrap(); - - let config = LocalFsConfig { - blob_file: path.to_str().unwrap().to_owned(), - dir: path.parent().unwrap().to_str().unwrap().to_owned(), - alt_dirs: Vec::new(), - }; - let fs = LocalFs::new(&config, Some("test")).unwrap(); - assert_eq!(fs.get_blob_path("test").unwrap().to_str(), path.to_str()); - - let config = LocalFsConfig { - blob_file: "".to_string(), - dir: path.parent().unwrap().to_str().unwrap().to_owned(), - alt_dirs: Vec::new(), - }; - let fs = LocalFs::new(&config, Some(filename)).unwrap(); - assert_eq!(fs.get_blob_path(filename).unwrap().to_str(), path.to_str()); - - let config = LocalFsConfig { - blob_file: "".to_string(), - dir: "/a/b".to_string(), - alt_dirs: vec![ - "/test".to_string(), - path.parent().unwrap().to_str().unwrap().to_owned(), - ], - }; - let fs = LocalFs::new(&config, Some(filename)).unwrap(); - assert_eq!(fs.get_blob_path(filename).unwrap().to_str(), path.to_str()); - } - - #[test] - fn test_localfs_get_blob() { - let tempfile = TempFile::new().unwrap(); - let path = tempfile.as_path(); - let filename = path.file_name().unwrap().to_str().unwrap(); - let config = LocalFsConfig { - blob_file: "".to_string(), - dir: path.parent().unwrap().to_str().unwrap().to_owned(), - alt_dirs: Vec::new(), - }; - let fs = LocalFs::new(&config, Some(filename)).unwrap(); - let blob1 = fs.get_blob(filename).unwrap(); - let blob2 = fs.get_blob(filename).unwrap(); - assert_eq!(Arc::strong_count(&blob1), 3); - assert_eq!(Arc::strong_count(&blob2), 3); - } - - #[test] - fn test_localfs_get_reader() { - let tempfile = TempFile::new().unwrap(); - let path = tempfile.as_path(); - let filename = path.file_name().unwrap().to_str().unwrap(); - - { - let mut file = unsafe { File::from_raw_fd(tempfile.as_file().as_raw_fd()) }; - file.write_all(&[0x1u8, 0x2, 0x3, 0x4]).unwrap(); - let _ = file.into_raw_fd(); - } - - let config = LocalFsConfig { - 
blob_file: "".to_string(), - dir: path.parent().unwrap().to_str().unwrap().to_owned(), - alt_dirs: Vec::new(), - }; - let fs = LocalFs::new(&config, Some(filename)).unwrap(); - let blob1 = fs.get_reader(filename).unwrap(); - let blob2 = fs.get_reader(filename).unwrap(); - assert_eq!(Arc::strong_count(&blob1), 3); - - let mut buf1 = [0x0u8]; - blob1.read(&mut buf1, 0x0).unwrap(); - assert_eq!(buf1[0], 0x1); - - let mut buf2 = [0x0u8]; - let mut buf3 = [0x0u8]; - let bufs = [ - unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }, - unsafe { FileVolatileSlice::from_raw_ptr(buf3.as_mut_ptr(), buf3.len()) }, - ]; - - assert_eq!(blob2.readv(&bufs, 0x1, 2).unwrap(), 2); - assert_eq!(buf2[0], 0x2); - assert_eq!(buf3[0], 0x3); - - assert_eq!(blob2.readv(&bufs, 0x3, 3).unwrap(), 1); - assert_eq!(buf2[0], 0x4); - assert_eq!(buf3[0], 0x3); - - assert_eq!(blob2.blob_size().unwrap(), 4); - let blob4 = fs.get_blob(filename).unwrap(); - assert_eq!(blob4.blob_size().unwrap(), 4); - } -} +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Storage backend driver to access blobs on local filesystems. + +use std::collections::HashMap; +use std::fmt; +use std::fs::{File, OpenOptions}; +use std::io::Result; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nix::sys::uio; + +use nydus_api::LocalFsConfig; +use nydus_utils::metrics::BackendMetrics; + +use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; +use crate::utils::{readv, MemSliceCursor}; + +type LocalFsResult = std::result::Result; + +/// Error codes related to localfs storage backend. +#[derive(Debug)] +pub enum LocalFsError { + BlobFile(String), + ReadBlob(String), +} + +impl fmt::Display for LocalFsError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LocalFsError::BlobFile(s) => write!(f, "{}", s), + LocalFsError::ReadBlob(s) => write!(f, "{}", s), + } + } +} + +impl From for BackendError { + fn from(error: LocalFsError) -> Self { + BackendError::LocalFs(error) + } +} + +struct LocalFsEntry { + id: String, + file: File, + metrics: Arc, +} + +impl BlobReader for LocalFsEntry { + fn blob_size(&self) -> BackendResult { + self.file.metadata().map(|v| v.len()).map_err(|e| { + let msg = format!("failed to get size of localfs blob {}, {}", self.id, e); + LocalFsError::BlobFile(msg).into() + }) + } + + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + uio::pread(self.file.as_raw_fd(), buf, offset as i64).map_err(|e| { + let msg = format!("failed to read data from blob {}, {}", self.id, e); + LocalFsError::ReadBlob(msg).into() + }) + } + + fn readv( + &self, + bufs: &[FileVolatileSlice], + offset: u64, + max_size: usize, + ) -> BackendResult { + let mut c = MemSliceCursor::new(bufs); + let mut iovec = c.consume(max_size); + + readv(self.file.as_raw_fd(), &mut iovec, offset).map_err(|e| { + let msg = format!("failed to read data from blob {}, {}", self.id, e); + LocalFsError::ReadBlob(msg).into() + }) + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } +} + +/// Storage backend based on local filesystem. +#[derive(Default)] +pub struct LocalFs { + // The blob file specified by the user. + blob_file: String, + // Directory to store blob files. If `blob_file` is not specified, `dir`/`blob_id` will be used + // as the blob file name. 
+ dir: String, + // Alternative directories to store blob files + alt_dirs: Vec, + // Metrics collector. + metrics: Arc, + // Hashmap to map blob id to blob file. + entries: RwLock>>, +} + +impl LocalFs { + pub fn new(config: &LocalFsConfig, id: Option<&str>) -> Result { + let id = id.ok_or_else(|| einval!("LocalFs requires blob_id"))?; + + if config.blob_file.is_empty() && config.dir.is_empty() { + return Err(einval!("blob file or dir is required")); + } + + Ok(LocalFs { + blob_file: config.blob_file.clone(), + dir: config.dir.clone(), + alt_dirs: config.alt_dirs.clone(), + metrics: BackendMetrics::new(id, "localfs"), + entries: RwLock::new(HashMap::new()), + }) + } + + // Use the user specified blob file name if available, otherwise generate the file name by + // concatenating `dir` and `blob_id`. + fn get_blob_path(&self, blob_id: &str) -> LocalFsResult { + let path = if !self.blob_file.is_empty() { + Path::new(&self.blob_file).to_path_buf() + } else { + // Search blob file in dir and additionally in alt_dirs + let is_valid = |dir: &PathBuf| -> bool { + let blob = Path::new(&dir).join(blob_id); + if let Ok(meta) = std::fs::metadata(blob) { + meta.len() != 0 + } else { + false + } + }; + + let blob = Path::new(&self.dir).join(blob_id); + if is_valid(&blob) || self.alt_dirs.is_empty() { + blob + } else { + let mut file = PathBuf::new(); + for dir in &self.alt_dirs { + file = Path::new(dir).join(blob_id); + if is_valid(&file) { + break; + } + } + file + } + }; + + path.canonicalize().map_err(|e| { + LocalFsError::BlobFile(format!("invalid file path {}, {}", path.display(), e)) + }) + } + + #[allow(clippy::mutex_atomic)] + fn get_blob(&self, blob_id: &str) -> LocalFsResult> { + // Don't expect poisoned lock here. + if let Some(entry) = self.entries.read().unwrap().get(blob_id) { + return Ok(entry.clone()); + } + + let blob_file_path = self.get_blob_path(blob_id)?; + let file = OpenOptions::new() + .read(true) + .open(&blob_file_path) + .map_err(|e| { + let msg = format!( + "failed to open blob file {}, {}", + blob_file_path.display(), + e + ); + LocalFsError::BlobFile(msg) + })?; + // Don't expect poisoned lock here. 
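// Editorial sketch (not part of this diff): the lookup order implemented by
// `get_blob_path` above, restated as a standalone function. A blob resolves to:
// 1) the explicit `blob_file` path if configured; otherwise 2) `dir`/`blob_id` if
// that file exists and is non-empty; otherwise 3) the first non-empty match under
// `alt_dirs`. Paths used in `main` are placeholders.
use std::path::{Path, PathBuf};

fn resolve_blob(blob_file: &str, dir: &str, alt_dirs: &[String], blob_id: &str) -> PathBuf {
    if !blob_file.is_empty() {
        return PathBuf::from(blob_file);
    }
    let non_empty = |p: &PathBuf| std::fs::metadata(p).map(|m| m.len() != 0).unwrap_or(false);
    let primary = Path::new(dir).join(blob_id);
    if non_empty(&primary) || alt_dirs.is_empty() {
        return primary;
    }
    for d in alt_dirs {
        let candidate = Path::new(d).join(blob_id);
        if non_empty(&candidate) {
            return candidate;
        }
    }
    // Like the original, fall back to the last candidate even when nothing matched;
    // the subsequent `canonicalize()`/`open()` will surface the error.
    Path::new(alt_dirs.last().unwrap()).join(blob_id)
}

fn main() {
    let p = resolve_blob("", "/var/lib/nydus/blobs", &["/tmp/blobs".to_string()], "blob-1");
    println!("resolved path: {}", p.display());
}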
+ let mut table_guard = self.entries.write().unwrap(); + if let Some(entry) = table_guard.get(blob_id) { + Ok(entry.clone()) + } else { + let entry = Arc::new(LocalFsEntry { + id: blob_id.to_owned(), + file, + metrics: self.metrics.clone(), + }); + table_guard.insert(blob_id.to_string(), entry.clone()); + Ok(entry) + } + } +} + +impl BlobBackend for LocalFs { + fn shutdown(&self) {} + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + + fn get_reader(&self, blob_id: &str) -> BackendResult> { + self.get_blob(blob_id).map_err(|e| e.into()) + } +} + +impl Drop for LocalFs { + fn drop(&mut self) { + self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use std::os::unix::io::{FromRawFd, IntoRawFd}; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_invalid_localfs_new() { + let config = LocalFsConfig { + blob_file: "".to_string(), + dir: "".to_string(), + alt_dirs: Vec::new(), + }; + assert!(LocalFs::new(&config, Some("test")).is_err()); + + let config = LocalFsConfig { + blob_file: "/a/b/c".to_string(), + dir: "/a/b".to_string(), + alt_dirs: Vec::new(), + }; + assert!(LocalFs::new(&config, None).is_err()); + } + + #[test] + fn test_localfs_get_blob_path() { + let config = LocalFsConfig { + blob_file: "/a/b/cxxxxxxxxxxxxxxxxxxxxxxx".to_string(), + dir: "/a/b".to_string(), + alt_dirs: Vec::new(), + }; + let fs = LocalFs::new(&config, Some("test")).unwrap(); + assert!(fs.get_blob_path("test").is_err()); + + let tempfile = TempFile::new().unwrap(); + let path = tempfile.as_path(); + let filename = path.file_name().unwrap().to_str().unwrap(); + + let config = LocalFsConfig { + blob_file: path.to_str().unwrap().to_owned(), + dir: path.parent().unwrap().to_str().unwrap().to_owned(), + alt_dirs: Vec::new(), + }; + let fs = LocalFs::new(&config, Some("test")).unwrap(); + assert_eq!(fs.get_blob_path("test").unwrap().to_str(), path.to_str()); + + let config = LocalFsConfig { + blob_file: "".to_string(), + dir: path.parent().unwrap().to_str().unwrap().to_owned(), + alt_dirs: Vec::new(), + }; + let fs = LocalFs::new(&config, Some(filename)).unwrap(); + assert_eq!(fs.get_blob_path(filename).unwrap().to_str(), path.to_str()); + + let config = LocalFsConfig { + blob_file: "".to_string(), + dir: "/a/b".to_string(), + alt_dirs: vec![ + "/test".to_string(), + path.parent().unwrap().to_str().unwrap().to_owned(), + ], + }; + let fs = LocalFs::new(&config, Some(filename)).unwrap(); + assert_eq!(fs.get_blob_path(filename).unwrap().to_str(), path.to_str()); + } + + #[test] + fn test_localfs_get_blob() { + let tempfile = TempFile::new().unwrap(); + let path = tempfile.as_path(); + let filename = path.file_name().unwrap().to_str().unwrap(); + let config = LocalFsConfig { + blob_file: "".to_string(), + dir: path.parent().unwrap().to_str().unwrap().to_owned(), + alt_dirs: Vec::new(), + }; + let fs = LocalFs::new(&config, Some(filename)).unwrap(); + let blob1 = fs.get_blob(filename).unwrap(); + let blob2 = fs.get_blob(filename).unwrap(); + assert_eq!(Arc::strong_count(&blob1), 3); + assert_eq!(Arc::strong_count(&blob2), 3); + } + + #[test] + fn test_localfs_get_reader() { + let tempfile = TempFile::new().unwrap(); + let path = tempfile.as_path(); + let filename = path.file_name().unwrap().to_str().unwrap(); + + { + let mut file = unsafe { File::from_raw_fd(tempfile.as_file().as_raw_fd()) }; + file.write_all(&[0x1u8, 0x2, 0x3, 0x4]).unwrap(); + let _ = file.into_raw_fd(); + } + + let config = LocalFsConfig { + 
blob_file: "".to_string(), + dir: path.parent().unwrap().to_str().unwrap().to_owned(), + alt_dirs: Vec::new(), + }; + let fs = LocalFs::new(&config, Some(filename)).unwrap(); + let blob1 = fs.get_reader(filename).unwrap(); + let blob2 = fs.get_reader(filename).unwrap(); + assert_eq!(Arc::strong_count(&blob1), 3); + + let mut buf1 = [0x0u8]; + blob1.read(&mut buf1, 0x0).unwrap(); + assert_eq!(buf1[0], 0x1); + + let mut buf2 = [0x0u8]; + let mut buf3 = [0x0u8]; + let bufs = [ + unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }, + unsafe { FileVolatileSlice::from_raw_ptr(buf3.as_mut_ptr(), buf3.len()) }, + ]; + + assert_eq!(blob2.readv(&bufs, 0x1, 2).unwrap(), 2); + assert_eq!(buf2[0], 0x2); + assert_eq!(buf3[0], 0x3); + + assert_eq!(blob2.readv(&bufs, 0x3, 3).unwrap(), 1); + assert_eq!(buf2[0], 0x4); + assert_eq!(buf3[0], 0x3); + + assert_eq!(blob2.blob_size().unwrap(), 4); + let blob4 = fs.get_blob(filename).unwrap(); + assert_eq!(blob4.blob_size().unwrap(), 4); + } +} diff --git a/storage/src/backend/mod.rs b/storage/src/backend/mod.rs index 155fcecf8d5..271f49b1768 100644 --- a/storage/src/backend/mod.rs +++ b/storage/src/backend/mod.rs @@ -1,276 +1,276 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Storage backends to read blob data from Registry, OSS, disk, file system etc. -//! -//! There are several types of storage backend drivers implemented: -//! - [Registry](registry/struct.Registry.html): backend driver to access blobs on container image -//! registry. -//! - [Oss](oss/struct.Oss.html): backend driver to access blobs on Oss(Object Storage System). -//! - [LocalFs](localfs/struct.LocalFs.html): backend driver to access blobs on local file system. -//! The [LocalFs](localfs/struct.LocalFs.html) storage backend supports backend level data -//! prefetching, which is to load data into page cache. -//! - [LocalDisk](localdisk/struct.LocalDisk.html): backend driver to access blobs on local disk. - -use std::fmt; -use std::io::Read; -use std::{sync::Arc, time::Duration}; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nydus_utils::{ - metrics::{BackendMetrics, ERROR_HOLDER}, - DelayType, Delayer, -}; - -use crate::utils::{alloc_buf, copyv}; -use crate::StorageError; - -#[cfg(any( - feature = "backend-oss", - feature = "backend-registry", - feature = "backend-s3", - feature = "backend-http-proxy", -))] -pub mod connection; -#[cfg(feature = "backend-http-proxy")] -pub mod http_proxy; -#[cfg(feature = "backend-localdisk")] -pub mod localdisk; -#[cfg(feature = "backend-localfs")] -pub mod localfs; -#[cfg(any(feature = "backend-oss", feature = "backend-s3"))] -pub mod object_storage; -#[cfg(feature = "backend-oss")] -pub mod oss; -#[cfg(feature = "backend-registry")] -pub mod registry; -#[cfg(feature = "backend-s3")] -pub mod s3; - -/// Error codes related to storage backend operations. -#[derive(Debug)] -pub enum BackendError { - /// Unsupported operation. - Unsupported(String), - /// Failed to copy data from/into blob. - CopyData(StorageError), - #[cfg(feature = "backend-localdisk")] - /// Error from LocalDisk storage backend. - LocalDisk(self::localdisk::LocalDiskError), - #[cfg(feature = "backend-registry")] - /// Error from Registry storage backend. - Registry(self::registry::RegistryError), - #[cfg(feature = "backend-localfs")] - /// Error from LocalFs storage backend. 
- LocalFs(self::localfs::LocalFsError), - #[cfg(any(feature = "backend-oss", feature = "backend-s3"))] - /// Error from object storage backend. - ObjectStorage(self::object_storage::ObjectStorageError), - #[cfg(feature = "backend-http-proxy")] - /// Error from local http proxy backend. - HttpProxy(self::http_proxy::HttpProxyError), -} - -impl fmt::Display for BackendError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - BackendError::Unsupported(s) => write!(f, "{}", s), - BackendError::CopyData(e) => write!(f, "failed to copy data, {}", e), - #[cfg(feature = "backend-registry")] - BackendError::Registry(e) => write!(f, "{:?}", e), - #[cfg(feature = "backend-localfs")] - BackendError::LocalFs(e) => write!(f, "{}", e), - #[cfg(any(feature = "backend-oss", feature = "backend-s3"))] - BackendError::ObjectStorage(e) => write!(f, "{}", e), - #[cfg(feature = "backend-localdisk")] - BackendError::LocalDisk(e) => write!(f, "{:?}", e), - #[cfg(feature = "backend-http-proxy")] - BackendError::HttpProxy(e) => write!(f, "{}", e), - } - } -} - -/// Specialized `Result` for storage backends. -pub type BackendResult = std::result::Result; - -/// Trait to read data from a on storage backend. -pub trait BlobReader: Send + Sync { - /// Get size of the blob file. - fn blob_size(&self) -> BackendResult; - - /// Try to read a range of data from the blob file into the provided buffer. - /// - /// Try to read data of range [offset, offset + buf.len()) from the blob file, and returns: - /// - bytes of data read, which may be smaller than buf.len() - /// - error code if error happens - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult; - - /// Read a range of data from the blob file into the provided buffer. - /// - /// Read data of range [offset, offset + buf.len()) from the blob file, and returns: - /// - bytes of data read, which may be smaller than buf.len() - /// - error code if error happens - /// - /// It will try `BlobBackend::retry_limit()` times at most and return the first successfully - /// read data. - fn read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - let mut retry_count = self.retry_limit(); - let begin_time = self.metrics().begin(); - - let mut delayer = Delayer::new(DelayType::BackOff, Duration::from_millis(500)); - - loop { - match self.try_read(buf, offset) { - Ok(size) => { - self.metrics().end(&begin_time, buf.len(), false); - return Ok(size); - } - Err(err) => { - if retry_count > 0 { - warn!( - "Read from backend failed: {:?}, retry count {}", - err, retry_count - ); - retry_count -= 1; - delayer.delay(); - } else { - self.metrics().end(&begin_time, buf.len(), true); - ERROR_HOLDER - .lock() - .unwrap() - .push(&format!("{:?}", err)) - .unwrap_or_else(|_| error!("Failed when try to hold error")); - return Err(err); - } - } - } - } - } - - /// Read as much as possible data into buffer. - fn read_all(&self, buf: &mut [u8], offset: u64) -> BackendResult { - let mut off = 0usize; - let mut left = buf.len(); - - while left > 0 { - let cnt = self.read(&mut buf[off..], offset + off as u64)?; - if cnt == 0 { - break; - } - off += cnt; - left -= cnt; - } - - Ok(off as usize) - } - - /// Read a range of data from the blob file into the provided buffers. 
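// Editorial sketch (not part of this diff): the retry strategy that the default
// `read` above applies around `try_read`: up to `retry_limit()` retries, sleeping
// with a growing back-off between attempts. This standalone version uses a plain
// doubling delay starting at 500ms to illustrate the shape; the real code delegates
// the pacing to `Delayer::new(DelayType::BackOff, Duration::from_millis(500))`.
use std::thread::sleep;
use std::time::Duration;

fn read_with_retry<F>(mut attempt: F, retry_limit: u8) -> Result<usize, String>
where
    F: FnMut() -> Result<usize, String>,
{
    let mut retries_left = retry_limit;
    let mut delay = Duration::from_millis(500);
    loop {
        match attempt() {
            Ok(n) => return Ok(n),
            Err(e) if retries_left > 0 => {
                eprintln!("read failed: {}, retries left {}", e, retries_left);
                retries_left -= 1;
                sleep(delay);
                delay *= 2; // back off before the next attempt
            }
            Err(e) => return Err(e),
        }
    }
}

fn main() {
    let mut calls = 0;
    let result = read_with_retry(
        || {
            calls += 1;
            if calls < 3 { Err("transient".to_string()) } else { Ok(4096) }
        },
        5,
    );
    assert_eq!(result, Ok(4096));
}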
- /// - /// Read data of range [offset, offset + max_size) from the blob file, and returns: - /// - bytes of data read, which may be smaller than max_size - /// - error code if error happens - /// - /// It will try `BlobBackend::retry_limit()` times at most and return the first successfully - /// read data. - fn readv( - &self, - bufs: &[FileVolatileSlice], - offset: u64, - max_size: usize, - ) -> BackendResult { - if bufs.len() == 1 && max_size >= bufs[0].len() { - let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), bufs[0].len()) }; - self.read(buf, offset) - } else { - // Use std::alloc to avoid zeroing the allocated buffer. - let size = bufs.iter().fold(0usize, move |size, s| size + s.len()); - let size = std::cmp::min(size, max_size); - let mut data = alloc_buf(size); - - let result = self.read(&mut data, offset)?; - copyv(&[&data], bufs, 0, result, 0, 0) - .map(|r| r.0) - .map_err(BackendError::CopyData) - } - } - - /// Get metrics object. - fn metrics(&self) -> &BackendMetrics; - - /// Get maximum number of times to retry when encountering IO errors. - fn retry_limit(&self) -> u8 { - 0 - } -} - -/// Trait to access blob files on backend storages, such as OSS, registry, local fs etc. -pub trait BlobBackend: Send + Sync { - /// Destroy the `BlobBackend` storage object. - fn shutdown(&self); - - /// Get metrics object. - fn metrics(&self) -> &BackendMetrics; - - /// Get a blob reader object to access blob `blob_id`. - fn get_reader(&self, blob_id: &str) -> BackendResult>; -} - -/// A buffered reader for `BlobReader` object. -pub struct BlobBufReader { - buf: Vec, - pos: usize, - len: usize, - start: u64, - size: u64, - reader: Arc, -} - -impl BlobBufReader { - /// Create a new instance of `BlobBufReader`. - pub fn new(buf_size: usize, reader: Arc, start: u64, size: u64) -> Self { - Self { - buf: alloc_buf(buf_size), - pos: 0, - len: 0, - start, - size, - reader, - } - } -} - -impl Read for BlobBufReader { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let mut sz = self.len; - if sz == 0 && self.size == 0 { - // No more data. - return Ok(0); - } - - // Refill the buffer. - if sz == 0 && self.size > 0 { - let cnt = std::cmp::min(self.buf.len() as u64, self.size) as usize; - let ret = self - .reader - .read(&mut self.buf[..cnt], self.start) - .map_err(|e| eio!(format!("failed to read data from backend, {:?}", e)))?; - self.start += ret as u64; - self.size -= ret as u64; - self.pos = 0; - self.len = ret; - sz = ret; - } - if self.size != 0 && sz == 0 { - return Err(eio!("unexpected EOF when reading data from backend")); - } - - let sz = std::cmp::min(sz, buf.len()); - buf[..sz].copy_from_slice(&self.buf[self.pos..self.pos + sz]); - self.pos += sz; - self.len -= sz; - - Ok(sz) - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Storage backends to read blob data from Registry, OSS, disk, file system etc. +//! +//! There are several types of storage backend drivers implemented: +//! - [Registry](registry/struct.Registry.html): backend driver to access blobs on container image +//! registry. +//! - [Oss](oss/struct.Oss.html): backend driver to access blobs on Oss(Object Storage System). +//! - [LocalFs](localfs/struct.LocalFs.html): backend driver to access blobs on local file system. +//! The [LocalFs](localfs/struct.LocalFs.html) storage backend supports backend level data +//! prefetching, which is to load data into page cache. +//! 
- [LocalDisk](localdisk/struct.LocalDisk.html): backend driver to access blobs on local disk. + +use std::fmt; +use std::io::Read; +use std::{sync::Arc, time::Duration}; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nydus_utils::{ + metrics::{BackendMetrics, ERROR_HOLDER}, + DelayType, Delayer, +}; + +use crate::utils::{alloc_buf, copyv}; +use crate::StorageError; + +#[cfg(any( + feature = "backend-oss", + feature = "backend-registry", + feature = "backend-s3", + feature = "backend-http-proxy", +))] +pub mod connection; +#[cfg(feature = "backend-http-proxy")] +pub mod http_proxy; +#[cfg(feature = "backend-localdisk")] +pub mod localdisk; +#[cfg(feature = "backend-localfs")] +pub mod localfs; +#[cfg(any(feature = "backend-oss", feature = "backend-s3"))] +pub mod object_storage; +#[cfg(feature = "backend-oss")] +pub mod oss; +#[cfg(feature = "backend-registry")] +pub mod registry; +#[cfg(feature = "backend-s3")] +pub mod s3; + +/// Error codes related to storage backend operations. +#[derive(Debug)] +pub enum BackendError { + /// Unsupported operation. + Unsupported(String), + /// Failed to copy data from/into blob. + CopyData(StorageError), + #[cfg(feature = "backend-localdisk")] + /// Error from LocalDisk storage backend. + LocalDisk(self::localdisk::LocalDiskError), + #[cfg(feature = "backend-registry")] + /// Error from Registry storage backend. + Registry(self::registry::RegistryError), + #[cfg(feature = "backend-localfs")] + /// Error from LocalFs storage backend. + LocalFs(self::localfs::LocalFsError), + #[cfg(any(feature = "backend-oss", feature = "backend-s3"))] + /// Error from object storage backend. + ObjectStorage(self::object_storage::ObjectStorageError), + #[cfg(feature = "backend-http-proxy")] + /// Error from local http proxy backend. + HttpProxy(self::http_proxy::HttpProxyError), +} + +impl fmt::Display for BackendError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BackendError::Unsupported(s) => write!(f, "{}", s), + BackendError::CopyData(e) => write!(f, "failed to copy data, {}", e), + #[cfg(feature = "backend-registry")] + BackendError::Registry(e) => write!(f, "{:?}", e), + #[cfg(feature = "backend-localfs")] + BackendError::LocalFs(e) => write!(f, "{}", e), + #[cfg(any(feature = "backend-oss", feature = "backend-s3"))] + BackendError::ObjectStorage(e) => write!(f, "{}", e), + #[cfg(feature = "backend-localdisk")] + BackendError::LocalDisk(e) => write!(f, "{:?}", e), + #[cfg(feature = "backend-http-proxy")] + BackendError::HttpProxy(e) => write!(f, "{}", e), + } + } +} + +/// Specialized `Result` for storage backends. +pub type BackendResult = std::result::Result; + +/// Trait to read data from a on storage backend. +pub trait BlobReader: Send + Sync { + /// Get size of the blob file. + fn blob_size(&self) -> BackendResult; + + /// Try to read a range of data from the blob file into the provided buffer. + /// + /// Try to read data of range [offset, offset + buf.len()) from the blob file, and returns: + /// - bytes of data read, which may be smaller than buf.len() + /// - error code if error happens + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult; + + /// Read a range of data from the blob file into the provided buffer. 
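// Editorial sketch (not part of this diff): a minimal in-memory implementation of
// the `BlobReader` trait being defined here, to show which methods a backend must
// supply versus what the default methods (`read`, `read_all`, `readv`,
// `retry_limit`) provide for free. Import paths assume the sketch lives inside the
// storage crate shown in this diff; `BackendMetrics::new` is assumed to return an
// `Arc<BackendMetrics>`, as its callers above suggest.
use std::sync::Arc;

use nydus_utils::metrics::BackendMetrics;

use crate::backend::{BackendResult, BlobReader};

struct MemBlob {
    data: Vec<u8>,
    metrics: Arc<BackendMetrics>,
}

impl MemBlob {
    fn new(data: Vec<u8>) -> Self {
        MemBlob {
            data,
            metrics: BackendMetrics::new("mem-blob", "memory"),
        }
    }
}

impl BlobReader for MemBlob {
    fn blob_size(&self) -> BackendResult<u64> {
        Ok(self.data.len() as u64)
    }

    fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult<usize> {
        // Copy as much as is available at `offset`; a short or zero-length read is a
        // valid result under the contract documented above.
        let start = std::cmp::min(offset as usize, self.data.len());
        let len = std::cmp::min(buf.len(), self.data.len() - start);
        buf[..len].copy_from_slice(&self.data[start..start + len]);
        Ok(len)
    }

    fn metrics(&self) -> &BackendMetrics {
        &self.metrics
    }
}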
+ /// + /// Read data of range [offset, offset + buf.len()) from the blob file, and returns: + /// - bytes of data read, which may be smaller than buf.len() + /// - error code if error happens + /// + /// It will try `BlobBackend::retry_limit()` times at most and return the first successfully + /// read data. + fn read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + let mut retry_count = self.retry_limit(); + let begin_time = self.metrics().begin(); + + let mut delayer = Delayer::new(DelayType::BackOff, Duration::from_millis(500)); + + loop { + match self.try_read(buf, offset) { + Ok(size) => { + self.metrics().end(&begin_time, buf.len(), false); + return Ok(size); + } + Err(err) => { + if retry_count > 0 { + warn!( + "Read from backend failed: {:?}, retry count {}", + err, retry_count + ); + retry_count -= 1; + delayer.delay(); + } else { + self.metrics().end(&begin_time, buf.len(), true); + ERROR_HOLDER + .lock() + .unwrap() + .push(&format!("{:?}", err)) + .unwrap_or_else(|_| error!("Failed when try to hold error")); + return Err(err); + } + } + } + } + } + + /// Read as much as possible data into buffer. + fn read_all(&self, buf: &mut [u8], offset: u64) -> BackendResult { + let mut off = 0usize; + let mut left = buf.len(); + + while left > 0 { + let cnt = self.read(&mut buf[off..], offset + off as u64)?; + if cnt == 0 { + break; + } + off += cnt; + left -= cnt; + } + + Ok(off as usize) + } + + /// Read a range of data from the blob file into the provided buffers. + /// + /// Read data of range [offset, offset + max_size) from the blob file, and returns: + /// - bytes of data read, which may be smaller than max_size + /// - error code if error happens + /// + /// It will try `BlobBackend::retry_limit()` times at most and return the first successfully + /// read data. + fn readv( + &self, + bufs: &[FileVolatileSlice], + offset: u64, + max_size: usize, + ) -> BackendResult { + if bufs.len() == 1 && max_size >= bufs[0].len() { + let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), bufs[0].len()) }; + self.read(buf, offset) + } else { + // Use std::alloc to avoid zeroing the allocated buffer. + let size = bufs.iter().fold(0usize, move |size, s| size + s.len()); + let size = std::cmp::min(size, max_size); + let mut data = alloc_buf(size); + + let result = self.read(&mut data, offset)?; + copyv(&[&data], bufs, 0, result, 0, 0) + .map(|r| r.0) + .map_err(BackendError::CopyData) + } + } + + /// Get metrics object. + fn metrics(&self) -> &BackendMetrics; + + /// Get maximum number of times to retry when encountering IO errors. + fn retry_limit(&self) -> u8 { + 0 + } +} + +/// Trait to access blob files on backend storages, such as OSS, registry, local fs etc. +pub trait BlobBackend: Send + Sync { + /// Destroy the `BlobBackend` storage object. + fn shutdown(&self); + + /// Get metrics object. + fn metrics(&self) -> &BackendMetrics; + + /// Get a blob reader object to access blob `blob_id`. + fn get_reader(&self, blob_id: &str) -> BackendResult>; +} + +/// A buffered reader for `BlobReader` object. +pub struct BlobBufReader { + buf: Vec, + pos: usize, + len: usize, + start: u64, + size: u64, + reader: Arc, +} + +impl BlobBufReader { + /// Create a new instance of `BlobBufReader`. 
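// Editorial sketch (not part of this diff): `BlobBufReader` adapts any
// `Arc<dyn BlobReader>` into a `std::io::Read`, so callers can stream a byte range
// with ordinary I/O helpers. The function below is illustrative; it assumes a
// reader obtained elsewhere (for example via `BlobBackend::get_reader`).
use std::io;
use std::sync::Arc;

use crate::backend::{BlobBufReader, BlobReader};

fn dump_range(reader: Arc<dyn BlobReader>, start: u64, size: u64) -> io::Result<u64> {
    // 64 KiB internal buffer; it refills transparently from `reader` as it drains.
    let mut buffered = BlobBufReader::new(0x10000, reader, start, size);
    let mut sink = io::sink();
    // Any `io::Read` consumer works from here on.
    io::copy(&mut buffered, &mut sink)
}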
+ pub fn new(buf_size: usize, reader: Arc, start: u64, size: u64) -> Self { + Self { + buf: alloc_buf(buf_size), + pos: 0, + len: 0, + start, + size, + reader, + } + } +} + +impl Read for BlobBufReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let mut sz = self.len; + if sz == 0 && self.size == 0 { + // No more data. + return Ok(0); + } + + // Refill the buffer. + if sz == 0 && self.size > 0 { + let cnt = std::cmp::min(self.buf.len() as u64, self.size) as usize; + let ret = self + .reader + .read(&mut self.buf[..cnt], self.start) + .map_err(|e| eio!(format!("failed to read data from backend, {:?}", e)))?; + self.start += ret as u64; + self.size -= ret as u64; + self.pos = 0; + self.len = ret; + sz = ret; + } + if self.size != 0 && sz == 0 { + return Err(eio!("unexpected EOF when reading data from backend")); + } + + let sz = std::cmp::min(sz, buf.len()); + buf[..sz].copy_from_slice(&self.buf[self.pos..self.pos + sz]); + self.pos += sz; + self.len -= sz; + + Ok(sz) + } +} diff --git a/storage/src/backend/object_storage.rs b/storage/src/backend/object_storage.rs index 7c2b8ba655c..6a4674d0c95 100644 --- a/storage/src/backend/object_storage.rs +++ b/storage/src/backend/object_storage.rs @@ -1,218 +1,218 @@ -// Copyright 2022 Ant Group. All rights reserved. -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Base module used to implement object storage backend drivers (such as oss, s3, etc.). - -use std::fmt; -use std::fmt::Debug; -use std::io::{Error, Result}; -use std::marker::Send; -use std::sync::Arc; - -use reqwest::header::{HeaderMap, CONTENT_LENGTH}; -use reqwest::Method; - -use nydus_utils::metrics::BackendMetrics; - -use super::connection::{Connection, ConnectionError}; -use super::{BackendError, BackendResult, BlobBackend, BlobReader}; - -/// Error codes related to object storage backend. -#[derive(Debug)] -pub enum ObjectStorageError { - Auth(Error), - Request(ConnectionError), - ConstructHeader(String), - Transport(reqwest::Error), - Response(String), -} - -impl fmt::Display for ObjectStorageError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ObjectStorageError::Auth(e) => write!(f, "failed to generate auth info, {}", e), - ObjectStorageError::Request(e) => write!(f, "network communication error, {}", e), - ObjectStorageError::ConstructHeader(e) => { - write!(f, "failed to generate HTTP header, {}", e) - } - ObjectStorageError::Transport(e) => write!(f, "network communication error, {}", e), - ObjectStorageError::Response(s) => write!(f, "network communication error, {}", s), - } - } -} - -impl From for BackendError { - fn from(err: ObjectStorageError) -> Self { - BackendError::ObjectStorage(err) - } -} - -pub trait ObjectStorageState: Send + Sync + Debug { - // `url` builds the resource path and full url for the object. - fn url(&self, object_key: &str, query: &[&str]) -> (String, String); - - // `sign` signs the request with the access key and secret key. 
- fn sign( - &self, - verb: Method, - headers: &mut HeaderMap, - canonicalized_resource: &str, - full_resource_url: &str, - ) -> Result<()>; - - fn retry_limit(&self) -> u8; -} - -struct ObjectStorageReader -where - T: ObjectStorageState, -{ - blob_id: String, - connection: Arc, - state: Arc, - metrics: Arc, -} - -impl BlobReader for ObjectStorageReader -where - T: ObjectStorageState, -{ - fn blob_size(&self) -> BackendResult { - let (resource, url) = self.state.url(&self.blob_id, &[]); - let mut headers = HeaderMap::new(); - - self.state - .sign(Method::HEAD, &mut headers, resource.as_str(), url.as_str()) - .map_err(ObjectStorageError::Auth)?; - - let resp = self - .connection - .call::<&[u8]>(Method::HEAD, url.as_str(), None, None, &mut headers, true) - .map_err(ObjectStorageError::Request)?; - let content_length = resp - .headers() - .get(CONTENT_LENGTH) - .ok_or_else(|| ObjectStorageError::Response("invalid content length".to_string()))?; - - Ok(content_length - .to_str() - .map_err(|err| { - ObjectStorageError::Response(format!("invalid content length: {:?}", err)) - })? - .parse::() - .map_err(|err| { - ObjectStorageError::Response(format!("invalid content length: {:?}", err)) - })?) - } - - fn try_read(&self, mut buf: &mut [u8], offset: u64) -> BackendResult { - let query = &[]; - let (resource, url) = self.state.url(&self.blob_id, query); - let mut headers = HeaderMap::new(); - let end_at = offset + buf.len() as u64 - 1; - let range = format!("bytes={}-{}", offset, end_at); - - headers.insert( - "Range", - range - .as_str() - .parse() - .map_err(|e| ObjectStorageError::ConstructHeader(format!("{}", e)))?, - ); - self.state - .sign(Method::GET, &mut headers, resource.as_str(), url.as_str()) - .map_err(ObjectStorageError::Auth)?; - - // Safe because the the call() is a synchronous operation. - let mut resp = self - .connection - .call::<&[u8]>(Method::GET, url.as_str(), None, None, &mut headers, true) - .map_err(ObjectStorageError::Request)?; - Ok(resp - .copy_to(&mut buf) - .map_err(ObjectStorageError::Transport) - .map(|size| size as usize)?) - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - - fn retry_limit(&self) -> u8 { - self.state.retry_limit() - } -} - -#[derive(Debug)] -pub struct ObjectStorage -where - T: ObjectStorageState, -{ - connection: Arc, - state: Arc, - metrics: Option>, - #[allow(unused)] - id: Option, -} - -impl ObjectStorage -where - T: ObjectStorageState, -{ - pub(crate) fn new_object_storage( - connection: Arc, - state: Arc, - metrics: Option>, - id: Option, - ) -> Self { - ObjectStorage { - connection, - state, - metrics, - id, - } - } -} - -impl BlobBackend for ObjectStorage -where - T: ObjectStorageState, -{ - fn shutdown(&self) { - self.connection.shutdown(); - } - - fn metrics(&self) -> &BackendMetrics { - // `metrics()` is only used for nydusd, which will always provide valid `blob_id`, thus - // `self.metrics` has valid value. 
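// Editorial sketch (not part of this diff): how `try_read` above maps a buffer and
// offset onto an HTTP `Range` header. HTTP ranges are inclusive on both ends, hence
// the `- 1`; a 4 KiB read at offset 0 asks for bytes 0..=4095.
fn range_header(offset: u64, buf_len: usize) -> String {
    let end_at = offset + buf_len as u64 - 1;
    format!("bytes={}-{}", offset, end_at)
}

fn main() {
    assert_eq!(range_header(0, 4096), "bytes=0-4095");
    assert_eq!(range_header(4096, 512), "bytes=4096-4607");
}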
- self.metrics.as_ref().unwrap() - } - - fn get_reader(&self, blob_id: &str) -> BackendResult> { - if let Some(metrics) = self.metrics.as_ref() { - Ok(Arc::new(ObjectStorageReader { - blob_id: blob_id.to_string(), - state: self.state.clone(), - connection: self.connection.clone(), - metrics: metrics.clone(), - })) - } else { - Err(BackendError::Unsupported( - "no metrics object available for OssReader".to_string(), - )) - } - } -} - -impl Drop for ObjectStorage -where - T: ObjectStorageState, -{ - fn drop(&mut self) { - if let Some(metrics) = self.metrics.as_ref() { - metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } - } -} +// Copyright 2022 Ant Group. All rights reserved. +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Base module used to implement object storage backend drivers (such as oss, s3, etc.). + +use std::fmt; +use std::fmt::Debug; +use std::io::{Error, Result}; +use std::marker::Send; +use std::sync::Arc; + +use reqwest::header::{HeaderMap, CONTENT_LENGTH}; +use reqwest::Method; + +use nydus_utils::metrics::BackendMetrics; + +use super::connection::{Connection, ConnectionError}; +use super::{BackendError, BackendResult, BlobBackend, BlobReader}; + +/// Error codes related to object storage backend. +#[derive(Debug)] +pub enum ObjectStorageError { + Auth(Error), + Request(ConnectionError), + ConstructHeader(String), + Transport(reqwest::Error), + Response(String), +} + +impl fmt::Display for ObjectStorageError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ObjectStorageError::Auth(e) => write!(f, "failed to generate auth info, {}", e), + ObjectStorageError::Request(e) => write!(f, "network communication error, {}", e), + ObjectStorageError::ConstructHeader(e) => { + write!(f, "failed to generate HTTP header, {}", e) + } + ObjectStorageError::Transport(e) => write!(f, "network communication error, {}", e), + ObjectStorageError::Response(s) => write!(f, "network communication error, {}", s), + } + } +} + +impl From for BackendError { + fn from(err: ObjectStorageError) -> Self { + BackendError::ObjectStorage(err) + } +} + +pub trait ObjectStorageState: Send + Sync + Debug { + // `url` builds the resource path and full url for the object. + fn url(&self, object_key: &str, query: &[&str]) -> (String, String); + + // `sign` signs the request with the access key and secret key. + fn sign( + &self, + verb: Method, + headers: &mut HeaderMap, + canonicalized_resource: &str, + full_resource_url: &str, + ) -> Result<()>; + + fn retry_limit(&self) -> u8; +} + +struct ObjectStorageReader +where + T: ObjectStorageState, +{ + blob_id: String, + connection: Arc, + state: Arc, + metrics: Arc, +} + +impl BlobReader for ObjectStorageReader +where + T: ObjectStorageState, +{ + fn blob_size(&self) -> BackendResult { + let (resource, url) = self.state.url(&self.blob_id, &[]); + let mut headers = HeaderMap::new(); + + self.state + .sign(Method::HEAD, &mut headers, resource.as_str(), url.as_str()) + .map_err(ObjectStorageError::Auth)?; + + let resp = self + .connection + .call::<&[u8]>(Method::HEAD, url.as_str(), None, None, &mut headers, true) + .map_err(ObjectStorageError::Request)?; + let content_length = resp + .headers() + .get(CONTENT_LENGTH) + .ok_or_else(|| ObjectStorageError::Response("invalid content length".to_string()))?; + + Ok(content_length + .to_str() + .map_err(|err| { + ObjectStorageError::Response(format!("invalid content length: {:?}", err)) + })? 
+ .parse::() + .map_err(|err| { + ObjectStorageError::Response(format!("invalid content length: {:?}", err)) + })?) + } + + fn try_read(&self, mut buf: &mut [u8], offset: u64) -> BackendResult { + let query = &[]; + let (resource, url) = self.state.url(&self.blob_id, query); + let mut headers = HeaderMap::new(); + let end_at = offset + buf.len() as u64 - 1; + let range = format!("bytes={}-{}", offset, end_at); + + headers.insert( + "Range", + range + .as_str() + .parse() + .map_err(|e| ObjectStorageError::ConstructHeader(format!("{}", e)))?, + ); + self.state + .sign(Method::GET, &mut headers, resource.as_str(), url.as_str()) + .map_err(ObjectStorageError::Auth)?; + + // Safe because the the call() is a synchronous operation. + let mut resp = self + .connection + .call::<&[u8]>(Method::GET, url.as_str(), None, None, &mut headers, true) + .map_err(ObjectStorageError::Request)?; + Ok(resp + .copy_to(&mut buf) + .map_err(ObjectStorageError::Transport) + .map(|size| size as usize)?) + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + + fn retry_limit(&self) -> u8 { + self.state.retry_limit() + } +} + +#[derive(Debug)] +pub struct ObjectStorage +where + T: ObjectStorageState, +{ + connection: Arc, + state: Arc, + metrics: Option>, + #[allow(unused)] + id: Option, +} + +impl ObjectStorage +where + T: ObjectStorageState, +{ + pub(crate) fn new_object_storage( + connection: Arc, + state: Arc, + metrics: Option>, + id: Option, + ) -> Self { + ObjectStorage { + connection, + state, + metrics, + id, + } + } +} + +impl BlobBackend for ObjectStorage +where + T: ObjectStorageState, +{ + fn shutdown(&self) { + self.connection.shutdown(); + } + + fn metrics(&self) -> &BackendMetrics { + // `metrics()` is only used for nydusd, which will always provide valid `blob_id`, thus + // `self.metrics` has valid value. + self.metrics.as_ref().unwrap() + } + + fn get_reader(&self, blob_id: &str) -> BackendResult> { + if let Some(metrics) = self.metrics.as_ref() { + Ok(Arc::new(ObjectStorageReader { + blob_id: blob_id.to_string(), + state: self.state.clone(), + connection: self.connection.clone(), + metrics: metrics.clone(), + })) + } else { + Err(BackendError::Unsupported( + "no metrics object available for OssReader".to_string(), + )) + } + } +} + +impl Drop for ObjectStorage +where + T: ObjectStorageState, +{ + fn drop(&mut self) { + if let Some(metrics) = self.metrics.as_ref() { + metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } + } +} diff --git a/storage/src/backend/oss.rs b/storage/src/backend/oss.rs index 9ec08cfd71c..29b5d8918bc 100644 --- a/storage/src/backend/oss.rs +++ b/storage/src/backend/oss.rs @@ -1,198 +1,198 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Storage backend driver to access blobs on Oss(Object Storage System). -use std::io::Result; -use std::sync::Arc; -use std::time::SystemTime; - -use base64::Engine; -use hmac::{Hmac, Mac}; -use reqwest::header::HeaderMap; -use reqwest::Method; -use sha1::Sha1; - -use nydus_api::OssConfig; -use nydus_utils::metrics::BackendMetrics; - -use crate::backend::connection::{Connection, ConnectionConfig}; -use crate::backend::object_storage::{ObjectStorage, ObjectStorageState}; - -const HEADER_DATE: &str = "Date"; -const HEADER_AUTHORIZATION: &str = "Authorization"; - -type HmacSha1 = Hmac; - -// `OssState` is almost identical to `OssConfig`, but let's keep them separated. 
-#[derive(Debug)] -pub struct OssState { - access_key_id: String, - access_key_secret: String, - scheme: String, - object_prefix: String, - endpoint: String, - bucket_name: String, - retry_limit: u8, -} - -impl OssState { - fn resource(&self, object_key: &str, query_str: &str) -> String { - format!("/{}/{}{}", self.bucket_name, object_key, query_str) - } -} - -impl ObjectStorageState for OssState { - fn url(&self, object_key: &str, query: &[&str]) -> (String, String) { - let object_key = &format!("{}{}", self.object_prefix, object_key); - let url = format!( - "{}://{}.{}/{}", - self.scheme, self.bucket_name, self.endpoint, object_key - ); - - if query.is_empty() { - (self.resource(object_key, ""), url) - } else { - let query_str = format!("?{}", query.join("&")); - let resource = self.resource(object_key, &query_str); - let url = format!("{}{}", url.as_str(), &query_str); - (resource, url) - } - } - - /// generate oss request signature - fn sign( - &self, - verb: Method, - headers: &mut HeaderMap, - canonicalized_resource: &str, - _: &str, - ) -> Result<()> { - let content_md5 = ""; - let content_type = ""; - let mut canonicalized_oss_headers = vec![]; - let date = httpdate::fmt_http_date(SystemTime::now()); - let mut data = vec![ - verb.as_str(), - content_md5, - content_type, - date.as_str(), - // canonicalized_oss_headers, - canonicalized_resource, - ]; - - for (name, value) in headers.iter() { - let name = name.as_str(); - let value = value.to_str().map_err(|e| einval!(e))?; - if name.starts_with("x-oss-") { - let header = format!("{}:{}", name.to_lowercase(), value); - canonicalized_oss_headers.push(header); - } - } - let canonicalized_oss_headers = canonicalized_oss_headers.join("\n"); - if !canonicalized_oss_headers.is_empty() { - data.insert(4, canonicalized_oss_headers.as_str()); - } - let data = data.join("\n"); - let hmac = HmacSha1::new_from_slice(self.access_key_secret.as_bytes()) - .map_err(|e| einval!(e))? - .chain_update(data.as_bytes()) - .finalize() - .into_bytes(); - let signature = base64::engine::general_purpose::STANDARD.encode(hmac); - - let authorization = format!("OSS {}:{}", self.access_key_id, signature); - - headers.insert(HEADER_DATE, date.as_str().parse().map_err(|e| einval!(e))?); - headers.insert( - HEADER_AUTHORIZATION, - authorization.as_str().parse().map_err(|e| einval!(e))?, - ); - - Ok(()) - } - - fn retry_limit(&self) -> u8 { - self.retry_limit - } -} - -/// Storage backend to access data stored in OSS. -pub type Oss = ObjectStorage; - -impl Oss { - /// Create a new OSS storage backend. 
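// Editorial sketch (not part of this diff): the canonical string that `sign` above
// feeds into HMAC-SHA1. For a simple HEAD request with no `x-oss-` headers it is
// VERB, Content-MD5, Content-Type, Date and the canonicalized resource joined by
// newlines; the digest is base64-encoded and sent as `Authorization: OSS <id>:<sig>`.
// The date and resource below are example values.
fn string_to_sign(verb: &str, date: &str, resource: &str) -> String {
    let content_md5 = "";
    let content_type = "";
    [verb, content_md5, content_type, date, resource].join("\n")
}

fn main() {
    let data = string_to_sign("HEAD", "Mon, 01 Jan 2024 00:00:00 GMT", "/images/nydusblob-1");
    assert_eq!(
        data,
        "HEAD\n\n\nMon, 01 Jan 2024 00:00:00 GMT\n/images/nydusblob-1"
    );
}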
- pub fn new(oss_config: &OssConfig, id: Option<&str>) -> Result { - let con_config: ConnectionConfig = oss_config.clone().into(); - let retry_limit = con_config.retry_limit; - let connection = Connection::new(&con_config)?; - let state = Arc::new(OssState { - scheme: oss_config.scheme.clone(), - object_prefix: oss_config.object_prefix.clone(), - endpoint: oss_config.endpoint.clone(), - access_key_id: oss_config.access_key_id.clone(), - access_key_secret: oss_config.access_key_secret.clone(), - bucket_name: oss_config.bucket_name.clone(), - retry_limit, - }); - let metrics = id.map(|i| BackendMetrics::new(i, "oss")); - - Ok(ObjectStorage::new_object_storage( - connection, - state, - metrics, - id.map(|i| i.to_string()), - )) - } -} - -#[cfg(test)] -mod tests { - use crate::backend::BlobBackend; - - use super::*; - - #[test] - fn test_oss_state() { - let state = OssState { - access_key_id: "key".to_string(), - access_key_secret: "secret".to_string(), - scheme: "https".to_string(), - object_prefix: "nydus".to_string(), - endpoint: "oss".to_string(), - bucket_name: "images".to_string(), - retry_limit: 5, - }; - - assert_eq!( - state.resource("obj_key", "?idontcare"), - "/images/obj_key?idontcare" - ); - - let (resource, url) = state.url("obj_key", &["idontcare", "second"]); - assert_eq!(resource, "/images/nydusobj_key?idontcare&second"); - assert_eq!(url, "https://images.oss/nydusobj_key?idontcare&second"); - - let mut headers = HeaderMap::new(); - state - .sign(Method::HEAD, &mut headers, resource.as_str(), "") - .unwrap(); - let signature = headers.get(HEADER_AUTHORIZATION).unwrap(); - assert!(signature.to_str().unwrap().contains("OSS key:")); - } - - #[test] - fn test_oss_new() { - let json_str = "{\"access_key_id\":\"key\",\"access_key_secret\":\"secret\",\"bucket_name\":\"images\",\"endpoint\":\"/oss\",\"object_prefix\":\"nydus\",\"scheme\":\"\",\"proxy\":{\"url\":\"\",\"ping_url\":\"\",\"fallback\":true,\"check_interval\":5},\"timeout\":5,\"connect_timeout\":5,\"retry_limit\":5}"; - let config: OssConfig = serde_json::from_str(json_str).unwrap(); - let oss = Oss::new(&config, Some("test-image")).unwrap(); - - oss.metrics(); - - let reader = oss.get_reader("test").unwrap(); - assert_eq!(reader.retry_limit(), 5); - - oss.shutdown(); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Storage backend driver to access blobs on Oss(Object Storage System). +use std::io::Result; +use std::sync::Arc; +use std::time::SystemTime; + +use base64::Engine; +use hmac::{Hmac, Mac}; +use reqwest::header::HeaderMap; +use reqwest::Method; +use sha1::Sha1; + +use nydus_api::OssConfig; +use nydus_utils::metrics::BackendMetrics; + +use crate::backend::connection::{Connection, ConnectionConfig}; +use crate::backend::object_storage::{ObjectStorage, ObjectStorageState}; + +const HEADER_DATE: &str = "Date"; +const HEADER_AUTHORIZATION: &str = "Authorization"; + +type HmacSha1 = Hmac; + +// `OssState` is almost identical to `OssConfig`, but let's keep them separated. 
+#[derive(Debug)] +pub struct OssState { + access_key_id: String, + access_key_secret: String, + scheme: String, + object_prefix: String, + endpoint: String, + bucket_name: String, + retry_limit: u8, +} + +impl OssState { + fn resource(&self, object_key: &str, query_str: &str) -> String { + format!("/{}/{}{}", self.bucket_name, object_key, query_str) + } +} + +impl ObjectStorageState for OssState { + fn url(&self, object_key: &str, query: &[&str]) -> (String, String) { + let object_key = &format!("{}{}", self.object_prefix, object_key); + let url = format!( + "{}://{}.{}/{}", + self.scheme, self.bucket_name, self.endpoint, object_key + ); + + if query.is_empty() { + (self.resource(object_key, ""), url) + } else { + let query_str = format!("?{}", query.join("&")); + let resource = self.resource(object_key, &query_str); + let url = format!("{}{}", url.as_str(), &query_str); + (resource, url) + } + } + + /// generate oss request signature + fn sign( + &self, + verb: Method, + headers: &mut HeaderMap, + canonicalized_resource: &str, + _: &str, + ) -> Result<()> { + let content_md5 = ""; + let content_type = ""; + let mut canonicalized_oss_headers = vec![]; + let date = httpdate::fmt_http_date(SystemTime::now()); + let mut data = vec![ + verb.as_str(), + content_md5, + content_type, + date.as_str(), + // canonicalized_oss_headers, + canonicalized_resource, + ]; + + for (name, value) in headers.iter() { + let name = name.as_str(); + let value = value.to_str().map_err(|e| einval!(e))?; + if name.starts_with("x-oss-") { + let header = format!("{}:{}", name.to_lowercase(), value); + canonicalized_oss_headers.push(header); + } + } + let canonicalized_oss_headers = canonicalized_oss_headers.join("\n"); + if !canonicalized_oss_headers.is_empty() { + data.insert(4, canonicalized_oss_headers.as_str()); + } + let data = data.join("\n"); + let hmac = HmacSha1::new_from_slice(self.access_key_secret.as_bytes()) + .map_err(|e| einval!(e))? + .chain_update(data.as_bytes()) + .finalize() + .into_bytes(); + let signature = base64::engine::general_purpose::STANDARD.encode(hmac); + + let authorization = format!("OSS {}:{}", self.access_key_id, signature); + + headers.insert(HEADER_DATE, date.as_str().parse().map_err(|e| einval!(e))?); + headers.insert( + HEADER_AUTHORIZATION, + authorization.as_str().parse().map_err(|e| einval!(e))?, + ); + + Ok(()) + } + + fn retry_limit(&self) -> u8 { + self.retry_limit + } +} + +/// Storage backend to access data stored in OSS. +pub type Oss = ObjectStorage; + +impl Oss { + /// Create a new OSS storage backend. 
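// Editorial sketch (not part of this diff): how `OssState::url` above composes the
// signed resource and the request URL. The values mirror the unit test below
// (bucket "images", endpoint "oss", object prefix "nydus"); they are not real
// endpoints.
fn oss_urls(scheme: &str, bucket: &str, endpoint: &str, prefix: &str, key: &str) -> (String, String) {
    let object_key = format!("{}{}", prefix, key);
    let resource = format!("/{}/{}", bucket, object_key);
    let url = format!("{}://{}.{}/{}", scheme, bucket, endpoint, object_key);
    (resource, url)
}

fn main() {
    let (resource, url) = oss_urls("https", "images", "oss", "nydus", "obj_key");
    assert_eq!(resource, "/images/nydusobj_key");
    assert_eq!(url, "https://images.oss/nydusobj_key");
}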
+ pub fn new(oss_config: &OssConfig, id: Option<&str>) -> Result { + let con_config: ConnectionConfig = oss_config.clone().into(); + let retry_limit = con_config.retry_limit; + let connection = Connection::new(&con_config)?; + let state = Arc::new(OssState { + scheme: oss_config.scheme.clone(), + object_prefix: oss_config.object_prefix.clone(), + endpoint: oss_config.endpoint.clone(), + access_key_id: oss_config.access_key_id.clone(), + access_key_secret: oss_config.access_key_secret.clone(), + bucket_name: oss_config.bucket_name.clone(), + retry_limit, + }); + let metrics = id.map(|i| BackendMetrics::new(i, "oss")); + + Ok(ObjectStorage::new_object_storage( + connection, + state, + metrics, + id.map(|i| i.to_string()), + )) + } +} + +#[cfg(test)] +mod tests { + use crate::backend::BlobBackend; + + use super::*; + + #[test] + fn test_oss_state() { + let state = OssState { + access_key_id: "key".to_string(), + access_key_secret: "secret".to_string(), + scheme: "https".to_string(), + object_prefix: "nydus".to_string(), + endpoint: "oss".to_string(), + bucket_name: "images".to_string(), + retry_limit: 5, + }; + + assert_eq!( + state.resource("obj_key", "?idontcare"), + "/images/obj_key?idontcare" + ); + + let (resource, url) = state.url("obj_key", &["idontcare", "second"]); + assert_eq!(resource, "/images/nydusobj_key?idontcare&second"); + assert_eq!(url, "https://images.oss/nydusobj_key?idontcare&second"); + + let mut headers = HeaderMap::new(); + state + .sign(Method::HEAD, &mut headers, resource.as_str(), "") + .unwrap(); + let signature = headers.get(HEADER_AUTHORIZATION).unwrap(); + assert!(signature.to_str().unwrap().contains("OSS key:")); + } + + #[test] + fn test_oss_new() { + let json_str = "{\"access_key_id\":\"key\",\"access_key_secret\":\"secret\",\"bucket_name\":\"images\",\"endpoint\":\"/oss\",\"object_prefix\":\"nydus\",\"scheme\":\"\",\"proxy\":{\"url\":\"\",\"ping_url\":\"\",\"fallback\":true,\"check_interval\":5},\"timeout\":5,\"connect_timeout\":5,\"retry_limit\":5}"; + let config: OssConfig = serde_json::from_str(json_str).unwrap(); + let oss = Oss::new(&config, Some("test-image")).unwrap(); + + oss.metrics(); + + let reader = oss.get_reader("test").unwrap(); + assert_eq!(reader.retry_limit(), 5); + + oss.shutdown(); + } +} diff --git a/storage/src/backend/registry.rs b/storage/src/backend/registry.rs index 4e1ac4a9035..73953994d4b 100644 --- a/storage/src/backend/registry.rs +++ b/storage/src/backend/registry.rs @@ -1,1175 +1,1175 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Storage backend driver to access blobs on container image registry. 
-use std::collections::HashMap; -use std::error::Error; -use std::io::{Read, Result}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Once, RwLock}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use std::{fmt, thread}; - -use arc_swap::{ArcSwap, ArcSwapOption}; -use base64::Engine; -use reqwest::blocking::Response; -pub use reqwest::header::HeaderMap; -use reqwest::header::{HeaderValue, CONTENT_LENGTH}; -use reqwest::{Method, StatusCode}; -use url::{ParseError, Url}; - -use nydus_api::RegistryConfig; -use nydus_utils::metrics::BackendMetrics; - -use crate::backend::connection::{ - is_success_status, respond, Connection, ConnectionConfig, ConnectionError, ReqBody, -}; -use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; - -const REGISTRY_CLIENT_ID: &str = "nydus-registry-client"; -const HEADER_AUTHORIZATION: &str = "Authorization"; -const HEADER_WWW_AUTHENTICATE: &str = "www-authenticate"; - -const REDIRECTED_STATUS_CODE: [StatusCode; 2] = [ - StatusCode::MOVED_PERMANENTLY, - StatusCode::TEMPORARY_REDIRECT, -]; - -const REGISTRY_DEFAULT_TOKEN_EXPIRATION: u64 = 10 * 60; // in seconds - -/// Error codes related to registry storage backend operations. -#[derive(Debug)] -pub enum RegistryError { - Common(String), - Url(String, ParseError), - Request(ConnectionError), - Scheme(String), - Transport(reqwest::Error), -} - -impl fmt::Display for RegistryError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - RegistryError::Common(s) => write!(f, "failed to access blob from registry, {}", s), - RegistryError::Url(u, e) => write!(f, "failed to parse URL {}, {}", u, e), - RegistryError::Request(e) => write!(f, "failed to issue request, {}", e), - RegistryError::Scheme(s) => write!(f, "invalid scheme, {}", s), - RegistryError::Transport(e) => write!(f, "network transport error, {}", e), - } - } -} - -impl From for BackendError { - fn from(error: RegistryError) -> Self { - BackendError::Registry(error) - } -} - -type RegistryResult = std::result::Result; - -#[derive(Default)] -struct Cache(RwLock); - -impl Cache { - fn new(val: String) -> Self { - Cache(RwLock::new(val)) - } - - fn get(&self) -> String { - let cached_guard = self.0.read().unwrap(); - if !cached_guard.is_empty() { - return cached_guard.clone(); - } - String::new() - } - - fn set(&self, last: &str, current: String) { - if last != current { - let mut cached_guard = self.0.write().unwrap(); - *cached_guard = current; - } - } -} - -#[derive(Default)] -struct HashCache(RwLock>); - -impl HashCache { - fn new() -> Self { - HashCache(RwLock::new(HashMap::new())) - } - - fn get(&self, key: &str) -> Option - where - T: Clone, - { - let cached_guard = self.0.read().unwrap(); - cached_guard.get(key).cloned() - } - - fn set(&self, key: String, value: T) { - let mut cached_guard = self.0.write().unwrap(); - cached_guard.insert(key, value); - } - - fn remove(&self, key: &str) { - let mut cached_guard = self.0.write().unwrap(); - cached_guard.remove(key); - } -} - -#[derive(Clone, serde::Deserialize)] -struct TokenResponse { - /// Registry token string. - token: String, - /// Registry token period of validity, in seconds. 
- #[serde(default = "default_expires_in")] - expires_in: u64, -} - -fn default_expires_in() -> u64 { - REGISTRY_DEFAULT_TOKEN_EXPIRATION -} - -#[derive(Debug)] -struct BasicAuth { - #[allow(unused)] - realm: String, -} - -#[derive(Debug, Clone)] -#[allow(dead_code)] -struct BearerAuth { - realm: String, - service: String, - scope: String, -} - -#[derive(Debug)] -enum Auth { - Basic(BasicAuth), - Bearer(BearerAuth), -} - -pub struct Scheme(AtomicBool); - -impl Scheme { - fn new(value: bool) -> Self { - Scheme(AtomicBool::new(value)) - } -} - -impl fmt::Display for Scheme { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.0.load(Ordering::Relaxed) { - write!(f, "https") - } else { - write!(f, "http") - } - } -} - -struct RegistryState { - // HTTP scheme like: https, http - scheme: Scheme, - host: String, - // Image repo name like: library/ubuntu - repo: String, - // Base64 encoded registry auth - auth: Option, - username: String, - password: String, - // Retry limit for read operation - retry_limit: u8, - // Scheme specified for blob server - blob_url_scheme: String, - // Replace registry redirected url host with the given host - blob_redirected_host: String, - // Cache bearer token (get from registry authentication server) or basic authentication auth string. - // We need use it to reduce the pressure on token authentication server or reduce the base64 compute workload for every request. - // Use RwLock here to avoid using mut backend trait object. - // Example: RwLock<"Bearer "> - // RwLock<"Basic base64()"> - cached_auth: Cache, - // Cache for the HTTP method when getting auth, it is "true" when using "GET" method. - // Due to the different implementations of various image registries, auth requests - // may use the GET or POST methods, we need to cache the method after the - // fallback, so it can be reused next time and reduce an unnecessary request. - cached_auth_using_http_get: HashCache, - // Cache 30X redirect url - // Example: RwLock", "">> - cached_redirect: HashCache, - // The epoch timestamp of token expiration, which is obtained from the registry server. - token_expired_at: ArcSwapOption, - // Cache bearer auth for refreshing token. 
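The `TokenResponse` defined above defaults `expires_in` to ten minutes when the auth server omits the field. A self-contained sketch of that serde pattern (the struct below is a hypothetical stand-in, not the backend's own type; assumes the `serde` and `serde_json` crates):

```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct Token {
    token: String,
    #[serde(default = "default_expiry")]
    expires_in: u64,
}

fn default_expiry() -> u64 {
    600 // 10 * 60 seconds, matching REGISTRY_DEFAULT_TOKEN_EXPIRATION
}

fn main() {
    // Server response without `expires_in`: the default kicks in.
    let t: Token = serde_json::from_str(r#"{"token":"abc"}"#).unwrap();
    assert_eq!((t.token.as_str(), t.expires_in), ("abc", 600));

    // An explicit value from the server wins over the default.
    let t: Token = serde_json::from_str(r#"{"token":"abc","expires_in":300}"#).unwrap();
    assert_eq!(t.expires_in, 300);
}
```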
- cached_bearer_auth: ArcSwapOption, -} - -impl RegistryState { - fn url(&self, path: &str, query: &[&str]) -> std::result::Result { - let path = if query.is_empty() { - format!("/v2/{}{}", self.repo, path) - } else { - format!("/v2/{}{}?{}", self.repo, path, query.join("&")) - }; - let url = format!("{}://{}", self.scheme, self.host.as_str()); - let url = Url::parse(url.as_str())?; - let url = url.join(path.as_str())?; - - Ok(url.to_string()) - } - - fn needs_fallback_http(&self, e: &dyn Error) -> bool { - match e.source() { - Some(err) => match err.source() { - Some(err) => { - if !self.scheme.0.load(Ordering::Relaxed) { - return false; - } - let msg = err.to_string().to_lowercase(); - // If we attempt to establish a TLS connection with the HTTP registry server, - // we are likely to encounter these types of error: - // https://github.com/openssl/openssl/blob/6b3d28757620e0781bb1556032bb6961ee39af63/crypto/err/openssl.txt#L1574 - // https://github.com/containerd/nerdctl/blob/225a70bdc3b93cdb00efac7db1ceb50c098a8a16/pkg/cmd/image/push.go#LL135C66-L135C66 - let fallback = - msg.contains("wrong version number") || msg.contains("connection refused"); - if fallback { - warn!("fallback to http due to tls connection error: {}", err); - } - fallback - } - None => false, - }, - None => false, - } - } - - // Request registry authentication server to get bearer token - fn get_token(&self, auth: BearerAuth, connection: &Arc) -> Result { - let http_get = self - .cached_auth_using_http_get - .get(&self.host) - .unwrap_or_default(); - let resp = if http_get { - self.get_token_with_get(&auth, connection)? - } else { - match self.get_token_with_post(&auth, connection) { - Ok(resp) => resp, - Err(_) => { - warn!("retry http GET method to get auth token"); - let resp = self.get_token_with_get(&auth, connection)?; - // Cache http method for next use. - self.cached_auth_using_http_get.set(self.host.clone(), true); - resp - } - } - }; - - let ret: TokenResponse = resp.json().map_err(|e| { - einval!(format!( - "registry auth server response decode failed: {:?}", - e - )) - })?; - - if let Ok(now_timestamp) = SystemTime::now().duration_since(UNIX_EPOCH) { - self.token_expired_at - .store(Some(Arc::new(now_timestamp.as_secs() + ret.expires_in))); - debug!( - "cached bearer auth, next time: {}", - now_timestamp.as_secs() + ret.expires_in - ); - } - - // Cache bearer auth for refreshing token. 
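`RegistryState::url` near the top of this impl assembles `scheme://host/v2/<repo><path>` plus optional query parameters. A short sketch of the same construction with the `url` crate (values are illustrative; the real method takes them from `RegistryConfig`):

```rust
use url::Url; // assumes the `url` crate, already a dependency of this backend

fn v2_url(
    scheme: &str,
    host: &str,
    repo: &str,
    path: &str,
    query: &[&str],
) -> Result<String, url::ParseError> {
    let path = if query.is_empty() {
        format!("/v2/{}{}", repo, path)
    } else {
        format!("/v2/{}{}?{}", repo, path, query.join("&"))
    };
    let base = Url::parse(&format!("{}://{}", scheme, host))?;
    Ok(base.join(&path)?.to_string())
}

fn main() -> Result<(), url::ParseError> {
    assert_eq!(
        v2_url("https", "registry.example.com", "library/ubuntu", "/blobs/sha256:abc", &[])?,
        "https://registry.example.com/v2/library/ubuntu/blobs/sha256:abc"
    );
    Ok(())
}
```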
- self.cached_bearer_auth.store(Some(Arc::new(auth))); - - Ok(ret) - } - - // Get bearer token using a POST request - fn get_token_with_post( - &self, - auth: &BearerAuth, - connection: &Arc, - ) -> Result { - let mut form = HashMap::new(); - form.insert("service".to_string(), auth.service.clone()); - form.insert("scope".to_string(), auth.scope.clone()); - form.insert("grant_type".to_string(), "password".to_string()); - form.insert("username".to_string(), self.username.clone()); - form.insert("password".to_string(), self.password.clone()); - form.insert("client_id".to_string(), REGISTRY_CLIENT_ID.to_string()); - - let token_resp = connection - .call::<&[u8]>( - Method::POST, - auth.realm.as_str(), - None, - Some(ReqBody::Form(form)), - &mut HeaderMap::new(), - true, - ) - .map_err(|e| { - warn!( - "failed to request registry auth server by POST method: {:?}", - e - ); - einval!() - })?; - - Ok(token_resp) - } - - // Get bearer token using a GET request - fn get_token_with_get( - &self, - auth: &BearerAuth, - connection: &Arc, - ) -> Result { - let query = [ - ("service", auth.service.as_str()), - ("scope", auth.scope.as_str()), - ("grant_type", "password"), - ("username", self.username.as_str()), - ("password", self.password.as_str()), - ("client_id", REGISTRY_CLIENT_ID), - ]; - - let mut headers = HeaderMap::new(); - - // Insert the basic auth header to ensure the compatibility (e.g. Harbor registry) - // of fetching token by HTTP GET method. - // This refers containerd implementation: https://github.com/containerd/containerd/blob/dc7dba9c20f7210c38e8255487fc0ee12692149d/remotes/docker/auth/fetch.go#L187 - if let Some(auth) = &self.auth { - headers.insert( - HEADER_AUTHORIZATION, - format!("Basic {}", auth).parse().unwrap(), - ); - } - - let token_resp = connection - .call::<&[u8]>( - Method::GET, - auth.realm.as_str(), - Some(&query), - None, - &mut headers, - true, - ) - .map_err(|e| { - warn!( - "failed to request registry auth server by GET method: {:?}", - e - ); - einval!() - })?; - - Ok(token_resp) - } - - fn get_auth_header(&self, auth: Auth, connection: &Arc) -> Result { - match auth { - Auth::Basic(_) => self - .auth - .as_ref() - .map(|auth| format!("Basic {}", auth)) - .ok_or_else(|| einval!("invalid auth config")), - Auth::Bearer(auth) => { - let token = self.get_token(auth, connection)?; - Ok(format!("Bearer {}", token.token)) - } - } - } - - /// Parse `www-authenticate` response header respond from registry server - /// The header format like: `Bearer realm="https://auth.my-registry.com/token",service="my-registry.com",scope="repository:test/repo:pull,push"` - fn parse_auth(source: &HeaderValue) -> Option { - let source = source.to_str().unwrap(); - let source: Vec<&str> = source.splitn(2, ' ').collect(); - if source.len() < 2 { - return None; - } - let scheme = source[0].trim(); - let pairs = source[1].trim(); - let pairs = pairs.split("\","); - let mut paras = HashMap::new(); - for pair in pairs { - let pair: Vec<&str> = pair.trim().split('=').collect(); - if pair.len() < 2 { - return None; - } - let key = pair[0].trim(); - let value = pair[1].trim().trim_matches('"'); - paras.insert(key, value); - } - - match scheme { - "Basic" => { - let realm = if let Some(realm) = paras.get("realm") { - (*realm).to_string() - } else { - String::new() - }; - Some(Auth::Basic(BasicAuth { realm })) - } - "Bearer" => { - if paras.get("realm").is_none() - || paras.get("service").is_none() - || paras.get("scope").is_none() - { - return None; - } - - Some(Auth::Bearer(BearerAuth { - realm: 
(*paras.get("realm").unwrap()).to_string(), - service: (*paras.get("service").unwrap()).to_string(), - scope: (*paras.get("scope").unwrap()).to_string(), - })) - } - _ => None, - } - } - - fn fallback_http(&self) { - self.scheme.0.store(false, Ordering::Relaxed); - } -} - -#[derive(Clone)] -struct First { - inner: Arc>, -} - -impl First { - fn new() -> Self { - First { - inner: Arc::new(ArcSwap::new(Arc::new(Once::new()))), - } - } - - fn once(&self, f: F) - where - F: FnOnce(), - { - self.inner.load().call_once(f) - } - - fn renew(&self) { - self.inner.store(Arc::new(Once::new())); - } - - fn handle(&self, handle: &mut F) -> Option> - where - F: FnMut() -> BackendResult, - { - let mut ret = None; - // Call once twice to ensure the subsequent requests use the new - // Once instance after renew happens. - for _ in 0..=1 { - self.once(|| { - ret = Some(handle().map_err(|err| { - // Replace the Once instance so that we can retry it when - // the handle call failed. - self.renew(); - err - })); - }); - if ret.is_some() { - break; - } - } - ret - } - - /// When invoking concurrently, only one of the handle methods will be executed first, - /// then subsequent handle methods will be allowed to execute concurrently. - /// - /// Nydusd uses a registry backend which generates a surge of blob requests without - /// auth tokens on initial startup, this caused mirror backends (e.g. dragonfly) - /// to process very slowly. The method implements waiting for the first blob request - /// to complete before making other blob requests, this ensures the first request - /// caches a valid registry auth token, and subsequent concurrent blob requests can - /// reuse the cached token. - fn handle_force(&self, handle: &mut F) -> BackendResult - where - F: FnMut() -> BackendResult, - { - self.handle(handle).unwrap_or_else(handle) - } -} - -struct RegistryReader { - blob_id: String, - connection: Arc, - state: Arc, - metrics: Arc, - first: First, -} - -impl RegistryReader { - /// Request registry server with `authorization` header - /// - /// Bearer token authenticate workflow: - /// - /// Request: POST https://my-registry.com/test/repo/blobs/uploads - /// Response: status: 401 Unauthorized - /// header: www-authenticate: Bearer realm="https://auth.my-registry.com/token",service="my-registry.com",scope="repository:test/repo:pull,push" - /// - /// Request: POST https://auth.my-registry.com/token - /// body: "service=my-registry.com&scope=repository:test/repo:pull,push&grant_type=password&username=x&password=x&client_id=nydus-registry-client" - /// Response: status: 200 Ok - /// body: { "token": "" } - /// - /// Request: POST https://my-registry.com/test/repo/blobs/uploads - /// header: authorization: Bearer - /// Response: status: 200 Ok - /// - /// Basic authenticate workflow: - /// - /// Request: POST https://my-registry.com/test/repo/blobs/uploads - /// Response: status: 401 Unauthorized - /// header: www-authenticate: Basic - /// - /// Request: POST https://my-registry.com/test/repo/blobs/uploads - /// header: authorization: Basic base64() - /// Response: status: 200 Ok - fn request( - &self, - method: Method, - url: &str, - data: Option>, - mut headers: HeaderMap, - catch_status: bool, - ) -> RegistryResult { - // Try get authorization header from cache for this request - let mut last_cached_auth = String::new(); - let cached_auth = self.state.cached_auth.get(); - if !cached_auth.is_empty() { - last_cached_auth = cached_auth.clone(); - headers.insert( - HEADER_AUTHORIZATION, - 
HeaderValue::from_str(cached_auth.as_str()).unwrap(), - ); - } - - // For upload request with payload, the auth header should be cached - // after create_upload(), so we can request registry server directly - if let Some(data) = data { - return self - .connection - .call(method, url, None, Some(data), &mut headers, catch_status) - .map_err(RegistryError::Request); - } - - // Try to request registry server with `authorization` header - let mut resp = self - .connection - .call::<&[u8]>(method.clone(), url, None, None, &mut headers, false) - .map_err(RegistryError::Request)?; - if resp.status() == StatusCode::UNAUTHORIZED { - if headers.contains_key(HEADER_AUTHORIZATION) { - // If we request registry (harbor server) with expired authorization token, - // the `www-authenticate: Basic realm="harbor"` in response headers is not expected. - // Related code in harbor: - // https://github.com/goharbor/harbor/blob/v2.5.3/src/server/middleware/v2auth/auth.go#L98 - // - // We can remove the expired authorization token and - // resend the request to get the correct "www-authenticate" value. - headers.remove(HEADER_AUTHORIZATION); - - resp = self - .connection - .call::<&[u8]>(method.clone(), url, None, None, &mut headers, false) - .map_err(RegistryError::Request)?; - }; - - if let Some(resp_auth_header) = resp.headers().get(HEADER_WWW_AUTHENTICATE) { - // Get token from registry authorization server - if let Some(auth) = RegistryState::parse_auth(resp_auth_header) { - let auth_header = self - .state - .get_auth_header(auth, &self.connection) - .map_err(|e| RegistryError::Common(e.to_string()))?; - - headers.insert( - HEADER_AUTHORIZATION, - HeaderValue::from_str(auth_header.as_str()).unwrap(), - ); - - // Try to request registry server with `authorization` header again - let resp = self - .connection - .call(method, url, None, data, &mut headers, catch_status) - .map_err(RegistryError::Request)?; - - let status = resp.status(); - if is_success_status(status) { - // Cache authorization header for next request - self.state.cached_auth.set(&last_cached_auth, auth_header) - } - return respond(resp, catch_status).map_err(RegistryError::Request); - } - } - } - - respond(resp, catch_status).map_err(RegistryError::Request) - } - - /// Read data from registry server - /// - /// Step: - /// - /// Request: GET /blobs/sha256: - /// Response: status: 307 Temporary Redirect - /// header: location: https://raw-blob-storage-host.com/signature=x - /// - /// Request: GET https://raw-blob-storage-host.com/signature=x - /// Response: status: 200 Ok / 403 Forbidden - /// If responding 403, we need to repeat step one - fn _try_read( - &self, - mut buf: &mut [u8], - offset: u64, - allow_retry: bool, - ) -> RegistryResult { - let url = format!("/blobs/sha256:{}", self.blob_id); - let url = self - .state - .url(url.as_str(), &[]) - .map_err(|e| RegistryError::Url(url, e))?; - let mut headers = HeaderMap::new(); - let end_at = offset + buf.len() as u64 - 1; - let range = format!("bytes={}-{}", offset, end_at); - headers.insert("Range", range.parse().unwrap()); - - let mut resp; - let cached_redirect = self.state.cached_redirect.get(&self.blob_id); - - if let Some(cached_redirect) = cached_redirect { - resp = self - .connection - .call::<&[u8]>( - Method::GET, - cached_redirect.as_str(), - None, - None, - &mut headers, - false, - ) - .map_err(RegistryError::Request)?; - - // The request has expired or has been denied, need to re-request - if allow_retry - && [StatusCode::UNAUTHORIZED, 
StatusCode::FORBIDDEN].contains(&resp.status()) - { - warn!( - "The redirected link has expired: {}, will retry read", - cached_redirect.as_str() - ); - self.state.cached_redirect.remove(&self.blob_id); - // Try read again only once - return self._try_read(buf, offset, false); - } - } else { - resp = match self.request::<&[u8]>( - Method::GET, - url.as_str(), - None, - headers.clone(), - false, - ) { - Ok(res) => res, - Err(RegistryError::Request(ConnectionError::Common(e))) - if self.state.needs_fallback_http(&e) => - { - self.state.fallback_http(); - let url = format!("/blobs/sha256:{}", self.blob_id); - let url = self - .state - .url(url.as_str(), &[]) - .map_err(|e| RegistryError::Url(url, e))?; - self.request::<&[u8]>(Method::GET, url.as_str(), None, headers.clone(), false)? - } - Err(RegistryError::Request(ConnectionError::Common(e))) => { - if e.to_string().contains("self signed certificate") { - warn!("try to enable \"skip_verify: true\" option"); - } - return Err(RegistryError::Request(ConnectionError::Common(e))); - } - Err(e) => { - return Err(e); - } - }; - let status = resp.status(); - - // Handle redirect request and cache redirect url - if REDIRECTED_STATUS_CODE.contains(&status) { - if let Some(location) = resp.headers().get("location") { - let location = location.to_str().unwrap(); - let mut location = Url::parse(location) - .map_err(|e| RegistryError::Url(location.to_string(), e))?; - // Note: Some P2P proxy server supports only scheme specified origin blob server, - // so we need change scheme to `blob_url_scheme` here - if !self.state.blob_url_scheme.is_empty() { - location - .set_scheme(&self.state.blob_url_scheme) - .map_err(|_| { - RegistryError::Scheme(self.state.blob_url_scheme.clone()) - })?; - } - if !self.state.blob_redirected_host.is_empty() { - location - .set_host(Some(self.state.blob_redirected_host.as_str())) - .map_err(|e| { - error!( - "Failed to set blob redirected host to {}: {:?}", - self.state.blob_redirected_host.as_str(), - e - ); - RegistryError::Url(location.to_string(), e) - })?; - debug!("New redirected location {:?}", location.host_str()); - } - let resp_ret = self - .connection - .call::<&[u8]>( - Method::GET, - location.as_str(), - None, - None, - &mut headers, - true, - ) - .map_err(RegistryError::Request); - match resp_ret { - Ok(_resp) => { - resp = _resp; - self.state - .cached_redirect - .set(self.blob_id.clone(), location.as_str().to_string()) - } - Err(err) => { - return Err(err); - } - } - }; - } else { - resp = respond(resp, true).map_err(RegistryError::Request)?; - } - } - - resp.copy_to(&mut buf) - .map_err(RegistryError::Transport) - .map(|size| size as usize) - } -} - -impl BlobReader for RegistryReader { - fn blob_size(&self) -> BackendResult { - self.first.handle_force(&mut || -> BackendResult { - let url = format!("/blobs/sha256:{}", self.blob_id); - let url = self - .state - .url(&url, &[]) - .map_err(|e| RegistryError::Url(url, e))?; - - let resp = match self.request::<&[u8]>( - Method::HEAD, - url.as_str(), - None, - HeaderMap::new(), - true, - ) { - Ok(res) => res, - Err(RegistryError::Request(ConnectionError::Common(e))) - if self.state.needs_fallback_http(&e) => - { - self.state.fallback_http(); - let url = format!("/blobs/sha256:{}", self.blob_id); - let url = self - .state - .url(&url, &[]) - .map_err(|e| RegistryError::Url(url, e))?; - self.request::<&[u8]>(Method::HEAD, url.as_str(), None, HeaderMap::new(), true)? 
- } - Err(e) => { - return Err(BackendError::Registry(e)); - } - }; - let content_length = resp - .headers() - .get(CONTENT_LENGTH) - .ok_or_else(|| RegistryError::Common("invalid content length".to_string()))?; - - Ok(content_length - .to_str() - .map_err(|err| RegistryError::Common(format!("invalid content length: {:?}", err)))? - .parse::() - .map_err(|err| { - RegistryError::Common(format!("invalid content length: {:?}", err)) - })?) - }) - } - - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - self.first.handle_force(&mut || -> BackendResult { - self._try_read(buf, offset, true) - .map_err(BackendError::Registry) - }) - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - - fn retry_limit(&self) -> u8 { - self.state.retry_limit - } -} - -/// Storage backend based on image registry. -pub struct Registry { - connection: Arc, - state: Arc, - metrics: Arc, - first: First, -} - -impl Registry { - #[allow(clippy::useless_let_if_seq)] - pub fn new(config: &RegistryConfig, id: Option<&str>) -> Result { - let id = id.ok_or_else(|| einval!("Registry backend requires blob_id"))?; - let con_config: ConnectionConfig = config.clone().into(); - - if !config.proxy.url.is_empty() && !config.mirrors.is_empty() { - return Err(einval!( - "connection: proxy and mirrors cannot be configured at the same time." - )); - } - - let retry_limit = con_config.retry_limit; - let connection = Connection::new(&con_config)?; - let auth = trim(config.auth.clone()); - let registry_token = trim(config.registry_token.clone()); - let (username, password) = Self::get_authorization_info(&auth)?; - let cached_auth = if let Some(registry_token) = registry_token { - // Store the registry bearer token to cached_auth, prefer to - // use the token stored in cached_auth to request registry. 
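`blob_size` above converts the `Content-Length` response header into a `u64`, rejecting a missing or malformed header. A std-only sketch of that conversion (hypothetical helper; the real code wraps failures in `RegistryError::Common`):

```rust
/// Illustrative only: parse an optional Content-Length header value.
fn parse_content_length(raw: Option<&str>) -> Result<u64, String> {
    let raw = raw.ok_or_else(|| "invalid content length".to_string())?;
    raw.trim()
        .parse::<u64>()
        .map_err(|e| format!("invalid content length: {:?}", e))
}

fn main() {
    assert_eq!(parse_content_length(Some("1048576")), Ok(1048576));
    assert!(parse_content_length(Some("not-a-number")).is_err());
    assert!(parse_content_length(None).is_err());
}
```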
- Cache::new(format!("Bearer {}", registry_token)) - } else { - Cache::new(String::new()) - }; - - let scheme = if !config.scheme.is_empty() && config.scheme == "http" { - Scheme::new(false) - } else { - Scheme::new(true) - }; - - let state = Arc::new(RegistryState { - scheme, - host: config.host.clone(), - repo: config.repo.clone(), - auth, - cached_auth, - username, - password, - retry_limit, - blob_url_scheme: config.blob_url_scheme.clone(), - blob_redirected_host: config.blob_redirected_host.clone(), - cached_auth_using_http_get: HashCache::new(), - cached_redirect: HashCache::new(), - token_expired_at: ArcSwapOption::new(None), - cached_bearer_auth: ArcSwapOption::new(None), - }); - - let registry = Registry { - connection, - state, - metrics: BackendMetrics::new(id, "registry"), - first: First::new(), - }; - - registry.start_refresh_token_thread(); - info!("Refresh token thread started."); - - Ok(registry) - } - - fn get_authorization_info(auth: &Option) -> Result<(String, String)> { - if let Some(auth) = &auth { - let auth: Vec = base64::engine::general_purpose::STANDARD - .decode(auth.as_bytes()) - .map_err(|e| { - einval!(format!( - "Invalid base64 encoded registry auth config: {:?}", - e - )) - })?; - let auth = std::str::from_utf8(&auth).map_err(|e| { - einval!(format!( - "Invalid utf-8 encoded registry auth config: {:?}", - e - )) - })?; - let auth: Vec<&str> = auth.splitn(2, ':').collect(); - if auth.len() < 2 { - return Err(einval!("Invalid registry auth config")); - } - - Ok((auth[0].to_string(), auth[1].to_string())) - } else { - Ok((String::new(), String::new())) - } - } - - fn start_refresh_token_thread(&self) { - let conn = self.connection.clone(); - let state = self.state.clone(); - // FIXME: we'd better allow users to specify the expiration time. - let mut refresh_interval = REGISTRY_DEFAULT_TOKEN_EXPIRATION; - thread::spawn(move || { - loop { - if let Ok(now_timestamp) = SystemTime::now().duration_since(UNIX_EPOCH) { - if let Some(token_expired_at) = state.token_expired_at.load().as_deref() { - // If the token will expire within the next refresh interval, - // refresh it immediately. - if now_timestamp.as_secs() + refresh_interval >= *token_expired_at { - if let Some(cached_bearer_auth) = - state.cached_bearer_auth.load().as_deref() - { - if let Ok(token) = - state.get_token(cached_bearer_auth.to_owned(), &conn) - { - let new_cached_auth = format!("Bearer {}", token.token); - debug!( - "[refresh_token_thread] registry token has been refreshed" - ); - // Refresh cached token. - state - .cached_auth - .set(&state.cached_auth.get(), new_cached_auth); - // Reset refresh interval according to real expiration time, - // and advance 20s to handle the unexpected cases. 
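`get_authorization_info` above decodes the Docker-style auth string, base64 of `username:password`, splitting on the first colon so passwords may themselves contain colons. A small sketch of that decode (assumes the `base64` 0.21 engine API this file already uses):

```rust
use base64::Engine;

/// Illustrative only: decode a `base64(username:password)` auth string.
fn decode_auth(auth: &str) -> Result<(String, String), String> {
    let bytes = base64::engine::general_purpose::STANDARD
        .decode(auth.as_bytes())
        .map_err(|e| format!("invalid base64: {:?}", e))?;
    let text = String::from_utf8(bytes).map_err(|e| format!("invalid utf-8: {:?}", e))?;
    let (user, pass) = text
        .split_once(':')
        .ok_or_else(|| "expected username:password".to_string())?;
    Ok((user.to_string(), pass.to_string()))
}

fn main() {
    // Only the first ':' separates the fields.
    let encoded = base64::engine::general_purpose::STANDARD.encode("user:pa:ss");
    assert_eq!(
        decode_auth(&encoded).unwrap(),
        ("user".to_string(), "pa:ss".to_string())
    );
}
```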
- refresh_interval = token - .expires_in - .checked_sub(20) - .unwrap_or(token.expires_in); - } else { - error!( - "[refresh_token_thread] failed to refresh registry token" - ); - } - } - } - } - } - - if conn.shutdown.load(Ordering::Acquire) { - break; - } - thread::sleep(Duration::from_secs(refresh_interval)); - if conn.shutdown.load(Ordering::Acquire) { - break; - } - } - }); - } -} - -impl BlobBackend for Registry { - fn shutdown(&self) { - self.connection.shutdown(); - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - - fn get_reader(&self, blob_id: &str) -> BackendResult> { - Ok(Arc::new(RegistryReader { - blob_id: blob_id.to_owned(), - state: self.state.clone(), - connection: self.connection.clone(), - metrics: self.metrics.clone(), - first: self.first.clone(), - })) - } -} - -impl Drop for Registry { - fn drop(&mut self) { - self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } -} - -fn trim(value: Option) -> Option { - if let Some(val) = value.as_ref() { - let trimmed_val = val.trim(); - if trimmed_val.is_empty() { - None - } else if trimmed_val.len() == val.len() { - value - } else { - Some(trimmed_val.to_string()) - } - } else { - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_string_cache() { - let cache = Cache::new("test".to_owned()); - - assert_eq!(cache.get(), "test"); - - cache.set("test", "test1".to_owned()); - assert_eq!(cache.get(), "test1"); - cache.set("test1", "test1".to_owned()); - assert_eq!(cache.get(), "test1"); - } - - #[test] - fn test_hash_cache() { - let cache = HashCache::new(); - - assert_eq!(cache.get("test"), None); - cache.set("test".to_owned(), "test".to_owned()); - assert_eq!(cache.get("test"), Some("test".to_owned())); - cache.set("test".to_owned(), "test1".to_owned()); - assert_eq!(cache.get("test"), Some("test1".to_owned())); - cache.remove("test"); - assert_eq!(cache.get("test"), None); - } - - #[test] - fn test_state_url() { - let state = RegistryState { - scheme: Scheme::new(false), - host: "alibaba-inc.com".to_string(), - repo: "nydus".to_string(), - auth: None, - username: "test".to_string(), - password: "password".to_string(), - retry_limit: 5, - blob_url_scheme: "https".to_string(), - blob_redirected_host: "oss.alibaba-inc.com".to_string(), - cached_auth_using_http_get: Default::default(), - cached_auth: Default::default(), - cached_redirect: Default::default(), - token_expired_at: ArcSwapOption::new(None), - cached_bearer_auth: ArcSwapOption::new(None), - }; - - assert_eq!( - state.url("image", &["blabla"]).unwrap(), - "http://alibaba-inc.com/v2/nydusimage?blabla".to_owned() - ); - assert_eq!( - state.url("image", &[]).unwrap(), - "http://alibaba-inc.com/v2/nydusimage".to_owned() - ); - } - - #[test] - fn test_parse_auth() { - let str = "Bearer realm=\"https://auth.my-registry.com/token\",service=\"my-registry.com\",scope=\"repository:test/repo:pull,push\""; - let header = HeaderValue::from_str(str).unwrap(); - let auth = RegistryState::parse_auth(&header).unwrap(); - match auth { - Auth::Bearer(auth) => { - assert_eq!(&auth.realm, "https://auth.my-registry.com/token"); - assert_eq!(&auth.service, "my-registry.com"); - assert_eq!(&auth.scope, "repository:test/repo:pull,push"); - } - _ => panic!("failed to parse `Bearer` authentication header"), - } - - let str = "Basic realm=\"https://auth.my-registry.com/token\""; - let header = HeaderValue::from_str(str).unwrap(); - let auth = RegistryState::parse_auth(&header).unwrap(); - match auth { - Auth::Basic(auth) => 
assert_eq!(&auth.realm, "https://auth.my-registry.com/token"), - _ => panic!("failed to parse `Bearer` authentication header"), - } - - let str = "Base realm=\"https://auth.my-registry.com/token\""; - let header = HeaderValue::from_str(str).unwrap(); - assert!(RegistryState::parse_auth(&header).is_none()); - } - - #[test] - fn test_trim() { - assert_eq!(trim(None), None); - assert_eq!(trim(Some("".to_owned())), None); - assert_eq!(trim(Some(" ".to_owned())), None); - assert_eq!(trim(Some(" test ".to_owned())), Some("test".to_owned())); - assert_eq!(trim(Some("test ".to_owned())), Some("test".to_owned())); - assert_eq!(trim(Some(" test".to_owned())), Some("test".to_owned())); - assert_eq!(trim(Some(" te st ".to_owned())), Some("te st".to_owned())); - assert_eq!(trim(Some("te st".to_owned())), Some("te st".to_owned())); - } - - #[test] - #[allow(clippy::redundant_clone)] - fn test_first_basically() { - let first = First::new(); - let mut val = 0; - first.once(|| { - val += 1; - }); - assert_eq!(val, 1); - - first.clone().once(|| { - val += 1; - }); - assert_eq!(val, 1); - - first.renew(); - first.clone().once(|| { - val += 1; - }); - assert_eq!(val, 2); - } - - #[test] - #[allow(clippy::redundant_clone)] - fn test_first_concurrently() { - let val = Arc::new(ArcSwap::new(Arc::new(0))); - let first = First::new(); - - let mut handlers = Vec::new(); - for _ in 0..100 { - let val_cloned = val.clone(); - let first_cloned = first.clone(); - handlers.push(std::thread::spawn(move || { - let _ = first_cloned.handle(&mut || -> BackendResult<()> { - let val = val_cloned.load(); - let ret = if *val.as_ref() == 0 { - std::thread::sleep(std::time::Duration::from_secs(2)); - Err(BackendError::Registry(RegistryError::Common(String::from( - "network error", - )))) - } else { - Ok(()) - }; - val_cloned.store(Arc::new(val.as_ref() + 1)); - ret - }); - })); - } - - for handler in handlers { - handler.join().unwrap(); - } - - assert_eq!(*val.load().as_ref(), 2); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Storage backend driver to access blobs on container image registry. +use std::collections::HashMap; +use std::error::Error; +use std::io::{Read, Result}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Once, RwLock}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::{fmt, thread}; + +use arc_swap::{ArcSwap, ArcSwapOption}; +use base64::Engine; +use reqwest::blocking::Response; +pub use reqwest::header::HeaderMap; +use reqwest::header::{HeaderValue, CONTENT_LENGTH}; +use reqwest::{Method, StatusCode}; +use url::{ParseError, Url}; + +use nydus_api::RegistryConfig; +use nydus_utils::metrics::BackendMetrics; + +use crate::backend::connection::{ + is_success_status, respond, Connection, ConnectionConfig, ConnectionError, ReqBody, +}; +use crate::backend::{BackendError, BackendResult, BlobBackend, BlobReader}; + +const REGISTRY_CLIENT_ID: &str = "nydus-registry-client"; +const HEADER_AUTHORIZATION: &str = "Authorization"; +const HEADER_WWW_AUTHENTICATE: &str = "www-authenticate"; + +const REDIRECTED_STATUS_CODE: [StatusCode; 2] = [ + StatusCode::MOVED_PERMANENTLY, + StatusCode::TEMPORARY_REDIRECT, +]; + +const REGISTRY_DEFAULT_TOKEN_EXPIRATION: u64 = 10 * 60; // in seconds + +/// Error codes related to registry storage backend operations. 
+#[derive(Debug)] +pub enum RegistryError { + Common(String), + Url(String, ParseError), + Request(ConnectionError), + Scheme(String), + Transport(reqwest::Error), +} + +impl fmt::Display for RegistryError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RegistryError::Common(s) => write!(f, "failed to access blob from registry, {}", s), + RegistryError::Url(u, e) => write!(f, "failed to parse URL {}, {}", u, e), + RegistryError::Request(e) => write!(f, "failed to issue request, {}", e), + RegistryError::Scheme(s) => write!(f, "invalid scheme, {}", s), + RegistryError::Transport(e) => write!(f, "network transport error, {}", e), + } + } +} + +impl From for BackendError { + fn from(error: RegistryError) -> Self { + BackendError::Registry(error) + } +} + +type RegistryResult = std::result::Result; + +#[derive(Default)] +struct Cache(RwLock); + +impl Cache { + fn new(val: String) -> Self { + Cache(RwLock::new(val)) + } + + fn get(&self) -> String { + let cached_guard = self.0.read().unwrap(); + if !cached_guard.is_empty() { + return cached_guard.clone(); + } + String::new() + } + + fn set(&self, last: &str, current: String) { + if last != current { + let mut cached_guard = self.0.write().unwrap(); + *cached_guard = current; + } + } +} + +#[derive(Default)] +struct HashCache(RwLock>); + +impl HashCache { + fn new() -> Self { + HashCache(RwLock::new(HashMap::new())) + } + + fn get(&self, key: &str) -> Option + where + T: Clone, + { + let cached_guard = self.0.read().unwrap(); + cached_guard.get(key).cloned() + } + + fn set(&self, key: String, value: T) { + let mut cached_guard = self.0.write().unwrap(); + cached_guard.insert(key, value); + } + + fn remove(&self, key: &str) { + let mut cached_guard = self.0.write().unwrap(); + cached_guard.remove(key); + } +} + +#[derive(Clone, serde::Deserialize)] +struct TokenResponse { + /// Registry token string. + token: String, + /// Registry token period of validity, in seconds. + #[serde(default = "default_expires_in")] + expires_in: u64, +} + +fn default_expires_in() -> u64 { + REGISTRY_DEFAULT_TOKEN_EXPIRATION +} + +#[derive(Debug)] +struct BasicAuth { + #[allow(unused)] + realm: String, +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +struct BearerAuth { + realm: String, + service: String, + scope: String, +} + +#[derive(Debug)] +enum Auth { + Basic(BasicAuth), + Bearer(BearerAuth), +} + +pub struct Scheme(AtomicBool); + +impl Scheme { + fn new(value: bool) -> Self { + Scheme(AtomicBool::new(value)) + } +} + +impl fmt::Display for Scheme { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.0.load(Ordering::Relaxed) { + write!(f, "https") + } else { + write!(f, "http") + } + } +} + +struct RegistryState { + // HTTP scheme like: https, http + scheme: Scheme, + host: String, + // Image repo name like: library/ubuntu + repo: String, + // Base64 encoded registry auth + auth: Option, + username: String, + password: String, + // Retry limit for read operation + retry_limit: u8, + // Scheme specified for blob server + blob_url_scheme: String, + // Replace registry redirected url host with the given host + blob_redirected_host: String, + // Cache bearer token (get from registry authentication server) or basic authentication auth string. + // We need use it to reduce the pressure on token authentication server or reduce the base64 compute workload for every request. + // Use RwLock here to avoid using mut backend trait object. 
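The `Cache` wrapper above lets the authorization string be replaced through a shared `&self` by hiding an `RwLock`, and it only takes the write lock when the value actually changed. A compact, std-only sketch of that set-if-changed pattern (re-declared here purely for illustration):

```rust
use std::sync::RwLock;

/// Illustrative only: a string cache that can be updated through `&self`.
struct AuthCache(RwLock<String>);

impl AuthCache {
    fn get(&self) -> String {
        self.0.read().unwrap().clone()
    }

    /// Take the write lock only when the value really changed, so concurrent
    /// readers are not blocked by redundant updates.
    fn set(&self, last_seen: &str, current: String) {
        if last_seen != current {
            *self.0.write().unwrap() = current;
        }
    }
}

fn main() {
    let cache = AuthCache(RwLock::new(String::new()));
    cache.set("", "Bearer abc".to_string());
    assert_eq!(cache.get(), "Bearer abc");
    // Setting the same value again skips the write lock entirely.
    cache.set("Bearer abc", "Bearer abc".to_string());
    assert_eq!(cache.get(), "Bearer abc");
}
```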
+ // Example: RwLock<"Bearer "> + // RwLock<"Basic base64()"> + cached_auth: Cache, + // Cache for the HTTP method when getting auth, it is "true" when using "GET" method. + // Due to the different implementations of various image registries, auth requests + // may use the GET or POST methods, we need to cache the method after the + // fallback, so it can be reused next time and reduce an unnecessary request. + cached_auth_using_http_get: HashCache, + // Cache 30X redirect url + // Example: RwLock", "">> + cached_redirect: HashCache, + // The epoch timestamp of token expiration, which is obtained from the registry server. + token_expired_at: ArcSwapOption, + // Cache bearer auth for refreshing token. + cached_bearer_auth: ArcSwapOption, +} + +impl RegistryState { + fn url(&self, path: &str, query: &[&str]) -> std::result::Result { + let path = if query.is_empty() { + format!("/v2/{}{}", self.repo, path) + } else { + format!("/v2/{}{}?{}", self.repo, path, query.join("&")) + }; + let url = format!("{}://{}", self.scheme, self.host.as_str()); + let url = Url::parse(url.as_str())?; + let url = url.join(path.as_str())?; + + Ok(url.to_string()) + } + + fn needs_fallback_http(&self, e: &dyn Error) -> bool { + match e.source() { + Some(err) => match err.source() { + Some(err) => { + if !self.scheme.0.load(Ordering::Relaxed) { + return false; + } + let msg = err.to_string().to_lowercase(); + // If we attempt to establish a TLS connection with the HTTP registry server, + // we are likely to encounter these types of error: + // https://github.com/openssl/openssl/blob/6b3d28757620e0781bb1556032bb6961ee39af63/crypto/err/openssl.txt#L1574 + // https://github.com/containerd/nerdctl/blob/225a70bdc3b93cdb00efac7db1ceb50c098a8a16/pkg/cmd/image/push.go#LL135C66-L135C66 + let fallback = + msg.contains("wrong version number") || msg.contains("connection refused"); + if fallback { + warn!("fallback to http due to tls connection error: {}", err); + } + fallback + } + None => false, + }, + None => false, + } + } + + // Request registry authentication server to get bearer token + fn get_token(&self, auth: BearerAuth, connection: &Arc) -> Result { + let http_get = self + .cached_auth_using_http_get + .get(&self.host) + .unwrap_or_default(); + let resp = if http_get { + self.get_token_with_get(&auth, connection)? + } else { + match self.get_token_with_post(&auth, connection) { + Ok(resp) => resp, + Err(_) => { + warn!("retry http GET method to get auth token"); + let resp = self.get_token_with_get(&auth, connection)?; + // Cache http method for next use. + self.cached_auth_using_http_get.set(self.host.clone(), true); + resp + } + } + }; + + let ret: TokenResponse = resp.json().map_err(|e| { + einval!(format!( + "registry auth server response decode failed: {:?}", + e + )) + })?; + + if let Ok(now_timestamp) = SystemTime::now().duration_since(UNIX_EPOCH) { + self.token_expired_at + .store(Some(Arc::new(now_timestamp.as_secs() + ret.expires_in))); + debug!( + "cached bearer auth, next time: {}", + now_timestamp.as_secs() + ret.expires_in + ); + } + + // Cache bearer auth for refreshing token. 
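`needs_fallback_http` above walks two levels of `Error::source()` and falls back to plain HTTP when the innermost cause carries the telltale OpenSSL/connect messages. A std-only sketch of that error-chain inspection (hypothetical error types; the real code inspects `reqwest` errors):

```rust
use std::error::Error;
use std::fmt;

#[derive(Debug)]
struct Wrapped(Box<dyn Error>);

impl fmt::Display for Wrapped {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "request failed")
    }
}

impl Error for Wrapped {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(self.0.as_ref())
    }
}

/// Illustrative only: does the innermost cause look like "TLS handshake
/// against an HTTP-only server"?
fn looks_like_plain_http(e: &dyn Error) -> bool {
    let inner = match e.source().and_then(|e| e.source()) {
        Some(inner) => inner,
        None => return false,
    };
    let msg = inner.to_string().to_lowercase();
    msg.contains("wrong version number") || msg.contains("connection refused")
}

fn main() {
    let io = std::io::Error::new(std::io::ErrorKind::Other, "SSL wrong version number");
    let outer = Wrapped(Box::new(Wrapped(Box::new(io))));
    assert!(looks_like_plain_http(&outer));
}
```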
+ self.cached_bearer_auth.store(Some(Arc::new(auth))); + + Ok(ret) + } + + // Get bearer token using a POST request + fn get_token_with_post( + &self, + auth: &BearerAuth, + connection: &Arc, + ) -> Result { + let mut form = HashMap::new(); + form.insert("service".to_string(), auth.service.clone()); + form.insert("scope".to_string(), auth.scope.clone()); + form.insert("grant_type".to_string(), "password".to_string()); + form.insert("username".to_string(), self.username.clone()); + form.insert("password".to_string(), self.password.clone()); + form.insert("client_id".to_string(), REGISTRY_CLIENT_ID.to_string()); + + let token_resp = connection + .call::<&[u8]>( + Method::POST, + auth.realm.as_str(), + None, + Some(ReqBody::Form(form)), + &mut HeaderMap::new(), + true, + ) + .map_err(|e| { + warn!( + "failed to request registry auth server by POST method: {:?}", + e + ); + einval!() + })?; + + Ok(token_resp) + } + + // Get bearer token using a GET request + fn get_token_with_get( + &self, + auth: &BearerAuth, + connection: &Arc, + ) -> Result { + let query = [ + ("service", auth.service.as_str()), + ("scope", auth.scope.as_str()), + ("grant_type", "password"), + ("username", self.username.as_str()), + ("password", self.password.as_str()), + ("client_id", REGISTRY_CLIENT_ID), + ]; + + let mut headers = HeaderMap::new(); + + // Insert the basic auth header to ensure the compatibility (e.g. Harbor registry) + // of fetching token by HTTP GET method. + // This refers containerd implementation: https://github.com/containerd/containerd/blob/dc7dba9c20f7210c38e8255487fc0ee12692149d/remotes/docker/auth/fetch.go#L187 + if let Some(auth) = &self.auth { + headers.insert( + HEADER_AUTHORIZATION, + format!("Basic {}", auth).parse().unwrap(), + ); + } + + let token_resp = connection + .call::<&[u8]>( + Method::GET, + auth.realm.as_str(), + Some(&query), + None, + &mut headers, + true, + ) + .map_err(|e| { + warn!( + "failed to request registry auth server by GET method: {:?}", + e + ); + einval!() + })?; + + Ok(token_resp) + } + + fn get_auth_header(&self, auth: Auth, connection: &Arc) -> Result { + match auth { + Auth::Basic(_) => self + .auth + .as_ref() + .map(|auth| format!("Basic {}", auth)) + .ok_or_else(|| einval!("invalid auth config")), + Auth::Bearer(auth) => { + let token = self.get_token(auth, connection)?; + Ok(format!("Bearer {}", token.token)) + } + } + } + + /// Parse `www-authenticate` response header respond from registry server + /// The header format like: `Bearer realm="https://auth.my-registry.com/token",service="my-registry.com",scope="repository:test/repo:pull,push"` + fn parse_auth(source: &HeaderValue) -> Option { + let source = source.to_str().unwrap(); + let source: Vec<&str> = source.splitn(2, ' ').collect(); + if source.len() < 2 { + return None; + } + let scheme = source[0].trim(); + let pairs = source[1].trim(); + let pairs = pairs.split("\","); + let mut paras = HashMap::new(); + for pair in pairs { + let pair: Vec<&str> = pair.trim().split('=').collect(); + if pair.len() < 2 { + return None; + } + let key = pair[0].trim(); + let value = pair[1].trim().trim_matches('"'); + paras.insert(key, value); + } + + match scheme { + "Basic" => { + let realm = if let Some(realm) = paras.get("realm") { + (*realm).to_string() + } else { + String::new() + }; + Some(Auth::Basic(BasicAuth { realm })) + } + "Bearer" => { + if paras.get("realm").is_none() + || paras.get("service").is_none() + || paras.get("scope").is_none() + { + return None; + } + + Some(Auth::Bearer(BearerAuth { + realm: 
(*paras.get("realm").unwrap()).to_string(), + service: (*paras.get("service").unwrap()).to_string(), + scope: (*paras.get("scope").unwrap()).to_string(), + })) + } + _ => None, + } + } + + fn fallback_http(&self) { + self.scheme.0.store(false, Ordering::Relaxed); + } +} + +#[derive(Clone)] +struct First { + inner: Arc>, +} + +impl First { + fn new() -> Self { + First { + inner: Arc::new(ArcSwap::new(Arc::new(Once::new()))), + } + } + + fn once(&self, f: F) + where + F: FnOnce(), + { + self.inner.load().call_once(f) + } + + fn renew(&self) { + self.inner.store(Arc::new(Once::new())); + } + + fn handle(&self, handle: &mut F) -> Option> + where + F: FnMut() -> BackendResult, + { + let mut ret = None; + // Call once twice to ensure the subsequent requests use the new + // Once instance after renew happens. + for _ in 0..=1 { + self.once(|| { + ret = Some(handle().map_err(|err| { + // Replace the Once instance so that we can retry it when + // the handle call failed. + self.renew(); + err + })); + }); + if ret.is_some() { + break; + } + } + ret + } + + /// When invoking concurrently, only one of the handle methods will be executed first, + /// then subsequent handle methods will be allowed to execute concurrently. + /// + /// Nydusd uses a registry backend which generates a surge of blob requests without + /// auth tokens on initial startup, this caused mirror backends (e.g. dragonfly) + /// to process very slowly. The method implements waiting for the first blob request + /// to complete before making other blob requests, this ensures the first request + /// caches a valid registry auth token, and subsequent concurrent blob requests can + /// reuse the cached token. + fn handle_force(&self, handle: &mut F) -> BackendResult + where + F: FnMut() -> BackendResult, + { + self.handle(handle).unwrap_or_else(handle) + } +} + +struct RegistryReader { + blob_id: String, + connection: Arc, + state: Arc, + metrics: Arc, + first: First, +} + +impl RegistryReader { + /// Request registry server with `authorization` header + /// + /// Bearer token authenticate workflow: + /// + /// Request: POST https://my-registry.com/test/repo/blobs/uploads + /// Response: status: 401 Unauthorized + /// header: www-authenticate: Bearer realm="https://auth.my-registry.com/token",service="my-registry.com",scope="repository:test/repo:pull,push" + /// + /// Request: POST https://auth.my-registry.com/token + /// body: "service=my-registry.com&scope=repository:test/repo:pull,push&grant_type=password&username=x&password=x&client_id=nydus-registry-client" + /// Response: status: 200 Ok + /// body: { "token": "" } + /// + /// Request: POST https://my-registry.com/test/repo/blobs/uploads + /// header: authorization: Bearer + /// Response: status: 200 Ok + /// + /// Basic authenticate workflow: + /// + /// Request: POST https://my-registry.com/test/repo/blobs/uploads + /// Response: status: 401 Unauthorized + /// header: www-authenticate: Basic + /// + /// Request: POST https://my-registry.com/test/repo/blobs/uploads + /// header: authorization: Basic base64() + /// Response: status: 200 Ok + fn request( + &self, + method: Method, + url: &str, + data: Option>, + mut headers: HeaderMap, + catch_status: bool, + ) -> RegistryResult { + // Try get authorization header from cache for this request + let mut last_cached_auth = String::new(); + let cached_auth = self.state.cached_auth.get(); + if !cached_auth.is_empty() { + last_cached_auth = cached_auth.clone(); + headers.insert( + HEADER_AUTHORIZATION, + 
HeaderValue::from_str(cached_auth.as_str()).unwrap(), + ); + } + + // For upload request with payload, the auth header should be cached + // after create_upload(), so we can request registry server directly + if let Some(data) = data { + return self + .connection + .call(method, url, None, Some(data), &mut headers, catch_status) + .map_err(RegistryError::Request); + } + + // Try to request registry server with `authorization` header + let mut resp = self + .connection + .call::<&[u8]>(method.clone(), url, None, None, &mut headers, false) + .map_err(RegistryError::Request)?; + if resp.status() == StatusCode::UNAUTHORIZED { + if headers.contains_key(HEADER_AUTHORIZATION) { + // If we request registry (harbor server) with expired authorization token, + // the `www-authenticate: Basic realm="harbor"` in response headers is not expected. + // Related code in harbor: + // https://github.com/goharbor/harbor/blob/v2.5.3/src/server/middleware/v2auth/auth.go#L98 + // + // We can remove the expired authorization token and + // resend the request to get the correct "www-authenticate" value. + headers.remove(HEADER_AUTHORIZATION); + + resp = self + .connection + .call::<&[u8]>(method.clone(), url, None, None, &mut headers, false) + .map_err(RegistryError::Request)?; + }; + + if let Some(resp_auth_header) = resp.headers().get(HEADER_WWW_AUTHENTICATE) { + // Get token from registry authorization server + if let Some(auth) = RegistryState::parse_auth(resp_auth_header) { + let auth_header = self + .state + .get_auth_header(auth, &self.connection) + .map_err(|e| RegistryError::Common(e.to_string()))?; + + headers.insert( + HEADER_AUTHORIZATION, + HeaderValue::from_str(auth_header.as_str()).unwrap(), + ); + + // Try to request registry server with `authorization` header again + let resp = self + .connection + .call(method, url, None, data, &mut headers, catch_status) + .map_err(RegistryError::Request)?; + + let status = resp.status(); + if is_success_status(status) { + // Cache authorization header for next request + self.state.cached_auth.set(&last_cached_auth, auth_header) + } + return respond(resp, catch_status).map_err(RegistryError::Request); + } + } + } + + respond(resp, catch_status).map_err(RegistryError::Request) + } + + /// Read data from registry server + /// + /// Step: + /// + /// Request: GET /blobs/sha256: + /// Response: status: 307 Temporary Redirect + /// header: location: https://raw-blob-storage-host.com/signature=x + /// + /// Request: GET https://raw-blob-storage-host.com/signature=x + /// Response: status: 200 Ok / 403 Forbidden + /// If responding 403, we need to repeat step one + fn _try_read( + &self, + mut buf: &mut [u8], + offset: u64, + allow_retry: bool, + ) -> RegistryResult { + let url = format!("/blobs/sha256:{}", self.blob_id); + let url = self + .state + .url(url.as_str(), &[]) + .map_err(|e| RegistryError::Url(url, e))?; + let mut headers = HeaderMap::new(); + let end_at = offset + buf.len() as u64 - 1; + let range = format!("bytes={}-{}", offset, end_at); + headers.insert("Range", range.parse().unwrap()); + + let mut resp; + let cached_redirect = self.state.cached_redirect.get(&self.blob_id); + + if let Some(cached_redirect) = cached_redirect { + resp = self + .connection + .call::<&[u8]>( + Method::GET, + cached_redirect.as_str(), + None, + None, + &mut headers, + false, + ) + .map_err(RegistryError::Request)?; + + // The request has expired or has been denied, need to re-request + if allow_retry + && [StatusCode::UNAUTHORIZED, 
StatusCode::FORBIDDEN].contains(&resp.status()) + { + warn!( + "The redirected link has expired: {}, will retry read", + cached_redirect.as_str() + ); + self.state.cached_redirect.remove(&self.blob_id); + // Try read again only once + return self._try_read(buf, offset, false); + } + } else { + resp = match self.request::<&[u8]>( + Method::GET, + url.as_str(), + None, + headers.clone(), + false, + ) { + Ok(res) => res, + Err(RegistryError::Request(ConnectionError::Common(e))) + if self.state.needs_fallback_http(&e) => + { + self.state.fallback_http(); + let url = format!("/blobs/sha256:{}", self.blob_id); + let url = self + .state + .url(url.as_str(), &[]) + .map_err(|e| RegistryError::Url(url, e))?; + self.request::<&[u8]>(Method::GET, url.as_str(), None, headers.clone(), false)? + } + Err(RegistryError::Request(ConnectionError::Common(e))) => { + if e.to_string().contains("self signed certificate") { + warn!("try to enable \"skip_verify: true\" option"); + } + return Err(RegistryError::Request(ConnectionError::Common(e))); + } + Err(e) => { + return Err(e); + } + }; + let status = resp.status(); + + // Handle redirect request and cache redirect url + if REDIRECTED_STATUS_CODE.contains(&status) { + if let Some(location) = resp.headers().get("location") { + let location = location.to_str().unwrap(); + let mut location = Url::parse(location) + .map_err(|e| RegistryError::Url(location.to_string(), e))?; + // Note: Some P2P proxy server supports only scheme specified origin blob server, + // so we need change scheme to `blob_url_scheme` here + if !self.state.blob_url_scheme.is_empty() { + location + .set_scheme(&self.state.blob_url_scheme) + .map_err(|_| { + RegistryError::Scheme(self.state.blob_url_scheme.clone()) + })?; + } + if !self.state.blob_redirected_host.is_empty() { + location + .set_host(Some(self.state.blob_redirected_host.as_str())) + .map_err(|e| { + error!( + "Failed to set blob redirected host to {}: {:?}", + self.state.blob_redirected_host.as_str(), + e + ); + RegistryError::Url(location.to_string(), e) + })?; + debug!("New redirected location {:?}", location.host_str()); + } + let resp_ret = self + .connection + .call::<&[u8]>( + Method::GET, + location.as_str(), + None, + None, + &mut headers, + true, + ) + .map_err(RegistryError::Request); + match resp_ret { + Ok(_resp) => { + resp = _resp; + self.state + .cached_redirect + .set(self.blob_id.clone(), location.as_str().to_string()) + } + Err(err) => { + return Err(err); + } + } + }; + } else { + resp = respond(resp, true).map_err(RegistryError::Request)?; + } + } + + resp.copy_to(&mut buf) + .map_err(RegistryError::Transport) + .map(|size| size as usize) + } +} + +impl BlobReader for RegistryReader { + fn blob_size(&self) -> BackendResult { + self.first.handle_force(&mut || -> BackendResult { + let url = format!("/blobs/sha256:{}", self.blob_id); + let url = self + .state + .url(&url, &[]) + .map_err(|e| RegistryError::Url(url, e))?; + + let resp = match self.request::<&[u8]>( + Method::HEAD, + url.as_str(), + None, + HeaderMap::new(), + true, + ) { + Ok(res) => res, + Err(RegistryError::Request(ConnectionError::Common(e))) + if self.state.needs_fallback_http(&e) => + { + self.state.fallback_http(); + let url = format!("/blobs/sha256:{}", self.blob_id); + let url = self + .state + .url(&url, &[]) + .map_err(|e| RegistryError::Url(url, e))?; + self.request::<&[u8]>(Method::HEAD, url.as_str(), None, HeaderMap::new(), true)? 
+ } + Err(e) => { + return Err(BackendError::Registry(e)); + } + }; + let content_length = resp + .headers() + .get(CONTENT_LENGTH) + .ok_or_else(|| RegistryError::Common("invalid content length".to_string()))?; + + Ok(content_length + .to_str() + .map_err(|err| RegistryError::Common(format!("invalid content length: {:?}", err)))? + .parse::() + .map_err(|err| { + RegistryError::Common(format!("invalid content length: {:?}", err)) + })?) + }) + } + + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + self.first.handle_force(&mut || -> BackendResult { + self._try_read(buf, offset, true) + .map_err(BackendError::Registry) + }) + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + + fn retry_limit(&self) -> u8 { + self.state.retry_limit + } +} + +/// Storage backend based on image registry. +pub struct Registry { + connection: Arc, + state: Arc, + metrics: Arc, + first: First, +} + +impl Registry { + #[allow(clippy::useless_let_if_seq)] + pub fn new(config: &RegistryConfig, id: Option<&str>) -> Result { + let id = id.ok_or_else(|| einval!("Registry backend requires blob_id"))?; + let con_config: ConnectionConfig = config.clone().into(); + + if !config.proxy.url.is_empty() && !config.mirrors.is_empty() { + return Err(einval!( + "connection: proxy and mirrors cannot be configured at the same time." + )); + } + + let retry_limit = con_config.retry_limit; + let connection = Connection::new(&con_config)?; + let auth = trim(config.auth.clone()); + let registry_token = trim(config.registry_token.clone()); + let (username, password) = Self::get_authorization_info(&auth)?; + let cached_auth = if let Some(registry_token) = registry_token { + // Store the registry bearer token to cached_auth, prefer to + // use the token stored in cached_auth to request registry. 
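`_try_read` above requests exactly the caller's buffer window with an inclusive HTTP `Range` header. A tiny sketch of how that header value is derived (sample numbers only):

```rust
/// Illustrative only: format the inclusive byte range `_try_read` requests.
fn range_header(offset: u64, len: usize) -> String {
    let end_at = offset + len as u64 - 1;
    format!("bytes={}-{}", offset, end_at)
}

fn main() {
    // Reading 4096 bytes starting at offset 1024 requests bytes 1024..=5119.
    assert_eq!(range_header(1024, 4096), "bytes=1024-5119");
}
```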
+ Cache::new(format!("Bearer {}", registry_token)) + } else { + Cache::new(String::new()) + }; + + let scheme = if !config.scheme.is_empty() && config.scheme == "http" { + Scheme::new(false) + } else { + Scheme::new(true) + }; + + let state = Arc::new(RegistryState { + scheme, + host: config.host.clone(), + repo: config.repo.clone(), + auth, + cached_auth, + username, + password, + retry_limit, + blob_url_scheme: config.blob_url_scheme.clone(), + blob_redirected_host: config.blob_redirected_host.clone(), + cached_auth_using_http_get: HashCache::new(), + cached_redirect: HashCache::new(), + token_expired_at: ArcSwapOption::new(None), + cached_bearer_auth: ArcSwapOption::new(None), + }); + + let registry = Registry { + connection, + state, + metrics: BackendMetrics::new(id, "registry"), + first: First::new(), + }; + + registry.start_refresh_token_thread(); + info!("Refresh token thread started."); + + Ok(registry) + } + + fn get_authorization_info(auth: &Option) -> Result<(String, String)> { + if let Some(auth) = &auth { + let auth: Vec = base64::engine::general_purpose::STANDARD + .decode(auth.as_bytes()) + .map_err(|e| { + einval!(format!( + "Invalid base64 encoded registry auth config: {:?}", + e + )) + })?; + let auth = std::str::from_utf8(&auth).map_err(|e| { + einval!(format!( + "Invalid utf-8 encoded registry auth config: {:?}", + e + )) + })?; + let auth: Vec<&str> = auth.splitn(2, ':').collect(); + if auth.len() < 2 { + return Err(einval!("Invalid registry auth config")); + } + + Ok((auth[0].to_string(), auth[1].to_string())) + } else { + Ok((String::new(), String::new())) + } + } + + fn start_refresh_token_thread(&self) { + let conn = self.connection.clone(); + let state = self.state.clone(); + // FIXME: we'd better allow users to specify the expiration time. + let mut refresh_interval = REGISTRY_DEFAULT_TOKEN_EXPIRATION; + thread::spawn(move || { + loop { + if let Ok(now_timestamp) = SystemTime::now().duration_since(UNIX_EPOCH) { + if let Some(token_expired_at) = state.token_expired_at.load().as_deref() { + // If the token will expire within the next refresh interval, + // refresh it immediately. + if now_timestamp.as_secs() + refresh_interval >= *token_expired_at { + if let Some(cached_bearer_auth) = + state.cached_bearer_auth.load().as_deref() + { + if let Ok(token) = + state.get_token(cached_bearer_auth.to_owned(), &conn) + { + let new_cached_auth = format!("Bearer {}", token.token); + debug!( + "[refresh_token_thread] registry token has been refreshed" + ); + // Refresh cached token. + state + .cached_auth + .set(&state.cached_auth.get(), new_cached_auth); + // Reset refresh interval according to real expiration time, + // and advance 20s to handle the unexpected cases. 
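The refresh loop above re-fetches the token once the cached expiry falls within the next wake-up interval, then trims 20 seconds off the new interval as a safety margin. A std-only sketch of those two decisions (hypothetical helper names; timestamps are Unix seconds as in the real code):

```rust
/// Illustrative only: refresh when the token expires within the next interval.
fn should_refresh(now_secs: u64, refresh_interval: u64, token_expired_at: u64) -> bool {
    now_secs + refresh_interval >= token_expired_at
}

/// Illustrative only: sleep a bit less than the token lifetime, but never
/// underflow for very short-lived tokens.
fn next_interval(expires_in: u64) -> u64 {
    expires_in.checked_sub(20).unwrap_or(expires_in)
}

fn main() {
    // Expiry at t=600: any check within the 600s window before it refreshes.
    assert!(should_refresh(100, 600, 600));
    assert!(!should_refresh(0, 300, 600));
    assert_eq!(next_interval(600), 580);
    assert_eq!(next_interval(10), 10); // shorter than the margin: keep as-is
}
```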
+ refresh_interval = token + .expires_in + .checked_sub(20) + .unwrap_or(token.expires_in); + } else { + error!( + "[refresh_token_thread] failed to refresh registry token" + ); + } + } + } + } + } + + if conn.shutdown.load(Ordering::Acquire) { + break; + } + thread::sleep(Duration::from_secs(refresh_interval)); + if conn.shutdown.load(Ordering::Acquire) { + break; + } + } + }); + } +} + +impl BlobBackend for Registry { + fn shutdown(&self) { + self.connection.shutdown(); + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + + fn get_reader(&self, blob_id: &str) -> BackendResult> { + Ok(Arc::new(RegistryReader { + blob_id: blob_id.to_owned(), + state: self.state.clone(), + connection: self.connection.clone(), + metrics: self.metrics.clone(), + first: self.first.clone(), + })) + } +} + +impl Drop for Registry { + fn drop(&mut self) { + self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } +} + +fn trim(value: Option) -> Option { + if let Some(val) = value.as_ref() { + let trimmed_val = val.trim(); + if trimmed_val.is_empty() { + None + } else if trimmed_val.len() == val.len() { + value + } else { + Some(trimmed_val.to_string()) + } + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string_cache() { + let cache = Cache::new("test".to_owned()); + + assert_eq!(cache.get(), "test"); + + cache.set("test", "test1".to_owned()); + assert_eq!(cache.get(), "test1"); + cache.set("test1", "test1".to_owned()); + assert_eq!(cache.get(), "test1"); + } + + #[test] + fn test_hash_cache() { + let cache = HashCache::new(); + + assert_eq!(cache.get("test"), None); + cache.set("test".to_owned(), "test".to_owned()); + assert_eq!(cache.get("test"), Some("test".to_owned())); + cache.set("test".to_owned(), "test1".to_owned()); + assert_eq!(cache.get("test"), Some("test1".to_owned())); + cache.remove("test"); + assert_eq!(cache.get("test"), None); + } + + #[test] + fn test_state_url() { + let state = RegistryState { + scheme: Scheme::new(false), + host: "alibaba-inc.com".to_string(), + repo: "nydus".to_string(), + auth: None, + username: "test".to_string(), + password: "password".to_string(), + retry_limit: 5, + blob_url_scheme: "https".to_string(), + blob_redirected_host: "oss.alibaba-inc.com".to_string(), + cached_auth_using_http_get: Default::default(), + cached_auth: Default::default(), + cached_redirect: Default::default(), + token_expired_at: ArcSwapOption::new(None), + cached_bearer_auth: ArcSwapOption::new(None), + }; + + assert_eq!( + state.url("image", &["blabla"]).unwrap(), + "http://alibaba-inc.com/v2/nydusimage?blabla".to_owned() + ); + assert_eq!( + state.url("image", &[]).unwrap(), + "http://alibaba-inc.com/v2/nydusimage".to_owned() + ); + } + + #[test] + fn test_parse_auth() { + let str = "Bearer realm=\"https://auth.my-registry.com/token\",service=\"my-registry.com\",scope=\"repository:test/repo:pull,push\""; + let header = HeaderValue::from_str(str).unwrap(); + let auth = RegistryState::parse_auth(&header).unwrap(); + match auth { + Auth::Bearer(auth) => { + assert_eq!(&auth.realm, "https://auth.my-registry.com/token"); + assert_eq!(&auth.service, "my-registry.com"); + assert_eq!(&auth.scope, "repository:test/repo:pull,push"); + } + _ => panic!("failed to parse `Bearer` authentication header"), + } + + let str = "Basic realm=\"https://auth.my-registry.com/token\""; + let header = HeaderValue::from_str(str).unwrap(); + let auth = RegistryState::parse_auth(&header).unwrap(); + match auth { + Auth::Basic(auth) => 
assert_eq!(&auth.realm, "https://auth.my-registry.com/token"), + _ => panic!("failed to parse `Bearer` authentication header"), + } + + let str = "Base realm=\"https://auth.my-registry.com/token\""; + let header = HeaderValue::from_str(str).unwrap(); + assert!(RegistryState::parse_auth(&header).is_none()); + } + + #[test] + fn test_trim() { + assert_eq!(trim(None), None); + assert_eq!(trim(Some("".to_owned())), None); + assert_eq!(trim(Some(" ".to_owned())), None); + assert_eq!(trim(Some(" test ".to_owned())), Some("test".to_owned())); + assert_eq!(trim(Some("test ".to_owned())), Some("test".to_owned())); + assert_eq!(trim(Some(" test".to_owned())), Some("test".to_owned())); + assert_eq!(trim(Some(" te st ".to_owned())), Some("te st".to_owned())); + assert_eq!(trim(Some("te st".to_owned())), Some("te st".to_owned())); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_first_basically() { + let first = First::new(); + let mut val = 0; + first.once(|| { + val += 1; + }); + assert_eq!(val, 1); + + first.clone().once(|| { + val += 1; + }); + assert_eq!(val, 1); + + first.renew(); + first.clone().once(|| { + val += 1; + }); + assert_eq!(val, 2); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_first_concurrently() { + let val = Arc::new(ArcSwap::new(Arc::new(0))); + let first = First::new(); + + let mut handlers = Vec::new(); + for _ in 0..100 { + let val_cloned = val.clone(); + let first_cloned = first.clone(); + handlers.push(std::thread::spawn(move || { + let _ = first_cloned.handle(&mut || -> BackendResult<()> { + let val = val_cloned.load(); + let ret = if *val.as_ref() == 0 { + std::thread::sleep(std::time::Duration::from_secs(2)); + Err(BackendError::Registry(RegistryError::Common(String::from( + "network error", + )))) + } else { + Ok(()) + }; + val_cloned.store(Arc::new(val.as_ref() + 1)); + ret + }); + })); + } + + for handler in handlers { + handler.join().unwrap(); + } + + assert_eq!(*val.load().as_ref(), 2); + } +} diff --git a/storage/src/backend/s3.rs b/storage/src/backend/s3.rs index 3e4c72d257e..766008f608d 100644 --- a/storage/src/backend/s3.rs +++ b/storage/src/backend/s3.rs @@ -1,337 +1,337 @@ -// Copyright 2022 Ant Group. All rights reserved. -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. - -// SPDX-License-Identifier: Apache-2.0 - -// ! Storage backend driver to access blobs on s3. - -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::io::Result; -use std::sync::Arc; - -use hmac::{Hmac, Mac}; -use http::Uri; -use nydus_api::S3Config; -use nydus_utils::metrics::BackendMetrics; -use reqwest::header::HeaderMap; -use reqwest::Method; -use sha2::{Digest, Sha256}; -use time::{format_description, OffsetDateTime}; - -use crate::backend::connection::{Connection, ConnectionConfig}; -use crate::backend::object_storage::{ObjectStorage, ObjectStorageState}; - -const EMPTY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; -const HEADER_HOST: &str = "Host"; -const HEADER_AWZ_DATE: &str = "x-amz-date"; -const HEADER_AWZ_CONTENT_SHA256: &str = "x-amz-content-sha256"; -const S3_DEFAULT_ENDPOINT: &str = "s3.amazonaws.com"; - -#[derive(Debug)] -pub struct S3State { - region: String, - access_key_id: String, - access_key_secret: String, - scheme: String, - object_prefix: String, - endpoint: String, - bucket_name: String, - retry_limit: u8, -} - -/// Storage backend to access data stored in S3. -pub type S3 = ObjectStorage; - -impl S3 { - /// Create a new S3 storage backend. 
- pub fn new(s3_config: &S3Config, id: Option<&str>) -> Result { - let con_config: ConnectionConfig = s3_config.clone().into(); - let retry_limit = con_config.retry_limit; - let connection = Connection::new(&con_config)?; - let final_endpoint = if s3_config.endpoint.is_empty() { - S3_DEFAULT_ENDPOINT.to_string() - } else { - s3_config.endpoint.clone() - }; - - let state = Arc::new(S3State { - region: s3_config.region.clone(), - scheme: s3_config.scheme.clone(), - object_prefix: s3_config.object_prefix.clone(), - endpoint: final_endpoint, - access_key_id: s3_config.access_key_id.clone(), - access_key_secret: s3_config.access_key_secret.clone(), - bucket_name: s3_config.bucket_name.clone(), - retry_limit, - }); - let metrics = id.map(|i| BackendMetrics::new(i, "oss")); - - Ok(ObjectStorage::new_object_storage( - connection, - state, - metrics, - id.map(|i| i.to_string()), - )) - } -} - -impl S3State { - // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L155-L200 - // under apache 2.0 license - fn get_canonical_headers(&self, map: &HeaderMap) -> (String, String) { - let mut btmap: BTreeMap = BTreeMap::new(); - - for (k, values) in map.iter() { - let key = k.as_str().to_lowercase(); - if "authorization" == key || "user-agent" == key { - continue; - } - btmap.insert(key.clone(), values.to_str().unwrap().to_string()); - } - - let mut signed_headers = String::new(); - let mut canonical_headers = String::new(); - let mut add_delim = false; - for (key, value) in &btmap { - if add_delim { - signed_headers.push(';'); - canonical_headers.push('\n'); - } - - signed_headers.push_str(key); - - canonical_headers.push_str(key); - canonical_headers.push(':'); - canonical_headers.push_str(value); - - add_delim = true; - } - - (signed_headers, canonical_headers) - } - - // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L44-L64 - // under apache 2.0 license - fn get_canonical_request_hash( - &self, - method: &Method, - uri: &str, - query_string: &str, - headers: &str, - signed_headers: &str, - content_sha256: &str, - ) -> String { - let canonical_request = format!( - "{}\n{}\n{}\n{}\n\n{}\n{}", - method, uri, query_string, headers, signed_headers, content_sha256 - ); - return sha256_hash(canonical_request.as_bytes()); - } - - // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L75-88 - // under apache 2.0 license - pub fn get_signing_key(&self, date: &OffsetDateTime) -> Vec { - let mut key: Vec = b"AWS4".to_vec(); - key.extend(self.access_key_secret.as_bytes()); - - let date_key = hmac_hash(key.as_slice(), to_signer_date(date).as_bytes()); - let date_region_key = hmac_hash(date_key.as_slice(), self.region.as_bytes()); - let date_region_service_key = hmac_hash(date_region_key.as_slice(), "s3".as_bytes()); - return hmac_hash(date_region_service_key.as_slice(), b"aws4_request"); - } -} - -impl ObjectStorageState for S3State { - fn url(&self, obj_key: &str, query_str: &[&str]) -> (String, String) { - let query_str = if query_str.is_empty() { - "".to_string() - } else { - format!("?{}", query_str.join("&")) - }; - let resource = format!( - "/{}/{}{}{}", - self.bucket_name, self.object_prefix, obj_key, query_str - ); - let url = format!("{}://{}{}", self.scheme, self.endpoint, resource,); - (resource, url) - } - - // modified based on 
https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L106-L135 - // under apache 2.0 license - /// generate s3 request signature - fn sign( - &self, - verb: Method, - headers: &mut HeaderMap, - _: &str, - full_resource_url: &str, - ) -> Result<()> { - let date = OffsetDateTime::now_utc(); - let content_sha256 = EMPTY_SHA256; - let parsed_uri = full_resource_url - .to_string() - .parse::() - .map_err(|e| einval!(e))?; - let uri_path = parsed_uri.path(); - let query = parsed_uri.query().unwrap_or(""); - let host = parsed_uri.host().unwrap_or(self.endpoint.as_str()); - - headers.insert(HEADER_HOST, host.parse().map_err(|e| einval!(e))?); - headers.insert( - HEADER_AWZ_DATE, - to_awz_date(&date).parse().map_err(|e| einval!(e))?, - ); - headers.insert( - HEADER_AWZ_CONTENT_SHA256, - EMPTY_SHA256.parse().map_err(|e| einval!(e))?, - ); - let scope = format!( - "{}/{}/{}/aws4_request", - to_signer_date(&date), - self.region, - "s3", - ); - let (signed_headers, canonical_headers) = self.get_canonical_headers(headers); - let canonical_request_hash = self.get_canonical_request_hash( - &verb, - uri_path, - query, - &canonical_headers, - &signed_headers, - content_sha256, - ); - let string_to_sign = format!( - "AWS4-HMAC-SHA256\n{}\n{}\n{}", - to_awz_date(&date), - scope, - canonical_request_hash - ); - let signing_key = self.get_signing_key(&date); - let signature = hmac_hash_hex(signing_key.as_slice(), string_to_sign.as_bytes()); - let authorization = format!( - "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}", - self.access_key_id, scope, signed_headers, signature - ); - headers.insert( - "Authorization", - authorization.parse().map_err(|e| einval!(e))?, - ); - - Ok(()) - } - - fn retry_limit(&self) -> u8 { - self.retry_limit - } -} - -// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L52-L56 -// under apache 2.0 license -fn sha256_hash(data: &[u8]) -> String { - let mut hasher = Sha256::new(); - hasher.update(data); - format!("{:x}", hasher.finalize()) -} - -// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L25-L29 -// under apache 2.0 license -fn hmac_hash(key: &[u8], data: &[u8]) -> Vec { - let mut hasher = Hmac::::new_from_slice(key).expect("HMAC can take key of any size"); - hasher.update(data); - hasher.finalize().into_bytes().to_vec() -} - -// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L31-L33 -// under apache 2.0 license -fn hmac_hash_hex(key: &[u8], data: &[u8]) -> String { - hex::encode(hmac_hash(key, data)) -} - -// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L66-L68 -// under apache 2.0 license -fn to_signer_date(date: &OffsetDateTime) -> String { - let format = format_description::parse("[year][month][day]").unwrap(); - date.format(&format).unwrap() -} - -// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L70-L72 -// under apache 2.0 license -fn to_awz_date(date: &OffsetDateTime) -> String { - let format = format_description::parse("[year][month][day]T[hour][minute][second]Z").unwrap(); - date.format(&format).unwrap() -} - -#[cfg(test)] -mod tests { - use http::{HeaderMap, Method}; - use nydus_api::S3Config; - - use crate::backend::object_storage::ObjectStorageState; - use 
crate::backend::s3::S3State; - use crate::backend::BlobBackend; - - use super::S3; - - fn get_test_s3_state() -> (S3State, String, String) { - let state = S3State { - region: "us-east-1".to_string(), - access_key_id: "test-key".to_string(), - access_key_secret: "test-key-secret".to_string(), - scheme: "http".to_string(), - object_prefix: "test-prefix-".to_string(), - endpoint: "localhost:9000".to_string(), - bucket_name: "test-bucket".to_string(), - retry_limit: 6, - }; - let (resource, url) = state.url("test-object", &["a=b", "c=d"]); - (state, resource, url) - } - - #[test] - fn test_s3_new() { - let config_str = r#"{ - "endpoint": "https://test.com", - "region": "us-east-1", - "access_key_id": "test", - "access_key_secret": "test", - "bucket_name": "antsys-nydus", - "object_prefix":"nydus_v2/", - "retry_limit": 6 - }"#; - let config: S3Config = serde_json::from_str(config_str).unwrap(); - let s3 = S3::new(&config, Some("test-image")).unwrap(); - - s3.metrics(); - - let reader = s3.get_reader("test").unwrap(); - assert_eq!(reader.retry_limit(), 6); - - s3.shutdown(); - } - - #[test] - fn test_s3_state_url() { - let (_, resource, url) = get_test_s3_state(); - assert_eq!(resource, "/test-bucket/test-prefix-test-object?a=b&c=d"); - assert_eq!( - url, - "http://localhost:9000/test-bucket/test-prefix-test-object?a=b&c=d" - ); - } - - #[test] - fn test_s3_state_sign() { - let (state, resource, url) = get_test_s3_state(); - println!("{}", url); - let mut headers = HeaderMap::new(); - headers.append("Range", "bytes=5242900-".parse().unwrap()); - let result = state.sign(Method::GET, &mut headers, &resource, &url); - assert!(result.is_ok()); - - use regex::Regex; - let re = Regex::new(r"^AWS4-HMAC-SHA256 Credential=test-key/[0-9]{8}/us-east-1/s3/aws4_request, SignedHeaders=host;range;x-amz-content-sha256;x-amz-date, Signature=[A-Fa-f0-9]{64}$").unwrap(); - let authorization = headers.get("Authorization").unwrap(); - assert!(re.is_match(authorization.to_str().unwrap())); - } -} +// Copyright 2022 Ant Group. All rights reserved. +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. + +// SPDX-License-Identifier: Apache-2.0 + +// ! Storage backend driver to access blobs on s3. + +use std::collections::BTreeMap; +use std::fmt::Debug; +use std::io::Result; +use std::sync::Arc; + +use hmac::{Hmac, Mac}; +use http::Uri; +use nydus_api::S3Config; +use nydus_utils::metrics::BackendMetrics; +use reqwest::header::HeaderMap; +use reqwest::Method; +use sha2::{Digest, Sha256}; +use time::{format_description, OffsetDateTime}; + +use crate::backend::connection::{Connection, ConnectionConfig}; +use crate::backend::object_storage::{ObjectStorage, ObjectStorageState}; + +const EMPTY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; +const HEADER_HOST: &str = "Host"; +const HEADER_AWZ_DATE: &str = "x-amz-date"; +const HEADER_AWZ_CONTENT_SHA256: &str = "x-amz-content-sha256"; +const S3_DEFAULT_ENDPOINT: &str = "s3.amazonaws.com"; + +#[derive(Debug)] +pub struct S3State { + region: String, + access_key_id: String, + access_key_secret: String, + scheme: String, + object_prefix: String, + endpoint: String, + bucket_name: String, + retry_limit: u8, +} + +/// Storage backend to access data stored in S3. +pub type S3 = ObjectStorage; + +impl S3 { + /// Create a new S3 storage backend. 
+ pub fn new(s3_config: &S3Config, id: Option<&str>) -> Result { + let con_config: ConnectionConfig = s3_config.clone().into(); + let retry_limit = con_config.retry_limit; + let connection = Connection::new(&con_config)?; + let final_endpoint = if s3_config.endpoint.is_empty() { + S3_DEFAULT_ENDPOINT.to_string() + } else { + s3_config.endpoint.clone() + }; + + let state = Arc::new(S3State { + region: s3_config.region.clone(), + scheme: s3_config.scheme.clone(), + object_prefix: s3_config.object_prefix.clone(), + endpoint: final_endpoint, + access_key_id: s3_config.access_key_id.clone(), + access_key_secret: s3_config.access_key_secret.clone(), + bucket_name: s3_config.bucket_name.clone(), + retry_limit, + }); + let metrics = id.map(|i| BackendMetrics::new(i, "oss")); + + Ok(ObjectStorage::new_object_storage( + connection, + state, + metrics, + id.map(|i| i.to_string()), + )) + } +} + +impl S3State { + // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L155-L200 + // under apache 2.0 license + fn get_canonical_headers(&self, map: &HeaderMap) -> (String, String) { + let mut btmap: BTreeMap = BTreeMap::new(); + + for (k, values) in map.iter() { + let key = k.as_str().to_lowercase(); + if "authorization" == key || "user-agent" == key { + continue; + } + btmap.insert(key.clone(), values.to_str().unwrap().to_string()); + } + + let mut signed_headers = String::new(); + let mut canonical_headers = String::new(); + let mut add_delim = false; + for (key, value) in &btmap { + if add_delim { + signed_headers.push(';'); + canonical_headers.push('\n'); + } + + signed_headers.push_str(key); + + canonical_headers.push_str(key); + canonical_headers.push(':'); + canonical_headers.push_str(value); + + add_delim = true; + } + + (signed_headers, canonical_headers) + } + + // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L44-L64 + // under apache 2.0 license + fn get_canonical_request_hash( + &self, + method: &Method, + uri: &str, + query_string: &str, + headers: &str, + signed_headers: &str, + content_sha256: &str, + ) -> String { + let canonical_request = format!( + "{}\n{}\n{}\n{}\n\n{}\n{}", + method, uri, query_string, headers, signed_headers, content_sha256 + ); + return sha256_hash(canonical_request.as_bytes()); + } + + // modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L75-88 + // under apache 2.0 license + pub fn get_signing_key(&self, date: &OffsetDateTime) -> Vec { + let mut key: Vec = b"AWS4".to_vec(); + key.extend(self.access_key_secret.as_bytes()); + + let date_key = hmac_hash(key.as_slice(), to_signer_date(date).as_bytes()); + let date_region_key = hmac_hash(date_key.as_slice(), self.region.as_bytes()); + let date_region_service_key = hmac_hash(date_region_key.as_slice(), "s3".as_bytes()); + return hmac_hash(date_region_service_key.as_slice(), b"aws4_request"); + } +} + +impl ObjectStorageState for S3State { + fn url(&self, obj_key: &str, query_str: &[&str]) -> (String, String) { + let query_str = if query_str.is_empty() { + "".to_string() + } else { + format!("?{}", query_str.join("&")) + }; + let resource = format!( + "/{}/{}{}{}", + self.bucket_name, self.object_prefix, obj_key, query_str + ); + let url = format!("{}://{}{}", self.scheme, self.endpoint, resource,); + (resource, url) + } + + // modified based on 
https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L106-L135 + // under apache 2.0 license + /// generate s3 request signature + fn sign( + &self, + verb: Method, + headers: &mut HeaderMap, + _: &str, + full_resource_url: &str, + ) -> Result<()> { + let date = OffsetDateTime::now_utc(); + let content_sha256 = EMPTY_SHA256; + let parsed_uri = full_resource_url + .to_string() + .parse::() + .map_err(|e| einval!(e))?; + let uri_path = parsed_uri.path(); + let query = parsed_uri.query().unwrap_or(""); + let host = parsed_uri.host().unwrap_or(self.endpoint.as_str()); + + headers.insert(HEADER_HOST, host.parse().map_err(|e| einval!(e))?); + headers.insert( + HEADER_AWZ_DATE, + to_awz_date(&date).parse().map_err(|e| einval!(e))?, + ); + headers.insert( + HEADER_AWZ_CONTENT_SHA256, + EMPTY_SHA256.parse().map_err(|e| einval!(e))?, + ); + let scope = format!( + "{}/{}/{}/aws4_request", + to_signer_date(&date), + self.region, + "s3", + ); + let (signed_headers, canonical_headers) = self.get_canonical_headers(headers); + let canonical_request_hash = self.get_canonical_request_hash( + &verb, + uri_path, + query, + &canonical_headers, + &signed_headers, + content_sha256, + ); + let string_to_sign = format!( + "AWS4-HMAC-SHA256\n{}\n{}\n{}", + to_awz_date(&date), + scope, + canonical_request_hash + ); + let signing_key = self.get_signing_key(&date); + let signature = hmac_hash_hex(signing_key.as_slice(), string_to_sign.as_bytes()); + let authorization = format!( + "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}", + self.access_key_id, scope, signed_headers, signature + ); + headers.insert( + "Authorization", + authorization.parse().map_err(|e| einval!(e))?, + ); + + Ok(()) + } + + fn retry_limit(&self) -> u8 { + self.retry_limit + } +} + +// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L52-L56 +// under apache 2.0 license +fn sha256_hash(data: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(data); + format!("{:x}", hasher.finalize()) +} + +// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L25-L29 +// under apache 2.0 license +fn hmac_hash(key: &[u8], data: &[u8]) -> Vec { + let mut hasher = Hmac::::new_from_slice(key).expect("HMAC can take key of any size"); + hasher.update(data); + hasher.finalize().into_bytes().to_vec() +} + +// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/signer.rs#L31-L33 +// under apache 2.0 license +fn hmac_hash_hex(key: &[u8], data: &[u8]) -> String { + hex::encode(hmac_hash(key, data)) +} + +// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L66-L68 +// under apache 2.0 license +fn to_signer_date(date: &OffsetDateTime) -> String { + let format = format_description::parse("[year][month][day]").unwrap(); + date.format(&format).unwrap() +} + +// modified based on https://github.com/minio/minio-rs/blob/5fea81d68d381fd2a4c27e4d259f7012de08ab77/src/s3/utils.rs#L70-L72 +// under apache 2.0 license +fn to_awz_date(date: &OffsetDateTime) -> String { + let format = format_description::parse("[year][month][day]T[hour][minute][second]Z").unwrap(); + date.format(&format).unwrap() +} + +#[cfg(test)] +mod tests { + use http::{HeaderMap, Method}; + use nydus_api::S3Config; + + use crate::backend::object_storage::ObjectStorageState; + use 
crate::backend::s3::S3State; + use crate::backend::BlobBackend; + + use super::S3; + + fn get_test_s3_state() -> (S3State, String, String) { + let state = S3State { + region: "us-east-1".to_string(), + access_key_id: "test-key".to_string(), + access_key_secret: "test-key-secret".to_string(), + scheme: "http".to_string(), + object_prefix: "test-prefix-".to_string(), + endpoint: "localhost:9000".to_string(), + bucket_name: "test-bucket".to_string(), + retry_limit: 6, + }; + let (resource, url) = state.url("test-object", &["a=b", "c=d"]); + (state, resource, url) + } + + #[test] + fn test_s3_new() { + let config_str = r#"{ + "endpoint": "https://test.com", + "region": "us-east-1", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/", + "retry_limit": 6 + }"#; + let config: S3Config = serde_json::from_str(config_str).unwrap(); + let s3 = S3::new(&config, Some("test-image")).unwrap(); + + s3.metrics(); + + let reader = s3.get_reader("test").unwrap(); + assert_eq!(reader.retry_limit(), 6); + + s3.shutdown(); + } + + #[test] + fn test_s3_state_url() { + let (_, resource, url) = get_test_s3_state(); + assert_eq!(resource, "/test-bucket/test-prefix-test-object?a=b&c=d"); + assert_eq!( + url, + "http://localhost:9000/test-bucket/test-prefix-test-object?a=b&c=d" + ); + } + + #[test] + fn test_s3_state_sign() { + let (state, resource, url) = get_test_s3_state(); + println!("{}", url); + let mut headers = HeaderMap::new(); + headers.append("Range", "bytes=5242900-".parse().unwrap()); + let result = state.sign(Method::GET, &mut headers, &resource, &url); + assert!(result.is_ok()); + + use regex::Regex; + let re = Regex::new(r"^AWS4-HMAC-SHA256 Credential=test-key/[0-9]{8}/us-east-1/s3/aws4_request, SignedHeaders=host;range;x-amz-content-sha256;x-amz-date, Signature=[A-Fa-f0-9]{64}$").unwrap(); + let authorization = headers.get("Authorization").unwrap(); + assert!(re.is_match(authorization.to_str().unwrap())); + } +} diff --git a/storage/src/cache/cachedfile.rs b/storage/src/cache/cachedfile.rs index d30bcb1762b..925245bdf84 100644 --- a/storage/src/cache/cachedfile.rs +++ b/storage/src/cache/cachedfile.rs @@ -1,1920 +1,1920 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Common cached file object for `FileCacheMgr` and `FsCacheMgr`. -//! -//! The `FileCacheEntry` manages local cached blob objects from remote backends to improve -//! performance. It may be used by both the userspace `FileCacheMgr` or the `FsCacheMgr` based -//! on the in-kernel fscache system. 
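//!
//! A rough usage sketch for loading blob metadata (added for illustration; the
//! variable names and argument values are placeholders, not taken from the callers
//! in this crate):
//!
//! ```ignore
//! // Load blob.meta synchronously (sync = true) with validation enabled.
//! let meta = FileCacheMeta::new(blob_file, blob_info, Some(reader), None, true, true)?;
//!
//! // Or load it in the background on a tokio runtime (sync = false); callers of
//! // get_blob_meta() then wait until the data arrives or an error is recorded.
//! let meta = FileCacheMeta::new(blob_file, blob_info, Some(reader), Some(runtime), false, true)?;
//! ```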
- -use std::collections::HashSet; -use std::fs::File; -use std::io::{ErrorKind, Read, Result}; -use std::mem::ManuallyDrop; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; -use std::time::Duration; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nix::sys::uio; -use nydus_utils::compress::Decoder; -use nydus_utils::crypt::{self, Cipher, CipherContext}; -use nydus_utils::metrics::{BlobcacheMetrics, Metric}; -use nydus_utils::{compress, digest, round_up_usize, DelayType, Delayer, FileRangeReader}; -use tokio::runtime::Runtime; - -use crate::backend::BlobReader; -use crate::cache::state::ChunkMap; -use crate::cache::worker::{AsyncPrefetchConfig, AsyncPrefetchMessage, AsyncWorkerMgr}; -use crate::cache::{BlobCache, BlobIoMergeState}; -use crate::device::{ - BlobChunkInfo, BlobInfo, BlobIoDesc, BlobIoRange, BlobIoSegment, BlobIoTag, BlobIoVec, - BlobObject, BlobPrefetchRequest, -}; -use crate::meta::{BlobCompressionContextInfo, BlobMetaChunk}; -use crate::utils::{alloc_buf, copyv, readv, MemSliceCursor}; -use crate::{StorageError, StorageResult, RAFS_BATCH_SIZE_TO_GAP_SHIFT, RAFS_DEFAULT_CHUNK_SIZE}; - -const DOWNLOAD_META_RETRY_COUNT: u32 = 5; -const DOWNLOAD_META_RETRY_DELAY: u64 = 400; -const ENCRYPTION_PAGE_SIZE: usize = 4096; - -#[derive(Default, Clone)] -pub(crate) struct FileCacheMeta { - has_error: Arc, - meta: Arc>>>, -} - -impl FileCacheMeta { - pub(crate) fn new( - blob_file: String, - blob_info: Arc, - reader: Option>, - runtime: Option>, - sync: bool, - validation: bool, - ) -> Result { - if sync { - match BlobCompressionContextInfo::new( - &blob_file, - &blob_info, - reader.as_ref(), - validation, - ) { - Ok(m) => Ok(FileCacheMeta { - has_error: Arc::new(AtomicBool::new(false)), - meta: Arc::new(Mutex::new(Some(Arc::new(m)))), - }), - Err(e) => Err(e), - } - } else { - let meta = FileCacheMeta { - has_error: Arc::new(AtomicBool::new(false)), - meta: Arc::new(Mutex::new(None)), - }; - let meta1 = meta.clone(); - - if let Some(r) = runtime { - r.as_ref().spawn_blocking(move || { - let mut retry = 0; - let mut delayer = Delayer::new( - DelayType::BackOff, - Duration::from_millis(DOWNLOAD_META_RETRY_DELAY), - ); - while retry < DOWNLOAD_META_RETRY_COUNT { - match BlobCompressionContextInfo::new( - &blob_file, - &blob_info, - reader.as_ref(), - validation, - ) { - Ok(m) => { - *meta1.meta.lock().unwrap() = Some(Arc::new(m)); - return; - } - Err(e) => { - info!("temporarily failed to get blob.meta, {}", e); - delayer.delay(); - retry += 1; - } - } - } - warn!("failed to get blob.meta"); - meta1.has_error.store(true, Ordering::Release); - }); - } else { - warn!("Want download blob meta asynchronously but no runtime."); - } - - Ok(meta) - } - } - - pub(crate) fn get_blob_meta(&self) -> Option> { - loop { - let meta = self.meta.lock().unwrap(); - if meta.is_some() { - return meta.clone(); - } - drop(meta); - if self.has_error.load(Ordering::Acquire) { - return None; - } - std::thread::sleep(Duration::from_millis(2)); - } - } -} - -/// Helper struct to manage and call BlobCompressionContextInfo. 
-struct BlobCCI { - meta: Option>, -} - -impl BlobCCI { - fn new() -> Self { - BlobCCI { meta: None } - } - - fn is_none(&self) -> bool { - self.meta.is_none() - } - - fn set_meta(&mut self, meta: Option>) -> Result<&Self> { - if meta.is_none() { - return Err(einval!("failed to get blob meta info")); - } - self.meta = meta; - Ok(self) - } - - fn get_compressed_offset(&self, chunk: &Arc) -> Result { - Ok(chunk.compressed_offset()) - } - - fn get_compressed_size(&self, chunk: &Arc) -> Result { - let size = if chunk.is_batch() { - self.meta - .as_ref() - .unwrap() - .get_compressed_size(chunk.id())? - } else { - chunk.compressed_size() - }; - Ok(size) - } - - fn get_compressed_info(&self, chunk: &Arc) -> Result<(u64, u32)> { - Ok(( - self.get_compressed_offset(chunk)?, - self.get_compressed_size(chunk)?, - )) - } - - fn get_compressed_end(&self, chunk: &Arc) -> Result { - let (offset, size) = self.get_compressed_info(chunk)?; - Ok(offset + size as u64) - } -} - -pub(crate) struct FileCacheEntry { - pub(crate) blob_id: String, - pub(crate) blob_info: Arc, - pub(crate) cache_cipher_object: Arc, - pub(crate) cache_cipher_context: Arc, - pub(crate) chunk_map: Arc, - pub(crate) file: Arc, - pub(crate) meta: Option, - pub(crate) metrics: Arc, - pub(crate) prefetch_state: Arc, - pub(crate) reader: Arc, - pub(crate) runtime: Arc, - pub(crate) workers: Arc, - - pub(crate) blob_compressed_size: u64, - pub(crate) blob_uncompressed_size: u64, - // Whether `get_blob_object()` is supported. - pub(crate) is_get_blob_object_supported: bool, - // Cache raw data from backend instead of decompressed/decrypted plaintext. - pub(crate) is_raw_data: bool, - // The data in cache file is uncompressed and encrypted. - pub(crate) is_cache_encrypted: bool, - // Whether direct chunkmap is used. - pub(crate) is_direct_chunkmap: bool, - // The blob is for an stargz image. - pub(crate) is_legacy_stargz: bool, - // The blob is for an RAFS filesystem in `TARFS` mode. - pub(crate) is_tarfs: bool, - // The blob contains batch chunks. - pub(crate) is_batch: bool, - // The blob is based on ZRan decompression algorithm. - pub(crate) is_zran: bool, - // True if direct IO is enabled for the `self.file`, supported for fscache only. - pub(crate) dio_enabled: bool, - // Data from the file cache should be validated before use. - pub(crate) need_validation: bool, - // Amplified user IO request batch size to read data from remote storage backend / local cache. - pub(crate) user_io_batch_size: u32, - pub(crate) prefetch_config: Arc, -} - -impl FileCacheEntry { - pub(crate) fn get_blob_size(reader: &Arc, blob_info: &BlobInfo) -> Result { - // Stargz needs blob size information, so hacky! - let size = if blob_info.is_legacy_stargz() { - reader.blob_size().map_err(|e| einval!(e))? 
- } else { - blob_info.compressed_size() - }; - - Ok(size) - } - - fn delay_persist_chunk_data(&self, chunk: Arc, buffer: Arc) { - let delayed_chunk_map = self.chunk_map.clone(); - let file = self.file.clone(); - let metrics = self.metrics.clone(); - let is_raw_data = self.is_raw_data; - let is_cache_encrypted = self.is_cache_encrypted; - let cipher_object = self.cache_cipher_object.clone(); - let cipher_context = self.cache_cipher_context.clone(); - - metrics.buffered_backend_size.add(buffer.size() as u64); - self.runtime.spawn_blocking(move || { - metrics.buffered_backend_size.sub(buffer.size() as u64); - let mut t_buf; - let buf = if !is_raw_data && is_cache_encrypted { - let (key, iv) = cipher_context.generate_cipher_meta(&chunk.chunk_id().data); - let buf = buffer.slice(); - t_buf = alloc_buf(round_up_usize(buf.len(), ENCRYPTION_PAGE_SIZE)); - - let mut pos = 0; - while pos < buf.len() { - let mut s_buf; - // Padding to buffer to 4096 bytes if needed. - let buf = if pos + ENCRYPTION_PAGE_SIZE > buf.len() { - s_buf = buf[pos..].to_vec(); - s_buf.resize(ENCRYPTION_PAGE_SIZE, 0); - &s_buf - } else { - &buf[pos..pos + ENCRYPTION_PAGE_SIZE] - }; - - assert_eq!(buf.len(), ENCRYPTION_PAGE_SIZE); - match cipher_object.encrypt(key, Some(&iv), buf) { - Ok(buf2) => { - assert_eq!(buf2.len(), ENCRYPTION_PAGE_SIZE); - t_buf[pos..pos + ENCRYPTION_PAGE_SIZE].copy_from_slice(buf2.as_ref()); - pos += ENCRYPTION_PAGE_SIZE; - } - Err(_) => { - Self::_update_chunk_pending_status( - &delayed_chunk_map, - chunk.as_ref(), - false, - ); - return; - } - } - } - &t_buf - } else { - buffer.slice() - }; - - let offset = if is_raw_data { - chunk.compressed_offset() - } else { - chunk.uncompressed_offset() - }; - let res = Self::persist_cached_data(&file, offset, buf); - Self::_update_chunk_pending_status(&delayed_chunk_map, chunk.as_ref(), res.is_ok()); - }); - } - - fn persist_chunk_data(&self, chunk: &dyn BlobChunkInfo, buf: &[u8]) { - let offset = chunk.uncompressed_offset(); - let res = Self::persist_cached_data(&self.file, offset, buf); - self.update_chunk_pending_status(chunk, res.is_ok()); - } - - fn persist_cached_data(file: &Arc, offset: u64, buffer: &[u8]) -> Result<()> { - let fd = file.as_raw_fd(); - - let n = loop { - let ret = uio::pwrite(fd, buffer, offset as i64).map_err(|_| last_error!()); - match ret { - Ok(nr_write) => { - trace!("write {}(offset={}) bytes to cache file", nr_write, offset); - break nr_write; - } - Err(err) => { - // Retry if the IO is interrupted by signal. 
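// Added clarifying note: only `ErrorKind::Interrupted` (EINTR) triggers another
// `pwrite` attempt; any other error is returned immediately, and a short write
// (`n` smaller than `buffer.len()`) is reported as an I/O error below rather than
// being resumed from the partial offset.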
- if err.kind() != ErrorKind::Interrupted { - return Err(err); - } - } - } - }; - - if n != buffer.len() { - Err(eio!("failed to write data to file cache")) - } else { - Ok(()) - } - } - - fn update_chunk_pending_status(&self, chunk: &dyn BlobChunkInfo, success: bool) { - Self::_update_chunk_pending_status(&self.chunk_map, chunk, success) - } - - fn _update_chunk_pending_status( - chunk_map: &Arc, - chunk: &dyn BlobChunkInfo, - success: bool, - ) { - if success { - if let Err(e) = chunk_map.set_ready_and_clear_pending(chunk) { - error!( - "Failed change caching state for chunk of offset {}, {:?}", - chunk.compressed_offset(), - e - ) - } - } else { - error!( - "Failed to persist data for chunk at offset {}", - chunk.compressed_offset() - ); - chunk_map.clear_pending(chunk); - } - } - - fn prefetch_batch_size(&self) -> u64 { - if self.prefetch_config.batch_size < 0x2_0000 { - 0x2_0000 - } else { - self.prefetch_config.batch_size as u64 - } - } - - fn user_io_batch_size(&self) -> u64 { - if self.user_io_batch_size < 0x2_0000 { - 0x2_0000 - } else { - self.user_io_batch_size as u64 - } - } - - fn extend_pending_chunks( - &self, - chunks: &[Arc], - batch_size: u64, - ) -> Result>>> { - assert!(!chunks.is_empty()); - match self.get_blob_meta_info() { - Err(e) => Err(e), - Ok(None) => Ok(None), - Ok(Some(bm)) => { - let v = bm.add_more_chunks(chunks, batch_size)?; - Ok(Some(self.strip_ready_chunks(bm, Some(chunks), v))) - } - } - } - - fn strip_ready_chunks( - &self, - meta: Arc, - old_chunks: Option<&[Arc]>, - mut extended_chunks: Vec>, - ) -> Vec> { - if self.is_zran { - // Special handling for zran chunk. - // Because zran chunk has not been deduplicated at build time. - // So zran index is used to check if chunk is ready. - let mut set = HashSet::new(); - for c in extended_chunks.iter() { - if !matches!(self.chunk_map.is_ready(c.as_ref()), Ok(true)) { - let zran_idx = meta - .get_zran_index(c.id()) - .map_err(|e| error!("Failed to get zran index for chunk {}: {}", c.id(), e)) - .unwrap_or(u32::MAX); - set.insert(zran_idx); - } - } - - let first = old_chunks.as_ref().map(|v| v[0].id()).unwrap_or(u32::MAX); - let mut start = 0; - while start < extended_chunks.len() - 1 { - let id = extended_chunks[start].id(); - if id == first { - break; - } - match &meta.get_zran_index(id) { - Ok(i) => { - if set.contains(i) { - break; - } - } - Err(_e) => break, - } - start += 1; - } - - let last = old_chunks - .as_ref() - .map(|v| v[v.len() - 1].id()) - .unwrap_or(u32::MAX); - let mut end = extended_chunks.len() - 1; - while end > start { - let id = extended_chunks[end].id(); - if id == last { - break; - } - match &meta.get_zran_index(id) { - Ok(i) => { - if set.contains(i) { - break; - } - } - Err(_e) => break, - } - end -= 1; - } - - assert!(end >= start, "start 0x{:x}, end 0x{:x}", start, end); - if start == 0 && end == extended_chunks.len() - 1 { - extended_chunks - } else { - extended_chunks[start..=end].to_vec() - } - } else { - // For normal chunks and batch chunks. - // No special handling for batch chunk. - // Because batch chunk has been deduplicated at build time. - // It is enough to just check if chunk is ready. 
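// Added illustrative note: the loop below only trims ready chunks from the tail of
// the extended list. With readiness flags [pending, ready, pending, ready, ready]
// it pops the last two entries and keeps [pending, ready, pending]; ready chunks in
// the middle are retained, presumably so the surviving range stays contiguous for a
// single backend read.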
- while !extended_chunks.is_empty() { - let chunk = &extended_chunks[extended_chunks.len() - 1]; - if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true)) { - extended_chunks.pop(); - } else { - break; - } - } - extended_chunks - } - } - - fn get_blob_range(&self, chunks: &[Arc]) -> Result<(u64, u64, usize)> { - assert!(!chunks.is_empty()); - let (start, end) = if self.is_zran { - let meta = self - .get_blob_meta_info()? - .ok_or_else(|| einval!("failed to get blob meta object"))?; - let zran_index = meta.get_zran_index(chunks[0].id())?; - let (ctx, _) = meta.get_zran_context(zran_index)?; - let blob_start = ctx.in_offset; - let zran_index = meta.get_zran_index(chunks[chunks.len() - 1].id())?; - let (ctx, _) = meta.get_zran_context(zran_index)?; - let blob_end = ctx.in_offset + ctx.in_len as u64; - (blob_start, blob_end) - } else if self.is_batch { - let first_chunk = &chunks[0]; - let last_chunk = &chunks[chunks.len() - 1]; - - let mut blob_cci = BlobCCI::new(); - - // Get blob meta info iff the chunk is batch chunk. - if first_chunk.is_batch() || last_chunk.is_batch() { - blob_cci.set_meta(self.get_blob_meta_info()?)?; - } - - let blob_start = blob_cci.get_compressed_offset(first_chunk)?; - let blob_end = blob_cci.get_compressed_end(last_chunk)?; - - (blob_start, blob_end) - } else { - let last = chunks.len() - 1; - (chunks[0].compressed_offset(), chunks[last].compressed_end()) - }; - - let size = end - start; - if end - start > u32::MAX as u64 { - Err(einval!( - "requested blob range is too bigger, larger than u32::MAX" - )) - } else { - Ok((start, end, size as usize)) - } - } -} - -impl AsRawFd for FileCacheEntry { - fn as_raw_fd(&self) -> RawFd { - self.file.as_raw_fd() - } -} - -impl BlobCache for FileCacheEntry { - fn blob_id(&self) -> &str { - &self.blob_id - } - - fn blob_uncompressed_size(&self) -> Result { - Ok(self.blob_uncompressed_size) - } - - fn blob_compressed_size(&self) -> Result { - Ok(self.blob_compressed_size) - } - - fn blob_compressor(&self) -> compress::Algorithm { - self.blob_info.compressor() - } - - fn blob_cipher(&self) -> crypt::Algorithm { - self.blob_info.cipher() - } - - fn blob_cipher_object(&self) -> Arc { - self.blob_info.cipher_object() - } - - fn blob_cipher_context(&self) -> Option { - self.blob_info.cipher_context() - } - - fn blob_digester(&self) -> digest::Algorithm { - self.blob_info.digester() - } - - fn is_legacy_stargz(&self) -> bool { - self.is_legacy_stargz - } - - fn is_batch(&self) -> bool { - self.is_batch - } - - fn is_zran(&self) -> bool { - self.is_zran - } - - fn need_validation(&self) -> bool { - self.need_validation - } - - fn reader(&self) -> &dyn BlobReader { - &*self.reader - } - - fn get_chunk_map(&self) -> &Arc { - &self.chunk_map - } - - fn get_chunk_info(&self, chunk_index: u32) -> Option> { - self.meta - .as_ref() - .and_then(|v| v.get_blob_meta()) - .map(|v| BlobMetaChunk::new(chunk_index as usize, &v.state)) - } - - fn get_blob_object(&self) -> Option<&dyn BlobObject> { - if self.is_get_blob_object_supported { - Some(self) - } else { - None - } - } - - fn start_prefetch(&self) -> StorageResult<()> { - self.prefetch_state.fetch_add(1, Ordering::Release); - Ok(()) - } - - fn stop_prefetch(&self) -> StorageResult<()> { - loop { - let val = self.prefetch_state.load(Ordering::Acquire); - if val > 0 - && self - .prefetch_state - .compare_exchange(val, val - 1, Ordering::AcqRel, Ordering::Relaxed) - .is_err() - { - continue; - } - - if val == 0 { - warn!("storage: inaccurate prefetch status"); - } - if val == 0 || val == 1 
{ - self.workers.flush_pending_prefetch_requests(&self.blob_id); - return Ok(()); - } - } - } - - fn is_prefetch_active(&self) -> bool { - self.prefetch_state.load(Ordering::Acquire) > 0 - } - - fn prefetch( - &self, - blob_cache: Arc, - prefetches: &[BlobPrefetchRequest], - bios: &[BlobIoDesc], - ) -> StorageResult { - // Handle blob prefetch request first, it may help performance. - for req in prefetches { - let msg = AsyncPrefetchMessage::new_blob_prefetch( - blob_cache.clone(), - req.offset as u64, - req.len as u64, - ); - let _ = self.workers.send_prefetch_message(msg); - } - - // Then handle fs prefetch - let max_comp_size = self.prefetch_batch_size(); - let mut bios = bios.to_vec(); - bios.sort_by_key(|entry| entry.chunkinfo.compressed_offset()); - self.metrics.prefetch_unmerged_chunks.add(bios.len() as u64); - BlobIoMergeState::merge_and_issue( - &bios, - max_comp_size, - max_comp_size as u64 >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, - |req: BlobIoRange| { - let msg = AsyncPrefetchMessage::new_fs_prefetch(blob_cache.clone(), req); - let _ = self.workers.send_prefetch_message(msg); - }, - ); - - Ok(0) - } - - fn prefetch_range(&self, range: &BlobIoRange) -> Result { - let mut pending = Vec::with_capacity(range.chunks.len()); - if !self.chunk_map.is_persist() { - let mut d_size = 0; - for c in range.chunks.iter() { - d_size = std::cmp::max(d_size, c.uncompressed_size() as usize); - } - let mut buf = alloc_buf(d_size); - - for c in range.chunks.iter() { - if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) { - // The chunk is ready, so skip it. - continue; - } - - // For digested chunk map, we must check whether the cached data is valid because - // the digested chunk map cannot persist readiness state. - let d_size = c.uncompressed_size() as usize; - match self.read_file_cache(c.as_ref(), &mut buf[0..d_size]) { - // The cached data is valid, set the chunk as ready. - Ok(_v) => self.update_chunk_pending_status(c.as_ref(), true), - // The cached data is invalid, queue the chunk for reading from backend. - Err(_e) => pending.push(c.clone()), - } - } - } else { - for c in range.chunks.iter() { - if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) { - // The chunk is ready, so skip it. - continue; - } else { - pending.push(c.clone()); - } - } - } - - let mut total_size = 0; - let mut start = 0; - while start < pending.len() { - // Figure out the range with continuous chunk ids, be careful that `end` is inclusive. - let mut end = start; - while end < pending.len() - 1 && pending[end + 1].id() == pending[end].id() + 1 { - end += 1; - } - - let (blob_offset, _blob_end, blob_size) = self.get_blob_range(&pending[start..=end])?; - match self.read_chunks_from_backend(blob_offset, blob_size, &pending[start..=end], true) - { - Ok(mut bufs) => { - total_size += blob_size; - if self.is_raw_data { - let res = Self::persist_cached_data( - &self.file, - blob_offset, - bufs.compressed_buf(), - ); - for c in pending.iter().take(end + 1).skip(start) { - self.update_chunk_pending_status(c.as_ref(), res.is_ok()); - } - } else { - for idx in start..=end { - let buf = match bufs.next() { - None => return Err(einval!("invalid chunk decompressed status")), - Some(Err(e)) => { - for chunk in &mut pending[idx..=end] { - self.update_chunk_pending_status(chunk.as_ref(), false); - } - return Err(e); - } - Some(Ok(v)) => v, - }; - self.persist_chunk_data(pending[idx].as_ref(), &buf); - } - } - } - Err(_e) => { - // Clear the pending flag for all chunks in processing. 
- for chunk in &mut pending[start..=end] { - self.update_chunk_pending_status(chunk.as_ref(), false); - } - } - } - - start = end + 1; - } - - Ok(total_size) - } - - fn read(&self, iovec: &mut BlobIoVec, buffers: &[FileVolatileSlice]) -> Result { - self.metrics.total.inc(); - self.workers.consume_prefetch_budget(iovec.size()); - - if iovec.is_empty() { - Ok(0) - } else if iovec.len() == 1 { - let mut state = FileIoMergeState::new(); - let mut cursor = MemSliceCursor::new(buffers); - let req = BlobIoRange::new(&iovec.bi_vec[0], 1); - self.dispatch_one_range(&req, &mut cursor, &mut state) - } else { - self.read_iter(&mut iovec.bi_vec, buffers) - } - } - - fn get_blob_meta_info(&self) -> Result>> { - if let Some(meta) = self.meta.as_ref() { - if let Some(bm) = meta.get_blob_meta() { - Ok(Some(bm)) - } else { - Err(einval!("failed to get blob meta object for cache file")) - } - } else { - Ok(None) - } - } -} - -impl BlobObject for FileCacheEntry { - fn base_offset(&self) -> u64 { - 0 - } - - fn is_all_data_ready(&self) -> bool { - // Assume data from tar file is always ready. - if self.is_tarfs { - true - } else if let Some(b) = self.chunk_map.as_range_map() { - b.is_range_all_ready() - } else { - false - } - } - - fn fetch_range_compressed(&self, offset: u64, size: u64, prefetch: bool) -> Result<()> { - // Assume data from tar file is always ready. - if self.is_tarfs { - return Ok(()); - } - - let meta = self.meta.as_ref().ok_or_else(|| enoent!())?; - let meta = meta.get_blob_meta().ok_or_else(|| einval!())?; - let mut chunks = - meta.get_chunks_compressed(offset, size, self.prefetch_batch_size(), prefetch)?; - if !chunks.is_empty() { - if let Some(meta) = self.get_blob_meta_info()? { - chunks = self.strip_ready_chunks(meta, None, chunks); - } - } else { - return Err(einval!(format!( - "fetch_range_compressed offset 0x{:x}, size 0x{:x}", - offset, size - ))); - } - if chunks.is_empty() { - Ok(()) - } else { - self.do_fetch_chunks(&chunks, true) - } - } - - fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> Result<()> { - // Assume data from tar file is always ready. - if self.is_tarfs { - return Ok(()); - } - - let meta = self.meta.as_ref().ok_or_else(|| einval!())?; - let meta = meta.get_blob_meta().ok_or_else(|| einval!())?; - let mut chunks = meta.get_chunks_uncompressed(offset, size, self.user_io_batch_size())?; - if let Some(meta) = self.get_blob_meta_info()? { - chunks = self.strip_ready_chunks(meta, None, chunks); - } - if chunks.is_empty() { - Ok(()) - } else { - self.do_fetch_chunks(&chunks, false) - } - } - - fn prefetch_chunks(&self, range: &BlobIoRange) -> Result<()> { - // Assume data from tar file is always ready. - if self.is_tarfs { - return Ok(()); - } - - let chunks_extended; - let mut chunks = &range.chunks; - if let Some(v) = self.extend_pending_chunks(chunks, self.prefetch_batch_size())? { - chunks_extended = v; - chunks = &chunks_extended; - } - - let mut start = 0; - while start < chunks.len() { - // Figure out the range with continuous chunk ids, be careful that `end` is inclusive. - let mut end = start; - while end < chunks.len() - 1 && chunks[end + 1].id() == chunks[end].id() + 1 { - end += 1; - } - self.do_fetch_chunks(&chunks[start..=end], true)?; - start = end + 1; - } - - Ok(()) - } -} - -impl FileCacheEntry { - fn do_fetch_chunks(&self, chunks: &[Arc], prefetch: bool) -> Result<()> { - // Validate input parameters. - assert!(!chunks.is_empty()); - - // Get chunks not ready yet, also marking them as in-flight. 
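// Worked example with hypothetical values (added for clarity): if the request covers
// chunks 10..=14 (`chunk_index = 10`, `count = 5`) and the range map reports chunks
// 11 and 13 as still pending, `status` becomes [false, true, false, true, false] with
// `start_idx = 1` and `end_idx = 3`; a single backend read then spans chunks 11..=13,
// and only the entries with `status[idx] == true` are persisted afterwards.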
- let bitmap = self - .chunk_map - .as_range_map() - .ok_or_else(|| einval!("invalid chunk_map for do_fetch_chunks()"))?; - let chunk_index = chunks[0].id(); - let count = chunks.len() as u32; - let pending = match bitmap.check_range_ready_and_mark_pending(chunk_index, count)? { - None => return Ok(()), - Some(v) => v, - }; - - let mut status = vec![false; count as usize]; - let (start_idx, end_idx) = { - let mut start = u32::MAX; - let mut end = 0; - for chunk_id in pending.iter() { - status[(*chunk_id - chunk_index) as usize] = true; - start = std::cmp::min(*chunk_id - chunk_index, start); - end = std::cmp::max(*chunk_id - chunk_index, end); - } - (start as usize, end as usize) - }; - - if start_idx <= end_idx { - let start_chunk = &chunks[start_idx]; - let end_chunk = &chunks[end_idx]; - let (blob_offset, blob_end, blob_size) = - self.get_blob_range(&chunks[start_idx..=end_idx])?; - trace!( - "fetch data range {:x}-{:x} for chunk {}-{} from blob {:x}", - blob_offset, - blob_end, - start_chunk.id(), - end_chunk.id(), - chunks[0].blob_index() - ); - - match self.read_chunks_from_backend( - blob_offset, - blob_size, - &chunks[start_idx..=end_idx], - prefetch, - ) { - Ok(mut bufs) => { - if self.is_raw_data { - let res = Self::persist_cached_data( - &self.file, - blob_offset, - bufs.compressed_buf(), - ); - for idx in start_idx..=end_idx { - if status[idx] { - self.update_chunk_pending_status(chunks[idx].as_ref(), res.is_ok()); - } - } - } else { - for idx in start_idx..=end_idx { - let mut buf = match bufs.next() { - None => return Err(einval!("invalid chunk decompressed status")), - Some(Err(e)) => { - for idx in idx..=end_idx { - if status[idx] { - bitmap.clear_range_pending(chunks[idx].id(), 1) - } - } - return Err(e); - } - Some(Ok(v)) => v, - }; - - if status[idx] { - if self.dio_enabled { - self.adjust_buffer_for_dio(&mut buf) - } - self.persist_chunk_data(chunks[idx].as_ref(), buf.as_ref()); - } - } - } - } - Err(e) => { - for idx in 0..chunks.len() { - if status[idx] { - bitmap.clear_range_pending(chunks[idx].id(), 1) - } - } - return Err(e); - } - } - } - - if !bitmap.wait_for_range_ready(chunk_index, count)? { - if prefetch { - return Err(eio!(format!( - "failed to prefetch data from storage backend for chunk {}/{}", - chunk_index, count - ))); - } - - // if we are in on-demand path, retry for the timeout chunks - for chunk in chunks { - match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) { - Err(e) => return Err(eio!(format!("do_fetch_chunks failed, {:?}", e))), - Ok(true) => {} - Ok(false) => { - info!("retry for timeout chunk, {}", chunk.id()); - let mut buf = alloc_buf(chunk.uncompressed_size() as usize); - self.read_chunk_from_backend(chunk.as_ref(), &mut buf) - .map_err(|e| { - self.update_chunk_pending_status(chunk.as_ref(), false); - eio!(format!("read_raw_chunk failed, {:?}", e)) - })?; - if self.dio_enabled { - self.adjust_buffer_for_dio(&mut buf) - } - self.persist_chunk_data(chunk.as_ref(), &buf); - } - } - } - } - - Ok(()) - } - - fn adjust_buffer_for_dio(&self, buf: &mut Vec) { - assert_eq!(buf.capacity() % 0x1000, 0); - if buf.len() != buf.capacity() { - // Padding with 0 for direct IO. - buf.resize(buf.capacity(), 0); - } - } -} - -impl FileCacheEntry { - // There are some assumption applied to the `bios` passed to `read_iter()`. - // - The blob address of chunks in `bios` are continuous. - // - There is at most one user io request in the `bios`. - // - The user io request may not be aligned on chunk boundary. 
- // - The user io request may partially consume data from the first and last chunk of user io - // request. - // - Optionally there may be some prefetch/read amplify requests following the user io request. - // - The optional prefetch/read amplify requests may be silently dropped. - fn read_iter(&self, bios: &mut [BlobIoDesc], buffers: &[FileVolatileSlice]) -> Result { - // Merge requests with continuous blob addresses. - let requests = self - .merge_requests_for_user(bios, self.user_io_batch_size()) - .ok_or_else(|| { - for bio in bios.iter() { - self.update_chunk_pending_status(&bio.chunkinfo, false); - } - einval!("Empty bios list") - })?; - - let mut state = FileIoMergeState::new(); - let mut cursor = MemSliceCursor::new(buffers); - let mut total_read: usize = 0; - for (idx, req) in requests.iter().enumerate() { - total_read += self - .dispatch_one_range(req, &mut cursor, &mut state) - .map_err(|e| { - for req in requests.iter().skip(idx) { - for chunk in req.chunks.iter() { - self.update_chunk_pending_status(chunk.as_ref(), false); - } - } - e - })?; - state.reset(); - } - - Ok(total_read) - } - - fn dispatch_one_range( - &self, - req: &BlobIoRange, - cursor: &mut MemSliceCursor, - state: &mut FileIoMergeState, - ) -> Result { - let mut total_read: usize = 0; - - trace!("dispatch single io range {:?}", req); - let mut blob_cci = BlobCCI::new(); - for (i, chunk) in req.chunks.iter().enumerate() { - let is_ready = match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) { - Ok(true) => true, - Ok(false) => false, - Err(StorageError::Timeout) => false, // Retry if waiting for inflight IO timeouts - Err(e) => return Err(einval!(e)), - }; - - // Directly read chunk data from file cache into user buffer iff: - // - the chunk is ready in the file cache - // - data in the file cache is plaintext. - // - data validation is disabled - if is_ready && !self.is_raw_data && !self.is_cache_encrypted && !self.need_validation() - { - // Internal IO should not be committed to local cache region, just - // commit this region without pushing any chunk to avoid discontinuous - // chunks in a region. - if req.tags[i].is_user_io() { - state.push( - RegionType::CacheFast, - chunk.uncompressed_offset(), - chunk.uncompressed_size(), - req.tags[i].clone(), - None, - )?; - } else { - state.commit() - } - } else if !self.is_direct_chunkmap || is_ready { - // Case to try loading data from cache - // - chunk is ready but data validation is needed. - // - direct chunk map is not used, so there may be data in the file cache but - // the readiness flag has been lost. - if req.tags[i].is_user_io() { - state.push( - RegionType::CacheSlow, - chunk.uncompressed_offset(), - chunk.uncompressed_size(), - req.tags[i].clone(), - Some(req.chunks[i].clone()), - )?; - } else { - state.commit(); - // On slow path, don't try to handle internal(read amplification) IO. - if !is_ready { - self.chunk_map.clear_pending(chunk.as_ref()); - } - } - } else { - let tag = if let BlobIoTag::User(ref s) = req.tags[i] { - BlobIoTag::User(s.clone()) - } else { - BlobIoTag::Internal - }; - - // Lazy load blob meta info if needed. - if chunk.is_batch() && blob_cci.is_none() { - blob_cci.set_meta(self.get_blob_meta_info()?)?; - } - - let (start, len) = blob_cci.get_compressed_info(chunk)?; - - // NOTE: Only this request region can read more chunks from backend with user io. 
- state.push(RegionType::Backend, start, len, tag, Some(chunk.clone()))?; - } - } - - for r in &state.regions { - use RegionType::*; - - total_read += match r.r#type { - CacheFast => self.dispatch_cache_fast(cursor, r)?, - CacheSlow => self.dispatch_cache_slow(cursor, r)?, - Backend => self.dispatch_backend(cursor, r)?, - } - } - - Ok(total_read) - } - - // Directly read data requested by user from the file cache into the user memory buffer. - fn dispatch_cache_fast(&self, cursor: &mut MemSliceCursor, region: &Region) -> Result { - let offset = region.blob_address + region.seg.offset as u64; - let size = region.seg.len as usize; - let mut iovec = cursor.consume(size); - - self.metrics.partial_hits.inc(); - readv(self.file.as_raw_fd(), &mut iovec, offset) - } - - // Try to read data from blob cache and validate it, fallback to storage backend. - fn dispatch_cache_slow(&self, cursor: &mut MemSliceCursor, region: &Region) -> Result { - let mut total_read = 0; - - for (i, c) in region.chunks.iter().enumerate() { - let user_offset = if i == 0 { region.seg.offset } else { 0 }; - let size = std::cmp::min( - c.uncompressed_size() - user_offset, - region.seg.len - total_read as u32, - ); - total_read += self.read_single_chunk(c.clone(), user_offset, size, cursor)?; - } - - Ok(total_read) - } - - fn dispatch_backend(&self, mem_cursor: &mut MemSliceCursor, r: &Region) -> Result { - let mut region = r; - debug!( - "{} try to read {} bytes of {} chunks from backend", - std::thread::current().name().unwrap_or_default(), - region.blob_len, - region.chunks.len() - ); - - if region.chunks.is_empty() { - return Ok(0); - } else if !region.has_user_io() { - debug!("No user data"); - for c in ®ion.chunks { - self.chunk_map.clear_pending(c.as_ref()); - } - return Ok(0); - } - if region.chunks.len() > 1 { - let mut blob_cci = BlobCCI::new(); - // Validate the chunk order. - for idx in 0..region.chunks.len() - 1 { - let pre_chunk = ®ion.chunks[idx]; - let next_chunk = ®ion.chunks[idx + 1]; - - // Lazy load blob meta info if needed. - if (pre_chunk.is_batch() || next_chunk.is_batch()) && blob_cci.is_none() { - blob_cci.set_meta(self.get_blob_meta_info()?)?; - } - - let (pre_offset, pre_size) = blob_cci.get_compressed_info(pre_chunk)?; - let end = pre_offset + pre_size as u64; - - let start = blob_cci.get_compressed_offset(next_chunk)?; - - assert!(end <= start); - assert!(start - end <= self.user_io_batch_size() >> RAFS_BATCH_SIZE_TO_GAP_SHIFT); - assert!(region.chunks[idx].id() < region.chunks[idx + 1].id()); - } - } - - // Try to extend requests. - let mut region_hold; - if let Some(v) = self.extend_pending_chunks(®ion.chunks, self.user_io_batch_size())? 
{ - if v.len() > r.chunks.len() { - let mut tag_set = HashSet::new(); - for (idx, chunk) in region.chunks.iter().enumerate() { - if region.tags[idx] { - tag_set.insert(chunk.id()); - } - } - - region_hold = Region::with(self, region, v)?; - for (idx, c) in region_hold.chunks.iter().enumerate() { - if tag_set.contains(&c.id()) { - region_hold.tags[idx] = true; - } - } - region = ®ion_hold; - trace!( - "extended blob request from 0x{:x}/0x{:x} to 0x{:x}/0x{:x} with {} chunks", - r.blob_address, - r.blob_len, - region_hold.blob_address, - region_hold.blob_len, - region_hold.chunks.len(), - ); - } - } - - if self.is_zran() { - let mut r = region.clone(); - let (blob_offset, _blob_end, blob_size) = self.get_blob_range(&r.chunks)?; - r.blob_address = blob_offset; - r.blob_len = blob_size as u32; - region_hold = r; - region = ®ion_hold; - } - - let bufs = self - .read_chunks_from_backend( - region.blob_address, - region.blob_len as usize, - ®ion.chunks, - false, - ) - .map_err(|e| { - for c in ®ion.chunks { - self.chunk_map.clear_pending(c.as_ref()); - } - e - })?; - - if self.is_raw_data { - let res = - Self::persist_cached_data(&self.file, region.blob_address, bufs.compressed_buf()); - for chunk in region.chunks.iter() { - self.update_chunk_pending_status(chunk.as_ref(), res.is_ok()); - } - res?; - } - - let mut chunk_buffers = Vec::with_capacity(region.chunks.len()); - let mut buffer_holder = Vec::with_capacity(region.chunks.len()); - for (i, v) in bufs.enumerate() { - let d = Arc::new(DataBuffer::Allocated(v?)); - if region.tags[i] { - buffer_holder.push(d.clone()); - } - if !self.is_raw_data { - self.delay_persist_chunk_data(region.chunks[i].clone(), d); - } - } - for d in buffer_holder.iter() { - chunk_buffers.push(d.as_ref().slice()); - } - - let total_read = copyv( - &chunk_buffers, - mem_cursor.mem_slice, - region.seg.offset as usize, - region.seg.len as usize, - mem_cursor.index, - mem_cursor.offset, - ) - .map(|(n, _)| n) - .map_err(|e| { - error!("failed to copy from chunk buf to buf: {:?}", e); - eio!(e) - })?; - mem_cursor.move_cursor(total_read); - - Ok(total_read) - } - - // Called with chunk in READY or PENDING state, exit with chunk set to READY or PENDING cleared. - fn read_single_chunk( - &self, - chunk: Arc, - user_offset: u32, - size: u32, - mem_cursor: &mut MemSliceCursor, - ) -> Result { - trace!( - "read_single_chunk {:x}:{:x}:{:x}/@{}", - chunk.compressed_offset(), - user_offset, - size, - chunk.blob_index() - ); - - let buffer_holder; - let d_size = chunk.uncompressed_size() as usize; - let mut d = DataBuffer::Allocated(alloc_buf(d_size)); - - // Try to read and validate data from cache if: - // - it's an stargz image and the chunk is ready. - // - chunk data validation is enabled. - // - digested or dummy chunk map is used. 
- let is_ready = self.chunk_map.is_ready(chunk.as_ref())?; - let try_cache = is_ready || !self.is_direct_chunkmap; - let buffer = if try_cache && self.read_file_cache(chunk.as_ref(), d.mut_slice()).is_ok() { - self.metrics.whole_hits.inc(); - self.chunk_map.set_ready_and_clear_pending(chunk.as_ref())?; - trace!( - "recover blob cache {} {} offset {} size {}", - chunk.id(), - d_size, - user_offset, - size, - ); - &d - } else { - let c = self - .read_chunk_from_backend(chunk.as_ref(), d.mut_slice()) - .map_err(|e| { - self.chunk_map.clear_pending(chunk.as_ref()); - e - })?; - if self.is_raw_data { - match c { - Some(v) => { - let buf = Arc::new(DataBuffer::Allocated(v)); - self.delay_persist_chunk_data(chunk.clone(), buf); - &d - } - None => { - buffer_holder = Arc::new(d.convert_to_owned_buffer()); - self.delay_persist_chunk_data(chunk.clone(), buffer_holder.clone()); - buffer_holder.as_ref() - } - } - } else { - buffer_holder = Arc::new(d.convert_to_owned_buffer()); - self.delay_persist_chunk_data(chunk.clone(), buffer_holder.clone()); - buffer_holder.as_ref() - } - }; - - let dst_buffers = mem_cursor.inner_slice(); - let read_size = copyv( - &[buffer.slice()], - dst_buffers, - user_offset as usize, - size as usize, - mem_cursor.index, - mem_cursor.offset, - ) - .map(|r| r.0) - .map_err(|e| { - error!("failed to copy from chunk buf to buf: {:?}", e); - eother!(e) - })?; - mem_cursor.move_cursor(read_size); - - Ok(read_size) - } - - fn read_file_cache(&self, chunk: &dyn BlobChunkInfo, buffer: &mut [u8]) -> Result<()> { - if self.is_raw_data { - let offset = chunk.compressed_offset(); - let size = if self.is_legacy_stargz() { - self.get_legacy_stargz_size(offset, chunk.uncompressed_size() as usize)? as u64 - } else { - chunk.compressed_size() as u64 - }; - let mut reader = FileRangeReader::new(&self.file, offset, size); - if !chunk.is_compressed() { - reader.read_exact(buffer)?; - } else if self.blob_compressor() == compress::Algorithm::Lz4Block { - let mut buf = alloc_buf(size as usize); - reader.read_exact(&mut buf)?; - let size = compress::decompress(&buf, buffer, self.blob_compressor())?; - if size != buffer.len() { - return Err(einval!( - "data size decoded by lz4_block doesn't match expected" - )); - } - } else { - let mut decoder = Decoder::new(reader, self.blob_compressor())?; - decoder.read_exact(buffer)?; - } - } else if self.is_cache_encrypted { - let offset = chunk.uncompressed_offset(); - let size = chunk.uncompressed_size() as usize; - let cipher_object = self.cache_cipher_object.clone(); - let cipher_context = self.cache_cipher_context.clone(); - let (key, iv) = cipher_context.generate_cipher_meta(&chunk.chunk_id().data); - - let align_size = round_up_usize(size, ENCRYPTION_PAGE_SIZE); - let mut buf = alloc_buf(align_size); - FileRangeReader::new(&self.file, offset, align_size as u64).read_exact(&mut buf)?; - - let mut pos = 0; - while pos < buffer.len() { - assert!(pos + ENCRYPTION_PAGE_SIZE <= buf.len()); - match cipher_object.decrypt(key, Some(&iv), &buf[pos..pos + ENCRYPTION_PAGE_SIZE]) { - Ok(buf2) => { - let len = std::cmp::min(buffer.len() - pos, ENCRYPTION_PAGE_SIZE); - buffer[pos..pos + len].copy_from_slice(&buf2[..len]); - pos += ENCRYPTION_PAGE_SIZE; - } - Err(_) => return Err(eother!("failed to decrypt data from cache file")), - } - } - } else { - let offset = chunk.uncompressed_offset(); - let size = chunk.uncompressed_size() as u64; - FileRangeReader::new(&self.file, offset, size).read_exact(buffer)?; - } - self.validate_chunk_data(chunk, buffer, false)?; - 
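In the encrypted branch of `read_file_cache` above, ciphertext is stored in whole `ENCRYPTION_PAGE_SIZE` (4096-byte) pages, so the reader decrypts page by page and truncates the final page to the chunk's real uncompressed size. A minimal sketch of that loop, with a toy XOR transform standing in for the real cipher object, looks like this:

```rust
/// Sketch of the page-wise decryption loop; `toy_decrypt_page` is a placeholder
/// for the real cipher object and is not actual cryptography.
const PAGE: usize = 4096;

fn toy_decrypt_page(cipher: &[u8]) -> Vec<u8> {
    cipher.iter().map(|&b| b ^ 0xAA).collect()
}

fn decrypt_chunk(cache_bytes: &[u8], plain_size: usize) -> Vec<u8> {
    // The cache stores ciphertext rounded up to whole pages.
    assert_eq!(cache_bytes.len() % PAGE, 0);
    let mut out = vec![0u8; plain_size];
    let mut pos = 0;
    while pos < plain_size {
        let page = toy_decrypt_page(&cache_bytes[pos..pos + PAGE]);
        // The final page may only be partially used by this chunk.
        let len = (plain_size - pos).min(PAGE);
        out[pos..pos + len].copy_from_slice(&page[..len]);
        pos += PAGE;
    }
    out
}

fn main() {
    // A 5000-byte chunk occupies two 4096-byte pages in the cache file.
    let plain = vec![7u8; 5000];
    let mut cache: Vec<u8> = plain.iter().map(|&b| b ^ 0xAA).collect();
    cache.resize(2 * PAGE, 0); // padding bytes, never copied back out
    assert_eq!(decrypt_chunk(&cache, 5000), plain);
}
```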
Ok(()) - } - - fn merge_requests_for_user( - &self, - bios: &[BlobIoDesc], - max_comp_size: u64, - ) -> Option> { - let mut requests: Vec = Vec::with_capacity(bios.len()); - - BlobIoMergeState::merge_and_issue( - bios, - max_comp_size, - max_comp_size >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, - |mr: BlobIoRange| { - requests.push(mr); - }, - ); - - if requests.is_empty() { - None - } else { - Some(requests) - } - } -} - -/// An enum to reuse existing buffers for IO operations, and CoW on demand. -#[allow(dead_code)] -enum DataBuffer { - Reuse(ManuallyDrop>), - Allocated(Vec), -} - -impl DataBuffer { - fn slice(&self) -> &[u8] { - match self { - Self::Reuse(data) => data.as_slice(), - Self::Allocated(data) => data.as_slice(), - } - } - - fn mut_slice(&mut self) -> &mut [u8] { - match self { - Self::Reuse(ref mut data) => data.as_mut_slice(), - Self::Allocated(ref mut data) => data.as_mut_slice(), - } - } - - fn size(&self) -> usize { - match self { - Self::Reuse(_) => 0, - Self::Allocated(data) => data.capacity(), - } - } - - /// Make sure it owns the underlying memory buffer. - fn convert_to_owned_buffer(self) -> Self { - if let DataBuffer::Reuse(data) = self { - DataBuffer::Allocated((*data).to_vec()) - } else { - self - } - } - - #[allow(dead_code)] - unsafe fn from_mut_slice(buf: &mut [u8]) -> Self { - DataBuffer::Reuse(ManuallyDrop::new(Vec::from_raw_parts( - buf.as_mut_ptr(), - buf.len(), - buf.len(), - ))) - } -} - -#[derive(Clone, Copy, Debug, PartialEq)] -enum RegionStatus { - Init, - Open, - Committed, -} - -#[derive(Clone, Copy, Debug, PartialEq)] -enum RegionType { - // Fast path to read data from the cache directly, no decompression and validation needed. - CacheFast, - // Slow path to read data from the cache, due to decompression or validation. - CacheSlow, - // Need to read data from storage backend. - Backend, -} - -impl RegionType { - fn joinable(&self, other: Self) -> bool { - *self == other - } -} - -/// A continuous region in cache file or backend storage/blob, it may contain several chunks. -#[derive(Clone)] -struct Region { - r#type: RegionType, - status: RegionStatus, - // For debug and trace purpose implying how many chunks are concatenated - count: u32, - - chunks: Vec>, - tags: Vec, - - // The range [blob_address, blob_address + blob_len) specifies data to be read from backend. - blob_address: u64, - blob_len: u32, - // The range specifying data to return to user. 
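`DataBuffer` above lets the cache either reuse a caller-provided buffer or own an allocation, and `convert_to_owned_buffer` copies the data only when it must outlive the caller, for example before handing it to the delayed-persist worker. The same copy-on-write idea can be expressed with std's `Cow<[u8]>`, shown below purely as an analogue; the real type avoids the lifetime parameter by wrapping the reused buffer in `ManuallyDrop` instead.

```rust
use std::borrow::Cow;

/// Analogue of `convert_to_owned_buffer`: copy only if the buffer was borrowed.
fn make_owned(buf: Cow<'_, [u8]>) -> Vec<u8> {
    buf.into_owned()
}

fn main() {
    let caller_buf = vec![1u8; 8];
    // Borrowed: no copy yet, reads go straight to the caller's memory.
    let reused: Cow<[u8]> = Cow::Borrowed(&caller_buf);
    // Before handing the data to a background writer, make it owned.
    let mut owned = make_owned(reused);
    owned[0] = 9;
    assert_eq!(caller_buf[0], 1); // the caller's buffer is untouched
}
```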
- seg: BlobIoSegment, -} - -impl Region { - fn new(region_type: RegionType) -> Self { - Region { - r#type: region_type, - status: RegionStatus::Init, - count: 0, - chunks: Vec::with_capacity(8), - tags: Vec::with_capacity(8), - blob_address: 0, - blob_len: 0, - seg: Default::default(), - } - } - - fn with( - ctx: &FileCacheEntry, - region: &Region, - chunks: Vec>, - ) -> Result { - assert!(!chunks.is_empty()); - let len = chunks.len(); - let first_chunk = &chunks[0]; - let last_chunk = &chunks[len - 1]; - - let mut blob_cci = BlobCCI::new(); - if first_chunk.is_batch() || last_chunk.is_batch() { - blob_cci.set_meta(ctx.get_blob_meta_info()?)?; - } - - let (blob_address, blob_len) = { - let first_offset = blob_cci.get_compressed_offset(first_chunk)?; - let (last_offset, last_size) = blob_cci.get_compressed_info(last_chunk)?; - let size_between = last_offset - first_offset; - assert!(size_between < u32::MAX as u64); - (first_offset, size_between as u32 + last_size) - }; - - Ok(Region { - r#type: region.r#type, - status: region.status, - count: len as u32, - chunks, - tags: vec![false; len], - blob_address, - blob_len, - seg: region.seg.clone(), - }) - } - - fn append( - &mut self, - start: u64, - len: u32, - tag: BlobIoTag, - chunk: Option>, - ) -> StorageResult<()> { - assert_ne!(self.status, RegionStatus::Committed); - - if self.status == RegionStatus::Init { - self.status = RegionStatus::Open; - self.blob_address = start; - self.blob_len = len; - self.count = 1; - } else { - assert_eq!(self.status, RegionStatus::Open); - let end = self.blob_address + self.blob_len as u64; - if end + RAFS_DEFAULT_CHUNK_SIZE < start || start.checked_add(len as u64).is_none() { - return Err(StorageError::NotContinuous); - } - let sz = start + len as u64 - end; - self.blob_len += sz as u32; - self.count += 1; - } - - // Maintain information for user triggered IO requests. - if let BlobIoTag::User(ref s) = tag { - if self.seg.is_empty() { - self.seg = BlobIoSegment::new(s.offset, s.len); - } else { - self.seg.append(s.offset, s.len); - } - } - - if let Some(c) = chunk { - self.chunks.push(c); - self.tags.push(tag.is_user_io()); - } - - Ok(()) - } - - fn has_user_io(&self) -> bool { - !self.seg.is_empty() - } -} - -struct FileIoMergeState { - regions: Vec, - // Whether last region can take in more io chunks. If not, a new region has to be - // created for following chunks. 
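`Region::append` above only merges a new extent when the gap between the current region end and the new start stays within `RAFS_DEFAULT_CHUNK_SIZE`; otherwise it reports `StorageError::NotContinuous` and the caller opens a new region. The gap rule in isolation, with hypothetical names and an explicit `max_gap` parameter, looks like this; the offsets mirror the cases exercised by `test_region_append` in the test module.

```rust
/// Hypothetical, simplified continuity check: merge only if the gap between the
/// current region end and the new extent start does not exceed `max_gap`.
struct ByteRegion {
    start: u64,
    len: u64,
}

#[derive(Debug, PartialEq)]
enum AppendError {
    NotContinuous,
    Overflow,
}

fn append(region: &mut ByteRegion, start: u64, len: u64, max_gap: u64) -> Result<(), AppendError> {
    if start.checked_add(len).is_none() {
        return Err(AppendError::Overflow);
    }
    let end = region.start + region.len;
    if end + max_gap < start {
        return Err(AppendError::NotContinuous);
    }
    // Extend up to the end of the new extent; the gap is read and discarded.
    region.len = start + len - region.start;
    Ok(())
}

fn main() {
    let mut r = ByteRegion { start: 0x1000, len: 0x2000 };
    // A small gap is tolerated and simply widens the region.
    assert!(append(&mut r, 0x4000, 0x2000, 0x100000).is_ok());
    assert_eq!(r.len, 0x5000);
    // A huge jump is rejected; the merge state would open a new region instead.
    assert_eq!(
        append(&mut r, 0x1_0000_4000, 0x2000, 0x100000),
        Err(AppendError::NotContinuous)
    );
}
```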
- last_region_joinable: bool, -} - -impl FileIoMergeState { - fn new() -> Self { - FileIoMergeState { - regions: Vec::with_capacity(8), - last_region_joinable: true, - } - } - - fn push( - &mut self, - region_type: RegionType, - start: u64, - len: u32, - tag: BlobIoTag, - chunk: Option>, - ) -> Result<()> { - // Make sure user io of same region continuous - if !self.regions.is_empty() && self.joinable(region_type) { - let region = &self.regions[self.regions.len() - 1]; - if !region.seg.is_empty() && tag.is_user_io() { - if let BlobIoTag::User(ref seg) = tag { - if seg.offset as u64 + start - != region.blob_address + region.seg.offset as u64 + region.seg.len as u64 - { - self.commit(); - } - } - } - } - - if self.regions.is_empty() || !self.joinable(region_type) { - self.regions.push(Region::new(region_type)); - self.last_region_joinable = true; - } - - let idx = self.regions.len() - 1; - self.regions[idx] - .append(start, len, tag, chunk) - .map_err(|e| einval!(e)) - } - - // Committing current region ensures a new region will be created when more - // chunks has to be added since `push` checks if newly pushed chunk is continuous - // After committing, following `push` will create a new region. - fn commit(&mut self) { - self.last_region_joinable = false; - } - - fn reset(&mut self) { - self.regions.truncate(0); - self.last_region_joinable = true; - } - - #[inline] - fn joinable(&self, region_type: RegionType) -> bool { - assert!(!self.regions.is_empty()); - let idx = self.regions.len() - 1; - - self.regions[idx].r#type.joinable(region_type) && self.last_region_joinable - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::device::{BlobChunkFlags, BlobFeatures}; - use crate::meta::*; - use crate::test::MockChunkInfo; - - #[test] - fn test_data_buffer() { - let mut buf1 = vec![0x1u8; 8]; - let buf2 = unsafe { DataBuffer::from_mut_slice(buf1.as_mut_slice()) }; - - assert_eq!(buf2.slice()[1], 0x1); - let mut buf2 = buf2.convert_to_owned_buffer(); - buf2.mut_slice()[1] = 0x2; - assert_eq!(buf1[1], 0x1); - } - - #[test] - fn test_region_type() { - assert!(RegionType::CacheFast.joinable(RegionType::CacheFast)); - assert!(RegionType::CacheSlow.joinable(RegionType::CacheSlow)); - assert!(RegionType::Backend.joinable(RegionType::Backend)); - - assert!(!RegionType::CacheFast.joinable(RegionType::CacheSlow)); - assert!(!RegionType::CacheFast.joinable(RegionType::Backend)); - assert!(!RegionType::CacheSlow.joinable(RegionType::CacheFast)); - assert!(!RegionType::CacheSlow.joinable(RegionType::Backend)); - assert!(!RegionType::Backend.joinable(RegionType::CacheFast)); - assert!(!RegionType::Backend.joinable(RegionType::CacheSlow)); - } - - #[test] - fn test_region_new() { - let region = Region::new(RegionType::CacheFast); - - assert_eq!(region.status, RegionStatus::Init); - assert!(!region.has_user_io()); - assert!(region.seg.is_empty()); - assert_eq!(region.chunks.len(), 0); - assert_eq!(region.tags.len(), 0); - assert_eq!(region.blob_address, 0); - assert_eq!(region.blob_len, 0); - } - - #[test] - fn test_region_append() { - let mut region = Region::new(RegionType::CacheFast); - - let tag = BlobIoTag::User(BlobIoSegment { - offset: 0x1800, - len: 0x1800, - }); - region.append(0x1000, 0x2000, tag, None).unwrap(); - assert_eq!(region.status, RegionStatus::Open); - assert_eq!(region.blob_address, 0x1000); - assert_eq!(region.blob_len, 0x2000); - assert_eq!(region.chunks.len(), 0); - assert_eq!(region.tags.len(), 0); - assert!(!region.seg.is_empty()); - assert!(region.has_user_io()); - - let 
tag = BlobIoTag::User(BlobIoSegment { - offset: 0x0000, - len: 0x2000, - }); - region.append(0x100004000, 0x2000, tag, None).unwrap_err(); - assert_eq!(region.status, RegionStatus::Open); - assert_eq!(region.blob_address, 0x1000); - assert_eq!(region.blob_len, 0x2000); - assert_eq!(region.seg.offset, 0x1800); - assert_eq!(region.seg.len, 0x1800); - assert_eq!(region.chunks.len(), 0); - assert_eq!(region.tags.len(), 0); - assert!(region.has_user_io()); - - let tag = BlobIoTag::User(BlobIoSegment { - offset: 0x0000, - len: 0x2000, - }); - region.append(0x4000, 0x2000, tag, None).unwrap(); - assert_eq!(region.status, RegionStatus::Open); - assert_eq!(region.blob_address, 0x1000); - assert_eq!(region.blob_len, 0x5000); - assert_eq!(region.seg.offset, 0x1800); - assert_eq!(region.seg.len, 0x3800); - assert_eq!(region.chunks.len(), 0); - assert_eq!(region.tags.len(), 0); - assert!(!region.seg.is_empty()); - assert!(region.has_user_io()); - } - - #[test] - fn test_file_io_merge_state() { - let mut state = FileIoMergeState::new(); - assert_eq!(state.regions.len(), 0); - - let tag = BlobIoTag::User(BlobIoSegment { - offset: 0x1800, - len: 0x800, - }); - state - .push(RegionType::CacheFast, 0x1000, 0x2000, tag, None) - .unwrap(); - assert_eq!(state.regions.len(), 1); - - let tag = BlobIoTag::User(BlobIoSegment { - offset: 0x0000, - len: 0x2000, - }); - state - .push(RegionType::CacheFast, 0x3000, 0x2000, tag, None) - .unwrap(); - assert_eq!(state.regions.len(), 1); - - let tag = BlobIoTag::User(BlobIoSegment { - offset: 0x0001, - len: 0x1fff, - }); - state - .push(RegionType::CacheSlow, 0x5000, 0x2000, tag, None) - .unwrap(); - assert_eq!(state.regions.len(), 2); - } - - #[test] - fn test_blob_cci() { - // Batch chunks: [chunk0, chunk1] - let mut chunk0 = BlobChunkInfoV2Ondisk::default(); - chunk0.set_batch(true); - chunk0.set_compressed(true); - chunk0.set_batch_index(0); - chunk0.set_uncompressed_offset_in_batch_buf(0); - chunk0.set_uncompressed_offset(0); - chunk0.set_uncompressed_size(0x2000); - - let mut chunk1 = BlobChunkInfoV2Ondisk::default(); - chunk1.set_batch(true); - chunk1.set_compressed(true); - chunk1.set_batch_index(0); - chunk1.set_uncompressed_offset_in_batch_buf(0x2000); - chunk1.set_uncompressed_offset(0x2000); - chunk1.set_uncompressed_size(0x1000); - - let mut batch_ctx0 = BatchInflateContext::default(); - batch_ctx0.set_uncompressed_batch_size(0x3000); - batch_ctx0.set_compressed_size(0x2000); - - let chunk_info_array = vec![chunk0, chunk1]; - let chunk_infos = BlobMetaChunkArray::V2(chunk_info_array); - let chunk_infos = ManuallyDrop::new(chunk_infos); - - let batch_ctx_array = vec![batch_ctx0]; - let batch_ctxes = ManuallyDrop::new(batch_ctx_array); - - let mut state = BlobCompressionContext::default(); - state.chunk_info_array = chunk_infos; - state.batch_info_array = batch_ctxes; - state.compressed_size = 0x2000; - state.uncompressed_size = 0x3000; - state.blob_features = (BlobFeatures::BATCH - | BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2) - .bits(); - - let state = Arc::new(state); - let meta = BlobCompressionContextInfo { state }; - - let mut blob_cci = BlobCCI::new(); - assert!(blob_cci.set_meta(None).is_err()); - - blob_cci.set_meta(Some(Arc::new(meta))).unwrap(); - assert!(!blob_cci.is_none()); - - let normal_chunk: Arc = Arc::new(MockChunkInfo { - compress_size: 0x100, - compress_offset: 0x1000, - ..Default::default() - }); - // For normal chunk, just read the BlobChunkInfo. 
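`test_blob_cci` above highlights the key property of batch chunks: they do not carry their own compressed location, the whole batch shares one compressed extent recorded in a `BatchInflateContext`, so `BlobCCI` resolves their offset and size through the blob meta. The lookup can be pictured with the small sketch below; `Chunk` and `BatchCtx` are hypothetical simplifications, and the numbers match the test values.

```rust
use std::collections::HashMap;

/// Hypothetical simplification of `BlobCCI::get_compressed_info`: a normal chunk
/// carries its own compressed location, a batch chunk resolves it through the
/// shared batch context.
struct BatchCtx {
    compressed_offset: u64,
    compressed_size: u32,
}

enum Chunk {
    Normal { compressed_offset: u64, compressed_size: u32 },
    Batch { batch_index: u32 },
}

fn compressed_info(chunk: &Chunk, batches: &HashMap<u32, BatchCtx>) -> Option<(u64, u32)> {
    match chunk {
        Chunk::Normal { compressed_offset, compressed_size } => {
            Some((*compressed_offset, *compressed_size))
        }
        // Every chunk of the batch maps to the same compressed extent.
        Chunk::Batch { batch_index } => {
            let ctx = batches.get(batch_index)?;
            Some((ctx.compressed_offset, ctx.compressed_size))
        }
    }
}

fn main() {
    let mut batches = HashMap::new();
    batches.insert(0, BatchCtx { compressed_offset: 0, compressed_size: 0x2000 });

    let normal = Chunk::Normal { compressed_offset: 0x1000, compressed_size: 0x100 };
    let batched = Chunk::Batch { batch_index: 0 };

    assert_eq!(compressed_info(&normal, &batches), Some((0x1000, 0x100)));
    assert_eq!(compressed_info(&batched, &batches), Some((0, 0x2000)));
}
```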
- let c_offset = blob_cci.get_compressed_offset(&normal_chunk).unwrap(); - assert_eq!(c_offset, 0x1000); - - let (c_offset, c_size) = blob_cci.get_compressed_info(&normal_chunk).unwrap(); - assert_eq!(c_offset, 0x1000); - assert_eq!(c_size, 0x100); - - let c_end = blob_cci.get_compressed_end(&normal_chunk).unwrap(); - assert_eq!(c_end, 0x1100); - - let batch_chunk: Arc = Arc::new(MockChunkInfo { - index: 1, - blob_index: 0, - flags: BlobChunkFlags::BATCH, - ..Default::default() - }); - assert!(batch_chunk.is_batch()); - // For batch chunk, read from BlobCompressionContext. - let c_offset = blob_cci.get_compressed_offset(&batch_chunk).unwrap(); - assert_eq!(c_offset, 0); - - let (c_offset, c_size) = blob_cci.get_compressed_info(&batch_chunk).unwrap(); - assert_eq!(c_offset, 0); - assert_eq!(c_size, 0x2000); - - let c_end = blob_cci.get_compressed_end(&batch_chunk).unwrap(); - assert_eq!(c_end, 0x2000); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Common cached file object for `FileCacheMgr` and `FsCacheMgr`. +//! +//! The `FileCacheEntry` manages local cached blob objects from remote backends to improve +//! performance. It may be used by both the userspace `FileCacheMgr` or the `FsCacheMgr` based +//! on the in-kernel fscache system. + +use std::collections::HashSet; +use std::fs::File; +use std::io::{ErrorKind, Read, Result}; +use std::mem::ManuallyDrop; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nix::sys::uio; +use nydus_utils::compress::Decoder; +use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::metrics::{BlobcacheMetrics, Metric}; +use nydus_utils::{compress, digest, round_up_usize, DelayType, Delayer, FileRangeReader}; +use tokio::runtime::Runtime; + +use crate::backend::BlobReader; +use crate::cache::state::ChunkMap; +use crate::cache::worker::{AsyncPrefetchConfig, AsyncPrefetchMessage, AsyncWorkerMgr}; +use crate::cache::{BlobCache, BlobIoMergeState}; +use crate::device::{ + BlobChunkInfo, BlobInfo, BlobIoDesc, BlobIoRange, BlobIoSegment, BlobIoTag, BlobIoVec, + BlobObject, BlobPrefetchRequest, +}; +use crate::meta::{BlobCompressionContextInfo, BlobMetaChunk}; +use crate::utils::{alloc_buf, copyv, readv, MemSliceCursor}; +use crate::{StorageError, StorageResult, RAFS_BATCH_SIZE_TO_GAP_SHIFT, RAFS_DEFAULT_CHUNK_SIZE}; + +const DOWNLOAD_META_RETRY_COUNT: u32 = 5; +const DOWNLOAD_META_RETRY_DELAY: u64 = 400; +const ENCRYPTION_PAGE_SIZE: usize = 4096; + +#[derive(Default, Clone)] +pub(crate) struct FileCacheMeta { + has_error: Arc, + meta: Arc>>>, +} + +impl FileCacheMeta { + pub(crate) fn new( + blob_file: String, + blob_info: Arc, + reader: Option>, + runtime: Option>, + sync: bool, + validation: bool, + ) -> Result { + if sync { + match BlobCompressionContextInfo::new( + &blob_file, + &blob_info, + reader.as_ref(), + validation, + ) { + Ok(m) => Ok(FileCacheMeta { + has_error: Arc::new(AtomicBool::new(false)), + meta: Arc::new(Mutex::new(Some(Arc::new(m)))), + }), + Err(e) => Err(e), + } + } else { + let meta = FileCacheMeta { + has_error: Arc::new(AtomicBool::new(false)), + meta: Arc::new(Mutex::new(None)), + }; + let meta1 = meta.clone(); + + if let Some(r) = runtime { + r.as_ref().spawn_blocking(move || { + let mut retry = 0; + let mut delayer = Delayer::new( + 
DelayType::BackOff, + Duration::from_millis(DOWNLOAD_META_RETRY_DELAY), + ); + while retry < DOWNLOAD_META_RETRY_COUNT { + match BlobCompressionContextInfo::new( + &blob_file, + &blob_info, + reader.as_ref(), + validation, + ) { + Ok(m) => { + *meta1.meta.lock().unwrap() = Some(Arc::new(m)); + return; + } + Err(e) => { + info!("temporarily failed to get blob.meta, {}", e); + delayer.delay(); + retry += 1; + } + } + } + warn!("failed to get blob.meta"); + meta1.has_error.store(true, Ordering::Release); + }); + } else { + warn!("Want download blob meta asynchronously but no runtime."); + } + + Ok(meta) + } + } + + pub(crate) fn get_blob_meta(&self) -> Option> { + loop { + let meta = self.meta.lock().unwrap(); + if meta.is_some() { + return meta.clone(); + } + drop(meta); + if self.has_error.load(Ordering::Acquire) { + return None; + } + std::thread::sleep(Duration::from_millis(2)); + } + } +} + +/// Helper struct to manage and call BlobCompressionContextInfo. +struct BlobCCI { + meta: Option>, +} + +impl BlobCCI { + fn new() -> Self { + BlobCCI { meta: None } + } + + fn is_none(&self) -> bool { + self.meta.is_none() + } + + fn set_meta(&mut self, meta: Option>) -> Result<&Self> { + if meta.is_none() { + return Err(einval!("failed to get blob meta info")); + } + self.meta = meta; + Ok(self) + } + + fn get_compressed_offset(&self, chunk: &Arc) -> Result { + Ok(chunk.compressed_offset()) + } + + fn get_compressed_size(&self, chunk: &Arc) -> Result { + let size = if chunk.is_batch() { + self.meta + .as_ref() + .unwrap() + .get_compressed_size(chunk.id())? + } else { + chunk.compressed_size() + }; + Ok(size) + } + + fn get_compressed_info(&self, chunk: &Arc) -> Result<(u64, u32)> { + Ok(( + self.get_compressed_offset(chunk)?, + self.get_compressed_size(chunk)?, + )) + } + + fn get_compressed_end(&self, chunk: &Arc) -> Result { + let (offset, size) = self.get_compressed_info(chunk)?; + Ok(offset + size as u64) + } +} + +pub(crate) struct FileCacheEntry { + pub(crate) blob_id: String, + pub(crate) blob_info: Arc, + pub(crate) cache_cipher_object: Arc, + pub(crate) cache_cipher_context: Arc, + pub(crate) chunk_map: Arc, + pub(crate) file: Arc, + pub(crate) meta: Option, + pub(crate) metrics: Arc, + pub(crate) prefetch_state: Arc, + pub(crate) reader: Arc, + pub(crate) runtime: Arc, + pub(crate) workers: Arc, + + pub(crate) blob_compressed_size: u64, + pub(crate) blob_uncompressed_size: u64, + // Whether `get_blob_object()` is supported. + pub(crate) is_get_blob_object_supported: bool, + // Cache raw data from backend instead of decompressed/decrypted plaintext. + pub(crate) is_raw_data: bool, + // The data in cache file is uncompressed and encrypted. + pub(crate) is_cache_encrypted: bool, + // Whether direct chunkmap is used. + pub(crate) is_direct_chunkmap: bool, + // The blob is for an stargz image. + pub(crate) is_legacy_stargz: bool, + // The blob is for an RAFS filesystem in `TARFS` mode. + pub(crate) is_tarfs: bool, + // The blob contains batch chunks. + pub(crate) is_batch: bool, + // The blob is based on ZRan decompression algorithm. + pub(crate) is_zran: bool, + // True if direct IO is enabled for the `self.file`, supported for fscache only. + pub(crate) dio_enabled: bool, + // Data from the file cache should be validated before use. + pub(crate) need_validation: bool, + // Amplified user IO request batch size to read data from remote storage backend / local cache. 
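`FileCacheMeta::new` above downloads `blob.meta` in the background with a bounded number of retries and a growing delay, while `get_blob_meta` polls until either the metadata or the error flag shows up. The pattern, reduced to std threads and a stand-in `fetch_meta` function instead of the tokio runtime and `BlobCompressionContextInfo::new`, is sketched below.

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;

/// Illustrative shared state: the downloaded value and a failure flag.
#[derive(Clone, Default)]
struct Meta {
    value: Arc<Mutex<Option<Arc<String>>>>,
    has_error: Arc<AtomicBool>,
}

/// Stand-in for the real metadata fetch; pretend it succeeds on the third try.
fn fetch_meta(attempt: u32) -> Result<String, String> {
    if attempt < 2 { Err("temporary failure".into()) } else { Ok("blob.meta".into()) }
}

fn spawn_download(meta: Meta, retries: u32, mut delay: Duration) {
    thread::spawn(move || {
        for attempt in 0..retries {
            match fetch_meta(attempt) {
                Ok(m) => {
                    *meta.value.lock().unwrap() = Some(Arc::new(m));
                    return;
                }
                Err(_) => {
                    thread::sleep(delay);
                    delay *= 2; // back off before the next attempt
                }
            }
        }
        meta.has_error.store(true, Ordering::Release);
    });
}

/// Readers poll until the value arrives or the downloader gives up.
fn get_blob_meta(meta: &Meta) -> Option<Arc<String>> {
    loop {
        let guard = meta.value.lock().unwrap();
        if guard.is_some() {
            return guard.clone();
        }
        drop(guard);
        if meta.has_error.load(Ordering::Acquire) {
            return None;
        }
        thread::sleep(Duration::from_millis(2));
    }
}

fn main() {
    let meta = Meta::default();
    spawn_download(meta.clone(), 5, Duration::from_millis(10));
    assert_eq!(get_blob_meta(&meta).unwrap().as_str(), "blob.meta");
}
```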
+ pub(crate) user_io_batch_size: u32, + pub(crate) prefetch_config: Arc, +} + +impl FileCacheEntry { + pub(crate) fn get_blob_size(reader: &Arc, blob_info: &BlobInfo) -> Result { + // Stargz needs blob size information, so hacky! + let size = if blob_info.is_legacy_stargz() { + reader.blob_size().map_err(|e| einval!(e))? + } else { + blob_info.compressed_size() + }; + + Ok(size) + } + + fn delay_persist_chunk_data(&self, chunk: Arc, buffer: Arc) { + let delayed_chunk_map = self.chunk_map.clone(); + let file = self.file.clone(); + let metrics = self.metrics.clone(); + let is_raw_data = self.is_raw_data; + let is_cache_encrypted = self.is_cache_encrypted; + let cipher_object = self.cache_cipher_object.clone(); + let cipher_context = self.cache_cipher_context.clone(); + + metrics.buffered_backend_size.add(buffer.size() as u64); + self.runtime.spawn_blocking(move || { + metrics.buffered_backend_size.sub(buffer.size() as u64); + let mut t_buf; + let buf = if !is_raw_data && is_cache_encrypted { + let (key, iv) = cipher_context.generate_cipher_meta(&chunk.chunk_id().data); + let buf = buffer.slice(); + t_buf = alloc_buf(round_up_usize(buf.len(), ENCRYPTION_PAGE_SIZE)); + + let mut pos = 0; + while pos < buf.len() { + let mut s_buf; + // Padding to buffer to 4096 bytes if needed. + let buf = if pos + ENCRYPTION_PAGE_SIZE > buf.len() { + s_buf = buf[pos..].to_vec(); + s_buf.resize(ENCRYPTION_PAGE_SIZE, 0); + &s_buf + } else { + &buf[pos..pos + ENCRYPTION_PAGE_SIZE] + }; + + assert_eq!(buf.len(), ENCRYPTION_PAGE_SIZE); + match cipher_object.encrypt(key, Some(&iv), buf) { + Ok(buf2) => { + assert_eq!(buf2.len(), ENCRYPTION_PAGE_SIZE); + t_buf[pos..pos + ENCRYPTION_PAGE_SIZE].copy_from_slice(buf2.as_ref()); + pos += ENCRYPTION_PAGE_SIZE; + } + Err(_) => { + Self::_update_chunk_pending_status( + &delayed_chunk_map, + chunk.as_ref(), + false, + ); + return; + } + } + } + &t_buf + } else { + buffer.slice() + }; + + let offset = if is_raw_data { + chunk.compressed_offset() + } else { + chunk.uncompressed_offset() + }; + let res = Self::persist_cached_data(&file, offset, buf); + Self::_update_chunk_pending_status(&delayed_chunk_map, chunk.as_ref(), res.is_ok()); + }); + } + + fn persist_chunk_data(&self, chunk: &dyn BlobChunkInfo, buf: &[u8]) { + let offset = chunk.uncompressed_offset(); + let res = Self::persist_cached_data(&self.file, offset, buf); + self.update_chunk_pending_status(chunk, res.is_ok()); + } + + fn persist_cached_data(file: &Arc, offset: u64, buffer: &[u8]) -> Result<()> { + let fd = file.as_raw_fd(); + + let n = loop { + let ret = uio::pwrite(fd, buffer, offset as i64).map_err(|_| last_error!()); + match ret { + Ok(nr_write) => { + trace!("write {}(offset={}) bytes to cache file", nr_write, offset); + break nr_write; + } + Err(err) => { + // Retry if the IO is interrupted by signal. 
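When the cache is encrypted, `delay_persist_chunk_data` above splits the plaintext into `ENCRYPTION_PAGE_SIZE` pages, zero-pads the final page, and encrypts each page independently so it can later be read back at page-aligned offsets. The padding scheme on its own, with a toy transform in place of the real cipher, looks like this:

```rust
/// Sketch of the page-padding logic; `toy_encrypt_page` is a placeholder, not
/// real cryptography.
const PAGE: usize = 4096;

fn toy_encrypt_page(page: &[u8]) -> Vec<u8> {
    assert_eq!(page.len(), PAGE);
    page.iter().map(|&b| b ^ 0x55).collect()
}

fn encrypt_chunk(plain: &[u8]) -> Vec<u8> {
    let padded_len = (plain.len() + PAGE - 1) / PAGE * PAGE;
    let mut out = vec![0u8; padded_len];
    let mut pos = 0;
    while pos < plain.len() {
        // Zero-pad the tail page if the plaintext does not fill it.
        let page: Vec<u8> = if pos + PAGE > plain.len() {
            let mut p = plain[pos..].to_vec();
            p.resize(PAGE, 0);
            p
        } else {
            plain[pos..pos + PAGE].to_vec()
        };
        out[pos..pos + PAGE].copy_from_slice(&toy_encrypt_page(&page));
        pos += PAGE;
    }
    out
}

fn main() {
    let cipher = encrypt_chunk(&vec![3u8; 5000]);
    // 5000 bytes of plaintext occupy two full pages in the cache file.
    assert_eq!(cipher.len(), 2 * PAGE);
}
```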
+ if err.kind() != ErrorKind::Interrupted { + return Err(err); + } + } + } + }; + + if n != buffer.len() { + Err(eio!("failed to write data to file cache")) + } else { + Ok(()) + } + } + + fn update_chunk_pending_status(&self, chunk: &dyn BlobChunkInfo, success: bool) { + Self::_update_chunk_pending_status(&self.chunk_map, chunk, success) + } + + fn _update_chunk_pending_status( + chunk_map: &Arc, + chunk: &dyn BlobChunkInfo, + success: bool, + ) { + if success { + if let Err(e) = chunk_map.set_ready_and_clear_pending(chunk) { + error!( + "Failed change caching state for chunk of offset {}, {:?}", + chunk.compressed_offset(), + e + ) + } + } else { + error!( + "Failed to persist data for chunk at offset {}", + chunk.compressed_offset() + ); + chunk_map.clear_pending(chunk); + } + } + + fn prefetch_batch_size(&self) -> u64 { + if self.prefetch_config.batch_size < 0x2_0000 { + 0x2_0000 + } else { + self.prefetch_config.batch_size as u64 + } + } + + fn user_io_batch_size(&self) -> u64 { + if self.user_io_batch_size < 0x2_0000 { + 0x2_0000 + } else { + self.user_io_batch_size as u64 + } + } + + fn extend_pending_chunks( + &self, + chunks: &[Arc], + batch_size: u64, + ) -> Result>>> { + assert!(!chunks.is_empty()); + match self.get_blob_meta_info() { + Err(e) => Err(e), + Ok(None) => Ok(None), + Ok(Some(bm)) => { + let v = bm.add_more_chunks(chunks, batch_size)?; + Ok(Some(self.strip_ready_chunks(bm, Some(chunks), v))) + } + } + } + + fn strip_ready_chunks( + &self, + meta: Arc, + old_chunks: Option<&[Arc]>, + mut extended_chunks: Vec>, + ) -> Vec> { + if self.is_zran { + // Special handling for zran chunk. + // Because zran chunk has not been deduplicated at build time. + // So zran index is used to check if chunk is ready. + let mut set = HashSet::new(); + for c in extended_chunks.iter() { + if !matches!(self.chunk_map.is_ready(c.as_ref()), Ok(true)) { + let zran_idx = meta + .get_zran_index(c.id()) + .map_err(|e| error!("Failed to get zran index for chunk {}: {}", c.id(), e)) + .unwrap_or(u32::MAX); + set.insert(zran_idx); + } + } + + let first = old_chunks.as_ref().map(|v| v[0].id()).unwrap_or(u32::MAX); + let mut start = 0; + while start < extended_chunks.len() - 1 { + let id = extended_chunks[start].id(); + if id == first { + break; + } + match &meta.get_zran_index(id) { + Ok(i) => { + if set.contains(i) { + break; + } + } + Err(_e) => break, + } + start += 1; + } + + let last = old_chunks + .as_ref() + .map(|v| v[v.len() - 1].id()) + .unwrap_or(u32::MAX); + let mut end = extended_chunks.len() - 1; + while end > start { + let id = extended_chunks[end].id(); + if id == last { + break; + } + match &meta.get_zran_index(id) { + Ok(i) => { + if set.contains(i) { + break; + } + } + Err(_e) => break, + } + end -= 1; + } + + assert!(end >= start, "start 0x{:x}, end 0x{:x}", start, end); + if start == 0 && end == extended_chunks.len() - 1 { + extended_chunks + } else { + extended_chunks[start..=end].to_vec() + } + } else { + // For normal chunks and batch chunks. + // No special handling for batch chunk. + // Because batch chunk has been deduplicated at build time. + // It is enough to just check if chunk is ready. 
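`persist_cached_data` above loops on `pwrite` until the call is no longer interrupted by a signal, then treats a short write as an error because cached chunk data must land completely. The same retry policy written against std's `FileExt::write_at` (instead of `nix::sys::uio::pwrite`) is shown below as an illustrative sketch; the scratch file path in `main` exists only for the demo.

```rust
use std::fs::File;
use std::io::{Error, ErrorKind, Result};
use std::os::unix::fs::FileExt;

/// Illustrative retry loop: repeat the positional write while it is interrupted
/// by a signal, and reject short writes.
fn persist_at(file: &File, offset: u64, buf: &[u8]) -> Result<()> {
    let written = loop {
        match file.write_at(buf, offset) {
            Ok(n) => break n,
            Err(e) if e.kind() == ErrorKind::Interrupted => continue, // EINTR, try again
            Err(e) => return Err(e),
        }
    };
    if written != buf.len() {
        return Err(Error::new(ErrorKind::WriteZero, "short write to cache file"));
    }
    Ok(())
}

fn main() -> Result<()> {
    let path = std::env::temp_dir().join("persist_at_demo");
    let file = std::fs::OpenOptions::new()
        .create(true)
        .read(true)
        .write(true)
        .open(&path)?;
    persist_at(&file, 4096, b"chunk data")?;
    std::fs::remove_file(&path)?;
    Ok(())
}
```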
+ while !extended_chunks.is_empty() { + let chunk = &extended_chunks[extended_chunks.len() - 1]; + if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true)) { + extended_chunks.pop(); + } else { + break; + } + } + extended_chunks + } + } + + fn get_blob_range(&self, chunks: &[Arc]) -> Result<(u64, u64, usize)> { + assert!(!chunks.is_empty()); + let (start, end) = if self.is_zran { + let meta = self + .get_blob_meta_info()? + .ok_or_else(|| einval!("failed to get blob meta object"))?; + let zran_index = meta.get_zran_index(chunks[0].id())?; + let (ctx, _) = meta.get_zran_context(zran_index)?; + let blob_start = ctx.in_offset; + let zran_index = meta.get_zran_index(chunks[chunks.len() - 1].id())?; + let (ctx, _) = meta.get_zran_context(zran_index)?; + let blob_end = ctx.in_offset + ctx.in_len as u64; + (blob_start, blob_end) + } else if self.is_batch { + let first_chunk = &chunks[0]; + let last_chunk = &chunks[chunks.len() - 1]; + + let mut blob_cci = BlobCCI::new(); + + // Get blob meta info iff the chunk is batch chunk. + if first_chunk.is_batch() || last_chunk.is_batch() { + blob_cci.set_meta(self.get_blob_meta_info()?)?; + } + + let blob_start = blob_cci.get_compressed_offset(first_chunk)?; + let blob_end = blob_cci.get_compressed_end(last_chunk)?; + + (blob_start, blob_end) + } else { + let last = chunks.len() - 1; + (chunks[0].compressed_offset(), chunks[last].compressed_end()) + }; + + let size = end - start; + if end - start > u32::MAX as u64 { + Err(einval!( + "requested blob range is too bigger, larger than u32::MAX" + )) + } else { + Ok((start, end, size as usize)) + } + } +} + +impl AsRawFd for FileCacheEntry { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +impl BlobCache for FileCacheEntry { + fn blob_id(&self) -> &str { + &self.blob_id + } + + fn blob_uncompressed_size(&self) -> Result { + Ok(self.blob_uncompressed_size) + } + + fn blob_compressed_size(&self) -> Result { + Ok(self.blob_compressed_size) + } + + fn blob_compressor(&self) -> compress::Algorithm { + self.blob_info.compressor() + } + + fn blob_cipher(&self) -> crypt::Algorithm { + self.blob_info.cipher() + } + + fn blob_cipher_object(&self) -> Arc { + self.blob_info.cipher_object() + } + + fn blob_cipher_context(&self) -> Option { + self.blob_info.cipher_context() + } + + fn blob_digester(&self) -> digest::Algorithm { + self.blob_info.digester() + } + + fn is_legacy_stargz(&self) -> bool { + self.is_legacy_stargz + } + + fn is_batch(&self) -> bool { + self.is_batch + } + + fn is_zran(&self) -> bool { + self.is_zran + } + + fn need_validation(&self) -> bool { + self.need_validation + } + + fn reader(&self) -> &dyn BlobReader { + &*self.reader + } + + fn get_chunk_map(&self) -> &Arc { + &self.chunk_map + } + + fn get_chunk_info(&self, chunk_index: u32) -> Option> { + self.meta + .as_ref() + .and_then(|v| v.get_blob_meta()) + .map(|v| BlobMetaChunk::new(chunk_index as usize, &v.state)) + } + + fn get_blob_object(&self) -> Option<&dyn BlobObject> { + if self.is_get_blob_object_supported { + Some(self) + } else { + None + } + } + + fn start_prefetch(&self) -> StorageResult<()> { + self.prefetch_state.fetch_add(1, Ordering::Release); + Ok(()) + } + + fn stop_prefetch(&self) -> StorageResult<()> { + loop { + let val = self.prefetch_state.load(Ordering::Acquire); + if val > 0 + && self + .prefetch_state + .compare_exchange(val, val - 1, Ordering::AcqRel, Ordering::Relaxed) + .is_err() + { + continue; + } + + if val == 0 { + warn!("storage: inaccurate prefetch status"); + } + if val == 0 || val == 1 
{ + self.workers.flush_pending_prefetch_requests(&self.blob_id); + return Ok(()); + } + } + } + + fn is_prefetch_active(&self) -> bool { + self.prefetch_state.load(Ordering::Acquire) > 0 + } + + fn prefetch( + &self, + blob_cache: Arc, + prefetches: &[BlobPrefetchRequest], + bios: &[BlobIoDesc], + ) -> StorageResult { + // Handle blob prefetch request first, it may help performance. + for req in prefetches { + let msg = AsyncPrefetchMessage::new_blob_prefetch( + blob_cache.clone(), + req.offset as u64, + req.len as u64, + ); + let _ = self.workers.send_prefetch_message(msg); + } + + // Then handle fs prefetch + let max_comp_size = self.prefetch_batch_size(); + let mut bios = bios.to_vec(); + bios.sort_by_key(|entry| entry.chunkinfo.compressed_offset()); + self.metrics.prefetch_unmerged_chunks.add(bios.len() as u64); + BlobIoMergeState::merge_and_issue( + &bios, + max_comp_size, + max_comp_size as u64 >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, + |req: BlobIoRange| { + let msg = AsyncPrefetchMessage::new_fs_prefetch(blob_cache.clone(), req); + let _ = self.workers.send_prefetch_message(msg); + }, + ); + + Ok(0) + } + + fn prefetch_range(&self, range: &BlobIoRange) -> Result { + let mut pending = Vec::with_capacity(range.chunks.len()); + if !self.chunk_map.is_persist() { + let mut d_size = 0; + for c in range.chunks.iter() { + d_size = std::cmp::max(d_size, c.uncompressed_size() as usize); + } + let mut buf = alloc_buf(d_size); + + for c in range.chunks.iter() { + if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) { + // The chunk is ready, so skip it. + continue; + } + + // For digested chunk map, we must check whether the cached data is valid because + // the digested chunk map cannot persist readiness state. + let d_size = c.uncompressed_size() as usize; + match self.read_file_cache(c.as_ref(), &mut buf[0..d_size]) { + // The cached data is valid, set the chunk as ready. + Ok(_v) => self.update_chunk_pending_status(c.as_ref(), true), + // The cached data is invalid, queue the chunk for reading from backend. + Err(_e) => pending.push(c.clone()), + } + } + } else { + for c in range.chunks.iter() { + if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) { + // The chunk is ready, so skip it. + continue; + } else { + pending.push(c.clone()); + } + } + } + + let mut total_size = 0; + let mut start = 0; + while start < pending.len() { + // Figure out the range with continuous chunk ids, be careful that `end` is inclusive. + let mut end = start; + while end < pending.len() - 1 && pending[end + 1].id() == pending[end].id() + 1 { + end += 1; + } + + let (blob_offset, _blob_end, blob_size) = self.get_blob_range(&pending[start..=end])?; + match self.read_chunks_from_backend(blob_offset, blob_size, &pending[start..=end], true) + { + Ok(mut bufs) => { + total_size += blob_size; + if self.is_raw_data { + let res = Self::persist_cached_data( + &self.file, + blob_offset, + bufs.compressed_buf(), + ); + for c in pending.iter().take(end + 1).skip(start) { + self.update_chunk_pending_status(c.as_ref(), res.is_ok()); + } + } else { + for idx in start..=end { + let buf = match bufs.next() { + None => return Err(einval!("invalid chunk decompressed status")), + Some(Err(e)) => { + for chunk in &mut pending[idx..=end] { + self.update_chunk_pending_status(chunk.as_ref(), false); + } + return Err(e); + } + Some(Ok(v)) => v, + }; + self.persist_chunk_data(pending[idx].as_ref(), &buf); + } + } + } + Err(_e) => { + // Clear the pending flag for all chunks in processing. 
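The fs prefetch path above sorts the pending `BlobIoDesc` entries by compressed offset and then merges neighbours whose gap stays below `batch_size >> RAFS_BATCH_SIZE_TO_GAP_SHIFT`, so several small reads become one ranged backend request. The merge policy in isolation, over hypothetical `Req` ranges and an illustrative shift value, can be sketched as:

```rust
/// Illustrative request merging: sort by offset, then fold neighbours whose gap
/// is at most `batch_size >> gap_shift` into a single ranged read.
#[derive(Debug, PartialEq, Clone, Copy)]
struct Req {
    offset: u64,
    len: u64,
}

fn merge_requests(mut reqs: Vec<Req>, batch_size: u64, gap_shift: u32) -> Vec<Req> {
    let max_gap = batch_size >> gap_shift;
    reqs.sort_by_key(|r| r.offset);

    let mut merged: Vec<Req> = Vec::new();
    for r in reqs {
        match merged.last_mut() {
            Some(last) if r.offset <= last.offset + last.len + max_gap => {
                // Extend the previous request to cover this one (plus the gap).
                let end = (last.offset + last.len).max(r.offset + r.len);
                last.len = end - last.offset;
            }
            _ => merged.push(r),
        }
    }
    merged
}

fn main() {
    let reqs = vec![
        Req { offset: 0x9000, len: 0x1000 },
        Req { offset: 0x0, len: 0x1000 },
        Req { offset: 0x1800, len: 0x800 }, // 0x800 gap after the first request
    ];
    let merged = merge_requests(reqs, 0x20000, 5); // max gap = 0x1000
    assert_eq!(merged.len(), 2);
    assert_eq!(merged[0], Req { offset: 0x0, len: 0x2000 });
}
```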
+ for chunk in &mut pending[start..=end] { + self.update_chunk_pending_status(chunk.as_ref(), false); + } + } + } + + start = end + 1; + } + + Ok(total_size) + } + + fn read(&self, iovec: &mut BlobIoVec, buffers: &[FileVolatileSlice]) -> Result { + self.metrics.total.inc(); + self.workers.consume_prefetch_budget(iovec.size()); + + if iovec.is_empty() { + Ok(0) + } else if iovec.len() == 1 { + let mut state = FileIoMergeState::new(); + let mut cursor = MemSliceCursor::new(buffers); + let req = BlobIoRange::new(&iovec.bi_vec[0], 1); + self.dispatch_one_range(&req, &mut cursor, &mut state) + } else { + self.read_iter(&mut iovec.bi_vec, buffers) + } + } + + fn get_blob_meta_info(&self) -> Result>> { + if let Some(meta) = self.meta.as_ref() { + if let Some(bm) = meta.get_blob_meta() { + Ok(Some(bm)) + } else { + Err(einval!("failed to get blob meta object for cache file")) + } + } else { + Ok(None) + } + } +} + +impl BlobObject for FileCacheEntry { + fn base_offset(&self) -> u64 { + 0 + } + + fn is_all_data_ready(&self) -> bool { + // Assume data from tar file is always ready. + if self.is_tarfs { + true + } else if let Some(b) = self.chunk_map.as_range_map() { + b.is_range_all_ready() + } else { + false + } + } + + fn fetch_range_compressed(&self, offset: u64, size: u64, prefetch: bool) -> Result<()> { + // Assume data from tar file is always ready. + if self.is_tarfs { + return Ok(()); + } + + let meta = self.meta.as_ref().ok_or_else(|| enoent!())?; + let meta = meta.get_blob_meta().ok_or_else(|| einval!())?; + let mut chunks = + meta.get_chunks_compressed(offset, size, self.prefetch_batch_size(), prefetch)?; + if !chunks.is_empty() { + if let Some(meta) = self.get_blob_meta_info()? { + chunks = self.strip_ready_chunks(meta, None, chunks); + } + } else { + return Err(einval!(format!( + "fetch_range_compressed offset 0x{:x}, size 0x{:x}", + offset, size + ))); + } + if chunks.is_empty() { + Ok(()) + } else { + self.do_fetch_chunks(&chunks, true) + } + } + + fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> Result<()> { + // Assume data from tar file is always ready. + if self.is_tarfs { + return Ok(()); + } + + let meta = self.meta.as_ref().ok_or_else(|| einval!())?; + let meta = meta.get_blob_meta().ok_or_else(|| einval!())?; + let mut chunks = meta.get_chunks_uncompressed(offset, size, self.user_io_batch_size())?; + if let Some(meta) = self.get_blob_meta_info()? { + chunks = self.strip_ready_chunks(meta, None, chunks); + } + if chunks.is_empty() { + Ok(()) + } else { + self.do_fetch_chunks(&chunks, false) + } + } + + fn prefetch_chunks(&self, range: &BlobIoRange) -> Result<()> { + // Assume data from tar file is always ready. + if self.is_tarfs { + return Ok(()); + } + + let chunks_extended; + let mut chunks = &range.chunks; + if let Some(v) = self.extend_pending_chunks(chunks, self.prefetch_batch_size())? { + chunks_extended = v; + chunks = &chunks_extended; + } + + let mut start = 0; + while start < chunks.len() { + // Figure out the range with continuous chunk ids, be careful that `end` is inclusive. + let mut end = start; + while end < chunks.len() - 1 && chunks[end + 1].id() == chunks[end].id() + 1 { + end += 1; + } + self.do_fetch_chunks(&chunks[start..=end], true)?; + start = end + 1; + } + + Ok(()) + } +} + +impl FileCacheEntry { + fn do_fetch_chunks(&self, chunks: &[Arc], prefetch: bool) -> Result<()> { + // Validate input parameters. + assert!(!chunks.is_empty()); + + // Get chunks not ready yet, also marking them as in-flight. 
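Both `prefetch_range` and `prefetch_chunks` above batch work by walking maximal runs of consecutive chunk ids, with an inclusive `end` index, and issuing one backend read per run. That run-splitting loop, extracted over plain ids as an illustrative helper, looks like this:

```rust
/// Split a sorted id list into maximal runs of consecutive ids.
/// Each run is returned as an inclusive (start, end) index pair.
fn consecutive_runs(ids: &[u32]) -> Vec<(usize, usize)> {
    let mut runs = Vec::new();
    let mut start = 0;
    while start < ids.len() {
        let mut end = start;
        while end < ids.len() - 1 && ids[end + 1] == ids[end] + 1 {
            end += 1;
        }
        runs.push((start, end)); // inclusive range [start, end]
        start = end + 1;
    }
    runs
}

fn main() {
    let ids = [4, 5, 6, 9, 10, 15];
    assert_eq!(consecutive_runs(&ids), vec![(0, 2), (3, 4), (5, 5)]);
}
```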
+ let bitmap = self + .chunk_map + .as_range_map() + .ok_or_else(|| einval!("invalid chunk_map for do_fetch_chunks()"))?; + let chunk_index = chunks[0].id(); + let count = chunks.len() as u32; + let pending = match bitmap.check_range_ready_and_mark_pending(chunk_index, count)? { + None => return Ok(()), + Some(v) => v, + }; + + let mut status = vec![false; count as usize]; + let (start_idx, end_idx) = { + let mut start = u32::MAX; + let mut end = 0; + for chunk_id in pending.iter() { + status[(*chunk_id - chunk_index) as usize] = true; + start = std::cmp::min(*chunk_id - chunk_index, start); + end = std::cmp::max(*chunk_id - chunk_index, end); + } + (start as usize, end as usize) + }; + + if start_idx <= end_idx { + let start_chunk = &chunks[start_idx]; + let end_chunk = &chunks[end_idx]; + let (blob_offset, blob_end, blob_size) = + self.get_blob_range(&chunks[start_idx..=end_idx])?; + trace!( + "fetch data range {:x}-{:x} for chunk {}-{} from blob {:x}", + blob_offset, + blob_end, + start_chunk.id(), + end_chunk.id(), + chunks[0].blob_index() + ); + + match self.read_chunks_from_backend( + blob_offset, + blob_size, + &chunks[start_idx..=end_idx], + prefetch, + ) { + Ok(mut bufs) => { + if self.is_raw_data { + let res = Self::persist_cached_data( + &self.file, + blob_offset, + bufs.compressed_buf(), + ); + for idx in start_idx..=end_idx { + if status[idx] { + self.update_chunk_pending_status(chunks[idx].as_ref(), res.is_ok()); + } + } + } else { + for idx in start_idx..=end_idx { + let mut buf = match bufs.next() { + None => return Err(einval!("invalid chunk decompressed status")), + Some(Err(e)) => { + for idx in idx..=end_idx { + if status[idx] { + bitmap.clear_range_pending(chunks[idx].id(), 1) + } + } + return Err(e); + } + Some(Ok(v)) => v, + }; + + if status[idx] { + if self.dio_enabled { + self.adjust_buffer_for_dio(&mut buf) + } + self.persist_chunk_data(chunks[idx].as_ref(), buf.as_ref()); + } + } + } + } + Err(e) => { + for idx in 0..chunks.len() { + if status[idx] { + bitmap.clear_range_pending(chunks[idx].id(), 1) + } + } + return Err(e); + } + } + } + + if !bitmap.wait_for_range_ready(chunk_index, count)? { + if prefetch { + return Err(eio!(format!( + "failed to prefetch data from storage backend for chunk {}/{}", + chunk_index, count + ))); + } + + // if we are in on-demand path, retry for the timeout chunks + for chunk in chunks { + match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) { + Err(e) => return Err(eio!(format!("do_fetch_chunks failed, {:?}", e))), + Ok(true) => {} + Ok(false) => { + info!("retry for timeout chunk, {}", chunk.id()); + let mut buf = alloc_buf(chunk.uncompressed_size() as usize); + self.read_chunk_from_backend(chunk.as_ref(), &mut buf) + .map_err(|e| { + self.update_chunk_pending_status(chunk.as_ref(), false); + eio!(format!("read_raw_chunk failed, {:?}", e)) + })?; + if self.dio_enabled { + self.adjust_buffer_for_dio(&mut buf) + } + self.persist_chunk_data(chunk.as_ref(), &buf); + } + } + } + } + + Ok(()) + } + + fn adjust_buffer_for_dio(&self, buf: &mut Vec) { + assert_eq!(buf.capacity() % 0x1000, 0); + if buf.len() != buf.capacity() { + // Padding with 0 for direct IO. + buf.resize(buf.capacity(), 0); + } + } +} + +impl FileCacheEntry { + // There are some assumption applied to the `bios` passed to `read_iter()`. + // - The blob address of chunks in `bios` are continuous. + // - There is at most one user io request in the `bios`. + // - The user io request may not be aligned on chunk boundary. 
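`adjust_buffer_for_dio` above zero-extends a decompressed buffer to its page-aligned capacity so that the following write keeps the 4096-byte alignment required by direct IO. A simplified sketch that rounds the length itself up to the next 4 KiB boundary (the real code relies on the allocator already providing a page-aligned capacity) is:

```rust
/// Simplified direct IO padding: round the buffer length up to 4 KiB and
/// zero-fill the tail before writing it to the cache file.
fn adjust_for_dio(buf: &mut Vec<u8>) {
    let aligned = (buf.len() + 0xfff) & !0xfff;
    buf.resize(aligned, 0);
}

fn main() {
    let mut buf = vec![7u8; 5000];
    adjust_for_dio(&mut buf);
    assert_eq!(buf.len(), 0x2000);
}
```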
+ // - The user io request may partially consume data from the first and last chunk of user io + // request. + // - Optionally there may be some prefetch/read amplify requests following the user io request. + // - The optional prefetch/read amplify requests may be silently dropped. + fn read_iter(&self, bios: &mut [BlobIoDesc], buffers: &[FileVolatileSlice]) -> Result { + // Merge requests with continuous blob addresses. + let requests = self + .merge_requests_for_user(bios, self.user_io_batch_size()) + .ok_or_else(|| { + for bio in bios.iter() { + self.update_chunk_pending_status(&bio.chunkinfo, false); + } + einval!("Empty bios list") + })?; + + let mut state = FileIoMergeState::new(); + let mut cursor = MemSliceCursor::new(buffers); + let mut total_read: usize = 0; + for (idx, req) in requests.iter().enumerate() { + total_read += self + .dispatch_one_range(req, &mut cursor, &mut state) + .map_err(|e| { + for req in requests.iter().skip(idx) { + for chunk in req.chunks.iter() { + self.update_chunk_pending_status(chunk.as_ref(), false); + } + } + e + })?; + state.reset(); + } + + Ok(total_read) + } + + fn dispatch_one_range( + &self, + req: &BlobIoRange, + cursor: &mut MemSliceCursor, + state: &mut FileIoMergeState, + ) -> Result { + let mut total_read: usize = 0; + + trace!("dispatch single io range {:?}", req); + let mut blob_cci = BlobCCI::new(); + for (i, chunk) in req.chunks.iter().enumerate() { + let is_ready = match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) { + Ok(true) => true, + Ok(false) => false, + Err(StorageError::Timeout) => false, // Retry if waiting for inflight IO timeouts + Err(e) => return Err(einval!(e)), + }; + + // Directly read chunk data from file cache into user buffer iff: + // - the chunk is ready in the file cache + // - data in the file cache is plaintext. + // - data validation is disabled + if is_ready && !self.is_raw_data && !self.is_cache_encrypted && !self.need_validation() + { + // Internal IO should not be committed to local cache region, just + // commit this region without pushing any chunk to avoid discontinuous + // chunks in a region. + if req.tags[i].is_user_io() { + state.push( + RegionType::CacheFast, + chunk.uncompressed_offset(), + chunk.uncompressed_size(), + req.tags[i].clone(), + None, + )?; + } else { + state.commit() + } + } else if !self.is_direct_chunkmap || is_ready { + // Case to try loading data from cache + // - chunk is ready but data validation is needed. + // - direct chunk map is not used, so there may be data in the file cache but + // the readiness flag has been lost. + if req.tags[i].is_user_io() { + state.push( + RegionType::CacheSlow, + chunk.uncompressed_offset(), + chunk.uncompressed_size(), + req.tags[i].clone(), + Some(req.chunks[i].clone()), + )?; + } else { + state.commit(); + // On slow path, don't try to handle internal(read amplification) IO. + if !is_ready { + self.chunk_map.clear_pending(chunk.as_ref()); + } + } + } else { + let tag = if let BlobIoTag::User(ref s) = req.tags[i] { + BlobIoTag::User(s.clone()) + } else { + BlobIoTag::Internal + }; + + // Lazy load blob meta info if needed. + if chunk.is_batch() && blob_cci.is_none() { + blob_cci.set_meta(self.get_blob_meta_info()?)?; + } + + let (start, len) = blob_cci.get_compressed_info(chunk)?; + + // NOTE: Only this request region can read more chunks from backend with user io. 
+ state.push(RegionType::Backend, start, len, tag, Some(chunk.clone()))?; + } + } + + for r in &state.regions { + use RegionType::*; + + total_read += match r.r#type { + CacheFast => self.dispatch_cache_fast(cursor, r)?, + CacheSlow => self.dispatch_cache_slow(cursor, r)?, + Backend => self.dispatch_backend(cursor, r)?, + } + } + + Ok(total_read) + } + + // Directly read data requested by user from the file cache into the user memory buffer. + fn dispatch_cache_fast(&self, cursor: &mut MemSliceCursor, region: &Region) -> Result { + let offset = region.blob_address + region.seg.offset as u64; + let size = region.seg.len as usize; + let mut iovec = cursor.consume(size); + + self.metrics.partial_hits.inc(); + readv(self.file.as_raw_fd(), &mut iovec, offset) + } + + // Try to read data from blob cache and validate it, fallback to storage backend. + fn dispatch_cache_slow(&self, cursor: &mut MemSliceCursor, region: &Region) -> Result { + let mut total_read = 0; + + for (i, c) in region.chunks.iter().enumerate() { + let user_offset = if i == 0 { region.seg.offset } else { 0 }; + let size = std::cmp::min( + c.uncompressed_size() - user_offset, + region.seg.len - total_read as u32, + ); + total_read += self.read_single_chunk(c.clone(), user_offset, size, cursor)?; + } + + Ok(total_read) + } + + fn dispatch_backend(&self, mem_cursor: &mut MemSliceCursor, r: &Region) -> Result { + let mut region = r; + debug!( + "{} try to read {} bytes of {} chunks from backend", + std::thread::current().name().unwrap_or_default(), + region.blob_len, + region.chunks.len() + ); + + if region.chunks.is_empty() { + return Ok(0); + } else if !region.has_user_io() { + debug!("No user data"); + for c in ®ion.chunks { + self.chunk_map.clear_pending(c.as_ref()); + } + return Ok(0); + } + if region.chunks.len() > 1 { + let mut blob_cci = BlobCCI::new(); + // Validate the chunk order. + for idx in 0..region.chunks.len() - 1 { + let pre_chunk = ®ion.chunks[idx]; + let next_chunk = ®ion.chunks[idx + 1]; + + // Lazy load blob meta info if needed. + if (pre_chunk.is_batch() || next_chunk.is_batch()) && blob_cci.is_none() { + blob_cci.set_meta(self.get_blob_meta_info()?)?; + } + + let (pre_offset, pre_size) = blob_cci.get_compressed_info(pre_chunk)?; + let end = pre_offset + pre_size as u64; + + let start = blob_cci.get_compressed_offset(next_chunk)?; + + assert!(end <= start); + assert!(start - end <= self.user_io_batch_size() >> RAFS_BATCH_SIZE_TO_GAP_SHIFT); + assert!(region.chunks[idx].id() < region.chunks[idx + 1].id()); + } + } + + // Try to extend requests. + let mut region_hold; + if let Some(v) = self.extend_pending_chunks(®ion.chunks, self.user_io_batch_size())? 
{ + if v.len() > r.chunks.len() { + let mut tag_set = HashSet::new(); + for (idx, chunk) in region.chunks.iter().enumerate() { + if region.tags[idx] { + tag_set.insert(chunk.id()); + } + } + + region_hold = Region::with(self, region, v)?; + for (idx, c) in region_hold.chunks.iter().enumerate() { + if tag_set.contains(&c.id()) { + region_hold.tags[idx] = true; + } + } + region = ®ion_hold; + trace!( + "extended blob request from 0x{:x}/0x{:x} to 0x{:x}/0x{:x} with {} chunks", + r.blob_address, + r.blob_len, + region_hold.blob_address, + region_hold.blob_len, + region_hold.chunks.len(), + ); + } + } + + if self.is_zran() { + let mut r = region.clone(); + let (blob_offset, _blob_end, blob_size) = self.get_blob_range(&r.chunks)?; + r.blob_address = blob_offset; + r.blob_len = blob_size as u32; + region_hold = r; + region = ®ion_hold; + } + + let bufs = self + .read_chunks_from_backend( + region.blob_address, + region.blob_len as usize, + ®ion.chunks, + false, + ) + .map_err(|e| { + for c in ®ion.chunks { + self.chunk_map.clear_pending(c.as_ref()); + } + e + })?; + + if self.is_raw_data { + let res = + Self::persist_cached_data(&self.file, region.blob_address, bufs.compressed_buf()); + for chunk in region.chunks.iter() { + self.update_chunk_pending_status(chunk.as_ref(), res.is_ok()); + } + res?; + } + + let mut chunk_buffers = Vec::with_capacity(region.chunks.len()); + let mut buffer_holder = Vec::with_capacity(region.chunks.len()); + for (i, v) in bufs.enumerate() { + let d = Arc::new(DataBuffer::Allocated(v?)); + if region.tags[i] { + buffer_holder.push(d.clone()); + } + if !self.is_raw_data { + self.delay_persist_chunk_data(region.chunks[i].clone(), d); + } + } + for d in buffer_holder.iter() { + chunk_buffers.push(d.as_ref().slice()); + } + + let total_read = copyv( + &chunk_buffers, + mem_cursor.mem_slice, + region.seg.offset as usize, + region.seg.len as usize, + mem_cursor.index, + mem_cursor.offset, + ) + .map(|(n, _)| n) + .map_err(|e| { + error!("failed to copy from chunk buf to buf: {:?}", e); + eio!(e) + })?; + mem_cursor.move_cursor(total_read); + + Ok(total_read) + } + + // Called with chunk in READY or PENDING state, exit with chunk set to READY or PENDING cleared. + fn read_single_chunk( + &self, + chunk: Arc, + user_offset: u32, + size: u32, + mem_cursor: &mut MemSliceCursor, + ) -> Result { + trace!( + "read_single_chunk {:x}:{:x}:{:x}/@{}", + chunk.compressed_offset(), + user_offset, + size, + chunk.blob_index() + ); + + let buffer_holder; + let d_size = chunk.uncompressed_size() as usize; + let mut d = DataBuffer::Allocated(alloc_buf(d_size)); + + // Try to read and validate data from cache if: + // - it's an stargz image and the chunk is ready. + // - chunk data validation is enabled. + // - digested or dummy chunk map is used. 
+ let is_ready = self.chunk_map.is_ready(chunk.as_ref())?; + let try_cache = is_ready || !self.is_direct_chunkmap; + let buffer = if try_cache && self.read_file_cache(chunk.as_ref(), d.mut_slice()).is_ok() { + self.metrics.whole_hits.inc(); + self.chunk_map.set_ready_and_clear_pending(chunk.as_ref())?; + trace!( + "recover blob cache {} {} offset {} size {}", + chunk.id(), + d_size, + user_offset, + size, + ); + &d + } else { + let c = self + .read_chunk_from_backend(chunk.as_ref(), d.mut_slice()) + .map_err(|e| { + self.chunk_map.clear_pending(chunk.as_ref()); + e + })?; + if self.is_raw_data { + match c { + Some(v) => { + let buf = Arc::new(DataBuffer::Allocated(v)); + self.delay_persist_chunk_data(chunk.clone(), buf); + &d + } + None => { + buffer_holder = Arc::new(d.convert_to_owned_buffer()); + self.delay_persist_chunk_data(chunk.clone(), buffer_holder.clone()); + buffer_holder.as_ref() + } + } + } else { + buffer_holder = Arc::new(d.convert_to_owned_buffer()); + self.delay_persist_chunk_data(chunk.clone(), buffer_holder.clone()); + buffer_holder.as_ref() + } + }; + + let dst_buffers = mem_cursor.inner_slice(); + let read_size = copyv( + &[buffer.slice()], + dst_buffers, + user_offset as usize, + size as usize, + mem_cursor.index, + mem_cursor.offset, + ) + .map(|r| r.0) + .map_err(|e| { + error!("failed to copy from chunk buf to buf: {:?}", e); + eother!(e) + })?; + mem_cursor.move_cursor(read_size); + + Ok(read_size) + } + + fn read_file_cache(&self, chunk: &dyn BlobChunkInfo, buffer: &mut [u8]) -> Result<()> { + if self.is_raw_data { + let offset = chunk.compressed_offset(); + let size = if self.is_legacy_stargz() { + self.get_legacy_stargz_size(offset, chunk.uncompressed_size() as usize)? as u64 + } else { + chunk.compressed_size() as u64 + }; + let mut reader = FileRangeReader::new(&self.file, offset, size); + if !chunk.is_compressed() { + reader.read_exact(buffer)?; + } else if self.blob_compressor() == compress::Algorithm::Lz4Block { + let mut buf = alloc_buf(size as usize); + reader.read_exact(&mut buf)?; + let size = compress::decompress(&buf, buffer, self.blob_compressor())?; + if size != buffer.len() { + return Err(einval!( + "data size decoded by lz4_block doesn't match expected" + )); + } + } else { + let mut decoder = Decoder::new(reader, self.blob_compressor())?; + decoder.read_exact(buffer)?; + } + } else if self.is_cache_encrypted { + let offset = chunk.uncompressed_offset(); + let size = chunk.uncompressed_size() as usize; + let cipher_object = self.cache_cipher_object.clone(); + let cipher_context = self.cache_cipher_context.clone(); + let (key, iv) = cipher_context.generate_cipher_meta(&chunk.chunk_id().data); + + let align_size = round_up_usize(size, ENCRYPTION_PAGE_SIZE); + let mut buf = alloc_buf(align_size); + FileRangeReader::new(&self.file, offset, align_size as u64).read_exact(&mut buf)?; + + let mut pos = 0; + while pos < buffer.len() { + assert!(pos + ENCRYPTION_PAGE_SIZE <= buf.len()); + match cipher_object.decrypt(key, Some(&iv), &buf[pos..pos + ENCRYPTION_PAGE_SIZE]) { + Ok(buf2) => { + let len = std::cmp::min(buffer.len() - pos, ENCRYPTION_PAGE_SIZE); + buffer[pos..pos + len].copy_from_slice(&buf2[..len]); + pos += ENCRYPTION_PAGE_SIZE; + } + Err(_) => return Err(eother!("failed to decrypt data from cache file")), + } + } + } else { + let offset = chunk.uncompressed_offset(); + let size = chunk.uncompressed_size() as u64; + FileRangeReader::new(&self.file, offset, size).read_exact(buffer)?; + } + self.validate_chunk_data(chunk, buffer, false)?; + 
Ok(()) + } + + fn merge_requests_for_user( + &self, + bios: &[BlobIoDesc], + max_comp_size: u64, + ) -> Option> { + let mut requests: Vec = Vec::with_capacity(bios.len()); + + BlobIoMergeState::merge_and_issue( + bios, + max_comp_size, + max_comp_size >> RAFS_BATCH_SIZE_TO_GAP_SHIFT, + |mr: BlobIoRange| { + requests.push(mr); + }, + ); + + if requests.is_empty() { + None + } else { + Some(requests) + } + } +} + +/// An enum to reuse existing buffers for IO operations, and CoW on demand. +#[allow(dead_code)] +enum DataBuffer { + Reuse(ManuallyDrop>), + Allocated(Vec), +} + +impl DataBuffer { + fn slice(&self) -> &[u8] { + match self { + Self::Reuse(data) => data.as_slice(), + Self::Allocated(data) => data.as_slice(), + } + } + + fn mut_slice(&mut self) -> &mut [u8] { + match self { + Self::Reuse(ref mut data) => data.as_mut_slice(), + Self::Allocated(ref mut data) => data.as_mut_slice(), + } + } + + fn size(&self) -> usize { + match self { + Self::Reuse(_) => 0, + Self::Allocated(data) => data.capacity(), + } + } + + /// Make sure it owns the underlying memory buffer. + fn convert_to_owned_buffer(self) -> Self { + if let DataBuffer::Reuse(data) = self { + DataBuffer::Allocated((*data).to_vec()) + } else { + self + } + } + + #[allow(dead_code)] + unsafe fn from_mut_slice(buf: &mut [u8]) -> Self { + DataBuffer::Reuse(ManuallyDrop::new(Vec::from_raw_parts( + buf.as_mut_ptr(), + buf.len(), + buf.len(), + ))) + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum RegionStatus { + Init, + Open, + Committed, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum RegionType { + // Fast path to read data from the cache directly, no decompression and validation needed. + CacheFast, + // Slow path to read data from the cache, due to decompression or validation. + CacheSlow, + // Need to read data from storage backend. + Backend, +} + +impl RegionType { + fn joinable(&self, other: Self) -> bool { + *self == other + } +} + +/// A continuous region in cache file or backend storage/blob, it may contain several chunks. +#[derive(Clone)] +struct Region { + r#type: RegionType, + status: RegionStatus, + // For debug and trace purpose implying how many chunks are concatenated + count: u32, + + chunks: Vec>, + tags: Vec, + + // The range [blob_address, blob_address + blob_len) specifies data to be read from backend. + blob_address: u64, + blob_len: u32, + // The range specifying data to return to user. 
+ seg: BlobIoSegment, +} + +impl Region { + fn new(region_type: RegionType) -> Self { + Region { + r#type: region_type, + status: RegionStatus::Init, + count: 0, + chunks: Vec::with_capacity(8), + tags: Vec::with_capacity(8), + blob_address: 0, + blob_len: 0, + seg: Default::default(), + } + } + + fn with( + ctx: &FileCacheEntry, + region: &Region, + chunks: Vec>, + ) -> Result { + assert!(!chunks.is_empty()); + let len = chunks.len(); + let first_chunk = &chunks[0]; + let last_chunk = &chunks[len - 1]; + + let mut blob_cci = BlobCCI::new(); + if first_chunk.is_batch() || last_chunk.is_batch() { + blob_cci.set_meta(ctx.get_blob_meta_info()?)?; + } + + let (blob_address, blob_len) = { + let first_offset = blob_cci.get_compressed_offset(first_chunk)?; + let (last_offset, last_size) = blob_cci.get_compressed_info(last_chunk)?; + let size_between = last_offset - first_offset; + assert!(size_between < u32::MAX as u64); + (first_offset, size_between as u32 + last_size) + }; + + Ok(Region { + r#type: region.r#type, + status: region.status, + count: len as u32, + chunks, + tags: vec![false; len], + blob_address, + blob_len, + seg: region.seg.clone(), + }) + } + + fn append( + &mut self, + start: u64, + len: u32, + tag: BlobIoTag, + chunk: Option>, + ) -> StorageResult<()> { + assert_ne!(self.status, RegionStatus::Committed); + + if self.status == RegionStatus::Init { + self.status = RegionStatus::Open; + self.blob_address = start; + self.blob_len = len; + self.count = 1; + } else { + assert_eq!(self.status, RegionStatus::Open); + let end = self.blob_address + self.blob_len as u64; + if end + RAFS_DEFAULT_CHUNK_SIZE < start || start.checked_add(len as u64).is_none() { + return Err(StorageError::NotContinuous); + } + let sz = start + len as u64 - end; + self.blob_len += sz as u32; + self.count += 1; + } + + // Maintain information for user triggered IO requests. + if let BlobIoTag::User(ref s) = tag { + if self.seg.is_empty() { + self.seg = BlobIoSegment::new(s.offset, s.len); + } else { + self.seg.append(s.offset, s.len); + } + } + + if let Some(c) = chunk { + self.chunks.push(c); + self.tags.push(tag.is_user_io()); + } + + Ok(()) + } + + fn has_user_io(&self) -> bool { + !self.seg.is_empty() + } +} + +struct FileIoMergeState { + regions: Vec, + // Whether last region can take in more io chunks. If not, a new region has to be + // created for following chunks. 
+ last_region_joinable: bool, +} + +impl FileIoMergeState { + fn new() -> Self { + FileIoMergeState { + regions: Vec::with_capacity(8), + last_region_joinable: true, + } + } + + fn push( + &mut self, + region_type: RegionType, + start: u64, + len: u32, + tag: BlobIoTag, + chunk: Option>, + ) -> Result<()> { + // Make sure user io of same region continuous + if !self.regions.is_empty() && self.joinable(region_type) { + let region = &self.regions[self.regions.len() - 1]; + if !region.seg.is_empty() && tag.is_user_io() { + if let BlobIoTag::User(ref seg) = tag { + if seg.offset as u64 + start + != region.blob_address + region.seg.offset as u64 + region.seg.len as u64 + { + self.commit(); + } + } + } + } + + if self.regions.is_empty() || !self.joinable(region_type) { + self.regions.push(Region::new(region_type)); + self.last_region_joinable = true; + } + + let idx = self.regions.len() - 1; + self.regions[idx] + .append(start, len, tag, chunk) + .map_err(|e| einval!(e)) + } + + // Committing current region ensures a new region will be created when more + // chunks has to be added since `push` checks if newly pushed chunk is continuous + // After committing, following `push` will create a new region. + fn commit(&mut self) { + self.last_region_joinable = false; + } + + fn reset(&mut self) { + self.regions.truncate(0); + self.last_region_joinable = true; + } + + #[inline] + fn joinable(&self, region_type: RegionType) -> bool { + assert!(!self.regions.is_empty()); + let idx = self.regions.len() - 1; + + self.regions[idx].r#type.joinable(region_type) && self.last_region_joinable + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::device::{BlobChunkFlags, BlobFeatures}; + use crate::meta::*; + use crate::test::MockChunkInfo; + + #[test] + fn test_data_buffer() { + let mut buf1 = vec![0x1u8; 8]; + let buf2 = unsafe { DataBuffer::from_mut_slice(buf1.as_mut_slice()) }; + + assert_eq!(buf2.slice()[1], 0x1); + let mut buf2 = buf2.convert_to_owned_buffer(); + buf2.mut_slice()[1] = 0x2; + assert_eq!(buf1[1], 0x1); + } + + #[test] + fn test_region_type() { + assert!(RegionType::CacheFast.joinable(RegionType::CacheFast)); + assert!(RegionType::CacheSlow.joinable(RegionType::CacheSlow)); + assert!(RegionType::Backend.joinable(RegionType::Backend)); + + assert!(!RegionType::CacheFast.joinable(RegionType::CacheSlow)); + assert!(!RegionType::CacheFast.joinable(RegionType::Backend)); + assert!(!RegionType::CacheSlow.joinable(RegionType::CacheFast)); + assert!(!RegionType::CacheSlow.joinable(RegionType::Backend)); + assert!(!RegionType::Backend.joinable(RegionType::CacheFast)); + assert!(!RegionType::Backend.joinable(RegionType::CacheSlow)); + } + + #[test] + fn test_region_new() { + let region = Region::new(RegionType::CacheFast); + + assert_eq!(region.status, RegionStatus::Init); + assert!(!region.has_user_io()); + assert!(region.seg.is_empty()); + assert_eq!(region.chunks.len(), 0); + assert_eq!(region.tags.len(), 0); + assert_eq!(region.blob_address, 0); + assert_eq!(region.blob_len, 0); + } + + #[test] + fn test_region_append() { + let mut region = Region::new(RegionType::CacheFast); + + let tag = BlobIoTag::User(BlobIoSegment { + offset: 0x1800, + len: 0x1800, + }); + region.append(0x1000, 0x2000, tag, None).unwrap(); + assert_eq!(region.status, RegionStatus::Open); + assert_eq!(region.blob_address, 0x1000); + assert_eq!(region.blob_len, 0x2000); + assert_eq!(region.chunks.len(), 0); + assert_eq!(region.tags.len(), 0); + assert!(!region.seg.is_empty()); + assert!(region.has_user_io()); + + let 
tag = BlobIoTag::User(BlobIoSegment { + offset: 0x0000, + len: 0x2000, + }); + region.append(0x100004000, 0x2000, tag, None).unwrap_err(); + assert_eq!(region.status, RegionStatus::Open); + assert_eq!(region.blob_address, 0x1000); + assert_eq!(region.blob_len, 0x2000); + assert_eq!(region.seg.offset, 0x1800); + assert_eq!(region.seg.len, 0x1800); + assert_eq!(region.chunks.len(), 0); + assert_eq!(region.tags.len(), 0); + assert!(region.has_user_io()); + + let tag = BlobIoTag::User(BlobIoSegment { + offset: 0x0000, + len: 0x2000, + }); + region.append(0x4000, 0x2000, tag, None).unwrap(); + assert_eq!(region.status, RegionStatus::Open); + assert_eq!(region.blob_address, 0x1000); + assert_eq!(region.blob_len, 0x5000); + assert_eq!(region.seg.offset, 0x1800); + assert_eq!(region.seg.len, 0x3800); + assert_eq!(region.chunks.len(), 0); + assert_eq!(region.tags.len(), 0); + assert!(!region.seg.is_empty()); + assert!(region.has_user_io()); + } + + #[test] + fn test_file_io_merge_state() { + let mut state = FileIoMergeState::new(); + assert_eq!(state.regions.len(), 0); + + let tag = BlobIoTag::User(BlobIoSegment { + offset: 0x1800, + len: 0x800, + }); + state + .push(RegionType::CacheFast, 0x1000, 0x2000, tag, None) + .unwrap(); + assert_eq!(state.regions.len(), 1); + + let tag = BlobIoTag::User(BlobIoSegment { + offset: 0x0000, + len: 0x2000, + }); + state + .push(RegionType::CacheFast, 0x3000, 0x2000, tag, None) + .unwrap(); + assert_eq!(state.regions.len(), 1); + + let tag = BlobIoTag::User(BlobIoSegment { + offset: 0x0001, + len: 0x1fff, + }); + state + .push(RegionType::CacheSlow, 0x5000, 0x2000, tag, None) + .unwrap(); + assert_eq!(state.regions.len(), 2); + } + + #[test] + fn test_blob_cci() { + // Batch chunks: [chunk0, chunk1] + let mut chunk0 = BlobChunkInfoV2Ondisk::default(); + chunk0.set_batch(true); + chunk0.set_compressed(true); + chunk0.set_batch_index(0); + chunk0.set_uncompressed_offset_in_batch_buf(0); + chunk0.set_uncompressed_offset(0); + chunk0.set_uncompressed_size(0x2000); + + let mut chunk1 = BlobChunkInfoV2Ondisk::default(); + chunk1.set_batch(true); + chunk1.set_compressed(true); + chunk1.set_batch_index(0); + chunk1.set_uncompressed_offset_in_batch_buf(0x2000); + chunk1.set_uncompressed_offset(0x2000); + chunk1.set_uncompressed_size(0x1000); + + let mut batch_ctx0 = BatchInflateContext::default(); + batch_ctx0.set_uncompressed_batch_size(0x3000); + batch_ctx0.set_compressed_size(0x2000); + + let chunk_info_array = vec![chunk0, chunk1]; + let chunk_infos = BlobMetaChunkArray::V2(chunk_info_array); + let chunk_infos = ManuallyDrop::new(chunk_infos); + + let batch_ctx_array = vec![batch_ctx0]; + let batch_ctxes = ManuallyDrop::new(batch_ctx_array); + + let mut state = BlobCompressionContext::default(); + state.chunk_info_array = chunk_infos; + state.batch_info_array = batch_ctxes; + state.compressed_size = 0x2000; + state.uncompressed_size = 0x3000; + state.blob_features = (BlobFeatures::BATCH + | BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2) + .bits(); + + let state = Arc::new(state); + let meta = BlobCompressionContextInfo { state }; + + let mut blob_cci = BlobCCI::new(); + assert!(blob_cci.set_meta(None).is_err()); + + blob_cci.set_meta(Some(Arc::new(meta))).unwrap(); + assert!(!blob_cci.is_none()); + + let normal_chunk: Arc = Arc::new(MockChunkInfo { + compress_size: 0x100, + compress_offset: 0x1000, + ..Default::default() + }); + // For normal chunk, just read the BlobChunkInfo. 
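The assertions that follow all come down to the rule stated in the comment above: a normal chunk answers with its own compressed offset and size, while a batch chunk defers to the shared batch context it belongs to. A small standalone sketch of that rule, with made-up types standing in for `BlobChunkInfo` and `BatchInflateContext`:

```rust
/// Illustrative stand-ins for the real chunk and batch-context types.
enum ChunkKind {
    Normal { c_offset: u64, c_size: u32 },
    Batch { batch_index: usize },
}

struct BatchCtx {
    c_offset: u64,
    c_size: u32,
}

/// Resolve the compressed (offset, size) a read request should use.
fn compressed_info(chunk: &ChunkKind, batches: &[BatchCtx]) -> (u64, u32) {
    match chunk {
        ChunkKind::Normal { c_offset, c_size } => (*c_offset, *c_size),
        ChunkKind::Batch { batch_index } => {
            let b = &batches[*batch_index];
            (b.c_offset, b.c_size)
        }
    }
}

fn main() {
    // Mirrors the values used by the test: the whole batch is 0x2000 bytes at offset 0.
    let batches = [BatchCtx { c_offset: 0, c_size: 0x2000 }];
    let normal = ChunkKind::Normal { c_offset: 0x1000, c_size: 0x100 };
    let batched = ChunkKind::Batch { batch_index: 0 };
    assert_eq!(compressed_info(&normal, &batches), (0x1000, 0x100));
    assert_eq!(compressed_info(&batched, &batches), (0, 0x2000));
}
```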
+ let c_offset = blob_cci.get_compressed_offset(&normal_chunk).unwrap(); + assert_eq!(c_offset, 0x1000); + + let (c_offset, c_size) = blob_cci.get_compressed_info(&normal_chunk).unwrap(); + assert_eq!(c_offset, 0x1000); + assert_eq!(c_size, 0x100); + + let c_end = blob_cci.get_compressed_end(&normal_chunk).unwrap(); + assert_eq!(c_end, 0x1100); + + let batch_chunk: Arc = Arc::new(MockChunkInfo { + index: 1, + blob_index: 0, + flags: BlobChunkFlags::BATCH, + ..Default::default() + }); + assert!(batch_chunk.is_batch()); + // For batch chunk, read from BlobCompressionContext. + let c_offset = blob_cci.get_compressed_offset(&batch_chunk).unwrap(); + assert_eq!(c_offset, 0); + + let (c_offset, c_size) = blob_cci.get_compressed_info(&batch_chunk).unwrap(); + assert_eq!(c_offset, 0); + assert_eq!(c_size, 0x2000); + + let c_end = blob_cci.get_compressed_end(&batch_chunk).unwrap(); + assert_eq!(c_end, 0x2000); + } +} diff --git a/storage/src/cache/dedup/db.rs b/storage/src/cache/dedup/db.rs index 6daff37c70b..c447d22203b 100644 --- a/storage/src/cache/dedup/db.rs +++ b/storage/src/cache/dedup/db.rs @@ -1,317 +1,317 @@ -// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -#![allow(unused)] - -use std::path::Path; - -use r2d2::{Pool, PooledConnection}; -use r2d2_sqlite::SqliteConnectionManager; -use rusqlite::{Connection, DropBehavior, OptionalExtension, Transaction}; - -use super::Result; - -pub struct CasDb { - pool: Pool, -} - -impl CasDb { - pub fn new(path: impl AsRef) -> Result { - let mut db_path = path.as_ref().to_owned(); - db_path.push("cas.db"); - Self::from_file(db_path) - } - - pub fn from_file(db_path: impl AsRef) -> Result { - let mgr = SqliteConnectionManager::file(db_path); - let pool = r2d2::Pool::new(mgr)?; - let conn = pool.get()?; - - conn.execute( - "CREATE TABLE IF NOT EXISTS Blobs ( - BlobId INTEGER PRIMARY KEY, - FilePath TEXT NOT NULL UNIQUE - )", - (), - )?; - - conn.execute( - "CREATE TABLE IF NOT EXISTS Chunks ( - ChunkId TEXT NOT NULL, - ChunkOffset INTEGER, - BlobId INTEGER, - UNIQUE(ChunkId, BlobId) ON CONFLICT IGNORE, - FOREIGN KEY(BlobId) REFERENCES Blobs(BlobId) - )", - (), - )?; - conn.execute( - "CREATE INDEX IF NOT EXISTS ChunkIndex ON Chunks(ChunkId)", - (), - )?; - - Ok(CasDb { pool }) - } - - pub fn get_blob_id_with_tx(tran: &Transaction, blob: &str) -> Result> { - let sql = "SELECT BlobId FROM Blobs WHERE FilePath = ?"; - - if let Some(id) = tran - .query_row(sql, [blob], |row| row.get::(0)) - .optional()? - { - return Ok(Some(id)); - } - - Ok(None) - } - - pub fn get_blob_id(&self, blob: &str) -> Result> { - let sql = "SELECT BlobId FROM Blobs WHERE FilePath = ?"; - - if let Some(id) = self - .get_connection()? - .query_row(sql, [blob], |row| row.get::(0)) - .optional()? - { - return Ok(Some(id)); - } - - Ok(None) - } - - pub fn get_blob_path(&self, id: u64) -> Result> { - let sql = "SELECT FilePath FROM Blobs WHERE BlobId = ?"; - - if let Some(path) = self - .get_connection()? - .query_row(sql, [id], |row| row.get::(0)) - .optional()? 
- { - return Ok(Some(path)); - }; - - Ok(None) - } - - pub fn get_all_blobs(&self) -> Result> { - let conn = self.get_connection()?; - let mut stmt = conn.prepare_cached("SELECT BlobId, FilePath FROM Blobs")?; - let rows = stmt.query_map([], |row| Ok((row.get::(0)?, row.get(1)?)))?; - let mut results: Vec<(u64, String)> = Vec::new(); - for row in rows { - results.push(row?); - } - Ok(results) - } - - pub fn add_blobs(&mut self, blobs: &[String]) -> Result<()> { - let sql = "INSERT OR IGNORE INTO Blobs (FilePath) VALUES (?1)"; - let mut conn = self.get_connection()?; - let tran = Self::begin_transaction(&mut conn)?; - - for blob in blobs { - if let Err(e) = tran.execute(sql, [blob]) { - return Err(e.into()); - }; - } - tran.commit()?; - - Ok(()) - } - - pub fn add_blob(&self, blob: &str) -> Result { - let sql = "INSERT OR IGNORE INTO Blobs (FilePath) VALUES (?1)"; - let conn = self.get_connection()?; - conn.execute(sql, [blob])?; - Ok(conn.last_insert_rowid() as u64) - } - - pub fn delete_blobs(&mut self, blobs: &[String]) -> Result<()> { - let delete_blobs_sql = "DELETE FROM Blobs WHERE BlobId = (?1)"; - let delete_chunks_sql = "DELETE FROM Chunks WHERE BlobId = (?1)"; - let mut conn = self.get_connection()?; - let tran = Self::begin_transaction(&mut conn)?; - - for blob in blobs { - if let Some(id) = Self::get_blob_id_with_tx(&tran, blob)? { - if let Err(e) = tran.execute(delete_chunks_sql, [id]) { - return Err(e.into()); - } - if let Err(e) = tran.execute(delete_blobs_sql, [id]) { - return Err(e.into()); - } - } - } - tran.commit()?; - - Ok(()) - } - - pub fn get_chunk_info(&self, chunk_id: &str) -> Result> { - let sql = "SELECT FilePath, ChunkOffset \ - FROM Chunks INDEXED BY ChunkIndex \ - JOIN Blobs ON Chunks.BlobId = Blobs.BlobId \ - WHERE ChunkId = ?\ - ORDER BY Blobs.BlobId LIMIT 1 OFFSET 0"; - - if let Some((new_blob_id, chunk_info)) = self - .get_connection()? - .query_row(sql, [chunk_id], |row| { - Ok((row.get(0)?, row.get::(1)?)) - }) - .optional()? 
- { - return Ok(Some((new_blob_id, chunk_info))); - } - - Ok(None) - } - - pub fn add_chunks(&mut self, chunks: &[(String, u64, String)]) -> Result<()> { - let sql = "INSERT OR IGNORE INTO Chunks (ChunkId, ChunkOffset, BlobId) VALUES (?1, ?2, ?3)"; - let mut conn = self.get_connection()?; - let tran = Self::begin_transaction(&mut conn)?; - - for chunk in chunks { - match Self::get_blob_id_with_tx(&tran, &chunk.2) { - Err(e) => return Err(e), - Ok(id) => { - if let Err(e) = tran.execute(sql, (&chunk.0, &chunk.1, id)) { - return Err(e.into()); - } - } - } - } - tran.commit()?; - - Ok(()) - } - - pub fn add_chunk(&self, chunk_id: &str, chunk_offset: u64, blob_id: &str) -> Result<()> { - let sql = "INSERT OR IGNORE INTO Chunks (ChunkId, ChunkOffset, BlobId) VALUES (?1, ?2, ?3)"; - let mut conn = self.get_connection()?; - let tran = Self::begin_transaction(&mut conn)?; - - match Self::get_blob_id_with_tx(&tran, blob_id) { - Err(e) => return Err(e), - Ok(id) => { - if let Err(e) = tran.execute(sql, (chunk_id, chunk_offset, id)) { - return Err(e.into()); - } - } - } - tran.commit()?; - - Ok(()) - } - - fn begin_transaction( - conn: &mut PooledConnection, - ) -> Result { - let mut tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?; - tx.set_drop_behavior(DropBehavior::Rollback); - Ok(tx) - } - - fn get_connection(&self) -> Result> { - let conn = self.pool.get()?; - conn.busy_handler(Some(|_v| true))?; - Ok(conn) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use vmm_sys_util::tempdir::TempDir; - - #[test] - fn test_cas_blob() { - let tmpdir = TempDir::new().unwrap(); - - let mut cas_mgr = CasDb::new(tmpdir.as_path()).unwrap(); - cas_mgr - .add_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) - .unwrap(); - - let mut mgr2 = CasDb::new(tmpdir.as_path()).unwrap(); - assert_eq!(mgr2.add_blob("/tmp/blob3").unwrap(), 3); - - drop(cas_mgr); - - assert_eq!(mgr2.get_blob_id("/tmp/blob1").unwrap(), Some(1)); - assert_eq!(mgr2.get_blob_id("/tmp/blob2").unwrap(), Some(2)); - assert_eq!(mgr2.get_blob_id("/tmp/blob3").unwrap(), Some(3)); - assert_eq!(mgr2.get_blob_id("/tmp/blob4").unwrap(), None); - - assert_eq!( - mgr2.get_blob_path(1).unwrap(), - Some("/tmp/blob1".to_string()) - ); - assert_eq!( - mgr2.get_blob_path(2).unwrap(), - Some("/tmp/blob2".to_string()) - ); - assert_eq!( - mgr2.get_blob_path(3).unwrap(), - Some("/tmp/blob3".to_string()) - ); - assert_eq!(mgr2.get_blob_path(4).unwrap(), None); - - let blobs = mgr2.get_all_blobs().unwrap(); - assert_eq!(blobs.len(), 3); - - mgr2.delete_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) - .unwrap(); - assert_eq!(mgr2.get_blob_path(1).unwrap(), None); - assert_eq!(mgr2.get_blob_path(2).unwrap(), None); - assert_eq!( - mgr2.get_blob_path(3).unwrap(), - Some("/tmp/blob3".to_string()) - ); - - let blobs = mgr2.get_all_blobs().unwrap(); - assert_eq!(blobs.len(), 1); - } - - #[test] - fn test_cas_chunk() { - let tmpdir = TempDir::new().unwrap(); - let mut cas_mgr = CasDb::new(tmpdir.as_path()).unwrap(); - cas_mgr - .add_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) - .unwrap(); - - cas_mgr - .add_chunks(&[ - ("chunk1".to_string(), 4096, "/tmp/blob1".to_string()), - ("chunk2".to_string(), 0, "/tmp/blob2".to_string()), - ]) - .unwrap(); - - let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); - assert_eq!(&file, "/tmp/blob1"); - assert_eq!(offset, 4096); - let (file, offset) = cas_mgr.get_chunk_info("chunk2").unwrap().unwrap(); - assert_eq!(&file, "/tmp/blob2"); - 
assert_eq!(offset, 0); - - cas_mgr.add_chunk("chunk1", 8192, "/tmp/blob2").unwrap(); - let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); - assert_eq!(&file, "/tmp/blob1"); - assert_eq!(offset, 4096); - - cas_mgr.delete_blobs(&["/tmp/blob1".to_string()]).unwrap(); - let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); - assert_eq!(&file, "/tmp/blob2"); - assert_eq!(offset, 8192); - - cas_mgr.delete_blobs(&["/tmp/blob2".to_string()]).unwrap(); - let res = cas_mgr.get_chunk_info("chunk1").unwrap(); - assert!(res.is_none()); - let res = cas_mgr.get_chunk_info("chunk2").unwrap(); - assert!(res.is_none()); - } -} +// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#![allow(unused)] + +use std::path::Path; + +use r2d2::{Pool, PooledConnection}; +use r2d2_sqlite::SqliteConnectionManager; +use rusqlite::{Connection, DropBehavior, OptionalExtension, Transaction}; + +use super::Result; + +pub struct CasDb { + pool: Pool, +} + +impl CasDb { + pub fn new(path: impl AsRef) -> Result { + let mut db_path = path.as_ref().to_owned(); + db_path.push("cas.db"); + Self::from_file(db_path) + } + + pub fn from_file(db_path: impl AsRef) -> Result { + let mgr = SqliteConnectionManager::file(db_path); + let pool = r2d2::Pool::new(mgr)?; + let conn = pool.get()?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS Blobs ( + BlobId INTEGER PRIMARY KEY, + FilePath TEXT NOT NULL UNIQUE + )", + (), + )?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS Chunks ( + ChunkId TEXT NOT NULL, + ChunkOffset INTEGER, + BlobId INTEGER, + UNIQUE(ChunkId, BlobId) ON CONFLICT IGNORE, + FOREIGN KEY(BlobId) REFERENCES Blobs(BlobId) + )", + (), + )?; + conn.execute( + "CREATE INDEX IF NOT EXISTS ChunkIndex ON Chunks(ChunkId)", + (), + )?; + + Ok(CasDb { pool }) + } + + pub fn get_blob_id_with_tx(tran: &Transaction, blob: &str) -> Result> { + let sql = "SELECT BlobId FROM Blobs WHERE FilePath = ?"; + + if let Some(id) = tran + .query_row(sql, [blob], |row| row.get::(0)) + .optional()? + { + return Ok(Some(id)); + } + + Ok(None) + } + + pub fn get_blob_id(&self, blob: &str) -> Result> { + let sql = "SELECT BlobId FROM Blobs WHERE FilePath = ?"; + + if let Some(id) = self + .get_connection()? + .query_row(sql, [blob], |row| row.get::(0)) + .optional()? + { + return Ok(Some(id)); + } + + Ok(None) + } + + pub fn get_blob_path(&self, id: u64) -> Result> { + let sql = "SELECT FilePath FROM Blobs WHERE BlobId = ?"; + + if let Some(path) = self + .get_connection()? + .query_row(sql, [id], |row| row.get::(0)) + .optional()? 
+ { + return Ok(Some(path)); + }; + + Ok(None) + } + + pub fn get_all_blobs(&self) -> Result> { + let conn = self.get_connection()?; + let mut stmt = conn.prepare_cached("SELECT BlobId, FilePath FROM Blobs")?; + let rows = stmt.query_map([], |row| Ok((row.get::(0)?, row.get(1)?)))?; + let mut results: Vec<(u64, String)> = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + pub fn add_blobs(&mut self, blobs: &[String]) -> Result<()> { + let sql = "INSERT OR IGNORE INTO Blobs (FilePath) VALUES (?1)"; + let mut conn = self.get_connection()?; + let tran = Self::begin_transaction(&mut conn)?; + + for blob in blobs { + if let Err(e) = tran.execute(sql, [blob]) { + return Err(e.into()); + }; + } + tran.commit()?; + + Ok(()) + } + + pub fn add_blob(&self, blob: &str) -> Result { + let sql = "INSERT OR IGNORE INTO Blobs (FilePath) VALUES (?1)"; + let conn = self.get_connection()?; + conn.execute(sql, [blob])?; + Ok(conn.last_insert_rowid() as u64) + } + + pub fn delete_blobs(&mut self, blobs: &[String]) -> Result<()> { + let delete_blobs_sql = "DELETE FROM Blobs WHERE BlobId = (?1)"; + let delete_chunks_sql = "DELETE FROM Chunks WHERE BlobId = (?1)"; + let mut conn = self.get_connection()?; + let tran = Self::begin_transaction(&mut conn)?; + + for blob in blobs { + if let Some(id) = Self::get_blob_id_with_tx(&tran, blob)? { + if let Err(e) = tran.execute(delete_chunks_sql, [id]) { + return Err(e.into()); + } + if let Err(e) = tran.execute(delete_blobs_sql, [id]) { + return Err(e.into()); + } + } + } + tran.commit()?; + + Ok(()) + } + + pub fn get_chunk_info(&self, chunk_id: &str) -> Result> { + let sql = "SELECT FilePath, ChunkOffset \ + FROM Chunks INDEXED BY ChunkIndex \ + JOIN Blobs ON Chunks.BlobId = Blobs.BlobId \ + WHERE ChunkId = ?\ + ORDER BY Blobs.BlobId LIMIT 1 OFFSET 0"; + + if let Some((new_blob_id, chunk_info)) = self + .get_connection()? + .query_row(sql, [chunk_id], |row| { + Ok((row.get(0)?, row.get::(1)?)) + }) + .optional()? 
+ { + return Ok(Some((new_blob_id, chunk_info))); + } + + Ok(None) + } + + pub fn add_chunks(&mut self, chunks: &[(String, u64, String)]) -> Result<()> { + let sql = "INSERT OR IGNORE INTO Chunks (ChunkId, ChunkOffset, BlobId) VALUES (?1, ?2, ?3)"; + let mut conn = self.get_connection()?; + let tran = Self::begin_transaction(&mut conn)?; + + for chunk in chunks { + match Self::get_blob_id_with_tx(&tran, &chunk.2) { + Err(e) => return Err(e), + Ok(id) => { + if let Err(e) = tran.execute(sql, (&chunk.0, &chunk.1, id)) { + return Err(e.into()); + } + } + } + } + tran.commit()?; + + Ok(()) + } + + pub fn add_chunk(&self, chunk_id: &str, chunk_offset: u64, blob_id: &str) -> Result<()> { + let sql = "INSERT OR IGNORE INTO Chunks (ChunkId, ChunkOffset, BlobId) VALUES (?1, ?2, ?3)"; + let mut conn = self.get_connection()?; + let tran = Self::begin_transaction(&mut conn)?; + + match Self::get_blob_id_with_tx(&tran, blob_id) { + Err(e) => return Err(e), + Ok(id) => { + if let Err(e) = tran.execute(sql, (chunk_id, chunk_offset, id)) { + return Err(e.into()); + } + } + } + tran.commit()?; + + Ok(()) + } + + fn begin_transaction( + conn: &mut PooledConnection, + ) -> Result { + let mut tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?; + tx.set_drop_behavior(DropBehavior::Rollback); + Ok(tx) + } + + fn get_connection(&self) -> Result> { + let conn = self.pool.get()?; + conn.busy_handler(Some(|_v| true))?; + Ok(conn) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vmm_sys_util::tempdir::TempDir; + + #[test] + fn test_cas_blob() { + let tmpdir = TempDir::new().unwrap(); + + let mut cas_mgr = CasDb::new(tmpdir.as_path()).unwrap(); + cas_mgr + .add_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) + .unwrap(); + + let mut mgr2 = CasDb::new(tmpdir.as_path()).unwrap(); + assert_eq!(mgr2.add_blob("/tmp/blob3").unwrap(), 3); + + drop(cas_mgr); + + assert_eq!(mgr2.get_blob_id("/tmp/blob1").unwrap(), Some(1)); + assert_eq!(mgr2.get_blob_id("/tmp/blob2").unwrap(), Some(2)); + assert_eq!(mgr2.get_blob_id("/tmp/blob3").unwrap(), Some(3)); + assert_eq!(mgr2.get_blob_id("/tmp/blob4").unwrap(), None); + + assert_eq!( + mgr2.get_blob_path(1).unwrap(), + Some("/tmp/blob1".to_string()) + ); + assert_eq!( + mgr2.get_blob_path(2).unwrap(), + Some("/tmp/blob2".to_string()) + ); + assert_eq!( + mgr2.get_blob_path(3).unwrap(), + Some("/tmp/blob3".to_string()) + ); + assert_eq!(mgr2.get_blob_path(4).unwrap(), None); + + let blobs = mgr2.get_all_blobs().unwrap(); + assert_eq!(blobs.len(), 3); + + mgr2.delete_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) + .unwrap(); + assert_eq!(mgr2.get_blob_path(1).unwrap(), None); + assert_eq!(mgr2.get_blob_path(2).unwrap(), None); + assert_eq!( + mgr2.get_blob_path(3).unwrap(), + Some("/tmp/blob3".to_string()) + ); + + let blobs = mgr2.get_all_blobs().unwrap(); + assert_eq!(blobs.len(), 1); + } + + #[test] + fn test_cas_chunk() { + let tmpdir = TempDir::new().unwrap(); + let mut cas_mgr = CasDb::new(tmpdir.as_path()).unwrap(); + cas_mgr + .add_blobs(&["/tmp/blob1".to_string(), "/tmp/blob2".to_string()]) + .unwrap(); + + cas_mgr + .add_chunks(&[ + ("chunk1".to_string(), 4096, "/tmp/blob1".to_string()), + ("chunk2".to_string(), 0, "/tmp/blob2".to_string()), + ]) + .unwrap(); + + let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); + assert_eq!(&file, "/tmp/blob1"); + assert_eq!(offset, 4096); + let (file, offset) = cas_mgr.get_chunk_info("chunk2").unwrap().unwrap(); + assert_eq!(&file, "/tmp/blob2"); + 
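The `CasDb` schema added in this file reduces chunk deduplication to a single indexed lookup: given a chunk digest, find any blob that already stores it, together with the offset inside that blob. A hedged usage sketch against an in-memory SQLite database, assuming the `rusqlite` crate this patch already depends on; the table and column names follow the patch, while the `ChunkIndex` index, the foreign key, and the `ORDER BY ... LIMIT 1` clause of `get_chunk_info` are omitted for brevity:

```rust
use rusqlite::{Connection, OptionalExtension};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute(
        "CREATE TABLE Blobs (BlobId INTEGER PRIMARY KEY, FilePath TEXT NOT NULL UNIQUE)",
        (),
    )?;
    conn.execute(
        "CREATE TABLE Chunks (ChunkId TEXT NOT NULL, ChunkOffset INTEGER, BlobId INTEGER, \
         UNIQUE(ChunkId, BlobId) ON CONFLICT IGNORE)",
        (),
    )?;

    // Record that blob 1 already holds "chunk1" at offset 4096.
    conn.execute("INSERT OR IGNORE INTO Blobs (FilePath) VALUES (?1)", ["/tmp/blob1"])?;
    conn.execute(
        "INSERT OR IGNORE INTO Chunks (ChunkId, ChunkOffset, BlobId) VALUES (?1, ?2, ?3)",
        rusqlite::params!["chunk1", 4096u64, 1u64],
    )?;

    // Dedup lookup: which blob already stores this chunk, and where?
    let hit: Option<(String, u64)> = conn
        .query_row(
            "SELECT FilePath, ChunkOffset FROM Chunks \
             JOIN Blobs ON Chunks.BlobId = Blobs.BlobId WHERE ChunkId = ?1",
            ["chunk1"],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .optional()?;
    assert_eq!(hit, Some(("/tmp/blob1".to_string(), 4096)));
    Ok(())
}
```

A miss simply returns `None`, and the `UNIQUE ... ON CONFLICT IGNORE` constraint makes re-recording the same chunk for the same blob idempotent.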
assert_eq!(offset, 0); + + cas_mgr.add_chunk("chunk1", 8192, "/tmp/blob2").unwrap(); + let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); + assert_eq!(&file, "/tmp/blob1"); + assert_eq!(offset, 4096); + + cas_mgr.delete_blobs(&["/tmp/blob1".to_string()]).unwrap(); + let (file, offset) = cas_mgr.get_chunk_info("chunk1").unwrap().unwrap(); + assert_eq!(&file, "/tmp/blob2"); + assert_eq!(offset, 8192); + + cas_mgr.delete_blobs(&["/tmp/blob2".to_string()]).unwrap(); + let res = cas_mgr.get_chunk_info("chunk1").unwrap(); + assert!(res.is_none()); + let res = cas_mgr.get_chunk_info("chunk2").unwrap(); + assert!(res.is_none()); + } +} diff --git a/storage/src/cache/dedup/mod.rs b/storage/src/cache/dedup/mod.rs index f52a8fcc1de..e303474b765 100644 --- a/storage/src/cache/dedup/mod.rs +++ b/storage/src/cache/dedup/mod.rs @@ -1,49 +1,49 @@ -// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::{self, Display, Formatter}; -use std::io::Error; - -mod db; - -/// Error codes related to local cas. -#[derive(Debug)] -pub enum CasError { - Io(Error), - Db(rusqlite::Error), - R2D2(r2d2::Error), -} - -impl Display for CasError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - CasError::Io(e) => write!(f, "{}", e), - CasError::Db(e) => write!(f, "{}", e), - CasError::R2D2(e) => write!(f, "{}", e), - } - } -} - -impl std::error::Error for CasError {} - -impl From for CasError { - fn from(e: rusqlite::Error) -> Self { - CasError::Db(e) - } -} - -impl From for CasError { - fn from(e: r2d2::Error) -> Self { - CasError::R2D2(e) - } -} - -impl From for CasError { - fn from(e: Error) -> Self { - CasError::Io(e) - } -} - -/// Specialized `Result` for local cas. -type Result = std::result::Result; +// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::{self, Display, Formatter}; +use std::io::Error; + +mod db; + +/// Error codes related to local cas. +#[derive(Debug)] +pub enum CasError { + Io(Error), + Db(rusqlite::Error), + R2D2(r2d2::Error), +} + +impl Display for CasError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + CasError::Io(e) => write!(f, "{}", e), + CasError::Db(e) => write!(f, "{}", e), + CasError::R2D2(e) => write!(f, "{}", e), + } + } +} + +impl std::error::Error for CasError {} + +impl From for CasError { + fn from(e: rusqlite::Error) -> Self { + CasError::Db(e) + } +} + +impl From for CasError { + fn from(e: r2d2::Error) -> Self { + CasError::R2D2(e) + } +} + +impl From for CasError { + fn from(e: Error) -> Self { + CasError::Io(e) + } +} + +/// Specialized `Result` for local cas. +type Result = std::result::Result; diff --git a/storage/src/cache/dummycache.rs b/storage/src/cache/dummycache.rs index 3fc4fcf9c72..e42d0c1f56c 100644 --- a/storage/src/cache/dummycache.rs +++ b/storage/src/cache/dummycache.rs @@ -1,472 +1,472 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A dummy implementation of the [BlobCacheMgr](trait.BlobCacheMgr.html) trait. -//! -//! The [DummyCacheMgr](struct.DummyCacheMgr.html) is a dummy implementation of the -//! [BlobCacheMgr](../trait.BlobCacheMgr.html) trait, which doesn't really cache any data. -//! Instead it just reads data from the backend, uncompressed it if needed and then pass on -//! the data to the clients. -//! -//! 
There are two possible usage mode of the [DummyCacheMgr]: -//! - Read compressed/uncompressed data from remote Registry/OSS backend but not cache the -//! uncompressed data on local storage. The -//! [is_chunk_cached()](../trait.BlobCache.html#tymethod.is_chunk_cached) -//! method always return false to disable data prefetching. -//! - Read uncompressed data from local disk and no need to double cache the data. -//! The [is_chunk_cached()](../trait.BlobCache.html#tymethod.is_chunk_cached) method always -//! return true to enable data prefetching. -use std::io::Result; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nydus_api::CacheConfigV2; -use nydus_utils::crypt::{Algorithm, Cipher, CipherContext}; -use nydus_utils::{compress, digest}; - -use crate::backend::{BlobBackend, BlobReader}; -use crate::cache::state::{ChunkMap, NoopChunkMap}; -use crate::cache::{BlobCache, BlobCacheMgr}; -use crate::device::{ - BlobChunkInfo, BlobFeatures, BlobInfo, BlobIoDesc, BlobIoVec, BlobPrefetchRequest, -}; -use crate::utils::{alloc_buf, copyv}; -use crate::{StorageError, StorageResult}; - -struct DummyCache { - blob_id: String, - blob_info: Arc, - chunk_map: Arc, - reader: Arc, - compressor: compress::Algorithm, - digester: digest::Algorithm, - is_legacy_stargz: bool, - need_validation: bool, -} - -impl BlobCache for DummyCache { - fn blob_id(&self) -> &str { - &self.blob_id - } - - fn blob_uncompressed_size(&self) -> Result { - Ok(self.blob_info.uncompressed_size()) - } - - fn blob_compressed_size(&self) -> Result { - self.reader.blob_size().map_err(|e| eother!(e)) - } - - fn blob_compressor(&self) -> compress::Algorithm { - self.compressor - } - - fn blob_cipher(&self) -> Algorithm { - self.blob_info.cipher() - } - - fn blob_cipher_object(&self) -> Arc { - self.blob_info.cipher_object() - } - - fn blob_cipher_context(&self) -> Option { - self.blob_info.cipher_context() - } - - fn blob_digester(&self) -> digest::Algorithm { - self.digester - } - - fn is_legacy_stargz(&self) -> bool { - self.is_legacy_stargz - } - - fn need_validation(&self) -> bool { - self.need_validation - } - - fn reader(&self) -> &dyn BlobReader { - &*self.reader - } - - fn get_chunk_map(&self) -> &Arc { - &self.chunk_map - } - - fn get_chunk_info(&self, _chunk_index: u32) -> Option> { - None - } - - fn start_prefetch(&self) -> StorageResult<()> { - Ok(()) - } - - fn stop_prefetch(&self) -> StorageResult<()> { - Ok(()) - } - - fn is_prefetch_active(&self) -> bool { - false - } - - fn prefetch( - &self, - _blob_cache: Arc, - _prefetches: &[BlobPrefetchRequest], - _bios: &[BlobIoDesc], - ) -> StorageResult { - Err(StorageError::Unsupported) - } - - fn read(&self, iovec: &mut BlobIoVec, bufs: &[FileVolatileSlice]) -> Result { - let bios = &iovec.bi_vec; - - if iovec.size() == 0 || bios.is_empty() { - return Err(einval!("parameter `bios` is empty")); - } - - let bios_len = bios.len(); - let offset = bios[0].offset; - let d_size = bios[0].chunkinfo.uncompressed_size() as usize; - // Use the destination buffer to receive the uncompressed data if possible. 
- if bufs.len() == 1 && bios_len == 1 && offset == 0 && bufs[0].len() >= d_size { - if !bios[0].user_io { - return Ok(0); - } - let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), d_size) }; - self.read_chunk_from_backend(&bios[0].chunkinfo, buf)?; - return Ok(buf.len()); - } - - let mut user_size = 0; - let mut buffer_holder: Vec> = Vec::with_capacity(bios.len()); - for bio in bios.iter() { - if bio.user_io { - let mut d = alloc_buf(bio.chunkinfo.uncompressed_size() as usize); - self.read_chunk_from_backend(&bio.chunkinfo, d.as_mut_slice())?; - buffer_holder.push(d); - // Even a merged IO can hardly reach u32::MAX. So this is safe - user_size += bio.size; - } - } - - copyv( - &buffer_holder, - bufs, - offset as usize, - user_size as usize, - 0, - 0, - ) - .map(|(n, _)| n) - .map_err(|e| eother!(e)) - } -} - -/// A dummy implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html), simply reporting each -/// chunk as cached or not cached according to configuration. -/// -/// The `DummyCacheMgr` is a dummy implementation of the `BlobCacheMgr`, which doesn't really cache -/// data. Instead it just reads data from the backend, uncompressed it if needed and then pass on -/// the data to the clients. -pub struct DummyCacheMgr { - backend: Arc, - cached: bool, - need_validation: bool, - closed: AtomicBool, -} - -impl DummyCacheMgr { - /// Create a new instance of `DummyCacheMgr`. - pub fn new( - config: &CacheConfigV2, - backend: Arc, - cached: bool, - ) -> Result { - Ok(DummyCacheMgr { - backend, - cached, - need_validation: config.cache_validate, - closed: AtomicBool::new(false), - }) - } -} - -impl BlobCacheMgr for DummyCacheMgr { - fn init(&self) -> Result<()> { - Ok(()) - } - - fn destroy(&self) { - if !self.closed.load(Ordering::Acquire) { - self.closed.store(true, Ordering::Release); - self.backend().shutdown(); - } - } - - fn gc(&self, _id: Option<&str>) -> bool { - false - } - - fn backend(&self) -> &(dyn BlobBackend) { - self.backend.as_ref() - } - - fn get_blob_cache(&self, blob_info: &Arc) -> Result> { - if blob_info.has_feature(BlobFeatures::ZRAN) { - return Err(einval!( - "BlobCacheMgr doesn't support ZRan based RAFS data blobs" - )); - } - - let blob_id = blob_info.blob_id(); - let reader = self.backend.get_reader(&blob_id).map_err(|e| eother!(e))?; - - Ok(Arc::new(DummyCache { - blob_id, - blob_info: blob_info.clone(), - chunk_map: Arc::new(NoopChunkMap::new(self.cached)), - reader, - compressor: blob_info.compressor(), - digester: blob_info.digester(), - is_legacy_stargz: blob_info.is_legacy_stargz(), - need_validation: self.need_validation && !blob_info.is_legacy_stargz(), - })) - } - - fn check_stat(&self) {} -} - -impl Drop for DummyCacheMgr { - fn drop(&mut self) { - self.destroy(); - } -} - -#[cfg(test)] -mod tests { - use std::fs::OpenOptions; - - use nydus_api::ConfigV2; - use nydus_utils::metrics::BackendMetrics; - use vmm_sys_util::tempdir::TempDir; - - use crate::{ - cache::state::IndexedChunkMap, - device::{BlobIoChunk, BlobIoRange}, - meta::tests::DummyBlobReader, - test::{MockBackend, MockChunkInfo}, - }; - - use super::*; - - #[test] - fn test_dummy_cache() { - let info = BlobInfo::new( - 0, - "blob-0".to_string(), - 800, - 0, - 8, - 100, - BlobFeatures::empty(), - ); - let dir = TempDir::new().unwrap(); - let blob_path = dir - .as_path() - .join("blob-0") - .as_os_str() - .to_str() - .unwrap() - .to_string(); - let chunkmap = IndexedChunkMap::new(blob_path.as_str(), 100, true).unwrap(); - let chunkmap_unuse = 
IndexedChunkMap::new(blob_path.as_str(), 100, true).unwrap(); - - let f = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .read(true) - .open(blob_path.as_str()) - .unwrap(); - assert!(f.set_len(800).is_ok()); - let reader: Arc = Arc::new(DummyBlobReader { - metrics: BackendMetrics::new("dummy", "localfs"), - file: f, - }); - let cache = DummyCache { - blob_id: "0".to_string(), - blob_info: Arc::new(info.clone()), - chunk_map: Arc::new(chunkmap), - reader: reader.clone(), - compressor: compress::Algorithm::None, - digester: digest::Algorithm::Blake3, - is_legacy_stargz: false, - need_validation: false, - }; - - let cache_unuse = DummyCache { - blob_id: "1".to_string(), - blob_info: Arc::new(info.clone()), - chunk_map: Arc::new(chunkmap_unuse), - reader, - compressor: compress::Algorithm::None, - digester: digest::Algorithm::Blake3, - is_legacy_stargz: false, - need_validation: false, - }; - - assert!(cache.get_legacy_stargz_size(0, 100).is_ok()); - assert!(!cache.is_zran()); - assert!(!cache.is_batch()); - assert!(cache.get_blob_object().is_none()); - assert!(cache.prefetch_range(&BlobIoRange::default()).is_err()); - assert_eq!(cache.blob_id, "0"); - assert_eq!(cache.blob_uncompressed_size().unwrap(), 800); - assert_eq!(cache.blob_compressed_size().unwrap(), 0); - assert_eq!(cache.blob_compressor(), compress::Algorithm::None); - assert_eq!(cache.blob_cipher(), Algorithm::None); - match cache.blob_cipher_object().as_ref() { - Cipher::None => {} - _ => panic!(), - } - assert!(cache.blob_cipher_context().is_none()); - assert_eq!(cache.blob_digester(), digest::Algorithm::Blake3); - assert!(!cache.is_legacy_stargz()); - assert!(!cache.need_validation()); - let _r = cache.reader(); - let _m = cache.get_chunk_map(); - assert!(cache.get_chunk_info(0).is_none()); - - assert!(cache.start_prefetch().is_ok()); - let reqs = BlobPrefetchRequest { - blob_id: "blob-0".to_string(), - offset: 0, - len: 10, - }; - let iovec_arr: &[BlobIoDesc] = &[]; - let reqs = &[reqs]; - - assert!(cache - .prefetch(Arc::new(cache_unuse), reqs, iovec_arr) - .is_err()); - assert!(cache.stop_prefetch().is_ok()); - let mut iovec = BlobIoVec::new(Arc::new(info.clone())); - let chunk: Arc = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 0, - flags: Default::default(), - compress_size: 0, - uncompress_size: 800, - compress_offset: 0, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }); - iovec.push(BlobIoDesc::new( - Arc::new(info.clone()), - BlobIoChunk::from(chunk.clone()), - 0, - 10, - true, - )); - - let mut dst_buf1 = vec![0x0u8; 800]; - let volatile_slice_1 = - unsafe { FileVolatileSlice::from_raw_ptr(dst_buf1.as_mut_ptr(), dst_buf1.len()) }; - let bufs: &[FileVolatileSlice] = &[volatile_slice_1]; - assert_eq!(cache.read(&mut iovec, bufs).unwrap(), 800); - - let chunk2: Arc = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 0, - flags: Default::default(), - compress_size: 0, - uncompress_size: 100, - compress_offset: 0, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }); - - let chunk3: Arc = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 0, - flags: Default::default(), - compress_size: 0, - uncompress_size: 100, - compress_offset: 100, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }); - - let mut iovec = BlobIoVec::new(Arc::new(info.clone())); - - iovec.push(BlobIoDesc::new( - Arc::new(info.clone()), - BlobIoChunk::from(chunk2.clone()), - 0, - 100, - true, - )); - 
- iovec.push(BlobIoDesc::new( - Arc::new(info), - BlobIoChunk::from(chunk3.clone()), - 100, - 100, - true, - )); - - let mut dst_buf2 = vec![0x0u8; 100]; - let mut dst_buf3 = vec![0x0u8; 100]; - let volatile_slice_2 = - unsafe { FileVolatileSlice::from_raw_ptr(dst_buf2.as_mut_ptr(), dst_buf2.len()) }; - - let volatile_slice_3 = - unsafe { FileVolatileSlice::from_raw_ptr(dst_buf3.as_mut_ptr(), dst_buf3.len()) }; - let bufs: &[FileVolatileSlice] = &[volatile_slice_2, volatile_slice_3]; - assert_eq!(cache.read(&mut iovec, bufs).unwrap(), 200); - } - - #[test] - fn test_dummy_cache_mgr() { - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "filecache" - compressed = true - validate = true - [cache.filecache] - work_dir = "/tmp" - "#; - - let cfg: ConfigV2 = toml::from_str(content).unwrap(); - let backend = MockBackend { - metrics: BackendMetrics::new("dummy", "localfs"), - }; - let mgr = - DummyCacheMgr::new(cfg.get_cache_config().unwrap(), Arc::new(backend), false).unwrap(); - assert!(mgr.init().is_ok()); - assert!(!mgr.gc(Some("blob-0"))); - let _bak = mgr.backend(); - mgr.check_stat(); - mgr.destroy(); - assert!(mgr.closed.load(Ordering::Acquire)); - drop(mgr); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A dummy implementation of the [BlobCacheMgr](trait.BlobCacheMgr.html) trait. +//! +//! The [DummyCacheMgr](struct.DummyCacheMgr.html) is a dummy implementation of the +//! [BlobCacheMgr](../trait.BlobCacheMgr.html) trait, which doesn't really cache any data. +//! Instead it just reads data from the backend, uncompressed it if needed and then pass on +//! the data to the clients. +//! +//! There are two possible usage mode of the [DummyCacheMgr]: +//! - Read compressed/uncompressed data from remote Registry/OSS backend but not cache the +//! uncompressed data on local storage. The +//! [is_chunk_cached()](../trait.BlobCache.html#tymethod.is_chunk_cached) +//! method always return false to disable data prefetching. +//! - Read uncompressed data from local disk and no need to double cache the data. +//! The [is_chunk_cached()](../trait.BlobCache.html#tymethod.is_chunk_cached) method always +//! return true to enable data prefetching. 
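The interesting part of `DummyCache::read` below is its zero-copy fast path: when the request is a single chunk starting at offset 0 and the caller supplied a single destination buffer that is large enough, the chunk is read from the backend straight into that buffer, skipping the scratch allocation and the final `copyv()`. A simplified, hypothetical sketch of that decision:

```rust
/// Decide whether a read can go straight into the caller's buffer.
/// Returns the sub-slice to fill, or `None` to fall back to the slow path
/// (per-chunk scratch buffers gathered with `copyv`).
fn fast_path(dst: &mut [u8], chunk_size: usize, offset: usize) -> Option<&mut [u8]> {
    if offset == 0 && dst.len() >= chunk_size {
        Some(&mut dst[..chunk_size])
    } else {
        None
    }
}

fn main() {
    let mut buf = vec![0u8; 4096];
    assert!(fast_path(&mut buf, 1024, 0).is_some()); // chunk fits: read directly
    assert!(fast_path(&mut buf, 8192, 0).is_none()); // too large: use scratch buffers
}
```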
+use std::io::Result; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nydus_api::CacheConfigV2; +use nydus_utils::crypt::{Algorithm, Cipher, CipherContext}; +use nydus_utils::{compress, digest}; + +use crate::backend::{BlobBackend, BlobReader}; +use crate::cache::state::{ChunkMap, NoopChunkMap}; +use crate::cache::{BlobCache, BlobCacheMgr}; +use crate::device::{ + BlobChunkInfo, BlobFeatures, BlobInfo, BlobIoDesc, BlobIoVec, BlobPrefetchRequest, +}; +use crate::utils::{alloc_buf, copyv}; +use crate::{StorageError, StorageResult}; + +struct DummyCache { + blob_id: String, + blob_info: Arc, + chunk_map: Arc, + reader: Arc, + compressor: compress::Algorithm, + digester: digest::Algorithm, + is_legacy_stargz: bool, + need_validation: bool, +} + +impl BlobCache for DummyCache { + fn blob_id(&self) -> &str { + &self.blob_id + } + + fn blob_uncompressed_size(&self) -> Result { + Ok(self.blob_info.uncompressed_size()) + } + + fn blob_compressed_size(&self) -> Result { + self.reader.blob_size().map_err(|e| eother!(e)) + } + + fn blob_compressor(&self) -> compress::Algorithm { + self.compressor + } + + fn blob_cipher(&self) -> Algorithm { + self.blob_info.cipher() + } + + fn blob_cipher_object(&self) -> Arc { + self.blob_info.cipher_object() + } + + fn blob_cipher_context(&self) -> Option { + self.blob_info.cipher_context() + } + + fn blob_digester(&self) -> digest::Algorithm { + self.digester + } + + fn is_legacy_stargz(&self) -> bool { + self.is_legacy_stargz + } + + fn need_validation(&self) -> bool { + self.need_validation + } + + fn reader(&self) -> &dyn BlobReader { + &*self.reader + } + + fn get_chunk_map(&self) -> &Arc { + &self.chunk_map + } + + fn get_chunk_info(&self, _chunk_index: u32) -> Option> { + None + } + + fn start_prefetch(&self) -> StorageResult<()> { + Ok(()) + } + + fn stop_prefetch(&self) -> StorageResult<()> { + Ok(()) + } + + fn is_prefetch_active(&self) -> bool { + false + } + + fn prefetch( + &self, + _blob_cache: Arc, + _prefetches: &[BlobPrefetchRequest], + _bios: &[BlobIoDesc], + ) -> StorageResult { + Err(StorageError::Unsupported) + } + + fn read(&self, iovec: &mut BlobIoVec, bufs: &[FileVolatileSlice]) -> Result { + let bios = &iovec.bi_vec; + + if iovec.size() == 0 || bios.is_empty() { + return Err(einval!("parameter `bios` is empty")); + } + + let bios_len = bios.len(); + let offset = bios[0].offset; + let d_size = bios[0].chunkinfo.uncompressed_size() as usize; + // Use the destination buffer to receive the uncompressed data if possible. + if bufs.len() == 1 && bios_len == 1 && offset == 0 && bufs[0].len() >= d_size { + if !bios[0].user_io { + return Ok(0); + } + let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), d_size) }; + self.read_chunk_from_backend(&bios[0].chunkinfo, buf)?; + return Ok(buf.len()); + } + + let mut user_size = 0; + let mut buffer_holder: Vec> = Vec::with_capacity(bios.len()); + for bio in bios.iter() { + if bio.user_io { + let mut d = alloc_buf(bio.chunkinfo.uncompressed_size() as usize); + self.read_chunk_from_backend(&bio.chunkinfo, d.as_mut_slice())?; + buffer_holder.push(d); + // Even a merged IO can hardly reach u32::MAX. 
So this is safe + user_size += bio.size; + } + } + + copyv( + &buffer_holder, + bufs, + offset as usize, + user_size as usize, + 0, + 0, + ) + .map(|(n, _)| n) + .map_err(|e| eother!(e)) + } +} + +/// A dummy implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html), simply reporting each +/// chunk as cached or not cached according to configuration. +/// +/// The `DummyCacheMgr` is a dummy implementation of the `BlobCacheMgr`, which doesn't really cache +/// data. Instead it just reads data from the backend, uncompressed it if needed and then pass on +/// the data to the clients. +pub struct DummyCacheMgr { + backend: Arc, + cached: bool, + need_validation: bool, + closed: AtomicBool, +} + +impl DummyCacheMgr { + /// Create a new instance of `DummyCacheMgr`. + pub fn new( + config: &CacheConfigV2, + backend: Arc, + cached: bool, + ) -> Result { + Ok(DummyCacheMgr { + backend, + cached, + need_validation: config.cache_validate, + closed: AtomicBool::new(false), + }) + } +} + +impl BlobCacheMgr for DummyCacheMgr { + fn init(&self) -> Result<()> { + Ok(()) + } + + fn destroy(&self) { + if !self.closed.load(Ordering::Acquire) { + self.closed.store(true, Ordering::Release); + self.backend().shutdown(); + } + } + + fn gc(&self, _id: Option<&str>) -> bool { + false + } + + fn backend(&self) -> &(dyn BlobBackend) { + self.backend.as_ref() + } + + fn get_blob_cache(&self, blob_info: &Arc) -> Result> { + if blob_info.has_feature(BlobFeatures::ZRAN) { + return Err(einval!( + "BlobCacheMgr doesn't support ZRan based RAFS data blobs" + )); + } + + let blob_id = blob_info.blob_id(); + let reader = self.backend.get_reader(&blob_id).map_err(|e| eother!(e))?; + + Ok(Arc::new(DummyCache { + blob_id, + blob_info: blob_info.clone(), + chunk_map: Arc::new(NoopChunkMap::new(self.cached)), + reader, + compressor: blob_info.compressor(), + digester: blob_info.digester(), + is_legacy_stargz: blob_info.is_legacy_stargz(), + need_validation: self.need_validation && !blob_info.is_legacy_stargz(), + })) + } + + fn check_stat(&self) {} +} + +impl Drop for DummyCacheMgr { + fn drop(&mut self) { + self.destroy(); + } +} + +#[cfg(test)] +mod tests { + use std::fs::OpenOptions; + + use nydus_api::ConfigV2; + use nydus_utils::metrics::BackendMetrics; + use vmm_sys_util::tempdir::TempDir; + + use crate::{ + cache::state::IndexedChunkMap, + device::{BlobIoChunk, BlobIoRange}, + meta::tests::DummyBlobReader, + test::{MockBackend, MockChunkInfo}, + }; + + use super::*; + + #[test] + fn test_dummy_cache() { + let info = BlobInfo::new( + 0, + "blob-0".to_string(), + 800, + 0, + 8, + 100, + BlobFeatures::empty(), + ); + let dir = TempDir::new().unwrap(); + let blob_path = dir + .as_path() + .join("blob-0") + .as_os_str() + .to_str() + .unwrap() + .to_string(); + let chunkmap = IndexedChunkMap::new(blob_path.as_str(), 100, true).unwrap(); + let chunkmap_unuse = IndexedChunkMap::new(blob_path.as_str(), 100, true).unwrap(); + + let f = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .read(true) + .open(blob_path.as_str()) + .unwrap(); + assert!(f.set_len(800).is_ok()); + let reader: Arc = Arc::new(DummyBlobReader { + metrics: BackendMetrics::new("dummy", "localfs"), + file: f, + }); + let cache = DummyCache { + blob_id: "0".to_string(), + blob_info: Arc::new(info.clone()), + chunk_map: Arc::new(chunkmap), + reader: reader.clone(), + compressor: compress::Algorithm::None, + digester: digest::Algorithm::Blake3, + is_legacy_stargz: false, + need_validation: false, + }; + + let cache_unuse = DummyCache { + 
blob_id: "1".to_string(), + blob_info: Arc::new(info.clone()), + chunk_map: Arc::new(chunkmap_unuse), + reader, + compressor: compress::Algorithm::None, + digester: digest::Algorithm::Blake3, + is_legacy_stargz: false, + need_validation: false, + }; + + assert!(cache.get_legacy_stargz_size(0, 100).is_ok()); + assert!(!cache.is_zran()); + assert!(!cache.is_batch()); + assert!(cache.get_blob_object().is_none()); + assert!(cache.prefetch_range(&BlobIoRange::default()).is_err()); + assert_eq!(cache.blob_id, "0"); + assert_eq!(cache.blob_uncompressed_size().unwrap(), 800); + assert_eq!(cache.blob_compressed_size().unwrap(), 0); + assert_eq!(cache.blob_compressor(), compress::Algorithm::None); + assert_eq!(cache.blob_cipher(), Algorithm::None); + match cache.blob_cipher_object().as_ref() { + Cipher::None => {} + _ => panic!(), + } + assert!(cache.blob_cipher_context().is_none()); + assert_eq!(cache.blob_digester(), digest::Algorithm::Blake3); + assert!(!cache.is_legacy_stargz()); + assert!(!cache.need_validation()); + let _r = cache.reader(); + let _m = cache.get_chunk_map(); + assert!(cache.get_chunk_info(0).is_none()); + + assert!(cache.start_prefetch().is_ok()); + let reqs = BlobPrefetchRequest { + blob_id: "blob-0".to_string(), + offset: 0, + len: 10, + }; + let iovec_arr: &[BlobIoDesc] = &[]; + let reqs = &[reqs]; + + assert!(cache + .prefetch(Arc::new(cache_unuse), reqs, iovec_arr) + .is_err()); + assert!(cache.stop_prefetch().is_ok()); + let mut iovec = BlobIoVec::new(Arc::new(info.clone())); + let chunk: Arc = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 0, + flags: Default::default(), + compress_size: 0, + uncompress_size: 800, + compress_offset: 0, + uncompress_offset: 0, + file_offset: 0, + index: 0, + reserved: 0, + }); + iovec.push(BlobIoDesc::new( + Arc::new(info.clone()), + BlobIoChunk::from(chunk.clone()), + 0, + 10, + true, + )); + + let mut dst_buf1 = vec![0x0u8; 800]; + let volatile_slice_1 = + unsafe { FileVolatileSlice::from_raw_ptr(dst_buf1.as_mut_ptr(), dst_buf1.len()) }; + let bufs: &[FileVolatileSlice] = &[volatile_slice_1]; + assert_eq!(cache.read(&mut iovec, bufs).unwrap(), 800); + + let chunk2: Arc = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 0, + flags: Default::default(), + compress_size: 0, + uncompress_size: 100, + compress_offset: 0, + uncompress_offset: 0, + file_offset: 0, + index: 0, + reserved: 0, + }); + + let chunk3: Arc = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 0, + flags: Default::default(), + compress_size: 0, + uncompress_size: 100, + compress_offset: 100, + uncompress_offset: 0, + file_offset: 0, + index: 0, + reserved: 0, + }); + + let mut iovec = BlobIoVec::new(Arc::new(info.clone())); + + iovec.push(BlobIoDesc::new( + Arc::new(info.clone()), + BlobIoChunk::from(chunk2.clone()), + 0, + 100, + true, + )); + + iovec.push(BlobIoDesc::new( + Arc::new(info), + BlobIoChunk::from(chunk3.clone()), + 100, + 100, + true, + )); + + let mut dst_buf2 = vec![0x0u8; 100]; + let mut dst_buf3 = vec![0x0u8; 100]; + let volatile_slice_2 = + unsafe { FileVolatileSlice::from_raw_ptr(dst_buf2.as_mut_ptr(), dst_buf2.len()) }; + + let volatile_slice_3 = + unsafe { FileVolatileSlice::from_raw_ptr(dst_buf3.as_mut_ptr(), dst_buf3.len()) }; + let bufs: &[FileVolatileSlice] = &[volatile_slice_2, volatile_slice_3]; + assert_eq!(cache.read(&mut iovec, bufs).unwrap(), 200); + } + + #[test] + fn test_dummy_cache_mgr() { + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + 
[backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "filecache" + compressed = true + validate = true + [cache.filecache] + work_dir = "/tmp" + "#; + + let cfg: ConfigV2 = toml::from_str(content).unwrap(); + let backend = MockBackend { + metrics: BackendMetrics::new("dummy", "localfs"), + }; + let mgr = + DummyCacheMgr::new(cfg.get_cache_config().unwrap(), Arc::new(backend), false).unwrap(); + assert!(mgr.init().is_ok()); + assert!(!mgr.gc(Some("blob-0"))); + let _bak = mgr.backend(); + mgr.check_stat(); + mgr.destroy(); + assert!(mgr.closed.load(Ordering::Acquire)); + drop(mgr); + } +} diff --git a/storage/src/cache/filecache/mod.rs b/storage/src/cache/filecache/mod.rs index e6b8c5b80da..c50381c7890 100644 --- a/storage/src/cache/filecache/mod.rs +++ b/storage/src/cache/filecache/mod.rs @@ -1,841 +1,841 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::fs::OpenOptions; -use std::io::Result; -use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; -use std::sync::{Arc, RwLock}; - -use tokio::runtime::Runtime; - -use nydus_api::CacheConfigV2; -use nydus_utils::crypt; -use nydus_utils::metrics::BlobcacheMetrics; - -use crate::backend::BlobBackend; -use crate::cache::cachedfile::{FileCacheEntry, FileCacheMeta}; -use crate::cache::state::{ - BlobStateMap, ChunkMap, DigestedChunkMap, IndexedChunkMap, NoopChunkMap, -}; -use crate::cache::worker::{AsyncPrefetchConfig, AsyncWorkerMgr}; -use crate::cache::{BlobCache, BlobCacheMgr}; -use crate::device::{BlobFeatures, BlobInfo}; - -pub const BLOB_RAW_FILE_SUFFIX: &str = ".blob.raw"; -pub const BLOB_DATA_FILE_SUFFIX: &str = ".blob.data"; - -/// An implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html) to improve performance by -/// caching uncompressed blob with local storage. -#[derive(Clone)] -pub struct FileCacheMgr { - blobs: Arc>>>, - backend: Arc, - metrics: Arc, - prefetch_config: Arc, - runtime: Arc, - worker_mgr: Arc, - work_dir: String, - validate: bool, - disable_indexed_map: bool, - cache_raw_data: bool, - cache_encrypted: bool, - cache_convergent_encryption: bool, - cache_encryption_key: String, - closed: Arc, - user_io_batch_size: u32, -} - -impl FileCacheMgr { - /// Create a new instance of `FileCacheMgr`. - pub fn new( - config: &CacheConfigV2, - backend: Arc, - runtime: Arc, - id: &str, - user_io_batch_size: u32, - ) -> Result { - let blob_cfg = config.get_filecache_config()?; - let work_dir = blob_cfg.get_work_dir()?; - let metrics = BlobcacheMetrics::new(id, work_dir); - let prefetch_config: Arc = Arc::new((&config.prefetch).into()); - let worker_mgr = AsyncWorkerMgr::new(metrics.clone(), prefetch_config.clone())?; - - Ok(FileCacheMgr { - blobs: Arc::new(RwLock::new(HashMap::new())), - backend, - metrics, - prefetch_config, - runtime, - worker_mgr: Arc::new(worker_mgr), - work_dir: work_dir.to_owned(), - disable_indexed_map: blob_cfg.disable_indexed_map, - validate: config.cache_validate, - cache_raw_data: config.cache_compressed, - cache_encrypted: blob_cfg.enable_encryption, - cache_convergent_encryption: blob_cfg.enable_convergent_encryption, - cache_encryption_key: blob_cfg.encryption_key.clone(), - closed: Arc::new(AtomicBool::new(false)), - user_io_batch_size, - }) - } - - // Get the file cache entry for the specified blob object. 
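`FileCacheMgr` keeps one `FileCacheEntry` per blob in a `RwLock<HashMap>`, and the `get_or_create_cache_entry` helper that follows uses the usual two-phase pattern: check under the read lock first, then re-check under the write lock so that an entry inserted by a concurrent caller wins instead of being replaced. A simplified sketch of that pattern, with `Arc<u32>` standing in for the cache entry type and the map's entry API used for brevity:

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

fn get_or_create(
    map: &RwLock<HashMap<String, Arc<u32>>>,
    key: &str,
    make: impl FnOnce() -> u32,
) -> Arc<u32> {
    // Fast path: most lookups find an existing entry under the read lock.
    if let Some(entry) = map.read().unwrap().get(key) {
        return entry.clone();
    }
    // Slow path: build the entry, then insert under the write lock; if someone
    // raced us, `or_insert` keeps their entry and ours is dropped.
    let candidate = Arc::new(make());
    map.write()
        .unwrap()
        .entry(key.to_string())
        .or_insert(candidate)
        .clone()
}

fn main() {
    let map = RwLock::new(HashMap::new());
    let first = get_or_create(&map, "blob-0", || 7);
    let second = get_or_create(&map, "blob-0", || 9);
    assert_eq!((*first, *second), (7, 7)); // the second call reuses the first entry
}
```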
- fn get(&self, blob: &Arc) -> Option> { - self.blobs.read().unwrap().get(&blob.blob_id()).cloned() - } - - // Create a file cache entry for the specified blob object if not present, otherwise - // return the existing one. - fn get_or_create_cache_entry(&self, blob: &Arc) -> Result> { - if let Some(entry) = self.get(blob) { - return Ok(entry); - } - - let entry = FileCacheEntry::new_file_cache( - self, - blob.clone(), - self.prefetch_config.clone(), - self.runtime.clone(), - self.worker_mgr.clone(), - )?; - let entry = Arc::new(entry); - let mut guard = self.blobs.write().unwrap(); - if let Some(entry) = guard.get(&blob.blob_id()) { - Ok(entry.clone()) - } else { - let blob_id = blob.blob_id(); - guard.insert(blob_id.clone(), entry.clone()); - self.metrics - .underlying_files - .lock() - .unwrap() - .insert(blob_id + BLOB_DATA_FILE_SUFFIX); - Ok(entry) - } - } -} - -impl BlobCacheMgr for FileCacheMgr { - fn init(&self) -> Result<()> { - AsyncWorkerMgr::start(self.worker_mgr.clone()) - } - - fn destroy(&self) { - if !self.closed.load(Ordering::Acquire) { - self.closed.store(true, Ordering::Release); - self.worker_mgr.stop(); - self.backend().shutdown(); - self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } - } - - fn gc(&self, id: Option<&str>) -> bool { - let mut reclaim = Vec::new(); - - if let Some(blob_id) = id { - reclaim.push(blob_id.to_string()); - } else { - let guard = self.blobs.write().unwrap(); - for (id, entry) in guard.iter() { - if Arc::strong_count(entry) == 1 { - reclaim.push(id.to_owned()); - } - } - } - - for key in reclaim.iter() { - let mut guard = self.blobs.write().unwrap(); - if let Some(entry) = guard.get(key) { - if Arc::strong_count(entry) == 1 { - guard.remove(key); - } - } - } - - self.blobs.read().unwrap().len() == 0 - } - - fn backend(&self) -> &(dyn BlobBackend) { - self.backend.as_ref() - } - - fn get_blob_cache(&self, blob_info: &Arc) -> Result> { - self.get_or_create_cache_entry(blob_info) - .map(|v| v as Arc) - } - - fn check_stat(&self) {} -} - -impl Drop for FileCacheMgr { - fn drop(&mut self) { - self.destroy(); - } -} - -impl FileCacheEntry { - fn new_file_cache( - mgr: &FileCacheMgr, - blob_info: Arc, - prefetch_config: Arc, - runtime: Arc, - workers: Arc, - ) -> Result { - let is_separate_meta = blob_info.has_feature(BlobFeatures::SEPARATE); - let is_tarfs = blob_info.features().is_tarfs(); - let is_batch = blob_info.has_feature(BlobFeatures::BATCH); - let is_zran = blob_info.has_feature(BlobFeatures::ZRAN); - let blob_id = blob_info.blob_id(); - let blob_meta_id = if is_separate_meta { - blob_info.get_blob_meta_id()? - } else { - blob_id.clone() - }; - let reader = mgr - .backend - .get_reader(&blob_id) - .map_err(|e| eio!(format!("failed to get reader for blob {}, {}", blob_id, e)))?; - let blob_meta_reader = if is_separate_meta { - mgr.backend.get_reader(&blob_meta_id).map_err(|e| { - eio!(format!( - "failed to get reader for blob.meta {}, {}", - blob_id, e - )) - })? 
- } else { - reader.clone() - }; - - let blob_compressed_size = Self::get_blob_size(&reader, &blob_info)?; - let blob_uncompressed_size = blob_info.uncompressed_size(); - let is_legacy_stargz = blob_info.is_legacy_stargz(); - - let ( - file, - meta, - chunk_map, - is_direct_chunkmap, - is_get_blob_object_supported, - need_validation, - ) = if is_tarfs { - let blob_file_path = format!("{}/{}", mgr.work_dir, blob_id); - let file = OpenOptions::new() - .create(false) - .write(false) - .read(true) - .open(blob_file_path)?; - let chunk_map = - Arc::new(BlobStateMap::from(NoopChunkMap::new(true))) as Arc; - (file, None, chunk_map, true, true, false) - } else { - let blob_file_path = format!("{}/{}", mgr.work_dir, blob_id); - let (chunk_map, is_direct_chunkmap) = - Self::create_chunk_map(mgr, &blob_info, &blob_file_path)?; - // Validation is supported by RAFS v5 (which has no meta_ci) or v6 with chunk digest array. - let validation_supported = !blob_info.meta_ci_is_valid() - || blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST); - let need_validation = ((mgr.validate && validation_supported) || !is_direct_chunkmap) - && !is_legacy_stargz; - // Set cache file to its expected size. - let suffix = if mgr.cache_raw_data { - BLOB_RAW_FILE_SUFFIX - } else { - BLOB_DATA_FILE_SUFFIX - }; - let blob_data_file_path = blob_file_path.clone() + suffix; - let file = OpenOptions::new() - .create(true) - .write(true) - .read(true) - .open(blob_data_file_path)?; - let file_size = file.metadata()?.len(); - let cached_file_size = if mgr.cache_raw_data { - blob_info.compressed_data_size() - } else { - blob_info.uncompressed_size() - }; - if file_size == 0 || file_size < cached_file_size { - file.set_len(cached_file_size)?; - } else if cached_file_size != 0 && file_size != cached_file_size { - let msg = format!( - "blob data file size doesn't match: got 0x{:x}, expect 0x{:x}", - file_size, cached_file_size - ); - return Err(einval!(msg)); - } - let meta = if blob_info.meta_ci_is_valid() - || blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) - { - let meta = FileCacheMeta::new( - blob_file_path, - blob_info.clone(), - Some(blob_meta_reader), - Some(runtime.clone()), - false, - need_validation, - )?; - Some(meta) - } else { - None - }; - let is_get_blob_object_supported = meta.is_some() && is_direct_chunkmap; - ( - file, - meta, - chunk_map, - is_direct_chunkmap, - is_get_blob_object_supported, - need_validation, - ) - }; - - let (cache_cipher_object, cache_cipher_context) = if mgr.cache_encrypted { - let key = hex::decode(mgr.cache_encryption_key.clone()) - .map_err(|_e| einval!("invalid cache file encryption key"))?; - let cipher = crypt::Algorithm::Aes128Xts.new_cipher()?; - let ctx = crypt::CipherContext::new( - key, - [0u8; 16].to_vec(), - mgr.cache_convergent_encryption, - crypt::Algorithm::Aes128Xts, - )?; - (Arc::new(cipher), Arc::new(ctx)) - } else { - (Default::default(), Default::default()) - }; - - trace!( - "filecache entry: is_raw_data {}, direct {}, legacy_stargz {}, separate_meta {}, tarfs {}, batch {}, zran {}", - mgr.cache_raw_data, - is_direct_chunkmap, - is_legacy_stargz, - is_separate_meta, - is_tarfs, - is_batch, - is_zran, - ); - Ok(FileCacheEntry { - blob_id, - blob_info, - cache_cipher_object, - cache_cipher_context, - chunk_map, - file: Arc::new(file), - meta, - metrics: mgr.metrics.clone(), - prefetch_state: Arc::new(AtomicU32::new(0)), - reader, - runtime, - workers, - - blob_compressed_size, - blob_uncompressed_size, - is_get_blob_object_supported, - is_raw_data: 
mgr.cache_raw_data, - is_cache_encrypted: mgr.cache_encrypted, - is_direct_chunkmap, - is_legacy_stargz, - is_tarfs, - is_batch, - is_zran, - dio_enabled: false, - need_validation, - user_io_batch_size: mgr.user_io_batch_size, - prefetch_config, - }) - } - - fn create_chunk_map( - mgr: &FileCacheMgr, - blob_info: &BlobInfo, - blob_file: &str, - ) -> Result<(Arc, bool)> { - // The builder now records the number of chunks in the blob table, so we can - // use IndexedChunkMap as a chunk map, but for the old Nydus bootstrap, we - // need downgrade to use DigestedChunkMap as a compatible solution. - let is_v5 = !blob_info.meta_ci_is_valid(); - let mut direct_chunkmap = true; - let chunk_map: Arc = if (is_v5 && mgr.disable_indexed_map) - || blob_info.has_feature(BlobFeatures::_V5_NO_EXT_BLOB_TABLE) - { - direct_chunkmap = false; - Arc::new(BlobStateMap::from(DigestedChunkMap::new())) - } else { - Arc::new(BlobStateMap::from(IndexedChunkMap::new( - &format!("{}{}", blob_file, BLOB_DATA_FILE_SUFFIX), - blob_info.chunk_count(), - true, - )?)) - }; - - Ok((chunk_map, direct_chunkmap)) - } -} - -#[cfg(test)] -pub mod blob_cache_tests { - use nydus_api::FileCacheConfig; - use vmm_sys_util::tempdir::TempDir; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_blob_cache_config() { - // new blob cache - let tmp_dir = TempDir::new().unwrap(); - let dir = tmp_dir.as_path().to_path_buf(); - let s = format!( - r###" - {{ - "work_dir": {:?} - }} - "###, - dir - ); - - let mut blob_config: FileCacheConfig = serde_json::from_str(&s).unwrap(); - assert!(!blob_config.disable_indexed_map); - assert_eq!(blob_config.work_dir, dir.to_str().unwrap()); - - let tmp_file = TempFile::new().unwrap(); - let file = tmp_file.as_path().to_path_buf(); - blob_config.work_dir = file.to_str().unwrap().to_owned(); - assert!(blob_config.get_work_dir().is_err()); - } - - /* - #[test] - fn test_add() { - // new blob cache - let tmp_dir = TempDir::new().unwrap(); - let s = format!( - r###" - {{ - "work_dir": {:?} - }} - "###, - tmp_dir.as_path().to_path_buf().join("cache"), - ); - - let cache_config = CacheConfig { - cache_validate: true, - cache_compressed: false, - cache_type: String::from("blobcache"), - cache_config: serde_json::from_str(&s).unwrap(), - prefetch_config: BlobPrefetchConfig::default(), - }; - let blob_cache = filecache::new( - cache_config, - Arc::new(MockBackend { - metrics: BackendMetrics::new("id", "mock"), - }) as Arc, - compress::Algorithm::Lz4Block, - digest::Algorithm::Blake3, - "id", - ) - .unwrap(); - - // generate backend data - let mut expect = vec![1u8; 100]; - let blob_id = "blobcache"; - blob_cache - .backend - .read(blob_id, expect.as_mut(), 0) - .unwrap(); - - // generate chunk and bio - let mut chunk = MockChunkInfo::new(); - chunk.block_id = RafsDigest::from_buf(&expect, digest::Algorithm::Blake3); - chunk.file_offset = 0; - chunk.compress_offset = 0; - chunk.compress_size = 100; - chunk.decompress_offset = 0; - chunk.decompress_size = 100; - let bio = BlobIoDesc::new( - Arc::new(chunk), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: blob_id.to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - // read from cache - let r1 = unsafe { - let layout = Layout::from_size_align(50, 1).unwrap(); - let ptr = alloc_zeroed(layout); - let vs = VolatileSlice::new(ptr, 50); - blob_cache.read(&mut [bio.clone()], &[vs]).unwrap(); - 
Vec::from(from_raw_parts(ptr, 50)) - }; - - let r2 = unsafe { - let layout = Layout::from_size_align(50, 1).unwrap(); - let ptr = alloc_zeroed(layout); - let vs = VolatileSlice::new(ptr, 50); - blob_cache.read(&mut [bio], &[vs]).unwrap(); - Vec::from(from_raw_parts(ptr, 50)) - }; - - assert_eq!(r1, &expect[50..]); - assert_eq!(r2, &expect[50..]); - } - - #[test] - fn test_merge_bio() { - let tmp_dir = TempDir::new().unwrap(); - let s = format!( - r###" - {{ - "work_dir": {:?} - }} - "###, - tmp_dir.as_path().to_path_buf().join("cache"), - ); - - let cache_config = CacheConfig { - cache_validate: true, - cache_compressed: false, - cache_type: String::from("blobcache"), - cache_config: serde_json::from_str(&s).unwrap(), - prefetch_worker: BlobPrefetchConfig::default(), - }; - - let blob_cache = filecache::new( - cache_config, - Arc::new(MockBackend { - metrics: BackendMetrics::new("id", "mock"), - }) as Arc, - compress::Algorithm::Lz4Block, - digest::Algorithm::Blake3, - "id", - ) - .unwrap(); - - let merging_size: u64 = 128 * 1024 * 1024; - - let single_chunk = MockChunkInfo { - compress_offset: 1000, - compress_size: merging_size as u32 - 1, - ..Default::default() - }; - - let bio = BlobIoDesc::new( - Arc::new(single_chunk.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let (mut send, recv) = spmc::channel::(); - let mut bios = vec![bio]; - - blob_cache.generate_merged_requests_for_prefetch( - &mut bios, - &mut send, - merging_size as usize, - ); - let mr = recv.recv().unwrap(); - - assert_eq!(mr.blob_offset, single_chunk.compress_offset()); - assert_eq!(mr.blob_size, single_chunk.compress_size()); - - // --- - let chunk1 = MockChunkInfo { - compress_offset: 1000, - compress_size: merging_size as u32 - 2000, - ..Default::default() - }; - - let bio1 = BlobIoDesc::new( - Arc::new(chunk1.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let chunk2 = MockChunkInfo { - compress_offset: 1000 + merging_size - 2000, - compress_size: 200, - ..Default::default() - }; - - let bio2 = BlobIoDesc::new( - Arc::new(chunk2.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let mut bios = vec![bio1, bio2]; - let (mut send, recv) = spmc::channel::(); - blob_cache.generate_merged_requests_for_prefetch( - &mut bios, - &mut send, - merging_size as usize, - ); - let mr = recv.recv().unwrap(); - - assert_eq!(mr.blob_offset, chunk1.compress_offset()); - assert_eq!( - mr.blob_size, - chunk1.compress_size() + chunk2.compress_size() - ); - - // --- - let chunk1 = MockChunkInfo { - compress_offset: 1000, - compress_size: merging_size as u32 - 2000, - ..Default::default() - }; - - let bio1 = BlobIoDesc::new( - Arc::new(chunk1.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let 
chunk2 = MockChunkInfo { - compress_offset: 1000 + merging_size - 2000 + 1, - compress_size: 200, - ..Default::default() - }; - - let bio2 = BlobIoDesc::new( - Arc::new(chunk2.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let mut bios = vec![bio1, bio2]; - let (mut send, recv) = spmc::channel::(); - blob_cache.generate_merged_requests_for_prefetch( - &mut bios, - &mut send, - merging_size as usize, - ); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk1.compress_offset()); - assert_eq!(mr.blob_size, chunk1.compress_size()); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk2.compress_offset()); - assert_eq!(mr.blob_size, chunk2.compress_size()); - - // --- - let chunk1 = MockChunkInfo { - compress_offset: 1000, - compress_size: merging_size as u32 - 2000, - ..Default::default() - }; - - let bio1 = BlobIoDesc::new( - Arc::new(chunk1.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let chunk2 = MockChunkInfo { - compress_offset: 1000 + merging_size - 2000, - compress_size: 200, - ..Default::default() - }; - - let bio2 = BlobIoDesc::new( - Arc::new(chunk2.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "2".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let mut bios = vec![bio1, bio2]; - let (mut send, recv) = spmc::channel::(); - blob_cache.generate_merged_requests_for_prefetch( - &mut bios, - &mut send, - merging_size as usize, - ); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk1.compress_offset()); - assert_eq!(mr.blob_size, chunk1.compress_size()); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk2.compress_offset()); - assert_eq!(mr.blob_size, chunk2.compress_size()); - - // --- - let chunk1 = MockChunkInfo { - compress_offset: 1000, - compress_size: merging_size as u32 - 2000, - ..Default::default() - }; - - let bio1 = BlobIoDesc::new( - Arc::new(chunk1.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let chunk2 = MockChunkInfo { - compress_offset: 1000 + merging_size - 2000, - compress_size: 200, - ..Default::default() - }; - - let bio2 = BlobIoDesc::new( - Arc::new(chunk2.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "1".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let chunk3 = MockChunkInfo { - compress_offset: 1000 + merging_size - 2000, - compress_size: 200, - ..Default::default() - }; - - let bio3 = BlobIoDesc::new( - Arc::new(chunk3.clone()), - Arc::new(BlobInfo { - chunk_count: 0, - readahead_offset: 0, - readahead_size: 0, - blob_id: "2".to_string(), - blob_index: 0, - blob_decompressed_size: 0, - blob_compressed_size: 0, - }), - 50, - 
50, - RAFS_DEFAULT_BLOCK_SIZE as u32, - true, - ); - - let mut bios = vec![bio1, bio2, bio3]; - let (mut send, recv) = spmc::channel::(); - blob_cache.generate_merged_requests_for_prefetch( - &mut bios, - &mut send, - merging_size as usize, - ); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk1.compress_offset()); - assert_eq!( - mr.blob_size, - chunk1.compress_size() + chunk2.compress_size() - ); - - let mr = recv.recv().unwrap(); - assert_eq!(mr.blob_offset, chunk3.compress_offset()); - assert_eq!(mr.blob_size, chunk3.compress_size()); - } - */ -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::fs::OpenOptions; +use std::io::Result; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, RwLock}; + +use tokio::runtime::Runtime; + +use nydus_api::CacheConfigV2; +use nydus_utils::crypt; +use nydus_utils::metrics::BlobcacheMetrics; + +use crate::backend::BlobBackend; +use crate::cache::cachedfile::{FileCacheEntry, FileCacheMeta}; +use crate::cache::state::{ + BlobStateMap, ChunkMap, DigestedChunkMap, IndexedChunkMap, NoopChunkMap, +}; +use crate::cache::worker::{AsyncPrefetchConfig, AsyncWorkerMgr}; +use crate::cache::{BlobCache, BlobCacheMgr}; +use crate::device::{BlobFeatures, BlobInfo}; + +pub const BLOB_RAW_FILE_SUFFIX: &str = ".blob.raw"; +pub const BLOB_DATA_FILE_SUFFIX: &str = ".blob.data"; + +/// An implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html) to improve performance by +/// caching uncompressed blob with local storage. +#[derive(Clone)] +pub struct FileCacheMgr { + blobs: Arc>>>, + backend: Arc, + metrics: Arc, + prefetch_config: Arc, + runtime: Arc, + worker_mgr: Arc, + work_dir: String, + validate: bool, + disable_indexed_map: bool, + cache_raw_data: bool, + cache_encrypted: bool, + cache_convergent_encryption: bool, + cache_encryption_key: String, + closed: Arc, + user_io_batch_size: u32, +} + +impl FileCacheMgr { + /// Create a new instance of `FileCacheMgr`. + pub fn new( + config: &CacheConfigV2, + backend: Arc, + runtime: Arc, + id: &str, + user_io_batch_size: u32, + ) -> Result { + let blob_cfg = config.get_filecache_config()?; + let work_dir = blob_cfg.get_work_dir()?; + let metrics = BlobcacheMetrics::new(id, work_dir); + let prefetch_config: Arc = Arc::new((&config.prefetch).into()); + let worker_mgr = AsyncWorkerMgr::new(metrics.clone(), prefetch_config.clone())?; + + Ok(FileCacheMgr { + blobs: Arc::new(RwLock::new(HashMap::new())), + backend, + metrics, + prefetch_config, + runtime, + worker_mgr: Arc::new(worker_mgr), + work_dir: work_dir.to_owned(), + disable_indexed_map: blob_cfg.disable_indexed_map, + validate: config.cache_validate, + cache_raw_data: config.cache_compressed, + cache_encrypted: blob_cfg.enable_encryption, + cache_convergent_encryption: blob_cfg.enable_convergent_encryption, + cache_encryption_key: blob_cfg.encryption_key.clone(), + closed: Arc::new(AtomicBool::new(false)), + user_io_batch_size, + }) + } + + // Get the file cache entry for the specified blob object. + fn get(&self, blob: &Arc) -> Option> { + self.blobs.read().unwrap().get(&blob.blob_id()).cloned() + } + + // Create a file cache entry for the specified blob object if not present, otherwise + // return the existing one. 
+ fn get_or_create_cache_entry(&self, blob: &Arc) -> Result> { + if let Some(entry) = self.get(blob) { + return Ok(entry); + } + + let entry = FileCacheEntry::new_file_cache( + self, + blob.clone(), + self.prefetch_config.clone(), + self.runtime.clone(), + self.worker_mgr.clone(), + )?; + let entry = Arc::new(entry); + let mut guard = self.blobs.write().unwrap(); + if let Some(entry) = guard.get(&blob.blob_id()) { + Ok(entry.clone()) + } else { + let blob_id = blob.blob_id(); + guard.insert(blob_id.clone(), entry.clone()); + self.metrics + .underlying_files + .lock() + .unwrap() + .insert(blob_id + BLOB_DATA_FILE_SUFFIX); + Ok(entry) + } + } +} + +impl BlobCacheMgr for FileCacheMgr { + fn init(&self) -> Result<()> { + AsyncWorkerMgr::start(self.worker_mgr.clone()) + } + + fn destroy(&self) { + if !self.closed.load(Ordering::Acquire) { + self.closed.store(true, Ordering::Release); + self.worker_mgr.stop(); + self.backend().shutdown(); + self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } + } + + fn gc(&self, id: Option<&str>) -> bool { + let mut reclaim = Vec::new(); + + if let Some(blob_id) = id { + reclaim.push(blob_id.to_string()); + } else { + let guard = self.blobs.write().unwrap(); + for (id, entry) in guard.iter() { + if Arc::strong_count(entry) == 1 { + reclaim.push(id.to_owned()); + } + } + } + + for key in reclaim.iter() { + let mut guard = self.blobs.write().unwrap(); + if let Some(entry) = guard.get(key) { + if Arc::strong_count(entry) == 1 { + guard.remove(key); + } + } + } + + self.blobs.read().unwrap().len() == 0 + } + + fn backend(&self) -> &(dyn BlobBackend) { + self.backend.as_ref() + } + + fn get_blob_cache(&self, blob_info: &Arc) -> Result> { + self.get_or_create_cache_entry(blob_info) + .map(|v| v as Arc) + } + + fn check_stat(&self) {} +} + +impl Drop for FileCacheMgr { + fn drop(&mut self) { + self.destroy(); + } +} + +impl FileCacheEntry { + fn new_file_cache( + mgr: &FileCacheMgr, + blob_info: Arc, + prefetch_config: Arc, + runtime: Arc, + workers: Arc, + ) -> Result { + let is_separate_meta = blob_info.has_feature(BlobFeatures::SEPARATE); + let is_tarfs = blob_info.features().is_tarfs(); + let is_batch = blob_info.has_feature(BlobFeatures::BATCH); + let is_zran = blob_info.has_feature(BlobFeatures::ZRAN); + let blob_id = blob_info.blob_id(); + let blob_meta_id = if is_separate_meta { + blob_info.get_blob_meta_id()? + } else { + blob_id.clone() + }; + let reader = mgr + .backend + .get_reader(&blob_id) + .map_err(|e| eio!(format!("failed to get reader for blob {}, {}", blob_id, e)))?; + let blob_meta_reader = if is_separate_meta { + mgr.backend.get_reader(&blob_meta_id).map_err(|e| { + eio!(format!( + "failed to get reader for blob.meta {}, {}", + blob_id, e + )) + })? 
+ } else { + reader.clone() + }; + + let blob_compressed_size = Self::get_blob_size(&reader, &blob_info)?; + let blob_uncompressed_size = blob_info.uncompressed_size(); + let is_legacy_stargz = blob_info.is_legacy_stargz(); + + let ( + file, + meta, + chunk_map, + is_direct_chunkmap, + is_get_blob_object_supported, + need_validation, + ) = if is_tarfs { + let blob_file_path = format!("{}/{}", mgr.work_dir, blob_id); + let file = OpenOptions::new() + .create(false) + .write(false) + .read(true) + .open(blob_file_path)?; + let chunk_map = + Arc::new(BlobStateMap::from(NoopChunkMap::new(true))) as Arc; + (file, None, chunk_map, true, true, false) + } else { + let blob_file_path = format!("{}/{}", mgr.work_dir, blob_id); + let (chunk_map, is_direct_chunkmap) = + Self::create_chunk_map(mgr, &blob_info, &blob_file_path)?; + // Validation is supported by RAFS v5 (which has no meta_ci) or v6 with chunk digest array. + let validation_supported = !blob_info.meta_ci_is_valid() + || blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST); + let need_validation = ((mgr.validate && validation_supported) || !is_direct_chunkmap) + && !is_legacy_stargz; + // Set cache file to its expected size. + let suffix = if mgr.cache_raw_data { + BLOB_RAW_FILE_SUFFIX + } else { + BLOB_DATA_FILE_SUFFIX + }; + let blob_data_file_path = blob_file_path.clone() + suffix; + let file = OpenOptions::new() + .create(true) + .write(true) + .read(true) + .open(blob_data_file_path)?; + let file_size = file.metadata()?.len(); + let cached_file_size = if mgr.cache_raw_data { + blob_info.compressed_data_size() + } else { + blob_info.uncompressed_size() + }; + if file_size == 0 || file_size < cached_file_size { + file.set_len(cached_file_size)?; + } else if cached_file_size != 0 && file_size != cached_file_size { + let msg = format!( + "blob data file size doesn't match: got 0x{:x}, expect 0x{:x}", + file_size, cached_file_size + ); + return Err(einval!(msg)); + } + let meta = if blob_info.meta_ci_is_valid() + || blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) + { + let meta = FileCacheMeta::new( + blob_file_path, + blob_info.clone(), + Some(blob_meta_reader), + Some(runtime.clone()), + false, + need_validation, + )?; + Some(meta) + } else { + None + }; + let is_get_blob_object_supported = meta.is_some() && is_direct_chunkmap; + ( + file, + meta, + chunk_map, + is_direct_chunkmap, + is_get_blob_object_supported, + need_validation, + ) + }; + + let (cache_cipher_object, cache_cipher_context) = if mgr.cache_encrypted { + let key = hex::decode(mgr.cache_encryption_key.clone()) + .map_err(|_e| einval!("invalid cache file encryption key"))?; + let cipher = crypt::Algorithm::Aes128Xts.new_cipher()?; + let ctx = crypt::CipherContext::new( + key, + [0u8; 16].to_vec(), + mgr.cache_convergent_encryption, + crypt::Algorithm::Aes128Xts, + )?; + (Arc::new(cipher), Arc::new(ctx)) + } else { + (Default::default(), Default::default()) + }; + + trace!( + "filecache entry: is_raw_data {}, direct {}, legacy_stargz {}, separate_meta {}, tarfs {}, batch {}, zran {}", + mgr.cache_raw_data, + is_direct_chunkmap, + is_legacy_stargz, + is_separate_meta, + is_tarfs, + is_batch, + is_zran, + ); + Ok(FileCacheEntry { + blob_id, + blob_info, + cache_cipher_object, + cache_cipher_context, + chunk_map, + file: Arc::new(file), + meta, + metrics: mgr.metrics.clone(), + prefetch_state: Arc::new(AtomicU32::new(0)), + reader, + runtime, + workers, + + blob_compressed_size, + blob_uncompressed_size, + is_get_blob_object_supported, + is_raw_data: 
mgr.cache_raw_data, + is_cache_encrypted: mgr.cache_encrypted, + is_direct_chunkmap, + is_legacy_stargz, + is_tarfs, + is_batch, + is_zran, + dio_enabled: false, + need_validation, + user_io_batch_size: mgr.user_io_batch_size, + prefetch_config, + }) + } + + fn create_chunk_map( + mgr: &FileCacheMgr, + blob_info: &BlobInfo, + blob_file: &str, + ) -> Result<(Arc, bool)> { + // The builder now records the number of chunks in the blob table, so we can + // use IndexedChunkMap as a chunk map, but for the old Nydus bootstrap, we + // need downgrade to use DigestedChunkMap as a compatible solution. + let is_v5 = !blob_info.meta_ci_is_valid(); + let mut direct_chunkmap = true; + let chunk_map: Arc = if (is_v5 && mgr.disable_indexed_map) + || blob_info.has_feature(BlobFeatures::_V5_NO_EXT_BLOB_TABLE) + { + direct_chunkmap = false; + Arc::new(BlobStateMap::from(DigestedChunkMap::new())) + } else { + Arc::new(BlobStateMap::from(IndexedChunkMap::new( + &format!("{}{}", blob_file, BLOB_DATA_FILE_SUFFIX), + blob_info.chunk_count(), + true, + )?)) + }; + + Ok((chunk_map, direct_chunkmap)) + } +} + +#[cfg(test)] +pub mod blob_cache_tests { + use nydus_api::FileCacheConfig; + use vmm_sys_util::tempdir::TempDir; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_blob_cache_config() { + // new blob cache + let tmp_dir = TempDir::new().unwrap(); + let dir = tmp_dir.as_path().to_path_buf(); + let s = format!( + r###" + {{ + "work_dir": {:?} + }} + "###, + dir + ); + + let mut blob_config: FileCacheConfig = serde_json::from_str(&s).unwrap(); + assert!(!blob_config.disable_indexed_map); + assert_eq!(blob_config.work_dir, dir.to_str().unwrap()); + + let tmp_file = TempFile::new().unwrap(); + let file = tmp_file.as_path().to_path_buf(); + blob_config.work_dir = file.to_str().unwrap().to_owned(); + assert!(blob_config.get_work_dir().is_err()); + } + + /* + #[test] + fn test_add() { + // new blob cache + let tmp_dir = TempDir::new().unwrap(); + let s = format!( + r###" + {{ + "work_dir": {:?} + }} + "###, + tmp_dir.as_path().to_path_buf().join("cache"), + ); + + let cache_config = CacheConfig { + cache_validate: true, + cache_compressed: false, + cache_type: String::from("blobcache"), + cache_config: serde_json::from_str(&s).unwrap(), + prefetch_config: BlobPrefetchConfig::default(), + }; + let blob_cache = filecache::new( + cache_config, + Arc::new(MockBackend { + metrics: BackendMetrics::new("id", "mock"), + }) as Arc, + compress::Algorithm::Lz4Block, + digest::Algorithm::Blake3, + "id", + ) + .unwrap(); + + // generate backend data + let mut expect = vec![1u8; 100]; + let blob_id = "blobcache"; + blob_cache + .backend + .read(blob_id, expect.as_mut(), 0) + .unwrap(); + + // generate chunk and bio + let mut chunk = MockChunkInfo::new(); + chunk.block_id = RafsDigest::from_buf(&expect, digest::Algorithm::Blake3); + chunk.file_offset = 0; + chunk.compress_offset = 0; + chunk.compress_size = 100; + chunk.decompress_offset = 0; + chunk.decompress_size = 100; + let bio = BlobIoDesc::new( + Arc::new(chunk), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: blob_id.to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + // read from cache + let r1 = unsafe { + let layout = Layout::from_size_align(50, 1).unwrap(); + let ptr = alloc_zeroed(layout); + let vs = VolatileSlice::new(ptr, 50); + blob_cache.read(&mut [bio.clone()], &[vs]).unwrap(); + 
Vec::from(from_raw_parts(ptr, 50)) + }; + + let r2 = unsafe { + let layout = Layout::from_size_align(50, 1).unwrap(); + let ptr = alloc_zeroed(layout); + let vs = VolatileSlice::new(ptr, 50); + blob_cache.read(&mut [bio], &[vs]).unwrap(); + Vec::from(from_raw_parts(ptr, 50)) + }; + + assert_eq!(r1, &expect[50..]); + assert_eq!(r2, &expect[50..]); + } + + #[test] + fn test_merge_bio() { + let tmp_dir = TempDir::new().unwrap(); + let s = format!( + r###" + {{ + "work_dir": {:?} + }} + "###, + tmp_dir.as_path().to_path_buf().join("cache"), + ); + + let cache_config = CacheConfig { + cache_validate: true, + cache_compressed: false, + cache_type: String::from("blobcache"), + cache_config: serde_json::from_str(&s).unwrap(), + prefetch_worker: BlobPrefetchConfig::default(), + }; + + let blob_cache = filecache::new( + cache_config, + Arc::new(MockBackend { + metrics: BackendMetrics::new("id", "mock"), + }) as Arc, + compress::Algorithm::Lz4Block, + digest::Algorithm::Blake3, + "id", + ) + .unwrap(); + + let merging_size: u64 = 128 * 1024 * 1024; + + let single_chunk = MockChunkInfo { + compress_offset: 1000, + compress_size: merging_size as u32 - 1, + ..Default::default() + }; + + let bio = BlobIoDesc::new( + Arc::new(single_chunk.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let (mut send, recv) = spmc::channel::(); + let mut bios = vec![bio]; + + blob_cache.generate_merged_requests_for_prefetch( + &mut bios, + &mut send, + merging_size as usize, + ); + let mr = recv.recv().unwrap(); + + assert_eq!(mr.blob_offset, single_chunk.compress_offset()); + assert_eq!(mr.blob_size, single_chunk.compress_size()); + + // --- + let chunk1 = MockChunkInfo { + compress_offset: 1000, + compress_size: merging_size as u32 - 2000, + ..Default::default() + }; + + let bio1 = BlobIoDesc::new( + Arc::new(chunk1.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let chunk2 = MockChunkInfo { + compress_offset: 1000 + merging_size - 2000, + compress_size: 200, + ..Default::default() + }; + + let bio2 = BlobIoDesc::new( + Arc::new(chunk2.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let mut bios = vec![bio1, bio2]; + let (mut send, recv) = spmc::channel::(); + blob_cache.generate_merged_requests_for_prefetch( + &mut bios, + &mut send, + merging_size as usize, + ); + let mr = recv.recv().unwrap(); + + assert_eq!(mr.blob_offset, chunk1.compress_offset()); + assert_eq!( + mr.blob_size, + chunk1.compress_size() + chunk2.compress_size() + ); + + // --- + let chunk1 = MockChunkInfo { + compress_offset: 1000, + compress_size: merging_size as u32 - 2000, + ..Default::default() + }; + + let bio1 = BlobIoDesc::new( + Arc::new(chunk1.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let 
chunk2 = MockChunkInfo { + compress_offset: 1000 + merging_size - 2000 + 1, + compress_size: 200, + ..Default::default() + }; + + let bio2 = BlobIoDesc::new( + Arc::new(chunk2.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let mut bios = vec![bio1, bio2]; + let (mut send, recv) = spmc::channel::(); + blob_cache.generate_merged_requests_for_prefetch( + &mut bios, + &mut send, + merging_size as usize, + ); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk1.compress_offset()); + assert_eq!(mr.blob_size, chunk1.compress_size()); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk2.compress_offset()); + assert_eq!(mr.blob_size, chunk2.compress_size()); + + // --- + let chunk1 = MockChunkInfo { + compress_offset: 1000, + compress_size: merging_size as u32 - 2000, + ..Default::default() + }; + + let bio1 = BlobIoDesc::new( + Arc::new(chunk1.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let chunk2 = MockChunkInfo { + compress_offset: 1000 + merging_size - 2000, + compress_size: 200, + ..Default::default() + }; + + let bio2 = BlobIoDesc::new( + Arc::new(chunk2.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "2".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let mut bios = vec![bio1, bio2]; + let (mut send, recv) = spmc::channel::(); + blob_cache.generate_merged_requests_for_prefetch( + &mut bios, + &mut send, + merging_size as usize, + ); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk1.compress_offset()); + assert_eq!(mr.blob_size, chunk1.compress_size()); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk2.compress_offset()); + assert_eq!(mr.blob_size, chunk2.compress_size()); + + // --- + let chunk1 = MockChunkInfo { + compress_offset: 1000, + compress_size: merging_size as u32 - 2000, + ..Default::default() + }; + + let bio1 = BlobIoDesc::new( + Arc::new(chunk1.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let chunk2 = MockChunkInfo { + compress_offset: 1000 + merging_size - 2000, + compress_size: 200, + ..Default::default() + }; + + let bio2 = BlobIoDesc::new( + Arc::new(chunk2.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "1".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let chunk3 = MockChunkInfo { + compress_offset: 1000 + merging_size - 2000, + compress_size: 200, + ..Default::default() + }; + + let bio3 = BlobIoDesc::new( + Arc::new(chunk3.clone()), + Arc::new(BlobInfo { + chunk_count: 0, + readahead_offset: 0, + readahead_size: 0, + blob_id: "2".to_string(), + blob_index: 0, + blob_decompressed_size: 0, + blob_compressed_size: 0, + }), + 50, + 
50, + RAFS_DEFAULT_BLOCK_SIZE as u32, + true, + ); + + let mut bios = vec![bio1, bio2, bio3]; + let (mut send, recv) = spmc::channel::(); + blob_cache.generate_merged_requests_for_prefetch( + &mut bios, + &mut send, + merging_size as usize, + ); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk1.compress_offset()); + assert_eq!( + mr.blob_size, + chunk1.compress_size() + chunk2.compress_size() + ); + + let mr = recv.recv().unwrap(); + assert_eq!(mr.blob_offset, chunk3.compress_offset()); + assert_eq!(mr.blob_size, chunk3.compress_size()); + } + */ +} diff --git a/storage/src/cache/fscache/mod.rs b/storage/src/cache/fscache/mod.rs index 5b2285c9b0e..cc251b6a40a 100644 --- a/storage/src/cache/fscache/mod.rs +++ b/storage/src/cache/fscache/mod.rs @@ -1,458 +1,458 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::fs::File; -use std::io::{Error, Result}; -use std::os::unix::io::AsRawFd; -use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU8, Ordering}; -use std::sync::{Arc, RwLock}; - -use nydus_api::CacheConfigV2; -use nydus_utils::metrics::BlobcacheMetrics; -use tokio::runtime::Runtime; - -use crate::backend::BlobBackend; -use crate::cache::cachedfile::{FileCacheEntry, FileCacheMeta}; -use crate::cache::state::{BlobStateMap, IndexedChunkMap, RangeMap}; -use crate::cache::worker::{AsyncPrefetchConfig, AsyncWorkerMgr}; -use crate::cache::{BlobCache, BlobCacheMgr}; -use crate::device::{BlobFeatures, BlobInfo, BlobObject}; -use crate::factory::BLOB_FACTORY; - -use crate::cache::filecache::BLOB_DATA_FILE_SUFFIX; - -const FSCACHE_BLOBS_CHECK_NUM: u8 = 1; - -/// An implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html) to improve performance by -/// caching uncompressed blob with Linux fscache subsystem. -#[derive(Clone)] -pub struct FsCacheMgr { - blobs: Arc>>>, - backend: Arc, - metrics: Arc, - prefetch_config: Arc, - runtime: Arc, - worker_mgr: Arc, - work_dir: String, - need_validation: bool, - blobs_check_count: Arc, - closed: Arc, - user_io_batch_size: u32, -} - -impl FsCacheMgr { - /// Create a new instance of `FileCacheMgr`. - pub fn new( - config: &CacheConfigV2, - backend: Arc, - runtime: Arc, - id: &str, - user_io_batch_size: u32, - ) -> Result { - if config.cache_compressed { - return Err(enosys!("fscache doesn't support compressed cache mode")); - } - - let blob_cfg = config.get_fscache_config()?; - let work_dir = blob_cfg.get_work_dir()?; - let metrics = BlobcacheMetrics::new(id, work_dir); - let prefetch_config: Arc = Arc::new((&config.prefetch).into()); - let worker_mgr = AsyncWorkerMgr::new(metrics.clone(), prefetch_config.clone())?; - - BLOB_FACTORY.start_mgr_checker(); - - Ok(FsCacheMgr { - blobs: Arc::new(RwLock::new(HashMap::new())), - backend, - metrics, - prefetch_config, - runtime, - worker_mgr: Arc::new(worker_mgr), - work_dir: work_dir.to_owned(), - need_validation: config.cache_validate, - blobs_check_count: Arc::new(AtomicU8::new(0)), - closed: Arc::new(AtomicBool::new(false)), - user_io_batch_size, - }) - } - - // Get the file cache entry for the specified blob object. - fn get(&self, blob: &Arc) -> Option> { - self.blobs.read().unwrap().get(&blob.blob_id()).cloned() - } - - // Create a file cache entry for the specified blob object if not present, otherwise - // return the existing one. 
- fn get_or_create_cache_entry(&self, blob: &Arc) -> Result> { - if let Some(entry) = self.get(blob) { - return Ok(entry); - } - - let entry = FileCacheEntry::new_fs_cache( - self, - blob.clone(), - self.prefetch_config.clone(), - self.runtime.clone(), - self.worker_mgr.clone(), - )?; - let entry = Arc::new(entry); - let mut guard = self.blobs.write().unwrap(); - if let Some(entry) = guard.get(&blob.blob_id()) { - Ok(entry.clone()) - } else { - let blob_id = blob.blob_id(); - guard.insert(blob_id.clone(), entry.clone()); - self.metrics - .underlying_files - .lock() - .unwrap() - .insert(blob_id + BLOB_DATA_FILE_SUFFIX); - Ok(entry) - } - } -} - -impl BlobCacheMgr for FsCacheMgr { - fn init(&self) -> Result<()> { - AsyncWorkerMgr::start(self.worker_mgr.clone()) - } - - fn destroy(&self) { - if !self.closed.load(Ordering::Acquire) { - self.closed.store(true, Ordering::Release); - self.worker_mgr.stop(); - self.backend().shutdown(); - self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); - } - } - - fn gc(&self, id: Option<&str>) -> bool { - if let Some(blob_id) = id { - self.blobs.write().unwrap().remove(blob_id); - } else { - let mut reclaim = Vec::new(); - let guard = self.blobs.write().unwrap(); - for (id, entry) in guard.iter() { - if Arc::strong_count(entry) == 1 { - reclaim.push(id.to_owned()); - } - } - drop(guard); - - for key in reclaim.iter() { - let mut guard = self.blobs.write().unwrap(); - if let Some(entry) = guard.get(key) { - if Arc::strong_count(entry) == 1 { - guard.remove(key); - } - } - } - } - - self.blobs.read().unwrap().len() == 0 - } - - fn backend(&self) -> &(dyn BlobBackend) { - self.backend.as_ref() - } - - fn get_blob_cache(&self, blob_info: &Arc) -> Result> { - self.get_or_create_cache_entry(blob_info) - .map(|v| v as Arc) - } - - fn check_stat(&self) { - let guard = self.blobs.read().unwrap(); - - let mut all_ready = true; - for (_id, entry) in guard.iter() { - if !entry.is_all_data_ready() { - all_ready = false; - break; - } - } - - // we should double check blobs stat, in case some blobs hadn't been created when we checked. - if all_ready { - if self.blobs_check_count.load(Ordering::Acquire) == FSCACHE_BLOBS_CHECK_NUM { - self.worker_mgr.stop(); - self.metrics.data_all_ready.store(true, Ordering::Release); - } else { - self.blobs_check_count.fetch_add(1, Ordering::Acquire); - } - } else { - self.blobs_check_count.store(0, Ordering::Release); - } - } -} - -impl Drop for FsCacheMgr { - fn drop(&mut self) { - self.destroy(); - } -} - -impl FileCacheEntry { - pub fn new_fs_cache( - mgr: &FsCacheMgr, - blob_info: Arc, - prefetch_config: Arc, - runtime: Arc, - workers: Arc, - ) -> Result { - if blob_info.has_feature(BlobFeatures::_V5_NO_EXT_BLOB_TABLE) { - return Err(einval!("fscache does not support Rafs v5 blobs")); - } - let is_tarfs = blob_info.features().is_tarfs(); - if is_tarfs { - return Err(einval!("fscache does not support RAFS in tarfs mode")); - } - - let file = blob_info - .get_fscache_file() - .ok_or_else(|| einval!("No fscache file associated with the blob_info"))?; - let is_separate_meta = blob_info.has_feature(BlobFeatures::SEPARATE); - let is_batch = blob_info.has_feature(BlobFeatures::BATCH); - let is_zran = blob_info.has_feature(BlobFeatures::ZRAN); - let cache_cipher = blob_info.cipher(); - let is_cache_encrypted = cache_cipher.is_encryption_enabled(); - let blob_id = blob_info.blob_id(); - let blob_meta_id = if is_separate_meta { - blob_info.get_blob_meta_id()? 
- } else { - blob_id.clone() - }; - let reader = mgr - .backend - .get_reader(&blob_id) - .map_err(|_e| eio!("failed to get reader for data blob"))?; - let blob_meta_reader = if is_separate_meta { - mgr.backend.get_reader(&blob_meta_id).map_err(|e| { - eio!(format!( - "failed to get reader for blob.meta {}, {}", - blob_id, e - )) - })? - } else { - reader.clone() - }; - let blob_compressed_size = Self::get_blob_size(&reader, &blob_info)?; - - let need_validation = mgr.need_validation - && !blob_info.is_legacy_stargz() - && blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST); - let blob_file_path = format!("{}/{}", mgr.work_dir, blob_meta_id); - let meta = if blob_info.meta_ci_is_valid() { - FileCacheMeta::new( - blob_file_path.clone(), - blob_info.clone(), - Some(blob_meta_reader), - None, - true, - need_validation, - )? - } else { - return Err(enosys!( - "fscache doesn't support blobs without blob meta information" - )); - }; - - let chunk_map = Arc::new(BlobStateMap::from(IndexedChunkMap::new( - &format!("{}{}", blob_file_path, BLOB_DATA_FILE_SUFFIX), - blob_info.chunk_count(), - false, - )?)); - Self::restore_chunk_map(blob_info.clone(), file.clone(), &meta, &chunk_map); - - Ok(FileCacheEntry { - blob_id, - blob_info: blob_info.clone(), - cache_cipher_object: Default::default(), - cache_cipher_context: Default::default(), - chunk_map, - file, - meta: Some(meta), - metrics: mgr.metrics.clone(), - prefetch_state: Arc::new(AtomicU32::new(0)), - reader, - runtime, - workers, - - blob_compressed_size, - blob_uncompressed_size: blob_info.uncompressed_size(), - is_get_blob_object_supported: true, - is_raw_data: false, - is_direct_chunkmap: true, - is_cache_encrypted, - is_legacy_stargz: blob_info.is_legacy_stargz(), - is_tarfs, - is_batch, - is_zran, - dio_enabled: true, - need_validation, - user_io_batch_size: mgr.user_io_batch_size, - prefetch_config, - }) - } - - fn restore_chunk_map( - blob_info: Arc, - file: Arc, - meta: &FileCacheMeta, - chunk_map: &BlobStateMap, - ) { - let blob_meta = match meta.get_blob_meta() { - Some(v) => v, - None => { - warn!("failed to get blob meta object for blob, skip chunkmap recover"); - return; - } - }; - - let mut i = 0; - while i < blob_info.chunk_count() { - let hole_offset = unsafe { - libc::lseek64( - file.as_raw_fd(), - blob_meta.get_uncompressed_offset(i as usize) as i64, - libc::SEEK_HOLE, - ) - }; - - if hole_offset < 0 { - warn!( - "seek hole err {} for blob {}", - Error::last_os_error(), - blob_info.blob_id() - ); - break; - } - - if hole_offset as u64 == blob_info.uncompressed_size() { - debug!( - "seek hole to file end, blob {} rest chunks {} - {} all ready", - blob_info.blob_id(), - i, - blob_info.chunk_count() - 1, - ); - if let Err(e) = - chunk_map.set_range_ready_and_clear_pending(i, blob_info.chunk_count() - i) - { - warn!("set range ready err {}", e); - } - break; - } - - let hole_index = match blob_meta.get_chunk_index(hole_offset as u64) { - Ok(h) => h as u32, - Err(e) => { - warn!("get offset chunk index err {}", e); - break; - } - }; - if hole_index > i { - debug!( - "set blob {} rang {}-{} ready", - blob_info.blob_id(), - i, - hole_index - 1, - ); - if let Err(e) = chunk_map.set_range_ready_and_clear_pending(i, hole_index - i) { - warn!("set range ready err {}", e); - break; - } - } - i = hole_index + 1; - } - } -} - -#[cfg(test)] -mod tests { - use std::{fs::OpenOptions, path::PathBuf}; - - use nydus_api::ConfigV2; - use nydus_utils::{compress, metrics::BackendMetrics}; - - use crate::{factory::ASYNC_RUNTIME, 
test::MockBackend, RAFS_DEFAULT_CHUNK_SIZE}; - - use super::*; - - #[test] - fn test_fs_cache_mgr() { - let content = r#"version=2 - id = "my_id" - metadata_path = "meta_path" - [backend] - type = "localfs" - [backend.localfs] - blob_file = "/tmp/nydus.blob.data" - dir = "/tmp" - alt_dirs = ["/var/nydus/cache"] - [cache] - type = "fscache" - compressed = false - validate = true - [cache.fscache] - work_dir = "/tmp" - "#; - - let cfg: ConfigV2 = toml::from_str(content).unwrap(); - let backend = MockBackend { - metrics: BackendMetrics::new("dummy", "localfs"), - }; - - let mut mgr: FsCacheMgr = FsCacheMgr::new( - cfg.get_cache_config().unwrap(), - Arc::new(backend), - ASYNC_RUNTIME.clone(), - &cfg.id, - 0, - ) - .unwrap(); - assert!(mgr.init().is_ok()); - mgr.work_dir = "../tests/texture/zran/".to_string(); - - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); - - let features = BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2 - | BlobFeatures::ZRAN; - - let mut blob_info = BlobInfo::new( - 0, - "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), - 0x16c6000, - 9839040, - RAFS_DEFAULT_CHUNK_SIZE as u32, - 0xa3, - features, - ); - - blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); - - let f1: File = OpenOptions::new() - .truncate(true) - .create(true) - .write(true) - .read(true) - .open(path.as_os_str()) - .unwrap(); - f1.set_len(800).unwrap(); - - blob_info.set_fscache_file(Some(Arc::new(f1.try_clone().unwrap()))); - - assert!(mgr.get_blob_cache(&Arc::new(blob_info.clone())).is_ok()); - assert!(mgr.gc(Some( - "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a" - ))); - mgr.check_stat(); - let _backend = mgr.backend(); - mgr.destroy(); - drop(mgr); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::fs::File; +use std::io::{Error, Result}; +use std::os::unix::io::AsRawFd; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU8, Ordering}; +use std::sync::{Arc, RwLock}; + +use nydus_api::CacheConfigV2; +use nydus_utils::metrics::BlobcacheMetrics; +use tokio::runtime::Runtime; + +use crate::backend::BlobBackend; +use crate::cache::cachedfile::{FileCacheEntry, FileCacheMeta}; +use crate::cache::state::{BlobStateMap, IndexedChunkMap, RangeMap}; +use crate::cache::worker::{AsyncPrefetchConfig, AsyncWorkerMgr}; +use crate::cache::{BlobCache, BlobCacheMgr}; +use crate::device::{BlobFeatures, BlobInfo, BlobObject}; +use crate::factory::BLOB_FACTORY; + +use crate::cache::filecache::BLOB_DATA_FILE_SUFFIX; + +const FSCACHE_BLOBS_CHECK_NUM: u8 = 1; + +/// An implementation of [BlobCacheMgr](../trait.BlobCacheMgr.html) to improve performance by +/// caching uncompressed blob with Linux fscache subsystem. +#[derive(Clone)] +pub struct FsCacheMgr { + blobs: Arc>>>, + backend: Arc, + metrics: Arc, + prefetch_config: Arc, + runtime: Arc, + worker_mgr: Arc, + work_dir: String, + need_validation: bool, + blobs_check_count: Arc, + closed: Arc, + user_io_batch_size: u32, +} + +impl FsCacheMgr { + /// Create a new instance of `FileCacheMgr`. 
+ pub fn new( + config: &CacheConfigV2, + backend: Arc, + runtime: Arc, + id: &str, + user_io_batch_size: u32, + ) -> Result { + if config.cache_compressed { + return Err(enosys!("fscache doesn't support compressed cache mode")); + } + + let blob_cfg = config.get_fscache_config()?; + let work_dir = blob_cfg.get_work_dir()?; + let metrics = BlobcacheMetrics::new(id, work_dir); + let prefetch_config: Arc = Arc::new((&config.prefetch).into()); + let worker_mgr = AsyncWorkerMgr::new(metrics.clone(), prefetch_config.clone())?; + + BLOB_FACTORY.start_mgr_checker(); + + Ok(FsCacheMgr { + blobs: Arc::new(RwLock::new(HashMap::new())), + backend, + metrics, + prefetch_config, + runtime, + worker_mgr: Arc::new(worker_mgr), + work_dir: work_dir.to_owned(), + need_validation: config.cache_validate, + blobs_check_count: Arc::new(AtomicU8::new(0)), + closed: Arc::new(AtomicBool::new(false)), + user_io_batch_size, + }) + } + + // Get the file cache entry for the specified blob object. + fn get(&self, blob: &Arc) -> Option> { + self.blobs.read().unwrap().get(&blob.blob_id()).cloned() + } + + // Create a file cache entry for the specified blob object if not present, otherwise + // return the existing one. + fn get_or_create_cache_entry(&self, blob: &Arc) -> Result> { + if let Some(entry) = self.get(blob) { + return Ok(entry); + } + + let entry = FileCacheEntry::new_fs_cache( + self, + blob.clone(), + self.prefetch_config.clone(), + self.runtime.clone(), + self.worker_mgr.clone(), + )?; + let entry = Arc::new(entry); + let mut guard = self.blobs.write().unwrap(); + if let Some(entry) = guard.get(&blob.blob_id()) { + Ok(entry.clone()) + } else { + let blob_id = blob.blob_id(); + guard.insert(blob_id.clone(), entry.clone()); + self.metrics + .underlying_files + .lock() + .unwrap() + .insert(blob_id + BLOB_DATA_FILE_SUFFIX); + Ok(entry) + } + } +} + +impl BlobCacheMgr for FsCacheMgr { + fn init(&self) -> Result<()> { + AsyncWorkerMgr::start(self.worker_mgr.clone()) + } + + fn destroy(&self) { + if !self.closed.load(Ordering::Acquire) { + self.closed.store(true, Ordering::Release); + self.worker_mgr.stop(); + self.backend().shutdown(); + self.metrics.release().unwrap_or_else(|e| error!("{:?}", e)); + } + } + + fn gc(&self, id: Option<&str>) -> bool { + if let Some(blob_id) = id { + self.blobs.write().unwrap().remove(blob_id); + } else { + let mut reclaim = Vec::new(); + let guard = self.blobs.write().unwrap(); + for (id, entry) in guard.iter() { + if Arc::strong_count(entry) == 1 { + reclaim.push(id.to_owned()); + } + } + drop(guard); + + for key in reclaim.iter() { + let mut guard = self.blobs.write().unwrap(); + if let Some(entry) = guard.get(key) { + if Arc::strong_count(entry) == 1 { + guard.remove(key); + } + } + } + } + + self.blobs.read().unwrap().len() == 0 + } + + fn backend(&self) -> &(dyn BlobBackend) { + self.backend.as_ref() + } + + fn get_blob_cache(&self, blob_info: &Arc) -> Result> { + self.get_or_create_cache_entry(blob_info) + .map(|v| v as Arc) + } + + fn check_stat(&self) { + let guard = self.blobs.read().unwrap(); + + let mut all_ready = true; + for (_id, entry) in guard.iter() { + if !entry.is_all_data_ready() { + all_ready = false; + break; + } + } + + // we should double check blobs stat, in case some blobs hadn't been created when we checked. 
+ if all_ready { + if self.blobs_check_count.load(Ordering::Acquire) == FSCACHE_BLOBS_CHECK_NUM { + self.worker_mgr.stop(); + self.metrics.data_all_ready.store(true, Ordering::Release); + } else { + self.blobs_check_count.fetch_add(1, Ordering::Acquire); + } + } else { + self.blobs_check_count.store(0, Ordering::Release); + } + } +} + +impl Drop for FsCacheMgr { + fn drop(&mut self) { + self.destroy(); + } +} + +impl FileCacheEntry { + pub fn new_fs_cache( + mgr: &FsCacheMgr, + blob_info: Arc, + prefetch_config: Arc, + runtime: Arc, + workers: Arc, + ) -> Result { + if blob_info.has_feature(BlobFeatures::_V5_NO_EXT_BLOB_TABLE) { + return Err(einval!("fscache does not support Rafs v5 blobs")); + } + let is_tarfs = blob_info.features().is_tarfs(); + if is_tarfs { + return Err(einval!("fscache does not support RAFS in tarfs mode")); + } + + let file = blob_info + .get_fscache_file() + .ok_or_else(|| einval!("No fscache file associated with the blob_info"))?; + let is_separate_meta = blob_info.has_feature(BlobFeatures::SEPARATE); + let is_batch = blob_info.has_feature(BlobFeatures::BATCH); + let is_zran = blob_info.has_feature(BlobFeatures::ZRAN); + let cache_cipher = blob_info.cipher(); + let is_cache_encrypted = cache_cipher.is_encryption_enabled(); + let blob_id = blob_info.blob_id(); + let blob_meta_id = if is_separate_meta { + blob_info.get_blob_meta_id()? + } else { + blob_id.clone() + }; + let reader = mgr + .backend + .get_reader(&blob_id) + .map_err(|_e| eio!("failed to get reader for data blob"))?; + let blob_meta_reader = if is_separate_meta { + mgr.backend.get_reader(&blob_meta_id).map_err(|e| { + eio!(format!( + "failed to get reader for blob.meta {}, {}", + blob_id, e + )) + })? + } else { + reader.clone() + }; + let blob_compressed_size = Self::get_blob_size(&reader, &blob_info)?; + + let need_validation = mgr.need_validation + && !blob_info.is_legacy_stargz() + && blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST); + let blob_file_path = format!("{}/{}", mgr.work_dir, blob_meta_id); + let meta = if blob_info.meta_ci_is_valid() { + FileCacheMeta::new( + blob_file_path.clone(), + blob_info.clone(), + Some(blob_meta_reader), + None, + true, + need_validation, + )? 
+ } else { + return Err(enosys!( + "fscache doesn't support blobs without blob meta information" + )); + }; + + let chunk_map = Arc::new(BlobStateMap::from(IndexedChunkMap::new( + &format!("{}{}", blob_file_path, BLOB_DATA_FILE_SUFFIX), + blob_info.chunk_count(), + false, + )?)); + Self::restore_chunk_map(blob_info.clone(), file.clone(), &meta, &chunk_map); + + Ok(FileCacheEntry { + blob_id, + blob_info: blob_info.clone(), + cache_cipher_object: Default::default(), + cache_cipher_context: Default::default(), + chunk_map, + file, + meta: Some(meta), + metrics: mgr.metrics.clone(), + prefetch_state: Arc::new(AtomicU32::new(0)), + reader, + runtime, + workers, + + blob_compressed_size, + blob_uncompressed_size: blob_info.uncompressed_size(), + is_get_blob_object_supported: true, + is_raw_data: false, + is_direct_chunkmap: true, + is_cache_encrypted, + is_legacy_stargz: blob_info.is_legacy_stargz(), + is_tarfs, + is_batch, + is_zran, + dio_enabled: true, + need_validation, + user_io_batch_size: mgr.user_io_batch_size, + prefetch_config, + }) + } + + fn restore_chunk_map( + blob_info: Arc, + file: Arc, + meta: &FileCacheMeta, + chunk_map: &BlobStateMap, + ) { + let blob_meta = match meta.get_blob_meta() { + Some(v) => v, + None => { + warn!("failed to get blob meta object for blob, skip chunkmap recover"); + return; + } + }; + + let mut i = 0; + while i < blob_info.chunk_count() { + let hole_offset = unsafe { + libc::lseek64( + file.as_raw_fd(), + blob_meta.get_uncompressed_offset(i as usize) as i64, + libc::SEEK_HOLE, + ) + }; + + if hole_offset < 0 { + warn!( + "seek hole err {} for blob {}", + Error::last_os_error(), + blob_info.blob_id() + ); + break; + } + + if hole_offset as u64 == blob_info.uncompressed_size() { + debug!( + "seek hole to file end, blob {} rest chunks {} - {} all ready", + blob_info.blob_id(), + i, + blob_info.chunk_count() - 1, + ); + if let Err(e) = + chunk_map.set_range_ready_and_clear_pending(i, blob_info.chunk_count() - i) + { + warn!("set range ready err {}", e); + } + break; + } + + let hole_index = match blob_meta.get_chunk_index(hole_offset as u64) { + Ok(h) => h as u32, + Err(e) => { + warn!("get offset chunk index err {}", e); + break; + } + }; + if hole_index > i { + debug!( + "set blob {} rang {}-{} ready", + blob_info.blob_id(), + i, + hole_index - 1, + ); + if let Err(e) = chunk_map.set_range_ready_and_clear_pending(i, hole_index - i) { + warn!("set range ready err {}", e); + break; + } + } + i = hole_index + 1; + } + } +} + +#[cfg(test)] +mod tests { + use std::{fs::OpenOptions, path::PathBuf}; + + use nydus_api::ConfigV2; + use nydus_utils::{compress, metrics::BackendMetrics}; + + use crate::{factory::ASYNC_RUNTIME, test::MockBackend, RAFS_DEFAULT_CHUNK_SIZE}; + + use super::*; + + #[test] + fn test_fs_cache_mgr() { + let content = r#"version=2 + id = "my_id" + metadata_path = "meta_path" + [backend] + type = "localfs" + [backend.localfs] + blob_file = "/tmp/nydus.blob.data" + dir = "/tmp" + alt_dirs = ["/var/nydus/cache"] + [cache] + type = "fscache" + compressed = false + validate = true + [cache.fscache] + work_dir = "/tmp" + "#; + + let cfg: ConfigV2 = toml::from_str(content).unwrap(); + let backend = MockBackend { + metrics: BackendMetrics::new("dummy", "localfs"), + }; + + let mut mgr: FsCacheMgr = FsCacheMgr::new( + cfg.get_cache_config().unwrap(), + Arc::new(backend), + ASYNC_RUNTIME.clone(), + &cfg.id, + 0, + ) + .unwrap(); + assert!(mgr.init().is_ok()); + mgr.work_dir = "../tests/texture/zran/".to_string(); + + let root_dir = 
&std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); + + let features = BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2 + | BlobFeatures::ZRAN; + + let mut blob_info = BlobInfo::new( + 0, + "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), + 0x16c6000, + 9839040, + RAFS_DEFAULT_CHUNK_SIZE as u32, + 0xa3, + features, + ); + + blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); + + let f1: File = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .read(true) + .open(path.as_os_str()) + .unwrap(); + f1.set_len(800).unwrap(); + + blob_info.set_fscache_file(Some(Arc::new(f1.try_clone().unwrap()))); + + assert!(mgr.get_blob_cache(&Arc::new(blob_info.clone())).is_ok()); + assert!(mgr.gc(Some( + "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a" + ))); + mgr.check_stat(); + let _backend = mgr.backend(); + mgr.destroy(); + drop(mgr); + } +} diff --git a/storage/src/cache/mod.rs b/storage/src/cache/mod.rs index 7d91862b78d..35857cdea4b 100644 --- a/storage/src/cache/mod.rs +++ b/storage/src/cache/mod.rs @@ -1,786 +1,786 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A blob cache layer over storage backend to improve performance. -//! -//! One of Rafs filesystem's goal is to support "on demand data loading". On demand loading may -//! help to speed up application/container startup, but it may also cause serious performance -//! penalty if all data chunks are retrieved from remoted backend storage. So cache layer is -//! introduced between Rafs filesystem and backend storage, which caches remote data onto local -//! storage and merge small data request into bigger request to improve network performance. -//! -//! There are several cache drivers implemented: -//! - [DummyCacheMgr](dummycache/struct.DummyCacheMgr.html): a dummy implementation of -//! `BlobCacheMgr`, simply reporting each chunk as cached or not cached according to -//! configuration. - -use std::cmp; -use std::io::Result; -use std::sync::Arc; -use std::time::Instant; - -use fuse_backend_rs::file_buf::FileVolatileSlice; -use nydus_utils::compress::zlib_random::ZranDecoder; -use nydus_utils::crypt::{self, Cipher, CipherContext}; -use nydus_utils::{compress, digest}; - -use crate::backend::{BlobBackend, BlobReader}; -use crate::cache::state::ChunkMap; -use crate::device::{ - BlobChunkInfo, BlobInfo, BlobIoDesc, BlobIoRange, BlobIoVec, BlobObject, BlobPrefetchRequest, -}; -use crate::meta::BlobCompressionContextInfo; -use crate::utils::{alloc_buf, check_digest}; -use crate::{StorageResult, RAFS_MAX_CHUNK_SIZE}; - -mod cachedfile; -#[cfg(feature = "dedup")] -mod dedup; -mod dummycache; -mod filecache; -#[cfg(target_os = "linux")] -mod fscache; -mod worker; - -pub mod state; - -pub use dummycache::DummyCacheMgr; -pub use filecache::FileCacheMgr; -#[cfg(target_os = "linux")] -pub use fscache::FsCacheMgr; - -/// Timeout in milli-seconds to retrieve blob data from backend storage. 
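The `restore_chunk_map()` helper added above rebuilds chunk readiness by probing the sparse fscache file with `SEEK_HOLE`: every byte below the first hole is data that is already cached. A minimal standalone sketch of that primitive (Linux-only; the file path is made up for illustration, and hole-reporting granularity depends on the filesystem):

```rust
// Minimal SEEK_HOLE probe, mirroring the idea behind restore_chunk_map():
// data already present in a sparse cache file lies before the first hole,
// so chunks whose offsets fall below the hole offset can be marked ready.
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::io::AsRawFd;

fn main() -> std::io::Result<()> {
    // Hypothetical scratch file; the real code probes the fscache blob file.
    let mut f = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(true)
        .open("/tmp/sparse-probe.blob")?;
    f.set_len(1 << 20)?; // 1 MiB sparse file: all hole initially.
    f.write_all(&[0xaau8; 8192])?; // materialize the first 8 KiB as data

    // Ask the kernel where the first hole at or after offset 0 begins.
    let hole = unsafe { libc::lseek64(f.as_raw_fd(), 0, libc::SEEK_HOLE) };
    if hole < 0 {
        return Err(std::io::Error::last_os_error());
    }
    // On filesystems with hole tracking this is >= 8192 (rounded up to the
    // block size); everything below `hole` is data that is already cached.
    println!("first hole starts at offset {}", hole);
    Ok(())
}
```

In the patch the returned hole offset is translated back into a chunk index via `get_chunk_index()` and the range below it is marked ready with `set_range_ready_and_clear_pending()`.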
-pub const SINGLE_INFLIGHT_WAIT_TIMEOUT: u64 = 2000; - -struct BlobIoMergeState<'a, F: FnMut(BlobIoRange)> { - cb: F, - // size of compressed data - size: u32, - bios: Vec<&'a BlobIoDesc>, -} - -impl<'a, F: FnMut(BlobIoRange)> BlobIoMergeState<'a, F> { - /// Create a new instance of 'IoMergeState`. - pub fn new(bio: &'a BlobIoDesc, cb: F) -> Self { - let size = bio.chunkinfo.compressed_size(); - - BlobIoMergeState { - cb, - size, - bios: vec![bio], - } - } - - /// Get size of pending compressed data. - #[inline] - fn size(&self) -> usize { - self.size as usize - } - - /// Push a new io descriptor into the pending list. - #[inline] - fn push(&mut self, bio: &'a BlobIoDesc) { - let start = bio.chunkinfo.compressed_offset(); - let size = if !self.bios.is_empty() { - let last = &self.bios[self.bios.len() - 1].chunkinfo; - let prev = last.compressed_offset() + last.compressed_size() as u64; - assert!(prev <= start); - assert!(start - prev < u32::MAX as u64); - (start - prev) as u32 + bio.chunkinfo.compressed_size() - } else { - bio.chunkinfo.compressed_size() - }; - assert!(self.size.checked_add(size).is_some()); - self.size += size; - self.bios.push(bio); - } - - /// Issue all pending io descriptors. - #[inline] - pub fn issue(&mut self, max_gap: u64) { - if !self.bios.is_empty() { - let mut mr = BlobIoRange::new(self.bios[0], self.bios.len()); - for bio in self.bios[1..].iter() { - mr.merge(bio, max_gap); - } - (self.cb)(mr); - - self.bios.truncate(0); - self.size = 0; - } - } - - /// Merge adjacent chunks into bigger request with compressed size no bigger than `max_size` - /// and issue all blob IO descriptors. - pub fn merge_and_issue(bios: &[BlobIoDesc], max_comp_size: u64, max_gap: u64, op: F) { - if !bios.is_empty() { - let mut index = 1; - let mut state = BlobIoMergeState::new(&bios[0], op); - - for cur_bio in &bios[1..] { - // Issue pending descriptors when next chunk is not continuous with current chunk - // or the accumulated compressed data size is big enough. - if !bios[index - 1].is_continuous(cur_bio, max_gap) - || state.size() as u64 >= max_comp_size - { - state.issue(max_gap); - } - state.push(cur_bio); - index += 1 - } - state.issue(max_gap); - } - } -} - -/// Trait representing a cache object for a blob on backend storage. -/// -/// The caller may use the `BlobCache` trait to access blob data on backend storage, with an -/// optional intermediate cache layer to improve performance. -pub trait BlobCache: Send + Sync { - /// Get id of the blob object. - fn blob_id(&self) -> &str; - - /// Get size of the decompressed blob object. - fn blob_uncompressed_size(&self) -> Result; - - /// Get size of the compressed blob object. - fn blob_compressed_size(&self) -> Result; - - /// Get data compression algorithm to handle chunks in the blob. - fn blob_compressor(&self) -> compress::Algorithm; - - /// Get data encryption algorithm to handle chunks in the blob. - fn blob_cipher(&self) -> crypt::Algorithm; - - /// Cipher object to encrypt/decrypt chunk data. - fn blob_cipher_object(&self) -> Arc; - - /// Cipher context to encrypt/decrypt chunk data. - fn blob_cipher_context(&self) -> Option; - - /// Get message digest algorithm to handle chunks in the blob. - fn blob_digester(&self) -> digest::Algorithm; - - /// Check whether the cache object is for an stargz image with legacy chunk format. - fn is_legacy_stargz(&self) -> bool; - - /// Get maximum size of gzip compressed data. 
- fn get_legacy_stargz_size(&self, offset: u64, uncomp_size: usize) -> Result { - let blob_size = self.blob_compressed_size()?; - let max_size = blob_size.checked_sub(offset).ok_or_else(|| { - einval!(format!( - "chunk compressed offset {:x} is bigger than blob file size {:x}", - offset, blob_size - )) - })?; - let max_size = cmp::min(max_size, usize::MAX as u64) as usize; - Ok(compress::compute_compressed_gzip_size( - uncomp_size, - max_size, - )) - } - - /// Check whether the blob is ZRan based. - fn is_zran(&self) -> bool { - false - } - - /// Check whether the blob is Batch based. - fn is_batch(&self) -> bool { - false - } - - /// Check whether need to validate the data chunk by digest value. - fn need_validation(&self) -> bool; - - /// Get the [BlobReader](../backend/trait.BlobReader.html) to read data from storage backend. - fn reader(&self) -> &dyn BlobReader; - - /// Get the underlying `ChunkMap` object. - fn get_chunk_map(&self) -> &Arc; - - /// Get the `BlobChunkInfo` object corresponding to `chunk_index`. - fn get_chunk_info(&self, chunk_index: u32) -> Option>; - - /// Get a `BlobObject` instance to directly access uncompressed blob file. - fn get_blob_object(&self) -> Option<&dyn BlobObject> { - None - } - - /// Enable prefetching blob data in background. - /// - /// It should be paired with stop_prefetch(). - fn start_prefetch(&self) -> StorageResult<()>; - - /// Stop prefetching blob data in background. - /// - /// It should be paired with start_prefetch(). - fn stop_prefetch(&self) -> StorageResult<()>; - - // Check whether data prefetch is still active. - fn is_prefetch_active(&self) -> bool; - - /// Start to prefetch requested data in background. - fn prefetch( - &self, - cache: Arc, - prefetches: &[BlobPrefetchRequest], - bios: &[BlobIoDesc], - ) -> StorageResult; - - /// Execute filesystem data prefetch. - fn prefetch_range(&self, _range: &BlobIoRange) -> Result { - Err(enosys!("doesn't support prefetch_range()")) - } - - /// Read chunk data described by the blob Io descriptors from the blob cache into the buffer. - fn read(&self, iovec: &mut BlobIoVec, buffers: &[FileVolatileSlice]) -> Result; - - /// Read multiple chunks from the blob cache in batch mode. - /// - /// This is an interface to optimize chunk data fetch performance by merging multiple continuous - /// chunks into one backend request. Callers must ensure that chunks in `chunks` covers a - /// continuous range, and the range exactly matches [`blob_offset`..`blob_offset` + `blob_size`]. - /// Function `read_chunks_from_backend()` returns one buffer containing decompressed chunk data - /// for each entry in the `chunks` array in corresponding order. - /// - /// This method returns success only if all requested data are successfully fetched. - fn read_chunks_from_backend<'a, 'b>( - &'a self, - blob_offset: u64, - blob_size: usize, - chunks: &'b [Arc], - prefetch: bool, - ) -> Result> - where - Self: Sized, - { - // Read requested data from the backend by altogether. 
- let mut c_buf = alloc_buf(blob_size); - let start = Instant::now(); - let nr_read = self - .reader() - .read(c_buf.as_mut_slice(), blob_offset) - .map_err(|e| eio!(e))?; - if nr_read != blob_size { - return Err(eio!(format!( - "request for {} bytes but got {} bytes", - blob_size, nr_read - ))); - } - let duration = Instant::now().duration_since(start).as_millis(); - debug!( - "read_chunks_from_backend: {} {} {} bytes at {}, duration {}ms", - std::thread::current().name().unwrap_or_default(), - if prefetch { "prefetch" } else { "fetch" }, - blob_size, - blob_offset, - duration - ); - - let chunks = chunks.iter().map(|v| v.as_ref()).collect(); - Ok(ChunkDecompressState::new(blob_offset, self, chunks, c_buf)) - } - - /// Read a whole chunk directly from the storage backend. - /// - /// The fetched chunk data may be compressed or encrypted or not, which depends on chunk information - /// from `chunk`. Moreover, chunk data from backend storage may be validated per user's configuration. - fn read_chunk_from_backend( - &self, - chunk: &dyn BlobChunkInfo, - buffer: &mut [u8], - ) -> Result>> { - let start = Instant::now(); - let offset = chunk.compressed_offset(); - let mut c_buf = None; - - if self.is_zran() || self.is_batch() { - return Err(enosys!("read_chunk_from_backend")); - } else if !chunk.is_compressed() && !chunk.is_encrypted() { - let size = self.reader().read(buffer, offset).map_err(|e| eio!(e))?; - if size != buffer.len() { - return Err(eio!("storage backend returns less data than requested")); - } - } else { - let c_size = if self.is_legacy_stargz() { - self.get_legacy_stargz_size(offset, buffer.len())? - } else { - chunk.compressed_size() as usize - }; - let mut raw_buffer = alloc_buf(c_size); - let size = self - .reader() - .read(raw_buffer.as_mut_slice(), offset) - .map_err(|e| eio!(e))?; - if size != raw_buffer.len() { - return Err(eio!("storage backend returns less data than requested")); - } - let decrypted_buffer = crypt::decrypt_with_context( - &raw_buffer, - &self.blob_cipher_object(), - &self.blob_cipher_context(), - chunk.is_encrypted(), - )?; - self.decompress_chunk_data(&decrypted_buffer, buffer, chunk.is_compressed())?; - c_buf = Some(raw_buffer); - } - - let duration = Instant::now().duration_since(start).as_millis(); - debug!( - "read_chunk_from_backend: {} {} bytes at {}, duration {}ms", - std::thread::current().name().unwrap_or_default(), - chunk.compressed_size(), - chunk.compressed_offset(), - duration - ); - self.validate_chunk_data(chunk, buffer, false) - .map_err(|e| { - warn!("failed to read data from backend, {}", e); - e - })?; - - Ok(c_buf) - } - - /// Decompress chunk data. - fn decompress_chunk_data( - &self, - raw_buffer: &[u8], - buffer: &mut [u8], - is_compressed: bool, - ) -> Result<()> { - if is_compressed { - let compressor = self.blob_compressor(); - let ret = compress::decompress(raw_buffer, buffer, compressor).map_err(|e| { - error!("failed to decompress chunk: {}", e); - e - })?; - if ret != buffer.len() { - return Err(einval!(format!( - "size of decompressed data doesn't match expected, {} vs {}, raw_buffer: {}", - ret, - buffer.len(), - raw_buffer.len() - ))); - } - } else if raw_buffer.as_ptr() != buffer.as_ptr() { - // raw_chunk and chunk may point to the same buffer, so only copy data when needed. - buffer.copy_from_slice(raw_buffer); - } - Ok(()) - } - - /// Validate chunk data. 
- fn validate_chunk_data( - &self, - chunk: &dyn BlobChunkInfo, - buffer: &[u8], - force_validation: bool, - ) -> Result { - let d_size = chunk.uncompressed_size() as usize; - if buffer.len() != d_size { - Err(eio!("uncompressed size and buffer size doesn't match")) - } else if (self.need_validation() || force_validation) - && !self.is_legacy_stargz() - && !check_digest(buffer, chunk.chunk_id(), self.blob_digester()) - { - Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "data digest value doesn't match", - )) - } else { - Ok(d_size) - } - } - - fn get_blob_meta_info(&self) -> Result>> { - Ok(None) - } -} - -/// An iterator to enumerate decompressed data for chunks. -pub struct ChunkDecompressState<'a, 'b> { - blob_offset: u64, - chunk_idx: usize, - batch_idx: u32, - zran_idx: u32, - cache: &'a dyn BlobCache, - chunks: Vec<&'b dyn BlobChunkInfo>, - c_buf: Vec, - d_buf: Vec, -} - -impl<'a, 'b> ChunkDecompressState<'a, 'b> { - fn new( - blob_offset: u64, - cache: &'a dyn BlobCache, - chunks: Vec<&'b dyn BlobChunkInfo>, - c_buf: Vec, - ) -> Self { - ChunkDecompressState { - blob_offset, - chunk_idx: 0, - batch_idx: u32::MAX, - zran_idx: u32::MAX, - cache, - chunks, - c_buf, - d_buf: Vec::new(), - } - } - - fn decompress_batch( - &mut self, - meta: &Arc, - c_offset: u64, - ) -> Result<()> { - let ctx = meta.get_batch_context(self.batch_idx)?; - let c_size = ctx.compressed_size() as u64; - let d_size = ctx.uncompressed_batch_size() as u64; - if c_offset < self.blob_offset - || c_offset.checked_add(c_size).is_none() - || c_offset + c_size > self.blob_offset + self.c_buf.len() as u64 - || d_size > RAFS_MAX_CHUNK_SIZE - { - let msg = format!( - "invalid chunk: z_offset 0x{:x}, z_size 0x{:x}, c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}", - self.blob_offset, - self.c_buf.len(), - c_offset, - c_size, - d_size - ); - return Err(einval!(msg)); - } - - let c_offset = (c_offset - self.blob_offset) as usize; - let input = &self.c_buf[c_offset..c_offset + c_size as usize]; - let decrypted_buffer = crypt::decrypt_with_context( - input, - &self.cache.blob_cipher_object(), - &self.cache.blob_cipher_context(), - meta.state.is_encrypted(), - )?; - let mut output = alloc_buf(d_size as usize); - - self.cache - .decompress_chunk_data(&decrypted_buffer, &mut output, c_size != d_size)?; - - if output.len() != d_size as usize { - return Err(einval!(format!( - "decompressed data size doesn't match: {} vs {}", - output.len(), - d_size - ))); - } - - self.d_buf = output; - - Ok(()) - } - - fn decompress_zran(&mut self, meta: &Arc) -> Result<()> { - let (ctx, dict) = meta.get_zran_context(self.zran_idx)?; - let c_offset = ctx.in_offset; - let c_size = ctx.in_len as u64; - if c_offset < self.blob_offset - || c_offset.checked_add(c_size).is_none() - || c_offset + c_size > self.blob_offset + self.c_buf.len() as u64 - || ctx.out_len as u64 > RAFS_MAX_CHUNK_SIZE - { - let msg = format!( - "invalid chunk: z_offset 0x{:x}, z_size 0x{:x}, c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}", - self.blob_offset, - self.c_buf.len(), - c_offset, - c_size, - ctx.out_len - ); - return Err(einval!(msg)); - } - - let c_offset = (c_offset - self.blob_offset) as usize; - let input = &self.c_buf[c_offset..c_offset + c_size as usize]; - let mut output = alloc_buf(ctx.out_len as usize); - let mut decoder = ZranDecoder::new()?; - decoder.uncompress(&ctx, Some(dict), input, &mut output)?; - self.d_buf = output; - - Ok(()) - } - - fn next_batch(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { - // If the chunk is not a batch chunk, 
decompress it as normal. - if !chunk.is_batch() { - return self.next_buf(chunk); - } - - let meta = self - .cache - .get_blob_meta_info()? - .ok_or_else(|| einval!("failed to get blob meta object for Batch"))?; - - let batch_idx = meta.get_batch_index(chunk.id())?; - if batch_idx != self.batch_idx { - self.batch_idx = batch_idx; - self.decompress_batch(&meta, chunk.compressed_offset())?; - } - let offset = meta.get_uncompressed_offset_in_batch_buf(chunk.id())? as usize; - let end = offset + chunk.uncompressed_size() as usize; - if end > self.d_buf.len() { - return Err(einval!(format!( - "invalid Batch decompression status, end: {}, len: {}", - end, - self.d_buf.len() - ))); - } - - // Use alloc_buf here to ensure 4k alignment for later use - // in adjust_buffer_for_dio. - let mut buffer = alloc_buf(chunk.uncompressed_size() as usize); - buffer.copy_from_slice(&self.d_buf[offset as usize..end]); - Ok(buffer) - } - - fn next_zran(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { - let meta = self - .cache - .get_blob_meta_info()? - .ok_or_else(|| einval!("failed to get blob meta object for ZRan"))?; - let zran_idx = meta.get_zran_index(chunk.id())?; - if zran_idx != self.zran_idx { - self.zran_idx = zran_idx; - self.decompress_zran(&meta)?; - } - let offset = meta.get_zran_offset(chunk.id())? as usize; - let end = offset + chunk.uncompressed_size() as usize; - if end > self.d_buf.len() { - return Err(einval!("invalid ZRan decompression status")); - } - // Use alloc_buf here to ensure 4k alignment for later use - // in adjust_buffer_for_dio. - let mut buffer = alloc_buf(chunk.uncompressed_size() as usize); - buffer.copy_from_slice(&self.d_buf[offset as usize..end]); - Ok(buffer) - } - - fn next_buf(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { - let c_offset = chunk.compressed_offset(); - let c_size = chunk.compressed_size(); - let d_size = chunk.uncompressed_size() as usize; - if c_offset < self.blob_offset - || c_offset - self.blob_offset > usize::MAX as u64 - || c_offset.checked_add(c_size as u64).is_none() - || c_offset + c_size as u64 > self.blob_offset + self.c_buf.len() as u64 - || d_size as u64 > RAFS_MAX_CHUNK_SIZE - { - let msg = format!( - "invalid chunk info: c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}, blob_offset 0x{:x}", - c_offset, c_size, d_size, self.blob_offset - ); - return Err(eio!(msg)); - } - - let offset_merged = (c_offset - self.blob_offset) as usize; - let end_merged = offset_merged + c_size as usize; - let decrypted_buffer = crypt::decrypt_with_context( - &self.c_buf[offset_merged..end_merged], - &self.cache.blob_cipher_object(), - &self.cache.blob_cipher_context(), - chunk.is_encrypted(), - )?; - let mut buffer = alloc_buf(d_size); - self.cache - .decompress_chunk_data(&decrypted_buffer, &mut buffer, chunk.is_compressed())?; - self.cache - .validate_chunk_data(chunk, &buffer, false) - .map_err(|e| { - warn!("failed to read data from backend, {}", e); - e - })?; - Ok(buffer) - } - - /// Get an immutable reference to the compressed data buffer. 
- pub fn compressed_buf(&self) -> &[u8] { - &self.c_buf - } -} - -impl<'a, 'b> Iterator for ChunkDecompressState<'a, 'b> { - type Item = Result>; - - fn next(&mut self) -> Option { - if self.chunk_idx >= self.chunks.len() { - return None; - } - - let cache = self.cache; - let chunk = self.chunks[self.chunk_idx]; - self.chunk_idx += 1; - let res = if cache.is_batch() { - self.next_batch(chunk) - } else if cache.is_zran() { - self.next_zran(chunk) - } else { - self.next_buf(chunk) - }; - Some(res) - } -} - -/// Trait representing blob manager to manage a group of [BlobCache](trait.BlobCache.html) objects. -/// -/// The main responsibility of the blob cache manager is to create blob cache objects for blobs, -/// all IO requests should be issued to the blob cache object directly. -pub(crate) trait BlobCacheMgr: Send + Sync { - /// Initialize the blob cache manager. - fn init(&self) -> Result<()>; - - /// Tear down the blob cache manager. - fn destroy(&self); - - /// Garbage-collect unused resources. - /// - /// Return true if the blob cache manager itself should be garbage-collected. - fn gc(&self, _id: Option<&str>) -> bool; - - /// Get the underlying `BlobBackend` object of the blob cache object. - fn backend(&self) -> &(dyn BlobBackend); - - /// Get the blob cache to provide access to the `blob` object. - fn get_blob_cache(&self, blob_info: &Arc) -> Result>; - - /// Check the blob cache data status, if data all ready stop prefetch workers. - fn check_stat(&self); -} - -#[cfg(test)] -mod tests { - use crate::device::{BlobChunkFlags, BlobFeatures}; - use crate::test::MockChunkInfo; - - use super::*; - - #[test] - fn test_io_merge_state_new() { - let blob_info = Arc::new(BlobInfo::new( - 1, - "test1".to_owned(), - 0x200000, - 0x100000, - 0x100000, - 512, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - )); - let chunk1 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }) as Arc; - let chunk2 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x800, - uncompress_offset: 0x1000, - file_offset: 0x1000, - index: 1, - reserved: 0, - }) as Arc; - let chunk3 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x1000, - uncompress_offset: 0x1000, - file_offset: 0x1000, - index: 1, - reserved: 0, - }) as Arc; - - let cb = |_merged| {}; - let desc1 = BlobIoDesc { - blob: blob_info.clone(), - chunkinfo: chunk1.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - let mut state = BlobIoMergeState::new(&desc1, cb); - assert_eq!(state.size(), 0x800); - assert_eq!(state.bios.len(), 1); - - let desc2 = BlobIoDesc { - blob: blob_info.clone(), - chunkinfo: chunk2.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - state.push(&desc2); - assert_eq!(state.size, 0x1000); - assert_eq!(state.bios.len(), 2); - - state.issue(0); - assert_eq!(state.size(), 0x0); - assert_eq!(state.bios.len(), 0); - - let desc3 = BlobIoDesc { - blob: blob_info, - chunkinfo: chunk3.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - state.push(&desc3); - assert_eq!(state.size, 0x800); - assert_eq!(state.bios.len(), 1); - - state.issue(0); - assert_eq!(state.size(), 
0x0); - assert_eq!(state.bios.len(), 0); - - let mut count = 0; - BlobIoMergeState::merge_and_issue( - &[desc1.clone(), desc2.clone(), desc3.clone()], - 0x4000, - 0x0, - |_v| count += 1, - ); - assert_eq!(count, 1); - - let mut count = 0; - BlobIoMergeState::merge_and_issue( - &[desc1.clone(), desc2.clone(), desc3.clone()], - 0x1000, - 0x0, - |_v| count += 1, - ); - assert_eq!(count, 2); - - let mut count = 0; - BlobIoMergeState::merge_and_issue(&[desc1.clone(), desc3.clone()], 0x4000, 0x0, |_v| { - count += 1 - }); - assert_eq!(count, 2); - - assert!(desc1.is_continuous(&desc2, 0)); - assert!(!desc1.is_continuous(&desc3, 0)); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A blob cache layer over storage backend to improve performance. +//! +//! One of Rafs filesystem's goal is to support "on demand data loading". On demand loading may +//! help to speed up application/container startup, but it may also cause serious performance +//! penalty if all data chunks are retrieved from remoted backend storage. So cache layer is +//! introduced between Rafs filesystem and backend storage, which caches remote data onto local +//! storage and merge small data request into bigger request to improve network performance. +//! +//! There are several cache drivers implemented: +//! - [DummyCacheMgr](dummycache/struct.DummyCacheMgr.html): a dummy implementation of +//! `BlobCacheMgr`, simply reporting each chunk as cached or not cached according to +//! configuration. + +use std::cmp; +use std::io::Result; +use std::sync::Arc; +use std::time::Instant; + +use fuse_backend_rs::file_buf::FileVolatileSlice; +use nydus_utils::compress::zlib_random::ZranDecoder; +use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::{compress, digest}; + +use crate::backend::{BlobBackend, BlobReader}; +use crate::cache::state::ChunkMap; +use crate::device::{ + BlobChunkInfo, BlobInfo, BlobIoDesc, BlobIoRange, BlobIoVec, BlobObject, BlobPrefetchRequest, +}; +use crate::meta::BlobCompressionContextInfo; +use crate::utils::{alloc_buf, check_digest}; +use crate::{StorageResult, RAFS_MAX_CHUNK_SIZE}; + +mod cachedfile; +#[cfg(feature = "dedup")] +mod dedup; +mod dummycache; +mod filecache; +#[cfg(target_os = "linux")] +mod fscache; +mod worker; + +pub mod state; + +pub use dummycache::DummyCacheMgr; +pub use filecache::FileCacheMgr; +#[cfg(target_os = "linux")] +pub use fscache::FsCacheMgr; + +/// Timeout in milli-seconds to retrieve blob data from backend storage. +pub const SINGLE_INFLIGHT_WAIT_TIMEOUT: u64 = 2000; + +struct BlobIoMergeState<'a, F: FnMut(BlobIoRange)> { + cb: F, + // size of compressed data + size: u32, + bios: Vec<&'a BlobIoDesc>, +} + +impl<'a, F: FnMut(BlobIoRange)> BlobIoMergeState<'a, F> { + /// Create a new instance of 'IoMergeState`. + pub fn new(bio: &'a BlobIoDesc, cb: F) -> Self { + let size = bio.chunkinfo.compressed_size(); + + BlobIoMergeState { + cb, + size, + bios: vec![bio], + } + } + + /// Get size of pending compressed data. + #[inline] + fn size(&self) -> usize { + self.size as usize + } + + /// Push a new io descriptor into the pending list. 
+ #[inline] + fn push(&mut self, bio: &'a BlobIoDesc) { + let start = bio.chunkinfo.compressed_offset(); + let size = if !self.bios.is_empty() { + let last = &self.bios[self.bios.len() - 1].chunkinfo; + let prev = last.compressed_offset() + last.compressed_size() as u64; + assert!(prev <= start); + assert!(start - prev < u32::MAX as u64); + (start - prev) as u32 + bio.chunkinfo.compressed_size() + } else { + bio.chunkinfo.compressed_size() + }; + assert!(self.size.checked_add(size).is_some()); + self.size += size; + self.bios.push(bio); + } + + /// Issue all pending io descriptors. + #[inline] + pub fn issue(&mut self, max_gap: u64) { + if !self.bios.is_empty() { + let mut mr = BlobIoRange::new(self.bios[0], self.bios.len()); + for bio in self.bios[1..].iter() { + mr.merge(bio, max_gap); + } + (self.cb)(mr); + + self.bios.truncate(0); + self.size = 0; + } + } + + /// Merge adjacent chunks into bigger request with compressed size no bigger than `max_size` + /// and issue all blob IO descriptors. + pub fn merge_and_issue(bios: &[BlobIoDesc], max_comp_size: u64, max_gap: u64, op: F) { + if !bios.is_empty() { + let mut index = 1; + let mut state = BlobIoMergeState::new(&bios[0], op); + + for cur_bio in &bios[1..] { + // Issue pending descriptors when next chunk is not continuous with current chunk + // or the accumulated compressed data size is big enough. + if !bios[index - 1].is_continuous(cur_bio, max_gap) + || state.size() as u64 >= max_comp_size + { + state.issue(max_gap); + } + state.push(cur_bio); + index += 1 + } + state.issue(max_gap); + } + } +} + +/// Trait representing a cache object for a blob on backend storage. +/// +/// The caller may use the `BlobCache` trait to access blob data on backend storage, with an +/// optional intermediate cache layer to improve performance. +pub trait BlobCache: Send + Sync { + /// Get id of the blob object. + fn blob_id(&self) -> &str; + + /// Get size of the decompressed blob object. + fn blob_uncompressed_size(&self) -> Result; + + /// Get size of the compressed blob object. + fn blob_compressed_size(&self) -> Result; + + /// Get data compression algorithm to handle chunks in the blob. + fn blob_compressor(&self) -> compress::Algorithm; + + /// Get data encryption algorithm to handle chunks in the blob. + fn blob_cipher(&self) -> crypt::Algorithm; + + /// Cipher object to encrypt/decrypt chunk data. + fn blob_cipher_object(&self) -> Arc; + + /// Cipher context to encrypt/decrypt chunk data. + fn blob_cipher_context(&self) -> Option; + + /// Get message digest algorithm to handle chunks in the blob. + fn blob_digester(&self) -> digest::Algorithm; + + /// Check whether the cache object is for an stargz image with legacy chunk format. + fn is_legacy_stargz(&self) -> bool; + + /// Get maximum size of gzip compressed data. + fn get_legacy_stargz_size(&self, offset: u64, uncomp_size: usize) -> Result { + let blob_size = self.blob_compressed_size()?; + let max_size = blob_size.checked_sub(offset).ok_or_else(|| { + einval!(format!( + "chunk compressed offset {:x} is bigger than blob file size {:x}", + offset, blob_size + )) + })?; + let max_size = cmp::min(max_size, usize::MAX as u64) as usize; + Ok(compress::compute_compressed_gzip_size( + uncomp_size, + max_size, + )) + } + + /// Check whether the blob is ZRan based. + fn is_zran(&self) -> bool { + false + } + + /// Check whether the blob is Batch based. + fn is_batch(&self) -> bool { + false + } + + /// Check whether need to validate the data chunk by digest value. 
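`BlobIoMergeState::merge_and_issue()` above coalesces per-chunk reads into larger backend requests, flushing whenever the next descriptor is not contiguous within `max_gap` or the accumulated compressed size reaches the `max_comp_size` budget. A simplified sketch of the same merge-and-issue idea over plain `(offset, size)` pairs (illustrative only; it does not use the patch's types and assumes requests are sorted by offset):

```rust
// Coalesce adjacent (offset, size) requests into larger ranges, flushing the
// pending range whenever the next request leaves a gap larger than max_gap
// or the accumulated size would exceed max_size.
fn merge_and_issue(
    reqs: &[(u64, u64)],
    max_size: u64,
    max_gap: u64,
    mut issue: impl FnMut(u64, u64),
) {
    let mut pending: Option<(u64, u64)> = None; // (start, merged length)
    for &(off, len) in reqs {
        pending = match pending {
            None => Some((off, len)),
            Some((start, acc)) => {
                let end = start + acc;
                if off >= end && off - end <= max_gap && acc + (off - end) + len <= max_size {
                    Some((start, acc + (off - end) + len)) // extend current range
                } else {
                    issue(start, acc); // flush, then start a new range
                    Some((off, len))
                }
            }
        };
    }
    if let Some((start, acc)) = pending {
        issue(start, acc);
    }
}

fn main() {
    let reqs = [(0u64, 0x800u64), (0x800, 0x800), (0x2000, 0x800)];
    merge_and_issue(&reqs, 0x4000, 0, |start, len| {
        println!("issue backend read: offset {:#x}, size {:#x}", start, len)
    });
    // Prints two merged ranges: [0x0, 0x1000) and [0x2000, 0x2800).
}
```

The unit test at the end of this file exercises the real implementation with budgets of 0x4000 and 0x1000 bytes, expecting one and two merged requests respectively.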
+ fn need_validation(&self) -> bool; + + /// Get the [BlobReader](../backend/trait.BlobReader.html) to read data from storage backend. + fn reader(&self) -> &dyn BlobReader; + + /// Get the underlying `ChunkMap` object. + fn get_chunk_map(&self) -> &Arc; + + /// Get the `BlobChunkInfo` object corresponding to `chunk_index`. + fn get_chunk_info(&self, chunk_index: u32) -> Option>; + + /// Get a `BlobObject` instance to directly access uncompressed blob file. + fn get_blob_object(&self) -> Option<&dyn BlobObject> { + None + } + + /// Enable prefetching blob data in background. + /// + /// It should be paired with stop_prefetch(). + fn start_prefetch(&self) -> StorageResult<()>; + + /// Stop prefetching blob data in background. + /// + /// It should be paired with start_prefetch(). + fn stop_prefetch(&self) -> StorageResult<()>; + + // Check whether data prefetch is still active. + fn is_prefetch_active(&self) -> bool; + + /// Start to prefetch requested data in background. + fn prefetch( + &self, + cache: Arc, + prefetches: &[BlobPrefetchRequest], + bios: &[BlobIoDesc], + ) -> StorageResult; + + /// Execute filesystem data prefetch. + fn prefetch_range(&self, _range: &BlobIoRange) -> Result { + Err(enosys!("doesn't support prefetch_range()")) + } + + /// Read chunk data described by the blob Io descriptors from the blob cache into the buffer. + fn read(&self, iovec: &mut BlobIoVec, buffers: &[FileVolatileSlice]) -> Result; + + /// Read multiple chunks from the blob cache in batch mode. + /// + /// This is an interface to optimize chunk data fetch performance by merging multiple continuous + /// chunks into one backend request. Callers must ensure that chunks in `chunks` covers a + /// continuous range, and the range exactly matches [`blob_offset`..`blob_offset` + `blob_size`]. + /// Function `read_chunks_from_backend()` returns one buffer containing decompressed chunk data + /// for each entry in the `chunks` array in corresponding order. + /// + /// This method returns success only if all requested data are successfully fetched. + fn read_chunks_from_backend<'a, 'b>( + &'a self, + blob_offset: u64, + blob_size: usize, + chunks: &'b [Arc], + prefetch: bool, + ) -> Result> + where + Self: Sized, + { + // Read requested data from the backend by altogether. + let mut c_buf = alloc_buf(blob_size); + let start = Instant::now(); + let nr_read = self + .reader() + .read(c_buf.as_mut_slice(), blob_offset) + .map_err(|e| eio!(e))?; + if nr_read != blob_size { + return Err(eio!(format!( + "request for {} bytes but got {} bytes", + blob_size, nr_read + ))); + } + let duration = Instant::now().duration_since(start).as_millis(); + debug!( + "read_chunks_from_backend: {} {} {} bytes at {}, duration {}ms", + std::thread::current().name().unwrap_or_default(), + if prefetch { "prefetch" } else { "fetch" }, + blob_size, + blob_offset, + duration + ); + + let chunks = chunks.iter().map(|v| v.as_ref()).collect(); + Ok(ChunkDecompressState::new(blob_offset, self, chunks, c_buf)) + } + + /// Read a whole chunk directly from the storage backend. + /// + /// The fetched chunk data may be compressed or encrypted or not, which depends on chunk information + /// from `chunk`. Moreover, chunk data from backend storage may be validated per user's configuration. 
+ fn read_chunk_from_backend( + &self, + chunk: &dyn BlobChunkInfo, + buffer: &mut [u8], + ) -> Result>> { + let start = Instant::now(); + let offset = chunk.compressed_offset(); + let mut c_buf = None; + + if self.is_zran() || self.is_batch() { + return Err(enosys!("read_chunk_from_backend")); + } else if !chunk.is_compressed() && !chunk.is_encrypted() { + let size = self.reader().read(buffer, offset).map_err(|e| eio!(e))?; + if size != buffer.len() { + return Err(eio!("storage backend returns less data than requested")); + } + } else { + let c_size = if self.is_legacy_stargz() { + self.get_legacy_stargz_size(offset, buffer.len())? + } else { + chunk.compressed_size() as usize + }; + let mut raw_buffer = alloc_buf(c_size); + let size = self + .reader() + .read(raw_buffer.as_mut_slice(), offset) + .map_err(|e| eio!(e))?; + if size != raw_buffer.len() { + return Err(eio!("storage backend returns less data than requested")); + } + let decrypted_buffer = crypt::decrypt_with_context( + &raw_buffer, + &self.blob_cipher_object(), + &self.blob_cipher_context(), + chunk.is_encrypted(), + )?; + self.decompress_chunk_data(&decrypted_buffer, buffer, chunk.is_compressed())?; + c_buf = Some(raw_buffer); + } + + let duration = Instant::now().duration_since(start).as_millis(); + debug!( + "read_chunk_from_backend: {} {} bytes at {}, duration {}ms", + std::thread::current().name().unwrap_or_default(), + chunk.compressed_size(), + chunk.compressed_offset(), + duration + ); + self.validate_chunk_data(chunk, buffer, false) + .map_err(|e| { + warn!("failed to read data from backend, {}", e); + e + })?; + + Ok(c_buf) + } + + /// Decompress chunk data. + fn decompress_chunk_data( + &self, + raw_buffer: &[u8], + buffer: &mut [u8], + is_compressed: bool, + ) -> Result<()> { + if is_compressed { + let compressor = self.blob_compressor(); + let ret = compress::decompress(raw_buffer, buffer, compressor).map_err(|e| { + error!("failed to decompress chunk: {}", e); + e + })?; + if ret != buffer.len() { + return Err(einval!(format!( + "size of decompressed data doesn't match expected, {} vs {}, raw_buffer: {}", + ret, + buffer.len(), + raw_buffer.len() + ))); + } + } else if raw_buffer.as_ptr() != buffer.as_ptr() { + // raw_chunk and chunk may point to the same buffer, so only copy data when needed. + buffer.copy_from_slice(raw_buffer); + } + Ok(()) + } + + /// Validate chunk data. + fn validate_chunk_data( + &self, + chunk: &dyn BlobChunkInfo, + buffer: &[u8], + force_validation: bool, + ) -> Result { + let d_size = chunk.uncompressed_size() as usize; + if buffer.len() != d_size { + Err(eio!("uncompressed size and buffer size doesn't match")) + } else if (self.need_validation() || force_validation) + && !self.is_legacy_stargz() + && !check_digest(buffer, chunk.chunk_id(), self.blob_digester()) + { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "data digest value doesn't match", + )) + } else { + Ok(d_size) + } + } + + fn get_blob_meta_info(&self) -> Result>> { + Ok(None) + } +} + +/// An iterator to enumerate decompressed data for chunks. 
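`validate_chunk_data()` above checks that the buffer length equals the chunk's uncompressed size and, when validation is enabled, recomputes the digest and compares it against `chunk_id()`. A minimal sketch of that check, using the `blake3` crate directly as a stand-in for `nydus_utils::digest` (an assumption for illustration; the real code also honors the configured digest algorithm and skips legacy stargz blobs):

```rust
// Recompute a chunk digest and compare it with the expected value,
// mirroring the shape of validate_chunk_data().
// Cargo.toml (assumed): blake3 = "1"
fn validate_chunk(
    buf: &[u8],
    expected_size: usize,
    expected_digest: &blake3::Hash,
) -> std::io::Result<usize> {
    if buf.len() != expected_size {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "uncompressed size and buffer size don't match",
        ));
    }
    if blake3::hash(buf) != *expected_digest {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "data digest value doesn't match",
        ));
    }
    Ok(buf.len())
}

fn main() {
    let chunk = vec![0u8; 4096];
    let digest = blake3::hash(&chunk); // digest recorded at image build time
    assert!(validate_chunk(&chunk, 4096, &digest).is_ok());
    assert!(validate_chunk(&chunk[..4095], 4096, &digest).is_err());
}
```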
+pub struct ChunkDecompressState<'a, 'b> { + blob_offset: u64, + chunk_idx: usize, + batch_idx: u32, + zran_idx: u32, + cache: &'a dyn BlobCache, + chunks: Vec<&'b dyn BlobChunkInfo>, + c_buf: Vec, + d_buf: Vec, +} + +impl<'a, 'b> ChunkDecompressState<'a, 'b> { + fn new( + blob_offset: u64, + cache: &'a dyn BlobCache, + chunks: Vec<&'b dyn BlobChunkInfo>, + c_buf: Vec, + ) -> Self { + ChunkDecompressState { + blob_offset, + chunk_idx: 0, + batch_idx: u32::MAX, + zran_idx: u32::MAX, + cache, + chunks, + c_buf, + d_buf: Vec::new(), + } + } + + fn decompress_batch( + &mut self, + meta: &Arc, + c_offset: u64, + ) -> Result<()> { + let ctx = meta.get_batch_context(self.batch_idx)?; + let c_size = ctx.compressed_size() as u64; + let d_size = ctx.uncompressed_batch_size() as u64; + if c_offset < self.blob_offset + || c_offset.checked_add(c_size).is_none() + || c_offset + c_size > self.blob_offset + self.c_buf.len() as u64 + || d_size > RAFS_MAX_CHUNK_SIZE + { + let msg = format!( + "invalid chunk: z_offset 0x{:x}, z_size 0x{:x}, c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}", + self.blob_offset, + self.c_buf.len(), + c_offset, + c_size, + d_size + ); + return Err(einval!(msg)); + } + + let c_offset = (c_offset - self.blob_offset) as usize; + let input = &self.c_buf[c_offset..c_offset + c_size as usize]; + let decrypted_buffer = crypt::decrypt_with_context( + input, + &self.cache.blob_cipher_object(), + &self.cache.blob_cipher_context(), + meta.state.is_encrypted(), + )?; + let mut output = alloc_buf(d_size as usize); + + self.cache + .decompress_chunk_data(&decrypted_buffer, &mut output, c_size != d_size)?; + + if output.len() != d_size as usize { + return Err(einval!(format!( + "decompressed data size doesn't match: {} vs {}", + output.len(), + d_size + ))); + } + + self.d_buf = output; + + Ok(()) + } + + fn decompress_zran(&mut self, meta: &Arc) -> Result<()> { + let (ctx, dict) = meta.get_zran_context(self.zran_idx)?; + let c_offset = ctx.in_offset; + let c_size = ctx.in_len as u64; + if c_offset < self.blob_offset + || c_offset.checked_add(c_size).is_none() + || c_offset + c_size > self.blob_offset + self.c_buf.len() as u64 + || ctx.out_len as u64 > RAFS_MAX_CHUNK_SIZE + { + let msg = format!( + "invalid chunk: z_offset 0x{:x}, z_size 0x{:x}, c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}", + self.blob_offset, + self.c_buf.len(), + c_offset, + c_size, + ctx.out_len + ); + return Err(einval!(msg)); + } + + let c_offset = (c_offset - self.blob_offset) as usize; + let input = &self.c_buf[c_offset..c_offset + c_size as usize]; + let mut output = alloc_buf(ctx.out_len as usize); + let mut decoder = ZranDecoder::new()?; + decoder.uncompress(&ctx, Some(dict), input, &mut output)?; + self.d_buf = output; + + Ok(()) + } + + fn next_batch(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { + // If the chunk is not a batch chunk, decompress it as normal. + if !chunk.is_batch() { + return self.next_buf(chunk); + } + + let meta = self + .cache + .get_blob_meta_info()? + .ok_or_else(|| einval!("failed to get blob meta object for Batch"))?; + + let batch_idx = meta.get_batch_index(chunk.id())?; + if batch_idx != self.batch_idx { + self.batch_idx = batch_idx; + self.decompress_batch(&meta, chunk.compressed_offset())?; + } + let offset = meta.get_uncompressed_offset_in_batch_buf(chunk.id())? 
as usize; + let end = offset + chunk.uncompressed_size() as usize; + if end > self.d_buf.len() { + return Err(einval!(format!( + "invalid Batch decompression status, end: {}, len: {}", + end, + self.d_buf.len() + ))); + } + + // Use alloc_buf here to ensure 4k alignment for later use + // in adjust_buffer_for_dio. + let mut buffer = alloc_buf(chunk.uncompressed_size() as usize); + buffer.copy_from_slice(&self.d_buf[offset as usize..end]); + Ok(buffer) + } + + fn next_zran(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { + let meta = self + .cache + .get_blob_meta_info()? + .ok_or_else(|| einval!("failed to get blob meta object for ZRan"))?; + let zran_idx = meta.get_zran_index(chunk.id())?; + if zran_idx != self.zran_idx { + self.zran_idx = zran_idx; + self.decompress_zran(&meta)?; + } + let offset = meta.get_zran_offset(chunk.id())? as usize; + let end = offset + chunk.uncompressed_size() as usize; + if end > self.d_buf.len() { + return Err(einval!("invalid ZRan decompression status")); + } + // Use alloc_buf here to ensure 4k alignment for later use + // in adjust_buffer_for_dio. + let mut buffer = alloc_buf(chunk.uncompressed_size() as usize); + buffer.copy_from_slice(&self.d_buf[offset as usize..end]); + Ok(buffer) + } + + fn next_buf(&mut self, chunk: &dyn BlobChunkInfo) -> Result> { + let c_offset = chunk.compressed_offset(); + let c_size = chunk.compressed_size(); + let d_size = chunk.uncompressed_size() as usize; + if c_offset < self.blob_offset + || c_offset - self.blob_offset > usize::MAX as u64 + || c_offset.checked_add(c_size as u64).is_none() + || c_offset + c_size as u64 > self.blob_offset + self.c_buf.len() as u64 + || d_size as u64 > RAFS_MAX_CHUNK_SIZE + { + let msg = format!( + "invalid chunk info: c_offset 0x{:x}, c_size 0x{:x}, d_size 0x{:x}, blob_offset 0x{:x}", + c_offset, c_size, d_size, self.blob_offset + ); + return Err(eio!(msg)); + } + + let offset_merged = (c_offset - self.blob_offset) as usize; + let end_merged = offset_merged + c_size as usize; + let decrypted_buffer = crypt::decrypt_with_context( + &self.c_buf[offset_merged..end_merged], + &self.cache.blob_cipher_object(), + &self.cache.blob_cipher_context(), + chunk.is_encrypted(), + )?; + let mut buffer = alloc_buf(d_size); + self.cache + .decompress_chunk_data(&decrypted_buffer, &mut buffer, chunk.is_compressed())?; + self.cache + .validate_chunk_data(chunk, &buffer, false) + .map_err(|e| { + warn!("failed to read data from backend, {}", e); + e + })?; + Ok(buffer) + } + + /// Get an immutable reference to the compressed data buffer. + pub fn compressed_buf(&self) -> &[u8] { + &self.c_buf + } +} + +impl<'a, 'b> Iterator for ChunkDecompressState<'a, 'b> { + type Item = Result>; + + fn next(&mut self) -> Option { + if self.chunk_idx >= self.chunks.len() { + return None; + } + + let cache = self.cache; + let chunk = self.chunks[self.chunk_idx]; + self.chunk_idx += 1; + let res = if cache.is_batch() { + self.next_batch(chunk) + } else if cache.is_zran() { + self.next_zran(chunk) + } else { + self.next_buf(chunk) + }; + Some(res) + } +} + +/// Trait representing blob manager to manage a group of [BlobCache](trait.BlobCache.html) objects. +/// +/// The main responsibility of the blob cache manager is to create blob cache objects for blobs, +/// all IO requests should be issued to the blob cache object directly. +pub(crate) trait BlobCacheMgr: Send + Sync { + /// Initialize the blob cache manager. + fn init(&self) -> Result<()>; + + /// Tear down the blob cache manager. 
+ fn destroy(&self); + + /// Garbage-collect unused resources. + /// + /// Return true if the blob cache manager itself should be garbage-collected. + fn gc(&self, _id: Option<&str>) -> bool; + + /// Get the underlying `BlobBackend` object of the blob cache object. + fn backend(&self) -> &(dyn BlobBackend); + + /// Get the blob cache to provide access to the `blob` object. + fn get_blob_cache(&self, blob_info: &Arc) -> Result>; + + /// Check the blob cache data status, if data all ready stop prefetch workers. + fn check_stat(&self); +} + +#[cfg(test)] +mod tests { + use crate::device::{BlobChunkFlags, BlobFeatures}; + use crate::test::MockChunkInfo; + + use super::*; + + #[test] + fn test_io_merge_state_new() { + let blob_info = Arc::new(BlobInfo::new( + 1, + "test1".to_owned(), + 0x200000, + 0x100000, + 0x100000, + 512, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + )); + let chunk1 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0, + uncompress_offset: 0, + file_offset: 0, + index: 0, + reserved: 0, + }) as Arc; + let chunk2 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x800, + uncompress_offset: 0x1000, + file_offset: 0x1000, + index: 1, + reserved: 0, + }) as Arc; + let chunk3 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x1000, + uncompress_offset: 0x1000, + file_offset: 0x1000, + index: 1, + reserved: 0, + }) as Arc; + + let cb = |_merged| {}; + let desc1 = BlobIoDesc { + blob: blob_info.clone(), + chunkinfo: chunk1.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + let mut state = BlobIoMergeState::new(&desc1, cb); + assert_eq!(state.size(), 0x800); + assert_eq!(state.bios.len(), 1); + + let desc2 = BlobIoDesc { + blob: blob_info.clone(), + chunkinfo: chunk2.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + state.push(&desc2); + assert_eq!(state.size, 0x1000); + assert_eq!(state.bios.len(), 2); + + state.issue(0); + assert_eq!(state.size(), 0x0); + assert_eq!(state.bios.len(), 0); + + let desc3 = BlobIoDesc { + blob: blob_info, + chunkinfo: chunk3.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + state.push(&desc3); + assert_eq!(state.size, 0x800); + assert_eq!(state.bios.len(), 1); + + state.issue(0); + assert_eq!(state.size(), 0x0); + assert_eq!(state.bios.len(), 0); + + let mut count = 0; + BlobIoMergeState::merge_and_issue( + &[desc1.clone(), desc2.clone(), desc3.clone()], + 0x4000, + 0x0, + |_v| count += 1, + ); + assert_eq!(count, 1); + + let mut count = 0; + BlobIoMergeState::merge_and_issue( + &[desc1.clone(), desc2.clone(), desc3.clone()], + 0x1000, + 0x0, + |_v| count += 1, + ); + assert_eq!(count, 2); + + let mut count = 0; + BlobIoMergeState::merge_and_issue(&[desc1.clone(), desc3.clone()], 0x4000, 0x0, |_v| { + count += 1 + }); + assert_eq!(count, 2); + + assert!(desc1.is_continuous(&desc2, 0)); + assert!(!desc1.is_continuous(&desc3, 0)); + } +} diff --git a/storage/src/cache/state/blob_state_map.rs b/storage/src/cache/state/blob_state_map.rs index cff376f839c..942e804cc4e 100644 --- a/storage/src/cache/state/blob_state_map.rs +++ b/storage/src/cache/state/blob_state_map.rs @@ -1,793 +1,793 @@ -// Copyright 2021 Ant Group. All rights reserved. 
-// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::any::Any; -use std::collections::HashMap; -use std::fmt::Display; -use std::hash::Hash; -use std::io::Result; -use std::sync::{Arc, Condvar, Mutex, WaitTimeoutResult}; -use std::time::Duration; - -use crate::cache::state::{BlobRangeMap, ChunkIndexGetter, ChunkMap, IndexedChunkMap, RangeMap}; -use crate::cache::SINGLE_INFLIGHT_WAIT_TIMEOUT; -use crate::device::BlobChunkInfo; -use crate::{StorageError, StorageResult}; - -#[derive(PartialEq, Copy, Clone)] -enum Status { - Inflight, - Complete, -} - -struct Slot { - state: Mutex, - condvar: Condvar, -} - -impl Slot { - fn new() -> Self { - Slot { - state: Mutex::new(Status::Inflight), - condvar: Condvar::new(), - } - } - - fn notify(&self) { - self.condvar.notify_all(); - } - - fn done(&self) { - // Not expect poisoned lock here - *self.state.lock().unwrap() = Status::Complete; - self.notify(); - } - - fn wait_for_inflight(&self, timeout: Duration) -> StorageResult { - let mut state = self.state.lock().unwrap(); - let mut tor: WaitTimeoutResult; - - while *state == Status::Inflight { - // Do not expect poisoned lock, so unwrap here. - let r = self.condvar.wait_timeout(state, timeout).unwrap(); - state = r.0; - tor = r.1; - if tor.timed_out() { - return Err(StorageError::Timeout); - } - } - - Ok(*state) - } -} - -/// Adapter structure to enable concurrent chunk readiness manipulating based on a base [ChunkMap] -/// object. -/// -/// A base [ChunkMap], such as [IndexedChunkMap](../chunk_indexed/struct.IndexedChunkMap.html), only -/// tracks chunk readiness state, but doesn't support concurrent manipulating of the chunk readiness -/// state. The `BlobStateMap` structure acts as an adapter to enable concurrent chunk readiness -/// state manipulation. -pub struct BlobStateMap { - c: C, - inflight_tracer: Mutex>>, -} - -impl From for BlobStateMap -where - C: ChunkMap + ChunkIndexGetter, - I: Eq + Hash + Display, -{ - fn from(c: C) -> Self { - Self { - c, - inflight_tracer: Mutex::new(HashMap::new()), - } - } -} - -impl ChunkMap for BlobStateMap -where - C: ChunkMap + ChunkIndexGetter, - I: Eq + Hash + Display + Send + 'static, -{ - fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { - self.c.is_ready(chunk) - } - - fn is_pending(&self, chunk: &dyn BlobChunkInfo) -> Result { - let index = C::get_index(chunk); - Ok(self.inflight_tracer.lock().unwrap().get(&index).is_some()) - } - - fn check_ready_and_mark_pending(&self, chunk: &dyn BlobChunkInfo) -> StorageResult { - let mut ready = self.c.is_ready(chunk).map_err(StorageError::CacheIndex)?; - - if ready { - return Ok(true); - } - - let index = C::get_index(chunk); - let mut guard = self.inflight_tracer.lock().unwrap(); - - if let Some(i) = guard.get(&index).cloned() { - drop(guard); - let result = i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); - if let Err(StorageError::Timeout) = result { - warn!( - "Waiting for backend IO expires. chunk index {}, compressed offset {}", - index, - chunk.compressed_offset() - ); - - Err(StorageError::Timeout) - } else { - // Check if the chunk is ready in local cache again. It should be READY - // since wait_for_inflight must return OK in this branch by one more check. - self.check_ready_and_mark_pending(chunk) - } - } else { - // Double check to close the window where prior slot was just removed after backend IO - // returned. - if self.c.is_ready(chunk).map_err(StorageError::CacheIndex)? 
{ - ready = true; - } else { - guard.insert(index, Arc::new(Slot::new())); - } - Ok(ready) - } - } - - fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { - let res = self.c.set_ready_and_clear_pending(chunk); - self.clear_pending(chunk); - res - } - - fn clear_pending(&self, chunk: &dyn BlobChunkInfo) { - let index = C::get_index(chunk); - let mut guard = self.inflight_tracer.lock().unwrap(); - if let Some(i) = guard.remove(&index) { - i.done(); - } - } - - fn is_persist(&self) -> bool { - self.c.is_persist() - } - - fn as_range_map(&self) -> Option<&dyn RangeMap> { - let any = self as &dyn Any; - - any.downcast_ref::>() - .map(|v| v as &dyn RangeMap) - } -} - -impl RangeMap for BlobStateMap { - type I = u32; - - fn is_range_all_ready(&self) -> bool { - self.c.is_range_all_ready() - } - - fn is_range_ready(&self, start: Self::I, count: Self::I) -> Result { - self.c.is_range_ready(start, count) - } - - fn check_range_ready_and_mark_pending( - &self, - start: Self::I, - count: Self::I, - ) -> Result>> { - let pending = match self.c.check_range_ready_and_mark_pending(start, count) { - Err(e) => return Err(e), - Ok(None) => return Ok(None), - Ok(Some(v)) => { - if v.is_empty() { - return Ok(None); - } - v - } - }; - - let mut res = Vec::with_capacity(pending.len()); - let mut guard = self.inflight_tracer.lock().unwrap(); - for index in pending.iter() { - if guard.get(index).is_none() { - // Double check to close the window where prior slot was just removed after backend - // IO returned. - if !self.c.is_range_ready(*index, 1)? { - guard.insert(*index, Arc::new(Slot::new())); - res.push(*index); - } - } - } - - Ok(Some(res)) - } - - fn set_range_ready_and_clear_pending(&self, start: Self::I, count: Self::I) -> Result<()> { - let res = self.c.set_range_ready_and_clear_pending(start, count); - self.clear_range_pending(start, count); - res - } - - fn clear_range_pending(&self, start: Self::I, count: Self::I) { - let count = std::cmp::min(count, u32::MAX - start); - let end = start + count; - let mut guard = self.inflight_tracer.lock().unwrap(); - - for index in start..end { - if let Some(i) = guard.remove(&index) { - i.done(); - } - } - } - - fn wait_for_range_ready(&self, start: Self::I, count: Self::I) -> Result { - let count = std::cmp::min(count, u32::MAX - start); - let end = start + count; - if self.is_range_ready(start, count)? { - return Ok(true); - } - - let mut guard = self.inflight_tracer.lock().unwrap(); - for index in start..end { - if let Some(i) = guard.get(&index).cloned() { - drop(guard); - let result = - i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); - if let Err(StorageError::Timeout) = result { - warn!( - "Waiting for range backend IO expires. chunk index {}. range[{}, {}]", - index, start, count - ); - break; - }; - if !self.c.is_range_ready(index, 1)? 
{ - return Ok(false); - } - guard = self.inflight_tracer.lock().unwrap(); - } - } - - self.is_range_ready(start, count) - } -} - -impl RangeMap for BlobStateMap { - type I = u64; - - fn is_range_all_ready(&self) -> bool { - self.c.is_range_all_ready() - } - - fn is_range_ready(&self, start: Self::I, count: Self::I) -> Result { - self.c.is_range_ready(start, count) - } - - fn check_range_ready_and_mark_pending( - &self, - start: Self::I, - count: Self::I, - ) -> Result>> { - let pending = match self.c.check_range_ready_and_mark_pending(start, count) { - Err(e) => return Err(e), - Ok(None) => return Ok(None), - Ok(Some(v)) => { - if v.is_empty() { - return Ok(None); - } - v - } - }; - - let mut res = Vec::with_capacity(pending.len()); - let mut guard = self.inflight_tracer.lock().unwrap(); - for index in pending.iter() { - if guard.get(index).is_none() { - // Double check to close the window where prior slot was just removed after backend - // IO returned. - if !self.c.is_range_ready(*index, 1)? { - guard.insert(*index, Arc::new(Slot::new())); - res.push(*index); - } - } - } - - Ok(Some(res)) - } - - fn set_range_ready_and_clear_pending(&self, start: Self::I, count: Self::I) -> Result<()> { - let res = self.c.set_range_ready_and_clear_pending(start, count); - self.clear_range_pending(start, count); - res - } - - fn clear_range_pending(&self, start: Self::I, count: Self::I) { - let (start_index, end_index) = match self.c.get_range(start, count) { - Ok(v) => v, - Err(_) => { - debug_assert!(false); - return; - } - }; - - let mut guard = self.inflight_tracer.lock().unwrap(); - for index in start_index..end_index { - let idx = (index as u64) << self.c.shift; - if let Some(i) = guard.remove(&idx) { - i.done(); - } - } - } - - fn wait_for_range_ready(&self, start: Self::I, count: Self::I) -> Result { - if self.c.is_range_ready(start, count)? { - return Ok(true); - } - - let (start_index, end_index) = self.c.get_range(start, count)?; - let mut guard = self.inflight_tracer.lock().unwrap(); - for index in start_index..end_index { - let idx = (index as u64) << self.c.shift; - if let Some(i) = guard.get(&idx).cloned() { - drop(guard); - let result = - i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); - if let Err(StorageError::Timeout) = result { - warn!( - "Waiting for range backend IO expires. chunk index {}. range[{}, {}]", - index, start, count - ); - break; - }; - if !self.c.is_range_ready(idx, 1)? { - return Ok(false); - } - guard = self.inflight_tracer.lock().unwrap(); - } - } - - self.c.is_range_ready(start, count) - } -} - -impl BlobStateMap { - /// Create a new instance of `BlobStateMap` from a `BlobRangeMap` object. 
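The `u64` `RangeMap` implementation above converts a byte range into map indices via `self.c.get_range()` and turns an index back into a tracer key with `(index as u64) << self.c.shift`, i.e. the map tracks fixed, power-of-two sized blocks. A small sketch of that offset-to-index arithmetic (a plausible reading only; `range_to_indices` is a hypothetical helper, not `BlobRangeMap` code):

```rust
// Map a byte range onto block indices at a power-of-two granularity
// (assumes len > 0); the reverse mapping is `index << shift`.
fn range_to_indices(offset: u64, len: u64, shift: u32) -> (u64, u64) {
    let start = offset >> shift; // first covered block
    let end = (offset + len + (1u64 << shift) - 1) >> shift; // one past the last block
    (start, end)
}

fn main() {
    // 4 KiB granularity (shift = 12): bytes [5000, 15000) touch blocks 1..4.
    assert_eq!(range_to_indices(5000, 10000, 12), (1, 4));
    // Converting an index back to a byte-offset key mirrors `(index as u64) << shift`.
    assert_eq!(1u64 << 12, 4096);
}
```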
- pub fn from_range_map(map: BlobRangeMap) -> Self { - Self { - c: map, - inflight_tracer: Mutex::new(HashMap::new()), - } - } -} - -#[cfg(test)] -pub(crate) mod tests { - use std::sync::Arc; - use std::thread; - use std::time::Instant; - - use nydus_utils::digest::Algorithm::Blake3; - use nydus_utils::digest::{Algorithm, RafsDigest}; - use vmm_sys_util::tempdir::TempDir; - use vmm_sys_util::tempfile::TempFile; - - use super::*; - use crate::cache::state::DigestedChunkMap; - use crate::device::BlobChunkInfo; - use crate::test::MockChunkInfo; - - struct Chunk { - index: u32, - digest: RafsDigest, - } - - impl Chunk { - fn new(index: u32) -> Arc { - Arc::new(Self { - index, - digest: RafsDigest::from_buf( - unsafe { std::slice::from_raw_parts(&index as *const u32 as *const u8, 4) }, - Algorithm::Blake3, - ), - }) - } - } - - impl BlobChunkInfo for Chunk { - fn chunk_id(&self) -> &RafsDigest { - &self.digest - } - - fn id(&self) -> u32 { - self.index - } - - fn blob_index(&self) -> u32 { - 0 - } - - fn compressed_offset(&self) -> u64 { - unimplemented!(); - } - - fn compressed_size(&self) -> u32 { - unimplemented!(); - } - - fn uncompressed_offset(&self) -> u64 { - unimplemented!(); - } - - fn uncompressed_size(&self) -> u32 { - unimplemented!(); - } - - fn is_batch(&self) -> bool { - unimplemented!(); - } - - fn is_compressed(&self) -> bool { - unimplemented!(); - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - } - - #[test] - fn test_chunk_map() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - let chunk_count = 1000000; - let skip_index = 77; - - let indexed_chunk_map1 = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), - )); - let indexed_chunk_map2 = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), - )); - let indexed_chunk_map3 = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), - )); - - let now = Instant::now(); - - let h1 = thread::spawn(move || { - for idx in 0..chunk_count { - let chunk = Chunk::new(idx); - if idx % skip_index != 0 { - indexed_chunk_map1 - .set_ready_and_clear_pending(chunk.as_ref()) - .unwrap(); - } - } - }); - - let h2 = thread::spawn(move || { - for idx in 0..chunk_count { - let chunk = Chunk::new(idx); - if idx % skip_index != 0 { - indexed_chunk_map2 - .set_ready_and_clear_pending(chunk.as_ref()) - .unwrap(); - } - } - }); - - h1.join() - .map_err(|e| { - error!("Join error {:?}", e); - e - }) - .unwrap(); - h2.join() - .map_err(|e| { - error!("Join error {:?}", e); - e - }) - .unwrap(); - - println!( - "IndexedChunkMap Concurrency: {}ms", - now.elapsed().as_millis() - ); - - for idx in 0..chunk_count { - let chunk = Chunk::new(idx); - - let has_ready = indexed_chunk_map3 - .check_ready_and_mark_pending(chunk.as_ref()) - .unwrap(); - if idx % skip_index == 0 { - if has_ready { - panic!("indexed chunk map: index {} shouldn't be ready", idx); - } - } else if !has_ready { - panic!("indexed chunk map: index {} should be ready", idx); - } - } - } - - fn iterate(chunks: &[Arc], chunk_map: &dyn ChunkMap, chunk_count: u32) { - for idx in 0..chunk_count { - chunk_map - .set_ready_and_clear_pending(chunks[idx as usize].as_ref()) - .unwrap(); - } - for idx in 0..chunk_count { - assert!(chunk_map - .check_ready_and_mark_pending(chunks[idx as usize].as_ref()) - .unwrap(),); - } - } - - 
#[test] - fn test_chunk_map_perf() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - let chunk_count = 1000000; - - let mut chunks = Vec::new(); - for idx in 0..chunk_count { - chunks.push(Chunk::new(idx)) - } - - let indexed_chunk_map = - BlobStateMap::from(IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap()); - let now = Instant::now(); - iterate(&chunks, &indexed_chunk_map as &dyn ChunkMap, chunk_count); - let elapsed1 = now.elapsed().as_millis(); - - let digested_chunk_map = BlobStateMap::from(DigestedChunkMap::new()); - let now = Instant::now(); - iterate(&chunks, &digested_chunk_map as &dyn ChunkMap, chunk_count); - let elapsed2 = now.elapsed().as_millis(); - - println!( - "IndexedChunkMap vs DigestedChunkMap: {}ms vs {}ms", - elapsed1, elapsed2 - ); - } - - #[test] - fn test_inflight_tracer() { - let chunk_1: Arc = Arc::new({ - let mut c = MockChunkInfo::new(); - c.index = 1; - c.block_id = RafsDigest::from_buf("hello world".as_bytes(), Blake3); - c - }); - let chunk_2: Arc = Arc::new({ - let mut c = MockChunkInfo::new(); - c.index = 2; - c.block_id = RafsDigest::from_buf("hello world 2".as_bytes(), Blake3); - c - }); - // indexed ChunkMap - let tmp_file = TempFile::new().unwrap(); - let index_map = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), - )); - index_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap(); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); - index_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap(); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 2); - index_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap_err(); - index_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap_err(); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 2); - - index_map - .set_ready_and_clear_pending(chunk_1.as_ref()) - .unwrap(); - assert!(index_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap(),); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); - - index_map.clear_pending(chunk_2.as_ref()); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); - assert!(!index_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap(),); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); - index_map.clear_pending(chunk_2.as_ref()); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); - index_map - .set_ready_and_clear_pending(chunk_2.as_ref()) - .unwrap(); - assert!(index_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap(),); - assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); - - // digested ChunkMap - let digest_map = Arc::new(BlobStateMap::from(DigestedChunkMap::new())); - digest_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap(); - assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 1); - digest_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap(); - assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 2); - digest_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap_err(); - digest_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap_err(); - digest_map - .set_ready_and_clear_pending(chunk_1.as_ref()) - .unwrap(); - assert!(digest_map - .check_ready_and_mark_pending(chunk_1.as_ref()) - .unwrap(),); - digest_map.clear_pending(chunk_2.as_ref()); - 
assert!(!digest_map - .check_ready_and_mark_pending(chunk_2.as_ref()) - .unwrap(),); - digest_map.clear_pending(chunk_2.as_ref()); - assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 0); - } - - #[test] - fn test_inflight_tracer_race() { - let tmp_file = TempFile::new().unwrap(); - let map = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), - )); - - let chunk_4: Arc = Arc::new({ - let mut c = MockChunkInfo::new(); - c.index = 4; - c - }); - - assert!(!map - .as_ref() - .check_ready_and_mark_pending(chunk_4.as_ref()) - .unwrap(),); - let map_cloned = map.clone(); - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); - - let chunk_4_cloned = chunk_4.clone(); - let t1 = thread::Builder::new() - .spawn(move || { - for _ in 0..4 { - let ready = map_cloned - .check_ready_and_mark_pending(chunk_4_cloned.as_ref()) - .unwrap(); - assert!(ready); - } - }) - .unwrap(); - - let map_cloned_2 = map.clone(); - let chunk_4_cloned_2 = chunk_4.clone(); - let t2 = thread::Builder::new() - .spawn(move || { - for _ in 0..2 { - let ready = map_cloned_2 - .check_ready_and_mark_pending(chunk_4_cloned_2.as_ref()) - .unwrap(); - assert!(ready); - } - }) - .unwrap(); - - thread::sleep(Duration::from_secs(1)); - - map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); - - // Fuzz - map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); - map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); - - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 0); - - t1.join().unwrap(); - t2.join().unwrap(); - } - - #[test] - /// Case description: - /// Never invoke `set_ready` method, thus to let each caller of `has_ready` reach - /// a point of timeout. - /// Expect: - /// The chunk of index 4 is never marked as ready/downloaded. - /// Each caller of `has_ready` can escape from where it is blocked. - /// After timeout, no slot is left in inflight tracer. 
- fn test_inflight_tracer_timeout() { - let tmp_file = TempFile::new().unwrap(); - let map = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), - )); - - let chunk_4: Arc = Arc::new({ - let mut c = MockChunkInfo::new(); - c.index = 4; - c - }); - - map.as_ref() - .check_ready_and_mark_pending(chunk_4.as_ref()) - .unwrap(); - let map_cloned = map.clone(); - - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); - - let chunk_4_cloned = chunk_4.clone(); - let t1 = thread::Builder::new() - .spawn(move || { - for _ in 0..4 { - map_cloned - .check_ready_and_mark_pending(chunk_4_cloned.as_ref()) - .unwrap_err(); - } - }) - .unwrap(); - - t1.join().unwrap(); - - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); - - map.as_ref() - .check_ready_and_mark_pending(chunk_4.as_ref()) - .unwrap_err(); - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); - - map.clear_pending(chunk_4.as_ref()); - assert_eq!(map.inflight_tracer.lock().unwrap().len(), 0); - } - - #[test] - fn test_inflight_tracer_race_range() { - let tmp_file = TempFile::new().unwrap(); - let map = Arc::new(BlobStateMap::from( - IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), - )); - - assert!(!map.is_range_all_ready()); - assert!(!map.is_range_ready(0, 1).unwrap()); - assert!(!map.is_range_ready(9, 1).unwrap()); - assert!(map.is_range_ready(10, 1).is_err()); - assert_eq!( - map.check_range_ready_and_mark_pending(0, 2).unwrap(), - Some(vec![0, 1]) - ); - map.set_range_ready_and_clear_pending(0, 2).unwrap(); - assert_eq!(map.check_range_ready_and_mark_pending(0, 2).unwrap(), None); - map.wait_for_range_ready(0, 2).unwrap(); - assert_eq!( - map.check_range_ready_and_mark_pending(1, 2).unwrap(), - Some(vec![2]) - ); - map.set_range_ready_and_clear_pending(2, 1).unwrap(); - map.set_range_ready_and_clear_pending(3, 7).unwrap(); - assert!(map.is_range_ready(0, 1).unwrap()); - assert!(map.is_range_ready(9, 1).unwrap()); - assert!(map.is_range_all_ready()); - } -} +// Copyright 2021 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::collections::HashMap; +use std::fmt::Display; +use std::hash::Hash; +use std::io::Result; +use std::sync::{Arc, Condvar, Mutex, WaitTimeoutResult}; +use std::time::Duration; + +use crate::cache::state::{BlobRangeMap, ChunkIndexGetter, ChunkMap, IndexedChunkMap, RangeMap}; +use crate::cache::SINGLE_INFLIGHT_WAIT_TIMEOUT; +use crate::device::BlobChunkInfo; +use crate::{StorageError, StorageResult}; + +#[derive(PartialEq, Copy, Clone)] +enum Status { + Inflight, + Complete, +} + +struct Slot { + state: Mutex, + condvar: Condvar, +} + +impl Slot { + fn new() -> Self { + Slot { + state: Mutex::new(Status::Inflight), + condvar: Condvar::new(), + } + } + + fn notify(&self) { + self.condvar.notify_all(); + } + + fn done(&self) { + // Not expect poisoned lock here + *self.state.lock().unwrap() = Status::Complete; + self.notify(); + } + + fn wait_for_inflight(&self, timeout: Duration) -> StorageResult { + let mut state = self.state.lock().unwrap(); + let mut tor: WaitTimeoutResult; + + while *state == Status::Inflight { + // Do not expect poisoned lock, so unwrap here. 
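            // `Condvar::wait_timeout()` releases the state mutex while blocking and
            // re-acquires it before handing back the fresh guard together with a
            // `WaitTimeoutResult`. Looping on `Status::Inflight` guards against
            // spurious wakeups; a genuine timeout is surfaced as `StorageError::Timeout`.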
+ let r = self.condvar.wait_timeout(state, timeout).unwrap(); + state = r.0; + tor = r.1; + if tor.timed_out() { + return Err(StorageError::Timeout); + } + } + + Ok(*state) + } +} + +/// Adapter structure to enable concurrent chunk readiness manipulating based on a base [ChunkMap] +/// object. +/// +/// A base [ChunkMap], such as [IndexedChunkMap](../chunk_indexed/struct.IndexedChunkMap.html), only +/// tracks chunk readiness state, but doesn't support concurrent manipulating of the chunk readiness +/// state. The `BlobStateMap` structure acts as an adapter to enable concurrent chunk readiness +/// state manipulation. +pub struct BlobStateMap { + c: C, + inflight_tracer: Mutex>>, +} + +impl From for BlobStateMap +where + C: ChunkMap + ChunkIndexGetter, + I: Eq + Hash + Display, +{ + fn from(c: C) -> Self { + Self { + c, + inflight_tracer: Mutex::new(HashMap::new()), + } + } +} + +impl ChunkMap for BlobStateMap +where + C: ChunkMap + ChunkIndexGetter, + I: Eq + Hash + Display + Send + 'static, +{ + fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { + self.c.is_ready(chunk) + } + + fn is_pending(&self, chunk: &dyn BlobChunkInfo) -> Result { + let index = C::get_index(chunk); + Ok(self.inflight_tracer.lock().unwrap().get(&index).is_some()) + } + + fn check_ready_and_mark_pending(&self, chunk: &dyn BlobChunkInfo) -> StorageResult { + let mut ready = self.c.is_ready(chunk).map_err(StorageError::CacheIndex)?; + + if ready { + return Ok(true); + } + + let index = C::get_index(chunk); + let mut guard = self.inflight_tracer.lock().unwrap(); + + if let Some(i) = guard.get(&index).cloned() { + drop(guard); + let result = i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); + if let Err(StorageError::Timeout) = result { + warn!( + "Waiting for backend IO expires. chunk index {}, compressed offset {}", + index, + chunk.compressed_offset() + ); + + Err(StorageError::Timeout) + } else { + // Check if the chunk is ready in local cache again. It should be READY + // since wait_for_inflight must return OK in this branch by one more check. + self.check_ready_and_mark_pending(chunk) + } + } else { + // Double check to close the window where prior slot was just removed after backend IO + // returned. + if self.c.is_ready(chunk).map_err(StorageError::CacheIndex)? 
{ + ready = true; + } else { + guard.insert(index, Arc::new(Slot::new())); + } + Ok(ready) + } + } + + fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { + let res = self.c.set_ready_and_clear_pending(chunk); + self.clear_pending(chunk); + res + } + + fn clear_pending(&self, chunk: &dyn BlobChunkInfo) { + let index = C::get_index(chunk); + let mut guard = self.inflight_tracer.lock().unwrap(); + if let Some(i) = guard.remove(&index) { + i.done(); + } + } + + fn is_persist(&self) -> bool { + self.c.is_persist() + } + + fn as_range_map(&self) -> Option<&dyn RangeMap> { + let any = self as &dyn Any; + + any.downcast_ref::>() + .map(|v| v as &dyn RangeMap) + } +} + +impl RangeMap for BlobStateMap { + type I = u32; + + fn is_range_all_ready(&self) -> bool { + self.c.is_range_all_ready() + } + + fn is_range_ready(&self, start: Self::I, count: Self::I) -> Result { + self.c.is_range_ready(start, count) + } + + fn check_range_ready_and_mark_pending( + &self, + start: Self::I, + count: Self::I, + ) -> Result>> { + let pending = match self.c.check_range_ready_and_mark_pending(start, count) { + Err(e) => return Err(e), + Ok(None) => return Ok(None), + Ok(Some(v)) => { + if v.is_empty() { + return Ok(None); + } + v + } + }; + + let mut res = Vec::with_capacity(pending.len()); + let mut guard = self.inflight_tracer.lock().unwrap(); + for index in pending.iter() { + if guard.get(index).is_none() { + // Double check to close the window where prior slot was just removed after backend + // IO returned. + if !self.c.is_range_ready(*index, 1)? { + guard.insert(*index, Arc::new(Slot::new())); + res.push(*index); + } + } + } + + Ok(Some(res)) + } + + fn set_range_ready_and_clear_pending(&self, start: Self::I, count: Self::I) -> Result<()> { + let res = self.c.set_range_ready_and_clear_pending(start, count); + self.clear_range_pending(start, count); + res + } + + fn clear_range_pending(&self, start: Self::I, count: Self::I) { + let count = std::cmp::min(count, u32::MAX - start); + let end = start + count; + let mut guard = self.inflight_tracer.lock().unwrap(); + + for index in start..end { + if let Some(i) = guard.remove(&index) { + i.done(); + } + } + } + + fn wait_for_range_ready(&self, start: Self::I, count: Self::I) -> Result { + let count = std::cmp::min(count, u32::MAX - start); + let end = start + count; + if self.is_range_ready(start, count)? { + return Ok(true); + } + + let mut guard = self.inflight_tracer.lock().unwrap(); + for index in start..end { + if let Some(i) = guard.get(&index).cloned() { + drop(guard); + let result = + i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); + if let Err(StorageError::Timeout) = result { + warn!( + "Waiting for range backend IO expires. chunk index {}. range[{}, {}]", + index, start, count + ); + break; + }; + if !self.c.is_range_ready(index, 1)? 
{ + return Ok(false); + } + guard = self.inflight_tracer.lock().unwrap(); + } + } + + self.is_range_ready(start, count) + } +} + +impl RangeMap for BlobStateMap { + type I = u64; + + fn is_range_all_ready(&self) -> bool { + self.c.is_range_all_ready() + } + + fn is_range_ready(&self, start: Self::I, count: Self::I) -> Result { + self.c.is_range_ready(start, count) + } + + fn check_range_ready_and_mark_pending( + &self, + start: Self::I, + count: Self::I, + ) -> Result>> { + let pending = match self.c.check_range_ready_and_mark_pending(start, count) { + Err(e) => return Err(e), + Ok(None) => return Ok(None), + Ok(Some(v)) => { + if v.is_empty() { + return Ok(None); + } + v + } + }; + + let mut res = Vec::with_capacity(pending.len()); + let mut guard = self.inflight_tracer.lock().unwrap(); + for index in pending.iter() { + if guard.get(index).is_none() { + // Double check to close the window where prior slot was just removed after backend + // IO returned. + if !self.c.is_range_ready(*index, 1)? { + guard.insert(*index, Arc::new(Slot::new())); + res.push(*index); + } + } + } + + Ok(Some(res)) + } + + fn set_range_ready_and_clear_pending(&self, start: Self::I, count: Self::I) -> Result<()> { + let res = self.c.set_range_ready_and_clear_pending(start, count); + self.clear_range_pending(start, count); + res + } + + fn clear_range_pending(&self, start: Self::I, count: Self::I) { + let (start_index, end_index) = match self.c.get_range(start, count) { + Ok(v) => v, + Err(_) => { + debug_assert!(false); + return; + } + }; + + let mut guard = self.inflight_tracer.lock().unwrap(); + for index in start_index..end_index { + let idx = (index as u64) << self.c.shift; + if let Some(i) = guard.remove(&idx) { + i.done(); + } + } + } + + fn wait_for_range_ready(&self, start: Self::I, count: Self::I) -> Result { + if self.c.is_range_ready(start, count)? { + return Ok(true); + } + + let (start_index, end_index) = self.c.get_range(start, count)?; + let mut guard = self.inflight_tracer.lock().unwrap(); + for index in start_index..end_index { + let idx = (index as u64) << self.c.shift; + if let Some(i) = guard.get(&idx).cloned() { + drop(guard); + let result = + i.wait_for_inflight(Duration::from_millis(SINGLE_INFLIGHT_WAIT_TIMEOUT)); + if let Err(StorageError::Timeout) = result { + warn!( + "Waiting for range backend IO expires. chunk index {}. range[{}, {}]", + index, start, count + ); + break; + }; + if !self.c.is_range_ready(idx, 1)? { + return Ok(false); + } + guard = self.inflight_tracer.lock().unwrap(); + } + } + + self.c.is_range_ready(start, count) + } +} + +impl BlobStateMap { + /// Create a new instance of `BlobStateMap` from a `BlobRangeMap` object. 
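    ///
    /// The wrapped `BlobRangeMap` tracks readiness per data range, and the inflight
    /// tracer keys its slots by the range's shifted index (`(index as u64) << map.shift`),
    /// so concurrent downloads of the same range are serialized while distinct ranges
    /// proceed independently.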
+ pub fn from_range_map(map: BlobRangeMap) -> Self { + Self { + c: map, + inflight_tracer: Mutex::new(HashMap::new()), + } + } +} + +#[cfg(test)] +pub(crate) mod tests { + use std::sync::Arc; + use std::thread; + use std::time::Instant; + + use nydus_utils::digest::Algorithm::Blake3; + use nydus_utils::digest::{Algorithm, RafsDigest}; + use vmm_sys_util::tempdir::TempDir; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::cache::state::DigestedChunkMap; + use crate::device::BlobChunkInfo; + use crate::test::MockChunkInfo; + + struct Chunk { + index: u32, + digest: RafsDigest, + } + + impl Chunk { + fn new(index: u32) -> Arc { + Arc::new(Self { + index, + digest: RafsDigest::from_buf( + unsafe { std::slice::from_raw_parts(&index as *const u32 as *const u8, 4) }, + Algorithm::Blake3, + ), + }) + } + } + + impl BlobChunkInfo for Chunk { + fn chunk_id(&self) -> &RafsDigest { + &self.digest + } + + fn id(&self) -> u32 { + self.index + } + + fn blob_index(&self) -> u32 { + 0 + } + + fn compressed_offset(&self) -> u64 { + unimplemented!(); + } + + fn compressed_size(&self) -> u32 { + unimplemented!(); + } + + fn uncompressed_offset(&self) -> u64 { + unimplemented!(); + } + + fn uncompressed_size(&self) -> u32 { + unimplemented!(); + } + + fn is_batch(&self) -> bool { + unimplemented!(); + } + + fn is_compressed(&self) -> bool { + unimplemented!(); + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + } + + #[test] + fn test_chunk_map() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + let chunk_count = 1000000; + let skip_index = 77; + + let indexed_chunk_map1 = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), + )); + let indexed_chunk_map2 = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), + )); + let indexed_chunk_map3 = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap(), + )); + + let now = Instant::now(); + + let h1 = thread::spawn(move || { + for idx in 0..chunk_count { + let chunk = Chunk::new(idx); + if idx % skip_index != 0 { + indexed_chunk_map1 + .set_ready_and_clear_pending(chunk.as_ref()) + .unwrap(); + } + } + }); + + let h2 = thread::spawn(move || { + for idx in 0..chunk_count { + let chunk = Chunk::new(idx); + if idx % skip_index != 0 { + indexed_chunk_map2 + .set_ready_and_clear_pending(chunk.as_ref()) + .unwrap(); + } + } + }); + + h1.join() + .map_err(|e| { + error!("Join error {:?}", e); + e + }) + .unwrap(); + h2.join() + .map_err(|e| { + error!("Join error {:?}", e); + e + }) + .unwrap(); + + println!( + "IndexedChunkMap Concurrency: {}ms", + now.elapsed().as_millis() + ); + + for idx in 0..chunk_count { + let chunk = Chunk::new(idx); + + let has_ready = indexed_chunk_map3 + .check_ready_and_mark_pending(chunk.as_ref()) + .unwrap(); + if idx % skip_index == 0 { + if has_ready { + panic!("indexed chunk map: index {} shouldn't be ready", idx); + } + } else if !has_ready { + panic!("indexed chunk map: index {} should be ready", idx); + } + } + } + + fn iterate(chunks: &[Arc], chunk_map: &dyn ChunkMap, chunk_count: u32) { + for idx in 0..chunk_count { + chunk_map + .set_ready_and_clear_pending(chunks[idx as usize].as_ref()) + .unwrap(); + } + for idx in 0..chunk_count { + assert!(chunk_map + .check_ready_and_mark_pending(chunks[idx as usize].as_ref()) + .unwrap(),); + } + } + + 
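    // A minimal sketch (not exercised by these tests) of how a cache worker is expected
    // to drive the single-inflight protocol: an `Ok(false)` from
    // `check_ready_and_mark_pending()` means the caller owns the download and must later
    // call either `set_ready_and_clear_pending()` on success or `clear_pending()` on
    // failure, so that threads waiting on the inflight slot are woken up. The `fetch`
    // closure is a hypothetical stand-in for the real backend IO.
    #[allow(dead_code)]
    fn fetch_chunk_once(
        map: &dyn ChunkMap,
        chunk: &dyn BlobChunkInfo,
        fetch: impl Fn(&dyn BlobChunkInfo) -> std::io::Result<()>,
    ) -> std::io::Result<()> {
        match map.check_ready_and_mark_pending(chunk) {
            // Ready already, either from a previous run or another thread's download.
            Ok(true) => Ok(()),
            // This thread owns the pending slot: perform the IO and publish the result.
            Ok(false) => match fetch(chunk) {
                Ok(()) => map.set_ready_and_clear_pending(chunk),
                Err(e) => {
                    map.clear_pending(chunk);
                    Err(e)
                }
            },
            // Either the wait on the inflight slot timed out or the underlying cache
            // index reported an error.
            Err(_) => Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "failed while waiting for an inflight chunk",
            )),
        }
    }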
#[test] + fn test_chunk_map_perf() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + let chunk_count = 1000000; + + let mut chunks = Vec::new(); + for idx in 0..chunk_count { + chunks.push(Chunk::new(idx)) + } + + let indexed_chunk_map = + BlobStateMap::from(IndexedChunkMap::new(&blob_path, chunk_count, true).unwrap()); + let now = Instant::now(); + iterate(&chunks, &indexed_chunk_map as &dyn ChunkMap, chunk_count); + let elapsed1 = now.elapsed().as_millis(); + + let digested_chunk_map = BlobStateMap::from(DigestedChunkMap::new()); + let now = Instant::now(); + iterate(&chunks, &digested_chunk_map as &dyn ChunkMap, chunk_count); + let elapsed2 = now.elapsed().as_millis(); + + println!( + "IndexedChunkMap vs DigestedChunkMap: {}ms vs {}ms", + elapsed1, elapsed2 + ); + } + + #[test] + fn test_inflight_tracer() { + let chunk_1: Arc = Arc::new({ + let mut c = MockChunkInfo::new(); + c.index = 1; + c.block_id = RafsDigest::from_buf("hello world".as_bytes(), Blake3); + c + }); + let chunk_2: Arc = Arc::new({ + let mut c = MockChunkInfo::new(); + c.index = 2; + c.block_id = RafsDigest::from_buf("hello world 2".as_bytes(), Blake3); + c + }); + // indexed ChunkMap + let tmp_file = TempFile::new().unwrap(); + let index_map = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), + )); + index_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap(); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); + index_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap(); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 2); + index_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap_err(); + index_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap_err(); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 2); + + index_map + .set_ready_and_clear_pending(chunk_1.as_ref()) + .unwrap(); + assert!(index_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap(),); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); + + index_map.clear_pending(chunk_2.as_ref()); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); + assert!(!index_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap(),); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 1); + index_map.clear_pending(chunk_2.as_ref()); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); + index_map + .set_ready_and_clear_pending(chunk_2.as_ref()) + .unwrap(); + assert!(index_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap(),); + assert_eq!(index_map.inflight_tracer.lock().unwrap().len(), 0); + + // digested ChunkMap + let digest_map = Arc::new(BlobStateMap::from(DigestedChunkMap::new())); + digest_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap(); + assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 1); + digest_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap(); + assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 2); + digest_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap_err(); + digest_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap_err(); + digest_map + .set_ready_and_clear_pending(chunk_1.as_ref()) + .unwrap(); + assert!(digest_map + .check_ready_and_mark_pending(chunk_1.as_ref()) + .unwrap(),); + digest_map.clear_pending(chunk_2.as_ref()); + 
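        // clear_pending() removes the inflight slot without marking chunk_2 ready, so
        // the next check returns false and registers a fresh pending slot.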
assert!(!digest_map + .check_ready_and_mark_pending(chunk_2.as_ref()) + .unwrap(),); + digest_map.clear_pending(chunk_2.as_ref()); + assert_eq!(digest_map.inflight_tracer.lock().unwrap().len(), 0); + } + + #[test] + fn test_inflight_tracer_race() { + let tmp_file = TempFile::new().unwrap(); + let map = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), + )); + + let chunk_4: Arc = Arc::new({ + let mut c = MockChunkInfo::new(); + c.index = 4; + c + }); + + assert!(!map + .as_ref() + .check_ready_and_mark_pending(chunk_4.as_ref()) + .unwrap(),); + let map_cloned = map.clone(); + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); + + let chunk_4_cloned = chunk_4.clone(); + let t1 = thread::Builder::new() + .spawn(move || { + for _ in 0..4 { + let ready = map_cloned + .check_ready_and_mark_pending(chunk_4_cloned.as_ref()) + .unwrap(); + assert!(ready); + } + }) + .unwrap(); + + let map_cloned_2 = map.clone(); + let chunk_4_cloned_2 = chunk_4.clone(); + let t2 = thread::Builder::new() + .spawn(move || { + for _ in 0..2 { + let ready = map_cloned_2 + .check_ready_and_mark_pending(chunk_4_cloned_2.as_ref()) + .unwrap(); + assert!(ready); + } + }) + .unwrap(); + + thread::sleep(Duration::from_secs(1)); + + map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); + + // Fuzz + map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); + map.set_ready_and_clear_pending(chunk_4.as_ref()).unwrap(); + + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 0); + + t1.join().unwrap(); + t2.join().unwrap(); + } + + #[test] + /// Case description: + /// Never invoke `set_ready` method, thus to let each caller of `has_ready` reach + /// a point of timeout. + /// Expect: + /// The chunk of index 4 is never marked as ready/downloaded. + /// Each caller of `has_ready` can escape from where it is blocked. + /// After timeout, no slot is left in inflight tracer. 
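    /// Each blocked caller gets `Err(StorageError::Timeout)` back from
    /// `check_ready_and_mark_pending()` once its wait on the inflight slot exceeds
    /// `SINGLE_INFLIGHT_WAIT_TIMEOUT` milliseconds.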
+ fn test_inflight_tracer_timeout() { + let tmp_file = TempFile::new().unwrap(); + let map = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), + )); + + let chunk_4: Arc = Arc::new({ + let mut c = MockChunkInfo::new(); + c.index = 4; + c + }); + + map.as_ref() + .check_ready_and_mark_pending(chunk_4.as_ref()) + .unwrap(); + let map_cloned = map.clone(); + + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); + + let chunk_4_cloned = chunk_4.clone(); + let t1 = thread::Builder::new() + .spawn(move || { + for _ in 0..4 { + map_cloned + .check_ready_and_mark_pending(chunk_4_cloned.as_ref()) + .unwrap_err(); + } + }) + .unwrap(); + + t1.join().unwrap(); + + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); + + map.as_ref() + .check_ready_and_mark_pending(chunk_4.as_ref()) + .unwrap_err(); + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 1); + + map.clear_pending(chunk_4.as_ref()); + assert_eq!(map.inflight_tracer.lock().unwrap().len(), 0); + } + + #[test] + fn test_inflight_tracer_race_range() { + let tmp_file = TempFile::new().unwrap(); + let map = Arc::new(BlobStateMap::from( + IndexedChunkMap::new(tmp_file.as_path().to_str().unwrap(), 10, true).unwrap(), + )); + + assert!(!map.is_range_all_ready()); + assert!(!map.is_range_ready(0, 1).unwrap()); + assert!(!map.is_range_ready(9, 1).unwrap()); + assert!(map.is_range_ready(10, 1).is_err()); + assert_eq!( + map.check_range_ready_and_mark_pending(0, 2).unwrap(), + Some(vec![0, 1]) + ); + map.set_range_ready_and_clear_pending(0, 2).unwrap(); + assert_eq!(map.check_range_ready_and_mark_pending(0, 2).unwrap(), None); + map.wait_for_range_ready(0, 2).unwrap(); + assert_eq!( + map.check_range_ready_and_mark_pending(1, 2).unwrap(), + Some(vec![2]) + ); + map.set_range_ready_and_clear_pending(2, 1).unwrap(); + map.set_range_ready_and_clear_pending(3, 7).unwrap(); + assert!(map.is_range_ready(0, 1).unwrap()); + assert!(map.is_range_ready(9, 1).unwrap()); + assert!(map.is_range_all_ready()); + } +} diff --git a/storage/src/cache/state/digested_chunk_map.rs b/storage/src/cache/state/digested_chunk_map.rs index 30df1e147af..b44ba0939ce 100644 --- a/storage/src/cache/state/digested_chunk_map.rs +++ b/storage/src/cache/state/digested_chunk_map.rs @@ -1,61 +1,61 @@ -// Copyright 2021 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A chunk state tracking driver for legacy Nydus images without chunk array -//! -//! This module provides a chunk state tracking driver for legacy Rafs images without chunk array, -//! which uses chunk digest as id to track chunk readiness state. The [DigestedChunkMap] is not -//! optimal in case of performance and memory consumption. So it is only used to keep backward -/// compatibility with the old nydus image format. -use std::collections::HashSet; -use std::io::Result; -use std::sync::RwLock; - -use nydus_utils::digest::RafsDigest; - -use crate::cache::state::{ChunkIndexGetter, ChunkMap}; -use crate::device::BlobChunkInfo; - -/// An implementation of [ChunkMap](trait.ChunkMap.html) to support chunk state tracking by using -/// `HashSet`. -/// -/// The `DigestedChunkMap` is an implementation of [ChunkMap] which uses a hash set -/// (HashSet) to record whether a chunk has already been cached by the blob cache. 
-/// The implementation is memory and computation heavy, so it is used only to keep backward -/// compatibility with the previous old nydus bootstrap format. For new clients, please use other -/// alternative implementations. -#[derive(Default)] -pub struct DigestedChunkMap { - cache: RwLock>, -} - -impl DigestedChunkMap { - /// Create a new instance of `DigestedChunkMap`. - pub fn new() -> Self { - Self { - cache: RwLock::new(HashSet::new()), - } - } -} - -impl ChunkMap for DigestedChunkMap { - fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { - Ok(self.cache.read().unwrap().contains(chunk.chunk_id())) - } - - fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { - // Do not expect poisoned lock. - self.cache.write().unwrap().insert(*chunk.chunk_id()); - Ok(()) - } -} - -impl ChunkIndexGetter for DigestedChunkMap { - type Index = RafsDigest; - - fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { - *chunk.chunk_id() - } -} +// Copyright 2021 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A chunk state tracking driver for legacy Nydus images without chunk array +//! +//! This module provides a chunk state tracking driver for legacy Rafs images without chunk array, +//! which uses chunk digest as id to track chunk readiness state. The [DigestedChunkMap] is not +//! optimal in case of performance and memory consumption. So it is only used to keep backward +/// compatibility with the old nydus image format. +use std::collections::HashSet; +use std::io::Result; +use std::sync::RwLock; + +use nydus_utils::digest::RafsDigest; + +use crate::cache::state::{ChunkIndexGetter, ChunkMap}; +use crate::device::BlobChunkInfo; + +/// An implementation of [ChunkMap](trait.ChunkMap.html) to support chunk state tracking by using +/// `HashSet`. +/// +/// The `DigestedChunkMap` is an implementation of [ChunkMap] which uses a hash set +/// (HashSet) to record whether a chunk has already been cached by the blob cache. +/// The implementation is memory and computation heavy, so it is used only to keep backward +/// compatibility with the previous old nydus bootstrap format. For new clients, please use other +/// alternative implementations. +#[derive(Default)] +pub struct DigestedChunkMap { + cache: RwLock>, +} + +impl DigestedChunkMap { + /// Create a new instance of `DigestedChunkMap`. + pub fn new() -> Self { + Self { + cache: RwLock::new(HashSet::new()), + } + } +} + +impl ChunkMap for DigestedChunkMap { + fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { + Ok(self.cache.read().unwrap().contains(chunk.chunk_id())) + } + + fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { + // Do not expect poisoned lock. + self.cache.write().unwrap().insert(*chunk.chunk_id()); + Ok(()) + } +} + +impl ChunkIndexGetter for DigestedChunkMap { + type Index = RafsDigest; + + fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { + *chunk.chunk_id() + } +} diff --git a/storage/src/cache/state/indexed_chunk_map.rs b/storage/src/cache/state/indexed_chunk_map.rs index ff5808cdac5..0ee7bde8b9c 100644 --- a/storage/src/cache/state/indexed_chunk_map.rs +++ b/storage/src/cache/state/indexed_chunk_map.rs @@ -1,338 +1,338 @@ -// Copyright 2021 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! A chunk state tracking driver based on a bitmap file. -//! -//! 
This module provides a chunk state tracking driver based on a bitmap file. There's a state bit -//! in the bitmap file for each chunk, and atomic operations are used to manipulate the bitmap. -//! So it supports concurrent downloading. -use std::io::Result; - -use crate::cache::state::persist_map::PersistMap; -use crate::cache::state::{ChunkIndexGetter, ChunkMap, RangeMap}; -use crate::device::BlobChunkInfo; - -/// The name suffix of blob chunk_map file, named $blob_id.chunk_map. -const FILE_SUFFIX: &str = "chunk_map"; - -/// An implementation of [ChunkMap] to support chunk state tracking by using a bitmap file. -/// -/// The `IndexedChunkMap` is an implementation of [ChunkMap] which uses a bitmap file and atomic -/// bitmap operations to track readiness state. It creates or opens a file with the name -/// `$blob_id.chunk_map` to record whether a chunk has been cached by the blob cache, and atomic -/// bitmap operations are used to manipulate the state bit. The bitmap file will be persisted to -/// disk. -/// -/// This approach can be used to share chunk ready state between multiple nydusd instances. -/// For example: the bitmap file layout is [0b00000000, 0b00000000], when blobcache calls -/// set_ready(3), the layout should be changed to [0b00010000, 0b00000000]. -pub struct IndexedChunkMap { - map: PersistMap, -} - -impl IndexedChunkMap { - /// Create a new instance of `IndexedChunkMap`. - pub fn new(blob_path: &str, chunk_count: u32, persist: bool) -> Result { - let filename = format!("{}.{}", blob_path, FILE_SUFFIX); - - PersistMap::open(&filename, chunk_count, true, persist).map(|map| IndexedChunkMap { map }) - } -} - -impl ChunkMap for IndexedChunkMap { - fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { - if self.is_range_all_ready() { - Ok(true) - } else { - let index = self.map.validate_index(chunk.id())?; - Ok(self.map.is_chunk_ready(index).0) - } - } - - fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { - self.map.set_chunk_ready(chunk.id()) - } - - fn is_persist(&self) -> bool { - true - } - - fn as_range_map(&self) -> Option<&dyn RangeMap> { - Some(self) - } -} - -impl RangeMap for IndexedChunkMap { - type I = u32; - - #[inline] - fn is_range_all_ready(&self) -> bool { - self.map.is_range_all_ready() - } - - fn is_range_ready(&self, start_index: u32, count: u32) -> Result { - if !self.is_range_all_ready() { - for idx in 0..count { - let index = self - .map - .validate_index(start_index.checked_add(idx).ok_or_else(|| einval!())?)?; - if !self.map.is_chunk_ready(index).0 { - return Ok(false); - } - } - } - - Ok(true) - } - - fn check_range_ready_and_mark_pending( - &self, - start_index: u32, - count: u32, - ) -> Result>> { - if self.is_range_all_ready() { - return Ok(None); - } - - let mut vec = Vec::with_capacity(count as usize); - let count = std::cmp::min(count, u32::MAX - start_index); - let end = start_index + count; - - for index in start_index..end { - if !self.map.is_chunk_ready(index).0 { - vec.push(index); - } - } - - if vec.is_empty() { - Ok(None) - } else { - Ok(Some(vec)) - } - } - - fn set_range_ready_and_clear_pending(&self, start_index: u32, count: u32) -> Result<()> { - let count = std::cmp::min(count, u32::MAX - start_index); - let end = start_index + count; - - for index in start_index..end { - self.map.set_chunk_ready(index)?; - } - - Ok(()) - } -} - -impl ChunkIndexGetter for IndexedChunkMap { - type Index = u32; - - fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { - chunk.id() - } -} - -#[cfg(test)] -mod tests 
{ - use std::fs::OpenOptions; - use std::io::Write; - use std::sync::atomic::Ordering; - use vmm_sys_util::tempdir::TempDir; - - use super::super::persist_map::*; - use super::*; - use crate::device::v5::BlobV5ChunkInfo; - use crate::test::MockChunkInfo; - - #[test] - fn test_indexed_new_invalid_file_size() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - - assert!(IndexedChunkMap::new(&blob_path, 0, false).is_err()); - - let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); - let mut file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&cache_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - cache_path, err - )) - }) - .unwrap(); - file.write_all(&[0x0u8]).unwrap(); - - let chunk = MockChunkInfo::new(); - assert_eq!(chunk.id(), 0); - - assert!(IndexedChunkMap::new(&blob_path, 1, true).is_err()); - } - - #[test] - fn test_indexed_new_zero_file_size() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - - assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); - - let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); - let _file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&cache_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - cache_path, err - )) - }) - .unwrap(); - - let chunk = MockChunkInfo::new(); - assert_eq!(chunk.id(), 0); - - let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); - assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); - assert_eq!(map.map.count, 1); - assert_eq!(map.map.size(), 0x1001); - assert!(!map.is_range_all_ready()); - assert!(!map.is_ready(chunk.as_base()).unwrap()); - map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); - assert!(map.is_ready(chunk.as_base()).unwrap()); - } - - #[test] - fn test_indexed_new_header_not_ready() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - - assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); - - let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); - let file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&cache_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - cache_path, err - )) - }) - .unwrap(); - file.set_len(0x1001).unwrap(); - - let chunk = MockChunkInfo::new(); - assert_eq!(chunk.id(), 0); - - let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); - assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); - assert_eq!(map.map.count, 1); - assert_eq!(map.map.size(), 0x1001); - assert!(!map.is_range_all_ready()); - assert!(!map.is_ready(chunk.as_base()).unwrap()); - map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); - assert!(map.is_ready(chunk.as_base()).unwrap()); - } - - #[test] - fn test_indexed_new_all_ready() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - - assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); - - let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); - let mut file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - 
.open(&cache_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - cache_path, err - )) - }) - .unwrap(); - let header = Header { - magic: MAGIC1, - version: 1, - magic2: MAGIC2, - all_ready: MAGIC_ALL_READY, - reserved: [0x0u8; HEADER_RESERVED_SIZE], - }; - - // write file header and sync to disk. - file.write_all(header.as_slice()).unwrap(); - file.write_all(&[0x0u8]).unwrap(); - - let chunk = MockChunkInfo::new(); - assert_eq!(chunk.id(), 0); - - let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); - assert!(map.is_range_all_ready()); - assert_eq!(map.map.count, 1); - assert_eq!(map.map.size(), 0x1001); - assert!(map.is_ready(chunk.as_base()).unwrap()); - map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); - assert!(map.is_ready(chunk.as_base()).unwrap()); - } - - #[test] - fn test_indexed_new_load_v0() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - - assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); - - let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); - let mut file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&cache_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - cache_path, err - )) - }) - .unwrap(); - let header = Header { - magic: MAGIC1, - version: 0, - magic2: 0, - all_ready: 0, - reserved: [0x0u8; HEADER_RESERVED_SIZE], - }; - - // write file header and sync to disk. - file.write_all(header.as_slice()).unwrap(); - file.write_all(&[0x0u8]).unwrap(); - - let chunk = MockChunkInfo::new(); - assert_eq!(chunk.id(), 0); - - let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); - assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); - assert_eq!(map.map.count, 1); - assert_eq!(map.map.size(), 0x1001); - assert!(!map.is_range_all_ready()); - assert!(!map.is_ready(chunk.as_base()).unwrap()); - map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); - assert!(map.is_ready(chunk.as_base()).unwrap()); - } -} +// Copyright 2021 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A chunk state tracking driver based on a bitmap file. +//! +//! This module provides a chunk state tracking driver based on a bitmap file. There's a state bit +//! in the bitmap file for each chunk, and atomic operations are used to manipulate the bitmap. +//! So it supports concurrent downloading. +use std::io::Result; + +use crate::cache::state::persist_map::PersistMap; +use crate::cache::state::{ChunkIndexGetter, ChunkMap, RangeMap}; +use crate::device::BlobChunkInfo; + +/// The name suffix of blob chunk_map file, named $blob_id.chunk_map. +const FILE_SUFFIX: &str = "chunk_map"; + +/// An implementation of [ChunkMap] to support chunk state tracking by using a bitmap file. +/// +/// The `IndexedChunkMap` is an implementation of [ChunkMap] which uses a bitmap file and atomic +/// bitmap operations to track readiness state. It creates or opens a file with the name +/// `$blob_id.chunk_map` to record whether a chunk has been cached by the blob cache, and atomic +/// bitmap operations are used to manipulate the state bit. The bitmap file will be persisted to +/// disk. +/// +/// This approach can be used to share chunk ready state between multiple nydusd instances. 
+/// For example: the bitmap file layout is [0b00000000, 0b00000000], when blobcache calls +/// set_ready(3), the layout should be changed to [0b00010000, 0b00000000]. +pub struct IndexedChunkMap { + map: PersistMap, +} + +impl IndexedChunkMap { + /// Create a new instance of `IndexedChunkMap`. + pub fn new(blob_path: &str, chunk_count: u32, persist: bool) -> Result { + let filename = format!("{}.{}", blob_path, FILE_SUFFIX); + + PersistMap::open(&filename, chunk_count, true, persist).map(|map| IndexedChunkMap { map }) + } +} + +impl ChunkMap for IndexedChunkMap { + fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result { + if self.is_range_all_ready() { + Ok(true) + } else { + let index = self.map.validate_index(chunk.id())?; + Ok(self.map.is_chunk_ready(index).0) + } + } + + fn set_ready_and_clear_pending(&self, chunk: &dyn BlobChunkInfo) -> Result<()> { + self.map.set_chunk_ready(chunk.id()) + } + + fn is_persist(&self) -> bool { + true + } + + fn as_range_map(&self) -> Option<&dyn RangeMap> { + Some(self) + } +} + +impl RangeMap for IndexedChunkMap { + type I = u32; + + #[inline] + fn is_range_all_ready(&self) -> bool { + self.map.is_range_all_ready() + } + + fn is_range_ready(&self, start_index: u32, count: u32) -> Result { + if !self.is_range_all_ready() { + for idx in 0..count { + let index = self + .map + .validate_index(start_index.checked_add(idx).ok_or_else(|| einval!())?)?; + if !self.map.is_chunk_ready(index).0 { + return Ok(false); + } + } + } + + Ok(true) + } + + fn check_range_ready_and_mark_pending( + &self, + start_index: u32, + count: u32, + ) -> Result>> { + if self.is_range_all_ready() { + return Ok(None); + } + + let mut vec = Vec::with_capacity(count as usize); + let count = std::cmp::min(count, u32::MAX - start_index); + let end = start_index + count; + + for index in start_index..end { + if !self.map.is_chunk_ready(index).0 { + vec.push(index); + } + } + + if vec.is_empty() { + Ok(None) + } else { + Ok(Some(vec)) + } + } + + fn set_range_ready_and_clear_pending(&self, start_index: u32, count: u32) -> Result<()> { + let count = std::cmp::min(count, u32::MAX - start_index); + let end = start_index + count; + + for index in start_index..end { + self.map.set_chunk_ready(index)?; + } + + Ok(()) + } +} + +impl ChunkIndexGetter for IndexedChunkMap { + type Index = u32; + + fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { + chunk.id() + } +} + +#[cfg(test)] +mod tests { + use std::fs::OpenOptions; + use std::io::Write; + use std::sync::atomic::Ordering; + use vmm_sys_util::tempdir::TempDir; + + use super::super::persist_map::*; + use super::*; + use crate::device::v5::BlobV5ChunkInfo; + use crate::test::MockChunkInfo; + + #[test] + fn test_indexed_new_invalid_file_size() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + + assert!(IndexedChunkMap::new(&blob_path, 0, false).is_err()); + + let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); + let mut file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&cache_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + cache_path, err + )) + }) + .unwrap(); + file.write_all(&[0x0u8]).unwrap(); + + let chunk = MockChunkInfo::new(); + assert_eq!(chunk.id(), 0); + + assert!(IndexedChunkMap::new(&blob_path, 1, true).is_err()); + } + + #[test] + fn test_indexed_new_zero_file_size() { + let dir = TempDir::new().unwrap(); + let blob_path = 
dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + + assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); + + let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); + let _file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&cache_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + cache_path, err + )) + }) + .unwrap(); + + let chunk = MockChunkInfo::new(); + assert_eq!(chunk.id(), 0); + + let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); + assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); + assert_eq!(map.map.count, 1); + assert_eq!(map.map.size(), 0x1001); + assert!(!map.is_range_all_ready()); + assert!(!map.is_ready(chunk.as_base()).unwrap()); + map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); + assert!(map.is_ready(chunk.as_base()).unwrap()); + } + + #[test] + fn test_indexed_new_header_not_ready() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + + assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); + + let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&cache_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + cache_path, err + )) + }) + .unwrap(); + file.set_len(0x1001).unwrap(); + + let chunk = MockChunkInfo::new(); + assert_eq!(chunk.id(), 0); + + let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); + assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); + assert_eq!(map.map.count, 1); + assert_eq!(map.map.size(), 0x1001); + assert!(!map.is_range_all_ready()); + assert!(!map.is_ready(chunk.as_base()).unwrap()); + map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); + assert!(map.is_ready(chunk.as_base()).unwrap()); + } + + #[test] + fn test_indexed_new_all_ready() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + + assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); + + let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); + let mut file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&cache_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + cache_path, err + )) + }) + .unwrap(); + let header = Header { + magic: MAGIC1, + version: 1, + magic2: MAGIC2, + all_ready: MAGIC_ALL_READY, + reserved: [0x0u8; HEADER_RESERVED_SIZE], + }; + + // write file header and sync to disk. 
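        // `all_ready: MAGIC_ALL_READY` marks the whole blob as already downloaded, so
        // the map opened below reports is_range_all_ready() without consulting the
        // bitmap body; the trailing 0x0 byte is the one-chunk bitmap padding the file
        // to the expected size.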
+ file.write_all(header.as_slice()).unwrap(); + file.write_all(&[0x0u8]).unwrap(); + + let chunk = MockChunkInfo::new(); + assert_eq!(chunk.id(), 0); + + let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); + assert!(map.is_range_all_ready()); + assert_eq!(map.map.count, 1); + assert_eq!(map.map.size(), 0x1001); + assert!(map.is_ready(chunk.as_base()).unwrap()); + map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); + assert!(map.is_ready(chunk.as_base()).unwrap()); + } + + #[test] + fn test_indexed_new_load_v0() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + + assert!(IndexedChunkMap::new(&blob_path, 0, true).is_err()); + + let cache_path = format!("{}.{}", blob_path, FILE_SUFFIX); + let mut file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&cache_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + cache_path, err + )) + }) + .unwrap(); + let header = Header { + magic: MAGIC1, + version: 0, + magic2: 0, + all_ready: 0, + reserved: [0x0u8; HEADER_RESERVED_SIZE], + }; + + // write file header and sync to disk. + file.write_all(header.as_slice()).unwrap(); + file.write_all(&[0x0u8]).unwrap(); + + let chunk = MockChunkInfo::new(); + assert_eq!(chunk.id(), 0); + + let map = IndexedChunkMap::new(&blob_path, 1, true).unwrap(); + assert_eq!(map.map.not_ready_count.load(Ordering::Acquire), 1); + assert_eq!(map.map.count, 1); + assert_eq!(map.map.size(), 0x1001); + assert!(!map.is_range_all_ready()); + assert!(!map.is_ready(chunk.as_base()).unwrap()); + map.set_ready_and_clear_pending(chunk.as_base()).unwrap(); + assert!(map.is_ready(chunk.as_base()).unwrap()); + } +} diff --git a/storage/src/cache/state/mod.rs b/storage/src/cache/state/mod.rs index 8cdac7574f9..c2e170f1c13 100644 --- a/storage/src/cache/state/mod.rs +++ b/storage/src/cache/state/mod.rs @@ -1,211 +1,211 @@ -// Copyright 2021 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Chunk or data readiness state tracking drivers. -//! -//! To cache data from remote backend storage onto local storage, a cache state tracking mechanism -//! is needed to track whether a specific chunk or data is ready on local storage and to cooperate -//! on concurrent data downloading. The [ChunkMap](trait.ChunkMap.html) trait is the main mechanism -//! to track chunk state. And [BlobStateMap](struct.BlobStateMap.html) is an adapter structure of -//! [ChunkMap] to support concurrent data downloading, which is based on a base [ChunkMap] -//! implementation to track chunk readiness state. And [RangeMap](trait.RangeMap.html) objects are -//! used to track readiness for a range of chunks or data, with support of batch operation. -//! -//! There are several implementation of the [ChunkMap] and [RangeMap] trait to track chunk and data -//! readiness state: -//! - [BlobStateMap](struct.BlobStateMap.html): an adapter structure to enable concurrent -//! synchronization manipulation of readiness state, based on an underlying base [ChunkMap] or -//! [RangeMap] object. -//! - [BlobRangeMap](struct.BlobRangeMap.html): a data state tracking driver using a bitmap file -//! to persist state, indexed by data address range. -//! - [DigestedChunkMap](struct.DigestedChunkMap.html): a chunk state tracking driver -//! 
for legacy Rafs images without chunk array, which uses chunk digest as the id to track chunk -//! readiness state. The [DigestedChunkMap] is not optimal in case of performance and memory -//! consumption. -//! - [IndexedChunkMap](struct.IndexedChunkMap.html): a chunk state tracking driver using a bitmap -//! file to persist state, indexed by chunk index. There's a state bit in the bitmap file for each -//! chunk, and atomic operations are used to manipulate the bitmap for concurrent state -//! manipulating. It's the recommended state tracking driver. -//! - [NoopChunkMap](struct.NoopChunkMap.html): a no-operation chunk state tracking driver, -//! which just reports every chunk as always ready to use or not. It may be used to support disk -//! based backend storage or dummy cache. - -use std::any::Any; -use std::io::Result; - -use crate::device::BlobChunkInfo; -use crate::StorageResult; - -pub use blob_state_map::BlobStateMap; -pub use digested_chunk_map::DigestedChunkMap; -pub use indexed_chunk_map::IndexedChunkMap; -pub use noop_chunk_map::NoopChunkMap; -pub use range_map::BlobRangeMap; - -mod blob_state_map; -mod digested_chunk_map; -mod indexed_chunk_map; -mod noop_chunk_map; -mod persist_map; -mod range_map; - -/// Trait to track chunk readiness state. -pub trait ChunkMap: Any + Send + Sync { - /// Check whether the chunk is ready for use. - fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result; - - /// Check whether the chunk is pending for downloading. - fn is_pending(&self, _chunk: &dyn BlobChunkInfo) -> Result { - Ok(false) - } - - /// Check whether a chunk is ready for use or pending for downloading. - fn is_ready_or_pending(&self, chunk: &dyn BlobChunkInfo) -> Result { - if matches!(self.is_pending(chunk), Ok(true)) { - Ok(true) - } else { - self.is_ready(chunk) - } - } - - /// Check whether the chunk is ready for use, and mark it as pending if not ready yet. - /// - /// The function returns: - /// - `Err(Timeout)` waiting for inflight backend IO timeouts. - /// - `Ok(true)` if the chunk is ready. - /// - `Ok(false)` marks the chunk as pending, either set_ready_and_clear_pending() or - /// clear_pending() must be called to clear the pending state. - fn check_ready_and_mark_pending(&self, _chunk: &dyn BlobChunkInfo) -> StorageResult { - panic!("no support of check_ready_and_mark_pending()"); - } - - /// Set the chunk to ready for use and clear the pending state. - fn set_ready_and_clear_pending(&self, _chunk: &dyn BlobChunkInfo) -> Result<()> { - panic!("no support of check_ready_and_mark_pending()"); - } - - /// Clear the pending state of the chunk. - fn clear_pending(&self, _chunk: &dyn BlobChunkInfo) { - panic!("no support of clear_pending()"); - } - - /// Check whether the implementation supports state persistence. - fn is_persist(&self) -> bool { - false - } - - /// Convert the objet to an [RangeMap](trait.RangeMap.html) object. - fn as_range_map(&self) -> Option<&dyn RangeMap> { - None - } -} - -/// Trait to track chunk or data readiness state. -/// -/// A `RangeMap` object tracks readiness state of a chunk or data range, indexed by chunk index or -/// data address. The trait methods are designed to support batch operations for improving -/// performance by avoid frequently acquire/release locks. -pub trait RangeMap: Send + Sync { - type I: Send + Sync; - - /// Check whether all chunks or data managed by the `RangeMap` object are ready. - fn is_range_all_ready(&self) -> bool { - false - } - - /// Check whether all chunks or data in the range are ready for use. 
- fn is_range_ready(&self, _start: Self::I, _count: Self::I) -> Result { - Err(enosys!()) - } - - /// Check whether all chunks or data in the range [start, start + count) are ready. - /// - /// This function checks readiness of a range of chunks or data. If a chunk or data is both not - /// ready and not pending(inflight), it will be marked as pending and returned. Following - /// actions should be: - /// - call set_range_ready_and_clear_pending() to mark data or chunks as ready and clear pending - /// state. - /// - clear_range_pending() to clear the pending state without marking data or chunks as ready. - /// - wait_for_range_ready() to wait for all data or chunks to clear pending state, including - /// data or chunks marked as pending by other threads. - fn check_range_ready_and_mark_pending( - &self, - _start: Self::I, - _count: Self::I, - ) -> Result>> { - Err(enosys!()) - } - - /// Mark all chunks or data in the range as ready for use. - fn set_range_ready_and_clear_pending(&self, _start: Self::I, _count: Self::I) -> Result<()> { - Err(enosys!()) - } - - /// Clear the pending state for all chunks or data in the range. - fn clear_range_pending(&self, _start: Self::I, _count: Self::I) {} - - /// Wait for all chunks or data in the range to be ready until timeout. - fn wait_for_range_ready(&self, _start: Self::I, _count: Self::I) -> Result { - Err(enosys!()) - } -} - -/// Trait to convert a [BlobChunkInfo](../../device/trait.BlobChunkInfo.html) object to an index -/// needed by [ChunkMap](trait.ChunkMap.html). -pub trait ChunkIndexGetter { - /// Type of index needed by [ChunkMap]. - type Index; - - /// Get the chunk's id/key for state tracking. - fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index; -} - -#[cfg(test)] -mod tests { - use crate::test::MockChunkInfo; - - use super::*; - - impl RangeMap for NoopChunkMap { - type I = u32; - } - - #[test] - fn test_trait_default_impl() { - let m = NoopChunkMap::new(false); - let chunk_info = MockChunkInfo { - index: 128, - ..Default::default() - }; - - assert!(m.is_pending(&chunk_info).is_ok()); - assert!(!m.is_pending(&chunk_info).unwrap()); - assert!(!m.is_range_all_ready()); - assert!(m.is_range_ready(0, 1).is_err()); - assert!(m.check_range_ready_and_mark_pending(0, 1).is_err()); - assert!(m.set_range_ready_and_clear_pending(0, 1).is_err()); - m.clear_range_pending(0, 1); - assert!(m.wait_for_range_ready(0, 1).is_err()); - assert!(m.as_range_map().is_none()); - assert!(!m.is_persist()); - assert!(!m.is_ready(&chunk_info).unwrap()); - } - - #[test] - #[should_panic] - fn test_check_ready_and_mark_pending_default_impl() { - let chunk_info = MockChunkInfo::default(); - let m = NoopChunkMap::new(false); - m.check_ready_and_mark_pending(&chunk_info).unwrap(); - } - - #[test] - #[should_panic] - fn test_set_ready_and_clear_pending_default_impl() { - let chunk_info = MockChunkInfo::default(); - let m = NoopChunkMap::new(false); - m.set_ready_and_clear_pending(&chunk_info).unwrap(); - } -} +// Copyright 2021 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Chunk or data readiness state tracking drivers. +//! +//! To cache data from remote backend storage onto local storage, a cache state tracking mechanism +//! is needed to track whether a specific chunk or data is ready on local storage and to cooperate +//! on concurrent data downloading. The [ChunkMap](trait.ChunkMap.html) trait is the main mechanism +//! to track chunk state. 
And [BlobStateMap](struct.BlobStateMap.html) is an adapter structure of +//! [ChunkMap] to support concurrent data downloading, which is based on a base [ChunkMap] +//! implementation to track chunk readiness state. And [RangeMap](trait.RangeMap.html) objects are +//! used to track readiness for a range of chunks or data, with support of batch operation. +//! +//! There are several implementation of the [ChunkMap] and [RangeMap] trait to track chunk and data +//! readiness state: +//! - [BlobStateMap](struct.BlobStateMap.html): an adapter structure to enable concurrent +//! synchronization manipulation of readiness state, based on an underlying base [ChunkMap] or +//! [RangeMap] object. +//! - [BlobRangeMap](struct.BlobRangeMap.html): a data state tracking driver using a bitmap file +//! to persist state, indexed by data address range. +//! - [DigestedChunkMap](struct.DigestedChunkMap.html): a chunk state tracking driver +//! for legacy Rafs images without chunk array, which uses chunk digest as the id to track chunk +//! readiness state. The [DigestedChunkMap] is not optimal in case of performance and memory +//! consumption. +//! - [IndexedChunkMap](struct.IndexedChunkMap.html): a chunk state tracking driver using a bitmap +//! file to persist state, indexed by chunk index. There's a state bit in the bitmap file for each +//! chunk, and atomic operations are used to manipulate the bitmap for concurrent state +//! manipulating. It's the recommended state tracking driver. +//! - [NoopChunkMap](struct.NoopChunkMap.html): a no-operation chunk state tracking driver, +//! which just reports every chunk as always ready to use or not. It may be used to support disk +//! based backend storage or dummy cache. + +use std::any::Any; +use std::io::Result; + +use crate::device::BlobChunkInfo; +use crate::StorageResult; + +pub use blob_state_map::BlobStateMap; +pub use digested_chunk_map::DigestedChunkMap; +pub use indexed_chunk_map::IndexedChunkMap; +pub use noop_chunk_map::NoopChunkMap; +pub use range_map::BlobRangeMap; + +mod blob_state_map; +mod digested_chunk_map; +mod indexed_chunk_map; +mod noop_chunk_map; +mod persist_map; +mod range_map; + +/// Trait to track chunk readiness state. +pub trait ChunkMap: Any + Send + Sync { + /// Check whether the chunk is ready for use. + fn is_ready(&self, chunk: &dyn BlobChunkInfo) -> Result; + + /// Check whether the chunk is pending for downloading. + fn is_pending(&self, _chunk: &dyn BlobChunkInfo) -> Result { + Ok(false) + } + + /// Check whether a chunk is ready for use or pending for downloading. + fn is_ready_or_pending(&self, chunk: &dyn BlobChunkInfo) -> Result { + if matches!(self.is_pending(chunk), Ok(true)) { + Ok(true) + } else { + self.is_ready(chunk) + } + } + + /// Check whether the chunk is ready for use, and mark it as pending if not ready yet. + /// + /// The function returns: + /// - `Err(Timeout)` waiting for inflight backend IO timeouts. + /// - `Ok(true)` if the chunk is ready. + /// - `Ok(false)` marks the chunk as pending, either set_ready_and_clear_pending() or + /// clear_pending() must be called to clear the pending state. + fn check_ready_and_mark_pending(&self, _chunk: &dyn BlobChunkInfo) -> StorageResult { + panic!("no support of check_ready_and_mark_pending()"); + } + + /// Set the chunk to ready for use and clear the pending state. + fn set_ready_and_clear_pending(&self, _chunk: &dyn BlobChunkInfo) -> Result<()> { + panic!("no support of check_ready_and_mark_pending()"); + } + + /// Clear the pending state of the chunk. 
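// --- Editor's sketch (illustration only, not part of this patch) ----------------------
// A minimal example of how a caller is expected to drive the pending-state protocol
// documented on check_ready_and_mark_pending() above. `download_chunk` is a hypothetical
// helper standing in for whatever backend read the concrete cache implementation
// performs; error mapping is simplified.
fn ensure_chunk_ready(map: &dyn ChunkMap, chunk: &dyn BlobChunkInfo) -> std::io::Result<()> {
    match map.check_ready_and_mark_pending(chunk) {
        // Chunk is already cached locally, nothing to do.
        Ok(true) => Ok(()),
        // We now own the pending state and must clear it on every path below.
        Ok(false) => match download_chunk(chunk) {
            Ok(()) => map.set_ready_and_clear_pending(chunk),
            Err(e) => {
                // Download failed: drop the pending mark so other threads may retry.
                map.clear_pending(chunk);
                Err(e)
            }
        },
        // Waiting for another thread's inflight IO timed out.
        Err(e) => Err(std::io::Error::new(
            std::io::ErrorKind::TimedOut,
            format!("wait for inflight chunk failed: {:?}", e),
        )),
    }
}
// ---------------------------------------------------------------------------------------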
+ fn clear_pending(&self, _chunk: &dyn BlobChunkInfo) { + panic!("no support of clear_pending()"); + } + + /// Check whether the implementation supports state persistence. + fn is_persist(&self) -> bool { + false + } + + /// Convert the objet to an [RangeMap](trait.RangeMap.html) object. + fn as_range_map(&self) -> Option<&dyn RangeMap> { + None + } +} + +/// Trait to track chunk or data readiness state. +/// +/// A `RangeMap` object tracks readiness state of a chunk or data range, indexed by chunk index or +/// data address. The trait methods are designed to support batch operations for improving +/// performance by avoid frequently acquire/release locks. +pub trait RangeMap: Send + Sync { + type I: Send + Sync; + + /// Check whether all chunks or data managed by the `RangeMap` object are ready. + fn is_range_all_ready(&self) -> bool { + false + } + + /// Check whether all chunks or data in the range are ready for use. + fn is_range_ready(&self, _start: Self::I, _count: Self::I) -> Result { + Err(enosys!()) + } + + /// Check whether all chunks or data in the range [start, start + count) are ready. + /// + /// This function checks readiness of a range of chunks or data. If a chunk or data is both not + /// ready and not pending(inflight), it will be marked as pending and returned. Following + /// actions should be: + /// - call set_range_ready_and_clear_pending() to mark data or chunks as ready and clear pending + /// state. + /// - clear_range_pending() to clear the pending state without marking data or chunks as ready. + /// - wait_for_range_ready() to wait for all data or chunks to clear pending state, including + /// data or chunks marked as pending by other threads. + fn check_range_ready_and_mark_pending( + &self, + _start: Self::I, + _count: Self::I, + ) -> Result>> { + Err(enosys!()) + } + + /// Mark all chunks or data in the range as ready for use. + fn set_range_ready_and_clear_pending(&self, _start: Self::I, _count: Self::I) -> Result<()> { + Err(enosys!()) + } + + /// Clear the pending state for all chunks or data in the range. + fn clear_range_pending(&self, _start: Self::I, _count: Self::I) {} + + /// Wait for all chunks or data in the range to be ready until timeout. + fn wait_for_range_ready(&self, _start: Self::I, _count: Self::I) -> Result { + Err(enosys!()) + } +} + +/// Trait to convert a [BlobChunkInfo](../../device/trait.BlobChunkInfo.html) object to an index +/// needed by [ChunkMap](trait.ChunkMap.html). +pub trait ChunkIndexGetter { + /// Type of index needed by [ChunkMap]. + type Index; + + /// Get the chunk's id/key for state tracking. 
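// --- Editor's sketch (illustration only, not part of this patch) ----------------------
// The batch counterpart for the `RangeMap` protocol documented above. `fetch` is a
// hypothetical per-item loader; real cache implementations merge the returned pending
// items into larger backend requests instead of loading them one by one.
fn ensure_range_ready<M: RangeMap<I = u64>>(
    map: &M,
    start: u64,
    count: u64,
    fetch: impl Fn(u64) -> std::io::Result<()>,
) -> std::io::Result<bool> {
    if let Some(pending) = map.check_range_ready_and_mark_pending(start, count)? {
        for item in pending {
            if let Err(e) = fetch(item) {
                // Failure: drop our pending marks so other threads may retry.
                map.clear_range_pending(start, count);
                return Err(e);
            }
        }
        map.set_range_ready_and_clear_pending(start, count)?;
    }
    // Also wait for items that other threads marked as pending.
    map.wait_for_range_ready(start, count)
}
// ---------------------------------------------------------------------------------------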
+ fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index; +} + +#[cfg(test)] +mod tests { + use crate::test::MockChunkInfo; + + use super::*; + + impl RangeMap for NoopChunkMap { + type I = u32; + } + + #[test] + fn test_trait_default_impl() { + let m = NoopChunkMap::new(false); + let chunk_info = MockChunkInfo { + index: 128, + ..Default::default() + }; + + assert!(m.is_pending(&chunk_info).is_ok()); + assert!(!m.is_pending(&chunk_info).unwrap()); + assert!(!m.is_range_all_ready()); + assert!(m.is_range_ready(0, 1).is_err()); + assert!(m.check_range_ready_and_mark_pending(0, 1).is_err()); + assert!(m.set_range_ready_and_clear_pending(0, 1).is_err()); + m.clear_range_pending(0, 1); + assert!(m.wait_for_range_ready(0, 1).is_err()); + assert!(m.as_range_map().is_none()); + assert!(!m.is_persist()); + assert!(!m.is_ready(&chunk_info).unwrap()); + } + + #[test] + #[should_panic] + fn test_check_ready_and_mark_pending_default_impl() { + let chunk_info = MockChunkInfo::default(); + let m = NoopChunkMap::new(false); + m.check_ready_and_mark_pending(&chunk_info).unwrap(); + } + + #[test] + #[should_panic] + fn test_set_ready_and_clear_pending_default_impl() { + let chunk_info = MockChunkInfo::default(); + let m = NoopChunkMap::new(false); + m.set_ready_and_clear_pending(&chunk_info).unwrap(); + } +} diff --git a/storage/src/cache/state/noop_chunk_map.rs b/storage/src/cache/state/noop_chunk_map.rs index 7e4d29bc415..e205e8943f6 100644 --- a/storage/src/cache/state/noop_chunk_map.rs +++ b/storage/src/cache/state/noop_chunk_map.rs @@ -1,37 +1,37 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::Result; - -use crate::cache::state::{ChunkIndexGetter, ChunkMap}; -use crate::device::BlobChunkInfo; - -/// A dummy implementation of the [ChunkMap] trait. -/// -/// The `NoopChunkMap` is an dummy implementation of [ChunkMap], which just reports every chunk as -/// always ready to use or not. It may be used to support disk based backend storage. -pub struct NoopChunkMap { - cached: bool, -} - -impl NoopChunkMap { - /// Create a new instance of `NoopChunkMap`. - pub fn new(cached: bool) -> Self { - Self { cached } - } -} - -impl ChunkMap for NoopChunkMap { - fn is_ready(&self, _chunk: &dyn BlobChunkInfo) -> Result { - Ok(self.cached) - } -} - -impl ChunkIndexGetter for NoopChunkMap { - type Index = u32; - - fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { - chunk.id() - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::Result; + +use crate::cache::state::{ChunkIndexGetter, ChunkMap}; +use crate::device::BlobChunkInfo; + +/// A dummy implementation of the [ChunkMap] trait. +/// +/// The `NoopChunkMap` is an dummy implementation of [ChunkMap], which just reports every chunk as +/// always ready to use or not. It may be used to support disk based backend storage. +pub struct NoopChunkMap { + cached: bool, +} + +impl NoopChunkMap { + /// Create a new instance of `NoopChunkMap`. 
+ pub fn new(cached: bool) -> Self { + Self { cached } + } +} + +impl ChunkMap for NoopChunkMap { + fn is_ready(&self, _chunk: &dyn BlobChunkInfo) -> Result { + Ok(self.cached) + } +} + +impl ChunkIndexGetter for NoopChunkMap { + type Index = u32; + + fn get_index(chunk: &dyn BlobChunkInfo) -> Self::Index { + chunk.id() + } +} diff --git a/storage/src/cache/state/persist_map.rs b/storage/src/cache/state/persist_map.rs index bf434174607..bff9e375424 100644 --- a/storage/src/cache/state/persist_map.rs +++ b/storage/src/cache/state/persist_map.rs @@ -1,264 +1,264 @@ -// Copyright 2021 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fs::{File, OpenOptions}; -use std::io::{Result, Write}; -use std::os::unix::io::AsRawFd; -use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; - -use nydus_utils::div_round_up; -use nydus_utils::filemap::{clone_file, FileMapState}; - -use crate::utils::readahead; - -pub(crate) const MAGIC1: u32 = 0x424D_4150; -pub(crate) const MAGIC2: u32 = 0x434D_4150; -pub(crate) const MAGIC_ALL_READY: u32 = 0x4D4D_4150; -pub(crate) const HEADER_SIZE: usize = 4096; -pub(crate) const HEADER_RESERVED_SIZE: usize = HEADER_SIZE - 16; - -/// The blob chunk map file header, 4096 bytes. -#[repr(C)] -pub(crate) struct Header { - /// PersistMap magic number - pub magic: u32, - pub version: u32, - pub magic2: u32, - pub all_ready: u32, - pub reserved: [u8; HEADER_RESERVED_SIZE], -} - -impl Header { - pub fn as_slice(&self) -> &[u8] { - unsafe { - std::slice::from_raw_parts( - self as *const Header as *const u8, - std::mem::size_of::
<Header>
(), - ) - } - } -} - -pub(crate) struct PersistMap { - pub count: u32, - pub not_ready_count: AtomicU32, - filemap: FileMapState, -} - -impl PersistMap { - pub fn open(filename: &str, chunk_count: u32, create: bool, persist: bool) -> Result { - if chunk_count == 0 { - return Err(einval!("chunk count should be greater than 0")); - } - - let mut file = OpenOptions::new() - .read(true) - .write(create) - .create(create) - .truncate(!persist) - .open(filename) - .map_err(|err| { - einval!(format!( - "failed to open/create blob chunk_map file {:?}: {:?}", - filename, err - )) - })?; - - let file_size = file.metadata()?.len(); - let bitmap_size = div_round_up(chunk_count as u64, 8u64); - let expected_size = HEADER_SIZE as u64 + bitmap_size; - let mut new_content = false; - - if file_size == 0 { - if !create { - return Err(enoent!()); - } - - new_content = true; - Self::write_header(&mut file, expected_size)?; - } else if file_size != expected_size { - // File size doesn't match, it's too risky to accept the chunk state file. Fallback to - // always mark chunk data as not ready. - warn!("blob chunk_map file may be corrupted: {:?}", filename); - return Err(einval!(format!("chunk_map file {:?} is invalid", filename))); - } - - let file2 = clone_file(file.as_raw_fd())?; - let mut filemap = FileMapState::new(file2, 0, expected_size as usize, true)?; - let header = filemap.get_mut::
(0)?; - if header.magic != MAGIC1 { - if !create { - return Err(enoent!()); - } - - // There's race window between "file.set_len()" and "file.write(&header)". If that - // happens, all file content should be zero. Detect the race window and write out - // header again to fix it. - let content = filemap.get_slice::(0, expected_size as usize)?; - for c in content { - if *c != 0 { - return Err(einval!(format!( - "invalid blob chunk_map file header: {:?}", - filename - ))); - } - } - - new_content = true; - Self::write_header(&mut file, expected_size)?; - } - - let header = filemap.get_mut::
(0)?; - let mut not_ready_count = chunk_count; - if header.version >= 1 { - if header.magic2 != MAGIC2 { - return Err(einval!(format!( - "invalid blob chunk_map file header: {:?}", - filename - ))); - } - if header.all_ready == MAGIC_ALL_READY { - not_ready_count = 0; - } else if new_content { - not_ready_count = chunk_count; - } else { - let mut ready_count = 0; - for idx in HEADER_SIZE..expected_size as usize { - let current = filemap.get_ref::(idx)?; - let val = current.load(Ordering::Acquire); - ready_count += val.count_ones() as u32; - } - - if ready_count >= chunk_count { - let header = filemap.get_mut::
(0)?; - header.all_ready = MAGIC_ALL_READY; - let _ = file.sync_all(); - not_ready_count = 0; - } else { - not_ready_count = chunk_count - ready_count; - } - } - } - - readahead(file.as_raw_fd(), 0, expected_size); - if !persist { - let _ = std::fs::remove_file(filename); - } - - Ok(Self { - count: chunk_count, - not_ready_count: AtomicU32::new(not_ready_count), - filemap, - }) - } - - fn write_header(file: &mut File, size: u64) -> Result<()> { - let header = Header { - magic: MAGIC1, - version: 1, - magic2: MAGIC2, - all_ready: 0, - reserved: [0x0u8; HEADER_RESERVED_SIZE], - }; - - // Set file size to expected value and sync to disk. - file.set_len(size)?; - file.sync_all()?; - // write file header and sync to disk. - file.write_all(header.as_slice())?; - file.sync_all()?; - - Ok(()) - } - - #[cfg(test)] - pub fn size(&self) -> usize { - self.filemap.size() - } - - #[inline] - pub fn validate_index(&self, idx: u32) -> Result { - if idx < self.count { - Ok(idx) - } else { - Err(einval!(format!( - "chunk index {} exceeds chunk count {}", - idx, self.count - ))) - } - } - - #[inline] - fn read_u8(&self, idx: u32) -> u8 { - let start = HEADER_SIZE + (idx as usize >> 3); - let current = self.filemap.get_ref::(start).unwrap(); - - current.load(Ordering::Acquire) - } - - #[inline] - fn write_u8(&self, idx: u32, current: u8) -> bool { - let mask = Self::index_to_mask(idx); - let expected = current | mask; - let start = HEADER_SIZE + (idx as usize >> 3); - let atomic_value = self.filemap.get_ref::(start).unwrap(); - - atomic_value - .compare_exchange(current, expected, Ordering::Acquire, Ordering::Relaxed) - .is_ok() - } - - #[inline] - fn index_to_mask(index: u32) -> u8 { - let pos = 8 - ((index & 0b111) + 1); - 1 << pos - } - - #[inline] - pub fn is_chunk_ready(&self, index: u32) -> (bool, u8) { - let mask = Self::index_to_mask(index); - let current = self.read_u8(index); - let ready = current & mask == mask; - - (ready, current) - } - - pub fn set_chunk_ready(&self, index: u32) -> Result<()> { - let index = self.validate_index(index)?; - - // Loop to atomically update the state bit corresponding to the chunk index. - loop { - let (ready, current) = self.is_chunk_ready(index); - if ready { - break; - } - - if self.write_u8(index, current) { - if self.not_ready_count.fetch_sub(1, Ordering::AcqRel) == 1 { - self.mark_all_ready(); - } - break; - } - } - - Ok(()) - } - - fn mark_all_ready(&self) { - if self.filemap.sync_data().is_ok() { - /* - if let Ok(header) = self.filemap.get_mut::
(0) { - header.all_ready = MAGIC_ALL_READY; - let _ = self.filemap.sync_data(); - } - */ - } - } - - #[inline] - pub fn is_range_all_ready(&self) -> bool { - self.not_ready_count.load(Ordering::Acquire) == 0 - } -} +// Copyright 2021 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::{File, OpenOptions}; +use std::io::{Result, Write}; +use std::os::unix::io::AsRawFd; +use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; + +use nydus_utils::div_round_up; +use nydus_utils::filemap::{clone_file, FileMapState}; + +use crate::utils::readahead; + +pub(crate) const MAGIC1: u32 = 0x424D_4150; +pub(crate) const MAGIC2: u32 = 0x434D_4150; +pub(crate) const MAGIC_ALL_READY: u32 = 0x4D4D_4150; +pub(crate) const HEADER_SIZE: usize = 4096; +pub(crate) const HEADER_RESERVED_SIZE: usize = HEADER_SIZE - 16; + +/// The blob chunk map file header, 4096 bytes. +#[repr(C)] +pub(crate) struct Header { + /// PersistMap magic number + pub magic: u32, + pub version: u32, + pub magic2: u32, + pub all_ready: u32, + pub reserved: [u8; HEADER_RESERVED_SIZE], +} + +impl Header { + pub fn as_slice(&self) -> &[u8] { + unsafe { + std::slice::from_raw_parts( + self as *const Header as *const u8, + std::mem::size_of::
(), + ) + } + } +} + +pub(crate) struct PersistMap { + pub count: u32, + pub not_ready_count: AtomicU32, + filemap: FileMapState, +} + +impl PersistMap { + pub fn open(filename: &str, chunk_count: u32, create: bool, persist: bool) -> Result { + if chunk_count == 0 { + return Err(einval!("chunk count should be greater than 0")); + } + + let mut file = OpenOptions::new() + .read(true) + .write(create) + .create(create) + .truncate(!persist) + .open(filename) + .map_err(|err| { + einval!(format!( + "failed to open/create blob chunk_map file {:?}: {:?}", + filename, err + )) + })?; + + let file_size = file.metadata()?.len(); + let bitmap_size = div_round_up(chunk_count as u64, 8u64); + let expected_size = HEADER_SIZE as u64 + bitmap_size; + let mut new_content = false; + + if file_size == 0 { + if !create { + return Err(enoent!()); + } + + new_content = true; + Self::write_header(&mut file, expected_size)?; + } else if file_size != expected_size { + // File size doesn't match, it's too risky to accept the chunk state file. Fallback to + // always mark chunk data as not ready. + warn!("blob chunk_map file may be corrupted: {:?}", filename); + return Err(einval!(format!("chunk_map file {:?} is invalid", filename))); + } + + let file2 = clone_file(file.as_raw_fd())?; + let mut filemap = FileMapState::new(file2, 0, expected_size as usize, true)?; + let header = filemap.get_mut::
(0)?; + if header.magic != MAGIC1 { + if !create { + return Err(enoent!()); + } + + // There's race window between "file.set_len()" and "file.write(&header)". If that + // happens, all file content should be zero. Detect the race window and write out + // header again to fix it. + let content = filemap.get_slice::(0, expected_size as usize)?; + for c in content { + if *c != 0 { + return Err(einval!(format!( + "invalid blob chunk_map file header: {:?}", + filename + ))); + } + } + + new_content = true; + Self::write_header(&mut file, expected_size)?; + } + + let header = filemap.get_mut::
(0)?; + let mut not_ready_count = chunk_count; + if header.version >= 1 { + if header.magic2 != MAGIC2 { + return Err(einval!(format!( + "invalid blob chunk_map file header: {:?}", + filename + ))); + } + if header.all_ready == MAGIC_ALL_READY { + not_ready_count = 0; + } else if new_content { + not_ready_count = chunk_count; + } else { + let mut ready_count = 0; + for idx in HEADER_SIZE..expected_size as usize { + let current = filemap.get_ref::(idx)?; + let val = current.load(Ordering::Acquire); + ready_count += val.count_ones() as u32; + } + + if ready_count >= chunk_count { + let header = filemap.get_mut::
(0)?; + header.all_ready = MAGIC_ALL_READY; + let _ = file.sync_all(); + not_ready_count = 0; + } else { + not_ready_count = chunk_count - ready_count; + } + } + } + + readahead(file.as_raw_fd(), 0, expected_size); + if !persist { + let _ = std::fs::remove_file(filename); + } + + Ok(Self { + count: chunk_count, + not_ready_count: AtomicU32::new(not_ready_count), + filemap, + }) + } + + fn write_header(file: &mut File, size: u64) -> Result<()> { + let header = Header { + magic: MAGIC1, + version: 1, + magic2: MAGIC2, + all_ready: 0, + reserved: [0x0u8; HEADER_RESERVED_SIZE], + }; + + // Set file size to expected value and sync to disk. + file.set_len(size)?; + file.sync_all()?; + // write file header and sync to disk. + file.write_all(header.as_slice())?; + file.sync_all()?; + + Ok(()) + } + + #[cfg(test)] + pub fn size(&self) -> usize { + self.filemap.size() + } + + #[inline] + pub fn validate_index(&self, idx: u32) -> Result { + if idx < self.count { + Ok(idx) + } else { + Err(einval!(format!( + "chunk index {} exceeds chunk count {}", + idx, self.count + ))) + } + } + + #[inline] + fn read_u8(&self, idx: u32) -> u8 { + let start = HEADER_SIZE + (idx as usize >> 3); + let current = self.filemap.get_ref::(start).unwrap(); + + current.load(Ordering::Acquire) + } + + #[inline] + fn write_u8(&self, idx: u32, current: u8) -> bool { + let mask = Self::index_to_mask(idx); + let expected = current | mask; + let start = HEADER_SIZE + (idx as usize >> 3); + let atomic_value = self.filemap.get_ref::(start).unwrap(); + + atomic_value + .compare_exchange(current, expected, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } + + #[inline] + fn index_to_mask(index: u32) -> u8 { + let pos = 8 - ((index & 0b111) + 1); + 1 << pos + } + + #[inline] + pub fn is_chunk_ready(&self, index: u32) -> (bool, u8) { + let mask = Self::index_to_mask(index); + let current = self.read_u8(index); + let ready = current & mask == mask; + + (ready, current) + } + + pub fn set_chunk_ready(&self, index: u32) -> Result<()> { + let index = self.validate_index(index)?; + + // Loop to atomically update the state bit corresponding to the chunk index. + loop { + let (ready, current) = self.is_chunk_ready(index); + if ready { + break; + } + + if self.write_u8(index, current) { + if self.not_ready_count.fetch_sub(1, Ordering::AcqRel) == 1 { + self.mark_all_ready(); + } + break; + } + } + + Ok(()) + } + + fn mark_all_ready(&self) { + if self.filemap.sync_data().is_ok() { + /* + if let Ok(header) = self.filemap.get_mut::
(0) { + header.all_ready = MAGIC_ALL_READY; + let _ = self.filemap.sync_data(); + } + */ + } + } + + #[inline] + pub fn is_range_all_ready(&self) -> bool { + self.not_ready_count.load(Ordering::Acquire) == 0 + } +} diff --git a/storage/src/cache/state/range_map.rs b/storage/src/cache/state/range_map.rs index 131bc15eeae..dc4fbecfe14 100644 --- a/storage/src/cache/state/range_map.rs +++ b/storage/src/cache/state/range_map.rs @@ -1,217 +1,217 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::Result; - -use crate::cache::state::persist_map::PersistMap; -use crate::cache::state::RangeMap; - -/// The name suffix of blob chunk_map file, named $blob_id.chunk_map. -const FILE_SUFFIX: &str = "range_map"; - -/// An implementation of [RangeMap] to support cache state tracking by using a bitmap file. -/// -/// The `BlobRangeMap` is an implementation of [RangeMap] which uses a bitmap file and atomic -/// bitmap operations to track readiness state. It creates or opens a file with the name -/// `$blob_id.range_map` to record whether a data range has been cached by the blob cache, and -/// atomic bitmap operations are used to manipulate the state bit. The bitmap file will be persisted -/// to disk. -pub struct BlobRangeMap { - pub(crate) shift: u32, - map: PersistMap, -} - -impl BlobRangeMap { - /// Create a new instance of `BlobRangeMap`. - pub fn new(blob_path: &str, count: u32, shift: u32) -> Result { - let filename = format!("{}.{}", blob_path, FILE_SUFFIX); - debug_assert!(shift < 64); - - PersistMap::open(&filename, count, true, true).map(|map| BlobRangeMap { shift, map }) - } - - /// Create a new instance of `BlobRangeMap` from an existing chunk map file. - pub fn open(blob_id: &str, workdir: &str, count: u32, shift: u32) -> Result { - let filename = format!("{}/{}.{}", workdir, blob_id, FILE_SUFFIX); - debug_assert!(shift < 64); - - PersistMap::open(&filename, count, false, true).map(|map| BlobRangeMap { shift, map }) - } - - pub(crate) fn get_range(&self, start: u64, count: u64) -> Result<(u32, u32)> { - if let Some(end) = start.checked_add(count) { - let start_index = start >> self.shift as u64; - let end_index = (end - 1) >> self.shift as u64; - if start_index > u32::MAX as u64 || end_index > u32::MAX as u64 { - Err(einval!()) - } else { - self.map.validate_index(start_index as u32)?; - self.map.validate_index(end_index as u32)?; - Ok((start_index as u32, end_index as u32 + 1)) - } - } else { - Err(einval!()) - } - } -} - -impl RangeMap for BlobRangeMap { - type I = u64; - - fn is_range_all_ready(&self) -> bool { - self.map.is_range_all_ready() - } - - /// Check whether all data in the range are ready for use. 
- fn is_range_ready(&self, start: u64, count: u64) -> Result { - if !self.is_range_all_ready() { - let (start_index, end_index) = self.get_range(start, count)?; - for index in start_index..end_index { - if !self.map.is_chunk_ready(index).0 { - return Ok(false); - } - } - } - - Ok(true) - } - - fn check_range_ready_and_mark_pending( - &self, - start: u64, - count: u64, - ) -> Result>> { - if self.is_range_all_ready() { - Ok(None) - } else { - let (start_index, end_index) = self.get_range(start, count)?; - let mut vec = Vec::with_capacity(count as usize); - - for index in start_index..end_index { - if !self.map.is_chunk_ready(index).0 { - vec.push((index as u64) << self.shift); - } - } - - if vec.is_empty() { - Ok(None) - } else { - Ok(Some(vec)) - } - } - } - - fn set_range_ready_and_clear_pending(&self, start: u64, count: u64) -> Result<()> { - if !self.is_range_all_ready() { - let (start_index, end_index) = self.get_range(start, count)?; - - for index in start_index..end_index { - self.map.set_chunk_ready(index)?; - } - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - use std::thread; - use std::time::Instant; - - use vmm_sys_util::tempdir::TempDir; - - use super::super::BlobStateMap; - use super::*; - - #[test] - fn test_range_map() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - let range_count = 1000000; - let skip_index = 77; - - let map1 = Arc::new(BlobStateMap::from_range_map( - BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), - )); - let map2 = Arc::new(BlobStateMap::from_range_map( - BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), - )); - let map3 = Arc::new(BlobStateMap::from_range_map( - BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), - )); - - let now = Instant::now(); - - let h1 = thread::spawn(move || { - for idx in 0..range_count { - if idx % skip_index != 0 { - let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); - map1.set_range_ready_and_clear_pending(addr, 1).unwrap(); - } - } - }); - - let h2 = thread::spawn(move || { - for idx in 0..range_count { - if idx % skip_index != 0 { - let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); - map2.set_range_ready_and_clear_pending(addr, 1).unwrap(); - } - } - }); - - h1.join() - .map_err(|e| { - error!("Join error {:?}", e); - e - }) - .unwrap(); - h2.join() - .map_err(|e| { - error!("Join error {:?}", e); - e - }) - .unwrap(); - - println!("BlobRangeMap Concurrency: {}ms", now.elapsed().as_millis()); - - for idx in 0..range_count { - let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); - - let is_ready = map3.is_range_ready(addr, 1).unwrap(); - if idx % skip_index == 0 { - if is_ready { - panic!("indexed chunk map: index {} shouldn't be ready", idx); - } - } else if !is_ready { - panic!("indexed chunk map: index {} should be ready", idx); - } - } - } - - #[test] - fn test_range_map_state() { - let dir = TempDir::new().unwrap(); - let blob_path = dir.as_path().join("blob-1"); - let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); - let range_count = 100; - - let map = BlobRangeMap::new(&blob_path, range_count, 0).unwrap(); - assert_eq!( - map.check_range_ready_and_mark_pending(1, 10) - .unwrap() - .unwrap() - .len(), - 10 - ); - assert!(map.set_range_ready_and_clear_pending(1, 10).is_ok()); - assert!(map - .check_range_ready_and_mark_pending(1, 10) - .unwrap() - .is_none()); - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::Result; + +use crate::cache::state::persist_map::PersistMap; +use crate::cache::state::RangeMap; + +/// The name suffix of blob chunk_map file, named $blob_id.chunk_map. +const FILE_SUFFIX: &str = "range_map"; + +/// An implementation of [RangeMap] to support cache state tracking by using a bitmap file. +/// +/// The `BlobRangeMap` is an implementation of [RangeMap] which uses a bitmap file and atomic +/// bitmap operations to track readiness state. It creates or opens a file with the name +/// `$blob_id.range_map` to record whether a data range has been cached by the blob cache, and +/// atomic bitmap operations are used to manipulate the state bit. The bitmap file will be persisted +/// to disk. +pub struct BlobRangeMap { + pub(crate) shift: u32, + map: PersistMap, +} + +impl BlobRangeMap { + /// Create a new instance of `BlobRangeMap`. + pub fn new(blob_path: &str, count: u32, shift: u32) -> Result { + let filename = format!("{}.{}", blob_path, FILE_SUFFIX); + debug_assert!(shift < 64); + + PersistMap::open(&filename, count, true, true).map(|map| BlobRangeMap { shift, map }) + } + + /// Create a new instance of `BlobRangeMap` from an existing chunk map file. + pub fn open(blob_id: &str, workdir: &str, count: u32, shift: u32) -> Result { + let filename = format!("{}/{}.{}", workdir, blob_id, FILE_SUFFIX); + debug_assert!(shift < 64); + + PersistMap::open(&filename, count, false, true).map(|map| BlobRangeMap { shift, map }) + } + + pub(crate) fn get_range(&self, start: u64, count: u64) -> Result<(u32, u32)> { + if let Some(end) = start.checked_add(count) { + let start_index = start >> self.shift as u64; + let end_index = (end - 1) >> self.shift as u64; + if start_index > u32::MAX as u64 || end_index > u32::MAX as u64 { + Err(einval!()) + } else { + self.map.validate_index(start_index as u32)?; + self.map.validate_index(end_index as u32)?; + Ok((start_index as u32, end_index as u32 + 1)) + } + } else { + Err(einval!()) + } + } +} + +impl RangeMap for BlobRangeMap { + type I = u64; + + fn is_range_all_ready(&self) -> bool { + self.map.is_range_all_ready() + } + + /// Check whether all data in the range are ready for use. 
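// Editor's note, worked example: with `shift == 12` each state bit covers a 4 KiB
// (1 << 12) window of the blob. A request with start = 0x3FFF, count = 2 straddles two
// windows: start_index = 0x3FFF >> 12 = 3 and end_index = (0x4001 - 1) >> 12 = 4, so
// get_range() returns (3, 5) and both bits must be set before the range counts as ready.
// In the underlying PersistMap file, bit N lives in the byte at offset
// HEADER_SIZE + (N >> 3), with the in-byte position given by index_to_mask().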
+ fn is_range_ready(&self, start: u64, count: u64) -> Result { + if !self.is_range_all_ready() { + let (start_index, end_index) = self.get_range(start, count)?; + for index in start_index..end_index { + if !self.map.is_chunk_ready(index).0 { + return Ok(false); + } + } + } + + Ok(true) + } + + fn check_range_ready_and_mark_pending( + &self, + start: u64, + count: u64, + ) -> Result>> { + if self.is_range_all_ready() { + Ok(None) + } else { + let (start_index, end_index) = self.get_range(start, count)?; + let mut vec = Vec::with_capacity(count as usize); + + for index in start_index..end_index { + if !self.map.is_chunk_ready(index).0 { + vec.push((index as u64) << self.shift); + } + } + + if vec.is_empty() { + Ok(None) + } else { + Ok(Some(vec)) + } + } + } + + fn set_range_ready_and_clear_pending(&self, start: u64, count: u64) -> Result<()> { + if !self.is_range_all_ready() { + let (start_index, end_index) = self.get_range(start, count)?; + + for index in start_index..end_index { + self.map.set_chunk_ready(index)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::thread; + use std::time::Instant; + + use vmm_sys_util::tempdir::TempDir; + + use super::super::BlobStateMap; + use super::*; + + #[test] + fn test_range_map() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + let range_count = 1000000; + let skip_index = 77; + + let map1 = Arc::new(BlobStateMap::from_range_map( + BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), + )); + let map2 = Arc::new(BlobStateMap::from_range_map( + BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), + )); + let map3 = Arc::new(BlobStateMap::from_range_map( + BlobRangeMap::new(&blob_path, range_count, 12).unwrap(), + )); + + let now = Instant::now(); + + let h1 = thread::spawn(move || { + for idx in 0..range_count { + if idx % skip_index != 0 { + let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); + map1.set_range_ready_and_clear_pending(addr, 1).unwrap(); + } + } + }); + + let h2 = thread::spawn(move || { + for idx in 0..range_count { + if idx % skip_index != 0 { + let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); + map2.set_range_ready_and_clear_pending(addr, 1).unwrap(); + } + } + }); + + h1.join() + .map_err(|e| { + error!("Join error {:?}", e); + e + }) + .unwrap(); + h2.join() + .map_err(|e| { + error!("Join error {:?}", e); + e + }) + .unwrap(); + + println!("BlobRangeMap Concurrency: {}ms", now.elapsed().as_millis()); + + for idx in 0..range_count { + let addr = ((idx as u64) << 12) + (idx as u64 % 0x1000); + + let is_ready = map3.is_range_ready(addr, 1).unwrap(); + if idx % skip_index == 0 { + if is_ready { + panic!("indexed chunk map: index {} shouldn't be ready", idx); + } + } else if !is_ready { + panic!("indexed chunk map: index {} should be ready", idx); + } + } + } + + #[test] + fn test_range_map_state() { + let dir = TempDir::new().unwrap(); + let blob_path = dir.as_path().join("blob-1"); + let blob_path = blob_path.as_os_str().to_str().unwrap().to_string(); + let range_count = 100; + + let map = BlobRangeMap::new(&blob_path, range_count, 0).unwrap(); + assert_eq!( + map.check_range_ready_and_mark_pending(1, 10) + .unwrap() + .unwrap() + .len(), + 10 + ); + assert!(map.set_range_ready_and_clear_pending(1, 10).is_ok()); + assert!(map + .check_range_ready_and_mark_pending(1, 10) + .unwrap() + .is_none()); + } +} diff --git a/storage/src/cache/worker.rs 
b/storage/src/cache/worker.rs index d76f4fe251d..8899f1b10f3 100644 --- a/storage/src/cache/worker.rs +++ b/storage/src/cache/worker.rs @@ -1,521 +1,521 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::Result; -use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, AtomicU64, AtomicUsize, Ordering}; -use std::sync::{Arc, Once}; -use std::thread; -use std::time::{Duration, SystemTime}; - -use nydus_api::PrefetchConfigV2; -use nydus_utils::async_helper::with_runtime; -use nydus_utils::metrics::{BlobcacheMetrics, Metric}; -use nydus_utils::mpmc::Channel; -use tokio::runtime::Runtime; -use tokio::sync::Semaphore; - -use crate::cache::{BlobCache, BlobIoRange}; -use crate::factory::ASYNC_RUNTIME; - -/// Configuration information for asynchronous workers. -pub(crate) struct AsyncPrefetchConfig { - /// Whether or not to enable prefetch. - pub enable: bool, - /// Number of working threads. - pub threads_count: usize, - /// The amplify batch size to prefetch data from backend. - pub batch_size: usize, - /// Network bandwidth for prefetch, in unit of Bytes and Zero means no rate limit is set. - #[allow(unused)] - pub bandwidth_limit: u32, -} - -impl From<&PrefetchConfigV2> for AsyncPrefetchConfig { - fn from(p: &PrefetchConfigV2) -> Self { - AsyncPrefetchConfig { - enable: p.enable, - threads_count: p.threads_count, - batch_size: p.batch_size, - bandwidth_limit: p.bandwidth_limit, - } - } -} - -/// Asynchronous service request message. -pub(crate) enum AsyncPrefetchMessage { - /// Asynchronous blob layer prefetch request with (offset, size) of blob on storage backend. - BlobPrefetch(Arc, u64, u64, SystemTime), - /// Asynchronous file-system layer prefetch request. - FsPrefetch(Arc, BlobIoRange, SystemTime), - #[cfg_attr(not(test), allow(unused))] - /// Ping for test. - Ping, - #[allow(unused)] - RateLimiter(u64), -} - -impl AsyncPrefetchMessage { - /// Create a new asynchronous filesystem prefetch request message. - pub fn new_fs_prefetch(blob_cache: Arc, req: BlobIoRange) -> Self { - AsyncPrefetchMessage::FsPrefetch(blob_cache, req, SystemTime::now()) - } - - /// Create a new asynchronous blob prefetch request message. - pub fn new_blob_prefetch(blob_cache: Arc, offset: u64, size: u64) -> Self { - AsyncPrefetchMessage::BlobPrefetch(blob_cache, offset, size, SystemTime::now()) - } -} - -/// An asynchronous task manager for data prefetching -pub(crate) struct AsyncWorkerMgr { - metrics: Arc, - ping_requests: AtomicU32, - workers: AtomicU32, - active: AtomicBool, - begin_timing_once: Once, - - // Limit the total retry times to avoid unnecessary resource consumption. - retry_times: AtomicI32, - - prefetch_sema: Arc, - prefetch_channel: Arc>, - prefetch_config: Arc, - #[allow(unused)] - prefetch_delayed: AtomicU64, - prefetch_inflight: AtomicU32, - prefetch_consumed: AtomicUsize, - #[cfg(feature = "prefetch-rate-limit")] - prefetch_limiter: Option>, -} - -impl AsyncWorkerMgr { - /// Create a new instance of `AsyncWorkerMgr`. - pub fn new( - metrics: Arc, - prefetch_config: Arc, - ) -> Result { - #[cfg(feature = "prefetch-rate-limit")] - let prefetch_limiter = match prefetch_config.bandwidth_limit { - 0 => None, - v => { - // If the given value is less than maximum blob chunk size, it exceeds burst size of the - // limiter ending up with throttling all throughput, so ensure bandwidth is bigger than - // the maximum chunk size. 
- let limit = std::cmp::max(crate::RAFS_MAX_CHUNK_SIZE as usize, v as usize); - let limiter = leaky_bucket::RateLimiter::builder() - .initial(limit) - .refill(limit / 10) - .interval(Duration::from_millis(100)) - .build(); - Some(Arc::new(limiter)) - } - }; - - Ok(AsyncWorkerMgr { - metrics, - ping_requests: AtomicU32::new(0), - workers: AtomicU32::new(0), - active: AtomicBool::new(false), - begin_timing_once: Once::new(), - - retry_times: AtomicI32::new(32), - - prefetch_sema: Arc::new(Semaphore::new(0)), - prefetch_channel: Arc::new(Channel::new()), - prefetch_config, - prefetch_delayed: AtomicU64::new(0), - prefetch_inflight: AtomicU32::new(0), - prefetch_consumed: AtomicUsize::new(0), - #[cfg(feature = "prefetch-rate-limit")] - prefetch_limiter, - }) - } - - /// Create working threads and start the event loop. - pub fn start(mgr: Arc) -> Result<()> { - if mgr.prefetch_config.enable { - Self::start_prefetch_workers(mgr)?; - } - - Ok(()) - } - - /// Stop all working threads. - pub fn stop(&self) { - if self - .active - .compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed) - .is_err() - { - return; - } - self.prefetch_channel.close(); - - while self.workers.load(Ordering::Relaxed) > 0 { - self.prefetch_channel.notify_waiters(); - thread::sleep(Duration::from_millis(10)); - } - } - - /// Send an asynchronous service request message to the workers. - pub fn send_prefetch_message( - &self, - msg: AsyncPrefetchMessage, - ) -> std::result::Result<(), AsyncPrefetchMessage> { - if !self.prefetch_config.enable { - Err(msg) - } else { - self.prefetch_inflight.fetch_add(1, Ordering::Relaxed); - self.prefetch_channel.send(msg) - } - } - - /// Flush pending prefetch requests associated with `blob_id`. - pub fn flush_pending_prefetch_requests(&self, blob_id: &str) { - self.prefetch_channel - .flush_pending_prefetch_requests(|t| match t { - AsyncPrefetchMessage::BlobPrefetch(blob, _, _, _) => { - blob_id == blob.blob_id() && !blob.is_prefetch_active() - } - AsyncPrefetchMessage::FsPrefetch(blob, _, _) => { - blob_id == blob.blob_id() && !blob.is_prefetch_active() - } - _ => false, - }); - } - - /// Consume network bandwidth budget for prefetching. - pub fn consume_prefetch_budget(&self, size: u64) { - if self.prefetch_inflight.load(Ordering::Relaxed) > 0 { - self.prefetch_consumed - .fetch_add(size as usize, Ordering::AcqRel); - } - } - - fn start_prefetch_workers(mgr: Arc) -> Result<()> { - // Hold the request queue to barrier all working threads. 
- let guard = mgr.prefetch_channel.lock_channel(); - for num in 0..mgr.prefetch_config.threads_count { - let mgr2 = mgr.clone(); - let res = thread::Builder::new() - .name(format!("nydus_storage_worker_{}", num)) - .spawn(move || { - mgr2.grow_n(1); - mgr2.metrics - .prefetch_workers - .fetch_add(1, Ordering::Relaxed); - - with_runtime(|rt| { - rt.block_on(Self::handle_prefetch_requests(mgr2.clone(), rt)); - }); - - mgr2.metrics - .prefetch_workers - .fetch_sub(1, Ordering::Relaxed); - mgr2.shrink_n(1); - info!("storage: worker thread {} exits.", num) - }); - - if let Err(e) = res { - error!("storage: failed to create worker thread, {:?}", e); - mgr.prefetch_channel.close(); - drop(guard); - mgr.stop(); - return Err(e); - } - } - mgr.active.store(true, Ordering::Release); - Ok(()) - } - - async fn handle_prefetch_requests(mgr: Arc, rt: &Runtime) { - mgr.begin_timing_once.call_once(|| { - let now = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap(); - - mgr.metrics.prefetch_begin_time_secs.set(now.as_secs()); - mgr.metrics - .prefetch_begin_time_millis - .set(now.subsec_millis() as u64); - }); - - // Max 1 active requests per thread. - mgr.prefetch_sema.add_permits(1); - - while let Ok(msg) = mgr.prefetch_channel.recv().await { - mgr.handle_prefetch_rate_limit(&msg).await; - let mgr2 = mgr.clone(); - - match msg { - AsyncPrefetchMessage::BlobPrefetch(blob_cache, offset, size, begin_time) => { - let token = Semaphore::acquire_owned(mgr2.prefetch_sema.clone()) - .await - .unwrap(); - if blob_cache.is_prefetch_active() { - rt.spawn_blocking(move || { - let _ = Self::handle_blob_prefetch_request( - mgr2.clone(), - blob_cache, - offset, - size, - begin_time, - ); - drop(token); - }); - } - } - AsyncPrefetchMessage::FsPrefetch(blob_cache, req, begin_time) => { - let token = Semaphore::acquire_owned(mgr2.prefetch_sema.clone()) - .await - .unwrap(); - - if blob_cache.is_prefetch_active() { - rt.spawn_blocking(move || { - let _ = Self::handle_fs_prefetch_request( - mgr2.clone(), - blob_cache, - req, - begin_time, - ); - drop(token) - }); - } - } - AsyncPrefetchMessage::Ping => { - let _ = mgr.ping_requests.fetch_add(1, Ordering::Relaxed); - } - AsyncPrefetchMessage::RateLimiter(_size) => {} - } - - mgr.prefetch_inflight.fetch_sub(1, Ordering::Relaxed); - } - } - - async fn handle_prefetch_rate_limit(&self, _msg: &AsyncPrefetchMessage) { - #[cfg(feature = "prefetch-rate-limit")] - // Allocate network bandwidth budget - if let Some(limiter) = &self.prefetch_limiter { - let size = match _msg { - AsyncPrefetchMessage::BlobPrefetch(blob_cache, _offset, size, _) => { - if blob_cache.is_prefetch_active() { - *size - } else { - 0 - } - } - AsyncPrefetchMessage::FsPrefetch(blob_cache, req, _) => { - if blob_cache.is_prefetch_active() { - req.blob_size - } else { - 0 - } - } - AsyncPrefetchMessage::Ping => 0, - AsyncPrefetchMessage::RateLimiter(size) => *size, - }; - - if size > 0 { - let size = (self.prefetch_consumed.swap(0, Ordering::AcqRel)) - .saturating_add(size as usize); - let max = limiter.max(); - let size = std::cmp::min(size, max.saturating_add(max)); - let cap = limiter.balance(); - if cap < size { - self.prefetch_delayed.fetch_add(1, Ordering::Relaxed); - } - limiter.acquire(size).await; - } - } - } - - fn handle_blob_prefetch_request( - mgr: Arc, - cache: Arc, - offset: u64, - size: u64, - begin_time: SystemTime, - ) -> Result<()> { - trace!( - "storage: prefetch blob {} offset {} size {}", - cache.blob_id(), - offset, - size - ); - if size == 0 { - return Ok(()); - } - - 
// Record how much prefetch data is requested from storage backend. - // So the average backend merged request size will be prefetch_data_amount/prefetch_requests_count. - // We can measure merging possibility by this. - let metrics = mgr.metrics.clone(); - metrics.prefetch_requests_count.inc(); - metrics.prefetch_data_amount.add(size); - - if let Some(obj) = cache.get_blob_object() { - if let Err(_e) = obj.fetch_range_compressed(offset, size, true) { - if mgr.retry_times.load(Ordering::Relaxed) > 0 { - mgr.retry_times.fetch_sub(1, Ordering::Relaxed); - ASYNC_RUNTIME.spawn(async move { - tokio::time::sleep(Duration::from_secs(1)).await; - let msg = - AsyncPrefetchMessage::new_blob_prefetch(cache.clone(), offset, size); - let _ = mgr.send_prefetch_message(msg); - }); - } - } - } else { - warn!("prefetch blob range is not supported"); - } - - metrics.calculate_prefetch_metrics(begin_time); - - Ok(()) - } - - // TODO: Nydus plans to switch backend storage IO stack to full asynchronous mode. - // But we can't make `handle_fs_prefetch_request` as async due to the fact that - // tokio doesn't allow dropping runtime in a non-blocking context. Otherwise, prefetch - // threads always panic in debug program profile. We can achieve the goal when - // backend/registry also switches to async IO. - fn handle_fs_prefetch_request( - mgr: Arc, - cache: Arc, - req: BlobIoRange, - begin_time: SystemTime, - ) -> Result<()> { - let blob_offset = req.blob_offset; - let blob_size = req.blob_size; - trace!( - "storage: prefetch fs data from blob {} offset {} size {}", - cache.blob_id(), - blob_offset, - blob_size - ); - if blob_size == 0 { - return Ok(()); - } - - // Record how much prefetch data is requested from storage backend. - // So the average backend merged request size will be prefetch_data_amount/prefetch_requests_count. - // We can measure merging possibility by this. 
- mgr.metrics.prefetch_requests_count.inc(); - mgr.metrics.prefetch_data_amount.add(blob_size); - - if let Some(obj) = cache.get_blob_object() { - obj.prefetch_chunks(&req)?; - } else { - cache.prefetch_range(&req)?; - } - - mgr.metrics.calculate_prefetch_metrics(begin_time); - - Ok(()) - } - - fn shrink_n(&self, n: u32) { - self.workers.fetch_sub(n, Ordering::Relaxed); - } - - fn grow_n(&self, n: u32) { - self.workers.fetch_add(n, Ordering::Relaxed); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use vmm_sys_util::tempdir::TempDir; - - #[test] - fn test_worker_mgr_new() { - let tmpdir = TempDir::new().unwrap(); - let metrics = BlobcacheMetrics::new("test1", tmpdir.as_path().to_str().unwrap()); - let config = Arc::new(AsyncPrefetchConfig { - enable: true, - threads_count: 2, - batch_size: 0x100000, - bandwidth_limit: 0x100000, - }); - - let mgr = Arc::new(AsyncWorkerMgr::new(metrics, config).unwrap()); - AsyncWorkerMgr::start(mgr.clone()).unwrap(); - assert_eq!(mgr.ping_requests.load(Ordering::Acquire), 0); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_ok()); - thread::sleep(Duration::from_secs(1)); - assert_eq!(mgr.ping_requests.load(Ordering::Acquire), 5); - assert_eq!(mgr.workers.load(Ordering::Acquire), 2); - mgr.stop(); - assert_eq!(mgr.workers.load(Ordering::Acquire), 0); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::Ping) - .is_err()); - } - - #[cfg(feature = "prefetch-rate-limit")] - #[test] - fn test_worker_mgr_rate_limiter() { - let tmpdir = TempDir::new().unwrap(); - let metrics = BlobcacheMetrics::new("test1", tmpdir.as_path().to_str().unwrap()); - let config = Arc::new(AsyncPrefetchConfig { - enable: true, - threads_count: 4, - batch_size: 0x1000000, - bandwidth_limit: 0x1000000, - }); - - let mgr = Arc::new(AsyncWorkerMgr::new(metrics, config).unwrap()); - AsyncWorkerMgr::start(mgr.clone()).unwrap(); - - assert_eq!(mgr.prefetch_delayed.load(Ordering::Acquire), 0); - assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 0); - - thread::sleep(Duration::from_secs(1)); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(1)) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(1)) - .is_ok()); - thread::sleep(Duration::from_secs(1)); - assert_eq!(mgr.prefetch_delayed.load(Ordering::Acquire), 0); - assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 0); - - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(0x1000000)) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(0x1000000)) - .is_ok()); - assert!(mgr - .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(u64::MAX)) - .is_ok()); - assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 3); - thread::sleep(Duration::from_secs(1)); - assert!(mgr.prefetch_inflight.load(Ordering::Acquire) <= 2); - assert!(mgr.prefetch_inflight.load(Ordering::Acquire) >= 1); - thread::sleep(Duration::from_secs(3)); - assert!(mgr.prefetch_inflight.load(Ordering::Acquire) >= 1); - assert!(mgr.prefetch_delayed.load(Ordering::Acquire) >= 1); - - mgr.stop(); - assert_eq!(mgr.workers.load(Ordering::Acquire), 0); - } -} +// Copyright 2020 Ant Group. All rights reserved. 
+// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::Result; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Once}; +use std::thread; +use std::time::{Duration, SystemTime}; + +use nydus_api::PrefetchConfigV2; +use nydus_utils::async_helper::with_runtime; +use nydus_utils::metrics::{BlobcacheMetrics, Metric}; +use nydus_utils::mpmc::Channel; +use tokio::runtime::Runtime; +use tokio::sync::Semaphore; + +use crate::cache::{BlobCache, BlobIoRange}; +use crate::factory::ASYNC_RUNTIME; + +/// Configuration information for asynchronous workers. +pub(crate) struct AsyncPrefetchConfig { + /// Whether or not to enable prefetch. + pub enable: bool, + /// Number of working threads. + pub threads_count: usize, + /// The amplify batch size to prefetch data from backend. + pub batch_size: usize, + /// Network bandwidth for prefetch, in unit of Bytes and Zero means no rate limit is set. + #[allow(unused)] + pub bandwidth_limit: u32, +} + +impl From<&PrefetchConfigV2> for AsyncPrefetchConfig { + fn from(p: &PrefetchConfigV2) -> Self { + AsyncPrefetchConfig { + enable: p.enable, + threads_count: p.threads_count, + batch_size: p.batch_size, + bandwidth_limit: p.bandwidth_limit, + } + } +} + +/// Asynchronous service request message. +pub(crate) enum AsyncPrefetchMessage { + /// Asynchronous blob layer prefetch request with (offset, size) of blob on storage backend. + BlobPrefetch(Arc, u64, u64, SystemTime), + /// Asynchronous file-system layer prefetch request. + FsPrefetch(Arc, BlobIoRange, SystemTime), + #[cfg_attr(not(test), allow(unused))] + /// Ping for test. + Ping, + #[allow(unused)] + RateLimiter(u64), +} + +impl AsyncPrefetchMessage { + /// Create a new asynchronous filesystem prefetch request message. + pub fn new_fs_prefetch(blob_cache: Arc, req: BlobIoRange) -> Self { + AsyncPrefetchMessage::FsPrefetch(blob_cache, req, SystemTime::now()) + } + + /// Create a new asynchronous blob prefetch request message. + pub fn new_blob_prefetch(blob_cache: Arc, offset: u64, size: u64) -> Self { + AsyncPrefetchMessage::BlobPrefetch(blob_cache, offset, size, SystemTime::now()) + } +} + +/// An asynchronous task manager for data prefetching +pub(crate) struct AsyncWorkerMgr { + metrics: Arc, + ping_requests: AtomicU32, + workers: AtomicU32, + active: AtomicBool, + begin_timing_once: Once, + + // Limit the total retry times to avoid unnecessary resource consumption. + retry_times: AtomicI32, + + prefetch_sema: Arc, + prefetch_channel: Arc>, + prefetch_config: Arc, + #[allow(unused)] + prefetch_delayed: AtomicU64, + prefetch_inflight: AtomicU32, + prefetch_consumed: AtomicUsize, + #[cfg(feature = "prefetch-rate-limit")] + prefetch_limiter: Option>, +} + +impl AsyncWorkerMgr { + /// Create a new instance of `AsyncWorkerMgr`. + pub fn new( + metrics: Arc, + prefetch_config: Arc, + ) -> Result { + #[cfg(feature = "prefetch-rate-limit")] + let prefetch_limiter = match prefetch_config.bandwidth_limit { + 0 => None, + v => { + // If the given value is less than maximum blob chunk size, it exceeds burst size of the + // limiter ending up with throttling all throughput, so ensure bandwidth is bigger than + // the maximum chunk size. 
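// Editor's note: with a bucket of `limit` bytes refilled by limit / 10 every 100 ms, the
// sustained budget works out to roughly `limit` bytes per second, i.e. the configured
// bandwidth_limit clamped up to RAFS_MAX_CHUNK_SIZE so that a single maximum-sized chunk
// can always be admitted by one acquire.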
+ let limit = std::cmp::max(crate::RAFS_MAX_CHUNK_SIZE as usize, v as usize); + let limiter = leaky_bucket::RateLimiter::builder() + .initial(limit) + .refill(limit / 10) + .interval(Duration::from_millis(100)) + .build(); + Some(Arc::new(limiter)) + } + }; + + Ok(AsyncWorkerMgr { + metrics, + ping_requests: AtomicU32::new(0), + workers: AtomicU32::new(0), + active: AtomicBool::new(false), + begin_timing_once: Once::new(), + + retry_times: AtomicI32::new(32), + + prefetch_sema: Arc::new(Semaphore::new(0)), + prefetch_channel: Arc::new(Channel::new()), + prefetch_config, + prefetch_delayed: AtomicU64::new(0), + prefetch_inflight: AtomicU32::new(0), + prefetch_consumed: AtomicUsize::new(0), + #[cfg(feature = "prefetch-rate-limit")] + prefetch_limiter, + }) + } + + /// Create working threads and start the event loop. + pub fn start(mgr: Arc) -> Result<()> { + if mgr.prefetch_config.enable { + Self::start_prefetch_workers(mgr)?; + } + + Ok(()) + } + + /// Stop all working threads. + pub fn stop(&self) { + if self + .active + .compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed) + .is_err() + { + return; + } + self.prefetch_channel.close(); + + while self.workers.load(Ordering::Relaxed) > 0 { + self.prefetch_channel.notify_waiters(); + thread::sleep(Duration::from_millis(10)); + } + } + + /// Send an asynchronous service request message to the workers. + pub fn send_prefetch_message( + &self, + msg: AsyncPrefetchMessage, + ) -> std::result::Result<(), AsyncPrefetchMessage> { + if !self.prefetch_config.enable { + Err(msg) + } else { + self.prefetch_inflight.fetch_add(1, Ordering::Relaxed); + self.prefetch_channel.send(msg) + } + } + + /// Flush pending prefetch requests associated with `blob_id`. + pub fn flush_pending_prefetch_requests(&self, blob_id: &str) { + self.prefetch_channel + .flush_pending_prefetch_requests(|t| match t { + AsyncPrefetchMessage::BlobPrefetch(blob, _, _, _) => { + blob_id == blob.blob_id() && !blob.is_prefetch_active() + } + AsyncPrefetchMessage::FsPrefetch(blob, _, _) => { + blob_id == blob.blob_id() && !blob.is_prefetch_active() + } + _ => false, + }); + } + + /// Consume network bandwidth budget for prefetching. + pub fn consume_prefetch_budget(&self, size: u64) { + if self.prefetch_inflight.load(Ordering::Relaxed) > 0 { + self.prefetch_consumed + .fetch_add(size as usize, Ordering::AcqRel); + } + } + + fn start_prefetch_workers(mgr: Arc) -> Result<()> { + // Hold the request queue to barrier all working threads. 
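// Editor's note: the guard stays alive until this function returns (and is dropped
// explicitly on the spawn-error path below), so every worker thread is created before
// any of them can start draining the request queue.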
+ let guard = mgr.prefetch_channel.lock_channel(); + for num in 0..mgr.prefetch_config.threads_count { + let mgr2 = mgr.clone(); + let res = thread::Builder::new() + .name(format!("nydus_storage_worker_{}", num)) + .spawn(move || { + mgr2.grow_n(1); + mgr2.metrics + .prefetch_workers + .fetch_add(1, Ordering::Relaxed); + + with_runtime(|rt| { + rt.block_on(Self::handle_prefetch_requests(mgr2.clone(), rt)); + }); + + mgr2.metrics + .prefetch_workers + .fetch_sub(1, Ordering::Relaxed); + mgr2.shrink_n(1); + info!("storage: worker thread {} exits.", num) + }); + + if let Err(e) = res { + error!("storage: failed to create worker thread, {:?}", e); + mgr.prefetch_channel.close(); + drop(guard); + mgr.stop(); + return Err(e); + } + } + mgr.active.store(true, Ordering::Release); + Ok(()) + } + + async fn handle_prefetch_requests(mgr: Arc, rt: &Runtime) { + mgr.begin_timing_once.call_once(|| { + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); + + mgr.metrics.prefetch_begin_time_secs.set(now.as_secs()); + mgr.metrics + .prefetch_begin_time_millis + .set(now.subsec_millis() as u64); + }); + + // Max 1 active requests per thread. + mgr.prefetch_sema.add_permits(1); + + while let Ok(msg) = mgr.prefetch_channel.recv().await { + mgr.handle_prefetch_rate_limit(&msg).await; + let mgr2 = mgr.clone(); + + match msg { + AsyncPrefetchMessage::BlobPrefetch(blob_cache, offset, size, begin_time) => { + let token = Semaphore::acquire_owned(mgr2.prefetch_sema.clone()) + .await + .unwrap(); + if blob_cache.is_prefetch_active() { + rt.spawn_blocking(move || { + let _ = Self::handle_blob_prefetch_request( + mgr2.clone(), + blob_cache, + offset, + size, + begin_time, + ); + drop(token); + }); + } + } + AsyncPrefetchMessage::FsPrefetch(blob_cache, req, begin_time) => { + let token = Semaphore::acquire_owned(mgr2.prefetch_sema.clone()) + .await + .unwrap(); + + if blob_cache.is_prefetch_active() { + rt.spawn_blocking(move || { + let _ = Self::handle_fs_prefetch_request( + mgr2.clone(), + blob_cache, + req, + begin_time, + ); + drop(token) + }); + } + } + AsyncPrefetchMessage::Ping => { + let _ = mgr.ping_requests.fetch_add(1, Ordering::Relaxed); + } + AsyncPrefetchMessage::RateLimiter(_size) => {} + } + + mgr.prefetch_inflight.fetch_sub(1, Ordering::Relaxed); + } + } + + async fn handle_prefetch_rate_limit(&self, _msg: &AsyncPrefetchMessage) { + #[cfg(feature = "prefetch-rate-limit")] + // Allocate network bandwidth budget + if let Some(limiter) = &self.prefetch_limiter { + let size = match _msg { + AsyncPrefetchMessage::BlobPrefetch(blob_cache, _offset, size, _) => { + if blob_cache.is_prefetch_active() { + *size + } else { + 0 + } + } + AsyncPrefetchMessage::FsPrefetch(blob_cache, req, _) => { + if blob_cache.is_prefetch_active() { + req.blob_size + } else { + 0 + } + } + AsyncPrefetchMessage::Ping => 0, + AsyncPrefetchMessage::RateLimiter(size) => *size, + }; + + if size > 0 { + let size = (self.prefetch_consumed.swap(0, Ordering::AcqRel)) + .saturating_add(size as usize); + let max = limiter.max(); + let size = std::cmp::min(size, max.saturating_add(max)); + let cap = limiter.balance(); + if cap < size { + self.prefetch_delayed.fetch_add(1, Ordering::Relaxed); + } + limiter.acquire(size).await; + } + } + } + + fn handle_blob_prefetch_request( + mgr: Arc, + cache: Arc, + offset: u64, + size: u64, + begin_time: SystemTime, + ) -> Result<()> { + trace!( + "storage: prefetch blob {} offset {} size {}", + cache.blob_id(), + offset, + size + ); + if size == 0 { + return Ok(()); + } + + 
// Record how much prefetch data is requested from storage backend. + // So the average backend merged request size will be prefetch_data_amount/prefetch_requests_count. + // We can measure merging possibility by this. + let metrics = mgr.metrics.clone(); + metrics.prefetch_requests_count.inc(); + metrics.prefetch_data_amount.add(size); + + if let Some(obj) = cache.get_blob_object() { + if let Err(_e) = obj.fetch_range_compressed(offset, size, true) { + if mgr.retry_times.load(Ordering::Relaxed) > 0 { + mgr.retry_times.fetch_sub(1, Ordering::Relaxed); + ASYNC_RUNTIME.spawn(async move { + tokio::time::sleep(Duration::from_secs(1)).await; + let msg = + AsyncPrefetchMessage::new_blob_prefetch(cache.clone(), offset, size); + let _ = mgr.send_prefetch_message(msg); + }); + } + } + } else { + warn!("prefetch blob range is not supported"); + } + + metrics.calculate_prefetch_metrics(begin_time); + + Ok(()) + } + + // TODO: Nydus plans to switch backend storage IO stack to full asynchronous mode. + // But we can't make `handle_fs_prefetch_request` as async due to the fact that + // tokio doesn't allow dropping runtime in a non-blocking context. Otherwise, prefetch + // threads always panic in debug program profile. We can achieve the goal when + // backend/registry also switches to async IO. + fn handle_fs_prefetch_request( + mgr: Arc, + cache: Arc, + req: BlobIoRange, + begin_time: SystemTime, + ) -> Result<()> { + let blob_offset = req.blob_offset; + let blob_size = req.blob_size; + trace!( + "storage: prefetch fs data from blob {} offset {} size {}", + cache.blob_id(), + blob_offset, + blob_size + ); + if blob_size == 0 { + return Ok(()); + } + + // Record how much prefetch data is requested from storage backend. + // So the average backend merged request size will be prefetch_data_amount/prefetch_requests_count. + // We can measure merging possibility by this. 
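// For instance (hypothetical numbers): if prefetch_data_amount grows by 64 MiB while
// prefetch_requests_count grows by 16, merged backend requests averaged 4 MiB each,
// i.e. several 1 MiB chunks were coalesced into a single backend read.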
+ mgr.metrics.prefetch_requests_count.inc(); + mgr.metrics.prefetch_data_amount.add(blob_size); + + if let Some(obj) = cache.get_blob_object() { + obj.prefetch_chunks(&req)?; + } else { + cache.prefetch_range(&req)?; + } + + mgr.metrics.calculate_prefetch_metrics(begin_time); + + Ok(()) + } + + fn shrink_n(&self, n: u32) { + self.workers.fetch_sub(n, Ordering::Relaxed); + } + + fn grow_n(&self, n: u32) { + self.workers.fetch_add(n, Ordering::Relaxed); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vmm_sys_util::tempdir::TempDir; + + #[test] + fn test_worker_mgr_new() { + let tmpdir = TempDir::new().unwrap(); + let metrics = BlobcacheMetrics::new("test1", tmpdir.as_path().to_str().unwrap()); + let config = Arc::new(AsyncPrefetchConfig { + enable: true, + threads_count: 2, + batch_size: 0x100000, + bandwidth_limit: 0x100000, + }); + + let mgr = Arc::new(AsyncWorkerMgr::new(metrics, config).unwrap()); + AsyncWorkerMgr::start(mgr.clone()).unwrap(); + assert_eq!(mgr.ping_requests.load(Ordering::Acquire), 0); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_ok()); + thread::sleep(Duration::from_secs(1)); + assert_eq!(mgr.ping_requests.load(Ordering::Acquire), 5); + assert_eq!(mgr.workers.load(Ordering::Acquire), 2); + mgr.stop(); + assert_eq!(mgr.workers.load(Ordering::Acquire), 0); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::Ping) + .is_err()); + } + + #[cfg(feature = "prefetch-rate-limit")] + #[test] + fn test_worker_mgr_rate_limiter() { + let tmpdir = TempDir::new().unwrap(); + let metrics = BlobcacheMetrics::new("test1", tmpdir.as_path().to_str().unwrap()); + let config = Arc::new(AsyncPrefetchConfig { + enable: true, + threads_count: 4, + batch_size: 0x1000000, + bandwidth_limit: 0x1000000, + }); + + let mgr = Arc::new(AsyncWorkerMgr::new(metrics, config).unwrap()); + AsyncWorkerMgr::start(mgr.clone()).unwrap(); + + assert_eq!(mgr.prefetch_delayed.load(Ordering::Acquire), 0); + assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 0); + + thread::sleep(Duration::from_secs(1)); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(1)) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(1)) + .is_ok()); + thread::sleep(Duration::from_secs(1)); + assert_eq!(mgr.prefetch_delayed.load(Ordering::Acquire), 0); + assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 0); + + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(0x1000000)) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(0x1000000)) + .is_ok()); + assert!(mgr + .send_prefetch_message(AsyncPrefetchMessage::RateLimiter(u64::MAX)) + .is_ok()); + assert_eq!(mgr.prefetch_inflight.load(Ordering::Acquire), 3); + thread::sleep(Duration::from_secs(1)); + assert!(mgr.prefetch_inflight.load(Ordering::Acquire) <= 2); + assert!(mgr.prefetch_inflight.load(Ordering::Acquire) >= 1); + thread::sleep(Duration::from_secs(3)); + assert!(mgr.prefetch_inflight.load(Ordering::Acquire) >= 1); + assert!(mgr.prefetch_delayed.load(Ordering::Acquire) >= 1); + + mgr.stop(); + assert_eq!(mgr.workers.load(Ordering::Acquire), 0); + } +} diff --git a/storage/src/device.rs 
b/storage/src/device.rs index 6e6cbc15ed6..48620eefd81 100644 --- a/storage/src/device.rs +++ b/storage/src/device.rs @@ -1,1671 +1,1671 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Blob Storage Public Service APIs -//! -//! The core functionality of the nydus-storage crate is to serve blob IO request, mainly read chunk -//! data from blobs. This module provides public APIs and data structures for clients to issue blob -//! IO requests. The main traits and structs provided include: -//! - [BlobChunkInfo](trait.BlobChunkInfo.html): trait to provide basic information for a chunk. -//! - [BlobDevice](struct.BlobDevice.html): a wrapping object over a group of underlying [BlobCache] -//! object to serve blob data access requests. -//! - [BlobInfo](struct.BlobInfo.html): configuration information for a metadata/data blob object. -//! - [BlobIoChunk](enum.BlobIoChunk.html): an enumeration to encapsulate different [BlobChunkInfo] -//! implementations for [BlobIoDesc]. -//! - [BlobIoDesc](struct.BlobIoDesc.html): a blob IO descriptor, containing information for a -//! continuous IO range within a chunk. -//! - [BlobIoVec](struct.BlobIoVec.html): a scatter/gather list for blob IO operation, containing -//! one or more blob IO descriptors -//! - [BlobPrefetchRequest](struct.BlobPrefetchRequest.html): a blob data prefetching request. -use std::any::Any; -use std::collections::hash_map::Drain; -use std::collections::HashMap; -use std::convert::TryFrom; -use std::fmt::{Debug, Formatter}; -use std::fs::File; -use std::io::{self, Error}; -use std::ops::Deref; -use std::os::unix::io::AsRawFd; -use std::path::Path; -use std::sync::{Arc, Mutex}; - -use arc_swap::ArcSwap; -use fuse_backend_rs::api::filesystem::ZeroCopyWriter; -use fuse_backend_rs::file_buf::FileVolatileSlice; -use fuse_backend_rs::file_traits::FileReadWriteVolatile; - -use nydus_api::ConfigV2; -use nydus_utils::compress; -use nydus_utils::crypt::{self, Cipher, CipherContext}; -use nydus_utils::digest::{self, RafsDigest}; - -use crate::cache::BlobCache; -use crate::factory::BLOB_FACTORY; - -pub(crate) const BLOB_FEATURE_INCOMPAT_MASK: u32 = 0x0000_ffff; -pub(crate) const BLOB_FEATURE_INCOMPAT_VALUE: u32 = 0x0000_0fff; - -bitflags! { - /// Features bits for blob management. - pub struct BlobFeatures: u32 { - /// Uncompressed chunk data is aligned. - const ALIGNED = 0x0000_0001; - /// RAFS meta data is inlined in the data blob. - const INLINED_FS_META = 0x0000_0002; - /// Blob chunk information format v2. - const CHUNK_INFO_V2 = 0x0000_0004; - /// Blob compression information data include context data for zlib random access. - const ZRAN = 0x0000_0008; - /// Blob data and blob meta are stored in separate blobs. - const SEPARATE = 0x0000_0010; - /// Chunk digest array is inlined in the data blob. - const INLINED_CHUNK_DIGEST = 0x0000_0020; - /// Blob is for RAFS filesystems in TARFS mode. - const TARFS = 0x0000_0040; - /// Small file chunk are merged into batch chunk. - const BATCH = 0x0000_0080; - /// Whether the Blob is encrypted. - const ENCRYPTED = 0x0000_0100; - /// Blob has TAR headers to separate contents. - const HAS_TAR_HEADER = 0x1000_0000; - /// Blob has Table of Content (ToC) at the tail. - const HAS_TOC = 0x2000_0000; - /// Data blob are encoded with Tar header and optionally ToC. - /// It's also a flag indicating that images are generated with `nydus-image` v2.2 or newer. 
- const CAP_TAR_TOC = 0x4000_0000; - /// Rafs V5 image without extended blob table, this is an internal flag. - const _V5_NO_EXT_BLOB_TABLE = 0x8000_0000; - /// Blob is generated with chunkdict. - const IS_CHUNKDICT_GENERATED = 0x0000_0200; - } -} - -impl Default for BlobFeatures { - fn default() -> Self { - BlobFeatures::empty() - } -} - -impl BlobFeatures { - /// Check whether the blob is for RAFS filesystems in TARFS mode. - pub fn is_tarfs(&self) -> bool { - self.contains(BlobFeatures::CAP_TAR_TOC) && self.contains(BlobFeatures::TARFS) - } -} - -impl TryFrom for BlobFeatures { - type Error = Error; - - fn try_from(value: u32) -> Result { - if value & BLOB_FEATURE_INCOMPAT_MASK & !BLOB_FEATURE_INCOMPAT_VALUE != 0 - || value & BlobFeatures::_V5_NO_EXT_BLOB_TABLE.bits() != 0 - { - Err(einval!(format!("invalid blob features: 0x{:x}", value))) - } else { - // Safe because we have just validated feature flags. - Ok(unsafe { BlobFeatures::from_bits_unchecked(value) }) - } - } -} - -/// Configuration information for a metadata/data blob object. -/// -/// The `BlobInfo` structure provides information for the storage subsystem to manage a blob file -/// and serve blob IO requests for clients. -#[derive(Clone, Debug, Default)] -pub struct BlobInfo { - /// The index of blob in RAFS blob table. - blob_index: u32, - /// A sha256 hex string generally. - blob_id: String, - /// Feature bits for blob management. - blob_features: BlobFeatures, - /// Size of the compressed blob file. - compressed_size: u64, - /// Size of the uncompressed blob file, or the cache file. - uncompressed_size: u64, - /// Chunk size. - chunk_size: u32, - /// Number of chunks in blob file. - /// A helper to distinguish bootstrap with extended blob table or not: - /// Bootstrap with extended blob table always has non-zero `chunk_count` - chunk_count: u32, - /// Compression algorithm to process the blob. - compressor: compress::Algorithm, - /// Chunk data encryption algorithm. - cipher: crypt::Algorithm, - /// Message digest algorithm to process the blob. - digester: digest::Algorithm, - /// Starting offset of the data to prefetch. - prefetch_offset: u32, - /// Size of blob data to prefetch. - prefetch_size: u32, - /// The blob is for a legacy estargz image. - is_legacy_stargz: bool, - - /// V6: compressor that is used for compressing chunk info array. - meta_ci_compressor: u32, - /// V6: Offset of the chunk information array in the compressed blob. - meta_ci_offset: u64, - /// V6: Size of the compressed chunk information array. - meta_ci_compressed_size: u64, - /// V6: Size of the uncompressed chunk information array. - meta_ci_uncompressed_size: u64, - - // SHA256 digest of blob ToC content, including the toc tar header. - // It's all zero for blobs with inlined-meta. - blob_toc_digest: [u8; 32], - // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and - // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special - // handling. - blob_meta_digest: [u8; 32], - // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. - blob_meta_size: u64, - // Size of blob ToC content, it's zero for blobs with inlined-meta. - blob_toc_size: u32, - - /// V6: support fs-cache mode - fs_cache_file: Option>, - /// V6: support inlined-meta - meta_path: Arc>, - /// V6: support data encryption. - cipher_object: Arc, - /// Cipher context for encryption. 
- cipher_ctx: Option, - - /// is chunkdict generated - is_chunkdict_generated: bool, -} - -impl BlobInfo { - /// Create a new instance of `BlobInfo`. - pub fn new( - blob_index: u32, - blob_id: String, - uncompressed_size: u64, - compressed_size: u64, - chunk_size: u32, - chunk_count: u32, - blob_features: BlobFeatures, - ) -> Self { - let blob_id = blob_id.trim_end_matches('\0').to_string(); - let mut blob_info = BlobInfo { - blob_index, - blob_id, - blob_features, - uncompressed_size, - compressed_size, - chunk_size, - chunk_count, - - compressor: compress::Algorithm::None, - cipher: crypt::Algorithm::None, - digester: digest::Algorithm::Blake3, - prefetch_offset: 0, - prefetch_size: 0, - is_legacy_stargz: false, - meta_ci_compressor: 0, - meta_ci_offset: 0, - meta_ci_compressed_size: 0, - meta_ci_uncompressed_size: 0, - - blob_toc_digest: [0u8; 32], - blob_meta_digest: [0u8; 32], - blob_meta_size: 0, - blob_toc_size: 0, - - fs_cache_file: None, - meta_path: Arc::new(Mutex::new(String::new())), - cipher_object: Default::default(), - cipher_ctx: None, - - is_chunkdict_generated: false, - }; - - blob_info.compute_features(); - - blob_info - } - - /// Set the is_chunkdict_generated flag. - pub fn set_chunkdict_generated(&mut self, is_chunkdict_generated: bool) { - self.is_chunkdict_generated = is_chunkdict_generated; - } - - /// Get the is_chunkdict_generated flag. - pub fn is_chunkdict_generated(&self) -> bool { - self.is_chunkdict_generated - } - - /// Get the blob index in the blob array. - pub fn blob_index(&self) -> u32 { - self.blob_index - } - - /// Get the id of the blob, with special handling of `inlined-meta` case. - pub fn blob_id(&self) -> String { - if (self.has_feature(BlobFeatures::INLINED_FS_META) - && !self.has_feature(BlobFeatures::SEPARATE)) - || !self.has_feature(BlobFeatures::CAP_TAR_TOC) - { - let guard = self.meta_path.lock().unwrap(); - if !guard.is_empty() { - return guard.deref().clone(); - } - } - self.blob_id.clone() - } - - /// Get raw blob id, without special handling of `inlined-meta` case. - pub fn raw_blob_id(&self) -> &str { - &self.blob_id - } - - /// Get size of compressed chunk data, not including `blob.meta`, `blob.chunk`, `toc` etc. - pub fn compressed_data_size(&self) -> u64 { - if self.has_feature(BlobFeatures::SEPARATE) { - // It's the size of referenced OCIv1 targz blob. - self.compressed_size - } else if self.has_feature(BlobFeatures::CAP_TAR_TOC) { - // Image built with nydus 2.2 and newer versions. - if self.meta_ci_is_valid() { - // For RAFS v6 - if self.has_feature(BlobFeatures::HAS_TAR_HEADER) { - // There's a tar header between chunk data and compression information. - self.meta_ci_offset - 0x200 - } else { - self.meta_ci_offset - } - } else { - // For RAFS v5 - if self.has_feature(BlobFeatures::HAS_TAR_HEADER) { - // There's a tar header between chunk data and fs meta data. - self.compressed_size - 0x200 - } else { - self.compressed_size - } - } - } else { - // Images built with nydus 2.1 and previous versions. - self.compressed_size - } - } - - /// Get size of the compressed blob, including `blob.meta`, `blob.chunk`, `toc` etc. - pub fn compressed_size(&self) -> u64 { - self.compressed_size - } - - /// Get size of the uncompressed blob. - pub fn uncompressed_size(&self) -> u64 { - self.uncompressed_size - } - - /// Get chunk size. - pub fn chunk_size(&self) -> u32 { - self.chunk_size - } - - /// Get number of chunks in the blob. 
- pub fn chunk_count(&self) -> u32 { - self.chunk_count - } - - /// Get the compression algorithm to handle the blob data. - pub fn compressor(&self) -> compress::Algorithm { - self.compressor - } - - /// Set compression algorithm for the blob. - pub fn set_compressor(&mut self, compressor: compress::Algorithm) { - self.compressor = compressor; - self.compute_features(); - } - - /// Get the cipher algorithm to handle chunk data. - pub fn cipher(&self) -> crypt::Algorithm { - self.cipher - } - - /// Set encryption algorithm for the blob. - pub fn set_cipher(&mut self, cipher: crypt::Algorithm) { - self.cipher = cipher; - } - - /// Get the cipher object to encrypt/decrypt chunk data. - pub fn cipher_object(&self) -> Arc { - self.cipher_object.clone() - } - - /// Get the cipher context. - pub fn cipher_context(&self) -> Option { - self.cipher_ctx.clone() - } - - /// Set the cipher info, including cipher algo, cipher object and cipher context. - pub fn set_cipher_info( - &mut self, - cipher: crypt::Algorithm, - cipher_object: Arc, - cipher_ctx: Option, - ) { - self.cipher = cipher; - self.cipher_object = cipher_object; - self.cipher_ctx = cipher_ctx; - } - - /// Get the message digest algorithm for the blob. - pub fn digester(&self) -> digest::Algorithm { - self.digester - } - - /// Set compression algorithm for the blob. - pub fn set_digester(&mut self, digester: digest::Algorithm) { - self.digester = digester; - } - - /// Get blob data prefetching offset. - pub fn prefetch_offset(&self) -> u64 { - self.prefetch_offset as u64 - } - - /// Get blob data prefetching offset. - pub fn prefetch_size(&self) -> u64 { - self.prefetch_size as u64 - } - - /// Set a range for blob data prefetching. - /// - /// Only one range could be configured per blob, and zero prefetch_size means disabling blob - /// data prefetching. - pub fn set_prefetch_info(&mut self, offset: u64, size: u64) { - self.prefetch_offset = offset as u32; - self.prefetch_size = size as u32; - } - - /// Check whether this blob is for an stargz image. - pub fn is_legacy_stargz(&self) -> bool { - self.is_legacy_stargz - } - - /// Set metadata information for a blob. - /// - /// The compressed blobs are laid out as: - /// `[compressed chunk data], [compressed metadata], [uncompressed header]`. - pub fn set_blob_meta_info( - &mut self, - offset: u64, - compressed_size: u64, - uncompressed_size: u64, - compressor: u32, - ) { - self.meta_ci_compressor = compressor; - self.meta_ci_offset = offset; - self.meta_ci_compressed_size = compressed_size; - self.meta_ci_uncompressed_size = uncompressed_size; - } - - /// Get compression algorithm for chunk information array. - pub fn meta_ci_compressor(&self) -> compress::Algorithm { - if self.meta_ci_compressor == compress::Algorithm::Lz4Block as u32 { - compress::Algorithm::Lz4Block - } else if self.meta_ci_compressor == compress::Algorithm::GZip as u32 { - compress::Algorithm::GZip - } else if self.meta_ci_compressor == compress::Algorithm::Zstd as u32 { - compress::Algorithm::Zstd - } else { - compress::Algorithm::None - } - } - - /// Get offset of chunk information array in the compressed blob. - pub fn meta_ci_offset(&self) -> u64 { - self.meta_ci_offset - } - - /// Get size of the compressed chunk information array. - pub fn meta_ci_compressed_size(&self) -> u64 { - self.meta_ci_compressed_size - } - - /// Get the uncompressed size of the chunk information array. 
- pub fn meta_ci_uncompressed_size(&self) -> u64 { - self.meta_ci_uncompressed_size - } - - /// Check whether compression metadata is available. - pub fn meta_ci_is_valid(&self) -> bool { - self.meta_ci_compressed_size != 0 && self.meta_ci_uncompressed_size != 0 - } - - /// Set the associated `File` object provided by Linux fscache subsystem. - pub fn set_fscache_file(&mut self, file: Option>) { - self.fs_cache_file = file; - } - - #[cfg(target_os = "linux")] - /// Get the associated `File` object provided by Linux fscache subsystem. - pub(crate) fn get_fscache_file(&self) -> Option> { - self.fs_cache_file.clone() - } - - /// Get blob features. - pub fn features(&self) -> BlobFeatures { - self.blob_features - } - - /// Check whether the requested features are available. - pub fn has_feature(&self, features: BlobFeatures) -> bool { - self.blob_features.bits() & features.bits() == features.bits() - } - - /// Generate feature flags according to blob configuration. - fn compute_features(&mut self) { - if self.chunk_count == 0 { - self.blob_features |= BlobFeatures::_V5_NO_EXT_BLOB_TABLE; - } - if self.compressor == compress::Algorithm::GZip - && !self.has_feature(BlobFeatures::CHUNK_INFO_V2) - { - self.is_legacy_stargz = true; - } - } - - /// Get SHA256 digest of the ToC content, including the toc tar header. - /// - /// It's all zero for inlined bootstrap. - pub fn blob_toc_digest(&self) -> &[u8; 32] { - &self.blob_toc_digest - } - - /// Set SHA256 digest of the ToC content, including the toc tar header. - pub fn set_blob_toc_digest(&mut self, digest: [u8; 32]) { - self.blob_toc_digest = digest; - } - - /// Get size of the ToC content. It's all zero for inlined bootstrap. - pub fn blob_toc_size(&self) -> u32 { - self.blob_toc_size - } - - /// Set size of the ToC content. - pub fn set_blob_toc_size(&mut self, sz: u32) { - self.blob_toc_size = sz; - } - - /// The RAFS blob contains `blob.meta`, `blob.digest`, `image.boot`, `ToC` etc. - /// Get SHA256 digest of RAFS blob containing `blob.meta`, `blob.digest` `blob.toc` and - /// optionally 'image.boot`. - /// - /// Default to `self.blob_id` when it's all zero. - pub fn blob_meta_digest(&self) -> &[u8; 32] { - &self.blob_meta_digest - } - - /// Set SHA256 digest of the RAFS blob. - pub fn set_blob_meta_digest(&mut self, digest: [u8; 32]) { - self.blob_meta_digest = digest; - } - - /// Get size of the RAFS blob. - pub fn blob_meta_size(&self) -> u64 { - self.blob_meta_size - } - - /// Set size of the RAFS blob. - pub fn set_blob_meta_size(&mut self, size: u64) { - self.blob_meta_size = size; - } - - /// Set path for meta blob file, which will be used by `get_blob_id()` and `get_blob_meta_id()`. - pub fn set_blob_id_from_meta_path(&self, path: &Path) -> Result<(), Error> { - *self.meta_path.lock().unwrap() = Self::get_blob_id_from_meta_path(path)?; - Ok(()) - } - - pub fn get_blob_id_from_meta_path(path: &Path) -> Result { - // Manual implementation of Path::file_prefix(). 
- let mut id = path.file_name().ok_or_else(|| { - einval!(format!( - "failed to get blob id from meta file path {}", - path.display() - )) - })?; - loop { - let id1 = Path::new(id).file_stem().ok_or_else(|| { - einval!(format!( - "failed to get blob id from meta file path {}", - path.display() - )) - })?; - if id1.is_empty() { - return Err(einval!(format!( - "failed to get blob id from meta file path {}", - path.display() - ))); - } else if id == id1 { - break; - } else { - id = id1; - } - } - let id = id.to_str().ok_or_else(|| { - einval!(format!( - "failed to get blob id from meta file path {}", - path.display() - )) - })?; - - Ok(id.to_string()) - } - - /// Get RAFS blob id for ZRan. - pub fn get_blob_meta_id(&self) -> Result { - assert!(self.has_feature(BlobFeatures::SEPARATE)); - let id = if self.has_feature(BlobFeatures::INLINED_FS_META) { - let guard = self.meta_path.lock().unwrap(); - if guard.is_empty() { - return Err(einval!("failed to get blob id from meta file name")); - } - guard.deref().clone() - } else { - hex::encode(self.blob_meta_digest) - }; - Ok(id) - } - - /// Get the cipher info, including cipher algo, cipher object and cipher context. - pub fn get_cipher_info(&self) -> (crypt::Algorithm, Arc, Option) { - ( - self.cipher, - self.cipher_object.clone(), - self.cipher_ctx.clone(), - ) - } -} - -bitflags! { - /// Blob chunk flags. - pub struct BlobChunkFlags: u32 { - /// Chunk data is compressed. - const COMPRESSED = 0x0000_0001; - /// Chunk is a hole, with all data as zero. - const _HOLECHUNK = 0x0000_0002; - /// Chunk data is encrypted. - const ENCYPTED = 0x0000_0004; - /// Chunk data is merged into a batch chunk. - const BATCH = 0x0000_0008; - } -} - -impl Default for BlobChunkFlags { - fn default() -> Self { - BlobChunkFlags::empty() - } -} - -/// Trait to provide basic information for a chunk. -/// -/// A `BlobChunkInfo` object describes how a chunk is located within the compressed and -/// uncompressed data blobs. It's used to help the storage subsystem to: -/// - download chunks from storage backend -/// - maintain chunk readiness state for each chunk -/// - convert from compressed form to uncompressed form -/// -/// This trait may be extended to provide additional information for a specific Rafs filesystem -/// version, for example `BlobV5ChunkInfo` provides Rafs v5 filesystem related information about -/// a chunk. -pub trait BlobChunkInfo: Any + Sync + Send { - /// Get the message digest value of the chunk, which acts as an identifier for the chunk. - fn chunk_id(&self) -> &RafsDigest; - - /// Get a unique ID to identify the chunk within the metadata/data blob. - /// - /// The returned value of `id()` is often been used as HashMap keys, so `id()` method should - /// return unique identifier for each chunk of a blob file. - fn id(&self) -> u32; - - /// Get the blob index of the blob file in the Rafs v5 metadata's blob array. - fn blob_index(&self) -> u32; - - /// Get the chunk offset in the compressed blob. - fn compressed_offset(&self) -> u64; - - /// Get the size of the compressed chunk. - fn compressed_size(&self) -> u32; - - /// Get end of the chunk in the compressed blob. - fn compressed_end(&self) -> u64 { - self.compressed_offset() + self.compressed_size() as u64 - } - - /// Get the chunk offset in the uncompressed blob. - fn uncompressed_offset(&self) -> u64; - - /// Get the size of the uncompressed chunk. - fn uncompressed_size(&self) -> u32; - - /// Get end of the chunk in the compressed blob. 
- fn uncompressed_end(&self) -> u64 { - self.uncompressed_offset() + self.uncompressed_size() as u64 - } - - /// Check whether the chunk is batch chunk or not. - fn is_batch(&self) -> bool; - - /// Check whether the chunk is compressed or not. - /// - /// Some chunk may become bigger after compression, so plain data instead of compressed - /// data may be stored in the compressed data blob for those chunks. - fn is_compressed(&self) -> bool; - - /// Check whether the chunk is encrypted or not. - fn is_encrypted(&self) -> bool; - - fn as_any(&self) -> &dyn Any; -} - -/// An enumeration to encapsulate different [BlobChunkInfo] implementations for [BlobIoDesc]. -/// -/// This helps to feed unified IO description to storage subsystem from both rafs v6 and v5 since -/// rafs v6 have a different ChunkInfo definition on bootstrap. -#[derive(Clone)] -pub struct BlobIoChunk(Arc); - -impl From> for BlobIoChunk { - fn from(v: Arc) -> Self { - BlobIoChunk(v) - } -} - -impl BlobChunkInfo for BlobIoChunk { - fn chunk_id(&self) -> &RafsDigest { - self.0.chunk_id() - } - - fn id(&self) -> u32 { - self.0.id() - } - - fn blob_index(&self) -> u32 { - self.0.blob_index() - } - - fn compressed_offset(&self) -> u64 { - self.0.compressed_offset() - } - - fn compressed_size(&self) -> u32 { - self.0.compressed_size() - } - - fn uncompressed_offset(&self) -> u64 { - self.0.uncompressed_offset() - } - - fn uncompressed_size(&self) -> u32 { - self.0.uncompressed_size() - } - - fn is_batch(&self) -> bool { - self.0.is_batch() - } - - fn is_compressed(&self) -> bool { - self.0.is_compressed() - } - - fn is_encrypted(&self) -> bool { - self.0.is_encrypted() - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -/// Blob IO descriptor, containing information for a continuous IO range within a chunk. -#[derive(Clone)] -pub struct BlobIoDesc { - /// The blob associated with the IO operation. - pub blob: Arc, - /// The chunk associated with the IO operation. - pub chunkinfo: BlobIoChunk, - /// Offset from start of the chunk for the IO operation. - pub offset: u32, - /// Size of the IO operation - pub size: u32, - /// Whether it's a user initiated IO, otherwise is a storage system internal IO. - /// - /// It might be initiated by user io amplification. With this flag, lower device - /// layer may choose how to prioritize the IO operation. - pub(crate) user_io: bool, -} - -impl BlobIoDesc { - /// Create a new blob IO descriptor. - pub fn new( - blob: Arc, - chunkinfo: BlobIoChunk, - offset: u32, - size: u32, - user_io: bool, - ) -> Self { - BlobIoDesc { - blob, - chunkinfo, - offset, - size, - user_io, - } - } - - /// Check whether the `other` BlobIoDesc is continuous to current one. - pub fn is_continuous(&self, next: &BlobIoDesc, max_gap: u64) -> bool { - let prev_end = self.chunkinfo.compressed_offset() + self.chunkinfo.compressed_size() as u64; - let next_offset = next.chunkinfo.compressed_offset(); - - if self.chunkinfo.is_batch() || next.chunkinfo.is_batch() { - // Batch chunk can only be compared by uncompressed info. 
- return next.chunkinfo.uncompressed_offset() - self.chunkinfo.uncompressed_end() - <= max_gap; - } - - if self.chunkinfo.blob_index() == next.chunkinfo.blob_index() && next_offset >= prev_end { - if next.blob.is_legacy_stargz() { - next_offset - prev_end <= max_gap * 8 - } else { - next_offset - prev_end <= max_gap - } - } else { - false - } - } -} - -impl Debug for BlobIoDesc { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("BlobIoDesc") - .field("blob_index", &self.blob.blob_index) - .field("chunk_index", &self.chunkinfo.id()) - .field("compressed_offset", &self.chunkinfo.compressed_offset()) - .field("file_offset", &self.offset) - .field("size", &self.size) - .field("user", &self.user_io) - .finish() - } -} - -/// Scatter/gather list for blob IO operation, containing zero or more blob IO descriptors -pub struct BlobIoVec { - /// The blob associated with the IO operation. - bi_blob: Arc, - /// Total size of blob IOs to be performed. - bi_size: u64, - /// Array of blob IOs, these IOs should be executed sequentially. - pub(crate) bi_vec: Vec, -} - -impl BlobIoVec { - /// Create a new blob IO scatter/gather list object. - pub fn new(bi_blob: Arc) -> Self { - BlobIoVec { - bi_blob, - bi_size: 0, - bi_vec: Vec::with_capacity(128), - } - } - - /// Add a new 'BlobIoDesc' to the 'BlobIoVec'. - pub fn push(&mut self, desc: BlobIoDesc) { - assert_eq!(self.bi_blob.blob_index(), desc.blob.blob_index()); - assert_eq!(self.bi_blob.blob_id(), desc.blob.blob_id()); - assert!(self.bi_size.checked_add(desc.size as u64).is_some()); - self.bi_size += desc.size as u64; - self.bi_vec.push(desc); - } - - /// Append another blob io vector to current one. - pub fn append(&mut self, mut vec: BlobIoVec) { - assert_eq!(self.bi_blob.blob_id(), vec.bi_blob.blob_id()); - assert!(self.bi_size.checked_add(vec.bi_size).is_some()); - self.bi_vec.append(vec.bi_vec.as_mut()); - self.bi_size += vec.bi_size; - } - - /// Reset the blob io vector. - pub fn reset(&mut self) { - self.bi_size = 0; - self.bi_vec.truncate(0); - } - - /// Get number of 'BlobIoDesc' in the 'BlobIoVec'. - pub fn len(&self) -> usize { - self.bi_vec.len() - } - - /// Check whether there's 'BlobIoDesc' in the 'BlobIoVec'. - pub fn is_empty(&self) -> bool { - self.bi_vec.is_empty() - } - - /// Get size of pending IO data. - pub fn size(&self) -> u64 { - self.bi_size - } - - /// Get an immutable reference to a `BlobIoDesc` entry. - pub fn blob_io_desc(&self, index: usize) -> Option<&BlobIoDesc> { - if index < self.bi_vec.len() { - Some(&self.bi_vec[index]) - } else { - None - } - } - - /// Get the target blob index of the blob io vector. - pub fn blob_index(&self) -> u32 { - self.bi_blob.blob_index() - } - - /// Check whether the blob io vector is targeting the blob with `blob_index` - pub fn is_target_blob(&self, blob_index: u32) -> bool { - self.bi_blob.blob_index() == blob_index - } - - /// Check whether two blob io vector targets the same blob. - pub fn has_same_blob(&self, desc: &BlobIoVec) -> bool { - self.bi_blob.blob_index() == desc.bi_blob.blob_index() - } -} - -impl Debug for BlobIoVec { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("BlobIoDesc") - .field("blob_index", &self.bi_blob.blob_index) - .field("size", &self.bi_size) - .field("decriptors", &self.bi_vec) - .finish() - } -} - -/// Helper structure to merge blob IOs to reduce IO requests. 
-#[derive(Default)] -pub struct BlobIoMerge { - map: HashMap, - current: String, -} - -impl BlobIoMerge { - /// Append an `BlobIoVec` object to the merge state object. - pub fn append(&mut self, desc: BlobIoVec) { - if !desc.is_empty() { - let id = desc.bi_blob.blob_id.as_str(); - if self.current != id { - self.current = id.to_string(); - } - if let Some(prev) = self.map.get_mut(id) { - prev.append(desc); - } else { - self.map.insert(id.to_string(), desc); - } - } - } - - /// Drain elements in the cache. - pub fn drain(&mut self) -> Drain<'_, String, BlobIoVec> { - self.map.drain() - } - - /// Get current element. - pub fn get_current_element(&mut self) -> Option<&mut BlobIoVec> { - self.map.get_mut(&self.current) - } -} - -/// A segment representing a continuous range for a blob IO operation. -/// -/// It can span multiple chunks while the `offset` is where the user io starts -/// within the first chunk and `len` is the total user io length of these chunks. -#[derive(Clone, Debug, Default)] -pub(crate) struct BlobIoSegment { - /// Start position of the range within the chunk - pub offset: u32, - /// Size of the range within the chunk - pub len: u32, -} - -impl BlobIoSegment { - /// Create a new instance of `ChunkSegment`. - pub fn new(offset: u32, len: u32) -> Self { - Self { offset, len } - } - - #[inline] - pub fn append(&mut self, offset: u32, len: u32) { - assert!(offset.checked_add(len).is_some()); - assert_eq!(offset, 0); - - self.len += len; - } - - pub fn is_empty(&self) -> bool { - self.offset == 0 && self.len == 0 - } -} - -/// Struct to maintain information about blob IO operation. -#[derive(Clone, Debug)] -pub(crate) enum BlobIoTag { - /// Io requests to fulfill user requests. - User(BlobIoSegment), - /// Io requests to fulfill internal requirements. - Internal, -} - -impl BlobIoTag { - /// Check whether the tag is a user issued io request. - pub fn is_user_io(&self) -> bool { - matches!(self, BlobIoTag::User(_)) - } -} - -/// Struct to representing multiple continuous blob IO as one storage backend request. -/// -/// For network based remote storage backend, such as Registry/OS, it may have limited IOPs -/// due to high request round-trip time, but have enough network bandwidth. In such cases, -/// it may help to improve performance by merging multiple continuous and small blob IO -/// requests into one big backend request. -/// -/// A `BlobIoRange` request targets a continuous range of a single blob. -#[derive(Default, Clone)] -pub struct BlobIoRange { - pub(crate) blob_info: Arc, - pub(crate) blob_offset: u64, - pub(crate) blob_size: u64, - pub(crate) chunks: Vec>, - pub(crate) tags: Vec, -} - -impl Debug for BlobIoRange { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - f.debug_struct("BlobIoRange") - .field("blob_id", &self.blob_info.blob_id()) - .field("blob_offset", &self.blob_offset) - .field("blob_size", &self.blob_size) - .field("tags", &self.tags) - .finish() - } -} - -impl BlobIoRange { - /// Create a new instance of `BlobIoRange`. 
- pub fn new(bio: &BlobIoDesc, capacity: usize) -> Self { - let blob_size = bio.chunkinfo.compressed_size() as u64; - let blob_offset = bio.chunkinfo.compressed_offset(); - assert!(blob_offset.checked_add(blob_size).is_some()); - - let mut chunks = Vec::with_capacity(capacity); - let mut tags = Vec::with_capacity(capacity); - tags.push(Self::tag_from_desc(bio)); - chunks.push(bio.chunkinfo.0.clone()); - - BlobIoRange { - blob_info: bio.blob.clone(), - blob_offset, - blob_size, - chunks, - tags, - } - } - - /// Merge an `BlobIoDesc` into the `BlobIoRange` object. - pub fn merge(&mut self, bio: &BlobIoDesc, _max_gap: u64) { - let end = self.blob_offset + self.blob_size; - let offset = bio.chunkinfo.compressed_offset(); - let size = bio.chunkinfo.compressed_size() as u64; - let size = if end == offset { - assert!(offset.checked_add(size).is_some()); - size - } else { - assert!(offset > end); - size + (offset - end) - }; - assert!(end.checked_add(size).is_some()); - - self.blob_size += size; - self.tags.push(Self::tag_from_desc(bio)); - self.chunks.push(bio.chunkinfo.0.clone()); - } - - fn tag_from_desc(bio: &BlobIoDesc) -> BlobIoTag { - if bio.user_io { - BlobIoTag::User(BlobIoSegment::new(bio.offset, bio.size as u32)) - } else { - BlobIoTag::Internal - } - } -} - -/// Struct representing a blob data prefetching request. -/// -/// It may help to improve performance for the storage backend to prefetch data in background. -/// A `BlobPrefetchControl` object advises to prefetch data range [offset, offset + len) from -/// blob `blob_id`. The prefetch operation should be asynchronous, and cache hit for filesystem -/// read operations should validate data integrity. -pub struct BlobPrefetchRequest { - /// The ID of the blob to prefetch data for. - pub blob_id: String, - /// Offset into the blob to prefetch data. - pub offset: u64, - /// Size of data to prefetch. - pub len: u64, -} - -/// Trait to provide direct access to underlying uncompressed blob file. -/// -/// The suggested flow to make use of an `BlobObject` is as below: -/// - call `is_all_data_ready()` to check all blob data has already been cached. If true, skip -/// next step. -/// - call `fetch()` to ensure blob range [offset, offset + size) has been cached. -/// - call `as_raw_fd()` to get the underlying file descriptor for direct access. -/// - call File::read(buf, offset + `base_offset()`, size) to read data from underlying cache file. -pub trait BlobObject: AsRawFd { - /// Get base offset to read blob from the fd returned by `as_raw_fd()`. - fn base_offset(&self) -> u64; - - /// Check whether all data of the blob object is ready. - fn is_all_data_ready(&self) -> bool; - - /// Fetch data from storage backend covering compressed blob range [offset, offset + size). - /// - /// Used by asynchronous prefetch worker to implement blob prefetch. - fn fetch_range_compressed(&self, offset: u64, size: u64, prefetch: bool) -> io::Result<()>; - - /// Fetch data from storage backend and make sure data range [offset, offset + size) is ready - /// for use. - /// - /// Used by rafs to support blobfs. - fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> io::Result<()>; - - /// Prefetch data for specified chunks from storage backend. - /// - /// Used by asynchronous prefetch worker to implement fs prefetch. - fn prefetch_chunks(&self, range: &BlobIoRange) -> io::Result<()>; -} - -/// A wrapping object over an underlying [BlobCache] object. -/// -/// All blob Io requests are actually served by the underlying [BlobCache] object. 
The wrapper -/// provides an interface to dynamically switch underlying [BlobCache] objects. -#[derive(Clone, Default)] -pub struct BlobDevice { - blobs: Arc>>>, - blob_count: usize, -} - -impl BlobDevice { - /// Create new blob device instance. - pub fn new(config: &Arc, blob_infos: &[Arc]) -> io::Result { - let mut blobs = Vec::with_capacity(blob_infos.len()); - for blob_info in blob_infos.iter() { - let blob = BLOB_FACTORY.new_blob_cache(config, blob_info)?; - blobs.push(blob); - } - - Ok(BlobDevice { - blobs: Arc::new(ArcSwap::new(Arc::new(blobs))), - blob_count: blob_infos.len(), - }) - } - - /// Update configuration and storage backends of the blob device. - /// - /// The `update()` method switch a new storage backend object according to the configuration - /// information passed in. - pub fn update( - &self, - config: &Arc, - blob_infos: &[Arc], - fs_prefetch: bool, - ) -> io::Result<()> { - if self.blobs.load().len() != blob_infos.len() { - return Err(einval!( - "number of blobs doesn't match when update 'BlobDevice' object" - )); - } - - let mut blobs = Vec::with_capacity(blob_infos.len()); - for blob_info in blob_infos.iter() { - let blob = BLOB_FACTORY.new_blob_cache(config, blob_info)?; - blobs.push(blob); - } - - if fs_prefetch { - // Stop prefetch if it is running before swapping backend since prefetch threads cloned - // Arc, the swap operation can't drop inner object completely. - // Otherwise prefetch threads will be leaked. - self.stop_prefetch(); - } - self.blobs.store(Arc::new(blobs)); - if fs_prefetch { - self.start_prefetch(); - } - - Ok(()) - } - - /// Close the blob device. - pub fn close(&self) -> io::Result<()> { - Ok(()) - } - - /// Check whether the `BlobDevice` has any blobs. - pub fn has_device(&self) -> bool { - self.blob_count > 0 - } - - /// Read a range of data from a data blob into the provided writer - pub fn read_to(&self, w: &mut dyn ZeroCopyWriter, desc: &mut BlobIoVec) -> io::Result { - // Validate that: - // - bi_vec[0] is valid - // - bi_vec[0].blob.blob_index() is valid - // - all IOs are against a single blob. - if desc.bi_vec.is_empty() { - if desc.bi_size == 0 { - Ok(0) - } else { - Err(einval!("BlobIoVec size doesn't match.")) - } - } else if desc.blob_index() as usize >= self.blob_count { - Err(einval!("BlobIoVec has out of range blob_index.")) - } else { - let size = desc.bi_size; - let mut f = BlobDeviceIoVec::new(self, desc); - // The `off` parameter to w.write_from() is actually ignored by - // BlobV5IoVec::read_vectored_at_volatile() - w.write_from(&mut f, size as usize, 0) - } - } - - /// Try to prefetch specified blob data. - pub fn prefetch( - &self, - io_vecs: &[&BlobIoVec], - prefetches: &[BlobPrefetchRequest], - ) -> io::Result<()> { - for idx in 0..prefetches.len() { - if let Some(blob) = self.get_blob_by_id(&prefetches[idx].blob_id) { - let _ = blob.prefetch(blob.clone(), &prefetches[idx..idx + 1], &[]); - } - } - - for io_vec in io_vecs.iter() { - if let Some(blob) = self.get_blob_by_iovec(io_vec) { - // Prefetch errors are ignored. - let _ = blob - .prefetch(blob.clone(), &[], &io_vec.bi_vec) - .map_err(|e| { - error!("failed to prefetch blob data, {}", e); - }); - } - } - - Ok(()) - } - - /// Start the background blob data prefetch task. - pub fn start_prefetch(&self) { - for blob in self.blobs.load().iter() { - let _ = blob.start_prefetch(); - } - } - - /// Stop the background blob data prefetch task. 
- pub fn stop_prefetch(&self) { - for blob in self.blobs.load().iter() { - let _ = blob.stop_prefetch(); - } - } - - /// fetch specified blob data in a synchronous way. - pub fn fetch_range_synchronous(&self, prefetches: &[BlobPrefetchRequest]) -> io::Result<()> { - for req in prefetches { - if req.len == 0 { - continue; - } - if let Some(cache) = self.get_blob_by_id(&req.blob_id) { - trace!( - "fetch blob {} offset {} size {}", - req.blob_id, - req.offset, - req.len - ); - if let Some(obj) = cache.get_blob_object() { - obj.fetch_range_uncompressed(req.offset as u64, req.len as u64) - .map_err(|e| { - warn!( - "Failed to prefetch data from blob {}, offset {}, size {}, {}", - cache.blob_id(), - req.offset, - req.len, - e - ); - e - })?; - } else { - error!("No support for fetching uncompressed blob data"); - return Err(einval!("No support for fetching uncompressed blob data")); - } - } - } - - Ok(()) - } - - /// Check all chunks related to the blob io vector are ready. - pub fn all_chunks_ready(&self, io_vecs: &[BlobIoVec]) -> bool { - for io_vec in io_vecs.iter() { - if let Some(blob) = self.get_blob_by_iovec(io_vec) { - let chunk_map = blob.get_chunk_map(); - for desc in io_vec.bi_vec.iter() { - if !chunk_map.is_ready(&desc.chunkinfo).unwrap_or(false) { - return false; - } - } - } else { - return false; - } - } - - true - } - - /// RAFS V6: create a `BlobIoChunk` for chunk with index `chunk_index`. - pub fn create_io_chunk(&self, blob_index: u32, chunk_index: u32) -> Option { - if (blob_index as usize) < self.blob_count { - let state = self.blobs.load(); - let blob = &state[blob_index as usize]; - blob.get_chunk_info(chunk_index).map(|v| v.into()) - } else { - None - } - } - - /// RAFS V6: get chunk information object for chunks. - pub fn get_chunk_info( - &self, - blob_index: u32, - chunk_index: u32, - ) -> Option> { - if (blob_index as usize) < self.blob_count { - let state = self.blobs.load(); - let blob = &state[blob_index as usize]; - blob.get_chunk_info(chunk_index) - } else { - None - } - } - - fn get_blob_by_iovec(&self, iovec: &BlobIoVec) -> Option> { - let blob_index = iovec.blob_index(); - if (blob_index as usize) < self.blob_count { - return Some(self.blobs.load()[blob_index as usize].clone()); - } - - None - } - - fn get_blob_by_id(&self, blob_id: &str) -> Option> { - for blob in self.blobs.load().iter() { - if blob.blob_id() == blob_id { - return Some(blob.clone()); - } - } - - None - } -} - -/// Struct to execute Io requests with a single blob. -/// -/// It's used to support `BlobDevice::read_to()` and acts the main entrance to read chunk data -/// from data blobs. -struct BlobDeviceIoVec<'a> { - dev: &'a BlobDevice, - iovec: &'a mut BlobIoVec, -} - -impl<'a> BlobDeviceIoVec<'a> { - fn new(dev: &'a BlobDevice, iovec: &'a mut BlobIoVec) -> Self { - BlobDeviceIoVec { dev, iovec } - } -} - -impl FileReadWriteVolatile for BlobDeviceIoVec<'_> { - fn read_volatile(&mut self, _slice: FileVolatileSlice) -> Result { - // Skip because we don't really use it - unimplemented!(); - } - - fn write_volatile(&mut self, _slice: FileVolatileSlice) -> Result { - // Skip because we don't really use it - unimplemented!(); - } - - fn read_at_volatile(&mut self, slice: FileVolatileSlice, offset: u64) -> Result { - let buffers = [slice]; - self.read_vectored_at_volatile(&buffers, offset) - } - - // The default read_vectored_at_volatile only read to the first slice, so we have to overload it. 
- fn read_vectored_at_volatile( - &mut self, - buffers: &[FileVolatileSlice], - _offset: u64, - ) -> Result { - // BlobDevice::read_to() has validated that all IOs are against a single blob. - let index = self.iovec.blob_index(); - let blobs = &self.dev.blobs.load(); - - if (index as usize) < blobs.len() { - blobs[index as usize].read(self.iovec, buffers) - } else { - let msg = format!( - "failed to get blob object for BlobIoVec, index {}, blob array len: {}", - index, - blobs.len() - ); - Err(einval!(msg)) - } - } - - fn write_at_volatile( - &mut self, - _slice: FileVolatileSlice, - _offset: u64, - ) -> Result { - unimplemented!() - } -} - -/// Traits and Structs to support Rafs v5 image format. -/// -/// The Rafs v5 image format is designed with fused filesystem metadata and blob management -/// metadata, which is simple to implement but also introduces inter-dependency between the -/// filesystem layer and the blob management layer. This circular dependency is hard to maintain -/// and extend. Newer Rafs image format adopts designs with independent blob management layer, -/// which could be easily used to support both fuse and virtio-fs. So Rafs v5 image specific -/// interfaces are isolated into a dedicated sub-module. -pub mod v5 { - use super::*; - - /// Trait to provide extended information for a Rafs v5 chunk. - /// - /// Rafs filesystem stores filesystem metadata in a single metadata blob, and stores file - /// content in zero or more data blobs, which are separated from the metadata blob. - /// A `BlobV5ChunkInfo` object describes how a Rafs v5 chunk is located within a data blob. - /// It is abstracted because Rafs have several ways to load metadata from metadata blob. - pub trait BlobV5ChunkInfo: BlobChunkInfo { - /// Get the chunk index in the Rafs v5 metadata's chunk info array. - fn index(&self) -> u32; - - /// Get the file offset within the Rafs file it belongs to. - fn file_offset(&self) -> u64; - - /// Get flags of the chunk. - fn flags(&self) -> BlobChunkFlags; - - /// Cast to a base [BlobChunkInfo] trait object. 
- fn as_base(&self) -> &dyn BlobChunkInfo; - } -} - -#[cfg(test)] -mod tests { - use std::path::PathBuf; - - use super::*; - use crate::test::MockChunkInfo; - - #[test] - fn test_blob_io_chunk() { - let chunk: Arc = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 0, - flags: Default::default(), - compress_size: 0x100, - uncompress_size: 0x200, - compress_offset: 0x1000, - uncompress_offset: 0x2000, - file_offset: 0, - index: 3, - reserved: 0, - }); - let iochunk: BlobIoChunk = chunk.clone().into(); - - assert_eq!(iochunk.id(), 3); - assert_eq!(iochunk.compressed_offset(), 0x1000); - assert_eq!(iochunk.compressed_size(), 0x100); - assert_eq!(iochunk.uncompressed_offset(), 0x2000); - assert_eq!(iochunk.uncompressed_size(), 0x200); - assert!(!iochunk.is_compressed()); - } - - #[test] - fn test_chunk_is_continuous() { - let blob_info = Arc::new(BlobInfo::new( - 1, - "test1".to_owned(), - 0x200000, - 0x100000, - 0x100000, - 512, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - )); - let chunk1 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }) as Arc; - let chunk2 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x800, - uncompress_offset: 0x1000, - file_offset: 0x1000, - index: 1, - reserved: 0, - }) as Arc; - let chunk3 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x1800, - uncompress_offset: 0x3000, - file_offset: 0x3000, - index: 1, - reserved: 0, - }) as Arc; - - let desc1 = BlobIoDesc { - blob: blob_info.clone(), - chunkinfo: chunk1.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - let desc2 = BlobIoDesc { - blob: blob_info.clone(), - chunkinfo: chunk2.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - let desc3 = BlobIoDesc { - blob: blob_info, - chunkinfo: chunk3.into(), - offset: 0, - size: 0x1000, - user_io: true, - }; - - assert!(desc1.is_continuous(&desc2, 0x0)); - assert!(desc1.is_continuous(&desc2, 0x1000)); - assert!(!desc2.is_continuous(&desc1, 0x1000)); - assert!(!desc2.is_continuous(&desc1, 0x0)); - - assert!(!desc1.is_continuous(&desc3, 0x0)); - assert!(!desc1.is_continuous(&desc3, 0x400)); - assert!(!desc1.is_continuous(&desc3, 0x800)); - assert!(desc1.is_continuous(&desc3, 0x1000)); - - assert!(!desc2.is_continuous(&desc3, 0x0)); - assert!(!desc2.is_continuous(&desc3, 0x400)); - assert!(desc2.is_continuous(&desc3, 0x800)); - assert!(desc2.is_continuous(&desc3, 0x1000)); - } - - #[test] - fn test_append_same_blob_with_diff_index() { - let blob1 = Arc::new(BlobInfo::new( - 1, - "test1".to_owned(), - 0x200000, - 0x100000, - 0x100000, - 512, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - )); - let chunk1 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 1, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0, - uncompress_offset: 0, - file_offset: 0, - index: 0, - reserved: 0, - }) as Arc; - let mut iovec = BlobIoVec::new(blob1.clone()); - iovec.push(BlobIoDesc::new(blob1, BlobIoChunk(chunk1), 0, 0x1000, true)); - - let blob2 = Arc::new(BlobInfo::new( - 2, // different index - "test1".to_owned(), // same id - 
0x200000, - 0x100000, - 0x100000, - 512, - BlobFeatures::_V5_NO_EXT_BLOB_TABLE, - )); - let chunk2 = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: 2, - flags: BlobChunkFlags::empty(), - compress_size: 0x800, - uncompress_size: 0x1000, - compress_offset: 0x800, - uncompress_offset: 0x1000, - file_offset: 0x1000, - index: 1, - reserved: 0, - }) as Arc; - let mut iovec2 = BlobIoVec::new(blob2.clone()); - iovec2.push(BlobIoDesc::new(blob2, BlobIoChunk(chunk2), 0, 0x1000, true)); - - iovec.append(iovec2); - assert_eq!(0x2000, iovec.bi_size); - } - - #[test] - fn test_extend_large_blob_io_vec() { - let size = 0x2_0000_0000; // 8G blob - let chunk_size = 0x10_0000; // 1M chunk - let chunk_count = (size / chunk_size as u64) as u32; - let large_blob = Arc::new(BlobInfo::new( - 0, - "blob_id".to_owned(), - size, - size, - chunk_size, - chunk_count, - BlobFeatures::default(), - )); - - let mut iovec = BlobIoVec::new(large_blob.clone()); - let mut iovec2 = BlobIoVec::new(large_blob.clone()); - - // Extend half of blob - for chunk_idx in 0..chunk_count { - let chunk = Arc::new(MockChunkInfo { - block_id: Default::default(), - blob_index: large_blob.blob_index, - flags: BlobChunkFlags::empty(), - compress_size: chunk_size, - compress_offset: chunk_idx as u64 * chunk_size as u64, - uncompress_size: 2 * chunk_size, - uncompress_offset: 2 * chunk_idx as u64 * chunk_size as u64, - file_offset: 2 * chunk_idx as u64 * chunk_size as u64, - index: chunk_idx as u32, - reserved: 0, - }) as Arc; - let desc = BlobIoDesc::new(large_blob.clone(), BlobIoChunk(chunk), 0, chunk_size, true); - if chunk_idx < chunk_count / 2 { - iovec.push(desc); - } else { - iovec2.push(desc) - } - } - - // Extend other half of blob - iovec.append(iovec2); - - assert_eq!(size, iovec.size()); - assert_eq!(chunk_count, iovec.len() as u32); - } - - #[test] - fn test_blob_info_blob_meta_id() { - let blob_info = BlobInfo::new( - 1, - "blob_id".to_owned(), - 0, - 0, - 0, - 1, - BlobFeatures::SEPARATE | BlobFeatures::INLINED_FS_META, - ); - - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let mut source_path = PathBuf::from(root_dir); - source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); - - assert!(blob_info - .set_blob_id_from_meta_path(source_path.as_path()) - .is_ok()); - - let id = blob_info.get_blob_meta_id(); - assert!(id.is_ok()); - assert_eq!( - id.unwrap(), - "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef".to_owned() - ); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Blob Storage Public Service APIs +//! +//! The core functionality of the nydus-storage crate is to serve blob IO request, mainly read chunk +//! data from blobs. This module provides public APIs and data structures for clients to issue blob +//! IO requests. The main traits and structs provided include: +//! - [BlobChunkInfo](trait.BlobChunkInfo.html): trait to provide basic information for a chunk. +//! - [BlobDevice](struct.BlobDevice.html): a wrapping object over a group of underlying [BlobCache] +//! object to serve blob data access requests. +//! - [BlobInfo](struct.BlobInfo.html): configuration information for a metadata/data blob object. +//! - [BlobIoChunk](enum.BlobIoChunk.html): an enumeration to encapsulate different [BlobChunkInfo] +//! implementations for [BlobIoDesc]. +//! 
- [BlobIoDesc](struct.BlobIoDesc.html): a blob IO descriptor, containing information for a +//! continuous IO range within a chunk. +//! - [BlobIoVec](struct.BlobIoVec.html): a scatter/gather list for blob IO operation, containing +//! one or more blob IO descriptors +//! - [BlobPrefetchRequest](struct.BlobPrefetchRequest.html): a blob data prefetching request. +use std::any::Any; +use std::collections::hash_map::Drain; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::fmt::{Debug, Formatter}; +use std::fs::File; +use std::io::{self, Error}; +use std::ops::Deref; +use std::os::unix::io::AsRawFd; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use arc_swap::ArcSwap; +use fuse_backend_rs::api::filesystem::ZeroCopyWriter; +use fuse_backend_rs::file_buf::FileVolatileSlice; +use fuse_backend_rs::file_traits::FileReadWriteVolatile; + +use nydus_api::ConfigV2; +use nydus_utils::compress; +use nydus_utils::crypt::{self, Cipher, CipherContext}; +use nydus_utils::digest::{self, RafsDigest}; + +use crate::cache::BlobCache; +use crate::factory::BLOB_FACTORY; + +pub(crate) const BLOB_FEATURE_INCOMPAT_MASK: u32 = 0x0000_ffff; +pub(crate) const BLOB_FEATURE_INCOMPAT_VALUE: u32 = 0x0000_0fff; + +bitflags! { + /// Features bits for blob management. + pub struct BlobFeatures: u32 { + /// Uncompressed chunk data is aligned. + const ALIGNED = 0x0000_0001; + /// RAFS meta data is inlined in the data blob. + const INLINED_FS_META = 0x0000_0002; + /// Blob chunk information format v2. + const CHUNK_INFO_V2 = 0x0000_0004; + /// Blob compression information data include context data for zlib random access. + const ZRAN = 0x0000_0008; + /// Blob data and blob meta are stored in separate blobs. + const SEPARATE = 0x0000_0010; + /// Chunk digest array is inlined in the data blob. + const INLINED_CHUNK_DIGEST = 0x0000_0020; + /// Blob is for RAFS filesystems in TARFS mode. + const TARFS = 0x0000_0040; + /// Small file chunk are merged into batch chunk. + const BATCH = 0x0000_0080; + /// Whether the Blob is encrypted. + const ENCRYPTED = 0x0000_0100; + /// Blob has TAR headers to separate contents. + const HAS_TAR_HEADER = 0x1000_0000; + /// Blob has Table of Content (ToC) at the tail. + const HAS_TOC = 0x2000_0000; + /// Data blob are encoded with Tar header and optionally ToC. + /// It's also a flag indicating that images are generated with `nydus-image` v2.2 or newer. + const CAP_TAR_TOC = 0x4000_0000; + /// Rafs V5 image without extended blob table, this is an internal flag. + const _V5_NO_EXT_BLOB_TABLE = 0x8000_0000; + /// Blob is generated with chunkdict. + const IS_CHUNKDICT_GENERATED = 0x0000_0200; + } +} + +impl Default for BlobFeatures { + fn default() -> Self { + BlobFeatures::empty() + } +} + +impl BlobFeatures { + /// Check whether the blob is for RAFS filesystems in TARFS mode. + pub fn is_tarfs(&self) -> bool { + self.contains(BlobFeatures::CAP_TAR_TOC) && self.contains(BlobFeatures::TARFS) + } +} + +impl TryFrom for BlobFeatures { + type Error = Error; + + fn try_from(value: u32) -> Result { + if value & BLOB_FEATURE_INCOMPAT_MASK & !BLOB_FEATURE_INCOMPAT_VALUE != 0 + || value & BlobFeatures::_V5_NO_EXT_BLOB_TABLE.bits() != 0 + { + Err(einval!(format!("invalid blob features: 0x{:x}", value))) + } else { + // Safe because we have just validated feature flags. + Ok(unsafe { BlobFeatures::from_bits_unchecked(value) }) + } + } +} + +/// Configuration information for a metadata/data blob object. 
+/// +/// The `BlobInfo` structure provides information for the storage subsystem to manage a blob file +/// and serve blob IO requests for clients. +#[derive(Clone, Debug, Default)] +pub struct BlobInfo { + /// The index of blob in RAFS blob table. + blob_index: u32, + /// A sha256 hex string generally. + blob_id: String, + /// Feature bits for blob management. + blob_features: BlobFeatures, + /// Size of the compressed blob file. + compressed_size: u64, + /// Size of the uncompressed blob file, or the cache file. + uncompressed_size: u64, + /// Chunk size. + chunk_size: u32, + /// Number of chunks in blob file. + /// A helper to distinguish bootstrap with extended blob table or not: + /// Bootstrap with extended blob table always has non-zero `chunk_count` + chunk_count: u32, + /// Compression algorithm to process the blob. + compressor: compress::Algorithm, + /// Chunk data encryption algorithm. + cipher: crypt::Algorithm, + /// Message digest algorithm to process the blob. + digester: digest::Algorithm, + /// Starting offset of the data to prefetch. + prefetch_offset: u32, + /// Size of blob data to prefetch. + prefetch_size: u32, + /// The blob is for a legacy estargz image. + is_legacy_stargz: bool, + + /// V6: compressor that is used for compressing chunk info array. + meta_ci_compressor: u32, + /// V6: Offset of the chunk information array in the compressed blob. + meta_ci_offset: u64, + /// V6: Size of the compressed chunk information array. + meta_ci_compressed_size: u64, + /// V6: Size of the uncompressed chunk information array. + meta_ci_uncompressed_size: u64, + + // SHA256 digest of blob ToC content, including the toc tar header. + // It's all zero for blobs with inlined-meta. + blob_toc_digest: [u8; 32], + // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and + // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special + // handling. + blob_meta_digest: [u8; 32], + // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta. + blob_meta_size: u64, + // Size of blob ToC content, it's zero for blobs with inlined-meta. + blob_toc_size: u32, + + /// V6: support fs-cache mode + fs_cache_file: Option>, + /// V6: support inlined-meta + meta_path: Arc>, + /// V6: support data encryption. + cipher_object: Arc, + /// Cipher context for encryption. + cipher_ctx: Option, + + /// is chunkdict generated + is_chunkdict_generated: bool, +} + +impl BlobInfo { + /// Create a new instance of `BlobInfo`. 
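+    ///
+    /// A minimal construction sketch; the sizes and counts below are illustrative
+    /// placeholders only:
+    ///
+    /// ```ignore
+    /// let blob = BlobInfo::new(
+    ///     0,                    // blob_index
+    ///     "blob_id".to_owned(), // blob_id
+    ///     0x20_0000,            // uncompressed_size
+    ///     0x10_0000,            // compressed_size
+    ///     0x10_0000,            // chunk_size
+    ///     2,                    // chunk_count
+    ///     BlobFeatures::default(),
+    /// );
+    /// assert_eq!(blob.chunk_count(), 2);
+    /// ```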
+ pub fn new( + blob_index: u32, + blob_id: String, + uncompressed_size: u64, + compressed_size: u64, + chunk_size: u32, + chunk_count: u32, + blob_features: BlobFeatures, + ) -> Self { + let blob_id = blob_id.trim_end_matches('\0').to_string(); + let mut blob_info = BlobInfo { + blob_index, + blob_id, + blob_features, + uncompressed_size, + compressed_size, + chunk_size, + chunk_count, + + compressor: compress::Algorithm::None, + cipher: crypt::Algorithm::None, + digester: digest::Algorithm::Blake3, + prefetch_offset: 0, + prefetch_size: 0, + is_legacy_stargz: false, + meta_ci_compressor: 0, + meta_ci_offset: 0, + meta_ci_compressed_size: 0, + meta_ci_uncompressed_size: 0, + + blob_toc_digest: [0u8; 32], + blob_meta_digest: [0u8; 32], + blob_meta_size: 0, + blob_toc_size: 0, + + fs_cache_file: None, + meta_path: Arc::new(Mutex::new(String::new())), + cipher_object: Default::default(), + cipher_ctx: None, + + is_chunkdict_generated: false, + }; + + blob_info.compute_features(); + + blob_info + } + + /// Set the is_chunkdict_generated flag. + pub fn set_chunkdict_generated(&mut self, is_chunkdict_generated: bool) { + self.is_chunkdict_generated = is_chunkdict_generated; + } + + /// Get the is_chunkdict_generated flag. + pub fn is_chunkdict_generated(&self) -> bool { + self.is_chunkdict_generated + } + + /// Get the blob index in the blob array. + pub fn blob_index(&self) -> u32 { + self.blob_index + } + + /// Get the id of the blob, with special handling of `inlined-meta` case. + pub fn blob_id(&self) -> String { + if (self.has_feature(BlobFeatures::INLINED_FS_META) + && !self.has_feature(BlobFeatures::SEPARATE)) + || !self.has_feature(BlobFeatures::CAP_TAR_TOC) + { + let guard = self.meta_path.lock().unwrap(); + if !guard.is_empty() { + return guard.deref().clone(); + } + } + self.blob_id.clone() + } + + /// Get raw blob id, without special handling of `inlined-meta` case. + pub fn raw_blob_id(&self) -> &str { + &self.blob_id + } + + /// Get size of compressed chunk data, not including `blob.meta`, `blob.chunk`, `toc` etc. + pub fn compressed_data_size(&self) -> u64 { + if self.has_feature(BlobFeatures::SEPARATE) { + // It's the size of referenced OCIv1 targz blob. + self.compressed_size + } else if self.has_feature(BlobFeatures::CAP_TAR_TOC) { + // Image built with nydus 2.2 and newer versions. + if self.meta_ci_is_valid() { + // For RAFS v6 + if self.has_feature(BlobFeatures::HAS_TAR_HEADER) { + // There's a tar header between chunk data and compression information. + self.meta_ci_offset - 0x200 + } else { + self.meta_ci_offset + } + } else { + // For RAFS v5 + if self.has_feature(BlobFeatures::HAS_TAR_HEADER) { + // There's a tar header between chunk data and fs meta data. + self.compressed_size - 0x200 + } else { + self.compressed_size + } + } + } else { + // Images built with nydus 2.1 and previous versions. + self.compressed_size + } + } + + /// Get size of the compressed blob, including `blob.meta`, `blob.chunk`, `toc` etc. + pub fn compressed_size(&self) -> u64 { + self.compressed_size + } + + /// Get size of the uncompressed blob. + pub fn uncompressed_size(&self) -> u64 { + self.uncompressed_size + } + + /// Get chunk size. + pub fn chunk_size(&self) -> u32 { + self.chunk_size + } + + /// Get number of chunks in the blob. + pub fn chunk_count(&self) -> u32 { + self.chunk_count + } + + /// Get the compression algorithm to handle the blob data. + pub fn compressor(&self) -> compress::Algorithm { + self.compressor + } + + /// Set compression algorithm for the blob. 
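+    ///
+    /// A minimal sketch, assuming `blob` is a mutable `BlobInfo` created elsewhere:
+    ///
+    /// ```ignore
+    /// blob.set_compressor(compress::Algorithm::Zstd);
+    /// assert_eq!(blob.compressor(), compress::Algorithm::Zstd);
+    /// ```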
+ pub fn set_compressor(&mut self, compressor: compress::Algorithm) { + self.compressor = compressor; + self.compute_features(); + } + + /// Get the cipher algorithm to handle chunk data. + pub fn cipher(&self) -> crypt::Algorithm { + self.cipher + } + + /// Set encryption algorithm for the blob. + pub fn set_cipher(&mut self, cipher: crypt::Algorithm) { + self.cipher = cipher; + } + + /// Get the cipher object to encrypt/decrypt chunk data. + pub fn cipher_object(&self) -> Arc { + self.cipher_object.clone() + } + + /// Get the cipher context. + pub fn cipher_context(&self) -> Option { + self.cipher_ctx.clone() + } + + /// Set the cipher info, including cipher algo, cipher object and cipher context. + pub fn set_cipher_info( + &mut self, + cipher: crypt::Algorithm, + cipher_object: Arc, + cipher_ctx: Option, + ) { + self.cipher = cipher; + self.cipher_object = cipher_object; + self.cipher_ctx = cipher_ctx; + } + + /// Get the message digest algorithm for the blob. + pub fn digester(&self) -> digest::Algorithm { + self.digester + } + + /// Set compression algorithm for the blob. + pub fn set_digester(&mut self, digester: digest::Algorithm) { + self.digester = digester; + } + + /// Get blob data prefetching offset. + pub fn prefetch_offset(&self) -> u64 { + self.prefetch_offset as u64 + } + + /// Get blob data prefetching offset. + pub fn prefetch_size(&self) -> u64 { + self.prefetch_size as u64 + } + + /// Set a range for blob data prefetching. + /// + /// Only one range could be configured per blob, and zero prefetch_size means disabling blob + /// data prefetching. + pub fn set_prefetch_info(&mut self, offset: u64, size: u64) { + self.prefetch_offset = offset as u32; + self.prefetch_size = size as u32; + } + + /// Check whether this blob is for an stargz image. + pub fn is_legacy_stargz(&self) -> bool { + self.is_legacy_stargz + } + + /// Set metadata information for a blob. + /// + /// The compressed blobs are laid out as: + /// `[compressed chunk data], [compressed metadata], [uncompressed header]`. + pub fn set_blob_meta_info( + &mut self, + offset: u64, + compressed_size: u64, + uncompressed_size: u64, + compressor: u32, + ) { + self.meta_ci_compressor = compressor; + self.meta_ci_offset = offset; + self.meta_ci_compressed_size = compressed_size; + self.meta_ci_uncompressed_size = uncompressed_size; + } + + /// Get compression algorithm for chunk information array. + pub fn meta_ci_compressor(&self) -> compress::Algorithm { + if self.meta_ci_compressor == compress::Algorithm::Lz4Block as u32 { + compress::Algorithm::Lz4Block + } else if self.meta_ci_compressor == compress::Algorithm::GZip as u32 { + compress::Algorithm::GZip + } else if self.meta_ci_compressor == compress::Algorithm::Zstd as u32 { + compress::Algorithm::Zstd + } else { + compress::Algorithm::None + } + } + + /// Get offset of chunk information array in the compressed blob. + pub fn meta_ci_offset(&self) -> u64 { + self.meta_ci_offset + } + + /// Get size of the compressed chunk information array. + pub fn meta_ci_compressed_size(&self) -> u64 { + self.meta_ci_compressed_size + } + + /// Get the uncompressed size of the chunk information array. + pub fn meta_ci_uncompressed_size(&self) -> u64 { + self.meta_ci_uncompressed_size + } + + /// Check whether compression metadata is available. + pub fn meta_ci_is_valid(&self) -> bool { + self.meta_ci_compressed_size != 0 && self.meta_ci_uncompressed_size != 0 + } + + /// Set the associated `File` object provided by Linux fscache subsystem. 
+ pub fn set_fscache_file(&mut self, file: Option>) { + self.fs_cache_file = file; + } + + #[cfg(target_os = "linux")] + /// Get the associated `File` object provided by Linux fscache subsystem. + pub(crate) fn get_fscache_file(&self) -> Option> { + self.fs_cache_file.clone() + } + + /// Get blob features. + pub fn features(&self) -> BlobFeatures { + self.blob_features + } + + /// Check whether the requested features are available. + pub fn has_feature(&self, features: BlobFeatures) -> bool { + self.blob_features.bits() & features.bits() == features.bits() + } + + /// Generate feature flags according to blob configuration. + fn compute_features(&mut self) { + if self.chunk_count == 0 { + self.blob_features |= BlobFeatures::_V5_NO_EXT_BLOB_TABLE; + } + if self.compressor == compress::Algorithm::GZip + && !self.has_feature(BlobFeatures::CHUNK_INFO_V2) + { + self.is_legacy_stargz = true; + } + } + + /// Get SHA256 digest of the ToC content, including the toc tar header. + /// + /// It's all zero for inlined bootstrap. + pub fn blob_toc_digest(&self) -> &[u8; 32] { + &self.blob_toc_digest + } + + /// Set SHA256 digest of the ToC content, including the toc tar header. + pub fn set_blob_toc_digest(&mut self, digest: [u8; 32]) { + self.blob_toc_digest = digest; + } + + /// Get size of the ToC content. It's all zero for inlined bootstrap. + pub fn blob_toc_size(&self) -> u32 { + self.blob_toc_size + } + + /// Set size of the ToC content. + pub fn set_blob_toc_size(&mut self, sz: u32) { + self.blob_toc_size = sz; + } + + /// The RAFS blob contains `blob.meta`, `blob.digest`, `image.boot`, `ToC` etc. + /// Get SHA256 digest of RAFS blob containing `blob.meta`, `blob.digest` `blob.toc` and + /// optionally 'image.boot`. + /// + /// Default to `self.blob_id` when it's all zero. + pub fn blob_meta_digest(&self) -> &[u8; 32] { + &self.blob_meta_digest + } + + /// Set SHA256 digest of the RAFS blob. + pub fn set_blob_meta_digest(&mut self, digest: [u8; 32]) { + self.blob_meta_digest = digest; + } + + /// Get size of the RAFS blob. + pub fn blob_meta_size(&self) -> u64 { + self.blob_meta_size + } + + /// Set size of the RAFS blob. + pub fn set_blob_meta_size(&mut self, size: u64) { + self.blob_meta_size = size; + } + + /// Set path for meta blob file, which will be used by `get_blob_id()` and `get_blob_meta_id()`. + pub fn set_blob_id_from_meta_path(&self, path: &Path) -> Result<(), Error> { + *self.meta_path.lock().unwrap() = Self::get_blob_id_from_meta_path(path)?; + Ok(()) + } + + pub fn get_blob_id_from_meta_path(path: &Path) -> Result { + // Manual implementation of Path::file_prefix(). + let mut id = path.file_name().ok_or_else(|| { + einval!(format!( + "failed to get blob id from meta file path {}", + path.display() + )) + })?; + loop { + let id1 = Path::new(id).file_stem().ok_or_else(|| { + einval!(format!( + "failed to get blob id from meta file path {}", + path.display() + )) + })?; + if id1.is_empty() { + return Err(einval!(format!( + "failed to get blob id from meta file path {}", + path.display() + ))); + } else if id == id1 { + break; + } else { + id = id1; + } + } + let id = id.to_str().ok_or_else(|| { + einval!(format!( + "failed to get blob id from meta file path {}", + path.display() + )) + })?; + + Ok(id.to_string()) + } + + /// Get RAFS blob id for ZRan. 
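+    ///
+    /// This is only meaningful for blobs carrying `BlobFeatures::SEPARATE`; calling it on other
+    /// blobs hits the assertion below. A minimal sketch, assuming `blob` is such a `BlobInfo`:
+    ///
+    /// ```ignore
+    /// if blob.has_feature(BlobFeatures::SEPARATE) {
+    ///     let meta_id = blob.get_blob_meta_id()?;
+    /// }
+    /// ```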
+ pub fn get_blob_meta_id(&self) -> Result { + assert!(self.has_feature(BlobFeatures::SEPARATE)); + let id = if self.has_feature(BlobFeatures::INLINED_FS_META) { + let guard = self.meta_path.lock().unwrap(); + if guard.is_empty() { + return Err(einval!("failed to get blob id from meta file name")); + } + guard.deref().clone() + } else { + hex::encode(self.blob_meta_digest) + }; + Ok(id) + } + + /// Get the cipher info, including cipher algo, cipher object and cipher context. + pub fn get_cipher_info(&self) -> (crypt::Algorithm, Arc, Option) { + ( + self.cipher, + self.cipher_object.clone(), + self.cipher_ctx.clone(), + ) + } +} + +bitflags! { + /// Blob chunk flags. + pub struct BlobChunkFlags: u32 { + /// Chunk data is compressed. + const COMPRESSED = 0x0000_0001; + /// Chunk is a hole, with all data as zero. + const _HOLECHUNK = 0x0000_0002; + /// Chunk data is encrypted. + const ENCYPTED = 0x0000_0004; + /// Chunk data is merged into a batch chunk. + const BATCH = 0x0000_0008; + } +} + +impl Default for BlobChunkFlags { + fn default() -> Self { + BlobChunkFlags::empty() + } +} + +/// Trait to provide basic information for a chunk. +/// +/// A `BlobChunkInfo` object describes how a chunk is located within the compressed and +/// uncompressed data blobs. It's used to help the storage subsystem to: +/// - download chunks from storage backend +/// - maintain chunk readiness state for each chunk +/// - convert from compressed form to uncompressed form +/// +/// This trait may be extended to provide additional information for a specific Rafs filesystem +/// version, for example `BlobV5ChunkInfo` provides Rafs v5 filesystem related information about +/// a chunk. +pub trait BlobChunkInfo: Any + Sync + Send { + /// Get the message digest value of the chunk, which acts as an identifier for the chunk. + fn chunk_id(&self) -> &RafsDigest; + + /// Get a unique ID to identify the chunk within the metadata/data blob. + /// + /// The returned value of `id()` is often been used as HashMap keys, so `id()` method should + /// return unique identifier for each chunk of a blob file. + fn id(&self) -> u32; + + /// Get the blob index of the blob file in the Rafs v5 metadata's blob array. + fn blob_index(&self) -> u32; + + /// Get the chunk offset in the compressed blob. + fn compressed_offset(&self) -> u64; + + /// Get the size of the compressed chunk. + fn compressed_size(&self) -> u32; + + /// Get end of the chunk in the compressed blob. + fn compressed_end(&self) -> u64 { + self.compressed_offset() + self.compressed_size() as u64 + } + + /// Get the chunk offset in the uncompressed blob. + fn uncompressed_offset(&self) -> u64; + + /// Get the size of the uncompressed chunk. + fn uncompressed_size(&self) -> u32; + + /// Get end of the chunk in the compressed blob. + fn uncompressed_end(&self) -> u64 { + self.uncompressed_offset() + self.uncompressed_size() as u64 + } + + /// Check whether the chunk is batch chunk or not. + fn is_batch(&self) -> bool; + + /// Check whether the chunk is compressed or not. + /// + /// Some chunk may become bigger after compression, so plain data instead of compressed + /// data may be stored in the compressed data blob for those chunks. + fn is_compressed(&self) -> bool; + + /// Check whether the chunk is encrypted or not. + fn is_encrypted(&self) -> bool; + + fn as_any(&self) -> &dyn Any; +} + +/// An enumeration to encapsulate different [BlobChunkInfo] implementations for [BlobIoDesc]. 
+/// +/// This helps to feed unified IO description to storage subsystem from both rafs v6 and v5 since +/// rafs v6 have a different ChunkInfo definition on bootstrap. +#[derive(Clone)] +pub struct BlobIoChunk(Arc); + +impl From> for BlobIoChunk { + fn from(v: Arc) -> Self { + BlobIoChunk(v) + } +} + +impl BlobChunkInfo for BlobIoChunk { + fn chunk_id(&self) -> &RafsDigest { + self.0.chunk_id() + } + + fn id(&self) -> u32 { + self.0.id() + } + + fn blob_index(&self) -> u32 { + self.0.blob_index() + } + + fn compressed_offset(&self) -> u64 { + self.0.compressed_offset() + } + + fn compressed_size(&self) -> u32 { + self.0.compressed_size() + } + + fn uncompressed_offset(&self) -> u64 { + self.0.uncompressed_offset() + } + + fn uncompressed_size(&self) -> u32 { + self.0.uncompressed_size() + } + + fn is_batch(&self) -> bool { + self.0.is_batch() + } + + fn is_compressed(&self) -> bool { + self.0.is_compressed() + } + + fn is_encrypted(&self) -> bool { + self.0.is_encrypted() + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +/// Blob IO descriptor, containing information for a continuous IO range within a chunk. +#[derive(Clone)] +pub struct BlobIoDesc { + /// The blob associated with the IO operation. + pub blob: Arc, + /// The chunk associated with the IO operation. + pub chunkinfo: BlobIoChunk, + /// Offset from start of the chunk for the IO operation. + pub offset: u32, + /// Size of the IO operation + pub size: u32, + /// Whether it's a user initiated IO, otherwise is a storage system internal IO. + /// + /// It might be initiated by user io amplification. With this flag, lower device + /// layer may choose how to prioritize the IO operation. + pub(crate) user_io: bool, +} + +impl BlobIoDesc { + /// Create a new blob IO descriptor. + pub fn new( + blob: Arc, + chunkinfo: BlobIoChunk, + offset: u32, + size: u32, + user_io: bool, + ) -> Self { + BlobIoDesc { + blob, + chunkinfo, + offset, + size, + user_io, + } + } + + /// Check whether the `other` BlobIoDesc is continuous to current one. + pub fn is_continuous(&self, next: &BlobIoDesc, max_gap: u64) -> bool { + let prev_end = self.chunkinfo.compressed_offset() + self.chunkinfo.compressed_size() as u64; + let next_offset = next.chunkinfo.compressed_offset(); + + if self.chunkinfo.is_batch() || next.chunkinfo.is_batch() { + // Batch chunk can only be compared by uncompressed info. + return next.chunkinfo.uncompressed_offset() - self.chunkinfo.uncompressed_end() + <= max_gap; + } + + if self.chunkinfo.blob_index() == next.chunkinfo.blob_index() && next_offset >= prev_end { + if next.blob.is_legacy_stargz() { + next_offset - prev_end <= max_gap * 8 + } else { + next_offset - prev_end <= max_gap + } + } else { + false + } + } +} + +impl Debug for BlobIoDesc { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("BlobIoDesc") + .field("blob_index", &self.blob.blob_index) + .field("chunk_index", &self.chunkinfo.id()) + .field("compressed_offset", &self.chunkinfo.compressed_offset()) + .field("file_offset", &self.offset) + .field("size", &self.size) + .field("user", &self.user_io) + .finish() + } +} + +/// Scatter/gather list for blob IO operation, containing zero or more blob IO descriptors +pub struct BlobIoVec { + /// The blob associated with the IO operation. + bi_blob: Arc, + /// Total size of blob IOs to be performed. + bi_size: u64, + /// Array of blob IOs, these IOs should be executed sequentially. + pub(crate) bi_vec: Vec, +} + +impl BlobIoVec { + /// Create a new blob IO scatter/gather list object. 
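+    ///
+    /// A minimal sketch of building a vector and queueing one descriptor, assuming `blob`
+    /// (an `Arc<BlobInfo>`) and `chunk` (a `BlobIoChunk`) are provided by the caller:
+    ///
+    /// ```ignore
+    /// let mut iovec = BlobIoVec::new(blob.clone());
+    /// iovec.push(BlobIoDesc::new(blob, chunk, 0, 0x1000, true));
+    /// assert_eq!(iovec.size(), 0x1000);
+    /// ```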
+ pub fn new(bi_blob: Arc) -> Self { + BlobIoVec { + bi_blob, + bi_size: 0, + bi_vec: Vec::with_capacity(128), + } + } + + /// Add a new 'BlobIoDesc' to the 'BlobIoVec'. + pub fn push(&mut self, desc: BlobIoDesc) { + assert_eq!(self.bi_blob.blob_index(), desc.blob.blob_index()); + assert_eq!(self.bi_blob.blob_id(), desc.blob.blob_id()); + assert!(self.bi_size.checked_add(desc.size as u64).is_some()); + self.bi_size += desc.size as u64; + self.bi_vec.push(desc); + } + + /// Append another blob io vector to current one. + pub fn append(&mut self, mut vec: BlobIoVec) { + assert_eq!(self.bi_blob.blob_id(), vec.bi_blob.blob_id()); + assert!(self.bi_size.checked_add(vec.bi_size).is_some()); + self.bi_vec.append(vec.bi_vec.as_mut()); + self.bi_size += vec.bi_size; + } + + /// Reset the blob io vector. + pub fn reset(&mut self) { + self.bi_size = 0; + self.bi_vec.truncate(0); + } + + /// Get number of 'BlobIoDesc' in the 'BlobIoVec'. + pub fn len(&self) -> usize { + self.bi_vec.len() + } + + /// Check whether there's 'BlobIoDesc' in the 'BlobIoVec'. + pub fn is_empty(&self) -> bool { + self.bi_vec.is_empty() + } + + /// Get size of pending IO data. + pub fn size(&self) -> u64 { + self.bi_size + } + + /// Get an immutable reference to a `BlobIoDesc` entry. + pub fn blob_io_desc(&self, index: usize) -> Option<&BlobIoDesc> { + if index < self.bi_vec.len() { + Some(&self.bi_vec[index]) + } else { + None + } + } + + /// Get the target blob index of the blob io vector. + pub fn blob_index(&self) -> u32 { + self.bi_blob.blob_index() + } + + /// Check whether the blob io vector is targeting the blob with `blob_index` + pub fn is_target_blob(&self, blob_index: u32) -> bool { + self.bi_blob.blob_index() == blob_index + } + + /// Check whether two blob io vector targets the same blob. + pub fn has_same_blob(&self, desc: &BlobIoVec) -> bool { + self.bi_blob.blob_index() == desc.bi_blob.blob_index() + } +} + +impl Debug for BlobIoVec { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("BlobIoDesc") + .field("blob_index", &self.bi_blob.blob_index) + .field("size", &self.bi_size) + .field("decriptors", &self.bi_vec) + .finish() + } +} + +/// Helper structure to merge blob IOs to reduce IO requests. +#[derive(Default)] +pub struct BlobIoMerge { + map: HashMap, + current: String, +} + +impl BlobIoMerge { + /// Append an `BlobIoVec` object to the merge state object. + pub fn append(&mut self, desc: BlobIoVec) { + if !desc.is_empty() { + let id = desc.bi_blob.blob_id.as_str(); + if self.current != id { + self.current = id.to_string(); + } + if let Some(prev) = self.map.get_mut(id) { + prev.append(desc); + } else { + self.map.insert(id.to_string(), desc); + } + } + } + + /// Drain elements in the cache. + pub fn drain(&mut self) -> Drain<'_, String, BlobIoVec> { + self.map.drain() + } + + /// Get current element. + pub fn get_current_element(&mut self) -> Option<&mut BlobIoVec> { + self.map.get_mut(&self.current) + } +} + +/// A segment representing a continuous range for a blob IO operation. +/// +/// It can span multiple chunks while the `offset` is where the user io starts +/// within the first chunk and `len` is the total user io length of these chunks. +#[derive(Clone, Debug, Default)] +pub(crate) struct BlobIoSegment { + /// Start position of the range within the chunk + pub offset: u32, + /// Size of the range within the chunk + pub len: u32, +} + +impl BlobIoSegment { + /// Create a new instance of `ChunkSegment`. 
+ pub fn new(offset: u32, len: u32) -> Self { + Self { offset, len } + } + + #[inline] + pub fn append(&mut self, offset: u32, len: u32) { + assert!(offset.checked_add(len).is_some()); + assert_eq!(offset, 0); + + self.len += len; + } + + pub fn is_empty(&self) -> bool { + self.offset == 0 && self.len == 0 + } +} + +/// Struct to maintain information about blob IO operation. +#[derive(Clone, Debug)] +pub(crate) enum BlobIoTag { + /// Io requests to fulfill user requests. + User(BlobIoSegment), + /// Io requests to fulfill internal requirements. + Internal, +} + +impl BlobIoTag { + /// Check whether the tag is a user issued io request. + pub fn is_user_io(&self) -> bool { + matches!(self, BlobIoTag::User(_)) + } +} + +/// Struct to representing multiple continuous blob IO as one storage backend request. +/// +/// For network based remote storage backend, such as Registry/OS, it may have limited IOPs +/// due to high request round-trip time, but have enough network bandwidth. In such cases, +/// it may help to improve performance by merging multiple continuous and small blob IO +/// requests into one big backend request. +/// +/// A `BlobIoRange` request targets a continuous range of a single blob. +#[derive(Default, Clone)] +pub struct BlobIoRange { + pub(crate) blob_info: Arc, + pub(crate) blob_offset: u64, + pub(crate) blob_size: u64, + pub(crate) chunks: Vec>, + pub(crate) tags: Vec, +} + +impl Debug for BlobIoRange { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + f.debug_struct("BlobIoRange") + .field("blob_id", &self.blob_info.blob_id()) + .field("blob_offset", &self.blob_offset) + .field("blob_size", &self.blob_size) + .field("tags", &self.tags) + .finish() + } +} + +impl BlobIoRange { + /// Create a new instance of `BlobIoRange`. + pub fn new(bio: &BlobIoDesc, capacity: usize) -> Self { + let blob_size = bio.chunkinfo.compressed_size() as u64; + let blob_offset = bio.chunkinfo.compressed_offset(); + assert!(blob_offset.checked_add(blob_size).is_some()); + + let mut chunks = Vec::with_capacity(capacity); + let mut tags = Vec::with_capacity(capacity); + tags.push(Self::tag_from_desc(bio)); + chunks.push(bio.chunkinfo.0.clone()); + + BlobIoRange { + blob_info: bio.blob.clone(), + blob_offset, + blob_size, + chunks, + tags, + } + } + + /// Merge an `BlobIoDesc` into the `BlobIoRange` object. + pub fn merge(&mut self, bio: &BlobIoDesc, _max_gap: u64) { + let end = self.blob_offset + self.blob_size; + let offset = bio.chunkinfo.compressed_offset(); + let size = bio.chunkinfo.compressed_size() as u64; + let size = if end == offset { + assert!(offset.checked_add(size).is_some()); + size + } else { + assert!(offset > end); + size + (offset - end) + }; + assert!(end.checked_add(size).is_some()); + + self.blob_size += size; + self.tags.push(Self::tag_from_desc(bio)); + self.chunks.push(bio.chunkinfo.0.clone()); + } + + fn tag_from_desc(bio: &BlobIoDesc) -> BlobIoTag { + if bio.user_io { + BlobIoTag::User(BlobIoSegment::new(bio.offset, bio.size as u32)) + } else { + BlobIoTag::Internal + } + } +} + +/// Struct representing a blob data prefetching request. +/// +/// It may help to improve performance for the storage backend to prefetch data in background. +/// A `BlobPrefetchControl` object advises to prefetch data range [offset, offset + len) from +/// blob `blob_id`. The prefetch operation should be asynchronous, and cache hit for filesystem +/// read operations should validate data integrity. +pub struct BlobPrefetchRequest { + /// The ID of the blob to prefetch data for. 
+ pub blob_id: String, + /// Offset into the blob to prefetch data. + pub offset: u64, + /// Size of data to prefetch. + pub len: u64, +} + +/// Trait to provide direct access to underlying uncompressed blob file. +/// +/// The suggested flow to make use of an `BlobObject` is as below: +/// - call `is_all_data_ready()` to check all blob data has already been cached. If true, skip +/// next step. +/// - call `fetch()` to ensure blob range [offset, offset + size) has been cached. +/// - call `as_raw_fd()` to get the underlying file descriptor for direct access. +/// - call File::read(buf, offset + `base_offset()`, size) to read data from underlying cache file. +pub trait BlobObject: AsRawFd { + /// Get base offset to read blob from the fd returned by `as_raw_fd()`. + fn base_offset(&self) -> u64; + + /// Check whether all data of the blob object is ready. + fn is_all_data_ready(&self) -> bool; + + /// Fetch data from storage backend covering compressed blob range [offset, offset + size). + /// + /// Used by asynchronous prefetch worker to implement blob prefetch. + fn fetch_range_compressed(&self, offset: u64, size: u64, prefetch: bool) -> io::Result<()>; + + /// Fetch data from storage backend and make sure data range [offset, offset + size) is ready + /// for use. + /// + /// Used by rafs to support blobfs. + fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> io::Result<()>; + + /// Prefetch data for specified chunks from storage backend. + /// + /// Used by asynchronous prefetch worker to implement fs prefetch. + fn prefetch_chunks(&self, range: &BlobIoRange) -> io::Result<()>; +} + +/// A wrapping object over an underlying [BlobCache] object. +/// +/// All blob Io requests are actually served by the underlying [BlobCache] object. The wrapper +/// provides an interface to dynamically switch underlying [BlobCache] objects. +#[derive(Clone, Default)] +pub struct BlobDevice { + blobs: Arc>>>, + blob_count: usize, +} + +impl BlobDevice { + /// Create new blob device instance. + pub fn new(config: &Arc, blob_infos: &[Arc]) -> io::Result { + let mut blobs = Vec::with_capacity(blob_infos.len()); + for blob_info in blob_infos.iter() { + let blob = BLOB_FACTORY.new_blob_cache(config, blob_info)?; + blobs.push(blob); + } + + Ok(BlobDevice { + blobs: Arc::new(ArcSwap::new(Arc::new(blobs))), + blob_count: blob_infos.len(), + }) + } + + /// Update configuration and storage backends of the blob device. + /// + /// The `update()` method switch a new storage backend object according to the configuration + /// information passed in. + pub fn update( + &self, + config: &Arc, + blob_infos: &[Arc], + fs_prefetch: bool, + ) -> io::Result<()> { + if self.blobs.load().len() != blob_infos.len() { + return Err(einval!( + "number of blobs doesn't match when update 'BlobDevice' object" + )); + } + + let mut blobs = Vec::with_capacity(blob_infos.len()); + for blob_info in blob_infos.iter() { + let blob = BLOB_FACTORY.new_blob_cache(config, blob_info)?; + blobs.push(blob); + } + + if fs_prefetch { + // Stop prefetch if it is running before swapping backend since prefetch threads cloned + // Arc, the swap operation can't drop inner object completely. + // Otherwise prefetch threads will be leaked. + self.stop_prefetch(); + } + self.blobs.store(Arc::new(blobs)); + if fs_prefetch { + self.start_prefetch(); + } + + Ok(()) + } + + /// Close the blob device. + pub fn close(&self) -> io::Result<()> { + Ok(()) + } + + /// Check whether the `BlobDevice` has any blobs. 
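+    ///
+    /// It returns `true` only when the device was created from a non-empty `BlobInfo` list.
+    /// A minimal sketch, assuming `config` and `blob_infos` are prepared by the caller:
+    ///
+    /// ```ignore
+    /// let device = BlobDevice::new(&config, &blob_infos)?;
+    /// if device.has_device() {
+    ///     // at least one data blob is available to serve IO requests
+    /// }
+    /// ```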
+ pub fn has_device(&self) -> bool { + self.blob_count > 0 + } + + /// Read a range of data from a data blob into the provided writer + pub fn read_to(&self, w: &mut dyn ZeroCopyWriter, desc: &mut BlobIoVec) -> io::Result { + // Validate that: + // - bi_vec[0] is valid + // - bi_vec[0].blob.blob_index() is valid + // - all IOs are against a single blob. + if desc.bi_vec.is_empty() { + if desc.bi_size == 0 { + Ok(0) + } else { + Err(einval!("BlobIoVec size doesn't match.")) + } + } else if desc.blob_index() as usize >= self.blob_count { + Err(einval!("BlobIoVec has out of range blob_index.")) + } else { + let size = desc.bi_size; + let mut f = BlobDeviceIoVec::new(self, desc); + // The `off` parameter to w.write_from() is actually ignored by + // BlobV5IoVec::read_vectored_at_volatile() + w.write_from(&mut f, size as usize, 0) + } + } + + /// Try to prefetch specified blob data. + pub fn prefetch( + &self, + io_vecs: &[&BlobIoVec], + prefetches: &[BlobPrefetchRequest], + ) -> io::Result<()> { + for idx in 0..prefetches.len() { + if let Some(blob) = self.get_blob_by_id(&prefetches[idx].blob_id) { + let _ = blob.prefetch(blob.clone(), &prefetches[idx..idx + 1], &[]); + } + } + + for io_vec in io_vecs.iter() { + if let Some(blob) = self.get_blob_by_iovec(io_vec) { + // Prefetch errors are ignored. + let _ = blob + .prefetch(blob.clone(), &[], &io_vec.bi_vec) + .map_err(|e| { + error!("failed to prefetch blob data, {}", e); + }); + } + } + + Ok(()) + } + + /// Start the background blob data prefetch task. + pub fn start_prefetch(&self) { + for blob in self.blobs.load().iter() { + let _ = blob.start_prefetch(); + } + } + + /// Stop the background blob data prefetch task. + pub fn stop_prefetch(&self) { + for blob in self.blobs.load().iter() { + let _ = blob.stop_prefetch(); + } + } + + /// fetch specified blob data in a synchronous way. + pub fn fetch_range_synchronous(&self, prefetches: &[BlobPrefetchRequest]) -> io::Result<()> { + for req in prefetches { + if req.len == 0 { + continue; + } + if let Some(cache) = self.get_blob_by_id(&req.blob_id) { + trace!( + "fetch blob {} offset {} size {}", + req.blob_id, + req.offset, + req.len + ); + if let Some(obj) = cache.get_blob_object() { + obj.fetch_range_uncompressed(req.offset as u64, req.len as u64) + .map_err(|e| { + warn!( + "Failed to prefetch data from blob {}, offset {}, size {}, {}", + cache.blob_id(), + req.offset, + req.len, + e + ); + e + })?; + } else { + error!("No support for fetching uncompressed blob data"); + return Err(einval!("No support for fetching uncompressed blob data")); + } + } + } + + Ok(()) + } + + /// Check all chunks related to the blob io vector are ready. + pub fn all_chunks_ready(&self, io_vecs: &[BlobIoVec]) -> bool { + for io_vec in io_vecs.iter() { + if let Some(blob) = self.get_blob_by_iovec(io_vec) { + let chunk_map = blob.get_chunk_map(); + for desc in io_vec.bi_vec.iter() { + if !chunk_map.is_ready(&desc.chunkinfo).unwrap_or(false) { + return false; + } + } + } else { + return false; + } + } + + true + } + + /// RAFS V6: create a `BlobIoChunk` for chunk with index `chunk_index`. + pub fn create_io_chunk(&self, blob_index: u32, chunk_index: u32) -> Option { + if (blob_index as usize) < self.blob_count { + let state = self.blobs.load(); + let blob = &state[blob_index as usize]; + blob.get_chunk_info(chunk_index).map(|v| v.into()) + } else { + None + } + } + + /// RAFS V6: get chunk information object for chunks. 
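+    ///
+    /// A minimal sketch, assuming `device` is an existing `BlobDevice` with at least one blob:
+    ///
+    /// ```ignore
+    /// if let Some(chunk) = device.get_chunk_info(0, 0) {
+    ///     println!("chunk 0 compressed size: {}", chunk.compressed_size());
+    /// }
+    /// ```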
+ pub fn get_chunk_info( + &self, + blob_index: u32, + chunk_index: u32, + ) -> Option> { + if (blob_index as usize) < self.blob_count { + let state = self.blobs.load(); + let blob = &state[blob_index as usize]; + blob.get_chunk_info(chunk_index) + } else { + None + } + } + + fn get_blob_by_iovec(&self, iovec: &BlobIoVec) -> Option> { + let blob_index = iovec.blob_index(); + if (blob_index as usize) < self.blob_count { + return Some(self.blobs.load()[blob_index as usize].clone()); + } + + None + } + + fn get_blob_by_id(&self, blob_id: &str) -> Option> { + for blob in self.blobs.load().iter() { + if blob.blob_id() == blob_id { + return Some(blob.clone()); + } + } + + None + } +} + +/// Struct to execute Io requests with a single blob. +/// +/// It's used to support `BlobDevice::read_to()` and acts the main entrance to read chunk data +/// from data blobs. +struct BlobDeviceIoVec<'a> { + dev: &'a BlobDevice, + iovec: &'a mut BlobIoVec, +} + +impl<'a> BlobDeviceIoVec<'a> { + fn new(dev: &'a BlobDevice, iovec: &'a mut BlobIoVec) -> Self { + BlobDeviceIoVec { dev, iovec } + } +} + +impl FileReadWriteVolatile for BlobDeviceIoVec<'_> { + fn read_volatile(&mut self, _slice: FileVolatileSlice) -> Result { + // Skip because we don't really use it + unimplemented!(); + } + + fn write_volatile(&mut self, _slice: FileVolatileSlice) -> Result { + // Skip because we don't really use it + unimplemented!(); + } + + fn read_at_volatile(&mut self, slice: FileVolatileSlice, offset: u64) -> Result { + let buffers = [slice]; + self.read_vectored_at_volatile(&buffers, offset) + } + + // The default read_vectored_at_volatile only read to the first slice, so we have to overload it. + fn read_vectored_at_volatile( + &mut self, + buffers: &[FileVolatileSlice], + _offset: u64, + ) -> Result { + // BlobDevice::read_to() has validated that all IOs are against a single blob. + let index = self.iovec.blob_index(); + let blobs = &self.dev.blobs.load(); + + if (index as usize) < blobs.len() { + blobs[index as usize].read(self.iovec, buffers) + } else { + let msg = format!( + "failed to get blob object for BlobIoVec, index {}, blob array len: {}", + index, + blobs.len() + ); + Err(einval!(msg)) + } + } + + fn write_at_volatile( + &mut self, + _slice: FileVolatileSlice, + _offset: u64, + ) -> Result { + unimplemented!() + } +} + +/// Traits and Structs to support Rafs v5 image format. +/// +/// The Rafs v5 image format is designed with fused filesystem metadata and blob management +/// metadata, which is simple to implement but also introduces inter-dependency between the +/// filesystem layer and the blob management layer. This circular dependency is hard to maintain +/// and extend. Newer Rafs image format adopts designs with independent blob management layer, +/// which could be easily used to support both fuse and virtio-fs. So Rafs v5 image specific +/// interfaces are isolated into a dedicated sub-module. +pub mod v5 { + use super::*; + + /// Trait to provide extended information for a Rafs v5 chunk. + /// + /// Rafs filesystem stores filesystem metadata in a single metadata blob, and stores file + /// content in zero or more data blobs, which are separated from the metadata blob. + /// A `BlobV5ChunkInfo` object describes how a Rafs v5 chunk is located within a data blob. + /// It is abstracted because Rafs have several ways to load metadata from metadata blob. + pub trait BlobV5ChunkInfo: BlobChunkInfo { + /// Get the chunk index in the Rafs v5 metadata's chunk info array. 
+ fn index(&self) -> u32; + + /// Get the file offset within the Rafs file it belongs to. + fn file_offset(&self) -> u64; + + /// Get flags of the chunk. + fn flags(&self) -> BlobChunkFlags; + + /// Cast to a base [BlobChunkInfo] trait object. + fn as_base(&self) -> &dyn BlobChunkInfo; + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + use crate::test::MockChunkInfo; + + #[test] + fn test_blob_io_chunk() { + let chunk: Arc = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 0, + flags: Default::default(), + compress_size: 0x100, + uncompress_size: 0x200, + compress_offset: 0x1000, + uncompress_offset: 0x2000, + file_offset: 0, + index: 3, + reserved: 0, + }); + let iochunk: BlobIoChunk = chunk.clone().into(); + + assert_eq!(iochunk.id(), 3); + assert_eq!(iochunk.compressed_offset(), 0x1000); + assert_eq!(iochunk.compressed_size(), 0x100); + assert_eq!(iochunk.uncompressed_offset(), 0x2000); + assert_eq!(iochunk.uncompressed_size(), 0x200); + assert!(!iochunk.is_compressed()); + } + + #[test] + fn test_chunk_is_continuous() { + let blob_info = Arc::new(BlobInfo::new( + 1, + "test1".to_owned(), + 0x200000, + 0x100000, + 0x100000, + 512, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + )); + let chunk1 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0, + uncompress_offset: 0, + file_offset: 0, + index: 0, + reserved: 0, + }) as Arc; + let chunk2 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x800, + uncompress_offset: 0x1000, + file_offset: 0x1000, + index: 1, + reserved: 0, + }) as Arc; + let chunk3 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x1800, + uncompress_offset: 0x3000, + file_offset: 0x3000, + index: 1, + reserved: 0, + }) as Arc; + + let desc1 = BlobIoDesc { + blob: blob_info.clone(), + chunkinfo: chunk1.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + let desc2 = BlobIoDesc { + blob: blob_info.clone(), + chunkinfo: chunk2.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + let desc3 = BlobIoDesc { + blob: blob_info, + chunkinfo: chunk3.into(), + offset: 0, + size: 0x1000, + user_io: true, + }; + + assert!(desc1.is_continuous(&desc2, 0x0)); + assert!(desc1.is_continuous(&desc2, 0x1000)); + assert!(!desc2.is_continuous(&desc1, 0x1000)); + assert!(!desc2.is_continuous(&desc1, 0x0)); + + assert!(!desc1.is_continuous(&desc3, 0x0)); + assert!(!desc1.is_continuous(&desc3, 0x400)); + assert!(!desc1.is_continuous(&desc3, 0x800)); + assert!(desc1.is_continuous(&desc3, 0x1000)); + + assert!(!desc2.is_continuous(&desc3, 0x0)); + assert!(!desc2.is_continuous(&desc3, 0x400)); + assert!(desc2.is_continuous(&desc3, 0x800)); + assert!(desc2.is_continuous(&desc3, 0x1000)); + } + + #[test] + fn test_append_same_blob_with_diff_index() { + let blob1 = Arc::new(BlobInfo::new( + 1, + "test1".to_owned(), + 0x200000, + 0x100000, + 0x100000, + 512, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + )); + let chunk1 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 1, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0, + uncompress_offset: 0, + file_offset: 0, + index: 0, + 
reserved: 0, + }) as Arc; + let mut iovec = BlobIoVec::new(blob1.clone()); + iovec.push(BlobIoDesc::new(blob1, BlobIoChunk(chunk1), 0, 0x1000, true)); + + let blob2 = Arc::new(BlobInfo::new( + 2, // different index + "test1".to_owned(), // same id + 0x200000, + 0x100000, + 0x100000, + 512, + BlobFeatures::_V5_NO_EXT_BLOB_TABLE, + )); + let chunk2 = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: 2, + flags: BlobChunkFlags::empty(), + compress_size: 0x800, + uncompress_size: 0x1000, + compress_offset: 0x800, + uncompress_offset: 0x1000, + file_offset: 0x1000, + index: 1, + reserved: 0, + }) as Arc; + let mut iovec2 = BlobIoVec::new(blob2.clone()); + iovec2.push(BlobIoDesc::new(blob2, BlobIoChunk(chunk2), 0, 0x1000, true)); + + iovec.append(iovec2); + assert_eq!(0x2000, iovec.bi_size); + } + + #[test] + fn test_extend_large_blob_io_vec() { + let size = 0x2_0000_0000; // 8G blob + let chunk_size = 0x10_0000; // 1M chunk + let chunk_count = (size / chunk_size as u64) as u32; + let large_blob = Arc::new(BlobInfo::new( + 0, + "blob_id".to_owned(), + size, + size, + chunk_size, + chunk_count, + BlobFeatures::default(), + )); + + let mut iovec = BlobIoVec::new(large_blob.clone()); + let mut iovec2 = BlobIoVec::new(large_blob.clone()); + + // Extend half of blob + for chunk_idx in 0..chunk_count { + let chunk = Arc::new(MockChunkInfo { + block_id: Default::default(), + blob_index: large_blob.blob_index, + flags: BlobChunkFlags::empty(), + compress_size: chunk_size, + compress_offset: chunk_idx as u64 * chunk_size as u64, + uncompress_size: 2 * chunk_size, + uncompress_offset: 2 * chunk_idx as u64 * chunk_size as u64, + file_offset: 2 * chunk_idx as u64 * chunk_size as u64, + index: chunk_idx as u32, + reserved: 0, + }) as Arc; + let desc = BlobIoDesc::new(large_blob.clone(), BlobIoChunk(chunk), 0, chunk_size, true); + if chunk_idx < chunk_count / 2 { + iovec.push(desc); + } else { + iovec2.push(desc) + } + } + + // Extend other half of blob + iovec.append(iovec2); + + assert_eq!(size, iovec.size()); + assert_eq!(chunk_count, iovec.len() as u32); + } + + #[test] + fn test_blob_info_blob_meta_id() { + let blob_info = BlobInfo::new( + 1, + "blob_id".to_owned(), + 0, + 0, + 0, + 1, + BlobFeatures::SEPARATE | BlobFeatures::INLINED_FS_META, + ); + + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let mut source_path = PathBuf::from(root_dir); + source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef"); + + assert!(blob_info + .set_blob_id_from_meta_path(source_path.as_path()) + .is_ok()); + + let id = blob_info.get_blob_meta_id(); + assert!(id.is_ok()); + assert_eq!( + id.unwrap(), + "be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef".to_owned() + ); + } +} diff --git a/storage/src/factory.rs b/storage/src/factory.rs index ef74a129b8d..8a19580be15 100644 --- a/storage/src/factory.rs +++ b/storage/src/factory.rs @@ -1,260 +1,260 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Factory to create blob cache objects for blobs. -//! -//! The factory module provides methods to create -//! [blob cache objects](../cache/trait.BlobCache.html) for blobs. Internally it caches a group -//! of [BlobCacheMgr](../cache/trait.BlobCacheMgr.html) objects according to their -//! [ConfigV2](../../api/http/struct.ConfigV2.html). Those cached blob managers may be -//! garbage-collected! 
by [BlobFactory::gc()](struct.BlobFactory.html#method.gc) if not used anymore. -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; -use std::io::Result as IOResult; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; -use std::time::Duration; - -use lazy_static::lazy_static; -use nydus_api::{default_user_io_batch_size, BackendConfigV2, ConfigV2}; -use tokio::runtime::{Builder, Runtime}; -use tokio::time; - -#[cfg(feature = "backend-http-proxy")] -use crate::backend::http_proxy; -#[cfg(feature = "backend-localdisk")] -use crate::backend::localdisk; -#[cfg(feature = "backend-localfs")] -use crate::backend::localfs; -#[cfg(feature = "backend-oss")] -use crate::backend::oss; -#[cfg(feature = "backend-registry")] -use crate::backend::registry; -#[cfg(feature = "backend-s3")] -use crate::backend::s3; -use crate::backend::BlobBackend; -use crate::cache::{BlobCache, BlobCacheMgr, DummyCacheMgr, FileCacheMgr}; -use crate::device::BlobInfo; - -lazy_static! { - pub static ref ASYNC_RUNTIME: Arc = { - let runtime = Builder::new_multi_thread() - .worker_threads(1) // Limit the number of worker thread to 1 since this runtime is generally used to do blocking IO. - .thread_keep_alive(Duration::from_secs(10)) - .max_blocking_threads(8) - .thread_name("cache-flusher") - .enable_all() - .build(); - match runtime { - Ok(v) => Arc::new(v), - Err(e) => panic!("failed to create tokio async runtime, {}", e), - } - }; -} - -#[derive(Eq, PartialEq)] -struct BlobCacheMgrKey { - config: Arc, -} - -#[allow(clippy::derived_hash_with_manual_eq)] -impl Hash for BlobCacheMgrKey { - fn hash(&self, state: &mut H) { - self.config.id.hash(state); - if let Some(backend) = self.config.backend.as_ref() { - backend.backend_type.hash(state); - } - if let Some(cache) = self.config.cache.as_ref() { - cache.cache_type.hash(state); - cache.prefetch.hash(state); - } - } -} - -lazy_static::lazy_static! { - /// Default blob factory. - pub static ref BLOB_FACTORY: BlobFactory = BlobFactory::new(); -} - -/// Factory to create blob cache for blob objects. -pub struct BlobFactory { - mgrs: Mutex>>, - mgr_checker_active: AtomicBool, -} - -impl BlobFactory { - /// Create a new instance of blob factory object. - pub fn new() -> Self { - BlobFactory { - mgrs: Mutex::new(HashMap::new()), - mgr_checker_active: AtomicBool::new(false), - } - } - - pub fn start_mgr_checker(&self) { - if self - .mgr_checker_active - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) - .is_err() - { - return; - } - ASYNC_RUNTIME.spawn(async { - let mut interval = time::interval(Duration::from_secs(5)); - loop { - interval.tick().await; - BLOB_FACTORY.check_cache_stat(); - } - }); - } - - /// Create a blob cache object for a blob with specified configuration. - pub fn new_blob_cache( - &self, - config: &Arc, - blob_info: &Arc, - ) -> IOResult> { - let backend_cfg = config.get_backend_config()?; - let cache_cfg = config.get_cache_config()?; - let user_io_batch_size = config - .get_rafs_config() - .map_or_else(|_| default_user_io_batch_size(), |v| v.user_io_batch_size) - as u32; - let key = BlobCacheMgrKey { - config: config.clone(), - }; - let mut guard = self.mgrs.lock().unwrap(); - // Use the existing blob cache manager if there's one with the same configuration. 
- if let Some(mgr) = guard.get(&key) { - return mgr.get_blob_cache(blob_info); - } - let backend = Self::new_backend(backend_cfg, &blob_info.blob_id())?; - let mgr = match cache_cfg.cache_type.as_str() { - "blobcache" | "filecache" => { - let mgr = FileCacheMgr::new( - cache_cfg, - backend, - ASYNC_RUNTIME.clone(), - &config.id, - user_io_batch_size, - )?; - mgr.init()?; - Arc::new(mgr) as Arc - } - #[cfg(target_os = "linux")] - "fscache" => { - let mgr = crate::cache::FsCacheMgr::new( - cache_cfg, - backend, - ASYNC_RUNTIME.clone(), - &config.id, - user_io_batch_size, - )?; - mgr.init()?; - Arc::new(mgr) as Arc - } - _ => { - let mgr = DummyCacheMgr::new(cache_cfg, backend, false)?; - mgr.init()?; - Arc::new(mgr) as Arc - } - }; - - let mgr = guard.entry(key).or_insert_with(|| mgr); - - mgr.get_blob_cache(blob_info) - } - - /// Garbage-collect unused blob cache managers and blob caches. - pub fn gc(&self, victim: Option<(&Arc, &str)>) { - let mut mgrs = Vec::new(); - - if let Some((config, id)) = victim { - let key = BlobCacheMgrKey { - config: config.clone(), - }; - let mgr = self.mgrs.lock().unwrap().get(&key).cloned(); - if let Some(mgr) = mgr { - if mgr.gc(Some(id)) { - mgrs.push((key, mgr.clone())); - } - } - } else { - for (key, mgr) in self.mgrs.lock().unwrap().iter() { - if mgr.gc(None) { - mgrs.push(( - BlobCacheMgrKey { - config: key.config.clone(), - }, - mgr.clone(), - )); - } - } - } - - for (key, mgr) in mgrs { - let mut guard = self.mgrs.lock().unwrap(); - if mgr.gc(None) { - guard.remove(&key); - } - } - } - - /// Create a storage backend for the blob with id `blob_id`. - #[allow(unused_variables)] - pub fn new_backend( - config: &BackendConfigV2, - blob_id: &str, - ) -> IOResult> { - match config.backend_type.as_str() { - #[cfg(feature = "backend-oss")] - "oss" => Ok(Arc::new(oss::Oss::new( - config.get_oss_config()?, - Some(blob_id), - )?)), - #[cfg(feature = "backend-s3")] - "s3" => Ok(Arc::new(s3::S3::new( - config.get_s3_config()?, - Some(blob_id), - )?)), - #[cfg(feature = "backend-registry")] - "registry" => Ok(Arc::new(registry::Registry::new( - config.get_registry_config()?, - Some(blob_id), - )?)), - #[cfg(feature = "backend-localfs")] - "localfs" => Ok(Arc::new(localfs::LocalFs::new( - config.get_localfs_config()?, - Some(blob_id), - )?)), - #[cfg(feature = "backend-localdisk")] - "localdisk" => Ok(Arc::new(localdisk::LocalDisk::new( - config.get_localdisk_config()?, - Some(blob_id), - )?)), - #[cfg(feature = "backend-http-proxy")] - "http-proxy" => Ok(Arc::new(http_proxy::HttpProxy::new( - config.get_http_proxy_config()?, - Some(blob_id), - )?)), - _ => Err(einval!(format!( - "unsupported backend type '{}'", - config.backend_type - ))), - } - } - - fn check_cache_stat(&self) { - let mgrs = self.mgrs.lock().unwrap(); - for (_key, mgr) in mgrs.iter() { - mgr.check_stat(); - } - } -} - -impl Default for BlobFactory { - fn default() -> Self { - Self::new() - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Factory to create blob cache objects for blobs. +//! +//! The factory module provides methods to create +//! [blob cache objects](../cache/trait.BlobCache.html) for blobs. Internally it caches a group +//! of [BlobCacheMgr](../cache/trait.BlobCacheMgr.html) objects according to their +//! [ConfigV2](../../api/http/struct.ConfigV2.html). Those cached blob managers may be +//! garbage-collected! by [BlobFactory::gc()](struct.BlobFactory.html#method.gc) if not used anymore. 
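+//!
+//! A minimal usage sketch, assuming `config` and `blob_info` are prepared by the caller:
+//!
+//! ```ignore
+//! let cache = BLOB_FACTORY.new_blob_cache(&config, &blob_info)?;
+//! ```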
+use std::collections::HashMap; +use std::hash::{Hash, Hasher}; +use std::io::Result as IOResult; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use lazy_static::lazy_static; +use nydus_api::{default_user_io_batch_size, BackendConfigV2, ConfigV2}; +use tokio::runtime::{Builder, Runtime}; +use tokio::time; + +#[cfg(feature = "backend-http-proxy")] +use crate::backend::http_proxy; +#[cfg(feature = "backend-localdisk")] +use crate::backend::localdisk; +#[cfg(feature = "backend-localfs")] +use crate::backend::localfs; +#[cfg(feature = "backend-oss")] +use crate::backend::oss; +#[cfg(feature = "backend-registry")] +use crate::backend::registry; +#[cfg(feature = "backend-s3")] +use crate::backend::s3; +use crate::backend::BlobBackend; +use crate::cache::{BlobCache, BlobCacheMgr, DummyCacheMgr, FileCacheMgr}; +use crate::device::BlobInfo; + +lazy_static! { + pub static ref ASYNC_RUNTIME: Arc = { + let runtime = Builder::new_multi_thread() + .worker_threads(1) // Limit the number of worker thread to 1 since this runtime is generally used to do blocking IO. + .thread_keep_alive(Duration::from_secs(10)) + .max_blocking_threads(8) + .thread_name("cache-flusher") + .enable_all() + .build(); + match runtime { + Ok(v) => Arc::new(v), + Err(e) => panic!("failed to create tokio async runtime, {}", e), + } + }; +} + +#[derive(Eq, PartialEq)] +struct BlobCacheMgrKey { + config: Arc, +} + +#[allow(clippy::derived_hash_with_manual_eq)] +impl Hash for BlobCacheMgrKey { + fn hash(&self, state: &mut H) { + self.config.id.hash(state); + if let Some(backend) = self.config.backend.as_ref() { + backend.backend_type.hash(state); + } + if let Some(cache) = self.config.cache.as_ref() { + cache.cache_type.hash(state); + cache.prefetch.hash(state); + } + } +} + +lazy_static::lazy_static! { + /// Default blob factory. + pub static ref BLOB_FACTORY: BlobFactory = BlobFactory::new(); +} + +/// Factory to create blob cache for blob objects. +pub struct BlobFactory { + mgrs: Mutex>>, + mgr_checker_active: AtomicBool, +} + +impl BlobFactory { + /// Create a new instance of blob factory object. + pub fn new() -> Self { + BlobFactory { + mgrs: Mutex::new(HashMap::new()), + mgr_checker_active: AtomicBool::new(false), + } + } + + pub fn start_mgr_checker(&self) { + if self + .mgr_checker_active + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) + .is_err() + { + return; + } + ASYNC_RUNTIME.spawn(async { + let mut interval = time::interval(Duration::from_secs(5)); + loop { + interval.tick().await; + BLOB_FACTORY.check_cache_stat(); + } + }); + } + + /// Create a blob cache object for a blob with specified configuration. + pub fn new_blob_cache( + &self, + config: &Arc, + blob_info: &Arc, + ) -> IOResult> { + let backend_cfg = config.get_backend_config()?; + let cache_cfg = config.get_cache_config()?; + let user_io_batch_size = config + .get_rafs_config() + .map_or_else(|_| default_user_io_batch_size(), |v| v.user_io_batch_size) + as u32; + let key = BlobCacheMgrKey { + config: config.clone(), + }; + let mut guard = self.mgrs.lock().unwrap(); + // Use the existing blob cache manager if there's one with the same configuration. 
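+        // `BlobCacheMgrKey` compares the whole `ConfigV2` for equality (its hash covers only the
+        // config id, backend type and cache type/prefetch), so equal configurations reuse the
+        // same cache manager instead of creating a new one.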
+ if let Some(mgr) = guard.get(&key) { + return mgr.get_blob_cache(blob_info); + } + let backend = Self::new_backend(backend_cfg, &blob_info.blob_id())?; + let mgr = match cache_cfg.cache_type.as_str() { + "blobcache" | "filecache" => { + let mgr = FileCacheMgr::new( + cache_cfg, + backend, + ASYNC_RUNTIME.clone(), + &config.id, + user_io_batch_size, + )?; + mgr.init()?; + Arc::new(mgr) as Arc + } + #[cfg(target_os = "linux")] + "fscache" => { + let mgr = crate::cache::FsCacheMgr::new( + cache_cfg, + backend, + ASYNC_RUNTIME.clone(), + &config.id, + user_io_batch_size, + )?; + mgr.init()?; + Arc::new(mgr) as Arc + } + _ => { + let mgr = DummyCacheMgr::new(cache_cfg, backend, false)?; + mgr.init()?; + Arc::new(mgr) as Arc + } + }; + + let mgr = guard.entry(key).or_insert_with(|| mgr); + + mgr.get_blob_cache(blob_info) + } + + /// Garbage-collect unused blob cache managers and blob caches. + pub fn gc(&self, victim: Option<(&Arc, &str)>) { + let mut mgrs = Vec::new(); + + if let Some((config, id)) = victim { + let key = BlobCacheMgrKey { + config: config.clone(), + }; + let mgr = self.mgrs.lock().unwrap().get(&key).cloned(); + if let Some(mgr) = mgr { + if mgr.gc(Some(id)) { + mgrs.push((key, mgr.clone())); + } + } + } else { + for (key, mgr) in self.mgrs.lock().unwrap().iter() { + if mgr.gc(None) { + mgrs.push(( + BlobCacheMgrKey { + config: key.config.clone(), + }, + mgr.clone(), + )); + } + } + } + + for (key, mgr) in mgrs { + let mut guard = self.mgrs.lock().unwrap(); + if mgr.gc(None) { + guard.remove(&key); + } + } + } + + /// Create a storage backend for the blob with id `blob_id`. + #[allow(unused_variables)] + pub fn new_backend( + config: &BackendConfigV2, + blob_id: &str, + ) -> IOResult> { + match config.backend_type.as_str() { + #[cfg(feature = "backend-oss")] + "oss" => Ok(Arc::new(oss::Oss::new( + config.get_oss_config()?, + Some(blob_id), + )?)), + #[cfg(feature = "backend-s3")] + "s3" => Ok(Arc::new(s3::S3::new( + config.get_s3_config()?, + Some(blob_id), + )?)), + #[cfg(feature = "backend-registry")] + "registry" => Ok(Arc::new(registry::Registry::new( + config.get_registry_config()?, + Some(blob_id), + )?)), + #[cfg(feature = "backend-localfs")] + "localfs" => Ok(Arc::new(localfs::LocalFs::new( + config.get_localfs_config()?, + Some(blob_id), + )?)), + #[cfg(feature = "backend-localdisk")] + "localdisk" => Ok(Arc::new(localdisk::LocalDisk::new( + config.get_localdisk_config()?, + Some(blob_id), + )?)), + #[cfg(feature = "backend-http-proxy")] + "http-proxy" => Ok(Arc::new(http_proxy::HttpProxy::new( + config.get_http_proxy_config()?, + Some(blob_id), + )?)), + _ => Err(einval!(format!( + "unsupported backend type '{}'", + config.backend_type + ))), + } + } + + fn check_cache_stat(&self) { + let mgrs = self.mgrs.lock().unwrap(); + for (_key, mgr) in mgrs.iter() { + mgr.check_stat(); + } + } +} + +impl Default for BlobFactory { + fn default() -> Self { + Self::new() + } +} diff --git a/storage/src/lib.rs b/storage/src/lib.rs index 1795c176e1f..5463635ce97 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -1,104 +1,104 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Chunked blob storage service to support Rafs filesystem. -//! -//! The Rafs filesystem is blob based filesystem with chunk deduplication. A Rafs filesystem is -//! composed up of a metadata blob and zero or more data blobs. A blob is just a plain object -//! storage containing data chunks. 
Data chunks may be compressed, encrypted and deduplicated by -//! content digest value. When Rafs file is used for container images, Rafs metadata blob contains -//! all filesystem metadatas, such as directory, file name, permission etc. Actually file contents -//! are split into chunks and stored into data blobs. Rafs may build one data blob for each -//! container image layer or build a single data blob for the whole image, according to building -//! options. -//! -//! The nydus-storage crate is used to manage and access chunked blobs for Rafs filesystem, which -//! contains three layers: -//! - [Backend](backend/index.html): access raw blob objects on remote storage backends. -//! - [Cache](cache/index.html): cache remote blob contents onto local storage in forms -//! optimized for performance. -//! - [Device](device/index.html): public APIs for chunked blobs -//! -//! There are several core abstractions provided by the public APIs: -//! - [BlobInfo](device/struct.BlobInfo.html): provides information about blobs, which is typically -//! constructed from the `blob array` in Rafs filesystem metadata. -//! - [BlobDevice](device/struct.BlobDevice.html): provides access to all blobs of a Rafs filesystem, -//! which is constructed from an array of [BlobInfo](device/struct.BlobInfo.html) objects. -//! - [BlobChunkInfo](device/trait.BlobChunkInfo.html): provides information about a data chunk, which -//! is loaded from Rafs metadata. -//! - [BlobIoDesc](device/struct.BlobIoDesc.html): a blob IO descriptor, containing information for a -//! continuous IO range within a chunk. -//! - [BlobIoVec](device/struct.BlobIoVec.html): a scatter/gather list for blob IO operation, containing -//! one or more blob IO descriptors -//! -//! To read data from the Rafs filesystem, the Rafs filesystem driver will prepare a -//! [BlobIoVec](device/struct.BlobIoVec.html) -//! object and submit it to the corresponding [BlobDevice](device/struct.BlobDevice.html) -//! object to actually execute the IO -//! operations. -#[macro_use] -extern crate log; -#[macro_use] -extern crate bitflags; -#[macro_use] -extern crate nydus_api; - -use std::fmt::{Display, Formatter}; - -pub mod backend; -pub mod cache; -pub mod device; -pub mod factory; -pub mod meta; -//pub mod remote; -#[cfg(test)] -pub(crate) mod test; -pub mod utils; - -// A helper to impl RafsChunkInfo for upper layers like Rafs different metadata mode. -#[doc(hidden)] -#[macro_export] -macro_rules! impl_getter { - ($G: ident, $F: ident, $U: ty) => { - fn $G(&self) -> $U { - self.$F - } - }; -} - -/// Default blob chunk size. -pub const RAFS_DEFAULT_CHUNK_SIZE: u64 = 1024 * 1024; -/// Maximum blob chunk size, 16MB. -pub const RAFS_MAX_CHUNK_SIZE: u64 = 1024 * 1024 * 16; -/// Maximum numbers of chunk per data blob -pub const RAFS_MAX_CHUNKS_PER_BLOB: u32 = 1u32 << 24; -/// Generate maximum gap between chunks from merging size. -pub const RAFS_BATCH_SIZE_TO_GAP_SHIFT: u64 = 7; - -/// Error codes related to storage subsystem. 
-#[derive(Debug)] -pub enum StorageError { - Unsupported, - Timeout, - VolatileSlice(vm_memory::VolatileMemoryError), - MemOverflow, - NotContinuous, - CacheIndex(std::io::Error), -} - -impl Display for StorageError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - StorageError::Unsupported => write!(f, "unsupported storage operation"), - StorageError::Timeout => write!(f, "timeout when reading data from storage backend"), - StorageError::MemOverflow => write!(f, "memory overflow when doing storage backend IO"), - StorageError::NotContinuous => write!(f, "address ranges are not continuous"), - StorageError::VolatileSlice(e) => write!(f, "{}", e), - StorageError::CacheIndex(e) => write!(f, "Wrong cache index {}", e), - } - } -} - -/// Specialized std::result::Result for storage subsystem. -pub type StorageResult = std::result::Result; +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Chunked blob storage service to support Rafs filesystem. +//! +//! The Rafs filesystem is blob based filesystem with chunk deduplication. A Rafs filesystem is +//! composed up of a metadata blob and zero or more data blobs. A blob is just a plain object +//! storage containing data chunks. Data chunks may be compressed, encrypted and deduplicated by +//! content digest value. When Rafs file is used for container images, Rafs metadata blob contains +//! all filesystem metadatas, such as directory, file name, permission etc. Actually file contents +//! are split into chunks and stored into data blobs. Rafs may build one data blob for each +//! container image layer or build a single data blob for the whole image, according to building +//! options. +//! +//! The nydus-storage crate is used to manage and access chunked blobs for Rafs filesystem, which +//! contains three layers: +//! - [Backend](backend/index.html): access raw blob objects on remote storage backends. +//! - [Cache](cache/index.html): cache remote blob contents onto local storage in forms +//! optimized for performance. +//! - [Device](device/index.html): public APIs for chunked blobs +//! +//! There are several core abstractions provided by the public APIs: +//! - [BlobInfo](device/struct.BlobInfo.html): provides information about blobs, which is typically +//! constructed from the `blob array` in Rafs filesystem metadata. +//! - [BlobDevice](device/struct.BlobDevice.html): provides access to all blobs of a Rafs filesystem, +//! which is constructed from an array of [BlobInfo](device/struct.BlobInfo.html) objects. +//! - [BlobChunkInfo](device/trait.BlobChunkInfo.html): provides information about a data chunk, which +//! is loaded from Rafs metadata. +//! - [BlobIoDesc](device/struct.BlobIoDesc.html): a blob IO descriptor, containing information for a +//! continuous IO range within a chunk. +//! - [BlobIoVec](device/struct.BlobIoVec.html): a scatter/gather list for blob IO operation, containing +//! one or more blob IO descriptors +//! +//! To read data from the Rafs filesystem, the Rafs filesystem driver will prepare a +//! [BlobIoVec](device/struct.BlobIoVec.html) +//! object and submit it to the corresponding [BlobDevice](device/struct.BlobDevice.html) +//! object to actually execute the IO +//! operations. 
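The re-added `lib.rs` below also defines the `impl_getter!` helper macro, which expands to a trivial getter returning a copy of a field. As a quick, self-contained illustration of that expansion, using a local copy of the macro and a hypothetical `Chunk` struct rather than the crate's exported items:

```rust
// Local copy of the helper for illustration; the exported macro has the same body.
macro_rules! impl_getter {
    ($G: ident, $F: ident, $U: ty) => {
        fn $G(&self) -> $U {
            self.$F
        }
    };
}

// Hypothetical struct used only for this example.
struct Chunk {
    index: u32,
    offset: u64,
}

impl Chunk {
    impl_getter!(index, index, u32);
    impl_getter!(offset, offset, u64);
}

fn main() {
    let c = Chunk {
        index: 3,
        offset: 0x1000,
    };
    // Each invocation expanded to `fn index(&self) -> u32 { self.index }`, etc.
    assert_eq!(c.index(), 3);
    assert_eq!(c.offset(), 0x1000);
}
```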
+#[macro_use] +extern crate log; +#[macro_use] +extern crate bitflags; +#[macro_use] +extern crate nydus_api; + +use std::fmt::{Display, Formatter}; + +pub mod backend; +pub mod cache; +pub mod device; +pub mod factory; +pub mod meta; +//pub mod remote; +#[cfg(test)] +pub(crate) mod test; +pub mod utils; + +// A helper to impl RafsChunkInfo for upper layers like Rafs different metadata mode. +#[doc(hidden)] +#[macro_export] +macro_rules! impl_getter { + ($G: ident, $F: ident, $U: ty) => { + fn $G(&self) -> $U { + self.$F + } + }; +} + +/// Default blob chunk size. +pub const RAFS_DEFAULT_CHUNK_SIZE: u64 = 1024 * 1024; +/// Maximum blob chunk size, 16MB. +pub const RAFS_MAX_CHUNK_SIZE: u64 = 1024 * 1024 * 16; +/// Maximum numbers of chunk per data blob +pub const RAFS_MAX_CHUNKS_PER_BLOB: u32 = 1u32 << 24; +/// Generate maximum gap between chunks from merging size. +pub const RAFS_BATCH_SIZE_TO_GAP_SHIFT: u64 = 7; + +/// Error codes related to storage subsystem. +#[derive(Debug)] +pub enum StorageError { + Unsupported, + Timeout, + VolatileSlice(vm_memory::VolatileMemoryError), + MemOverflow, + NotContinuous, + CacheIndex(std::io::Error), +} + +impl Display for StorageError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + StorageError::Unsupported => write!(f, "unsupported storage operation"), + StorageError::Timeout => write!(f, "timeout when reading data from storage backend"), + StorageError::MemOverflow => write!(f, "memory overflow when doing storage backend IO"), + StorageError::NotContinuous => write!(f, "address ranges are not continuous"), + StorageError::VolatileSlice(e) => write!(f, "{}", e), + StorageError::CacheIndex(e) => write!(f, "Wrong cache index {}", e), + } + } +} + +/// Specialized std::result::Result for storage subsystem. +pub type StorageResult = std::result::Result; diff --git a/storage/src/meta/batch.rs b/storage/src/meta/batch.rs index 828849a9291..94a91e2514d 100644 --- a/storage/src/meta/batch.rs +++ b/storage/src/meta/batch.rs @@ -1,203 +1,203 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::Result; -use std::mem::size_of; -use std::slice; - -use crate::meta::chunk_info_v2::BlobChunkInfoV2Ondisk; -use crate::meta::BlobMetaChunkInfo; - -/// Context information to support batch chunk. -/// Each one corresponds to a whole batch chunk containing multiple small chunks. -#[repr(C, packed)] -#[derive(Default)] -pub struct BatchInflateContext { - /// Compressed size of the whole batch chunk data. - compressed_size: u32, - /// Uncompressed size of the whole batch chunk data without 4K aligned. - uncompressed_batch_size: u32, - __reserved1: u64, - __reserved2: u64, - __reserved3: u64, - __reserved4: u64, -} - -impl BatchInflateContext { - /// Get compressed size of the whole batch chunk data. - pub fn compressed_size(&self) -> u32 { - u32::from_le(self.compressed_size) - } - - /// Set compressed size of the whole batch chunk data. - pub fn set_compressed_size(&mut self, compressed_size: u32) { - self.compressed_size = u32::to_le(compressed_size); - } - - /// Set uncompressed size of the whole batch chunk data. - pub fn set_uncompressed_batch_size(&mut self, uncompressed_batch_size: u32) { - self.uncompressed_batch_size = u32::to_le(uncompressed_batch_size); - } - - /// Get uncompressed size of the whole batch chunk data. - pub fn uncompressed_batch_size(&self) -> u32 { - u32::from_le(self.uncompressed_batch_size) - } - - /// Convert to an immutable u8 slice. 
- pub fn as_slice(&self) -> &[u8] { - unsafe { - slice::from_raw_parts( - self as *const BatchInflateContext as *const u8, - size_of::(), - ) - } - } -} - -/// Struct to generate [BatchInflateContext] objects for batch chunks. -pub struct BatchContextGenerator { - /// Buffering the to be dumped chunk data for Chunk Merging. - chunk_data_buf: Vec, - /// Storing all `BatchInflateContext` of current blob. - contexts: Vec, -} - -impl BatchContextGenerator { - /// Get the buffer of to be dumped chunk data for batch chunk. - pub fn chunk_data_buf(&self) -> &Vec { - &self.chunk_data_buf - } - - /// Check whether the chunk data buffer is empty. - pub fn chunk_data_buf_is_empty(&self) -> bool { - self.chunk_data_buf.is_empty() - } - - /// Get the length of chunk data buffer. - pub fn chunk_data_buf_len(&self) -> usize { - self.chunk_data_buf.len() - } - - /// Append new chunk data to the chunk data buffer. - pub fn append_chunk_data_buf(&mut self, chunk_data: &[u8]) { - self.chunk_data_buf.extend_from_slice(chunk_data); - } - - /// Clear the chunk data buffer. - pub fn clear_chunk_data_buf(&mut self) { - self.chunk_data_buf.clear(); - } - - /// Add a batch context for a dumped batch chunk. - pub fn add_context(&mut self, compressed_size: u32) { - let ctx = BatchInflateContext { - compressed_size: u32::to_le(compressed_size), - uncompressed_batch_size: u32::to_le(self.chunk_data_buf_len() as u32), - __reserved1: u64::to_le(0), - __reserved2: u64::to_le(0), - __reserved3: u64::to_le(0), - __reserved4: u64::to_le(0), - }; - self.contexts.push(ctx); - } - - /// Create a new instance of [BatchInflateContext]. - pub fn new(batch_size: u32) -> Result { - Ok(Self { - chunk_data_buf: Vec::with_capacity(batch_size as usize), - contexts: Vec::with_capacity(10240), - }) - } - - /// Generate and return a v2 chunk info struct. - pub fn generate_chunk_info( - &mut self, - compressed_offset: u64, - uncompressed_offset: u64, - uncompressed_size: u32, - encrypted: bool, - ) -> Result { - let mut chunk = BlobChunkInfoV2Ondisk::default(); - chunk.set_compressed_offset(compressed_offset); - chunk.set_compressed_size(0); - chunk.set_uncompressed_offset(uncompressed_offset); - chunk.set_uncompressed_size(uncompressed_size); - chunk.set_batch(true); - chunk.set_batch_index(self.contexts.len() as u32); - chunk.set_uncompressed_offset_in_batch_buf(self.chunk_data_buf_len() as u32); - chunk.set_compressed(true); - chunk.set_encrypted(encrypted); - - Ok(chunk) - } - - /// Convert all the batch chunk information to a u8 vector. 
- pub fn to_vec(&self) -> Result<(Vec, u32)> { - let mut data = Vec::new(); - - for ctx in &self.contexts { - data.extend_from_slice(ctx.as_slice()); - } - - Ok((data, self.contexts.len() as u32)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::mem::ManuallyDrop; - - #[test] - fn test_batch_inflate_context() { - let mut ctx = BatchInflateContext { - compressed_size: 0, - uncompressed_batch_size: 0, - __reserved1: 0, - __reserved2: 0, - __reserved3: 0, - __reserved4: 0, - }; - ctx.set_compressed_size(0x20); - assert_eq!(ctx.compressed_size(), 0x20); - ctx.set_uncompressed_batch_size(0x30); - assert_eq!(ctx.uncompressed_batch_size(), 0x30); - let mut v = [0u8; 40]; - v[0] = 0x20; - v[4] = 0x30; - assert_eq!(ctx.as_slice(), v); - } - - #[test] - fn test_batch_context_generator() { - let mut generator = BatchContextGenerator::new(0x100000).unwrap(); - assert!(generator.chunk_data_buf_is_empty()); - assert_eq!(generator.chunk_data_buf_len(), 0); - - generator.append_chunk_data_buf(&[1, 2, 3, 4]); - assert!(!generator.chunk_data_buf_is_empty()); - assert_eq!(generator.chunk_data_buf_len(), 4); - - generator.add_context(4); - - let (ctx_data, _) = generator.to_vec().unwrap(); - let ctx_vec = unsafe { - ManuallyDrop::new(Vec::from_raw_parts( - ctx_data.as_slice().as_ptr() as *mut BatchInflateContext, - 1, - 1, - )) - }; - assert_eq!(ctx_vec[0].compressed_size(), 4); - assert_eq!(ctx_vec[0].uncompressed_batch_size(), 4); - - generator.clear_chunk_data_buf(); - assert!(generator.chunk_data_buf_is_empty()); - assert_eq!(generator.chunk_data_buf_len(), 0); - - let chunk_info = generator.generate_chunk_info(0, 0, 4, false).unwrap(); - assert!(chunk_info.is_batch()); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::Result; +use std::mem::size_of; +use std::slice; + +use crate::meta::chunk_info_v2::BlobChunkInfoV2Ondisk; +use crate::meta::BlobMetaChunkInfo; + +/// Context information to support batch chunk. +/// Each one corresponds to a whole batch chunk containing multiple small chunks. +#[repr(C, packed)] +#[derive(Default)] +pub struct BatchInflateContext { + /// Compressed size of the whole batch chunk data. + compressed_size: u32, + /// Uncompressed size of the whole batch chunk data without 4K aligned. + uncompressed_batch_size: u32, + __reserved1: u64, + __reserved2: u64, + __reserved3: u64, + __reserved4: u64, +} + +impl BatchInflateContext { + /// Get compressed size of the whole batch chunk data. + pub fn compressed_size(&self) -> u32 { + u32::from_le(self.compressed_size) + } + + /// Set compressed size of the whole batch chunk data. + pub fn set_compressed_size(&mut self, compressed_size: u32) { + self.compressed_size = u32::to_le(compressed_size); + } + + /// Set uncompressed size of the whole batch chunk data. + pub fn set_uncompressed_batch_size(&mut self, uncompressed_batch_size: u32) { + self.uncompressed_batch_size = u32::to_le(uncompressed_batch_size); + } + + /// Get uncompressed size of the whole batch chunk data. + pub fn uncompressed_batch_size(&self) -> u32 { + u32::from_le(self.uncompressed_batch_size) + } + + /// Convert to an immutable u8 slice. + pub fn as_slice(&self) -> &[u8] { + unsafe { + slice::from_raw_parts( + self as *const BatchInflateContext as *const u8, + size_of::(), + ) + } + } +} + +/// Struct to generate [BatchInflateContext] objects for batch chunks. +pub struct BatchContextGenerator { + /// Buffering the to be dumped chunk data for Chunk Merging. 
+ chunk_data_buf: Vec, + /// Storing all `BatchInflateContext` of current blob. + contexts: Vec, +} + +impl BatchContextGenerator { + /// Get the buffer of to be dumped chunk data for batch chunk. + pub fn chunk_data_buf(&self) -> &Vec { + &self.chunk_data_buf + } + + /// Check whether the chunk data buffer is empty. + pub fn chunk_data_buf_is_empty(&self) -> bool { + self.chunk_data_buf.is_empty() + } + + /// Get the length of chunk data buffer. + pub fn chunk_data_buf_len(&self) -> usize { + self.chunk_data_buf.len() + } + + /// Append new chunk data to the chunk data buffer. + pub fn append_chunk_data_buf(&mut self, chunk_data: &[u8]) { + self.chunk_data_buf.extend_from_slice(chunk_data); + } + + /// Clear the chunk data buffer. + pub fn clear_chunk_data_buf(&mut self) { + self.chunk_data_buf.clear(); + } + + /// Add a batch context for a dumped batch chunk. + pub fn add_context(&mut self, compressed_size: u32) { + let ctx = BatchInflateContext { + compressed_size: u32::to_le(compressed_size), + uncompressed_batch_size: u32::to_le(self.chunk_data_buf_len() as u32), + __reserved1: u64::to_le(0), + __reserved2: u64::to_le(0), + __reserved3: u64::to_le(0), + __reserved4: u64::to_le(0), + }; + self.contexts.push(ctx); + } + + /// Create a new instance of [BatchInflateContext]. + pub fn new(batch_size: u32) -> Result { + Ok(Self { + chunk_data_buf: Vec::with_capacity(batch_size as usize), + contexts: Vec::with_capacity(10240), + }) + } + + /// Generate and return a v2 chunk info struct. + pub fn generate_chunk_info( + &mut self, + compressed_offset: u64, + uncompressed_offset: u64, + uncompressed_size: u32, + encrypted: bool, + ) -> Result { + let mut chunk = BlobChunkInfoV2Ondisk::default(); + chunk.set_compressed_offset(compressed_offset); + chunk.set_compressed_size(0); + chunk.set_uncompressed_offset(uncompressed_offset); + chunk.set_uncompressed_size(uncompressed_size); + chunk.set_batch(true); + chunk.set_batch_index(self.contexts.len() as u32); + chunk.set_uncompressed_offset_in_batch_buf(self.chunk_data_buf_len() as u32); + chunk.set_compressed(true); + chunk.set_encrypted(encrypted); + + Ok(chunk) + } + + /// Convert all the batch chunk information to a u8 vector. 
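Before the `to_vec` implementation that follows (which simply concatenates each context's `as_slice()` output), here is an allocation-based way to produce the same 40-byte on-disk layout without `unsafe`: two little-endian `u32` fields followed by four reserved `u64` words, matching the byte positions checked in the unit test below. The `serialize_ctx` helper is hypothetical, not part of the crate:

```rust
fn serialize_ctx(compressed_size: u32, uncompressed_batch_size: u32) -> Vec<u8> {
    let mut buf = Vec::with_capacity(40);
    buf.extend_from_slice(&compressed_size.to_le_bytes());
    buf.extend_from_slice(&uncompressed_batch_size.to_le_bytes());
    buf.extend_from_slice(&[0u8; 32]); // __reserved1..__reserved4
    buf
}

fn main() {
    let bytes = serialize_ctx(0x20, 0x30);
    assert_eq!(bytes.len(), 40);
    // Same byte positions that test_batch_inflate_context checks on `as_slice()`.
    assert_eq!(bytes[0], 0x20);
    assert_eq!(bytes[4], 0x30);
}
```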
+ pub fn to_vec(&self) -> Result<(Vec, u32)> { + let mut data = Vec::new(); + + for ctx in &self.contexts { + data.extend_from_slice(ctx.as_slice()); + } + + Ok((data, self.contexts.len() as u32)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::ManuallyDrop; + + #[test] + fn test_batch_inflate_context() { + let mut ctx = BatchInflateContext { + compressed_size: 0, + uncompressed_batch_size: 0, + __reserved1: 0, + __reserved2: 0, + __reserved3: 0, + __reserved4: 0, + }; + ctx.set_compressed_size(0x20); + assert_eq!(ctx.compressed_size(), 0x20); + ctx.set_uncompressed_batch_size(0x30); + assert_eq!(ctx.uncompressed_batch_size(), 0x30); + let mut v = [0u8; 40]; + v[0] = 0x20; + v[4] = 0x30; + assert_eq!(ctx.as_slice(), v); + } + + #[test] + fn test_batch_context_generator() { + let mut generator = BatchContextGenerator::new(0x100000).unwrap(); + assert!(generator.chunk_data_buf_is_empty()); + assert_eq!(generator.chunk_data_buf_len(), 0); + + generator.append_chunk_data_buf(&[1, 2, 3, 4]); + assert!(!generator.chunk_data_buf_is_empty()); + assert_eq!(generator.chunk_data_buf_len(), 4); + + generator.add_context(4); + + let (ctx_data, _) = generator.to_vec().unwrap(); + let ctx_vec = unsafe { + ManuallyDrop::new(Vec::from_raw_parts( + ctx_data.as_slice().as_ptr() as *mut BatchInflateContext, + 1, + 1, + )) + }; + assert_eq!(ctx_vec[0].compressed_size(), 4); + assert_eq!(ctx_vec[0].uncompressed_batch_size(), 4); + + generator.clear_chunk_data_buf(); + assert!(generator.chunk_data_buf_is_empty()); + assert_eq!(generator.chunk_data_buf_len(), 0); + + let chunk_info = generator.generate_chunk_info(0, 0, 4, false).unwrap(); + assert!(chunk_info.is_batch()); + } +} diff --git a/storage/src/meta/chunk_info_v1.rs b/storage/src/meta/chunk_info_v1.rs index 3d20998f32b..4001e1cad8d 100644 --- a/storage/src/meta/chunk_info_v1.rs +++ b/storage/src/meta/chunk_info_v1.rs @@ -1,476 +1,476 @@ -// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use crate::meta::{BlobCompressionContext, BlobMetaChunkInfo, BLOB_CCT_CHUNK_SIZE_MASK}; -use std::io::Result; - -const BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK: u64 = 0xff_ffff_ffff; -const BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK: u64 = 0xfff_ffff_f000; -const BLOB_CC_V1_CHUNK_SIZE_LOW_MASK: u64 = 0x0f_ffff; -const BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK: u64 = 0xf0_0000; -const BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT: u64 = 44; -const BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT: u64 = 20; -const BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT: u64 = 12; - -/// Chunk compression information on disk format V1. 
-#[repr(C, packed)] -#[derive(Clone, Copy, Default)] -pub struct BlobChunkInfoV1Ondisk { - // 20bits: size (low), 32bits: offset, 4bits: size (high), 8bits reserved - pub(crate) uncomp_info: u64, - // 20bits: size (low), 4bits: size (high), offset: 40bits - pub(crate) comp_info: u64, -} - -impl BlobMetaChunkInfo for BlobChunkInfoV1Ondisk { - fn compressed_offset(&self) -> u64 { - u64::from_le(self.comp_info) & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK - } - - fn set_compressed_offset(&mut self, offset: u64) { - assert_eq!(offset & !BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK, 0); - self.comp_info &= u64::to_le(!BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK); - self.comp_info |= u64::to_le(offset & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK); - } - - fn compressed_size(&self) -> u32 { - let bit20 = u64::from_le(self.comp_info) >> BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; - let bit4 = - (u64::from_le(self.comp_info) & 0xf0000000000) >> BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT; - (bit4 | bit20) as u32 + 1 - } - - fn set_compressed_size(&mut self, size: u32) { - let size = size as u64; - assert!(size > 0 && size <= BLOB_CCT_CHUNK_SIZE_MASK + 1); - - let size_low = - ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_LOW_MASK) << BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; - let size_high = - ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK) << BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT; - let offset = u64::from_le(self.comp_info) & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK; - - self.comp_info = u64::to_le(size_low | size_high | offset); - } - - fn uncompressed_offset(&self) -> u64 { - u64::from_le(self.uncomp_info) & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK - } - - fn set_uncompressed_offset(&mut self, offset: u64) { - assert_eq!(offset & !BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK, 0); - self.uncomp_info &= u64::to_le(!BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK); - self.uncomp_info |= u64::to_le(offset & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK); - } - - fn uncompressed_size(&self) -> u32 { - let size_high = - (u64::from_le(self.uncomp_info) & 0xf00) << BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT; - let size_low = u64::from_le(self.uncomp_info) >> BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; - (size_high | size_low) as u32 + 1 - } - - fn set_uncompressed_size(&mut self, size: u32) { - let size = size as u64; - assert!(size != 0 && size <= BLOB_CCT_CHUNK_SIZE_MASK + 1); - - let size_low = - ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_LOW_MASK) << BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; - let size_high = ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK) - >> BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT; - let offset = u64::from_le(self.uncomp_info) & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK; - - self.uncomp_info = u64::to_le(size_low | offset | size_high); - } - - fn is_encrypted(&self) -> bool { - false - } - - fn is_compressed(&self) -> bool { - self.compressed_size() != self.uncompressed_size() - } - - fn is_zran(&self) -> bool { - false - } - - fn is_batch(&self) -> bool { - false - } - - fn get_zran_index(&self) -> Result { - unimplemented!() - } - - fn get_zran_offset(&self) -> Result { - unimplemented!() - } - - fn get_batch_index(&self) -> Result { - unimplemented!() - } - - fn get_uncompressed_offset_in_batch_buf(&self) -> Result { - unimplemented!() - } - - fn get_data(&self) -> u64 { - 0 - } - - fn validate(&self, state: &BlobCompressionContext) -> Result<()> { - if self.compressed_end() > state.compressed_size - || self.uncompressed_end() > state.uncompressed_size - || self.uncompressed_size() == 0 - || (!self.is_compressed() && self.uncompressed_size() != self.compressed_size()) - { - return Err(einval!(format!( - "invalid chunk, blob: 
index {}/c_end 0x{:}/d_end 0x{:x}, chunk: c_end 0x{:x}/d_end 0x{:x}/compressed {}", - state.blob_index, - state.compressed_size, - state.uncompressed_size, - self.compressed_end(), - self.uncompressed_end(), - self.is_compressed(), - ))); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::fs::OpenOptions; - use std::io::Write; - use std::mem::ManuallyDrop; - use std::sync::Arc; - - use nydus_utils::compress; - use nydus_utils::metrics::BackendMetrics; - use vmm_sys_util::tempfile::TempFile; - - use super::*; - use crate::backend::BlobReader; - use crate::device::{BlobFeatures, BlobInfo}; - use crate::meta::tests::DummyBlobReader; - use crate::meta::{ - round_up_4k, BlobCompressionContext, BlobCompressionContextHeader, - BlobCompressionContextInfo, BlobMetaChunkArray, - }; - use crate::utils::alloc_buf; - use crate::RAFS_MAX_CHUNK_SIZE; - - #[test] - fn test_new_chunk_on_disk() { - let mut chunk = BlobChunkInfoV1Ondisk::default(); - - assert_eq!(chunk.compressed_offset(), 0); - assert_eq!(chunk.compressed_size(), 1); - assert_eq!(chunk.compressed_end(), 1); - assert_eq!(chunk.uncompressed_offset(), 0); - assert_eq!(chunk.uncompressed_size(), 1); - assert_eq!(chunk.aligned_uncompressed_end(), 0x1000); - - chunk.set_compressed_offset(0x1000); - chunk.set_compressed_size(0x100); - assert_eq!(chunk.compressed_offset(), 0x1000); - assert_eq!(chunk.compressed_size(), 0x100); - - chunk.set_uncompressed_offset(0x1000); - chunk.set_uncompressed_size(0x100); - assert_eq!(chunk.uncompressed_offset(), 0x1000); - assert_eq!(chunk.uncompressed_size(), 0x100); - - chunk.set_compressed_offset(0xffffffffff); - chunk.set_compressed_size(0x1000000); - assert_eq!(chunk.compressed_offset(), 0xffffffffff); - assert_eq!(chunk.compressed_size(), 0x1000000); - - chunk.set_uncompressed_offset(0xffffffff000); - chunk.set_uncompressed_size(0x1000000); - assert_eq!(chunk.uncompressed_offset(), 0xffffffff000); - assert_eq!(chunk.uncompressed_size(), 0x1000000); - - // For testing old format compatibility. 
- let chunk = BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0xffff_ffff_f100_0000), - comp_info: u64::to_le(0xffff_f0ff_ffff_ffff), - }; - assert_eq!(chunk.uncompressed_size(), 0x000f_ffff + 1); - assert_eq!(chunk.uncompressed_offset(), 0xffff_1000 * 0x1000); - assert_eq!(chunk.compressed_size(), 0x000f_ffff + 1); - assert_eq!(chunk.compressed_offset(), 0x00ff_ffff_ffff); - } - - #[test] - fn test_get_chunk_index_with_hole() { - let state = BlobCompressionContext { - chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V1(vec![ - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0000_0000), - comp_info: u64::to_le(0x00ff_f000_0000_0000), - }, - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0010_0000), - comp_info: u64::to_le(0x00ff_f000_0010_0000), - }, - ])), - ..Default::default() - }; - - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0, false) - .unwrap(), - 0 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x1fff, false) - .unwrap(), - 0 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x100000, false) - .unwrap(), - 1 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x101fff, false) - .unwrap(), - 1 - ); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x2000, false) - .unwrap_err(); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0xfffff, false) - .unwrap_err(); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x102000, false) - .unwrap_err(); - } - - #[test] - fn test_get_chunks() { - let state = BlobCompressionContext { - blob_index: 1, - blob_features: 0, - compressed_size: 0x6001, - uncompressed_size: 0x102001, - chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V1(vec![ - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x0100_0000_0000_0000), - comp_info: u64::to_le(0x00ff_f000_0000_0000), - }, - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0000_2000), - comp_info: u64::to_le(0x01ff_f000_0000_1000), - }, - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0000_4000), - comp_info: u64::to_le(0x00ff_f000_0000_3000), - }, - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0010_0000), - comp_info: u64::to_le(0x00ff_f000_0000_4000), - }, - BlobChunkInfoV1Ondisk { - uncomp_info: u64::to_le(0x01ff_f000_0010_2000), - comp_info: u64::to_le(0x00ff_f000_0000_5000), - }, - ])), - ..Default::default() - }; - let info = BlobCompressionContextInfo { - state: Arc::new(state), - }; - - let vec = info.get_chunks_uncompressed(0x0, 0x1001, 0).unwrap(); - assert_eq!(vec.len(), 1); - assert_eq!(vec[0].blob_index(), 1); - assert_eq!(vec[0].id(), 0); - assert_eq!(vec[0].compressed_offset(), 0); - assert_eq!(vec[0].compressed_size(), 0x1000); - assert_eq!(vec[0].uncompressed_offset(), 0); - assert_eq!(vec[0].uncompressed_size(), 0x1001); - assert!(vec[0].is_compressed()); - - let vec = info.get_chunks_uncompressed(0x0, 0x4000, 0).unwrap(); - assert_eq!(vec.len(), 2); - assert_eq!(vec[1].blob_index(), 1); - assert_eq!(vec[1].id(), 1); - assert_eq!(vec[1].compressed_offset(), 0x1000); - assert_eq!(vec[1].compressed_size(), 0x2000); - assert_eq!(vec[1].uncompressed_offset(), 0x2000); - assert_eq!(vec[1].uncompressed_size(), 0x2000); - assert!(!vec[1].is_compressed()); - - let vec = info.get_chunks_uncompressed(0x0, 0x4001, 0).unwrap(); - assert_eq!(vec.len(), 3); - - let vec = info.get_chunks_uncompressed(0x100000, 0x2000, 0).unwrap(); - 
assert_eq!(vec.len(), 1); - - assert!(info.get_chunks_uncompressed(0x0, 0x6001, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x0, 0xfffff, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x0, 0x100000, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x0, 0x104000, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x0, 0x104001, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x100000, 0x2001, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x100000, 0x4000, 0).is_err()); - assert!(info.get_chunks_uncompressed(0x100000, 0x4001, 0).is_err()); - assert!(info - .get_chunks_uncompressed(0x102000, 0xffff_ffff_ffff_ffff, 0) - .is_err()); - assert!(info.get_chunks_uncompressed(0x104000, 0x1, 0).is_err()); - } - - #[test] - fn test_read_metadata_compressor_none() { - let temp = TempFile::new().unwrap(); - let mut w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - - let chunks = [ - BlobChunkInfoV1Ondisk { - uncomp_info: 0x01ff_f000_0000_0000, - comp_info: 0x00ff_f000_0000_0000, - }, - BlobChunkInfoV1Ondisk { - uncomp_info: 0x01ff_f000_0010_0000, - comp_info: 0x00ff_f000_0010_0000, - }, - ]; - - let data = unsafe { - std::slice::from_raw_parts( - chunks.as_ptr() as *const u8, - chunks.len() * std::mem::size_of::(), - ) - }; - let uncompressed_size = data.len(); - - let pos = 0; - w.write_all(data).unwrap(); - let header = BlobCompressionContextHeader::default(); - w.write_all(header.as_bytes()).unwrap(); - - let mut blob_info = BlobInfo::new( - 0, - "dummy".to_string(), - 0, - 0, - RAFS_MAX_CHUNK_SIZE as u32, - 0, - BlobFeatures::default(), - ); - blob_info.set_blob_meta_info( - pos, - data.len() as u64, - data.len() as u64, - compress::Algorithm::None as u32, - ); - - let mut buffer = alloc_buf( - round_up_4k(uncompressed_size) + std::mem::size_of::(), - ); - let reader: Arc = Arc::new(DummyBlobReader { - metrics: BackendMetrics::new("dummy", "localfs"), - file: r, - }); - BlobCompressionContextInfo::read_metadata(&blob_info, &reader, &mut buffer).unwrap(); - - assert_eq!(&buffer[0..data.len()], data); - } - - #[test] - fn test_read_metadata_compressor_lz4() { - let temp = TempFile::new().unwrap(); - let mut w = OpenOptions::new() - .read(true) - .write(true) - .open(temp.as_path()) - .unwrap(); - let r = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - - let chunks = [ - BlobChunkInfoV1Ondisk { - uncomp_info: 0x01ff_f000_0000_0000, - comp_info: 0x00ff_f000_0000_0000, - }, - BlobChunkInfoV1Ondisk { - uncomp_info: 0x01ff_f000_0010_0000, - comp_info: 0x00ff_f000_0010_0000, - }, - ]; - - let data = unsafe { - std::slice::from_raw_parts( - chunks.as_ptr() as *const u8, - chunks.len() * std::mem::size_of::(), - ) - }; - - let (buf, compressed) = compress::compress(data, compress::Algorithm::Lz4Block).unwrap(); - assert!(compressed); - - let pos = 0; - w.write_all(&buf).unwrap(); - let header = BlobCompressionContextHeader::default(); - w.write_all(header.as_bytes()).unwrap(); - - let compressed_size = buf.len(); - let uncompressed_size = data.len(); - let mut blob_info = BlobInfo::new( - 0, - "dummy".to_string(), - 0, - 0, - RAFS_MAX_CHUNK_SIZE as u32, - 0, - BlobFeatures::default(), - ); - blob_info.set_blob_meta_info( - pos, - compressed_size as u64, - uncompressed_size as u64, - compress::Algorithm::Lz4Block as u32, - ); - - let mut buffer = alloc_buf( - round_up_4k(uncompressed_size) + 
std::mem::size_of::(), - ); - let reader: Arc = Arc::new(DummyBlobReader { - metrics: BackendMetrics::new("dummy", "localfs"), - file: r, - }); - BlobCompressionContextInfo::read_metadata(&blob_info, &reader, &mut buffer).unwrap(); - - assert_eq!(&buffer[0..uncompressed_size], data); - } -} +// Copyright (C) 2021-2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use crate::meta::{BlobCompressionContext, BlobMetaChunkInfo, BLOB_CCT_CHUNK_SIZE_MASK}; +use std::io::Result; + +const BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK: u64 = 0xff_ffff_ffff; +const BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK: u64 = 0xfff_ffff_f000; +const BLOB_CC_V1_CHUNK_SIZE_LOW_MASK: u64 = 0x0f_ffff; +const BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK: u64 = 0xf0_0000; +const BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT: u64 = 44; +const BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT: u64 = 20; +const BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT: u64 = 12; + +/// Chunk compression information on disk format V1. +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +pub struct BlobChunkInfoV1Ondisk { + // 20bits: size (low), 32bits: offset, 4bits: size (high), 8bits reserved + pub(crate) uncomp_info: u64, + // 20bits: size (low), 4bits: size (high), offset: 40bits + pub(crate) comp_info: u64, +} + +impl BlobMetaChunkInfo for BlobChunkInfoV1Ondisk { + fn compressed_offset(&self) -> u64 { + u64::from_le(self.comp_info) & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK + } + + fn set_compressed_offset(&mut self, offset: u64) { + assert_eq!(offset & !BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK, 0); + self.comp_info &= u64::to_le(!BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK); + self.comp_info |= u64::to_le(offset & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK); + } + + fn compressed_size(&self) -> u32 { + let bit20 = u64::from_le(self.comp_info) >> BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; + let bit4 = + (u64::from_le(self.comp_info) & 0xf0000000000) >> BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT; + (bit4 | bit20) as u32 + 1 + } + + fn set_compressed_size(&mut self, size: u32) { + let size = size as u64; + assert!(size > 0 && size <= BLOB_CCT_CHUNK_SIZE_MASK + 1); + + let size_low = + ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_LOW_MASK) << BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; + let size_high = + ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK) << BLOB_CC_V1_CHUNK_SIZE_HIGH_COMP_SHIFT; + let offset = u64::from_le(self.comp_info) & BLOB_CC_V1_CHUNK_COMP_OFFSET_MASK; + + self.comp_info = u64::to_le(size_low | size_high | offset); + } + + fn uncompressed_offset(&self) -> u64 { + u64::from_le(self.uncomp_info) & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK + } + + fn set_uncompressed_offset(&mut self, offset: u64) { + assert_eq!(offset & !BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK, 0); + self.uncomp_info &= u64::to_le(!BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK); + self.uncomp_info |= u64::to_le(offset & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK); + } + + fn uncompressed_size(&self) -> u32 { + let size_high = + (u64::from_le(self.uncomp_info) & 0xf00) << BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT; + let size_low = u64::from_le(self.uncomp_info) >> BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; + (size_high | size_low) as u32 + 1 + } + + fn set_uncompressed_size(&mut self, size: u32) { + let size = size as u64; + assert!(size != 0 && size <= BLOB_CCT_CHUNK_SIZE_MASK + 1); + + let size_low = + ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_LOW_MASK) << BLOB_CC_V1_CHUNK_SIZE_LOW_SHIFT; + let size_high = ((size - 1) & BLOB_CC_V1_CHUNK_SIZE_HIGH_MASK) + >> BLOB_CC_V1_CHUNK_SIZE_HIGH_UNCOMP_SHIFT; + let offset = u64::from_le(self.uncomp_info) & BLOB_CC_V1_CHUNK_UNCOMP_OFFSET_MASK; 
+ + self.uncomp_info = u64::to_le(size_low | offset | size_high); + } + + fn is_encrypted(&self) -> bool { + false + } + + fn is_compressed(&self) -> bool { + self.compressed_size() != self.uncompressed_size() + } + + fn is_zran(&self) -> bool { + false + } + + fn is_batch(&self) -> bool { + false + } + + fn get_zran_index(&self) -> Result { + unimplemented!() + } + + fn get_zran_offset(&self) -> Result { + unimplemented!() + } + + fn get_batch_index(&self) -> Result { + unimplemented!() + } + + fn get_uncompressed_offset_in_batch_buf(&self) -> Result { + unimplemented!() + } + + fn get_data(&self) -> u64 { + 0 + } + + fn validate(&self, state: &BlobCompressionContext) -> Result<()> { + if self.compressed_end() > state.compressed_size + || self.uncompressed_end() > state.uncompressed_size + || self.uncompressed_size() == 0 + || (!self.is_compressed() && self.uncompressed_size() != self.compressed_size()) + { + return Err(einval!(format!( + "invalid chunk, blob: index {}/c_end 0x{:}/d_end 0x{:x}, chunk: c_end 0x{:x}/d_end 0x{:x}/compressed {}", + state.blob_index, + state.compressed_size, + state.uncompressed_size, + self.compressed_end(), + self.uncompressed_end(), + self.is_compressed(), + ))); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::fs::OpenOptions; + use std::io::Write; + use std::mem::ManuallyDrop; + use std::sync::Arc; + + use nydus_utils::compress; + use nydus_utils::metrics::BackendMetrics; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::backend::BlobReader; + use crate::device::{BlobFeatures, BlobInfo}; + use crate::meta::tests::DummyBlobReader; + use crate::meta::{ + round_up_4k, BlobCompressionContext, BlobCompressionContextHeader, + BlobCompressionContextInfo, BlobMetaChunkArray, + }; + use crate::utils::alloc_buf; + use crate::RAFS_MAX_CHUNK_SIZE; + + #[test] + fn test_new_chunk_on_disk() { + let mut chunk = BlobChunkInfoV1Ondisk::default(); + + assert_eq!(chunk.compressed_offset(), 0); + assert_eq!(chunk.compressed_size(), 1); + assert_eq!(chunk.compressed_end(), 1); + assert_eq!(chunk.uncompressed_offset(), 0); + assert_eq!(chunk.uncompressed_size(), 1); + assert_eq!(chunk.aligned_uncompressed_end(), 0x1000); + + chunk.set_compressed_offset(0x1000); + chunk.set_compressed_size(0x100); + assert_eq!(chunk.compressed_offset(), 0x1000); + assert_eq!(chunk.compressed_size(), 0x100); + + chunk.set_uncompressed_offset(0x1000); + chunk.set_uncompressed_size(0x100); + assert_eq!(chunk.uncompressed_offset(), 0x1000); + assert_eq!(chunk.uncompressed_size(), 0x100); + + chunk.set_compressed_offset(0xffffffffff); + chunk.set_compressed_size(0x1000000); + assert_eq!(chunk.compressed_offset(), 0xffffffffff); + assert_eq!(chunk.compressed_size(), 0x1000000); + + chunk.set_uncompressed_offset(0xffffffff000); + chunk.set_uncompressed_size(0x1000000); + assert_eq!(chunk.uncompressed_offset(), 0xffffffff000); + assert_eq!(chunk.uncompressed_size(), 0x1000000); + + // For testing old format compatibility. 
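The "old format compatibility" values constructed just below exercise the V1 bit layout directly: a 40-bit offset in the low bits, and the size stored as `size - 1` split into a low 20-bit field at bits 44..63 plus a high 4-bit field at bits 40..43. A standalone sketch of that packing, using local copies of the masks above and omitting the `u64::to_le`/`from_le` conversions applied by the real code:

```rust
const OFFSET_MASK: u64 = 0xff_ffff_ffff; // bits 0..39: compressed offset
const SIZE_LOW_MASK: u64 = 0x0f_ffff; // bits 0..19 of (size - 1), stored at bits 44..63
const SIZE_HIGH_MASK: u64 = 0xf0_0000; // bits 20..23 of (size - 1), stored at bits 40..43

fn pack_comp_info(offset: u64, size: u32) -> u64 {
    assert_eq!(offset & !OFFSET_MASK, 0);
    let s = u64::from(size) - 1;
    (offset & OFFSET_MASK) | ((s & SIZE_LOW_MASK) << 44) | ((s & SIZE_HIGH_MASK) << 20)
}

fn unpack_comp_info(v: u64) -> (u64, u32) {
    let offset = v & OFFSET_MASK;
    let size = ((v >> 44) | ((v >> 20) & SIZE_HIGH_MASK)) + 1;
    (offset, size as u32)
}

fn main() {
    // Round-trip the maximum values used in the surrounding test.
    let packed = pack_comp_info(0xff_ffff_ffff, 0x100_0000);
    assert_eq!(unpack_comp_info(packed), (0xff_ffff_ffff, 0x100_0000));
    // The "old format compatibility" comp_info value below decodes the same way.
    assert_eq!(
        unpack_comp_info(0xffff_f0ff_ffff_ffff),
        (0x00ff_ffff_ffff, 0x0010_0000)
    );
}
```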
+ let chunk = BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0xffff_ffff_f100_0000), + comp_info: u64::to_le(0xffff_f0ff_ffff_ffff), + }; + assert_eq!(chunk.uncompressed_size(), 0x000f_ffff + 1); + assert_eq!(chunk.uncompressed_offset(), 0xffff_1000 * 0x1000); + assert_eq!(chunk.compressed_size(), 0x000f_ffff + 1); + assert_eq!(chunk.compressed_offset(), 0x00ff_ffff_ffff); + } + + #[test] + fn test_get_chunk_index_with_hole() { + let state = BlobCompressionContext { + chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V1(vec![ + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0000_0000), + comp_info: u64::to_le(0x00ff_f000_0000_0000), + }, + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0010_0000), + comp_info: u64::to_le(0x00ff_f000_0010_0000), + }, + ])), + ..Default::default() + }; + + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0, false) + .unwrap(), + 0 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x1fff, false) + .unwrap(), + 0 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x100000, false) + .unwrap(), + 1 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x101fff, false) + .unwrap(), + 1 + ); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x2000, false) + .unwrap_err(); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0xfffff, false) + .unwrap_err(); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x102000, false) + .unwrap_err(); + } + + #[test] + fn test_get_chunks() { + let state = BlobCompressionContext { + blob_index: 1, + blob_features: 0, + compressed_size: 0x6001, + uncompressed_size: 0x102001, + chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V1(vec![ + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x0100_0000_0000_0000), + comp_info: u64::to_le(0x00ff_f000_0000_0000), + }, + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0000_2000), + comp_info: u64::to_le(0x01ff_f000_0000_1000), + }, + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0000_4000), + comp_info: u64::to_le(0x00ff_f000_0000_3000), + }, + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0010_0000), + comp_info: u64::to_le(0x00ff_f000_0000_4000), + }, + BlobChunkInfoV1Ondisk { + uncomp_info: u64::to_le(0x01ff_f000_0010_2000), + comp_info: u64::to_le(0x00ff_f000_0000_5000), + }, + ])), + ..Default::default() + }; + let info = BlobCompressionContextInfo { + state: Arc::new(state), + }; + + let vec = info.get_chunks_uncompressed(0x0, 0x1001, 0).unwrap(); + assert_eq!(vec.len(), 1); + assert_eq!(vec[0].blob_index(), 1); + assert_eq!(vec[0].id(), 0); + assert_eq!(vec[0].compressed_offset(), 0); + assert_eq!(vec[0].compressed_size(), 0x1000); + assert_eq!(vec[0].uncompressed_offset(), 0); + assert_eq!(vec[0].uncompressed_size(), 0x1001); + assert!(vec[0].is_compressed()); + + let vec = info.get_chunks_uncompressed(0x0, 0x4000, 0).unwrap(); + assert_eq!(vec.len(), 2); + assert_eq!(vec[1].blob_index(), 1); + assert_eq!(vec[1].id(), 1); + assert_eq!(vec[1].compressed_offset(), 0x1000); + assert_eq!(vec[1].compressed_size(), 0x2000); + assert_eq!(vec[1].uncompressed_offset(), 0x2000); + assert_eq!(vec[1].uncompressed_size(), 0x2000); + assert!(!vec[1].is_compressed()); + + let vec = info.get_chunks_uncompressed(0x0, 0x4001, 0).unwrap(); + assert_eq!(vec.len(), 3); + + let vec = info.get_chunks_uncompressed(0x100000, 0x2000, 0).unwrap(); + 
assert_eq!(vec.len(), 1); + + assert!(info.get_chunks_uncompressed(0x0, 0x6001, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x0, 0xfffff, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x0, 0x100000, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x0, 0x104000, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x0, 0x104001, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x100000, 0x2001, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x100000, 0x4000, 0).is_err()); + assert!(info.get_chunks_uncompressed(0x100000, 0x4001, 0).is_err()); + assert!(info + .get_chunks_uncompressed(0x102000, 0xffff_ffff_ffff_ffff, 0) + .is_err()); + assert!(info.get_chunks_uncompressed(0x104000, 0x1, 0).is_err()); + } + + #[test] + fn test_read_metadata_compressor_none() { + let temp = TempFile::new().unwrap(); + let mut w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + + let chunks = [ + BlobChunkInfoV1Ondisk { + uncomp_info: 0x01ff_f000_0000_0000, + comp_info: 0x00ff_f000_0000_0000, + }, + BlobChunkInfoV1Ondisk { + uncomp_info: 0x01ff_f000_0010_0000, + comp_info: 0x00ff_f000_0010_0000, + }, + ]; + + let data = unsafe { + std::slice::from_raw_parts( + chunks.as_ptr() as *const u8, + chunks.len() * std::mem::size_of::(), + ) + }; + let uncompressed_size = data.len(); + + let pos = 0; + w.write_all(data).unwrap(); + let header = BlobCompressionContextHeader::default(); + w.write_all(header.as_bytes()).unwrap(); + + let mut blob_info = BlobInfo::new( + 0, + "dummy".to_string(), + 0, + 0, + RAFS_MAX_CHUNK_SIZE as u32, + 0, + BlobFeatures::default(), + ); + blob_info.set_blob_meta_info( + pos, + data.len() as u64, + data.len() as u64, + compress::Algorithm::None as u32, + ); + + let mut buffer = alloc_buf( + round_up_4k(uncompressed_size) + std::mem::size_of::(), + ); + let reader: Arc = Arc::new(DummyBlobReader { + metrics: BackendMetrics::new("dummy", "localfs"), + file: r, + }); + BlobCompressionContextInfo::read_metadata(&blob_info, &reader, &mut buffer).unwrap(); + + assert_eq!(&buffer[0..data.len()], data); + } + + #[test] + fn test_read_metadata_compressor_lz4() { + let temp = TempFile::new().unwrap(); + let mut w = OpenOptions::new() + .read(true) + .write(true) + .open(temp.as_path()) + .unwrap(); + let r = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + + let chunks = [ + BlobChunkInfoV1Ondisk { + uncomp_info: 0x01ff_f000_0000_0000, + comp_info: 0x00ff_f000_0000_0000, + }, + BlobChunkInfoV1Ondisk { + uncomp_info: 0x01ff_f000_0010_0000, + comp_info: 0x00ff_f000_0010_0000, + }, + ]; + + let data = unsafe { + std::slice::from_raw_parts( + chunks.as_ptr() as *const u8, + chunks.len() * std::mem::size_of::(), + ) + }; + + let (buf, compressed) = compress::compress(data, compress::Algorithm::Lz4Block).unwrap(); + assert!(compressed); + + let pos = 0; + w.write_all(&buf).unwrap(); + let header = BlobCompressionContextHeader::default(); + w.write_all(header.as_bytes()).unwrap(); + + let compressed_size = buf.len(); + let uncompressed_size = data.len(); + let mut blob_info = BlobInfo::new( + 0, + "dummy".to_string(), + 0, + 0, + RAFS_MAX_CHUNK_SIZE as u32, + 0, + BlobFeatures::default(), + ); + blob_info.set_blob_meta_info( + pos, + compressed_size as u64, + uncompressed_size as u64, + compress::Algorithm::Lz4Block as u32, + ); + + let mut buffer = alloc_buf( + round_up_4k(uncompressed_size) + 
std::mem::size_of::(), + ); + let reader: Arc = Arc::new(DummyBlobReader { + metrics: BackendMetrics::new("dummy", "localfs"), + file: r, + }); + BlobCompressionContextInfo::read_metadata(&blob_info, &reader, &mut buffer).unwrap(); + + assert_eq!(&buffer[0..uncompressed_size], data); + } +} diff --git a/storage/src/meta/chunk_info_v2.rs b/storage/src/meta/chunk_info_v2.rs index 913af3243cf..dbbb9a9cddf 100644 --- a/storage/src/meta/chunk_info_v2.rs +++ b/storage/src/meta/chunk_info_v2.rs @@ -1,506 +1,506 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::{Display, Formatter}; -use std::io::{Error, ErrorKind, Result}; - -use crate::device::BlobFeatures; -use crate::meta::{BlobCompressionContext, BlobMetaChunkInfo, BLOB_CCT_CHUNK_SIZE_MASK}; - -const CHUNK_V2_COMP_OFFSET_MASK: u64 = 0xff_ffff_ffff; -const CHUNK_V2_COMP_SIZE_SHIFT: u64 = 40; -const CHUNK_V2_UNCOMP_OFFSET_MASK: u64 = 0xffff_ffff; -const CHUNK_V2_UNCOMP_OFFSET_SHIFT: u64 = 12; -const CHUNK_V2_UNCOMP_SIZE_SHIFT: u64 = 32; -const CHUNK_V2_FLAG_MASK: u64 = 0xff << 56; -const CHUNK_V2_FLAG_COMPRESSED: u64 = 0x1 << 56; -const CHUNK_V2_FLAG_ZRAN: u64 = 0x2 << 56; -const CHUNK_V2_FLAG_BATCH: u64 = 0x4 << 56; -const CHUNK_V2_FLAG_ENCRYPTED: u64 = 0x8 << 56; -const CHUNK_V2_FLAG_VALID: u64 = 0xf << 56; - -/// Chunk compression information on disk format V2. -#[repr(C, packed)] -#[derive(Clone, Copy, Default, Debug)] -pub struct BlobChunkInfoV2Ondisk { - // 32bits: offset, 24bits: size, 8bits: flags - pub(crate) uncomp_info: u64, - // offset: 40bits, 24bits: size - pub(crate) comp_info: u64, - // attached misc data - pub(crate) data: u64, -} - -impl BlobChunkInfoV2Ondisk { - pub(crate) fn set_compressed(&mut self, compressed: bool) { - if compressed { - self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_COMPRESSED); - } else { - self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_COMPRESSED); - } - } - - pub(crate) fn set_encrypted(&mut self, encrypted: bool) { - if encrypted { - self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_ENCRYPTED); - } else { - self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_ENCRYPTED); - } - } - - pub(crate) fn set_zran(&mut self, zran: bool) { - if zran { - self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_ZRAN); - } else { - self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_ZRAN); - } - } - - pub(crate) fn set_batch(&mut self, batch: bool) { - if batch { - self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_BATCH); - } else { - self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_BATCH); - } - } - - pub(crate) fn set_data(&mut self, data: u64) { - self.data = u64::to_le(data); - } - - pub(crate) fn set_zran_index(&mut self, index: u32) { - assert!(self.is_zran()); - let mut data = u64::from_le(self.data) & 0x0000_0000_ffff_ffff; - data |= (index as u64) << 32; - self.data = u64::to_le(data); - } - - pub(crate) fn set_zran_offset(&mut self, offset: u32) { - assert!(self.is_zran()); - let mut data = u64::from_le(self.data) & 0xffff_ffff_0000_0000; - data |= offset as u64; - self.data = u64::to_le(data); - } - - pub(crate) fn set_batch_index(&mut self, index: u32) { - assert!(self.is_batch()); - let mut data = u64::from_le(self.data) & 0x0000_0000_ffff_ffff; - data |= (index as u64) << 32; - self.data = u64::to_le(data); - } - - pub(crate) fn set_uncompressed_offset_in_batch_buf(&mut self, offset: u32) { - assert!(self.is_batch()); - let mut data = u64::from_le(self.data) & 0xffff_ffff_0000_0000; - data |= offset as u64; - self.data = u64::to_le(data); - } - - fn flags(&self) -> u8 
{ - ((u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_MASK) >> 56) as u8 - } - - fn check_flags(&self) -> u8 { - ((u64::from_le(self.uncomp_info) & !CHUNK_V2_FLAG_VALID) >> 56) as u8 - } -} - -impl BlobMetaChunkInfo for BlobChunkInfoV2Ondisk { - fn compressed_offset(&self) -> u64 { - u64::from_le(self.comp_info) & CHUNK_V2_COMP_OFFSET_MASK - } - - fn set_compressed_offset(&mut self, offset: u64) { - assert_eq!(offset & !CHUNK_V2_COMP_OFFSET_MASK, 0); - self.comp_info &= u64::to_le(!CHUNK_V2_COMP_OFFSET_MASK); - self.comp_info |= u64::to_le(offset & CHUNK_V2_COMP_OFFSET_MASK); - } - - fn compressed_size(&self) -> u32 { - ((u64::from_le(self.comp_info) >> CHUNK_V2_COMP_SIZE_SHIFT) & BLOB_CCT_CHUNK_SIZE_MASK) - as u32 - } - - fn set_compressed_size(&mut self, size: u32) { - let size = size as u64; - assert!(size <= BLOB_CCT_CHUNK_SIZE_MASK); - self.comp_info &= u64::to_le(!(BLOB_CCT_CHUNK_SIZE_MASK << CHUNK_V2_COMP_SIZE_SHIFT)); - self.comp_info |= u64::to_le(size << CHUNK_V2_COMP_SIZE_SHIFT); - } - - fn uncompressed_offset(&self) -> u64 { - (u64::from_le(self.uncomp_info) & CHUNK_V2_UNCOMP_OFFSET_MASK) - << CHUNK_V2_UNCOMP_OFFSET_SHIFT - } - - fn set_uncompressed_offset(&mut self, offset: u64) { - let off = (offset >> CHUNK_V2_UNCOMP_OFFSET_SHIFT) & CHUNK_V2_UNCOMP_OFFSET_MASK; - assert_eq!(offset, off << CHUNK_V2_UNCOMP_OFFSET_SHIFT); - self.uncomp_info &= u64::to_le(!CHUNK_V2_UNCOMP_OFFSET_MASK); - self.uncomp_info |= u64::to_le(off); - } - - fn uncompressed_size(&self) -> u32 { - let size = u64::from_le(self.uncomp_info) >> CHUNK_V2_UNCOMP_SIZE_SHIFT; - (size & BLOB_CCT_CHUNK_SIZE_MASK) as u32 + 1 - } - - fn set_uncompressed_size(&mut self, size: u32) { - let size = size as u64; - assert!(size != 0 && size - 1 <= BLOB_CCT_CHUNK_SIZE_MASK); - self.uncomp_info &= u64::to_le(!(BLOB_CCT_CHUNK_SIZE_MASK << CHUNK_V2_UNCOMP_SIZE_SHIFT)); - self.uncomp_info |= u64::to_le((size - 1) << CHUNK_V2_UNCOMP_SIZE_SHIFT); - } - - fn is_encrypted(&self) -> bool { - u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_ENCRYPTED != 0 - } - - fn is_compressed(&self) -> bool { - u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_COMPRESSED != 0 - } - - fn is_zran(&self) -> bool { - u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_ZRAN != 0 - } - - fn is_batch(&self) -> bool { - u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_BATCH != 0 - } - - fn get_zran_index(&self) -> Result { - if !self.is_zran() { - return Err(einval!("Failed to get zran_index: not a ZRan chunk")); - } - Ok((u64::from_le(self.data) >> 32) as u32) - } - - fn get_zran_offset(&self) -> Result { - if !self.is_zran() { - return Err(einval!("Failed to get zran_offset: not a ZRan chunk")); - } - Ok(u64::from_le(self.data) as u32) - } - - fn get_batch_index(&self) -> Result { - if !self.is_batch() { - return Err(einval!("Failed to get batch_index: not a batch chunk")); - } - Ok((u64::from_le(self.data) >> 32) as u32) - } - - fn get_uncompressed_offset_in_batch_buf(&self) -> Result { - if !self.is_batch() { - return Err(einval!( - "Failed to get uncompressed_offset_in_batch_buf: not a batch chunk" - )); - } - Ok(u64::from_le(self.data) as u32) - } - - fn get_data(&self) -> u64 { - u64::from_le(self.data) - } - - fn validate(&self, state: &BlobCompressionContext) -> Result<()> { - if self.compressed_end() > state.compressed_size - || self.uncompressed_end() > state.uncompressed_size - || self.uncompressed_size() == 0 - || (!state.is_separate() && !self.is_batch() && self.compressed_size() == 0) - || (!self.is_encrypted() - && !self.is_compressed() - && 
self.uncompressed_size() != self.compressed_size()) - { - return Err(Error::new( - ErrorKind::Other, - format!( - "invalid chunk, blob: index {}/c_size 0x{:x}/d_size 0x{:x}, chunk: c_end 0x{:x}/d_end 0x{:x}/compressed {} batch {} zran {} encrypted {}", - state.blob_index, - state.compressed_size, - state.uncompressed_size, - self.compressed_end(), - self.uncompressed_end(), - self.is_compressed(), - self.is_batch(), - self.is_zran(), - self.is_encrypted() - ), - )); - } - - let invalid_flags = self.check_flags(); - if invalid_flags != 0 { - return Err(Error::new( - ErrorKind::Other, - format!("unknown chunk flags 0x{:x}", invalid_flags), - )); - } - - if state.blob_features & BlobFeatures::ZRAN.bits() == 0 && self.is_zran() { - return Err(Error::new( - ErrorKind::Other, - "invalid chunk flag ZRan for non-ZRan blob", - )); - } else if self.is_zran() { - let index = self.get_zran_index()? as usize; - if index >= state.zran_info_array.len() { - return Err(Error::new( - ErrorKind::Other, - format!( - "ZRan index {} is too big, max {}", - index, - state.zran_info_array.len() - ), - )); - } - let ctx = &state.zran_info_array[index]; - let zran_offset = self.get_zran_offset()?; - if zran_offset >= ctx.out_size() - || zran_offset + self.uncompressed_size() > ctx.out_size() - { - return Err(Error::new( - ErrorKind::Other, - format!( - "ZRan range 0x{:x}/0x{:x} is invalid, should be with in 0/0x{:x}", - zran_offset, - self.uncompressed_size(), - ctx.out_size() - ), - )); - } - } - - if self.is_batch() { - if state.blob_features & BlobFeatures::BATCH.bits() == 0 { - return Err(Error::new( - ErrorKind::Other, - "invalid chunk flag Batch for non-Batch blob", - )); - } else { - let index = self.get_batch_index()? as usize; - if index >= state.batch_info_array.len() { - return Err(Error::new( - ErrorKind::Other, - format!( - "Batch index {} is too big, max {}", - index, - state.batch_info_array.len() - ), - )); - } - let ctx = &state.batch_info_array[index]; - if ctx.compressed_size() > ctx.uncompressed_batch_size() - || self.get_uncompressed_offset_in_batch_buf()? 
+ self.uncompressed_size() - > ctx.uncompressed_batch_size() - || u64::MAX - self.compressed_offset() < ctx.compressed_size() as u64 - { - return Err(Error::new(ErrorKind::Other, format!( - "Batch Context is invalid: chunk: uncompressed_size 0x{:x}, uncompressed_offset_in_batch_buf 0x{:x}, uncompressed_batch_size 0x{:x}, batch context: index {}, compressed_size 0x{:x}, uncompressed_batch_size 0x{:x}", - self.uncompressed_size(), - self.get_uncompressed_offset_in_batch_buf()?, - ctx.uncompressed_batch_size(), - index, - ctx.compressed_size(), - ctx.uncompressed_batch_size(), - ))); - } - } - } - - Ok(()) - } -} - -impl Display for BlobChunkInfoV2Ondisk { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{{ comp:{:x}/{:x}, uncomp:{:x}/{:x} data:{:x} flags:{:x}}}", - self.compressed_offset(), - self.compressed_size(), - self.uncompressed_offset(), - self.uncompressed_size(), - self.get_data(), - self.flags(), - ) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::meta::BlobMetaChunkArray; - use std::mem::ManuallyDrop; - - #[test] - fn test_new_chunk_on_disk() { - let mut chunk = BlobChunkInfoV2Ondisk::default(); - - assert_eq!(chunk.compressed_offset(), 0); - assert_eq!(chunk.compressed_size(), 0); - assert_eq!(chunk.compressed_end(), 0); - assert_eq!(chunk.uncompressed_offset(), 0); - assert_eq!(chunk.uncompressed_size(), 1); - assert!(!chunk.is_zran()); - assert_eq!(chunk.aligned_uncompressed_end(), 0x1000); - - chunk.set_compressed_offset(0x1000); - chunk.set_compressed_size(0x100); - assert_eq!(chunk.compressed_offset(), 0x1000); - assert_eq!(chunk.compressed_size(), 0x100); - - chunk.set_uncompressed_offset(0x1000); - chunk.set_uncompressed_size(0x100); - assert_eq!(chunk.uncompressed_offset(), 0x1000); - assert_eq!(chunk.uncompressed_size(), 0x100); - - chunk.set_compressed_offset(0xffffffffff); - chunk.set_compressed_size(0x1000000 - 1); - assert_eq!(chunk.compressed_offset(), 0xffffffffff); - assert_eq!(chunk.compressed_size(), 0x1000000 - 1); - - chunk.set_uncompressed_offset(0xffffffff000); - chunk.set_uncompressed_size(0x1000000); - assert_eq!(chunk.uncompressed_offset(), 0xffffffff000); - assert_eq!(chunk.uncompressed_size(), 0x1000000); - - chunk.set_zran(true); - chunk.set_zran_index(3); - chunk.set_zran_offset(5); - assert_eq!(chunk.get_zran_index().unwrap(), 3); - assert_eq!(chunk.get_zran_offset().unwrap(), 5); - chunk.set_zran(false); - assert!(!chunk.is_zran()); - - let before = chunk.uncomp_info; - chunk.set_compressed(true); - chunk.set_compressed(false); - assert_eq!(chunk.uncomp_info as u64, before); - - chunk.set_encrypted(true); - assert!(chunk.is_encrypted()); - - let before = chunk.uncomp_info; - chunk.set_batch(true); - chunk.set_batch(false); - assert_eq!(chunk.uncomp_info as u64, before); - - chunk.set_data(0x10); - assert_eq!(chunk.data as u64, 0x10); - - chunk.set_batch(true); - chunk.set_batch_index(0x20); - assert_eq!(chunk.data as u64, 137438953488); - - chunk.set_uncompressed_offset_in_batch_buf(0x30); - assert_eq!(chunk.data as u64, 137438953520); - - assert_eq!(chunk.flags(), 12); - assert_eq!(chunk.get_batch_index().unwrap(), 32); - assert_eq!(chunk.get_uncompressed_offset_in_batch_buf().unwrap(), 48); - assert_eq!(chunk.get_data(), 137438953520); - - // For testing old format compatibility. 
- let chunk = BlobChunkInfoV2Ondisk { - uncomp_info: u64::to_le(0x0300_0100_0000_0100), - comp_info: u64::to_le(0x0fff_ffff_ffff_ffff), - data: u64::from_le(0x0000_0003_0000_0005), - }; - assert_eq!(chunk.uncompressed_offset(), 0x100000); - assert_eq!(chunk.uncompressed_size(), 0x100 + 1); - assert_eq!(chunk.compressed_size(), 0x000f_ffff); - assert_eq!(chunk.compressed_offset(), 0x00ff_ffff_ffff); - assert_eq!(chunk.get_zran_index().unwrap(), 3); - assert_eq!(chunk.get_zran_offset().unwrap(), 5); - } - - #[test] - fn test_get_chunk_index_with_hole() { - let state = BlobCompressionContext { - chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V2(vec![ - BlobChunkInfoV2Ondisk { - uncomp_info: u64::to_le(0x0100_1fff_0000_0000), - comp_info: u64::to_le(0x000f_ff00_0000_0000), - data: 0, - }, - BlobChunkInfoV2Ondisk { - uncomp_info: u64::to_le(0x0100_1fff_0000_0100), - comp_info: u64::to_le(0x001f_ff00_0010_0000), - data: 0, - }, - ])), - ..Default::default() - }; - - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0, false) - .unwrap(), - 0 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x1fff, false) - .unwrap(), - 0 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x100000, false) - .unwrap(), - 1 - ); - assert_eq!( - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x101fff, false) - .unwrap(), - 1 - ); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x2000, false) - .unwrap_err(); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0xfffff, false) - .unwrap_err(); - state - .chunk_info_array - .get_chunk_index_nocheck(&state, 0x102000, false) - .unwrap_err(); - } - - #[test] - fn test_chunk_on_disk_validate() { - let mut ctx = BlobCompressionContext::default(); - let mut chunk = BlobChunkInfoV2Ondisk::default(); - println!("{}", chunk); - - chunk.set_compressed_offset(0x10); - chunk.set_compressed_size(0x20); - chunk.set_encrypted(false); - chunk.set_compressed(false); - chunk.set_uncompressed_size(0x30); - chunk.set_compressed_size(0x40); - chunk.set_zran(true); - ctx.compressed_size = 0x100; - ctx.uncompressed_size = 0x40; - ctx.blob_features = 0; - assert!(chunk.validate(&ctx).is_err()); - - chunk.set_encrypted(true); - assert!(chunk.validate(&ctx).is_err()); - - ctx.blob_features = BlobFeatures::ZRAN.bits(); - chunk.set_zran_index(0); - assert!(chunk.validate(&ctx).is_err()); - - chunk.set_zran(false); - assert!(chunk.validate(&ctx).is_ok()); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::{Display, Formatter}; +use std::io::{Error, ErrorKind, Result}; + +use crate::device::BlobFeatures; +use crate::meta::{BlobCompressionContext, BlobMetaChunkInfo, BLOB_CCT_CHUNK_SIZE_MASK}; + +const CHUNK_V2_COMP_OFFSET_MASK: u64 = 0xff_ffff_ffff; +const CHUNK_V2_COMP_SIZE_SHIFT: u64 = 40; +const CHUNK_V2_UNCOMP_OFFSET_MASK: u64 = 0xffff_ffff; +const CHUNK_V2_UNCOMP_OFFSET_SHIFT: u64 = 12; +const CHUNK_V2_UNCOMP_SIZE_SHIFT: u64 = 32; +const CHUNK_V2_FLAG_MASK: u64 = 0xff << 56; +const CHUNK_V2_FLAG_COMPRESSED: u64 = 0x1 << 56; +const CHUNK_V2_FLAG_ZRAN: u64 = 0x2 << 56; +const CHUNK_V2_FLAG_BATCH: u64 = 0x4 << 56; +const CHUNK_V2_FLAG_ENCRYPTED: u64 = 0x8 << 56; +const CHUNK_V2_FLAG_VALID: u64 = 0xf << 56; + +/// Chunk compression information on disk format V2. 
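// A minimal, self-contained sketch (illustrative only, example values) of how the
// masks and shifts defined above pack one v2 chunk record:
//   comp_info:   bits 0..40 = compressed offset, bits 40..64 = compressed size
//   uncomp_info: bits 0..32 = uncompressed offset >> 12 (4K aligned),
//                bits 32..56 = uncompressed size - 1, bits 56..64 = flags
fn pack_v2_chunk_example() -> (u64, u64) {
    let comp_offset: u64 = 0x1000; // must fit in 40 bits
    let comp_size: u64 = 0x100; // must fit in 24 bits
    let uncomp_offset: u64 = 0x2000; // must be 4K aligned
    let uncomp_size: u64 = 0x100; // stored as size - 1, must fit in 24 bits
    let flags: u64 = 0x1 << 56; // CHUNK_V2_FLAG_COMPRESSED
    let comp_info = (comp_offset & 0xff_ffff_ffff) | (comp_size << 40);
    let uncomp_info = (uncomp_offset >> 12) | ((uncomp_size - 1) << 32) | flags;
    (u64::to_le(comp_info), u64::to_le(uncomp_info))
}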
+#[repr(C, packed)] +#[derive(Clone, Copy, Default, Debug)] +pub struct BlobChunkInfoV2Ondisk { + // 32bits: offset, 24bits: size, 8bits: flags + pub(crate) uncomp_info: u64, + // offset: 40bits, 24bits: size + pub(crate) comp_info: u64, + // attached misc data + pub(crate) data: u64, +} + +impl BlobChunkInfoV2Ondisk { + pub(crate) fn set_compressed(&mut self, compressed: bool) { + if compressed { + self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_COMPRESSED); + } else { + self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_COMPRESSED); + } + } + + pub(crate) fn set_encrypted(&mut self, encrypted: bool) { + if encrypted { + self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_ENCRYPTED); + } else { + self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_ENCRYPTED); + } + } + + pub(crate) fn set_zran(&mut self, zran: bool) { + if zran { + self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_ZRAN); + } else { + self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_ZRAN); + } + } + + pub(crate) fn set_batch(&mut self, batch: bool) { + if batch { + self.uncomp_info |= u64::to_le(CHUNK_V2_FLAG_BATCH); + } else { + self.uncomp_info &= u64::to_le(!CHUNK_V2_FLAG_BATCH); + } + } + + pub(crate) fn set_data(&mut self, data: u64) { + self.data = u64::to_le(data); + } + + pub(crate) fn set_zran_index(&mut self, index: u32) { + assert!(self.is_zran()); + let mut data = u64::from_le(self.data) & 0x0000_0000_ffff_ffff; + data |= (index as u64) << 32; + self.data = u64::to_le(data); + } + + pub(crate) fn set_zran_offset(&mut self, offset: u32) { + assert!(self.is_zran()); + let mut data = u64::from_le(self.data) & 0xffff_ffff_0000_0000; + data |= offset as u64; + self.data = u64::to_le(data); + } + + pub(crate) fn set_batch_index(&mut self, index: u32) { + assert!(self.is_batch()); + let mut data = u64::from_le(self.data) & 0x0000_0000_ffff_ffff; + data |= (index as u64) << 32; + self.data = u64::to_le(data); + } + + pub(crate) fn set_uncompressed_offset_in_batch_buf(&mut self, offset: u32) { + assert!(self.is_batch()); + let mut data = u64::from_le(self.data) & 0xffff_ffff_0000_0000; + data |= offset as u64; + self.data = u64::to_le(data); + } + + fn flags(&self) -> u8 { + ((u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_MASK) >> 56) as u8 + } + + fn check_flags(&self) -> u8 { + ((u64::from_le(self.uncomp_info) & !CHUNK_V2_FLAG_VALID) >> 56) as u8 + } +} + +impl BlobMetaChunkInfo for BlobChunkInfoV2Ondisk { + fn compressed_offset(&self) -> u64 { + u64::from_le(self.comp_info) & CHUNK_V2_COMP_OFFSET_MASK + } + + fn set_compressed_offset(&mut self, offset: u64) { + assert_eq!(offset & !CHUNK_V2_COMP_OFFSET_MASK, 0); + self.comp_info &= u64::to_le(!CHUNK_V2_COMP_OFFSET_MASK); + self.comp_info |= u64::to_le(offset & CHUNK_V2_COMP_OFFSET_MASK); + } + + fn compressed_size(&self) -> u32 { + ((u64::from_le(self.comp_info) >> CHUNK_V2_COMP_SIZE_SHIFT) & BLOB_CCT_CHUNK_SIZE_MASK) + as u32 + } + + fn set_compressed_size(&mut self, size: u32) { + let size = size as u64; + assert!(size <= BLOB_CCT_CHUNK_SIZE_MASK); + self.comp_info &= u64::to_le(!(BLOB_CCT_CHUNK_SIZE_MASK << CHUNK_V2_COMP_SIZE_SHIFT)); + self.comp_info |= u64::to_le(size << CHUNK_V2_COMP_SIZE_SHIFT); + } + + fn uncompressed_offset(&self) -> u64 { + (u64::from_le(self.uncomp_info) & CHUNK_V2_UNCOMP_OFFSET_MASK) + << CHUNK_V2_UNCOMP_OFFSET_SHIFT + } + + fn set_uncompressed_offset(&mut self, offset: u64) { + let off = (offset >> CHUNK_V2_UNCOMP_OFFSET_SHIFT) & CHUNK_V2_UNCOMP_OFFSET_MASK; + assert_eq!(offset, off << CHUNK_V2_UNCOMP_OFFSET_SHIFT); + self.uncomp_info &= 
u64::to_le(!CHUNK_V2_UNCOMP_OFFSET_MASK); + self.uncomp_info |= u64::to_le(off); + } + + fn uncompressed_size(&self) -> u32 { + let size = u64::from_le(self.uncomp_info) >> CHUNK_V2_UNCOMP_SIZE_SHIFT; + (size & BLOB_CCT_CHUNK_SIZE_MASK) as u32 + 1 + } + + fn set_uncompressed_size(&mut self, size: u32) { + let size = size as u64; + assert!(size != 0 && size - 1 <= BLOB_CCT_CHUNK_SIZE_MASK); + self.uncomp_info &= u64::to_le(!(BLOB_CCT_CHUNK_SIZE_MASK << CHUNK_V2_UNCOMP_SIZE_SHIFT)); + self.uncomp_info |= u64::to_le((size - 1) << CHUNK_V2_UNCOMP_SIZE_SHIFT); + } + + fn is_encrypted(&self) -> bool { + u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_ENCRYPTED != 0 + } + + fn is_compressed(&self) -> bool { + u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_COMPRESSED != 0 + } + + fn is_zran(&self) -> bool { + u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_ZRAN != 0 + } + + fn is_batch(&self) -> bool { + u64::from_le(self.uncomp_info) & CHUNK_V2_FLAG_BATCH != 0 + } + + fn get_zran_index(&self) -> Result { + if !self.is_zran() { + return Err(einval!("Failed to get zran_index: not a ZRan chunk")); + } + Ok((u64::from_le(self.data) >> 32) as u32) + } + + fn get_zran_offset(&self) -> Result { + if !self.is_zran() { + return Err(einval!("Failed to get zran_offset: not a ZRan chunk")); + } + Ok(u64::from_le(self.data) as u32) + } + + fn get_batch_index(&self) -> Result { + if !self.is_batch() { + return Err(einval!("Failed to get batch_index: not a batch chunk")); + } + Ok((u64::from_le(self.data) >> 32) as u32) + } + + fn get_uncompressed_offset_in_batch_buf(&self) -> Result { + if !self.is_batch() { + return Err(einval!( + "Failed to get uncompressed_offset_in_batch_buf: not a batch chunk" + )); + } + Ok(u64::from_le(self.data) as u32) + } + + fn get_data(&self) -> u64 { + u64::from_le(self.data) + } + + fn validate(&self, state: &BlobCompressionContext) -> Result<()> { + if self.compressed_end() > state.compressed_size + || self.uncompressed_end() > state.uncompressed_size + || self.uncompressed_size() == 0 + || (!state.is_separate() && !self.is_batch() && self.compressed_size() == 0) + || (!self.is_encrypted() + && !self.is_compressed() + && self.uncompressed_size() != self.compressed_size()) + { + return Err(Error::new( + ErrorKind::Other, + format!( + "invalid chunk, blob: index {}/c_size 0x{:x}/d_size 0x{:x}, chunk: c_end 0x{:x}/d_end 0x{:x}/compressed {} batch {} zran {} encrypted {}", + state.blob_index, + state.compressed_size, + state.uncompressed_size, + self.compressed_end(), + self.uncompressed_end(), + self.is_compressed(), + self.is_batch(), + self.is_zran(), + self.is_encrypted() + ), + )); + } + + let invalid_flags = self.check_flags(); + if invalid_flags != 0 { + return Err(Error::new( + ErrorKind::Other, + format!("unknown chunk flags 0x{:x}", invalid_flags), + )); + } + + if state.blob_features & BlobFeatures::ZRAN.bits() == 0 && self.is_zran() { + return Err(Error::new( + ErrorKind::Other, + "invalid chunk flag ZRan for non-ZRan blob", + )); + } else if self.is_zran() { + let index = self.get_zran_index()? 
as usize; + if index >= state.zran_info_array.len() { + return Err(Error::new( + ErrorKind::Other, + format!( + "ZRan index {} is too big, max {}", + index, + state.zran_info_array.len() + ), + )); + } + let ctx = &state.zran_info_array[index]; + let zran_offset = self.get_zran_offset()?; + if zran_offset >= ctx.out_size() + || zran_offset + self.uncompressed_size() > ctx.out_size() + { + return Err(Error::new( + ErrorKind::Other, + format!( + "ZRan range 0x{:x}/0x{:x} is invalid, should be with in 0/0x{:x}", + zran_offset, + self.uncompressed_size(), + ctx.out_size() + ), + )); + } + } + + if self.is_batch() { + if state.blob_features & BlobFeatures::BATCH.bits() == 0 { + return Err(Error::new( + ErrorKind::Other, + "invalid chunk flag Batch for non-Batch blob", + )); + } else { + let index = self.get_batch_index()? as usize; + if index >= state.batch_info_array.len() { + return Err(Error::new( + ErrorKind::Other, + format!( + "Batch index {} is too big, max {}", + index, + state.batch_info_array.len() + ), + )); + } + let ctx = &state.batch_info_array[index]; + if ctx.compressed_size() > ctx.uncompressed_batch_size() + || self.get_uncompressed_offset_in_batch_buf()? + self.uncompressed_size() + > ctx.uncompressed_batch_size() + || u64::MAX - self.compressed_offset() < ctx.compressed_size() as u64 + { + return Err(Error::new(ErrorKind::Other, format!( + "Batch Context is invalid: chunk: uncompressed_size 0x{:x}, uncompressed_offset_in_batch_buf 0x{:x}, uncompressed_batch_size 0x{:x}, batch context: index {}, compressed_size 0x{:x}, uncompressed_batch_size 0x{:x}", + self.uncompressed_size(), + self.get_uncompressed_offset_in_batch_buf()?, + ctx.uncompressed_batch_size(), + index, + ctx.compressed_size(), + ctx.uncompressed_batch_size(), + ))); + } + } + } + + Ok(()) + } +} + +impl Display for BlobChunkInfoV2Ondisk { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{{ comp:{:x}/{:x}, uncomp:{:x}/{:x} data:{:x} flags:{:x}}}", + self.compressed_offset(), + self.compressed_size(), + self.uncompressed_offset(), + self.uncompressed_size(), + self.get_data(), + self.flags(), + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::meta::BlobMetaChunkArray; + use std::mem::ManuallyDrop; + + #[test] + fn test_new_chunk_on_disk() { + let mut chunk = BlobChunkInfoV2Ondisk::default(); + + assert_eq!(chunk.compressed_offset(), 0); + assert_eq!(chunk.compressed_size(), 0); + assert_eq!(chunk.compressed_end(), 0); + assert_eq!(chunk.uncompressed_offset(), 0); + assert_eq!(chunk.uncompressed_size(), 1); + assert!(!chunk.is_zran()); + assert_eq!(chunk.aligned_uncompressed_end(), 0x1000); + + chunk.set_compressed_offset(0x1000); + chunk.set_compressed_size(0x100); + assert_eq!(chunk.compressed_offset(), 0x1000); + assert_eq!(chunk.compressed_size(), 0x100); + + chunk.set_uncompressed_offset(0x1000); + chunk.set_uncompressed_size(0x100); + assert_eq!(chunk.uncompressed_offset(), 0x1000); + assert_eq!(chunk.uncompressed_size(), 0x100); + + chunk.set_compressed_offset(0xffffffffff); + chunk.set_compressed_size(0x1000000 - 1); + assert_eq!(chunk.compressed_offset(), 0xffffffffff); + assert_eq!(chunk.compressed_size(), 0x1000000 - 1); + + chunk.set_uncompressed_offset(0xffffffff000); + chunk.set_uncompressed_size(0x1000000); + assert_eq!(chunk.uncompressed_offset(), 0xffffffff000); + assert_eq!(chunk.uncompressed_size(), 0x1000000); + + chunk.set_zran(true); + chunk.set_zran_index(3); + chunk.set_zran_offset(5); + assert_eq!(chunk.get_zran_index().unwrap(), 3); + 
assert_eq!(chunk.get_zran_offset().unwrap(), 5); + chunk.set_zran(false); + assert!(!chunk.is_zran()); + + let before = chunk.uncomp_info; + chunk.set_compressed(true); + chunk.set_compressed(false); + assert_eq!(chunk.uncomp_info as u64, before); + + chunk.set_encrypted(true); + assert!(chunk.is_encrypted()); + + let before = chunk.uncomp_info; + chunk.set_batch(true); + chunk.set_batch(false); + assert_eq!(chunk.uncomp_info as u64, before); + + chunk.set_data(0x10); + assert_eq!(chunk.data as u64, 0x10); + + chunk.set_batch(true); + chunk.set_batch_index(0x20); + assert_eq!(chunk.data as u64, 137438953488); + + chunk.set_uncompressed_offset_in_batch_buf(0x30); + assert_eq!(chunk.data as u64, 137438953520); + + assert_eq!(chunk.flags(), 12); + assert_eq!(chunk.get_batch_index().unwrap(), 32); + assert_eq!(chunk.get_uncompressed_offset_in_batch_buf().unwrap(), 48); + assert_eq!(chunk.get_data(), 137438953520); + + // For testing old format compatibility. + let chunk = BlobChunkInfoV2Ondisk { + uncomp_info: u64::to_le(0x0300_0100_0000_0100), + comp_info: u64::to_le(0x0fff_ffff_ffff_ffff), + data: u64::from_le(0x0000_0003_0000_0005), + }; + assert_eq!(chunk.uncompressed_offset(), 0x100000); + assert_eq!(chunk.uncompressed_size(), 0x100 + 1); + assert_eq!(chunk.compressed_size(), 0x000f_ffff); + assert_eq!(chunk.compressed_offset(), 0x00ff_ffff_ffff); + assert_eq!(chunk.get_zran_index().unwrap(), 3); + assert_eq!(chunk.get_zran_offset().unwrap(), 5); + } + + #[test] + fn test_get_chunk_index_with_hole() { + let state = BlobCompressionContext { + chunk_info_array: ManuallyDrop::new(BlobMetaChunkArray::V2(vec![ + BlobChunkInfoV2Ondisk { + uncomp_info: u64::to_le(0x0100_1fff_0000_0000), + comp_info: u64::to_le(0x000f_ff00_0000_0000), + data: 0, + }, + BlobChunkInfoV2Ondisk { + uncomp_info: u64::to_le(0x0100_1fff_0000_0100), + comp_info: u64::to_le(0x001f_ff00_0010_0000), + data: 0, + }, + ])), + ..Default::default() + }; + + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0, false) + .unwrap(), + 0 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x1fff, false) + .unwrap(), + 0 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x100000, false) + .unwrap(), + 1 + ); + assert_eq!( + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x101fff, false) + .unwrap(), + 1 + ); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x2000, false) + .unwrap_err(); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0xfffff, false) + .unwrap_err(); + state + .chunk_info_array + .get_chunk_index_nocheck(&state, 0x102000, false) + .unwrap_err(); + } + + #[test] + fn test_chunk_on_disk_validate() { + let mut ctx = BlobCompressionContext::default(); + let mut chunk = BlobChunkInfoV2Ondisk::default(); + println!("{}", chunk); + + chunk.set_compressed_offset(0x10); + chunk.set_compressed_size(0x20); + chunk.set_encrypted(false); + chunk.set_compressed(false); + chunk.set_uncompressed_size(0x30); + chunk.set_compressed_size(0x40); + chunk.set_zran(true); + ctx.compressed_size = 0x100; + ctx.uncompressed_size = 0x40; + ctx.blob_features = 0; + assert!(chunk.validate(&ctx).is_err()); + + chunk.set_encrypted(true); + assert!(chunk.validate(&ctx).is_err()); + + ctx.blob_features = BlobFeatures::ZRAN.bits(); + chunk.set_zran_index(0); + assert!(chunk.validate(&ctx).is_err()); + + chunk.set_zran(false); + assert!(chunk.validate(&ctx).is_ok()); + } +} diff --git a/storage/src/meta/mod.rs 
b/storage/src/meta/mod.rs index 9e9d40334c3..5c2edb2af01 100644 --- a/storage/src/meta/mod.rs +++ b/storage/src/meta/mod.rs @@ -1,2452 +1,2452 @@ -// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Generate, manage and access blob meta information for RAFS v6 data blobs. -//! -//! RAFS v6 filesystem includes three types of data: -//! - fs meta: contain filesystem meta data including super block, inode table, dirent etc. -//! - blob meta: contain digest and compression context for data chunks. -//! - chunk data: contain chunked file data in compressed or uncompressed form. -//! -//! There are different ways to packing above three types of data into blobs: -//! - meta blob/bootstrap: `fs meta` -//! - native data blob: `chunk data` | `compression context table` | [`chunk digest table`] | [`table of context`] -//! - native data blob with inlined fs meta: `chunk data` | `compression context table` | [`chunk digest table`] | `fs meta` | [`table of content`] -//! - ZRan data blob: `compression context table` | [`chunk digest table`] | [`table of content`] -//! - ZRan data blob with inlined fs meta: `compression context table` | [`chunk digest table`] | `fs meta` | [`table of content`] -//! -//! The blob compression context table contains following information: -//! - chunk compression information table: to locate compressed/uncompressed chunks in the data blob -//! - optional ZRan context table: to support randomly access/decompress gzip file -//! - optional ZRan dictionary table: to support randomly access/decompress gzip file -//! -//! The blob compression context table is laid as below: -//! | `chunk compression info table` | [`ZRan context table`] | [`ZRan dictionary table`] - -use std::any::Any; -use std::borrow::Cow; -use std::fs::OpenOptions; -use std::io::Result; -use std::mem::{size_of, ManuallyDrop}; -use std::ops::{Add, BitAnd, Not}; -use std::path::PathBuf; -use std::sync::Arc; - -use nydus_utils::compress::zlib_random::ZranContext; -use nydus_utils::crypt::decrypt_with_context; -use nydus_utils::digest::{DigestData, RafsDigest}; -use nydus_utils::filemap::FileMapState; -use nydus_utils::{compress, crypt}; - -use crate::backend::BlobReader; -use crate::device::v5::BlobV5ChunkInfo; -use crate::device::{BlobChunkFlags, BlobChunkInfo, BlobFeatures, BlobInfo}; -use crate::meta::toc::{TocEntryList, TocLocation}; -use crate::utils::alloc_buf; -use crate::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; - -mod chunk_info_v1; -pub use chunk_info_v1::BlobChunkInfoV1Ondisk; -mod chunk_info_v2; -pub use chunk_info_v2::BlobChunkInfoV2Ondisk; - -pub mod toc; - -mod zran; -pub use zran::{ZranContextGenerator, ZranInflateContext}; - -mod batch; -pub use batch::{BatchContextGenerator, BatchInflateContext}; - -const BLOB_CCT_MAGIC: u32 = 0xb10bb10bu32; -const BLOB_CCT_HEADER_SIZE: u64 = 0x1000u64; -const BLOB_CCT_CHUNK_SIZE_MASK: u64 = 0xff_ffff; - -const BLOB_CCT_V1_MAX_SIZE: u64 = RAFS_MAX_CHUNK_SIZE * 16; -const BLOB_CCT_V2_MAX_SIZE: u64 = RAFS_MAX_CHUNK_SIZE * 24; -//const BLOB_CCT_V1_RESERVED_SIZE: u64 = BLOB_METADATA_HEADER_SIZE - 44; -const BLOB_CCT_V2_RESERVED_SIZE: u64 = BLOB_CCT_HEADER_SIZE - 64; - -/// File suffix for blob meta file. -const BLOB_CCT_FILE_SUFFIX: &str = "blob.meta"; -/// File suffix for blob chunk digests. -const BLOB_DIGEST_FILE_SUFFIX: &str = "blob.digest"; -/// File suffix for blob ToC. -const BLOB_TOC_FILE_SUFFIX: &str = "blob.toc"; - -/// On disk format for blob compression context table header. 
-/// -/// Blob compression context table contains compression information for all chunks in the blob. -/// The compression context table header will be written into the data blob in plaintext mode, -/// and can be used as marker to locate the compression context table. All fields of compression -/// context table header should be encoded in little-endian format. -/// -/// The compression context table and header are arranged in the data blob as follows: -/// -/// `chunk data` | `compression context table` | `[ZRan context table | ZRan dictionary]` | `compression context table header` -#[repr(C)] -#[derive(Clone, Copy, Debug)] -pub struct BlobCompressionContextHeader { - /// Magic number to identify the header. - s_magic: u32, - /// Feature flags for the data blob. - s_features: u32, - /// Compression algorithm to process the compression context table. - s_ci_compressor: u32, - /// Number of entries in compression context table. - s_ci_entries: u32, - /// File offset to get the compression context table. - s_ci_offset: u64, - /// Size of compressed compression context table. - s_ci_compressed_size: u64, - /// Size of uncompressed compression context table. - s_ci_uncompressed_size: u64, - /// File offset to get the optional ZRan context data. - s_ci_zran_offset: u64, - /// Size of ZRan context data, including the ZRan context table and dictionary table. - s_ci_zran_size: u64, - /// Number of entries in the ZRan context table. - s_ci_zran_count: u32, - - s_reserved: [u8; BLOB_CCT_V2_RESERVED_SIZE as usize], - /// Second magic number to identify the blob meta data header. - s_magic2: u32, -} - -impl Default for BlobCompressionContextHeader { - fn default() -> Self { - BlobCompressionContextHeader { - s_magic: BLOB_CCT_MAGIC, - s_features: 0, - s_ci_compressor: compress::Algorithm::Lz4Block as u32, - s_ci_entries: 0, - s_ci_offset: 0, - s_ci_compressed_size: 0, - s_ci_uncompressed_size: 0, - s_ci_zran_offset: 0, - s_ci_zran_size: 0, - s_ci_zran_count: 0, - s_reserved: [0u8; BLOB_CCT_V2_RESERVED_SIZE as usize], - s_magic2: BLOB_CCT_MAGIC, - } - } -} - -impl BlobCompressionContextHeader { - /// Check whether a blob feature is set or not. - pub fn has_feature(&self, feature: BlobFeatures) -> bool { - self.s_features & feature.bits() != 0 - } - - /// Get compression algorithm to process chunk compression information array. - pub fn ci_compressor(&self) -> compress::Algorithm { - if self.s_ci_compressor == compress::Algorithm::Lz4Block as u32 { - compress::Algorithm::Lz4Block - } else if self.s_ci_compressor == compress::Algorithm::GZip as u32 { - compress::Algorithm::GZip - } else if self.s_ci_compressor == compress::Algorithm::Zstd as u32 { - compress::Algorithm::Zstd - } else { - compress::Algorithm::None - } - } - - /// Set compression algorithm to process chunk compression information array. - pub fn set_ci_compressor(&mut self, algo: compress::Algorithm) { - self.s_ci_compressor = algo as u32; - } - - /// Get number of entries in chunk compression information array. - pub fn ci_entries(&self) -> u32 { - self.s_ci_entries - } - - /// Set number of entries in chunk compression information array. - pub fn set_ci_entries(&mut self, entries: u32) { - self.s_ci_entries = entries; - } - - /// Get offset of compressed chunk compression information array. - pub fn ci_compressed_offset(&self) -> u64 { - self.s_ci_offset - } - - /// Set offset of compressed chunk compression information array. 
- pub fn set_ci_compressed_offset(&mut self, offset: u64) { - self.s_ci_offset = offset; - } - - /// Get size of compressed chunk compression information array. - pub fn ci_compressed_size(&self) -> u64 { - self.s_ci_compressed_size - } - - /// Set size of compressed chunk compression information array. - pub fn set_ci_compressed_size(&mut self, size: u64) { - self.s_ci_compressed_size = size; - } - - /// Get size of uncompressed chunk compression information array. - pub fn ci_uncompressed_size(&self) -> u64 { - self.s_ci_uncompressed_size - } - - /// Set size of uncompressed chunk compression information array. - pub fn set_ci_uncompressed_size(&mut self, size: u64) { - self.s_ci_uncompressed_size = size; - } - - /// Get ZRan context information entry count. - pub fn ci_zran_count(&self) -> u32 { - self.s_ci_zran_count - } - - /// Set ZRan context information entry count. - pub fn set_ci_zran_count(&mut self, count: u32) { - self.s_ci_zran_count = count; - } - - /// Get offset of ZRan context information table. - pub fn ci_zran_offset(&self) -> u64 { - self.s_ci_zran_offset - } - - /// Set offset of ZRan context information table. - pub fn set_ci_zran_offset(&mut self, offset: u64) { - self.s_ci_zran_offset = offset; - } - - /// Get size of ZRan context information table and dictionary table. - pub fn ci_zran_size(&self) -> u64 { - self.s_ci_zran_size - } - - /// Set size of ZRan context information table and dictionary table. - pub fn set_ci_zran_size(&mut self, size: u64) { - self.s_ci_zran_size = size; - } - - /// Check whether uncompressed chunks are 4k aligned. - pub fn is_4k_aligned(&self) -> bool { - self.has_feature(BlobFeatures::ALIGNED) - } - - /// Set flag indicating whether uncompressed chunks are aligned. - pub fn set_aligned(&mut self, aligned: bool) { - if aligned { - self.s_features |= BlobFeatures::ALIGNED.bits(); - } else { - self.s_features &= !BlobFeatures::ALIGNED.bits(); - } - } - - /// Set flag indicating whether RAFS meta is inlined in the data blob. - pub fn set_inlined_fs_meta(&mut self, inlined: bool) { - if inlined { - self.s_features |= BlobFeatures::INLINED_FS_META.bits(); - } else { - self.s_features &= !BlobFeatures::INLINED_FS_META.bits(); - } - } - - /// Set flag indicating whether chunk compression information format v2 is used or not. - pub fn set_chunk_info_v2(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::CHUNK_INFO_V2.bits(); - } else { - self.s_features &= !BlobFeatures::CHUNK_INFO_V2.bits(); - } - } - - /// Set flag indicating whether it's a ZRan blob or not. - pub fn set_ci_zran(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::ZRAN.bits(); - } else { - self.s_features &= !BlobFeatures::ZRAN.bits(); - } - } - - /// Set flag indicating whether blob.data and blob.meta are stored in separated blobs. - pub fn set_separate_blob(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::SEPARATE.bits(); - } else { - self.s_features &= !BlobFeatures::SEPARATE.bits(); - } - } - - /// Set flag indicating whether it's a blob for batch chunk or not. - pub fn set_ci_batch(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::BATCH.bits(); - } else { - self.s_features &= !BlobFeatures::BATCH.bits(); - } - } - - /// Set flag indicating whether chunk digest is inlined in the data blob or not. 
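// A minimal usage sketch (not part of this patch; all offsets/sizes are placeholder
// values) of how a builder might populate the header with the setters in this impl
// before appending it to the data blob:
fn build_header_example() -> BlobCompressionContextHeader {
    let mut header = BlobCompressionContextHeader::default();
    header.set_ci_compressor(compress::Algorithm::Zstd);
    header.set_ci_entries(1024); // number of chunk records
    header.set_ci_compressed_offset(0x10_0000); // where the compressed table starts
    header.set_ci_compressed_size(0x8000);
    header.set_ci_uncompressed_size(1024 * 24); // 24 bytes per v2 chunk record
    header.set_chunk_info_v2(true);
    header.set_aligned(true);
    // header.as_bytes() then yields the little-endian on-disk representation.
    header
}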
- pub fn set_inlined_chunk_digest(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::INLINED_CHUNK_DIGEST.bits(); - } else { - self.s_features &= !BlobFeatures::INLINED_CHUNK_DIGEST.bits(); - } - } - - /// Set flag indicating new blob format with tar headers. - pub fn set_has_tar_header(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::HAS_TAR_HEADER.bits(); - } else { - self.s_features &= !BlobFeatures::HAS_TAR_HEADER.bits(); - } - } - - /// Set flag indicating new blob format with toc headers. - pub fn set_has_toc(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::HAS_TOC.bits(); - } else { - self.s_features &= !BlobFeatures::HAS_TOC.bits(); - } - } - - /// Set flag indicating having inlined-meta capability. - pub fn set_cap_tar_toc(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::CAP_TAR_TOC.bits(); - } else { - self.s_features &= !BlobFeatures::CAP_TAR_TOC.bits(); - } - } - - /// Set flag indicating the blob is for RAFS filesystem in TARFS mode. - pub fn set_tarfs(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::TARFS.bits(); - } else { - self.s_features &= !BlobFeatures::TARFS.bits(); - } - } - - /// Set flag indicating the blob is encrypted. - pub fn set_encrypted(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::ENCRYPTED.bits(); - } else { - self.s_features &= !BlobFeatures::ENCRYPTED.bits(); - } - } - - /// Get blob meta feature flags. - pub fn features(&self) -> u32 { - self.s_features - } - - /// Convert the header as an `&[u8]`. - pub fn as_bytes(&self) -> &[u8] { - unsafe { - std::slice::from_raw_parts( - self as *const BlobCompressionContextHeader as *const u8, - size_of::(), - ) - } - } - - /// Set flag indicating whether it's a blob for batch chunk or not. - pub fn set_is_chunkdict_generated(&mut self, enable: bool) { - if enable { - self.s_features |= BlobFeatures::IS_CHUNKDICT_GENERATED.bits(); - } else { - self.s_features &= !BlobFeatures::IS_CHUNKDICT_GENERATED.bits(); - } - } -} - -/// Struct to manage blob chunk compression information, a wrapper over [BlobCompressionContext]. -/// -/// A [BlobCompressionContextInfo] object is loaded from on disk [BlobCompressionContextHeader] -/// object, and provides methods to query compression information about chunks in the blob. -#[derive(Clone)] -pub struct BlobCompressionContextInfo { - pub(crate) state: Arc, -} - -impl BlobCompressionContextInfo { - /// Create a new instance of [BlobCompressionContextInfo]. - /// - /// If a blob compression context cache file is present and is valid, it will be reused. - /// Otherwise download compression context content from backend if `reader` is valid. - /// - /// The downloaded compression context table will be cached into a file named as - /// `[blob_id].blob.meta`. The cache file is readonly once created and may be accessed - /// concurrently by multiple clients. 
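// Illustrative sketch of the cache file names derived from `blob_path` by this type,
// using the suffix constants defined above (paths here are only examples):
fn cache_paths_example(blob_path: &str) -> (String, String, String) {
    let meta = format!("{}.{}", blob_path, BLOB_CCT_FILE_SUFFIX); // e.g. <blob_id>.blob.meta
    let digest = format!("{}.{}", blob_path, BLOB_DIGEST_FILE_SUFFIX); // e.g. <blob_id>.blob.digest
    let toc = format!("{}.{}", blob_path, BLOB_TOC_FILE_SUFFIX); // e.g. <blob_id>.blob.toc
    (meta, digest, toc)
}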
- pub fn new( - blob_path: &str, - blob_info: &BlobInfo, - reader: Option<&Arc>, - load_chunk_digest: bool, - ) -> Result { - assert_eq!( - size_of::() as u64, - BLOB_CCT_HEADER_SIZE - ); - assert_eq!(size_of::(), 16); - assert_eq!(size_of::(), 24); - assert_eq!(size_of::(), 40); - - let chunk_count = blob_info.chunk_count(); - if chunk_count == 0 || chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { - return Err(einval!("invalid chunk count in blob meta header")); - } - - let uncompressed_size = blob_info.meta_ci_uncompressed_size() as usize; - let meta_path = format!("{}.{}", blob_path, BLOB_CCT_FILE_SUFFIX); - trace!( - "try to open blob meta file: path {:?} uncompressed_size {} chunk_count {}", - meta_path, - uncompressed_size, - chunk_count - ); - let enable_write = reader.is_some(); - let file = OpenOptions::new() - .read(true) - .write(enable_write) - .create(enable_write) - .open(&meta_path) - .map_err(|err| { - einval!(format!( - "failed to open/create blob meta file {}: {}", - meta_path, err - )) - })?; - - let aligned_uncompressed_size = round_up_4k(uncompressed_size); - let expected_size = BLOB_CCT_HEADER_SIZE as usize + aligned_uncompressed_size; - let mut file_size = file.metadata()?.len(); - if file_size == 0 && enable_write { - file.set_len(expected_size as u64)?; - file_size = expected_size as u64; - } - if file_size != expected_size as u64 { - return Err(einval!(format!( - "size of blob meta file '{}' doesn't match, expect {:x}, got {:x}", - meta_path, expected_size, file_size - ))); - } - - let mut filemap = FileMapState::new(file, 0, expected_size, enable_write)?; - let base = filemap.validate_range(0, expected_size)?; - let header = - filemap.get_mut::(aligned_uncompressed_size as usize)?; - if !Self::validate_header(blob_info, header)? { - if let Some(reader) = reader { - let buffer = - unsafe { std::slice::from_raw_parts_mut(base as *mut u8, expected_size) }; - Self::read_metadata(blob_info, reader, buffer)?; - if !Self::validate_header(blob_info, header)? 
{ - return Err(enoent!(format!("double check blob_info still invalid",))); - } - filemap.sync_data()?; - } else { - return Err(enoent!(format!( - "blob meta header from file '{}' is invalid", - meta_path - ))); - } - } - - let chunk_infos = BlobMetaChunkArray::from_file_map(&filemap, blob_info)?; - let chunk_infos = ManuallyDrop::new(chunk_infos); - let mut state = BlobCompressionContext { - blob_index: blob_info.blob_index(), - blob_features: blob_info.features().bits(), - compressed_size: blob_info.compressed_data_size(), - uncompressed_size: round_up_4k(blob_info.uncompressed_size()), - chunk_info_array: chunk_infos, - blob_meta_file_map: filemap, - ..Default::default() - }; - - if blob_info.has_feature(BlobFeatures::BATCH) { - let header = state - .blob_meta_file_map - .get_mut::(aligned_uncompressed_size as usize)?; - let inflate_offset = header.s_ci_zran_offset as usize; - let inflate_count = header.s_ci_zran_count as usize; - let batch_inflate_size = inflate_count * size_of::(); - let ptr = state - .blob_meta_file_map - .validate_range(inflate_offset, batch_inflate_size)?; - let array = unsafe { - Vec::from_raw_parts( - ptr as *mut u8 as *mut BatchInflateContext, - inflate_count, - inflate_count, - ) - }; - state.batch_info_array = ManuallyDrop::new(array); - } else if blob_info.has_feature(BlobFeatures::ZRAN) { - let header = state - .blob_meta_file_map - .get_mut::(aligned_uncompressed_size as usize)?; - let zran_offset = header.s_ci_zran_offset as usize; - let zran_count = header.s_ci_zran_count as usize; - let ci_zran_size = header.s_ci_zran_size as usize; - let zran_size = zran_count * size_of::(); - let ptr = state - .blob_meta_file_map - .validate_range(zran_offset, zran_size)?; - let array = unsafe { - Vec::from_raw_parts( - ptr as *mut u8 as *mut ZranInflateContext, - zran_count, - zran_count, - ) - }; - state.zran_info_array = ManuallyDrop::new(array); - - let zran_dict_size = ci_zran_size - zran_size; - let ptr = state - .blob_meta_file_map - .validate_range(zran_offset + zran_size, zran_dict_size)?; - let array = - unsafe { Vec::from_raw_parts(ptr as *mut u8, zran_dict_size, zran_dict_size) }; - state.zran_dict_table = ManuallyDrop::new(array); - } - - if load_chunk_digest && blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST) { - let digest_path = PathBuf::from(format!("{}.{}", blob_path, BLOB_DIGEST_FILE_SUFFIX)); - if let Some(reader) = reader { - let toc_path = format!("{}.{}", blob_path, BLOB_TOC_FILE_SUFFIX); - let location = if blob_info.blob_toc_size() != 0 { - let blob_size = reader - .blob_size() - .map_err(|_e| eio!("failed to get blob size"))?; - let offset = blob_size - blob_info.blob_toc_size() as u64; - let mut location = TocLocation::new(offset, blob_info.blob_toc_size() as u64); - let digest = blob_info.blob_toc_digest(); - for c in digest { - if *c != 0 { - location.validate_digest = true; - location.digest.data = *digest; - break; - } - } - location - } else { - TocLocation::default() - }; - let toc_list = - TocEntryList::read_from_cache_file(toc_path, reader.as_ref(), &location)?; - toc_list.extract_from_blob(reader.clone(), None, Some(&digest_path))?; - } - if !digest_path.exists() { - return Err(eother!("failed to download chunk digest file from blob")); - } - - let file = OpenOptions::new().read(true).open(&digest_path)?; - let md = file.metadata()?; - let size = 32 * blob_info.chunk_count() as usize; - if md.len() != size as u64 { - return Err(eother!(format!( - "size of chunk digest file doesn't match, expect {}, got {}", - size, - md.len() 
- ))); - } - - let file_map = FileMapState::new(file, 0, size, false)?; - let ptr = file_map.validate_range(0, size)?; - let array = unsafe { - Vec::from_raw_parts( - ptr as *mut u8 as *mut _, - chunk_count as usize, - chunk_count as usize, - ) - }; - state.chunk_digest_file_map = file_map; - state.chunk_digest_array = ManuallyDrop::new(array); - } - - Ok(BlobCompressionContextInfo { - state: Arc::new(state), - }) - } - - /// Get data chunks covering uncompressed data range `[start, start + size)`. - /// - /// For 4k-aligned uncompressed data chunks, there may be padding areas between data chunks. - /// - /// The method returns error if any of following condition is true: - /// - range [start, start + size) is invalid. - /// - `start` is bigger than blob size. - /// - some portions of the range [start, start + size) is not covered by chunks. - /// - blob meta is invalid. - pub fn get_chunks_uncompressed( - &self, - start: u64, - size: u64, - batch_size: u64, - ) -> Result>> { - let end = start.checked_add(size).ok_or_else(|| { - einval!(format!( - "get_chunks_uncompressed: invalid start {}/size {}", - start, size - )) - })?; - if end > self.state.uncompressed_size { - return Err(einval!(format!( - "get_chunks_uncompressed: invalid end {}/uncompressed_size {}", - end, self.state.uncompressed_size - ))); - } - let batch_end = if batch_size <= size { - end - } else { - std::cmp::min( - start.checked_add(batch_size).unwrap_or(end), - self.state.uncompressed_size, - ) - }; - let batch_size = if batch_size < size { size } else { batch_size }; - - self.state - .get_chunks_uncompressed(start, end, batch_end, batch_size) - } - - /// Get data chunks covering compressed data range `[start, start + size)`. - /// - /// The method returns error if any of following condition is true: - /// - range [start, start + size) is invalid. - /// - `start` is bigger than blob size. - /// - some portions of the range [start, start + size) is not covered by chunks. - /// - blob meta is invalid. - pub fn get_chunks_compressed( - &self, - start: u64, - size: u64, - batch_size: u64, - prefetch: bool, - ) -> Result>> { - let end = start.checked_add(size).ok_or_else(|| { - einval!(einval!(format!( - "get_chunks_compressed: invalid start {}/size {}", - start, size - ))) - })?; - if end > self.state.compressed_size { - return Err(einval!(format!( - "get_chunks_compressed: invalid end {}/compressed_size {}", - end, self.state.compressed_size - ))); - } - let batch_end = if batch_size <= size { - end - } else { - std::cmp::min( - start.checked_add(batch_size).unwrap_or(end), - self.state.compressed_size, - ) - }; - - self.state - .get_chunks_compressed(start, end, batch_end, batch_size, prefetch) - } - - /// Amplify the request by appending more continuous chunks to the chunk array. - pub fn add_more_chunks( - &self, - chunks: &[Arc], - max_size: u64, - ) -> Result>> { - self.state.add_more_chunks(chunks, max_size) - } - - /// Get number of chunks in the data blob. - pub fn get_chunk_count(&self) -> usize { - self.state.chunk_info_array.len() - } - - /// Get index of chunk covering uncompressed `addr`. - pub fn get_chunk_index(&self, addr: u64) -> Result { - self.state.get_chunk_index(addr) - } - - /// Get uncompressed offset of the chunk at `chunk_index`. - pub fn get_uncompressed_offset(&self, chunk_index: usize) -> u64 { - self.state.get_uncompressed_offset(chunk_index) - } - - /// Get chunk digest for the chunk at `chunk_index`. 
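// Sketch of the chunk digest layout assumed by the code above: the optional
// `*.blob.digest` cache holds one 32-byte digest per chunk, indexed by chunk index,
// so its size must equal 32 * chunk_count (checked when the file is opened in `new()`).
fn digest_range_example(chunk_index: usize) -> (usize, usize) {
    let start = chunk_index * 32;
    (start, start + 32) // byte range of this chunk's digest in the cache file
}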
- pub fn get_chunk_digest(&self, chunk_index: usize) -> Option<&[u8]> { - self.state.get_chunk_digest(chunk_index) - } - - /// Get `BlobChunkInfo` object for the chunk at `chunk_index`. - pub fn get_chunk_info(&self, chunk_index: usize) -> Arc { - BlobMetaChunk::new(chunk_index, &self.state) - } - - /// Get whether chunk at `chunk_index` is batch chunk. - /// Some chunks build in batch mode can also be non-batch chunks, - /// that they are too big to be put into a batch. - pub fn is_batch_chunk(&self, chunk_index: u32) -> bool { - self.state.is_batch_chunk(chunk_index as usize) - } - - /// Get Batch index associated with the chunk at `chunk_index`. - pub fn get_batch_index(&self, chunk_index: u32) -> Result { - self.state.get_batch_index(chunk_index as usize) - } - - /// Get uncompressed batch offset associated with the chunk at `chunk_index`. - pub fn get_uncompressed_offset_in_batch_buf(&self, chunk_index: u32) -> Result { - self.state - .get_uncompressed_offset_in_batch_buf(chunk_index as usize) - } - - /// Get Batch context information at `batch_index`. - pub fn get_batch_context(&self, batch_index: u32) -> Result<&BatchInflateContext> { - self.state.get_batch_context(batch_index as usize) - } - - /// Get compressed size associated with the chunk at `chunk_index`. - /// Capable of handling both batch and non-batch chunks. - pub fn get_compressed_size(&self, chunk_index: u32) -> Result { - self.state.get_compressed_size(chunk_index as usize) - } - - /// Get ZRan index associated with the chunk at `chunk_index`. - pub fn get_zran_index(&self, chunk_index: u32) -> Result { - self.state.get_zran_index(chunk_index as usize) - } - - /// Get ZRan offset associated with the chunk at `chunk_index`. - pub fn get_zran_offset(&self, chunk_index: u32) -> Result { - self.state.get_zran_offset(chunk_index as usize) - } - - /// Get ZRan context information at `zran_index`. 
- pub fn get_zran_context(&self, zran_index: u32) -> Result<(ZranContext, &[u8])> { - self.state.get_zran_context(zran_index as usize) - } - - fn read_metadata( - blob_info: &BlobInfo, - reader: &Arc, - buffer: &mut [u8], - ) -> Result<()> { - trace!( - "blob_info compressor {} ci_compressor {} ci_compressed_size {} ci_uncompressed_size {}", - blob_info.compressor(), - blob_info.meta_ci_compressor(), - blob_info.meta_ci_compressed_size(), - blob_info.meta_ci_uncompressed_size(), - ); - - let compressed_size = blob_info.meta_ci_compressed_size(); - let uncompressed_size = blob_info.meta_ci_uncompressed_size(); - let aligned_uncompressed_size = round_up_4k(uncompressed_size); - let expected_raw_size = (compressed_size + BLOB_CCT_HEADER_SIZE) as usize; - let mut raw_data = alloc_buf(expected_raw_size); - - let read_size = reader - .read_all(&mut raw_data, blob_info.meta_ci_offset()) - .map_err(|e| { - eio!(format!( - "failed to read metadata for blob {} from backend, {}", - blob_info.blob_id(), - e - )) - })?; - if read_size != expected_raw_size { - return Err(eio!(format!( - "failed to read metadata for blob {} from backend, compressor {}, got {} bytes, expect {} bytes", - blob_info.blob_id(), - blob_info.meta_ci_compressor(), - read_size, - expected_raw_size - ))); - } - - let decrypted = match decrypt_with_context( - &raw_data[0..compressed_size as usize], - &blob_info.cipher_object(), - &blob_info.cipher_context(), - blob_info.cipher() != crypt::Algorithm::None, - ){ - Ok(data) => data, - Err(e) => return Err(eio!(format!( - "failed to decrypt metadata for blob {} from backend, cipher {}, encrypted data size {}, {}", - blob_info.blob_id(), - blob_info.cipher(), - compressed_size, - e - ))), - }; - let header = match decrypt_with_context( - &raw_data[compressed_size as usize..expected_raw_size], - &blob_info.cipher_object(), - &blob_info.cipher_context(), - blob_info.cipher() != crypt::Algorithm::None, - ){ - Ok(data) => data, - Err(e) => return Err(eio!(format!( - "failed to decrypt meta header for blob {} from backend, cipher {}, encrypted data size {}, {}", - blob_info.blob_id(), - blob_info.cipher(), - compressed_size, - e - ))), - }; - - let uncompressed = if blob_info.meta_ci_compressor() != compress::Algorithm::None { - // Lz4 does not support concurrent decompression of the same data into - // the same piece of memory. There will be multiple containers mmap the - // same file, causing the buffer to be shared between different - // processes. This will cause data errors due to race issues when - // decompressing with lz4. We solve this problem by creating a temporary - // memory to hold the decompressed data. - // - // Because this process will only be executed when the blob.meta file is - // created for the first time, which means that a machine will only - // execute the process once when the blob.meta is created for the first - // time, the memory consumption and performance impact are relatively - // small. 
- let mut uncompressed = vec![0u8; uncompressed_size as usize]; - compress::decompress( - &decrypted, - &mut uncompressed, - blob_info.meta_ci_compressor(), - ) - .map_err(|e| { - error!("failed to decompress blob meta data: {}", e); - e - })?; - Cow::Owned(uncompressed) - } else { - decrypted - }; - buffer[0..uncompressed_size as usize].copy_from_slice(&uncompressed); - buffer[aligned_uncompressed_size as usize - ..(aligned_uncompressed_size + BLOB_CCT_HEADER_SIZE) as usize] - .copy_from_slice(&header); - Ok(()) - } - - fn validate_header( - blob_info: &BlobInfo, - header: &BlobCompressionContextHeader, - ) -> Result { - trace!("blob meta header magic {:x}/{:x}, entries {:x}/{:x}, features {:x}/{:x}, compressor {:x}/{:x}, ci_offset {:x}/{:x}, compressed_size {:x}/{:x}, uncompressed_size {:x}/{:x}", - u32::from_le(header.s_magic), - BLOB_CCT_MAGIC, - u32::from_le(header.s_ci_entries), - blob_info.chunk_count(), - u32::from_le(header.s_features), - blob_info.features().bits(), - u32::from_le(header.s_ci_compressor), - blob_info.meta_ci_compressor() as u32, - u64::from_le(header.s_ci_offset), - blob_info.meta_ci_offset(), - u64::from_le(header.s_ci_compressed_size), - blob_info.meta_ci_compressed_size(), - u64::from_le(header.s_ci_uncompressed_size), - blob_info.meta_ci_uncompressed_size()); - - if u32::from_le(header.s_magic) != BLOB_CCT_MAGIC - || u32::from_le(header.s_magic2) != BLOB_CCT_MAGIC - || (!blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) - && u32::from_le(header.s_ci_entries) != blob_info.chunk_count()) - || u32::from_le(header.s_ci_compressor) != blob_info.meta_ci_compressor() as u32 - || u64::from_le(header.s_ci_offset) != blob_info.meta_ci_offset() - || u64::from_le(header.s_ci_compressed_size) != blob_info.meta_ci_compressed_size() - || u64::from_le(header.s_ci_uncompressed_size) != blob_info.meta_ci_uncompressed_size() - { - return Ok(false); - } - - let chunk_count = blob_info.chunk_count(); - if chunk_count == 0 || chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { - return Err(einval!(format!( - "chunk count {:x} in blob meta header is invalid!", - chunk_count - ))); - } - - let info_size = u64::from_le(header.s_ci_uncompressed_size) as usize; - let aligned_info_size = round_up_4k(info_size); - if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) - && (blob_info.has_feature(BlobFeatures::ZRAN) - || blob_info.has_feature(BlobFeatures::BATCH)) - { - if info_size < (chunk_count as usize) * (size_of::()) { - return Err(einval!("uncompressed size in blob meta header is invalid!")); - } - } else if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) { - if info_size != (chunk_count as usize) * (size_of::()) - || (aligned_info_size as u64) > BLOB_CCT_V2_MAX_SIZE - { - return Err(einval!("uncompressed size in blob meta header is invalid!")); - } - } else if blob_info.has_feature(BlobFeatures::ZRAN) - || blob_info.has_feature(BlobFeatures::BATCH) - { - return Err(einval!("invalid feature flags in blob meta header!")); - } else if !blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) - && (info_size != (chunk_count as usize) * (size_of::()) - || (aligned_info_size as u64) > BLOB_CCT_V1_MAX_SIZE) - { - return Err(einval!("uncompressed size in blob meta header is invalid!")); - } - - if blob_info.has_feature(BlobFeatures::ZRAN) { - let offset = header.s_ci_zran_offset; - if offset != (chunk_count as u64) * (size_of::() as u64) { - return Ok(false); - } - if offset + header.s_ci_zran_size > info_size as u64 { - return Ok(false); - } - let zran_count = header.s_ci_zran_count 
as u64; - let size = zran_count * size_of::() as u64; - if zran_count > chunk_count as u64 { - return Ok(false); - } - if size > header.s_ci_zran_size { - return Ok(false); - } - } - - Ok(true) - } -} - -/// Struct to maintain compression context information for all chunks in a blob. -#[derive(Default)] -pub struct BlobCompressionContext { - pub(crate) blob_index: u32, - pub(crate) blob_features: u32, - pub(crate) compressed_size: u64, - pub(crate) uncompressed_size: u64, - pub(crate) chunk_info_array: ManuallyDrop, - pub(crate) chunk_digest_array: ManuallyDrop>, - pub(crate) batch_info_array: ManuallyDrop>, - pub(crate) zran_info_array: ManuallyDrop>, - pub(crate) zran_dict_table: ManuallyDrop>, - blob_meta_file_map: FileMapState, - chunk_digest_file_map: FileMapState, - chunk_digest_default: RafsDigest, -} - -impl BlobCompressionContext { - fn get_chunks_uncompressed( - self: &Arc, - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - ) -> Result>> { - self.chunk_info_array - .get_chunks_uncompressed(self, start, end, batch_end, batch_size) - } - - fn get_chunks_compressed( - self: &Arc, - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - prefetch: bool, - ) -> Result>> { - self.chunk_info_array - .get_chunks_compressed(self, start, end, batch_end, batch_size, prefetch) - } - - fn add_more_chunks( - self: &Arc, - chunks: &[Arc], - max_size: u64, - ) -> Result>> { - self.chunk_info_array - .add_more_chunks(self, chunks, max_size) - } - - fn get_uncompressed_offset(&self, chunk_index: usize) -> u64 { - self.chunk_info_array.uncompressed_offset(chunk_index) - } - - fn get_chunk_digest(&self, chunk_index: usize) -> Option<&[u8]> { - if chunk_index < self.chunk_digest_array.len() { - Some(&self.chunk_digest_array[chunk_index]) - } else { - None - } - } - - fn get_chunk_index(&self, addr: u64) -> Result { - self.chunk_info_array - .get_chunk_index_nocheck(self, addr, false) - } - - /// Get whether chunk at `chunk_index` is batch chunk. - /// Some chunks build in batch mode can also be non-batch chunks, - /// that they are too big to be put into a batch. - fn is_batch_chunk(&self, chunk_index: usize) -> bool { - self.chunk_info_array.is_batch(chunk_index) - } - - fn get_batch_index(&self, chunk_index: usize) -> Result { - self.chunk_info_array.batch_index(chunk_index) - } - - fn get_uncompressed_offset_in_batch_buf(&self, chunk_index: usize) -> Result { - self.chunk_info_array - .uncompressed_offset_in_batch_buf(chunk_index) - } - - /// Get Batch context information for decoding. - fn get_batch_context(&self, batch_index: usize) -> Result<&BatchInflateContext> { - if batch_index < self.batch_info_array.len() { - let ctx = &self.batch_info_array[batch_index]; - Ok(ctx) - } else { - Err(einval!(format!( - "Invalid batch index, current: {}, max: {}", - batch_index, - self.batch_info_array.len() - ))) - } - } - - /// Get compressed size associated with the chunk at `chunk_index`. - /// Capable of handling both batch and non-batch chunks. - pub fn get_compressed_size(&self, chunk_index: usize) -> Result { - if self.is_batch_chunk(chunk_index) { - let ctx = self - .get_batch_context(self.get_batch_index(chunk_index)? 
as usize) - .unwrap(); - Ok(ctx.compressed_size()) - } else { - Ok(self.chunk_info_array.compressed_size(chunk_index)) - } - } - - fn get_zran_index(&self, chunk_index: usize) -> Result { - self.chunk_info_array.zran_index(chunk_index) - } - - fn get_zran_offset(&self, chunk_index: usize) -> Result { - self.chunk_info_array.zran_offset(chunk_index) - } - - /// Get ZRan context information for decoding. - fn get_zran_context(&self, zran_index: usize) -> Result<(ZranContext, &[u8])> { - if zran_index < self.zran_info_array.len() { - let entry = &self.zran_info_array[zran_index]; - let dict_off = entry.dict_offset() as usize; - let dict_size = entry.dict_size() as usize; - if dict_off.checked_add(dict_size).is_none() - || dict_off + dict_size > self.zran_dict_table.len() - { - return Err(einval!(format!( - "Invalid ZRan context, dict_off: {}, dict_size: {}, max: {}", - dict_off, - dict_size, - self.zran_dict_table.len() - ))); - }; - let dict = &self.zran_dict_table[dict_off..dict_off + dict_size]; - let ctx = ZranContext::from(entry); - Ok((ctx, dict)) - } else { - Err(einval!(format!( - "Invalid ZRan index, current: {}, max: {}", - zran_index, - self.zran_info_array.len() - ))) - } - } - - pub(crate) fn is_separate(&self) -> bool { - self.blob_features & BlobFeatures::SEPARATE.bits() != 0 - } - - pub(crate) fn is_encrypted(&self) -> bool { - self.blob_features & BlobFeatures::ENCRYPTED.bits() != 0 - } -} - -/// A customized array to host chunk information table for a blob. -pub enum BlobMetaChunkArray { - /// V1 chunk compression information array. - V1(Vec), - /// V2 chunk compression information array. - V2(Vec), -} - -impl Default for BlobMetaChunkArray { - fn default() -> Self { - BlobMetaChunkArray::new_v2() - } -} - -// Methods for RAFS filesystem builder. -impl BlobMetaChunkArray { - /// Create a [BlobMetaChunkArray] with v1 chunk compression information format. - pub fn new_v1() -> Self { - BlobMetaChunkArray::V1(Vec::new()) - } - - /// Create a [BlobMetaChunkArray] with v2 chunk compression information format. - pub fn new_v2() -> Self { - BlobMetaChunkArray::V2(Vec::new()) - } - - /// Get number of entries in the chunk compression information array. - pub fn len(&self) -> usize { - match self { - BlobMetaChunkArray::V1(v) => v.len(), - BlobMetaChunkArray::V2(v) => v.len(), - } - } - - /// Check whether the chunk compression information array is empty or not. - pub fn is_empty(&self) -> bool { - match self { - BlobMetaChunkArray::V1(v) => v.is_empty(), - BlobMetaChunkArray::V2(v) => v.is_empty(), - } - } - - /// Convert the chunk compression information array as a u8 slice. - pub fn as_byte_slice(&self) -> &[u8] { - match self { - BlobMetaChunkArray::V1(v) => unsafe { - std::slice::from_raw_parts( - v.as_ptr() as *const u8, - v.len() * size_of::(), - ) - }, - BlobMetaChunkArray::V2(v) => unsafe { - std::slice::from_raw_parts( - v.as_ptr() as *const u8, - v.len() * size_of::(), - ) - }, - } - } - - /// Add an entry of v1 chunk compression information into the array. 
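// A small usage sketch (illustrative values only) of the builder-side API in this impl:
// create a v2 array, append one ordinary chunk record, then serialize it for the blob.
fn build_chunk_array_example() -> Vec<u8> {
    let mut array = BlobMetaChunkArray::new_v2();
    // Placeholder offsets/sizes; compressed = true, encrypted = false,
    // is_batch = false and data = 0 describe a plain, non-batch chunk.
    array.add_v2(0x0, 0x800, 0x0, 0x1000, true, false, false, 0);
    assert_eq!(array.len(), 1);
    array.as_byte_slice().to_vec()
}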
- pub fn add_v1( - &mut self, - compressed_offset: u64, - compressed_size: u32, - uncompressed_offset: u64, - uncompressed_size: u32, - ) { - match self { - BlobMetaChunkArray::V1(v) => { - let mut meta = BlobChunkInfoV1Ondisk::default(); - meta.set_compressed_offset(compressed_offset); - meta.set_compressed_size(compressed_size); - meta.set_uncompressed_offset(uncompressed_offset); - meta.set_uncompressed_size(uncompressed_size); - v.push(meta); - } - BlobMetaChunkArray::V2(_v) => unimplemented!(), - } - } - - /// Add an entry of v2 chunk compression information into the array. - #[allow(clippy::too_many_arguments)] - pub fn add_v2( - &mut self, - compressed_offset: u64, - compressed_size: u32, - uncompressed_offset: u64, - uncompressed_size: u32, - compressed: bool, - encrypted: bool, - is_batch: bool, - data: u64, - ) { - match self { - BlobMetaChunkArray::V2(v) => { - let mut meta = BlobChunkInfoV2Ondisk::default(); - meta.set_compressed_offset(compressed_offset); - meta.set_compressed_size(compressed_size); - meta.set_uncompressed_offset(uncompressed_offset); - meta.set_uncompressed_size(uncompressed_size); - meta.set_compressed(compressed); - meta.set_encrypted(encrypted); - meta.set_batch(is_batch); - meta.set_data(data); - v.push(meta); - } - BlobMetaChunkArray::V1(_v) => unimplemented!(), - } - } - - /// Add an entry of pre-built v2 chunk compression information into the array. - pub fn add_v2_info(&mut self, chunk_info: BlobChunkInfoV2Ondisk) { - match self { - BlobMetaChunkArray::V2(v) => v.push(chunk_info), - BlobMetaChunkArray::V1(_v) => unimplemented!(), - } - } -} - -impl BlobMetaChunkArray { - fn from_file_map(filemap: &FileMapState, blob_info: &BlobInfo) -> Result { - let chunk_count = blob_info.chunk_count(); - if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) { - let chunk_size = chunk_count as usize * size_of::(); - let base = filemap.validate_range(0, chunk_size)?; - let v = unsafe { - Vec::from_raw_parts( - base as *mut u8 as *mut BlobChunkInfoV2Ondisk, - chunk_count as usize, - chunk_count as usize, - ) - }; - Ok(BlobMetaChunkArray::V2(v)) - } else { - let chunk_size = chunk_count as usize * size_of::(); - let base = filemap.validate_range(0, chunk_size)?; - let v = unsafe { - Vec::from_raw_parts( - base as *mut u8 as *mut BlobChunkInfoV1Ondisk, - chunk_count as usize, - chunk_count as usize, - ) - }; - Ok(BlobMetaChunkArray::V1(v)) - } - } - - fn get_chunk_index_nocheck( - &self, - state: &BlobCompressionContext, - addr: u64, - compressed: bool, - ) -> Result { - match self { - BlobMetaChunkArray::V1(v) => { - Self::_get_chunk_index_nocheck(state, v, addr, compressed, false) - } - BlobMetaChunkArray::V2(v) => { - Self::_get_chunk_index_nocheck(state, v, addr, compressed, false) - } - } - } - - fn get_chunks_compressed( - &self, - state: &Arc, - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - prefetch: bool, - ) -> Result>> { - match self { - BlobMetaChunkArray::V1(v) => { - Self::_get_chunks_compressed(state, v, start, end, batch_end, batch_size, prefetch) - } - BlobMetaChunkArray::V2(v) => { - Self::_get_chunks_compressed(state, v, start, end, batch_end, batch_size, prefetch) - } - } - } - - fn get_chunks_uncompressed( - &self, - state: &Arc, - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - ) -> Result>> { - match self { - BlobMetaChunkArray::V1(v) => { - Self::_get_chunks_uncompressed(state, v, start, end, batch_end, batch_size) - } - BlobMetaChunkArray::V2(v) => { - Self::_get_chunks_uncompressed(state, v, start, end, batch_end, 
batch_size) - } - } - } - - fn add_more_chunks( - &self, - state: &Arc, - chunks: &[Arc], - max_size: u64, - ) -> Result>> { - match self { - BlobMetaChunkArray::V1(v) => Self::_add_more_chunks(state, v, chunks, max_size), - BlobMetaChunkArray::V2(v) => Self::_add_more_chunks(state, v, chunks, max_size), - } - } - - fn compressed_offset(&self, index: usize) -> u64 { - match self { - BlobMetaChunkArray::V1(v) => v[index].compressed_offset(), - BlobMetaChunkArray::V2(v) => v[index].compressed_offset(), - } - } - - fn compressed_size(&self, index: usize) -> u32 { - match self { - BlobMetaChunkArray::V1(v) => v[index].compressed_size(), - BlobMetaChunkArray::V2(v) => v[index].compressed_size(), - } - } - - fn uncompressed_offset(&self, index: usize) -> u64 { - match self { - BlobMetaChunkArray::V1(v) => v[index].uncompressed_offset(), - BlobMetaChunkArray::V2(v) => v[index].uncompressed_offset(), - } - } - - fn uncompressed_size(&self, index: usize) -> u32 { - match self { - BlobMetaChunkArray::V1(v) => v[index].uncompressed_size(), - BlobMetaChunkArray::V2(v) => v[index].uncompressed_size(), - } - } - - fn is_batch(&self, index: usize) -> bool { - match self { - BlobMetaChunkArray::V1(v) => v[index].is_batch(), - BlobMetaChunkArray::V2(v) => v[index].is_batch(), - } - } - - fn batch_index(&self, index: usize) -> Result { - match self { - BlobMetaChunkArray::V1(v) => v[index].get_batch_index(), - BlobMetaChunkArray::V2(v) => v[index].get_batch_index(), - } - } - - fn uncompressed_offset_in_batch_buf(&self, index: usize) -> Result { - match self { - BlobMetaChunkArray::V1(v) => v[index].get_uncompressed_offset_in_batch_buf(), - BlobMetaChunkArray::V2(v) => v[index].get_uncompressed_offset_in_batch_buf(), - } - } - - fn zran_index(&self, index: usize) -> Result { - match self { - BlobMetaChunkArray::V1(v) => v[index].get_zran_index(), - BlobMetaChunkArray::V2(v) => v[index].get_zran_index(), - } - } - - fn zran_offset(&self, index: usize) -> Result { - match self { - BlobMetaChunkArray::V1(v) => v[index].get_zran_offset(), - BlobMetaChunkArray::V2(v) => v[index].get_zran_offset(), - } - } - - fn is_compressed(&self, index: usize) -> bool { - match self { - BlobMetaChunkArray::V1(v) => v[index].is_compressed(), - BlobMetaChunkArray::V2(v) => v[index].is_compressed(), - } - } - - fn is_encrypted(&self, index: usize) -> bool { - match self { - BlobMetaChunkArray::V1(v) => v[index].is_encrypted(), - BlobMetaChunkArray::V2(v) => v[index].is_encrypted(), - } - } - - fn _get_chunk_index_nocheck( - state: &BlobCompressionContext, - chunks: &[T], - addr: u64, - compressed: bool, - prefetch: bool, - ) -> Result { - let mut size = chunks.len(); - let mut left = 0; - let mut right = size; - let mut start = 0; - let mut end = 0; - - while left < right { - let mid = left + size / 2; - // SAFETY: the call is made safe by the following invariants: - // - `mid >= 0` - // - `mid < size`: `mid` is limited by `[left; right)` bound. - let entry = &chunks[mid]; - if compressed { - // Capable of handling both batch and non-batch chunks. - let c_offset = entry.compressed_offset(); - let c_size = state.get_compressed_size(mid)?; - (start, end) = (c_offset, c_offset + c_size as u64); - } else { - start = entry.uncompressed_offset(); - end = entry.uncompressed_end(); - }; - - if start > addr { - right = mid; - } else if end <= addr { - left = mid + 1; - } else { - // Find the first chunk in the batch. - if entry.is_batch() && entry.get_uncompressed_offset_in_batch_buf()? 
> 0 { - right = mid; - } else { - return Ok(mid); - } - } - - size = right - left; - } - - // Special handling prefetch for ZRan blobs because they may have holes. - if prefetch { - if right < chunks.len() { - let entry = &chunks[right]; - if entry.compressed_offset() > addr { - return Ok(right); - } - } - if left < chunks.len() { - let entry = &chunks[left]; - if entry.compressed_offset() > addr { - return Ok(left); - } - } - } - - // if addr == self.chunks[last].compressed_offset, return einval with error msg. - Err(einval!(format!( - "failed to get chunk index, prefetch {}, left {}, right {}, start: {}, end: {}, addr: {}", - prefetch, left, right, start, end, addr - ))) - } - - fn _get_chunks_uncompressed( - state: &Arc, - chunk_info_array: &[T], - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - ) -> Result>> { - let mut vec = Vec::with_capacity(512); - let mut index = - Self::_get_chunk_index_nocheck(state, chunk_info_array, start, false, false)?; - let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; - trace!( - "get_chunks_uncompressed: entry {} {}", - entry.uncompressed_offset(), - entry.uncompressed_end() - ); - - // Special handling of ZRan chunks - if entry.is_zran() { - let zran_index = entry.get_zran_index()?; - let mut count = state.zran_info_array[zran_index as usize].out_size() as u64; - let mut zran_last = zran_index; - let mut zran_end = entry.aligned_uncompressed_end(); - - while index > 0 { - let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; - if !entry.is_zran() { - return Err(einval!( - "inconsistent ZRan and non-ZRan chunk compression information entries" - )); - } else if entry.get_zran_index()? != zran_index { - // reach the header chunk associated with the same ZRan context. - break; - } else { - index -= 1; - } - } - - for entry in &chunk_info_array[index..] { - entry.validate(state)?; - if !entry.is_zran() { - return Err(einval!( - "inconsistent ZRan and non-ZRan chunk compression information entries" - )); - } - if entry.get_zran_index()? != zran_last { - let ctx = &state.zran_info_array[entry.get_zran_index()? as usize]; - if count + ctx.out_size() as u64 >= batch_size - && entry.uncompressed_offset() >= end - { - return Ok(vec); - } - count += ctx.out_size() as u64; - zran_last = entry.get_zran_index()?; - } - zran_end = entry.aligned_uncompressed_end(); - vec.push(BlobMetaChunk::new(index, state)); - index += 1; - } - - if zran_end >= end { - return Ok(vec); - } - return Err(einval!(format!( - "entry not found index {} chunk_info_array.len {}, end 0x{:x}, range [0x{:x}-0x{:x}]", - index, - chunk_info_array.len(), - vec.last().map(|v| v.uncompressed_end()).unwrap_or_default(), - start, - end, - ))); - } - - vec.push(BlobMetaChunk::new(index, state)); - let mut last_end = entry.aligned_uncompressed_end(); - if last_end >= batch_end { - Ok(vec) - } else { - while index + 1 < chunk_info_array.len() { - index += 1; - - let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; - if entry.uncompressed_offset() != last_end { - return Err(einval!(format!( - "mismatch uncompressed {} size {} last_end {}", - entry.uncompressed_offset(), - entry.uncompressed_size(), - last_end - ))); - } else if last_end >= end && entry.aligned_uncompressed_end() >= batch_end { - // Avoid read amplify if next chunk is too big. 
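-                    // The chunks collected so far already cover the requested range
-                    // [start, end), and this chunk's 4K-aligned end reaches batch_end, so
-                    // appending it would only grow the read past the amplification window.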
- return Ok(vec); - } - - vec.push(BlobMetaChunk::new(index, state)); - last_end = entry.aligned_uncompressed_end(); - if last_end >= batch_end { - return Ok(vec); - } - } - - if last_end >= end { - Ok(vec) - } else { - Err(einval!(format!( - "entry not found index {} chunk_info_array.len {}, last_end 0x{:x}, end 0x{:x}, blob compressed size 0x{:x}", - index, - chunk_info_array.len(), - last_end, - end, - state.uncompressed_size, - ))) - } - } - } - - fn _get_chunks_compressed( - state: &Arc, - chunk_info_array: &[T], - start: u64, - end: u64, - batch_end: u64, - batch_size: u64, - prefetch: bool, - ) -> Result>> { - let mut vec = Vec::with_capacity(512); - let mut index = - Self::_get_chunk_index_nocheck(state, chunk_info_array, start, true, prefetch)?; - let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; - - // Special handling of ZRan chunks - if entry.is_zran() { - let zran_index = entry.get_zran_index()?; - let pos = state.zran_info_array[zran_index as usize].in_offset(); - let mut zran_last = zran_index; - - while index > 0 { - let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; - if !entry.is_zran() { - return Err(einval!( - "inconsistent ZRan and non-ZRan chunk compression information entries" - )); - } else if entry.get_zran_index()? != zran_index { - // reach the header chunk associated with the same ZRan context. - break; - } else { - index -= 1; - } - } - - for entry in &chunk_info_array[index..] { - entry.validate(state)?; - if !entry.is_zran() { - return Err(einval!( - "inconsistent ZRan and non-ZRan chunk compression information entries" - )); - } - if entry.get_zran_index()? != zran_last { - let ctx = &state.zran_info_array[entry.get_zran_index()? as usize]; - if ctx.in_offset() + ctx.in_size() as u64 - pos > batch_size - && entry.compressed_offset() > end - { - return Ok(vec); - } - zran_last = entry.get_zran_index()?; - } - vec.push(BlobMetaChunk::new(index, state)); - index += 1; - } - - if let Some(c) = vec.last() { - if c.uncompressed_end() >= end { - return Ok(vec); - } - // Special handling prefetch for ZRan blobs - if prefetch && index >= chunk_info_array.len() { - return Ok(vec); - } - } - return Err(einval!(format!( - "entry not found index {} chunk_info_array.len {}", - index, - chunk_info_array.len(), - ))); - } - - vec.push(BlobMetaChunk::new(index, state)); - let mut last_end = entry.compressed_end(); - if last_end >= batch_end { - Ok(vec) - } else { - while index + 1 < chunk_info_array.len() { - index += 1; - - let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; - // Avoid read amplify if next chunk is too big. 
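-                // Same cutoff as in the uncompressed path, but measured on compressed
-                // offsets: once the requested range is covered, a chunk whose compressed
-                // end falls past batch_end is not appended.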
- if last_end >= end && entry.compressed_end() > batch_end { - return Ok(vec); - } - - vec.push(BlobMetaChunk::new(index, state)); - last_end = entry.compressed_end(); - if last_end >= batch_end { - return Ok(vec); - } - } - - if last_end >= end || (prefetch && !vec.is_empty()) { - Ok(vec) - } else { - Err(einval!(format!( - "entry not found index {} chunk_info_array.len {}, last_end 0x{:x}, end 0x{:x}, blob compressed size 0x{:x}", - index, - chunk_info_array.len(), - last_end, - end, - state.compressed_size, - ))) - } - } - } - - fn _add_more_chunks( - state: &Arc, - chunk_info_array: &[T], - chunks: &[Arc], - max_size: u64, - ) -> Result>> { - let first_idx = chunks[0].id() as usize; - let first_entry = Self::get_chunk_entry(state, chunk_info_array, first_idx)?; - let last_idx = chunks[chunks.len() - 1].id() as usize; - let last_entry = Self::get_chunk_entry(state, chunk_info_array, last_idx)?; - - // The maximum size to be amplified in the current fetch request. - let fetch_end = max_size + chunks[0].compressed_offset(); - - let mut vec = Vec::with_capacity(128); - - // Special handling of ZRan chunks - if first_entry.is_zran() { - let first_zran_idx = first_entry.get_zran_index()?; - let mut last_zran_idx = last_entry.get_zran_index()?; - let mut index = first_idx; - while index > 0 { - let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; - if !entry.is_zran() { - // All chunks should be ZRan chunks. - return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "invalid ZRan compression information data", - )); - } else if entry.get_zran_index()? != first_zran_idx { - // reach the header chunk associated with the same ZRan context. - break; - } else { - index -= 1; - } - } - - for entry in &chunk_info_array[index..] { - if entry.validate(state).is_err() || !entry.is_zran() { - return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "invalid ZRan compression information data", - )); - } else if entry.get_zran_index()? > last_zran_idx { - if entry.compressed_end() + RAFS_MAX_CHUNK_SIZE <= fetch_end - && entry.get_zran_index()? == last_zran_idx + 1 - { - vec.push(BlobMetaChunk::new(index, state)); - last_zran_idx += 1; - } else { - return Ok(vec); - } - } else { - vec.push(BlobMetaChunk::new(index, state)); - } - index += 1; - } - } else { - // Handling of Batch chunks and normal chunks - let mut entry_idx = first_idx; - let mut curr_batch_idx = u32::MAX; - - // Search the first chunk of the current Batch. - if first_entry.is_batch() { - curr_batch_idx = first_entry.get_batch_index()?; - while entry_idx > 0 { - let entry = Self::get_chunk_entry(state, chunk_info_array, entry_idx - 1)?; - if !entry.is_batch() || entry.get_batch_index()? != curr_batch_idx { - // Reach the previous non-batch or batch chunk. - break; - } else { - entry_idx -= 1; - } - } - } - - // Iterate and add chunks. - let mut idx_chunks = 0; - for (idx, entry) in chunk_info_array.iter().enumerate().skip(entry_idx) { - entry.validate(state)?; - - // Add chunk if it is in the `chunks` array. - if idx_chunks < chunks.len() && idx == chunks[idx_chunks].id() as usize { - vec.push(chunks[idx_chunks].clone()); - idx_chunks += 1; - if entry.is_batch() { - curr_batch_idx = entry.get_batch_index()?; - } - continue; - } - - // If chunk is not in the `chunks` array, add it if in the current Batch, - // or can be amplified. - if entry.is_batch() { - if curr_batch_idx == entry.get_batch_index()? 
{ - vec.push(BlobMetaChunk::new(idx, state)); - continue; - } - - let batch_ctx = state.get_batch_context(entry.get_batch_index()? as usize)?; - if entry.compressed_offset() + batch_ctx.compressed_size() as u64 <= fetch_end { - vec.push(BlobMetaChunk::new(idx, state)); - curr_batch_idx = entry.get_batch_index()?; - } else { - break; - } - continue; - } - if entry.compressed_end() <= fetch_end { - vec.push(BlobMetaChunk::new(idx, state)); - } else { - break; - } - } - } - - Ok(vec) - } - - fn get_chunk_entry<'a, T: BlobMetaChunkInfo>( - state: &Arc, - chunk_info_array: &'a [T], - index: usize, - ) -> Result<&'a T> { - assert!(index < chunk_info_array.len()); - let entry = &chunk_info_array[index]; - // If the chunk belongs to a chunkdict, skip the validation check. - if state.blob_features & BlobFeatures::IS_CHUNKDICT_GENERATED.bits() == 0 { - entry.validate(state)?; - } - Ok(entry) - } -} - -/// An implementation of `trait BlobChunkInfo` based on blob meta information. -#[derive(Clone)] -pub struct BlobMetaChunk { - chunk_index: usize, - meta: Arc, -} - -impl BlobMetaChunk { - #[allow(clippy::new_ret_no_self)] - pub(crate) fn new( - chunk_index: usize, - meta: &Arc, - ) -> Arc { - assert!(chunk_index <= RAFS_MAX_CHUNKS_PER_BLOB as usize); - Arc::new(BlobMetaChunk { - chunk_index, - meta: meta.clone(), - }) as Arc - } -} - -impl BlobChunkInfo for BlobMetaChunk { - fn chunk_id(&self) -> &RafsDigest { - if self.chunk_index < self.meta.chunk_digest_array.len() { - let digest = &self.meta.chunk_digest_array[self.chunk_index]; - digest.into() - } else { - &self.meta.chunk_digest_default - } - } - - fn id(&self) -> u32 { - self.chunk_index as u32 - } - - fn blob_index(&self) -> u32 { - self.meta.blob_index - } - - fn compressed_offset(&self) -> u64 { - self.meta - .chunk_info_array - .compressed_offset(self.chunk_index) - } - - fn compressed_size(&self) -> u32 { - self.meta.chunk_info_array.compressed_size(self.chunk_index) - } - - fn uncompressed_offset(&self) -> u64 { - self.meta - .chunk_info_array - .uncompressed_offset(self.chunk_index) - } - - fn uncompressed_size(&self) -> u32 { - self.meta - .chunk_info_array - .uncompressed_size(self.chunk_index) - } - - fn is_batch(&self) -> bool { - self.meta.chunk_info_array.is_batch(self.chunk_index) - } - - fn is_compressed(&self) -> bool { - self.meta.chunk_info_array.is_compressed(self.chunk_index) - } - - fn is_encrypted(&self) -> bool { - self.meta.chunk_info_array.is_encrypted(self.chunk_index) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl BlobV5ChunkInfo for BlobMetaChunk { - fn index(&self) -> u32 { - self.chunk_index as u32 - } - - fn file_offset(&self) -> u64 { - // Not used for RAFS v6 - 0 - } - - fn flags(&self) -> BlobChunkFlags { - let mut flags = BlobChunkFlags::empty(); - if self.is_compressed() { - flags |= BlobChunkFlags::COMPRESSED; - } - flags - } - - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } -} - -/// Trait to manage compression information about chunks based on blob meta. -pub trait BlobMetaChunkInfo { - /// Get compressed offset of the chunk. - fn compressed_offset(&self) -> u64; - - /// Set compressed offset of the chunk. - fn set_compressed_offset(&mut self, offset: u64); - - /// Get compressed size of the chunk. - fn compressed_size(&self) -> u32; - - /// Set compressed size of the chunk. - fn set_compressed_size(&mut self, size: u32); - - /// Get end of compressed data of the chunk. 
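-    /// Equals `compressed_offset() + compressed_size()`.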
- fn compressed_end(&self) -> u64 { - self.compressed_offset() + self.compressed_size() as u64 - } - - /// Get uncompressed offset of the chunk. - fn uncompressed_offset(&self) -> u64; - - /// Set uncompressed offset of the chunk. - fn set_uncompressed_offset(&mut self, offset: u64); - - /// Get uncompressed end of the chunk. - fn uncompressed_size(&self) -> u32; - - /// Set uncompressed end of the chunk. - fn set_uncompressed_size(&mut self, size: u32); - - /// Get end of uncompressed data of the chunk. - fn uncompressed_end(&self) -> u64 { - self.uncompressed_offset() + self.uncompressed_size() as u64 - } - - /// Get 4K-aligned end of uncompressed data of the chunk. - fn aligned_uncompressed_end(&self) -> u64 { - round_up_4k(self.uncompressed_end()) - } - - /// Check whether chunk data is encrypted or not. - fn is_encrypted(&self) -> bool; - - /// Check whether the blob chunk is compressed or not. - /// - /// Assume the image builder guarantee that compress_size < uncompress_size if the chunk is - /// compressed. - fn is_compressed(&self) -> bool; - - /// Check whether the chunk has associated Batch context data. - fn is_batch(&self) -> bool; - - /// Check whether the chunk has associated ZRan context data. - fn is_zran(&self) -> bool; - - /// Get index of the ZRan context data associated with the chunk. - fn get_zran_index(&self) -> Result; - - /// Get offset to get context data from the associated ZRan context. - fn get_zran_offset(&self) -> Result; - - /// Get index of the Batch context data associated with the chunk. - fn get_batch_index(&self) -> Result; - - /// Get offset of uncompressed chunk data inside the batch chunk. - fn get_uncompressed_offset_in_batch_buf(&self) -> Result; - - /// Get data associated with the entry. V2 only, V1 just returns zero. - fn get_data(&self) -> u64; - - /// Check whether the chunk compression information is valid or not. - fn validate(&self, state: &BlobCompressionContext) -> Result<()>; -} - -/// Generate description string for blob meta features. 
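-/// The result is a space separated list of feature names, e.g. "aligned chunk-v2 zran".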
-pub fn format_blob_features(features: BlobFeatures) -> String { - let mut output = String::new(); - if features.contains(BlobFeatures::ALIGNED) { - output += "aligned "; - } - if features.contains(BlobFeatures::BATCH) { - output += "batch "; - } - if features.contains(BlobFeatures::CAP_TAR_TOC) { - output += "cap_toc "; - } - if features.contains(BlobFeatures::INLINED_CHUNK_DIGEST) { - output += "chunk-digest "; - } - if features.contains(BlobFeatures::CHUNK_INFO_V2) { - output += "chunk-v2 "; - } - if features.contains(BlobFeatures::INLINED_FS_META) { - output += "fs-meta "; - } - if features.contains(BlobFeatures::SEPARATE) { - output += "separate "; - } - if features.contains(BlobFeatures::HAS_TAR_HEADER) { - output += "tar-header "; - } - if features.contains(BlobFeatures::HAS_TOC) { - output += "toc "; - } - if features.contains(BlobFeatures::ZRAN) { - output += "zran "; - } - if features.contains(BlobFeatures::ENCRYPTED) { - output += "encrypted "; - } - if features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED) { - output += "is-chunkdict-generated "; - } - output.trim_end().to_string() -} - -fn round_up_4k + BitAnd + Not + From>(val: T) -> T { - (val + T::from(0xfff)) & !T::from(0xfff) -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - use crate::backend::{BackendResult, BlobReader}; - use crate::device::BlobFeatures; - use crate::RAFS_DEFAULT_CHUNK_SIZE; - use nix::sys::uio; - use nydus_utils::digest::{self, DigestHasher}; - use nydus_utils::metrics::BackendMetrics; - use std::fs::File; - use std::os::unix::io::AsRawFd; - use std::path::PathBuf; - - pub(crate) struct DummyBlobReader { - pub metrics: Arc, - pub file: File, - } - - impl BlobReader for DummyBlobReader { - fn blob_size(&self) -> BackendResult { - Ok(0) - } - - fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { - let ret = uio::pread(self.file.as_raw_fd(), buf, offset as i64).unwrap(); - Ok(ret) - } - - fn metrics(&self) -> &BackendMetrics { - &self.metrics - } - } - - #[test] - fn test_round_up_4k() { - assert_eq!(round_up_4k(0), 0x0u32); - assert_eq!(round_up_4k(1), 0x1000u32); - assert_eq!(round_up_4k(0xfff), 0x1000u32); - assert_eq!(round_up_4k(0x1000), 0x1000u32); - assert_eq!(round_up_4k(0x1001), 0x2000u32); - assert_eq!(round_up_4k(0x1fff), 0x2000u64); - } - - #[test] - fn test_load_meta_ci_zran_add_more_chunks() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); - - let features = BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2 - | BlobFeatures::ZRAN; - let mut blob_info = BlobInfo::new( - 0, - "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), - 0x16c6000, - 9839040, - RAFS_DEFAULT_CHUNK_SIZE as u32, - 0xa3, - features, - ); - blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); - let meta = - BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) - .unwrap(); - assert_eq!(meta.state.chunk_info_array.len(), 0xa3); - assert_eq!(meta.state.zran_info_array.len(), 0x15); - assert_eq!(meta.state.zran_dict_table.len(), 0xa0348 - 0x15 * 40); - - let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; - let chunks = meta.add_more_chunks(chunks.as_slice(), 0x30000).unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; - let chunks = meta - 
.add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = vec![BlobMetaChunk::new(66, &meta.state)]; - let chunks = meta - .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = vec![BlobMetaChunk::new(116, &meta.state)]; - let chunks = meta - .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 1); - - let chunks = vec![BlobMetaChunk::new(162, &meta.state)]; - let chunks = meta - .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 12); - } - - #[test] - fn test_load_meta_ci_zran_get_chunks_uncompressed() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); - - let features = BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2 - | BlobFeatures::ZRAN; - let mut blob_info = BlobInfo::new( - 0, - "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), - 0x16c6000, - 9839040, - RAFS_DEFAULT_CHUNK_SIZE as u32, - 0xa3, - features, - ); - blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); - let meta = - BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) - .unwrap(); - assert_eq!(meta.state.chunk_info_array.len(), 0xa3); - assert_eq!(meta.state.zran_info_array.len(), 0x15); - assert_eq!(meta.state.zran_dict_table.len(), 0xa0348 - 0x15 * 40); - - let chunks = meta.get_chunks_uncompressed(0, 1, 0x30000).unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = meta - .get_chunks_uncompressed(0, 1, RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = meta - .get_chunks_uncompressed(0x112000, 0x10000, RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 116); - - let chunks = meta - .get_chunks_uncompressed(0xf9b000, 0x100, RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 12); - - let chunks = meta - .get_chunks_uncompressed(0xf9b000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 13); - - let chunks = meta - .get_chunks_uncompressed(0x16c5000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - assert_eq!(chunks.len(), 12); - - assert!(meta - .get_chunks_uncompressed(0x2000000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) - .is_err()); - } - - #[test] - fn test_load_meta_ci_zran_get_chunks_compressed() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); - - let features = BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2 - | BlobFeatures::ZRAN; - let mut blob_info = BlobInfo::new( - 0, - "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), - 0x16c6000, - 9839040, - RAFS_DEFAULT_CHUNK_SIZE as u32, - 0xa3, - features, - ); - blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); - let meta = - BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) - .unwrap(); - assert_eq!(meta.state.chunk_info_array.len(), 0xa3); - assert_eq!(meta.state.zran_info_array.len(), 0x15); - assert_eq!(meta.state.zran_dict_table.len(), 
0xa0348 - 0x15 * 40); - - let chunks = meta.get_chunks_compressed(0xb8, 1, 0x30000, false).unwrap(); - assert_eq!(chunks.len(), 67); - - let chunks = meta - .get_chunks_compressed(0xb8, 1, RAFS_DEFAULT_CHUNK_SIZE, false) - .unwrap(); - assert_eq!(chunks.len(), 116); - - let chunks = meta - .get_chunks_compressed(0xb8, 1, 2 * RAFS_DEFAULT_CHUNK_SIZE, false) - .unwrap(); - assert_eq!(chunks.len(), 120); - - let chunks = meta - .get_chunks_compressed(0x5fd41e, 1, RAFS_DEFAULT_CHUNK_SIZE / 2, false) - .unwrap(); - assert_eq!(chunks.len(), 3); - - let chunks = meta - .get_chunks_compressed(0x95d55d, 0x20, RAFS_DEFAULT_CHUNK_SIZE, false) - .unwrap(); - assert_eq!(chunks.len(), 12); - - assert!(meta - .get_chunks_compressed(0x0, 0x1, RAFS_DEFAULT_CHUNK_SIZE, false) - .is_err()); - assert!(meta - .get_chunks_compressed(0x1000000, 0x1, RAFS_DEFAULT_CHUNK_SIZE, false) - .is_err()); - } - - #[test] - fn test_blob_compression_context_header_getters_and_setters() { - let mut header = BlobCompressionContextHeader::default(); - - assert_eq!(header.features(), 0); - header.set_aligned(true); - assert!(header.is_4k_aligned()); - header.set_aligned(false); - - header.set_inlined_fs_meta(true); - assert!(header.has_feature(BlobFeatures::INLINED_FS_META)); - header.set_inlined_fs_meta(false); - - header.set_chunk_info_v2(true); - assert!(header.has_feature(BlobFeatures::CHUNK_INFO_V2)); - header.set_chunk_info_v2(false); - - header.set_ci_zran(true); - assert!(header.has_feature(BlobFeatures::ZRAN)); - header.set_ci_zran(false); - - header.set_separate_blob(true); - assert!(header.has_feature(BlobFeatures::SEPARATE)); - header.set_separate_blob(false); - - header.set_ci_batch(true); - assert!(header.has_feature(BlobFeatures::BATCH)); - header.set_ci_batch(false); - - header.set_inlined_chunk_digest(true); - assert!(header.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST)); - header.set_inlined_chunk_digest(false); - - header.set_has_tar_header(true); - assert!(header.has_feature(BlobFeatures::HAS_TAR_HEADER)); - header.set_has_tar_header(false); - - header.set_has_toc(true); - assert!(header.has_feature(BlobFeatures::HAS_TOC)); - header.set_has_toc(false); - - header.set_cap_tar_toc(true); - assert!(header.has_feature(BlobFeatures::CAP_TAR_TOC)); - header.set_cap_tar_toc(false); - - header.set_tarfs(true); - assert!(header.has_feature(BlobFeatures::TARFS)); - header.set_tarfs(false); - - header.set_encrypted(true); - assert!(header.has_feature(BlobFeatures::ENCRYPTED)); - header.set_encrypted(false); - - assert_eq!(header.features(), 0); - - assert_eq!(header.ci_compressor(), compress::Algorithm::Lz4Block); - header.set_ci_compressor(compress::Algorithm::GZip); - assert_eq!(header.ci_compressor(), compress::Algorithm::GZip); - header.set_ci_compressor(compress::Algorithm::Zstd); - assert_eq!(header.ci_compressor(), compress::Algorithm::Zstd); - - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - hasher.digest_update(header.as_bytes()); - let hash: String = hasher.digest_finalize().into(); - assert_eq!( - hash, - String::from("f56a1129d3df9fc7d60b26dbf495a60bda3dfc265f4f37854e4a36b826b660fc") - ); - - assert_eq!(header.ci_entries(), 0); - header.set_ci_entries(1); - assert_eq!(header.ci_entries(), 1); - - assert_eq!(header.ci_compressed_offset(), 0); - header.set_ci_compressed_offset(1); - assert_eq!(header.ci_compressed_offset(), 1); - - assert_eq!(header.ci_compressed_size(), 0); - header.set_ci_compressed_size(1); - assert_eq!(header.ci_compressed_size(), 1); - - 
assert_eq!(header.ci_uncompressed_size(), 0); - header.set_ci_uncompressed_size(1); - assert_eq!(header.ci_uncompressed_size(), 1); - - assert_eq!(header.ci_zran_count(), 0); - header.set_ci_zran_count(1); - assert_eq!(header.ci_zran_count(), 1); - - assert_eq!(header.ci_zran_offset(), 0); - header.set_ci_zran_offset(1); - assert_eq!(header.ci_zran_offset(), 1); - - assert_eq!(header.ci_zran_size(), 0); - header.set_ci_zran_size(1); - assert_eq!(header.ci_zran_size(), 1); - } - - #[test] - fn test_format_blob_features() { - let features = !BlobFeatures::default(); - let content = format_blob_features(features); - assert!(content.contains("aligned")); - assert!(content.contains("fs-meta")); - } - - #[test] - fn test_add_more_chunks() { - // Batch chunks: [chunk0, chunk1], chunk2, [chunk3, chunk4] - let mut chunk0 = BlobChunkInfoV2Ondisk::default(); - chunk0.set_batch(true); - chunk0.set_compressed(true); - chunk0.set_batch_index(0); - chunk0.set_uncompressed_offset_in_batch_buf(0); - chunk0.set_uncompressed_offset(0); - chunk0.set_uncompressed_size(0x2000); - chunk0.set_compressed_offset(0); - - let mut chunk1 = BlobChunkInfoV2Ondisk::default(); - chunk1.set_batch(true); - chunk1.set_compressed(true); - chunk1.set_batch_index(0); - chunk1.set_uncompressed_offset_in_batch_buf(0x2000); - chunk1.set_uncompressed_offset(0x2000); - chunk1.set_uncompressed_size(0x1000); - chunk1.set_compressed_offset(0); - - let mut batch_ctx0 = BatchInflateContext::default(); - batch_ctx0.set_uncompressed_batch_size(0x3000); - batch_ctx0.set_compressed_size(0x2000); - - let mut chunk2 = BlobChunkInfoV2Ondisk::default(); - chunk2.set_batch(false); - chunk2.set_compressed(true); - chunk2.set_uncompressed_offset(0x3000); - chunk2.set_compressed_offset(0x2000); - chunk2.set_uncompressed_size(0x4000); - chunk2.set_compressed_size(0x3000); - - let mut chunk3 = BlobChunkInfoV2Ondisk::default(); - chunk3.set_batch(true); - chunk3.set_compressed(true); - chunk3.set_batch_index(1); - chunk3.set_uncompressed_offset_in_batch_buf(0); - chunk3.set_uncompressed_offset(0x7000); - chunk3.set_uncompressed_size(0x2000); - chunk3.set_compressed_offset(0x5000); - - let mut chunk4 = BlobChunkInfoV2Ondisk::default(); - chunk4.set_batch(true); - chunk4.set_compressed(true); - chunk4.set_batch_index(1); - chunk4.set_uncompressed_offset_in_batch_buf(0x2000); - chunk4.set_uncompressed_offset(0x9000); - chunk4.set_uncompressed_size(0x2000); - chunk4.set_compressed_offset(0x5000); - - let mut batch_ctx1 = BatchInflateContext::default(); - batch_ctx1.set_compressed_size(0x3000); - batch_ctx1.set_uncompressed_batch_size(0x4000); - - let chunk_info_array = vec![chunk0, chunk1, chunk2, chunk3, chunk4]; - let chunk_infos = BlobMetaChunkArray::V2(chunk_info_array); - let chunk_infos = ManuallyDrop::new(chunk_infos); - - let batch_ctx_array = vec![batch_ctx0, batch_ctx1]; - let batch_ctxes = ManuallyDrop::new(batch_ctx_array); - - let state = BlobCompressionContext { - chunk_info_array: chunk_infos, - batch_info_array: batch_ctxes, - compressed_size: 0x8000, - uncompressed_size: 0xB000, - blob_features: (BlobFeatures::BATCH - | BlobFeatures::ALIGNED - | BlobFeatures::INLINED_FS_META - | BlobFeatures::CHUNK_INFO_V2) - .bits(), - ..Default::default() - }; - - let state = Arc::new(state); - let meta = BlobCompressionContextInfo { state }; - - // test read amplification - let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; - let chunks = meta - .add_more_chunks(&chunks, RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - let chunk_ids: Vec<_> = 
chunks.iter().map(|c| c.id()).collect(); - assert_eq!(chunk_ids, vec![0, 1, 2, 3, 4]); - - // test read the chunk in the middle of the batch chunk - let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; - let chunks = meta - .add_more_chunks(&chunks, RAFS_DEFAULT_CHUNK_SIZE) - .unwrap(); - let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); - assert_eq!(chunk_ids, vec![0, 1, 2, 3, 4]); - - // test no read amplification - let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; - let chunks = meta.add_more_chunks(&chunks, 0).unwrap(); - let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); - assert_eq!(chunk_ids, vec![0, 1]); - - // test read non-batch chunk - let chunks = vec![BlobMetaChunk::new(2, &meta.state)]; - let chunks = meta.add_more_chunks(&chunks, 0).unwrap(); - let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); - assert_eq!(chunk_ids, vec![2]); - - // test small read amplification - let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; - let chunks = meta.add_more_chunks(&chunks, 0x6000).unwrap(); - let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); - assert_eq!(chunk_ids, vec![0, 1, 2]); - } -} +// Copyright (C) 2021-2023 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Generate, manage and access blob meta information for RAFS v6 data blobs. +//! +//! RAFS v6 filesystem includes three types of data: +//! - fs meta: contain filesystem meta data including super block, inode table, dirent etc. +//! - blob meta: contain digest and compression context for data chunks. +//! - chunk data: contain chunked file data in compressed or uncompressed form. +//! +//! There are different ways to packing above three types of data into blobs: +//! - meta blob/bootstrap: `fs meta` +//! - native data blob: `chunk data` | `compression context table` | [`chunk digest table`] | [`table of context`] +//! - native data blob with inlined fs meta: `chunk data` | `compression context table` | [`chunk digest table`] | `fs meta` | [`table of content`] +//! - ZRan data blob: `compression context table` | [`chunk digest table`] | [`table of content`] +//! - ZRan data blob with inlined fs meta: `compression context table` | [`chunk digest table`] | `fs meta` | [`table of content`] +//! +//! The blob compression context table contains following information: +//! - chunk compression information table: to locate compressed/uncompressed chunks in the data blob +//! - optional ZRan context table: to support randomly access/decompress gzip file +//! - optional ZRan dictionary table: to support randomly access/decompress gzip file +//! +//! The blob compression context table is laid as below: +//! 
| `chunk compression info table` | [`ZRan context table`] | [`ZRan dictionary table`] + +use std::any::Any; +use std::borrow::Cow; +use std::fs::OpenOptions; +use std::io::Result; +use std::mem::{size_of, ManuallyDrop}; +use std::ops::{Add, BitAnd, Not}; +use std::path::PathBuf; +use std::sync::Arc; + +use nydus_utils::compress::zlib_random::ZranContext; +use nydus_utils::crypt::decrypt_with_context; +use nydus_utils::digest::{DigestData, RafsDigest}; +use nydus_utils::filemap::FileMapState; +use nydus_utils::{compress, crypt}; + +use crate::backend::BlobReader; +use crate::device::v5::BlobV5ChunkInfo; +use crate::device::{BlobChunkFlags, BlobChunkInfo, BlobFeatures, BlobInfo}; +use crate::meta::toc::{TocEntryList, TocLocation}; +use crate::utils::alloc_buf; +use crate::{RAFS_MAX_CHUNKS_PER_BLOB, RAFS_MAX_CHUNK_SIZE}; + +mod chunk_info_v1; +pub use chunk_info_v1::BlobChunkInfoV1Ondisk; +mod chunk_info_v2; +pub use chunk_info_v2::BlobChunkInfoV2Ondisk; + +pub mod toc; + +mod zran; +pub use zran::{ZranContextGenerator, ZranInflateContext}; + +mod batch; +pub use batch::{BatchContextGenerator, BatchInflateContext}; + +const BLOB_CCT_MAGIC: u32 = 0xb10bb10bu32; +const BLOB_CCT_HEADER_SIZE: u64 = 0x1000u64; +const BLOB_CCT_CHUNK_SIZE_MASK: u64 = 0xff_ffff; + +const BLOB_CCT_V1_MAX_SIZE: u64 = RAFS_MAX_CHUNK_SIZE * 16; +const BLOB_CCT_V2_MAX_SIZE: u64 = RAFS_MAX_CHUNK_SIZE * 24; +//const BLOB_CCT_V1_RESERVED_SIZE: u64 = BLOB_METADATA_HEADER_SIZE - 44; +const BLOB_CCT_V2_RESERVED_SIZE: u64 = BLOB_CCT_HEADER_SIZE - 64; + +/// File suffix for blob meta file. +const BLOB_CCT_FILE_SUFFIX: &str = "blob.meta"; +/// File suffix for blob chunk digests. +const BLOB_DIGEST_FILE_SUFFIX: &str = "blob.digest"; +/// File suffix for blob ToC. +const BLOB_TOC_FILE_SUFFIX: &str = "blob.toc"; + +/// On disk format for blob compression context table header. +/// +/// Blob compression context table contains compression information for all chunks in the blob. +/// The compression context table header will be written into the data blob in plaintext mode, +/// and can be used as marker to locate the compression context table. All fields of compression +/// context table header should be encoded in little-endian format. +/// +/// The compression context table and header are arranged in the data blob as follows: +/// +/// `chunk data` | `compression context table` | `[ZRan context table | ZRan dictionary]` | `compression context table header` +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub struct BlobCompressionContextHeader { + /// Magic number to identify the header. + s_magic: u32, + /// Feature flags for the data blob. + s_features: u32, + /// Compression algorithm to process the compression context table. + s_ci_compressor: u32, + /// Number of entries in compression context table. + s_ci_entries: u32, + /// File offset to get the compression context table. + s_ci_offset: u64, + /// Size of compressed compression context table. + s_ci_compressed_size: u64, + /// Size of uncompressed compression context table. + s_ci_uncompressed_size: u64, + /// File offset to get the optional ZRan context data. + s_ci_zran_offset: u64, + /// Size of ZRan context data, including the ZRan context table and dictionary table. + s_ci_zran_size: u64, + /// Number of entries in the ZRan context table. + s_ci_zran_count: u32, + + s_reserved: [u8; BLOB_CCT_V2_RESERVED_SIZE as usize], + /// Second magic number to identify the blob meta data header. 
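+    /// Like `s_magic`, it must equal `BLOB_CCT_MAGIC`; carrying a magic number at both
+    /// ends of the header helps detect a truncated or partially written header.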
+ s_magic2: u32, +} + +impl Default for BlobCompressionContextHeader { + fn default() -> Self { + BlobCompressionContextHeader { + s_magic: BLOB_CCT_MAGIC, + s_features: 0, + s_ci_compressor: compress::Algorithm::Lz4Block as u32, + s_ci_entries: 0, + s_ci_offset: 0, + s_ci_compressed_size: 0, + s_ci_uncompressed_size: 0, + s_ci_zran_offset: 0, + s_ci_zran_size: 0, + s_ci_zran_count: 0, + s_reserved: [0u8; BLOB_CCT_V2_RESERVED_SIZE as usize], + s_magic2: BLOB_CCT_MAGIC, + } + } +} + +impl BlobCompressionContextHeader { + /// Check whether a blob feature is set or not. + pub fn has_feature(&self, feature: BlobFeatures) -> bool { + self.s_features & feature.bits() != 0 + } + + /// Get compression algorithm to process chunk compression information array. + pub fn ci_compressor(&self) -> compress::Algorithm { + if self.s_ci_compressor == compress::Algorithm::Lz4Block as u32 { + compress::Algorithm::Lz4Block + } else if self.s_ci_compressor == compress::Algorithm::GZip as u32 { + compress::Algorithm::GZip + } else if self.s_ci_compressor == compress::Algorithm::Zstd as u32 { + compress::Algorithm::Zstd + } else { + compress::Algorithm::None + } + } + + /// Set compression algorithm to process chunk compression information array. + pub fn set_ci_compressor(&mut self, algo: compress::Algorithm) { + self.s_ci_compressor = algo as u32; + } + + /// Get number of entries in chunk compression information array. + pub fn ci_entries(&self) -> u32 { + self.s_ci_entries + } + + /// Set number of entries in chunk compression information array. + pub fn set_ci_entries(&mut self, entries: u32) { + self.s_ci_entries = entries; + } + + /// Get offset of compressed chunk compression information array. + pub fn ci_compressed_offset(&self) -> u64 { + self.s_ci_offset + } + + /// Set offset of compressed chunk compression information array. + pub fn set_ci_compressed_offset(&mut self, offset: u64) { + self.s_ci_offset = offset; + } + + /// Get size of compressed chunk compression information array. + pub fn ci_compressed_size(&self) -> u64 { + self.s_ci_compressed_size + } + + /// Set size of compressed chunk compression information array. + pub fn set_ci_compressed_size(&mut self, size: u64) { + self.s_ci_compressed_size = size; + } + + /// Get size of uncompressed chunk compression information array. + pub fn ci_uncompressed_size(&self) -> u64 { + self.s_ci_uncompressed_size + } + + /// Set size of uncompressed chunk compression information array. + pub fn set_ci_uncompressed_size(&mut self, size: u64) { + self.s_ci_uncompressed_size = size; + } + + /// Get ZRan context information entry count. + pub fn ci_zran_count(&self) -> u32 { + self.s_ci_zran_count + } + + /// Set ZRan context information entry count. + pub fn set_ci_zran_count(&mut self, count: u32) { + self.s_ci_zran_count = count; + } + + /// Get offset of ZRan context information table. + pub fn ci_zran_offset(&self) -> u64 { + self.s_ci_zran_offset + } + + /// Set offset of ZRan context information table. + pub fn set_ci_zran_offset(&mut self, offset: u64) { + self.s_ci_zran_offset = offset; + } + + /// Get size of ZRan context information table and dictionary table. + pub fn ci_zran_size(&self) -> u64 { + self.s_ci_zran_size + } + + /// Set size of ZRan context information table and dictionary table. + pub fn set_ci_zran_size(&mut self, size: u64) { + self.s_ci_zran_size = size; + } + + /// Check whether uncompressed chunks are 4k aligned. 
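+    /// Equivalent to `has_feature(BlobFeatures::ALIGNED)`.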
+ pub fn is_4k_aligned(&self) -> bool { + self.has_feature(BlobFeatures::ALIGNED) + } + + /// Set flag indicating whether uncompressed chunks are aligned. + pub fn set_aligned(&mut self, aligned: bool) { + if aligned { + self.s_features |= BlobFeatures::ALIGNED.bits(); + } else { + self.s_features &= !BlobFeatures::ALIGNED.bits(); + } + } + + /// Set flag indicating whether RAFS meta is inlined in the data blob. + pub fn set_inlined_fs_meta(&mut self, inlined: bool) { + if inlined { + self.s_features |= BlobFeatures::INLINED_FS_META.bits(); + } else { + self.s_features &= !BlobFeatures::INLINED_FS_META.bits(); + } + } + + /// Set flag indicating whether chunk compression information format v2 is used or not. + pub fn set_chunk_info_v2(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::CHUNK_INFO_V2.bits(); + } else { + self.s_features &= !BlobFeatures::CHUNK_INFO_V2.bits(); + } + } + + /// Set flag indicating whether it's a ZRan blob or not. + pub fn set_ci_zran(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::ZRAN.bits(); + } else { + self.s_features &= !BlobFeatures::ZRAN.bits(); + } + } + + /// Set flag indicating whether blob.data and blob.meta are stored in separated blobs. + pub fn set_separate_blob(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::SEPARATE.bits(); + } else { + self.s_features &= !BlobFeatures::SEPARATE.bits(); + } + } + + /// Set flag indicating whether it's a blob for batch chunk or not. + pub fn set_ci_batch(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::BATCH.bits(); + } else { + self.s_features &= !BlobFeatures::BATCH.bits(); + } + } + + /// Set flag indicating whether chunk digest is inlined in the data blob or not. + pub fn set_inlined_chunk_digest(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::INLINED_CHUNK_DIGEST.bits(); + } else { + self.s_features &= !BlobFeatures::INLINED_CHUNK_DIGEST.bits(); + } + } + + /// Set flag indicating new blob format with tar headers. + pub fn set_has_tar_header(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::HAS_TAR_HEADER.bits(); + } else { + self.s_features &= !BlobFeatures::HAS_TAR_HEADER.bits(); + } + } + + /// Set flag indicating new blob format with toc headers. + pub fn set_has_toc(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::HAS_TOC.bits(); + } else { + self.s_features &= !BlobFeatures::HAS_TOC.bits(); + } + } + + /// Set flag indicating having inlined-meta capability. + pub fn set_cap_tar_toc(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::CAP_TAR_TOC.bits(); + } else { + self.s_features &= !BlobFeatures::CAP_TAR_TOC.bits(); + } + } + + /// Set flag indicating the blob is for RAFS filesystem in TARFS mode. + pub fn set_tarfs(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::TARFS.bits(); + } else { + self.s_features &= !BlobFeatures::TARFS.bits(); + } + } + + /// Set flag indicating the blob is encrypted. + pub fn set_encrypted(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::ENCRYPTED.bits(); + } else { + self.s_features &= !BlobFeatures::ENCRYPTED.bits(); + } + } + + /// Get blob meta feature flags. + pub fn features(&self) -> u32 { + self.s_features + } + + /// Convert the header as an `&[u8]`. 
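+    ///
+    /// The raw-byte view relies on the `#[repr(C)]` layout of the struct. A minimal,
+    /// illustrative sketch of persisting a header; `writer` is any `std::io::Write`
+    /// sink and the `chunk_count`/`ci_*` values are placeholders supplied by a builder,
+    /// none of which are part of this module:
+    ///
+    /// ```ignore
+    /// let mut header = BlobCompressionContextHeader::default();
+    /// header.set_chunk_info_v2(true);
+    /// header.set_ci_entries(chunk_count);
+    /// header.set_ci_compressed_offset(ci_offset);
+    /// header.set_ci_compressed_size(ci_compressed_size);
+    /// header.set_ci_uncompressed_size(ci_uncompressed_size);
+    /// writer.write_all(header.as_bytes())?;
+    /// ```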
+ pub fn as_bytes(&self) -> &[u8] { + unsafe { + std::slice::from_raw_parts( + self as *const BlobCompressionContextHeader as *const u8, + size_of::(), + ) + } + } + + /// Set flag indicating whether it's a blob for batch chunk or not. + pub fn set_is_chunkdict_generated(&mut self, enable: bool) { + if enable { + self.s_features |= BlobFeatures::IS_CHUNKDICT_GENERATED.bits(); + } else { + self.s_features &= !BlobFeatures::IS_CHUNKDICT_GENERATED.bits(); + } + } +} + +/// Struct to manage blob chunk compression information, a wrapper over [BlobCompressionContext]. +/// +/// A [BlobCompressionContextInfo] object is loaded from on disk [BlobCompressionContextHeader] +/// object, and provides methods to query compression information about chunks in the blob. +#[derive(Clone)] +pub struct BlobCompressionContextInfo { + pub(crate) state: Arc, +} + +impl BlobCompressionContextInfo { + /// Create a new instance of [BlobCompressionContextInfo]. + /// + /// If a blob compression context cache file is present and is valid, it will be reused. + /// Otherwise download compression context content from backend if `reader` is valid. + /// + /// The downloaded compression context table will be cached into a file named as + /// `[blob_id].blob.meta`. The cache file is readonly once created and may be accessed + /// concurrently by multiple clients. + pub fn new( + blob_path: &str, + blob_info: &BlobInfo, + reader: Option<&Arc>, + load_chunk_digest: bool, + ) -> Result { + assert_eq!( + size_of::() as u64, + BLOB_CCT_HEADER_SIZE + ); + assert_eq!(size_of::(), 16); + assert_eq!(size_of::(), 24); + assert_eq!(size_of::(), 40); + + let chunk_count = blob_info.chunk_count(); + if chunk_count == 0 || chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { + return Err(einval!("invalid chunk count in blob meta header")); + } + + let uncompressed_size = blob_info.meta_ci_uncompressed_size() as usize; + let meta_path = format!("{}.{}", blob_path, BLOB_CCT_FILE_SUFFIX); + trace!( + "try to open blob meta file: path {:?} uncompressed_size {} chunk_count {}", + meta_path, + uncompressed_size, + chunk_count + ); + let enable_write = reader.is_some(); + let file = OpenOptions::new() + .read(true) + .write(enable_write) + .create(enable_write) + .open(&meta_path) + .map_err(|err| { + einval!(format!( + "failed to open/create blob meta file {}: {}", + meta_path, err + )) + })?; + + let aligned_uncompressed_size = round_up_4k(uncompressed_size); + let expected_size = BLOB_CCT_HEADER_SIZE as usize + aligned_uncompressed_size; + let mut file_size = file.metadata()?.len(); + if file_size == 0 && enable_write { + file.set_len(expected_size as u64)?; + file_size = expected_size as u64; + } + if file_size != expected_size as u64 { + return Err(einval!(format!( + "size of blob meta file '{}' doesn't match, expect {:x}, got {:x}", + meta_path, expected_size, file_size + ))); + } + + let mut filemap = FileMapState::new(file, 0, expected_size, enable_write)?; + let base = filemap.validate_range(0, expected_size)?; + let header = + filemap.get_mut::(aligned_uncompressed_size as usize)?; + if !Self::validate_header(blob_info, header)? { + if let Some(reader) = reader { + let buffer = + unsafe { std::slice::from_raw_parts_mut(base as *mut u8, expected_size) }; + Self::read_metadata(blob_info, reader, buffer)?; + if !Self::validate_header(blob_info, header)? 
{ + return Err(enoent!(format!("double check blob_info still invalid",))); + } + filemap.sync_data()?; + } else { + return Err(enoent!(format!( + "blob meta header from file '{}' is invalid", + meta_path + ))); + } + } + + let chunk_infos = BlobMetaChunkArray::from_file_map(&filemap, blob_info)?; + let chunk_infos = ManuallyDrop::new(chunk_infos); + let mut state = BlobCompressionContext { + blob_index: blob_info.blob_index(), + blob_features: blob_info.features().bits(), + compressed_size: blob_info.compressed_data_size(), + uncompressed_size: round_up_4k(blob_info.uncompressed_size()), + chunk_info_array: chunk_infos, + blob_meta_file_map: filemap, + ..Default::default() + }; + + if blob_info.has_feature(BlobFeatures::BATCH) { + let header = state + .blob_meta_file_map + .get_mut::(aligned_uncompressed_size as usize)?; + let inflate_offset = header.s_ci_zran_offset as usize; + let inflate_count = header.s_ci_zran_count as usize; + let batch_inflate_size = inflate_count * size_of::(); + let ptr = state + .blob_meta_file_map + .validate_range(inflate_offset, batch_inflate_size)?; + let array = unsafe { + Vec::from_raw_parts( + ptr as *mut u8 as *mut BatchInflateContext, + inflate_count, + inflate_count, + ) + }; + state.batch_info_array = ManuallyDrop::new(array); + } else if blob_info.has_feature(BlobFeatures::ZRAN) { + let header = state + .blob_meta_file_map + .get_mut::(aligned_uncompressed_size as usize)?; + let zran_offset = header.s_ci_zran_offset as usize; + let zran_count = header.s_ci_zran_count as usize; + let ci_zran_size = header.s_ci_zran_size as usize; + let zran_size = zran_count * size_of::(); + let ptr = state + .blob_meta_file_map + .validate_range(zran_offset, zran_size)?; + let array = unsafe { + Vec::from_raw_parts( + ptr as *mut u8 as *mut ZranInflateContext, + zran_count, + zran_count, + ) + }; + state.zran_info_array = ManuallyDrop::new(array); + + let zran_dict_size = ci_zran_size - zran_size; + let ptr = state + .blob_meta_file_map + .validate_range(zran_offset + zran_size, zran_dict_size)?; + let array = + unsafe { Vec::from_raw_parts(ptr as *mut u8, zran_dict_size, zran_dict_size) }; + state.zran_dict_table = ManuallyDrop::new(array); + } + + if load_chunk_digest && blob_info.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST) { + let digest_path = PathBuf::from(format!("{}.{}", blob_path, BLOB_DIGEST_FILE_SUFFIX)); + if let Some(reader) = reader { + let toc_path = format!("{}.{}", blob_path, BLOB_TOC_FILE_SUFFIX); + let location = if blob_info.blob_toc_size() != 0 { + let blob_size = reader + .blob_size() + .map_err(|_e| eio!("failed to get blob size"))?; + let offset = blob_size - blob_info.blob_toc_size() as u64; + let mut location = TocLocation::new(offset, blob_info.blob_toc_size() as u64); + let digest = blob_info.blob_toc_digest(); + for c in digest { + if *c != 0 { + location.validate_digest = true; + location.digest.data = *digest; + break; + } + } + location + } else { + TocLocation::default() + }; + let toc_list = + TocEntryList::read_from_cache_file(toc_path, reader.as_ref(), &location)?; + toc_list.extract_from_blob(reader.clone(), None, Some(&digest_path))?; + } + if !digest_path.exists() { + return Err(eother!("failed to download chunk digest file from blob")); + } + + let file = OpenOptions::new().read(true).open(&digest_path)?; + let md = file.metadata()?; + let size = 32 * blob_info.chunk_count() as usize; + if md.len() != size as u64 { + return Err(eother!(format!( + "size of chunk digest file doesn't match, expect {}, got {}", + size, + md.len() 
+ ))); + } + + let file_map = FileMapState::new(file, 0, size, false)?; + let ptr = file_map.validate_range(0, size)?; + let array = unsafe { + Vec::from_raw_parts( + ptr as *mut u8 as *mut _, + chunk_count as usize, + chunk_count as usize, + ) + }; + state.chunk_digest_file_map = file_map; + state.chunk_digest_array = ManuallyDrop::new(array); + } + + Ok(BlobCompressionContextInfo { + state: Arc::new(state), + }) + } + + /// Get data chunks covering uncompressed data range `[start, start + size)`. + /// + /// For 4k-aligned uncompressed data chunks, there may be padding areas between data chunks. + /// + /// The method returns error if any of following condition is true: + /// - range [start, start + size) is invalid. + /// - `start` is bigger than blob size. + /// - some portions of the range [start, start + size) is not covered by chunks. + /// - blob meta is invalid. + pub fn get_chunks_uncompressed( + &self, + start: u64, + size: u64, + batch_size: u64, + ) -> Result>> { + let end = start.checked_add(size).ok_or_else(|| { + einval!(format!( + "get_chunks_uncompressed: invalid start {}/size {}", + start, size + )) + })?; + if end > self.state.uncompressed_size { + return Err(einval!(format!( + "get_chunks_uncompressed: invalid end {}/uncompressed_size {}", + end, self.state.uncompressed_size + ))); + } + let batch_end = if batch_size <= size { + end + } else { + std::cmp::min( + start.checked_add(batch_size).unwrap_or(end), + self.state.uncompressed_size, + ) + }; + let batch_size = if batch_size < size { size } else { batch_size }; + + self.state + .get_chunks_uncompressed(start, end, batch_end, batch_size) + } + + /// Get data chunks covering compressed data range `[start, start + size)`. + /// + /// The method returns error if any of following condition is true: + /// - range [start, start + size) is invalid. + /// - `start` is bigger than blob size. + /// - some portions of the range [start, start + size) is not covered by chunks. + /// - blob meta is invalid. + pub fn get_chunks_compressed( + &self, + start: u64, + size: u64, + batch_size: u64, + prefetch: bool, + ) -> Result>> { + let end = start.checked_add(size).ok_or_else(|| { + einval!(einval!(format!( + "get_chunks_compressed: invalid start {}/size {}", + start, size + ))) + })?; + if end > self.state.compressed_size { + return Err(einval!(format!( + "get_chunks_compressed: invalid end {}/compressed_size {}", + end, self.state.compressed_size + ))); + } + let batch_end = if batch_size <= size { + end + } else { + std::cmp::min( + start.checked_add(batch_size).unwrap_or(end), + self.state.compressed_size, + ) + }; + + self.state + .get_chunks_compressed(start, end, batch_end, batch_size, prefetch) + } + + /// Amplify the request by appending more continuous chunks to the chunk array. + pub fn add_more_chunks( + &self, + chunks: &[Arc], + max_size: u64, + ) -> Result>> { + self.state.add_more_chunks(chunks, max_size) + } + + /// Get number of chunks in the data blob. + pub fn get_chunk_count(&self) -> usize { + self.state.chunk_info_array.len() + } + + /// Get index of chunk covering uncompressed `addr`. + pub fn get_chunk_index(&self, addr: u64) -> Result { + self.state.get_chunk_index(addr) + } + + /// Get uncompressed offset of the chunk at `chunk_index`. + pub fn get_uncompressed_offset(&self, chunk_index: usize) -> u64 { + self.state.get_uncompressed_offset(chunk_index) + } + + /// Get chunk digest for the chunk at `chunk_index`. 
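+    ///
+    /// Chunk digests are only available when the blob carries the `INLINED_CHUNK_DIGEST`
+    /// feature and `load_chunk_digest` was set when this context was created; otherwise
+    /// `None` is returned.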
+ pub fn get_chunk_digest(&self, chunk_index: usize) -> Option<&[u8]> { + self.state.get_chunk_digest(chunk_index) + } + + /// Get `BlobChunkInfo` object for the chunk at `chunk_index`. + pub fn get_chunk_info(&self, chunk_index: usize) -> Arc { + BlobMetaChunk::new(chunk_index, &self.state) + } + + /// Get whether chunk at `chunk_index` is batch chunk. + /// Some chunks build in batch mode can also be non-batch chunks, + /// that they are too big to be put into a batch. + pub fn is_batch_chunk(&self, chunk_index: u32) -> bool { + self.state.is_batch_chunk(chunk_index as usize) + } + + /// Get Batch index associated with the chunk at `chunk_index`. + pub fn get_batch_index(&self, chunk_index: u32) -> Result { + self.state.get_batch_index(chunk_index as usize) + } + + /// Get uncompressed batch offset associated with the chunk at `chunk_index`. + pub fn get_uncompressed_offset_in_batch_buf(&self, chunk_index: u32) -> Result { + self.state + .get_uncompressed_offset_in_batch_buf(chunk_index as usize) + } + + /// Get Batch context information at `batch_index`. + pub fn get_batch_context(&self, batch_index: u32) -> Result<&BatchInflateContext> { + self.state.get_batch_context(batch_index as usize) + } + + /// Get compressed size associated with the chunk at `chunk_index`. + /// Capable of handling both batch and non-batch chunks. + pub fn get_compressed_size(&self, chunk_index: u32) -> Result { + self.state.get_compressed_size(chunk_index as usize) + } + + /// Get ZRan index associated with the chunk at `chunk_index`. + pub fn get_zran_index(&self, chunk_index: u32) -> Result { + self.state.get_zran_index(chunk_index as usize) + } + + /// Get ZRan offset associated with the chunk at `chunk_index`. + pub fn get_zran_offset(&self, chunk_index: u32) -> Result { + self.state.get_zran_offset(chunk_index as usize) + } + + /// Get ZRan context information at `zran_index`. 
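+    ///
+    /// The returned tuple pairs the inflate context with the dictionary bytes used for
+    /// random-access decompression of the original gzip stream at that position.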
+ pub fn get_zran_context(&self, zran_index: u32) -> Result<(ZranContext, &[u8])> { + self.state.get_zran_context(zran_index as usize) + } + + fn read_metadata( + blob_info: &BlobInfo, + reader: &Arc, + buffer: &mut [u8], + ) -> Result<()> { + trace!( + "blob_info compressor {} ci_compressor {} ci_compressed_size {} ci_uncompressed_size {}", + blob_info.compressor(), + blob_info.meta_ci_compressor(), + blob_info.meta_ci_compressed_size(), + blob_info.meta_ci_uncompressed_size(), + ); + + let compressed_size = blob_info.meta_ci_compressed_size(); + let uncompressed_size = blob_info.meta_ci_uncompressed_size(); + let aligned_uncompressed_size = round_up_4k(uncompressed_size); + let expected_raw_size = (compressed_size + BLOB_CCT_HEADER_SIZE) as usize; + let mut raw_data = alloc_buf(expected_raw_size); + + let read_size = reader + .read_all(&mut raw_data, blob_info.meta_ci_offset()) + .map_err(|e| { + eio!(format!( + "failed to read metadata for blob {} from backend, {}", + blob_info.blob_id(), + e + )) + })?; + if read_size != expected_raw_size { + return Err(eio!(format!( + "failed to read metadata for blob {} from backend, compressor {}, got {} bytes, expect {} bytes", + blob_info.blob_id(), + blob_info.meta_ci_compressor(), + read_size, + expected_raw_size + ))); + } + + let decrypted = match decrypt_with_context( + &raw_data[0..compressed_size as usize], + &blob_info.cipher_object(), + &blob_info.cipher_context(), + blob_info.cipher() != crypt::Algorithm::None, + ){ + Ok(data) => data, + Err(e) => return Err(eio!(format!( + "failed to decrypt metadata for blob {} from backend, cipher {}, encrypted data size {}, {}", + blob_info.blob_id(), + blob_info.cipher(), + compressed_size, + e + ))), + }; + let header = match decrypt_with_context( + &raw_data[compressed_size as usize..expected_raw_size], + &blob_info.cipher_object(), + &blob_info.cipher_context(), + blob_info.cipher() != crypt::Algorithm::None, + ){ + Ok(data) => data, + Err(e) => return Err(eio!(format!( + "failed to decrypt meta header for blob {} from backend, cipher {}, encrypted data size {}, {}", + blob_info.blob_id(), + blob_info.cipher(), + compressed_size, + e + ))), + }; + + let uncompressed = if blob_info.meta_ci_compressor() != compress::Algorithm::None { + // Lz4 does not support concurrent decompression of the same data into + // the same piece of memory. There will be multiple containers mmap the + // same file, causing the buffer to be shared between different + // processes. This will cause data errors due to race issues when + // decompressing with lz4. We solve this problem by creating a temporary + // memory to hold the decompressed data. + // + // Because this process will only be executed when the blob.meta file is + // created for the first time, which means that a machine will only + // execute the process once when the blob.meta is created for the first + // time, the memory consumption and performance impact are relatively + // small. 
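Once decrypted and decompressed, `read_metadata` lays the chunk-info table out at the start of the shared buffer and copies the context header to the next 4 KiB boundary, as the `copy_from_slice` calls just below show. A sketch of that offset arithmetic; `HEADER_SIZE = 0x1000` is an assumed value used here only for illustration:

```rust
// Assumed header size for illustration; the real constant is BLOB_CCT_HEADER_SIZE.
const HEADER_SIZE: u64 = 0x1000;

// The chunk-info table occupies [0, uncompressed_size); the header is copied to
// the next 4 KiB boundary after it.
fn header_offset(uncompressed_size: u64) -> u64 {
    (uncompressed_size + 0xfff) & !0xfff
}

fn total_len(uncompressed_size: u64) -> u64 {
    header_offset(uncompressed_size) + HEADER_SIZE
}

fn main() {
    assert_eq!(header_offset(0xa1290), 0xa2000);
    assert_eq!(total_len(0xa1290), 0xa3000);
}
```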
+ let mut uncompressed = vec![0u8; uncompressed_size as usize]; + compress::decompress( + &decrypted, + &mut uncompressed, + blob_info.meta_ci_compressor(), + ) + .map_err(|e| { + error!("failed to decompress blob meta data: {}", e); + e + })?; + Cow::Owned(uncompressed) + } else { + decrypted + }; + buffer[0..uncompressed_size as usize].copy_from_slice(&uncompressed); + buffer[aligned_uncompressed_size as usize + ..(aligned_uncompressed_size + BLOB_CCT_HEADER_SIZE) as usize] + .copy_from_slice(&header); + Ok(()) + } + + fn validate_header( + blob_info: &BlobInfo, + header: &BlobCompressionContextHeader, + ) -> Result { + trace!("blob meta header magic {:x}/{:x}, entries {:x}/{:x}, features {:x}/{:x}, compressor {:x}/{:x}, ci_offset {:x}/{:x}, compressed_size {:x}/{:x}, uncompressed_size {:x}/{:x}", + u32::from_le(header.s_magic), + BLOB_CCT_MAGIC, + u32::from_le(header.s_ci_entries), + blob_info.chunk_count(), + u32::from_le(header.s_features), + blob_info.features().bits(), + u32::from_le(header.s_ci_compressor), + blob_info.meta_ci_compressor() as u32, + u64::from_le(header.s_ci_offset), + blob_info.meta_ci_offset(), + u64::from_le(header.s_ci_compressed_size), + blob_info.meta_ci_compressed_size(), + u64::from_le(header.s_ci_uncompressed_size), + blob_info.meta_ci_uncompressed_size()); + + if u32::from_le(header.s_magic) != BLOB_CCT_MAGIC + || u32::from_le(header.s_magic2) != BLOB_CCT_MAGIC + || (!blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) + && u32::from_le(header.s_ci_entries) != blob_info.chunk_count()) + || u32::from_le(header.s_ci_compressor) != blob_info.meta_ci_compressor() as u32 + || u64::from_le(header.s_ci_offset) != blob_info.meta_ci_offset() + || u64::from_le(header.s_ci_compressed_size) != blob_info.meta_ci_compressed_size() + || u64::from_le(header.s_ci_uncompressed_size) != blob_info.meta_ci_uncompressed_size() + { + return Ok(false); + } + + let chunk_count = blob_info.chunk_count(); + if chunk_count == 0 || chunk_count > RAFS_MAX_CHUNKS_PER_BLOB { + return Err(einval!(format!( + "chunk count {:x} in blob meta header is invalid!", + chunk_count + ))); + } + + let info_size = u64::from_le(header.s_ci_uncompressed_size) as usize; + let aligned_info_size = round_up_4k(info_size); + if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) + && (blob_info.has_feature(BlobFeatures::ZRAN) + || blob_info.has_feature(BlobFeatures::BATCH)) + { + if info_size < (chunk_count as usize) * (size_of::()) { + return Err(einval!("uncompressed size in blob meta header is invalid!")); + } + } else if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) { + if info_size != (chunk_count as usize) * (size_of::()) + || (aligned_info_size as u64) > BLOB_CCT_V2_MAX_SIZE + { + return Err(einval!("uncompressed size in blob meta header is invalid!")); + } + } else if blob_info.has_feature(BlobFeatures::ZRAN) + || blob_info.has_feature(BlobFeatures::BATCH) + { + return Err(einval!("invalid feature flags in blob meta header!")); + } else if !blob_info.has_feature(BlobFeatures::IS_CHUNKDICT_GENERATED) + && (info_size != (chunk_count as usize) * (size_of::()) + || (aligned_info_size as u64) > BLOB_CCT_V1_MAX_SIZE) + { + return Err(einval!("uncompressed size in blob meta header is invalid!")); + } + + if blob_info.has_feature(BlobFeatures::ZRAN) { + let offset = header.s_ci_zran_offset; + if offset != (chunk_count as u64) * (size_of::() as u64) { + return Ok(false); + } + if offset + header.s_ci_zran_size > info_size as u64 { + return Ok(false); + } + let zran_count = header.s_ci_zran_count 
as u64; + let size = zran_count * size_of::() as u64; + if zran_count > chunk_count as u64 { + return Ok(false); + } + if size > header.s_ci_zran_size { + return Ok(false); + } + } + + Ok(true) + } +} + +/// Struct to maintain compression context information for all chunks in a blob. +#[derive(Default)] +pub struct BlobCompressionContext { + pub(crate) blob_index: u32, + pub(crate) blob_features: u32, + pub(crate) compressed_size: u64, + pub(crate) uncompressed_size: u64, + pub(crate) chunk_info_array: ManuallyDrop, + pub(crate) chunk_digest_array: ManuallyDrop>, + pub(crate) batch_info_array: ManuallyDrop>, + pub(crate) zran_info_array: ManuallyDrop>, + pub(crate) zran_dict_table: ManuallyDrop>, + blob_meta_file_map: FileMapState, + chunk_digest_file_map: FileMapState, + chunk_digest_default: RafsDigest, +} + +impl BlobCompressionContext { + fn get_chunks_uncompressed( + self: &Arc, + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + ) -> Result>> { + self.chunk_info_array + .get_chunks_uncompressed(self, start, end, batch_end, batch_size) + } + + fn get_chunks_compressed( + self: &Arc, + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + prefetch: bool, + ) -> Result>> { + self.chunk_info_array + .get_chunks_compressed(self, start, end, batch_end, batch_size, prefetch) + } + + fn add_more_chunks( + self: &Arc, + chunks: &[Arc], + max_size: u64, + ) -> Result>> { + self.chunk_info_array + .add_more_chunks(self, chunks, max_size) + } + + fn get_uncompressed_offset(&self, chunk_index: usize) -> u64 { + self.chunk_info_array.uncompressed_offset(chunk_index) + } + + fn get_chunk_digest(&self, chunk_index: usize) -> Option<&[u8]> { + if chunk_index < self.chunk_digest_array.len() { + Some(&self.chunk_digest_array[chunk_index]) + } else { + None + } + } + + fn get_chunk_index(&self, addr: u64) -> Result { + self.chunk_info_array + .get_chunk_index_nocheck(self, addr, false) + } + + /// Get whether chunk at `chunk_index` is batch chunk. + /// Some chunks build in batch mode can also be non-batch chunks, + /// that they are too big to be put into a batch. + fn is_batch_chunk(&self, chunk_index: usize) -> bool { + self.chunk_info_array.is_batch(chunk_index) + } + + fn get_batch_index(&self, chunk_index: usize) -> Result { + self.chunk_info_array.batch_index(chunk_index) + } + + fn get_uncompressed_offset_in_batch_buf(&self, chunk_index: usize) -> Result { + self.chunk_info_array + .uncompressed_offset_in_batch_buf(chunk_index) + } + + /// Get Batch context information for decoding. + fn get_batch_context(&self, batch_index: usize) -> Result<&BatchInflateContext> { + if batch_index < self.batch_info_array.len() { + let ctx = &self.batch_info_array[batch_index]; + Ok(ctx) + } else { + Err(einval!(format!( + "Invalid batch index, current: {}, max: {}", + batch_index, + self.batch_info_array.len() + ))) + } + } + + /// Get compressed size associated with the chunk at `chunk_index`. + /// Capable of handling both batch and non-batch chunks. + pub fn get_compressed_size(&self, chunk_index: usize) -> Result { + if self.is_batch_chunk(chunk_index) { + let ctx = self + .get_batch_context(self.get_batch_index(chunk_index)? 
as usize) + .unwrap(); + Ok(ctx.compressed_size()) + } else { + Ok(self.chunk_info_array.compressed_size(chunk_index)) + } + } + + fn get_zran_index(&self, chunk_index: usize) -> Result { + self.chunk_info_array.zran_index(chunk_index) + } + + fn get_zran_offset(&self, chunk_index: usize) -> Result { + self.chunk_info_array.zran_offset(chunk_index) + } + + /// Get ZRan context information for decoding. + fn get_zran_context(&self, zran_index: usize) -> Result<(ZranContext, &[u8])> { + if zran_index < self.zran_info_array.len() { + let entry = &self.zran_info_array[zran_index]; + let dict_off = entry.dict_offset() as usize; + let dict_size = entry.dict_size() as usize; + if dict_off.checked_add(dict_size).is_none() + || dict_off + dict_size > self.zran_dict_table.len() + { + return Err(einval!(format!( + "Invalid ZRan context, dict_off: {}, dict_size: {}, max: {}", + dict_off, + dict_size, + self.zran_dict_table.len() + ))); + }; + let dict = &self.zran_dict_table[dict_off..dict_off + dict_size]; + let ctx = ZranContext::from(entry); + Ok((ctx, dict)) + } else { + Err(einval!(format!( + "Invalid ZRan index, current: {}, max: {}", + zran_index, + self.zran_info_array.len() + ))) + } + } + + pub(crate) fn is_separate(&self) -> bool { + self.blob_features & BlobFeatures::SEPARATE.bits() != 0 + } + + pub(crate) fn is_encrypted(&self) -> bool { + self.blob_features & BlobFeatures::ENCRYPTED.bits() != 0 + } +} + +/// A customized array to host chunk information table for a blob. +pub enum BlobMetaChunkArray { + /// V1 chunk compression information array. + V1(Vec), + /// V2 chunk compression information array. + V2(Vec), +} + +impl Default for BlobMetaChunkArray { + fn default() -> Self { + BlobMetaChunkArray::new_v2() + } +} + +// Methods for RAFS filesystem builder. +impl BlobMetaChunkArray { + /// Create a [BlobMetaChunkArray] with v1 chunk compression information format. + pub fn new_v1() -> Self { + BlobMetaChunkArray::V1(Vec::new()) + } + + /// Create a [BlobMetaChunkArray] with v2 chunk compression information format. + pub fn new_v2() -> Self { + BlobMetaChunkArray::V2(Vec::new()) + } + + /// Get number of entries in the chunk compression information array. + pub fn len(&self) -> usize { + match self { + BlobMetaChunkArray::V1(v) => v.len(), + BlobMetaChunkArray::V2(v) => v.len(), + } + } + + /// Check whether the chunk compression information array is empty or not. + pub fn is_empty(&self) -> bool { + match self { + BlobMetaChunkArray::V1(v) => v.is_empty(), + BlobMetaChunkArray::V2(v) => v.is_empty(), + } + } + + /// Convert the chunk compression information array as a u8 slice. + pub fn as_byte_slice(&self) -> &[u8] { + match self { + BlobMetaChunkArray::V1(v) => unsafe { + std::slice::from_raw_parts( + v.as_ptr() as *const u8, + v.len() * size_of::(), + ) + }, + BlobMetaChunkArray::V2(v) => unsafe { + std::slice::from_raw_parts( + v.as_ptr() as *const u8, + v.len() * size_of::(), + ) + }, + } + } + + /// Add an entry of v1 chunk compression information into the array. 
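`as_byte_slice` above exposes the in-memory `#[repr(C)]` entry array as raw bytes so it can be written to the blob.meta file verbatim. A minimal sketch of that byte-view trick with a stand-in entry type:

```rust
use std::mem::size_of;

// Stand-in for an on-disk chunk-info entry: plain-old-data, #[repr(C)].
#[repr(C)]
#[derive(Default, Clone, Copy)]
struct EntryV1 {
    comp_info: u64,
    uncomp_info: u64,
}

fn as_byte_slice(entries: &[EntryV1]) -> &[u8] {
    // SAFETY: EntryV1 is #[repr(C)] plain-old-data, so viewing it as bytes is sound.
    unsafe {
        std::slice::from_raw_parts(
            entries.as_ptr() as *const u8,
            entries.len() * size_of::<EntryV1>(),
        )
    }
}

fn main() {
    let v = vec![EntryV1::default(); 4];
    assert_eq!(as_byte_slice(&v).len(), 4 * size_of::<EntryV1>());
}
```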
+ pub fn add_v1( + &mut self, + compressed_offset: u64, + compressed_size: u32, + uncompressed_offset: u64, + uncompressed_size: u32, + ) { + match self { + BlobMetaChunkArray::V1(v) => { + let mut meta = BlobChunkInfoV1Ondisk::default(); + meta.set_compressed_offset(compressed_offset); + meta.set_compressed_size(compressed_size); + meta.set_uncompressed_offset(uncompressed_offset); + meta.set_uncompressed_size(uncompressed_size); + v.push(meta); + } + BlobMetaChunkArray::V2(_v) => unimplemented!(), + } + } + + /// Add an entry of v2 chunk compression information into the array. + #[allow(clippy::too_many_arguments)] + pub fn add_v2( + &mut self, + compressed_offset: u64, + compressed_size: u32, + uncompressed_offset: u64, + uncompressed_size: u32, + compressed: bool, + encrypted: bool, + is_batch: bool, + data: u64, + ) { + match self { + BlobMetaChunkArray::V2(v) => { + let mut meta = BlobChunkInfoV2Ondisk::default(); + meta.set_compressed_offset(compressed_offset); + meta.set_compressed_size(compressed_size); + meta.set_uncompressed_offset(uncompressed_offset); + meta.set_uncompressed_size(uncompressed_size); + meta.set_compressed(compressed); + meta.set_encrypted(encrypted); + meta.set_batch(is_batch); + meta.set_data(data); + v.push(meta); + } + BlobMetaChunkArray::V1(_v) => unimplemented!(), + } + } + + /// Add an entry of pre-built v2 chunk compression information into the array. + pub fn add_v2_info(&mut self, chunk_info: BlobChunkInfoV2Ondisk) { + match self { + BlobMetaChunkArray::V2(v) => v.push(chunk_info), + BlobMetaChunkArray::V1(_v) => unimplemented!(), + } + } +} + +impl BlobMetaChunkArray { + fn from_file_map(filemap: &FileMapState, blob_info: &BlobInfo) -> Result { + let chunk_count = blob_info.chunk_count(); + if blob_info.has_feature(BlobFeatures::CHUNK_INFO_V2) { + let chunk_size = chunk_count as usize * size_of::(); + let base = filemap.validate_range(0, chunk_size)?; + let v = unsafe { + Vec::from_raw_parts( + base as *mut u8 as *mut BlobChunkInfoV2Ondisk, + chunk_count as usize, + chunk_count as usize, + ) + }; + Ok(BlobMetaChunkArray::V2(v)) + } else { + let chunk_size = chunk_count as usize * size_of::(); + let base = filemap.validate_range(0, chunk_size)?; + let v = unsafe { + Vec::from_raw_parts( + base as *mut u8 as *mut BlobChunkInfoV1Ondisk, + chunk_count as usize, + chunk_count as usize, + ) + }; + Ok(BlobMetaChunkArray::V1(v)) + } + } + + fn get_chunk_index_nocheck( + &self, + state: &BlobCompressionContext, + addr: u64, + compressed: bool, + ) -> Result { + match self { + BlobMetaChunkArray::V1(v) => { + Self::_get_chunk_index_nocheck(state, v, addr, compressed, false) + } + BlobMetaChunkArray::V2(v) => { + Self::_get_chunk_index_nocheck(state, v, addr, compressed, false) + } + } + } + + fn get_chunks_compressed( + &self, + state: &Arc, + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + prefetch: bool, + ) -> Result>> { + match self { + BlobMetaChunkArray::V1(v) => { + Self::_get_chunks_compressed(state, v, start, end, batch_end, batch_size, prefetch) + } + BlobMetaChunkArray::V2(v) => { + Self::_get_chunks_compressed(state, v, start, end, batch_end, batch_size, prefetch) + } + } + } + + fn get_chunks_uncompressed( + &self, + state: &Arc, + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + ) -> Result>> { + match self { + BlobMetaChunkArray::V1(v) => { + Self::_get_chunks_uncompressed(state, v, start, end, batch_end, batch_size) + } + BlobMetaChunkArray::V2(v) => { + Self::_get_chunks_uncompressed(state, v, start, end, batch_end, 
batch_size) + } + } + } + + fn add_more_chunks( + &self, + state: &Arc, + chunks: &[Arc], + max_size: u64, + ) -> Result>> { + match self { + BlobMetaChunkArray::V1(v) => Self::_add_more_chunks(state, v, chunks, max_size), + BlobMetaChunkArray::V2(v) => Self::_add_more_chunks(state, v, chunks, max_size), + } + } + + fn compressed_offset(&self, index: usize) -> u64 { + match self { + BlobMetaChunkArray::V1(v) => v[index].compressed_offset(), + BlobMetaChunkArray::V2(v) => v[index].compressed_offset(), + } + } + + fn compressed_size(&self, index: usize) -> u32 { + match self { + BlobMetaChunkArray::V1(v) => v[index].compressed_size(), + BlobMetaChunkArray::V2(v) => v[index].compressed_size(), + } + } + + fn uncompressed_offset(&self, index: usize) -> u64 { + match self { + BlobMetaChunkArray::V1(v) => v[index].uncompressed_offset(), + BlobMetaChunkArray::V2(v) => v[index].uncompressed_offset(), + } + } + + fn uncompressed_size(&self, index: usize) -> u32 { + match self { + BlobMetaChunkArray::V1(v) => v[index].uncompressed_size(), + BlobMetaChunkArray::V2(v) => v[index].uncompressed_size(), + } + } + + fn is_batch(&self, index: usize) -> bool { + match self { + BlobMetaChunkArray::V1(v) => v[index].is_batch(), + BlobMetaChunkArray::V2(v) => v[index].is_batch(), + } + } + + fn batch_index(&self, index: usize) -> Result { + match self { + BlobMetaChunkArray::V1(v) => v[index].get_batch_index(), + BlobMetaChunkArray::V2(v) => v[index].get_batch_index(), + } + } + + fn uncompressed_offset_in_batch_buf(&self, index: usize) -> Result { + match self { + BlobMetaChunkArray::V1(v) => v[index].get_uncompressed_offset_in_batch_buf(), + BlobMetaChunkArray::V2(v) => v[index].get_uncompressed_offset_in_batch_buf(), + } + } + + fn zran_index(&self, index: usize) -> Result { + match self { + BlobMetaChunkArray::V1(v) => v[index].get_zran_index(), + BlobMetaChunkArray::V2(v) => v[index].get_zran_index(), + } + } + + fn zran_offset(&self, index: usize) -> Result { + match self { + BlobMetaChunkArray::V1(v) => v[index].get_zran_offset(), + BlobMetaChunkArray::V2(v) => v[index].get_zran_offset(), + } + } + + fn is_compressed(&self, index: usize) -> bool { + match self { + BlobMetaChunkArray::V1(v) => v[index].is_compressed(), + BlobMetaChunkArray::V2(v) => v[index].is_compressed(), + } + } + + fn is_encrypted(&self, index: usize) -> bool { + match self { + BlobMetaChunkArray::V1(v) => v[index].is_encrypted(), + BlobMetaChunkArray::V2(v) => v[index].is_encrypted(), + } + } + + fn _get_chunk_index_nocheck( + state: &BlobCompressionContext, + chunks: &[T], + addr: u64, + compressed: bool, + prefetch: bool, + ) -> Result { + let mut size = chunks.len(); + let mut left = 0; + let mut right = size; + let mut start = 0; + let mut end = 0; + + while left < right { + let mid = left + size / 2; + // SAFETY: the call is made safe by the following invariants: + // - `mid >= 0` + // - `mid < size`: `mid` is limited by `[left; right)` bound. + let entry = &chunks[mid]; + if compressed { + // Capable of handling both batch and non-batch chunks. + let c_offset = entry.compressed_offset(); + let c_size = state.get_compressed_size(mid)?; + (start, end) = (c_offset, c_offset + c_size as u64); + } else { + start = entry.uncompressed_offset(); + end = entry.uncompressed_end(); + }; + + if start > addr { + right = mid; + } else if end <= addr { + left = mid + 1; + } else { + // Find the first chunk in the batch. + if entry.is_batch() && entry.get_uncompressed_offset_in_batch_buf()? 
> 0 { + right = mid; + } else { + return Ok(mid); + } + } + + size = right - left; + } + + // Special handling prefetch for ZRan blobs because they may have holes. + if prefetch { + if right < chunks.len() { + let entry = &chunks[right]; + if entry.compressed_offset() > addr { + return Ok(right); + } + } + if left < chunks.len() { + let entry = &chunks[left]; + if entry.compressed_offset() > addr { + return Ok(left); + } + } + } + + // if addr == self.chunks[last].compressed_offset, return einval with error msg. + Err(einval!(format!( + "failed to get chunk index, prefetch {}, left {}, right {}, start: {}, end: {}, addr: {}", + prefetch, left, right, start, end, addr + ))) + } + + fn _get_chunks_uncompressed( + state: &Arc, + chunk_info_array: &[T], + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + ) -> Result>> { + let mut vec = Vec::with_capacity(512); + let mut index = + Self::_get_chunk_index_nocheck(state, chunk_info_array, start, false, false)?; + let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; + trace!( + "get_chunks_uncompressed: entry {} {}", + entry.uncompressed_offset(), + entry.uncompressed_end() + ); + + // Special handling of ZRan chunks + if entry.is_zran() { + let zran_index = entry.get_zran_index()?; + let mut count = state.zran_info_array[zran_index as usize].out_size() as u64; + let mut zran_last = zran_index; + let mut zran_end = entry.aligned_uncompressed_end(); + + while index > 0 { + let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; + if !entry.is_zran() { + return Err(einval!( + "inconsistent ZRan and non-ZRan chunk compression information entries" + )); + } else if entry.get_zran_index()? != zran_index { + // reach the header chunk associated with the same ZRan context. + break; + } else { + index -= 1; + } + } + + for entry in &chunk_info_array[index..] { + entry.validate(state)?; + if !entry.is_zran() { + return Err(einval!( + "inconsistent ZRan and non-ZRan chunk compression information entries" + )); + } + if entry.get_zran_index()? != zran_last { + let ctx = &state.zran_info_array[entry.get_zran_index()? as usize]; + if count + ctx.out_size() as u64 >= batch_size + && entry.uncompressed_offset() >= end + { + return Ok(vec); + } + count += ctx.out_size() as u64; + zran_last = entry.get_zran_index()?; + } + zran_end = entry.aligned_uncompressed_end(); + vec.push(BlobMetaChunk::new(index, state)); + index += 1; + } + + if zran_end >= end { + return Ok(vec); + } + return Err(einval!(format!( + "entry not found index {} chunk_info_array.len {}, end 0x{:x}, range [0x{:x}-0x{:x}]", + index, + chunk_info_array.len(), + vec.last().map(|v| v.uncompressed_end()).unwrap_or_default(), + start, + end, + ))); + } + + vec.push(BlobMetaChunk::new(index, state)); + let mut last_end = entry.aligned_uncompressed_end(); + if last_end >= batch_end { + Ok(vec) + } else { + while index + 1 < chunk_info_array.len() { + index += 1; + + let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; + if entry.uncompressed_offset() != last_end { + return Err(einval!(format!( + "mismatch uncompressed {} size {} last_end {}", + entry.uncompressed_offset(), + entry.uncompressed_size(), + last_end + ))); + } else if last_end >= end && entry.aligned_uncompressed_end() >= batch_end { + // Avoid read amplify if next chunk is too big. 
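The index lookup used throughout this module is a binary search over chunk ranges, keyed by either compressed or uncompressed offsets. A reduced, self-contained version of the same search over sorted, non-overlapping `(start, end)` intervals:

```rust
// Ranges are sorted and non-overlapping; each is (start, end) with end exclusive.
fn find_chunk(ranges: &[(u64, u64)], addr: u64) -> Option<usize> {
    let (mut left, mut right) = (0usize, ranges.len());
    while left < right {
        let mid = left + (right - left) / 2;
        let (start, end) = ranges[mid];
        if start > addr {
            right = mid;
        } else if end <= addr {
            left = mid + 1;
        } else {
            return Some(mid);
        }
    }
    None
}

fn main() {
    let ranges = [(0u64, 0x1000u64), (0x1000, 0x3000), (0x3000, 0x8000)];
    assert_eq!(find_chunk(&ranges, 0x2fff), Some(1));
    assert_eq!(find_chunk(&ranges, 0x8000), None);
}
```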
+ return Ok(vec); + } + + vec.push(BlobMetaChunk::new(index, state)); + last_end = entry.aligned_uncompressed_end(); + if last_end >= batch_end { + return Ok(vec); + } + } + + if last_end >= end { + Ok(vec) + } else { + Err(einval!(format!( + "entry not found index {} chunk_info_array.len {}, last_end 0x{:x}, end 0x{:x}, blob compressed size 0x{:x}", + index, + chunk_info_array.len(), + last_end, + end, + state.uncompressed_size, + ))) + } + } + } + + fn _get_chunks_compressed( + state: &Arc, + chunk_info_array: &[T], + start: u64, + end: u64, + batch_end: u64, + batch_size: u64, + prefetch: bool, + ) -> Result>> { + let mut vec = Vec::with_capacity(512); + let mut index = + Self::_get_chunk_index_nocheck(state, chunk_info_array, start, true, prefetch)?; + let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; + + // Special handling of ZRan chunks + if entry.is_zran() { + let zran_index = entry.get_zran_index()?; + let pos = state.zran_info_array[zran_index as usize].in_offset(); + let mut zran_last = zran_index; + + while index > 0 { + let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; + if !entry.is_zran() { + return Err(einval!( + "inconsistent ZRan and non-ZRan chunk compression information entries" + )); + } else if entry.get_zran_index()? != zran_index { + // reach the header chunk associated with the same ZRan context. + break; + } else { + index -= 1; + } + } + + for entry in &chunk_info_array[index..] { + entry.validate(state)?; + if !entry.is_zran() { + return Err(einval!( + "inconsistent ZRan and non-ZRan chunk compression information entries" + )); + } + if entry.get_zran_index()? != zran_last { + let ctx = &state.zran_info_array[entry.get_zran_index()? as usize]; + if ctx.in_offset() + ctx.in_size() as u64 - pos > batch_size + && entry.compressed_offset() > end + { + return Ok(vec); + } + zran_last = entry.get_zran_index()?; + } + vec.push(BlobMetaChunk::new(index, state)); + index += 1; + } + + if let Some(c) = vec.last() { + if c.uncompressed_end() >= end { + return Ok(vec); + } + // Special handling prefetch for ZRan blobs + if prefetch && index >= chunk_info_array.len() { + return Ok(vec); + } + } + return Err(einval!(format!( + "entry not found index {} chunk_info_array.len {}", + index, + chunk_info_array.len(), + ))); + } + + vec.push(BlobMetaChunk::new(index, state)); + let mut last_end = entry.compressed_end(); + if last_end >= batch_end { + Ok(vec) + } else { + while index + 1 < chunk_info_array.len() { + index += 1; + + let entry = Self::get_chunk_entry(state, chunk_info_array, index)?; + // Avoid read amplify if next chunk is too big. 
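After locating the first chunk, `_get_chunks_uncompressed` keeps appending strictly contiguous chunks until the amplified window is covered, bailing out early rather than pulling in a chunk that overshoots the window. A simplified sketch of that selection loop over `(offset, size)` pairs, not the crate's exact stopping conditions:

```rust
// Chunks are (offset, size) pairs sorted by offset with no gaps between them.
fn select_chunks(chunks: &[(u64, u64)], first: usize, end: u64, window_end: u64) -> Vec<usize> {
    let mut selected = vec![first];
    let mut last_end = chunks[first].0 + chunks[first].1;
    for (idx, &(off, len)) in chunks.iter().enumerate().skip(first + 1) {
        if last_end >= window_end {
            break;
        }
        // stop instead of amplifying into a chunk that overshoots the window
        if last_end >= end && off + len >= window_end {
            break;
        }
        assert_eq!(off, last_end, "chunks must be contiguous");
        selected.push(idx);
        last_end = off + len;
    }
    selected
}

fn main() {
    let chunks = [(0u64, 0x1000u64), (0x1000, 0x1000), (0x2000, 0x4000)];
    // the request ends inside chunk 1; the window stops amplification at 0x2000
    assert_eq!(select_chunks(&chunks, 0, 0x1800, 0x2000), vec![0, 1]);
}
```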
+ if last_end >= end && entry.compressed_end() > batch_end { + return Ok(vec); + } + + vec.push(BlobMetaChunk::new(index, state)); + last_end = entry.compressed_end(); + if last_end >= batch_end { + return Ok(vec); + } + } + + if last_end >= end || (prefetch && !vec.is_empty()) { + Ok(vec) + } else { + Err(einval!(format!( + "entry not found index {} chunk_info_array.len {}, last_end 0x{:x}, end 0x{:x}, blob compressed size 0x{:x}", + index, + chunk_info_array.len(), + last_end, + end, + state.compressed_size, + ))) + } + } + } + + fn _add_more_chunks( + state: &Arc, + chunk_info_array: &[T], + chunks: &[Arc], + max_size: u64, + ) -> Result>> { + let first_idx = chunks[0].id() as usize; + let first_entry = Self::get_chunk_entry(state, chunk_info_array, first_idx)?; + let last_idx = chunks[chunks.len() - 1].id() as usize; + let last_entry = Self::get_chunk_entry(state, chunk_info_array, last_idx)?; + + // The maximum size to be amplified in the current fetch request. + let fetch_end = max_size + chunks[0].compressed_offset(); + + let mut vec = Vec::with_capacity(128); + + // Special handling of ZRan chunks + if first_entry.is_zran() { + let first_zran_idx = first_entry.get_zran_index()?; + let mut last_zran_idx = last_entry.get_zran_index()?; + let mut index = first_idx; + while index > 0 { + let entry = Self::get_chunk_entry(state, chunk_info_array, index - 1)?; + if !entry.is_zran() { + // All chunks should be ZRan chunks. + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "invalid ZRan compression information data", + )); + } else if entry.get_zran_index()? != first_zran_idx { + // reach the header chunk associated with the same ZRan context. + break; + } else { + index -= 1; + } + } + + for entry in &chunk_info_array[index..] { + if entry.validate(state).is_err() || !entry.is_zran() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "invalid ZRan compression information data", + )); + } else if entry.get_zran_index()? > last_zran_idx { + if entry.compressed_end() + RAFS_MAX_CHUNK_SIZE <= fetch_end + && entry.get_zran_index()? == last_zran_idx + 1 + { + vec.push(BlobMetaChunk::new(index, state)); + last_zran_idx += 1; + } else { + return Ok(vec); + } + } else { + vec.push(BlobMetaChunk::new(index, state)); + } + index += 1; + } + } else { + // Handling of Batch chunks and normal chunks + let mut entry_idx = first_idx; + let mut curr_batch_idx = u32::MAX; + + // Search the first chunk of the current Batch. + if first_entry.is_batch() { + curr_batch_idx = first_entry.get_batch_index()?; + while entry_idx > 0 { + let entry = Self::get_chunk_entry(state, chunk_info_array, entry_idx - 1)?; + if !entry.is_batch() || entry.get_batch_index()? != curr_batch_idx { + // Reach the previous non-batch or batch chunk. + break; + } else { + entry_idx -= 1; + } + } + } + + // Iterate and add chunks. + let mut idx_chunks = 0; + for (idx, entry) in chunk_info_array.iter().enumerate().skip(entry_idx) { + entry.validate(state)?; + + // Add chunk if it is in the `chunks` array. + if idx_chunks < chunks.len() && idx == chunks[idx_chunks].id() as usize { + vec.push(chunks[idx_chunks].clone()); + idx_chunks += 1; + if entry.is_batch() { + curr_batch_idx = entry.get_batch_index()?; + } + continue; + } + + // If chunk is not in the `chunks` array, add it if in the current Batch, + // or can be amplified. + if entry.is_batch() { + if curr_batch_idx == entry.get_batch_index()? 
{ + vec.push(BlobMetaChunk::new(idx, state)); + continue; + } + + let batch_ctx = state.get_batch_context(entry.get_batch_index()? as usize)?; + if entry.compressed_offset() + batch_ctx.compressed_size() as u64 <= fetch_end { + vec.push(BlobMetaChunk::new(idx, state)); + curr_batch_idx = entry.get_batch_index()?; + } else { + break; + } + continue; + } + if entry.compressed_end() <= fetch_end { + vec.push(BlobMetaChunk::new(idx, state)); + } else { + break; + } + } + } + + Ok(vec) + } + + fn get_chunk_entry<'a, T: BlobMetaChunkInfo>( + state: &Arc, + chunk_info_array: &'a [T], + index: usize, + ) -> Result<&'a T> { + assert!(index < chunk_info_array.len()); + let entry = &chunk_info_array[index]; + // If the chunk belongs to a chunkdict, skip the validation check. + if state.blob_features & BlobFeatures::IS_CHUNKDICT_GENERATED.bits() == 0 { + entry.validate(state)?; + } + Ok(entry) + } +} + +/// An implementation of `trait BlobChunkInfo` based on blob meta information. +#[derive(Clone)] +pub struct BlobMetaChunk { + chunk_index: usize, + meta: Arc, +} + +impl BlobMetaChunk { + #[allow(clippy::new_ret_no_self)] + pub(crate) fn new( + chunk_index: usize, + meta: &Arc, + ) -> Arc { + assert!(chunk_index <= RAFS_MAX_CHUNKS_PER_BLOB as usize); + Arc::new(BlobMetaChunk { + chunk_index, + meta: meta.clone(), + }) as Arc + } +} + +impl BlobChunkInfo for BlobMetaChunk { + fn chunk_id(&self) -> &RafsDigest { + if self.chunk_index < self.meta.chunk_digest_array.len() { + let digest = &self.meta.chunk_digest_array[self.chunk_index]; + digest.into() + } else { + &self.meta.chunk_digest_default + } + } + + fn id(&self) -> u32 { + self.chunk_index as u32 + } + + fn blob_index(&self) -> u32 { + self.meta.blob_index + } + + fn compressed_offset(&self) -> u64 { + self.meta + .chunk_info_array + .compressed_offset(self.chunk_index) + } + + fn compressed_size(&self) -> u32 { + self.meta.chunk_info_array.compressed_size(self.chunk_index) + } + + fn uncompressed_offset(&self) -> u64 { + self.meta + .chunk_info_array + .uncompressed_offset(self.chunk_index) + } + + fn uncompressed_size(&self) -> u32 { + self.meta + .chunk_info_array + .uncompressed_size(self.chunk_index) + } + + fn is_batch(&self) -> bool { + self.meta.chunk_info_array.is_batch(self.chunk_index) + } + + fn is_compressed(&self) -> bool { + self.meta.chunk_info_array.is_compressed(self.chunk_index) + } + + fn is_encrypted(&self) -> bool { + self.meta.chunk_info_array.is_encrypted(self.chunk_index) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl BlobV5ChunkInfo for BlobMetaChunk { + fn index(&self) -> u32 { + self.chunk_index as u32 + } + + fn file_offset(&self) -> u64 { + // Not used for RAFS v6 + 0 + } + + fn flags(&self) -> BlobChunkFlags { + let mut flags = BlobChunkFlags::empty(); + if self.is_compressed() { + flags |= BlobChunkFlags::COMPRESSED; + } + flags + } + + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } +} + +/// Trait to manage compression information about chunks based on blob meta. +pub trait BlobMetaChunkInfo { + /// Get compressed offset of the chunk. + fn compressed_offset(&self) -> u64; + + /// Set compressed offset of the chunk. + fn set_compressed_offset(&mut self, offset: u64); + + /// Get compressed size of the chunk. + fn compressed_size(&self) -> u32; + + /// Set compressed size of the chunk. + fn set_compressed_size(&mut self, size: u32); + + /// Get end of compressed data of the chunk. 
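`BlobMetaChunk` itself is just a chunk index plus a shared `Arc` to the compression context; every field is resolved lazily from the shared tables. A stripped-down sketch of that handle pattern with stand-in types:

```rust
use std::sync::Arc;

// Shared per-blob tables, normally backed by the mmap'd blob.meta file.
struct ChunkTable {
    uncompressed_offsets: Vec<u64>,
    uncompressed_sizes: Vec<u32>,
}

// The handle stores only its index plus an Arc to the shared tables.
#[derive(Clone)]
struct ChunkHandle {
    index: usize,
    table: Arc<ChunkTable>,
}

impl ChunkHandle {
    fn uncompressed_offset(&self) -> u64 {
        self.table.uncompressed_offsets[self.index]
    }
    fn uncompressed_end(&self) -> u64 {
        self.uncompressed_offset() + self.table.uncompressed_sizes[self.index] as u64
    }
}

fn main() {
    let table = Arc::new(ChunkTable {
        uncompressed_offsets: vec![0, 0x1000],
        uncompressed_sizes: vec![0x1000, 0x800],
    });
    let chunk = ChunkHandle { index: 1, table };
    assert_eq!(chunk.uncompressed_end(), 0x1800);
}
```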
+ fn compressed_end(&self) -> u64 { + self.compressed_offset() + self.compressed_size() as u64 + } + + /// Get uncompressed offset of the chunk. + fn uncompressed_offset(&self) -> u64; + + /// Set uncompressed offset of the chunk. + fn set_uncompressed_offset(&mut self, offset: u64); + + /// Get uncompressed end of the chunk. + fn uncompressed_size(&self) -> u32; + + /// Set uncompressed end of the chunk. + fn set_uncompressed_size(&mut self, size: u32); + + /// Get end of uncompressed data of the chunk. + fn uncompressed_end(&self) -> u64 { + self.uncompressed_offset() + self.uncompressed_size() as u64 + } + + /// Get 4K-aligned end of uncompressed data of the chunk. + fn aligned_uncompressed_end(&self) -> u64 { + round_up_4k(self.uncompressed_end()) + } + + /// Check whether chunk data is encrypted or not. + fn is_encrypted(&self) -> bool; + + /// Check whether the blob chunk is compressed or not. + /// + /// Assume the image builder guarantee that compress_size < uncompress_size if the chunk is + /// compressed. + fn is_compressed(&self) -> bool; + + /// Check whether the chunk has associated Batch context data. + fn is_batch(&self) -> bool; + + /// Check whether the chunk has associated ZRan context data. + fn is_zran(&self) -> bool; + + /// Get index of the ZRan context data associated with the chunk. + fn get_zran_index(&self) -> Result; + + /// Get offset to get context data from the associated ZRan context. + fn get_zran_offset(&self) -> Result; + + /// Get index of the Batch context data associated with the chunk. + fn get_batch_index(&self) -> Result; + + /// Get offset of uncompressed chunk data inside the batch chunk. + fn get_uncompressed_offset_in_batch_buf(&self) -> Result; + + /// Get data associated with the entry. V2 only, V1 just returns zero. + fn get_data(&self) -> u64; + + /// Check whether the chunk compression information is valid or not. + fn validate(&self, state: &BlobCompressionContext) -> Result<()>; +} + +/// Generate description string for blob meta features. 
+pub fn format_blob_features(features: BlobFeatures) -> String { + let mut output = String::new(); + if features.contains(BlobFeatures::ALIGNED) { + output += "aligned "; + } + if features.contains(BlobFeatures::BATCH) { + output += "batch "; + } + if features.contains(BlobFeatures::CAP_TAR_TOC) { + output += "cap_toc "; + } + if features.contains(BlobFeatures::INLINED_CHUNK_DIGEST) { + output += "chunk-digest "; + } + if features.contains(BlobFeatures::CHUNK_INFO_V2) { + output += "chunk-v2 "; + } + if features.contains(BlobFeatures::INLINED_FS_META) { + output += "fs-meta "; + } + if features.contains(BlobFeatures::SEPARATE) { + output += "separate "; + } + if features.contains(BlobFeatures::HAS_TAR_HEADER) { + output += "tar-header "; + } + if features.contains(BlobFeatures::HAS_TOC) { + output += "toc "; + } + if features.contains(BlobFeatures::ZRAN) { + output += "zran "; + } + if features.contains(BlobFeatures::ENCRYPTED) { + output += "encrypted "; + } + if features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED) { + output += "is-chunkdict-generated "; + } + output.trim_end().to_string() +} + +fn round_up_4k + BitAnd + Not + From>(val: T) -> T { + (val + T::from(0xfff)) & !T::from(0xfff) +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::backend::{BackendResult, BlobReader}; + use crate::device::BlobFeatures; + use crate::RAFS_DEFAULT_CHUNK_SIZE; + use nix::sys::uio; + use nydus_utils::digest::{self, DigestHasher}; + use nydus_utils::metrics::BackendMetrics; + use std::fs::File; + use std::os::unix::io::AsRawFd; + use std::path::PathBuf; + + pub(crate) struct DummyBlobReader { + pub metrics: Arc, + pub file: File, + } + + impl BlobReader for DummyBlobReader { + fn blob_size(&self) -> BackendResult { + Ok(0) + } + + fn try_read(&self, buf: &mut [u8], offset: u64) -> BackendResult { + let ret = uio::pread(self.file.as_raw_fd(), buf, offset as i64).unwrap(); + Ok(ret) + } + + fn metrics(&self) -> &BackendMetrics { + &self.metrics + } + } + + #[test] + fn test_round_up_4k() { + assert_eq!(round_up_4k(0), 0x0u32); + assert_eq!(round_up_4k(1), 0x1000u32); + assert_eq!(round_up_4k(0xfff), 0x1000u32); + assert_eq!(round_up_4k(0x1000), 0x1000u32); + assert_eq!(round_up_4k(0x1001), 0x2000u32); + assert_eq!(round_up_4k(0x1fff), 0x2000u64); + } + + #[test] + fn test_load_meta_ci_zran_add_more_chunks() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); + + let features = BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2 + | BlobFeatures::ZRAN; + let mut blob_info = BlobInfo::new( + 0, + "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), + 0x16c6000, + 9839040, + RAFS_DEFAULT_CHUNK_SIZE as u32, + 0xa3, + features, + ); + blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); + let meta = + BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) + .unwrap(); + assert_eq!(meta.state.chunk_info_array.len(), 0xa3); + assert_eq!(meta.state.zran_info_array.len(), 0x15); + assert_eq!(meta.state.zran_dict_table.len(), 0xa0348 - 0x15 * 40); + + let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; + let chunks = meta.add_more_chunks(chunks.as_slice(), 0x30000).unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; + let chunks = meta + 
.add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = vec![BlobMetaChunk::new(66, &meta.state)]; + let chunks = meta + .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = vec![BlobMetaChunk::new(116, &meta.state)]; + let chunks = meta + .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 1); + + let chunks = vec![BlobMetaChunk::new(162, &meta.state)]; + let chunks = meta + .add_more_chunks(chunks.as_slice(), RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 12); + } + + #[test] + fn test_load_meta_ci_zran_get_chunks_uncompressed() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); + + let features = BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2 + | BlobFeatures::ZRAN; + let mut blob_info = BlobInfo::new( + 0, + "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), + 0x16c6000, + 9839040, + RAFS_DEFAULT_CHUNK_SIZE as u32, + 0xa3, + features, + ); + blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); + let meta = + BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) + .unwrap(); + assert_eq!(meta.state.chunk_info_array.len(), 0xa3); + assert_eq!(meta.state.zran_info_array.len(), 0x15); + assert_eq!(meta.state.zran_dict_table.len(), 0xa0348 - 0x15 * 40); + + let chunks = meta.get_chunks_uncompressed(0, 1, 0x30000).unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = meta + .get_chunks_uncompressed(0, 1, RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = meta + .get_chunks_uncompressed(0x112000, 0x10000, RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 116); + + let chunks = meta + .get_chunks_uncompressed(0xf9b000, 0x100, RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 12); + + let chunks = meta + .get_chunks_uncompressed(0xf9b000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 13); + + let chunks = meta + .get_chunks_uncompressed(0x16c5000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + assert_eq!(chunks.len(), 12); + + assert!(meta + .get_chunks_uncompressed(0x2000000, 0x100, 4 * RAFS_DEFAULT_CHUNK_SIZE) + .is_err()); + } + + #[test] + fn test_load_meta_ci_zran_get_chunks_compressed() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a"); + + let features = BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2 + | BlobFeatures::ZRAN; + let mut blob_info = BlobInfo::new( + 0, + "233c72f2b6b698c07021c4da367cfe2dff4f049efbaa885ca0ff760ea297865a".to_string(), + 0x16c6000, + 9839040, + RAFS_DEFAULT_CHUNK_SIZE as u32, + 0xa3, + features, + ); + blob_info.set_blob_meta_info(0, 0xa1290, 0xa1290, compress::Algorithm::None as u32); + let meta = + BlobCompressionContextInfo::new(&path.display().to_string(), &blob_info, None, false) + .unwrap(); + assert_eq!(meta.state.chunk_info_array.len(), 0xa3); + assert_eq!(meta.state.zran_info_array.len(), 0x15); + assert_eq!(meta.state.zran_dict_table.len(), 
0xa0348 - 0x15 * 40); + + let chunks = meta.get_chunks_compressed(0xb8, 1, 0x30000, false).unwrap(); + assert_eq!(chunks.len(), 67); + + let chunks = meta + .get_chunks_compressed(0xb8, 1, RAFS_DEFAULT_CHUNK_SIZE, false) + .unwrap(); + assert_eq!(chunks.len(), 116); + + let chunks = meta + .get_chunks_compressed(0xb8, 1, 2 * RAFS_DEFAULT_CHUNK_SIZE, false) + .unwrap(); + assert_eq!(chunks.len(), 120); + + let chunks = meta + .get_chunks_compressed(0x5fd41e, 1, RAFS_DEFAULT_CHUNK_SIZE / 2, false) + .unwrap(); + assert_eq!(chunks.len(), 3); + + let chunks = meta + .get_chunks_compressed(0x95d55d, 0x20, RAFS_DEFAULT_CHUNK_SIZE, false) + .unwrap(); + assert_eq!(chunks.len(), 12); + + assert!(meta + .get_chunks_compressed(0x0, 0x1, RAFS_DEFAULT_CHUNK_SIZE, false) + .is_err()); + assert!(meta + .get_chunks_compressed(0x1000000, 0x1, RAFS_DEFAULT_CHUNK_SIZE, false) + .is_err()); + } + + #[test] + fn test_blob_compression_context_header_getters_and_setters() { + let mut header = BlobCompressionContextHeader::default(); + + assert_eq!(header.features(), 0); + header.set_aligned(true); + assert!(header.is_4k_aligned()); + header.set_aligned(false); + + header.set_inlined_fs_meta(true); + assert!(header.has_feature(BlobFeatures::INLINED_FS_META)); + header.set_inlined_fs_meta(false); + + header.set_chunk_info_v2(true); + assert!(header.has_feature(BlobFeatures::CHUNK_INFO_V2)); + header.set_chunk_info_v2(false); + + header.set_ci_zran(true); + assert!(header.has_feature(BlobFeatures::ZRAN)); + header.set_ci_zran(false); + + header.set_separate_blob(true); + assert!(header.has_feature(BlobFeatures::SEPARATE)); + header.set_separate_blob(false); + + header.set_ci_batch(true); + assert!(header.has_feature(BlobFeatures::BATCH)); + header.set_ci_batch(false); + + header.set_inlined_chunk_digest(true); + assert!(header.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST)); + header.set_inlined_chunk_digest(false); + + header.set_has_tar_header(true); + assert!(header.has_feature(BlobFeatures::HAS_TAR_HEADER)); + header.set_has_tar_header(false); + + header.set_has_toc(true); + assert!(header.has_feature(BlobFeatures::HAS_TOC)); + header.set_has_toc(false); + + header.set_cap_tar_toc(true); + assert!(header.has_feature(BlobFeatures::CAP_TAR_TOC)); + header.set_cap_tar_toc(false); + + header.set_tarfs(true); + assert!(header.has_feature(BlobFeatures::TARFS)); + header.set_tarfs(false); + + header.set_encrypted(true); + assert!(header.has_feature(BlobFeatures::ENCRYPTED)); + header.set_encrypted(false); + + assert_eq!(header.features(), 0); + + assert_eq!(header.ci_compressor(), compress::Algorithm::Lz4Block); + header.set_ci_compressor(compress::Algorithm::GZip); + assert_eq!(header.ci_compressor(), compress::Algorithm::GZip); + header.set_ci_compressor(compress::Algorithm::Zstd); + assert_eq!(header.ci_compressor(), compress::Algorithm::Zstd); + + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + hasher.digest_update(header.as_bytes()); + let hash: String = hasher.digest_finalize().into(); + assert_eq!( + hash, + String::from("f56a1129d3df9fc7d60b26dbf495a60bda3dfc265f4f37854e4a36b826b660fc") + ); + + assert_eq!(header.ci_entries(), 0); + header.set_ci_entries(1); + assert_eq!(header.ci_entries(), 1); + + assert_eq!(header.ci_compressed_offset(), 0); + header.set_ci_compressed_offset(1); + assert_eq!(header.ci_compressed_offset(), 1); + + assert_eq!(header.ci_compressed_size(), 0); + header.set_ci_compressed_size(1); + assert_eq!(header.ci_compressed_size(), 1); + + 
assert_eq!(header.ci_uncompressed_size(), 0); + header.set_ci_uncompressed_size(1); + assert_eq!(header.ci_uncompressed_size(), 1); + + assert_eq!(header.ci_zran_count(), 0); + header.set_ci_zran_count(1); + assert_eq!(header.ci_zran_count(), 1); + + assert_eq!(header.ci_zran_offset(), 0); + header.set_ci_zran_offset(1); + assert_eq!(header.ci_zran_offset(), 1); + + assert_eq!(header.ci_zran_size(), 0); + header.set_ci_zran_size(1); + assert_eq!(header.ci_zran_size(), 1); + } + + #[test] + fn test_format_blob_features() { + let features = !BlobFeatures::default(); + let content = format_blob_features(features); + assert!(content.contains("aligned")); + assert!(content.contains("fs-meta")); + } + + #[test] + fn test_add_more_chunks() { + // Batch chunks: [chunk0, chunk1], chunk2, [chunk3, chunk4] + let mut chunk0 = BlobChunkInfoV2Ondisk::default(); + chunk0.set_batch(true); + chunk0.set_compressed(true); + chunk0.set_batch_index(0); + chunk0.set_uncompressed_offset_in_batch_buf(0); + chunk0.set_uncompressed_offset(0); + chunk0.set_uncompressed_size(0x2000); + chunk0.set_compressed_offset(0); + + let mut chunk1 = BlobChunkInfoV2Ondisk::default(); + chunk1.set_batch(true); + chunk1.set_compressed(true); + chunk1.set_batch_index(0); + chunk1.set_uncompressed_offset_in_batch_buf(0x2000); + chunk1.set_uncompressed_offset(0x2000); + chunk1.set_uncompressed_size(0x1000); + chunk1.set_compressed_offset(0); + + let mut batch_ctx0 = BatchInflateContext::default(); + batch_ctx0.set_uncompressed_batch_size(0x3000); + batch_ctx0.set_compressed_size(0x2000); + + let mut chunk2 = BlobChunkInfoV2Ondisk::default(); + chunk2.set_batch(false); + chunk2.set_compressed(true); + chunk2.set_uncompressed_offset(0x3000); + chunk2.set_compressed_offset(0x2000); + chunk2.set_uncompressed_size(0x4000); + chunk2.set_compressed_size(0x3000); + + let mut chunk3 = BlobChunkInfoV2Ondisk::default(); + chunk3.set_batch(true); + chunk3.set_compressed(true); + chunk3.set_batch_index(1); + chunk3.set_uncompressed_offset_in_batch_buf(0); + chunk3.set_uncompressed_offset(0x7000); + chunk3.set_uncompressed_size(0x2000); + chunk3.set_compressed_offset(0x5000); + + let mut chunk4 = BlobChunkInfoV2Ondisk::default(); + chunk4.set_batch(true); + chunk4.set_compressed(true); + chunk4.set_batch_index(1); + chunk4.set_uncompressed_offset_in_batch_buf(0x2000); + chunk4.set_uncompressed_offset(0x9000); + chunk4.set_uncompressed_size(0x2000); + chunk4.set_compressed_offset(0x5000); + + let mut batch_ctx1 = BatchInflateContext::default(); + batch_ctx1.set_compressed_size(0x3000); + batch_ctx1.set_uncompressed_batch_size(0x4000); + + let chunk_info_array = vec![chunk0, chunk1, chunk2, chunk3, chunk4]; + let chunk_infos = BlobMetaChunkArray::V2(chunk_info_array); + let chunk_infos = ManuallyDrop::new(chunk_infos); + + let batch_ctx_array = vec![batch_ctx0, batch_ctx1]; + let batch_ctxes = ManuallyDrop::new(batch_ctx_array); + + let state = BlobCompressionContext { + chunk_info_array: chunk_infos, + batch_info_array: batch_ctxes, + compressed_size: 0x8000, + uncompressed_size: 0xB000, + blob_features: (BlobFeatures::BATCH + | BlobFeatures::ALIGNED + | BlobFeatures::INLINED_FS_META + | BlobFeatures::CHUNK_INFO_V2) + .bits(), + ..Default::default() + }; + + let state = Arc::new(state); + let meta = BlobCompressionContextInfo { state }; + + // test read amplification + let chunks = vec![BlobMetaChunk::new(0, &meta.state)]; + let chunks = meta + .add_more_chunks(&chunks, RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + let chunk_ids: Vec<_> = 
chunks.iter().map(|c| c.id()).collect(); + assert_eq!(chunk_ids, vec![0, 1, 2, 3, 4]); + + // test read the chunk in the middle of the batch chunk + let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; + let chunks = meta + .add_more_chunks(&chunks, RAFS_DEFAULT_CHUNK_SIZE) + .unwrap(); + let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); + assert_eq!(chunk_ids, vec![0, 1, 2, 3, 4]); + + // test no read amplification + let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; + let chunks = meta.add_more_chunks(&chunks, 0).unwrap(); + let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); + assert_eq!(chunk_ids, vec![0, 1]); + + // test read non-batch chunk + let chunks = vec![BlobMetaChunk::new(2, &meta.state)]; + let chunks = meta.add_more_chunks(&chunks, 0).unwrap(); + let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); + assert_eq!(chunk_ids, vec![2]); + + // test small read amplification + let chunks = vec![BlobMetaChunk::new(1, &meta.state)]; + let chunks = meta.add_more_chunks(&chunks, 0x6000).unwrap(); + let chunk_ids: Vec<_> = chunks.iter().map(|c| c.id()).collect(); + assert_eq!(chunk_ids, vec![0, 1, 2]); + } +} diff --git a/storage/src/meta/toc.rs b/storage/src/meta/toc.rs index 91fc8ea2601..7a9351e7ca8 100644 --- a/storage/src/meta/toc.rs +++ b/storage/src/meta/toc.rs @@ -1,995 +1,995 @@ -// Copyright 2022 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Rafs filesystem TOC entry layout and data structures. - -use std::convert::{TryFrom, TryInto}; -use std::fs::{self, File, OpenOptions}; -use std::io::{Error, ErrorKind, Read, Result, Write}; -use std::mem::size_of; -use std::path::{Path, PathBuf}; -use std::slice; -use std::sync::Arc; - -use nydus_api::ConfigV2; -use nydus_utils::compress::{self, Decoder}; -use nydus_utils::digest::{self, DigestHasher, RafsDigest}; -use serde::Serialize; -use tar::{EntryType, Header}; - -use crate::backend::{BlobBufReader, BlobReader}; -use crate::factory::BlobFactory; -use crate::utils::alloc_buf; - -/// File name for RAFS data chunks. -pub const TOC_ENTRY_BLOB_RAW: &str = "image.blob"; -/// File name for RAFS meta/bootstrap. -pub const TOC_ENTRY_BOOTSTRAP: &str = "image.boot"; -/// File name for RAFS blob compression context table. -pub const TOC_ENTRY_BLOB_META: &str = "blob.meta"; -/// File name for RAFS blob compression context table header. -pub const TOC_ENTRY_BLOB_META_HEADER: &str = "blob.meta.header"; -/// File name for RAFS chunk digest table. -pub const TOC_ENTRY_BLOB_DIGEST: &str = "blob.digest"; -/// File name for RAFS blob ToC table. -pub const TOC_ENTRY_BLOB_TOC: &str = "rafs.blob.toc"; - -bitflags! { - #[derive(Serialize)] - /// Feature flags for ToC entry. - pub struct TocEntryFlags: u32 { - /// Entry data is not compressed. - const COMPRESSION_NONE = 0x0001; - /// Entry data is compressed with zstd. - const COMPRESSION_ZSTD = 0x0002; - /// Entry data is compressed with lz4. - const COMPRESSION_LZ4_BLOCK = 0x0004; - /// Bit mask for compression algorithms. - const COMPRESSION_MASK = 0x000f; - } -} - -impl TryFrom for TocEntryFlags { - type Error = Error; - - fn try_from(c: compress::Algorithm) -> std::result::Result { - match c { - compress::Algorithm::None => Ok(Self::COMPRESSION_NONE), - compress::Algorithm::Zstd => Ok(Self::COMPRESSION_ZSTD), - compress::Algorithm::Lz4Block => Ok(Self::COMPRESSION_LZ4_BLOCK), - _ => Err(eother!(format!("unsupported compressor {}", c,))), - } - } -} - -/// Blob ToC entry on-disk format, 128 bytes. 
-/// -/// The structure is designed to seek ToC data with the `name` field. -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TocEntry { - /// Possible values: COMPRESSOR - flags: u32, - reserved1: u32, - /// Name of entry file - name: [u8; 16], - /// Sha256 of uncompressed data - uncompressed_digest: [u8; 32], - /// Offset of compressed data - compressed_offset: u64, - /// Size of compressed data - compressed_size: u64, - /// Size of uncompressed data - uncompressed_size: u64, - reserved2: [u8; 48], -} - -impl Default for TocEntry { - fn default() -> Self { - TocEntry { - flags: 0, - reserved1: 0, - name: [0u8; 16], - uncompressed_digest: [0u8; 32], - compressed_offset: 0, - compressed_size: 0, - uncompressed_size: 0, - reserved2: [0u8; 48], - } - } -} - -impl TocEntry { - /// Get ToC entry name. - pub fn name(&self) -> Result { - String::from_utf8(self.name.to_vec()) - .map(|v| v.trim_end_matches('\0').to_string()) - .map_err(|_e| eother!(format!("failed to get ToC entry name"))) - } - - /// Get digest of uncompressed content. - pub fn uncompressed_digest(&self) -> RafsDigest { - RafsDigest { - data: self.uncompressed_digest, - } - } - - /// Get size of uncompressed content. - pub fn uncompressed_size(&self) -> u64 { - self.uncompressed_size - } - - /// Get offset of compressed content. - pub fn compressed_offset(&self) -> u64 { - self.compressed_offset - } - - /// Get size of compressed content. - pub fn compressed_size(&self) -> u64 { - self.compressed_size - } - - /// Get compression algorithm to process entry data. - pub fn compressor(&self) -> Result { - let flags = TocEntryFlags::from_bits(self.flags) - .ok_or_else(|| einval!("unknown compression algorithm for TOC entry"))?; - let algo = match flags & TocEntryFlags::COMPRESSION_MASK { - TocEntryFlags::COMPRESSION_ZSTD => compress::Algorithm::Zstd, - TocEntryFlags::COMPRESSION_LZ4_BLOCK => compress::Algorithm::Lz4Block, - TocEntryFlags::COMPRESSION_NONE => compress::Algorithm::None, - _ => return Err(einval!("unknown compression algorithm for TOC entry")), - }; - Ok(algo) - } - - /// Set compression algorithm to process entry data. - pub fn set_compressor(&mut self, compressor: compress::Algorithm) -> Result<()> { - let c: TocEntryFlags = compressor.try_into()?; - - self.flags &= !TocEntryFlags::COMPRESSION_MASK.bits(); - self.flags |= c.bits(); - - Ok(()) - } - - /// Extract entry data from a `BlobReader` into a writer. 
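The entry is a fixed 128-byte record; the field widths in the definition above (4 + 4 + 16 + 32 + 8 + 8 + 8 + 48) account for the full size with no implicit padding. A quick check with a stand-in `#[repr(C)]` struct mirroring those widths:

```rust
#[repr(C)]
#[derive(Clone, Copy)]
struct TocEntrySketch {
    flags: u32,
    reserved1: u32,
    name: [u8; 16],
    uncompressed_digest: [u8; 32],
    compressed_offset: u64,
    compressed_size: u64,
    uncompressed_size: u64,
    reserved2: [u8; 48],
}

fn main() {
    // the layout packs exactly into 128 bytes with 8-byte alignment
    assert_eq!(std::mem::size_of::<TocEntrySketch>(), 128);
}
```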
- pub fn extract_from_reader( - &self, - reader: Arc, - writer: &mut W, - ) -> Result<()> { - let mut hasher = digest::RafsDigest::hasher(digest::Algorithm::Sha256); - let mut count = 0; - let buf_size = std::cmp::min(0x1000000u64, self.compressed_size) as usize; - let mut buf_reader = BlobBufReader::new( - buf_size, - reader, - self.compressed_offset, - self.compressed_size, - ); - - if self.flags & TocEntryFlags::COMPRESSION_ZSTD.bits() != 0 { - let mut decoder = Decoder::new(buf_reader, compress::Algorithm::Zstd) - .map_err(|_| eother!("failed to create decoder"))?; - let mut buf = alloc_buf(0x40000); - loop { - let sz = decoder - .read(&mut buf) - .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; - if sz == 0 { - break; - } - hasher.digest_update(&buf[..sz]); - writer - .write_all(&buf[..sz]) - .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; - count += sz as u64; - } - } else if self.flags & TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits() != 0 { - return Err(eother!("unsupported compression algorithm lz4_block.")); - } else if self.flags & TocEntryFlags::COMPRESSION_NONE.bits() != 0 { - let mut buf = alloc_buf(0x40000); - loop { - let sz = buf_reader - .read(&mut buf) - .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; - if sz == 0 { - break; - } - hasher.digest_update(&buf[..sz]); - writer - .write_all(&buf[..sz]) - .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; - count += sz as u64; - } - } else { - return Err(eother!("unsupported compression algorithm.")); - } - - if count != self.uncompressed_size { - return Err(eother!(format!( - "size of decompressed content doesn't match, expect {}, got {}", - self.uncompressed_size, count, - ))); - } - let digest = hasher.digest_finalize(); - if digest.data != self.uncompressed_digest - && self.uncompressed_digest != RafsDigest::default().data - { - return Err(eother!("digest of decompressed content doesn't match")); - } - - Ok(()) - } - - /// Extract entry data from a data buffer into a writer. 
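`extract_from_reader` streams the entry in fixed-size chunks, hashing and counting as it writes, and rejects any size mismatch at the end. A std-only sketch of that verify-while-extracting loop; the SHA-256 update is replaced here by a plain byte counter:

```rust
use std::io::{Read, Result, Write};

fn extract<R: Read, W: Write>(mut reader: R, writer: &mut W, expected: u64) -> Result<()> {
    let mut buf = vec![0u8; 0x40000];
    let mut count = 0u64;
    loop {
        let sz = reader.read(&mut buf)?;
        if sz == 0 {
            break;
        }
        // the real code also feeds &buf[..sz] into a digest hasher here
        writer.write_all(&buf[..sz])?;
        count += sz as u64;
    }
    if count != expected {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("size of extracted content doesn't match, expect {}, got {}", expected, count),
        ));
    }
    Ok(())
}

fn main() -> Result<()> {
    let data = vec![0xabu8; 1024];
    let mut out = Vec::new();
    extract(&data[..], &mut out, 1024)?;
    assert_eq!(out, data);
    Ok(())
}
```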
- pub fn extract_from_buf(&self, buf: &[u8], writer: &mut W) -> Result<()> { - let mut hasher = digest::RafsDigest::hasher(digest::Algorithm::Sha256); - let mut count = 0; - - if self.flags & TocEntryFlags::COMPRESSION_ZSTD.bits() != 0 { - let mut decoder = Decoder::new(buf, compress::Algorithm::Zstd) - .map_err(|_| eother!("failed to create decoder"))?; - let mut buf = alloc_buf(0x40000); - loop { - let sz = decoder - .read(&mut buf) - .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; - if sz == 0 { - break; - } - hasher.digest_update(&buf[..sz]); - writer - .write_all(&buf[..sz]) - .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; - count += sz as u64; - } - } else if self.flags & TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits() != 0 { - return Err(eother!("unsupported compression algorithm lz4_block.")); - } else if self.flags & TocEntryFlags::COMPRESSION_NONE.bits() != 0 { - hasher.digest_update(buf); - writer - .write_all(buf) - .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; - count = buf.len() as u64; - } else { - return Err(eother!("unsupported compression algorithm.")); - } - - if count != self.uncompressed_size { - return Err(eother!(format!( - "size of decompressed content doesn't match, expect {}, got {}", - self.uncompressed_size, count, - ))); - } - let digest = hasher.digest_finalize(); - if digest.data != self.uncompressed_digest { - return Err(eother!("digest of decompressed content doesn't match")); - } - - Ok(()) - } -} - -/// Container to host a group of ToC entries. -pub struct TocEntryList { - entries: Vec, - toc_digest: RafsDigest, - toc_size: u32, -} - -impl Default for TocEntryList { - fn default() -> Self { - Self::new() - } -} - -impl TocEntryList { - /// Create a new instance of [TocEntryList]. - pub fn new() -> Self { - Self { - entries: Vec::new(), - toc_digest: RafsDigest::default(), - toc_size: 0, - } - } - - /// Add a ToC entry into the list. - pub fn add( - &mut self, - name: &str, - compressor: compress::Algorithm, - uncompressed_digest: RafsDigest, - compressed_offset: u64, - compressed_size: u64, - uncompressed_size: u64, - ) -> Result<&mut TocEntry> { - let name_size = name.as_bytes().len(); - if name_size > 16 { - return Err(eother!(format!("invalid entry name length {}", name_size))); - } - - let last = self.entries.len(); - let target = &mut [0u8; 16]; - target[..name_size].clone_from_slice(name.as_bytes()); - let mut entry = TocEntry { - flags: 0, - reserved1: 0, - name: *target, - uncompressed_digest: uncompressed_digest.data, - compressed_offset, - compressed_size, - uncompressed_size, - reserved2: [0u8; 48], - }; - entry.set_compressor(compressor)?; - self.entries.push(entry); - - Ok(&mut self.entries[last]) - } - - /// Convert object to a byte slice. - pub fn as_bytes(&self) -> &[u8] { - let (_, data, _) = unsafe { self.entries.align_to::() }; - data - } - - /// Get ToC entry with specified name. - pub fn get_entry(&self, name: &str) -> Option<&TocEntry> { - for toc in self.entries.iter() { - if let Ok(n) = toc.name() { - if n == name { - return Some(toc); - } - } - } - - None - } - - /// Get digest of ToC content. - pub fn toc_digest(&self) -> &RafsDigest { - &self.toc_digest - } - - /// Get size of ToC content. - pub fn toc_size(&self) -> u32 { - self.toc_size - } - - /// Read a [TocEntryList] from a [BlobReader]. 
- pub fn read_from_blob( - reader: &dyn BlobReader, - cache_file: Option<&mut W>, - location: &TocLocation, - ) -> Result { - let (buf, _) = Self::read_toc_header(reader, location)?; - if let Some(writer) = cache_file { - writer.write_all(&buf)?; - } - Self::parse_toc_header(&buf, location) - } - - /// Read a [TocEntryList] from cache file, and fallback to storage backend. - pub fn read_from_cache_file>( - path: P, - reader: &dyn BlobReader, - location: &TocLocation, - ) -> Result { - location.validate()?; - - if let Ok(mut file) = OpenOptions::new().read(true).open(path.as_ref()) { - let md = file.metadata()?; - let size = md.len(); - if size > 512 && size % 128 == 0 && md.len() <= 0x1000 { - let mut buf = alloc_buf(size as usize); - file.read_exact(&mut buf) - .map_err(|e| eother!(format!("failed to read ToC from cache, {}", e)))?; - if let Ok(toc) = Self::parse_toc_header(&buf, location) { - return Ok(toc); - } - } - } - - let p = path - .as_ref() - .to_path_buf() - .with_extension("toc_downloading"); - if let Ok(mut file) = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(p.as_path()) - { - match Self::read_from_blob(reader, Some(&mut file), location) { - Ok(v) => { - let _ = fs::rename(p, path.as_ref()); - Ok(v) - } - Err(e) => { - let _ = fs::remove_file(p); - Err(e) - } - } - } else { - Self::read_from_blob::(reader, None, location) - } - } - - fn read_toc_header(reader: &dyn BlobReader, location: &TocLocation) -> Result<(Vec, u64)> { - location.validate()?; - let (offset, size) = if location.auto_detect { - let blob_size = reader - .blob_size() - .map_err(|e| eio!(format!("failed to get blob size, {}", e)))?; - let size = if blob_size > 0x1000 { - 0x1000 - } else { - blob_size >> 7 << 7 - }; - (blob_size - size, size) - } else { - (location.offset, location.size) - }; - - let size = size as usize; - let mut buf = alloc_buf(size); - let sz = reader - .read(&mut buf, offset) - .map_err(|e| eother!(format!("failed to read ToC from backend, {}", e)))?; - if sz != size { - return Err(eother!(format!( - "failed to read ToC from backend, expect {}, got {} bytes", - size, sz - ))); - } - - Ok((buf, offset + 0x1000)) - } - - fn parse_toc_header(buf: &[u8], location: &TocLocation) -> Result { - if buf.len() < 512 { - return Err(Error::new( - ErrorKind::InvalidData, - format!("blob ToC size {} is too small", buf.len()), - )); - } - let size = buf.len() - 512; - let header = Header::from_byte_slice(&buf[size..]); - let entry_type = header.entry_type(); - if entry_type != EntryType::Regular { - return Err(Error::new( - ErrorKind::Other, - "Tar entry type for ToC is not a regular file", - )); - } - let entry_size = header.entry_size().map_err(|_| { - Error::new(ErrorKind::Other, "failed to get entry size from tar header") - })?; - if entry_size > size as u64 { - return Err(Error::new( - ErrorKind::Other, - format!( - "invalid toc entry size in tar header, expect {}, got {}", - size, entry_size - ), - )); - } - let name = header.path().map_err(|_| { - Error::new( - ErrorKind::Other, - "failed to get ToC file name from tar header", - ) - })?; - if name != Path::new(TOC_ENTRY_BLOB_TOC) { - return Err(Error::new( - ErrorKind::Other, - format!( - "ToC file name from tar header doesn't match, {}", - name.display() - ), - )); - } - let _header = header - .as_gnu() - .ok_or_else(|| Error::new(ErrorKind::Other, "invalid GNU tar header for ToC"))?; - - let mut pos = size - entry_size as usize; - let mut list = TocEntryList::new(); - list.toc_digest = 
digest::RafsDigest::from_buf(&buf[pos..], digest::Algorithm::Sha256); - list.toc_size = (entry_size + 512) as u32; - if location.validate_digest && list.toc_digest != location.digest { - return Err(eother!(format!( - "toc content digest value doesn't match, expect {:?}, got {:?}", - location.digest.data, list.toc_digest.data - ))); - } - - while pos < size { - let mut entry = TocEntry::default(); - let s = unsafe { - slice::from_raw_parts_mut(&mut entry as *mut _ as *mut u8, size_of::()) - }; - s.copy_from_slice(&buf[pos..pos + size_of::()]); - list.entries.push(entry); - pos += size_of::(); - } - - Ok(list) - } - - /// Extract `image.boot` and/or `blob.digest` from a [BlobReader] into files. - pub fn extract_from_blob>( - &self, - reader: Arc, - bootstrap: Option

<P>,
- digest: Option<P>
, - ) -> Result<()> { - if let Some(path) = bootstrap { - let bootstrap = self - .get_entry(TOC_ENTRY_BOOTSTRAP) - .ok_or_else(|| enoent!("`image.boot` doesn't exist in the ToC list"))?; - let compressor = bootstrap.compressor()?; - if compressor == compress::Algorithm::None - && bootstrap.compressed_size() != bootstrap.uncompressed_size() - { - return Err(einval!("invalid ToC entry for `image.boot`")); - } - - let mut ready = false; - if path.as_ref().exists() { - let mut file = OpenOptions::new().read(true).open(path.as_ref())?; - let digest = RafsDigest::from_reader(&mut file, digest::Algorithm::Sha256)?; - if digest.data == bootstrap.uncompressed_digest { - ready = true; - } - } - if !ready { - let p = path - .as_ref() - .to_path_buf() - .with_extension("toc_downloading"); - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(p.as_path())?; - bootstrap - .extract_from_reader(reader.clone(), &mut file) - .map_err(|e| { - let _ = fs::remove_file(&p); - e - })?; - fs::rename(&p, path).map_err(|e| { - let _ = fs::remove_file(&p); - e - })?; - } - } - - if let Some(path) = digest { - let cda = self - .get_entry(TOC_ENTRY_BLOB_DIGEST) - .ok_or_else(|| enoent!("`blob.digest` doesn't exist in the ToC list"))?; - let compressor = cda.compressor()?; - if compressor == compress::Algorithm::None - && cda.compressed_size() != cda.uncompressed_size() - { - return Err(einval!("invalid ToC entry for `blob.digest`")); - } - - let mut ready = false; - if path.as_ref().exists() { - let mut file = OpenOptions::new().read(true).open(path.as_ref())?; - let digest = RafsDigest::from_reader(&mut file, digest::Algorithm::Sha256)?; - if digest.data == cda.uncompressed_digest { - ready = true; - } - } - if !ready { - let p = path - .as_ref() - .to_path_buf() - .with_extension("toc_downloading"); - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(p.as_path())?; - cda.extract_from_reader(reader.clone(), &mut file) - .map_err(|e| { - let _ = fs::remove_file(&p); - e - })?; - fs::rename(&p, path).map_err(|e| { - let _ = fs::remove_file(&p); - e - })?; - } - } - - Ok(()) - } - - /// Extract inlined RAFS metadata from data blobs. 
- pub fn extract_rafs_meta(id: &str, config: Arc) -> Result { - let backend_config = config.get_backend_config()?; - let workdir = config.get_cache_working_directory()?; - let path = PathBuf::from(workdir); - if !path.is_dir() { - return Err(Error::new( - ErrorKind::NotFound, - "invalid cache working directory", - )); - } - let path = path.join(id).with_extension(TOC_ENTRY_BOOTSTRAP); - - let blob_mgr = BlobFactory::new_backend(backend_config, "extract_rafs_meta")?; - let reader = blob_mgr - .get_reader(id) - .map_err(|e| eother!(format!("failed to get reader for blob {}, {}", id, e)))?; - let location = TocLocation::default(); - let (buf, blob_size) = Self::read_toc_header(reader.as_ref(), &location)?; - - if let Ok(toc) = Self::parse_toc_header(&buf, &location) { - toc.extract_from_blob(reader, Some(path.clone()), None)?; - } else { - if buf.len() < 512 { - return Err(einval!(format!("blob ToC size {} is too small", buf.len()))); - } - let header = Header::from_byte_slice(&buf[buf.len() - 512..]); - let entry_type = header.entry_type(); - if entry_type != EntryType::Regular { - return Err(eother!( - "Tar entry type for `image.boot` is not a regular file" - )); - } - let name = header - .path() - .map_err(|_| eother!("failed to get `image.boot` file name from tar header"))?; - if name != Path::new(TOC_ENTRY_BOOTSTRAP) { - return Err(eother!(format!( - "file name from tar header doesn't match `image.boot`, {}", - name.display() - ))); - } - let _header = header - .as_gnu() - .ok_or_else(|| eother!("invalid GNU tar header for ToC"))?; - let entry_size = header - .entry_size() - .map_err(|_| eother!("failed to get entry size from tar header"))?; - if entry_size > blob_size - 512 { - return Err(eother!(format!( - "invalid `image.boot` entry size in tar header, max {}, got {}", - blob_size - 512, - entry_size - ))); - } - let offset = blob_size - 512 - entry_size; - - let mut toc = TocEntryList::new(); - toc.add( - TOC_ENTRY_BOOTSTRAP, - compress::Algorithm::None, - RafsDigest::default(), - offset, - entry_size, - entry_size, - )?; - toc.extract_from_blob(reader, Some(path.clone()), None)?; - } - - Ok(path) - } -} - -/// Information to locate and validate ToC content. -#[derive(Debug)] -pub struct TocLocation { - /// Enable validating digest of the ToC content. - pub validate_digest: bool, - /// Auto detect location of ToC content. - pub auto_detect: bool, - /// Offset of the ToC content in the data blob. - pub offset: u64, - /// Size of the ToC content. - pub size: u64, - /// SHA256 digest of ToC content. - pub digest: RafsDigest, -} - -impl Default for TocLocation { - fn default() -> Self { - TocLocation { - validate_digest: false, - auto_detect: true, - offset: 0, - size: 0, - digest: RafsDigest::default(), - } - } -} - -impl TocLocation { - /// Create a [TocLocation] object with offset and size. - pub fn new(offset: u64, size: u64) -> Self { - TocLocation { - validate_digest: false, - auto_detect: false, - offset, - size, - digest: RafsDigest::default(), - } - } - - /// Create a [TocLocation] object with offset, size and digest. 
- pub fn with_digest(offset: u64, size: u64, digest: RafsDigest) -> Self { - TocLocation { - validate_digest: true, - auto_detect: false, - offset, - size, - digest, - } - } - - fn validate(&self) -> Result<()> { - if !self.auto_detect && (!(512..=0x10000).contains(&self.size) || self.size % 128 != 0) { - return Err(eother!(format!("invalid size {} of blob ToC", self.size))); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::factory::BlobFactory; - use nydus_api::{BackendConfigV2, LocalFsConfig}; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_read_toc_list() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir).join("../tests/texture/toc"); - let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; - let digest = RafsDigest { - data: [ - 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, - 89, 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, - ], - }; - let config = BackendConfigV2 { - backend_type: "localfs".to_string(), - localfs: Some(LocalFsConfig { - blob_file: "".to_string(), - dir: path.to_str().unwrap().to_string(), - alt_dirs: vec![], - }), - localdisk: None, - oss: None, - registry: None, - s3: None, - http_proxy: None, - }; - let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); - let blob = blob_mgr.get_reader(id).unwrap(); - let location = TocLocation::with_digest(9010, 1024, digest); - let mut list = - TocEntryList::read_from_blob::(blob.as_ref(), None, &location).unwrap(); - assert_eq!(list.entries.len(), 4); - - assert!(list.get_entry(TOC_ENTRY_BLOB_RAW).is_some()); - assert!(list.get_entry(TOC_ENTRY_BOOTSTRAP).is_some()); - assert!(list.get_entry(TOC_ENTRY_BLOB_META).is_some()); - assert!(list.get_entry(TOC_ENTRY_BLOB_META_HEADER).is_some()); - - let mut buf = Vec::new(); - let entry = list.get_entry(TOC_ENTRY_BLOB_META).unwrap(); - assert_eq!(entry.uncompressed_size(), 0x30); - entry.extract_from_reader(blob.clone(), &mut buf).unwrap(); - assert!(!buf.is_empty()); - - let mut buf = Vec::new(); - let entry = list.get_entry(TOC_ENTRY_BLOB_META_HEADER).unwrap(); - assert_eq!(entry.uncompressed_size(), 0x1000); - entry.extract_from_reader(blob.clone(), &mut buf).unwrap(); - assert!(!buf.is_empty()); - - assert!(list - .add( - TOC_ENTRY_BLOB_DIGEST, - compress::Algorithm::Lz4Block, - digest, - 0, - 2, - 3 - ) - .is_ok()); - assert!(list.get_entry(TOC_ENTRY_BLOB_DIGEST).is_some()); - } - - #[test] - fn test_parse_toc_list() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir).join("../tests/texture/toc"); - let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; - let mut digest = RafsDigest { - data: [ - 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, - 89, 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, - ], - }; - let config = BackendConfigV2 { - backend_type: "localfs".to_string(), - localfs: Some(LocalFsConfig { - blob_file: "".to_string(), - dir: path.to_str().unwrap().to_string(), - alt_dirs: vec![], - }), - oss: None, - registry: None, - s3: None, - http_proxy: None, - localdisk: None, - }; - let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); - let blob = blob_mgr.get_reader(id).unwrap(); - - digest.data[0] = 0; - let location = TocLocation::with_digest(9010, 1024, digest); - assert!(TocEntryList::read_from_blob::(blob.as_ref(), None, 
&location).is_err()); - digest.data[0] = 79u8; - - let location = TocLocation::new(9000, 1024); - assert!(TocEntryList::read_from_blob::(blob.as_ref(), None, &location).is_err()); - - let location = Default::default(); - let list = - TocEntryList::read_from_blob::(blob.as_ref(), None, &location).unwrap(); - assert_eq!(list.entries.len(), 4); - } - - #[test] - fn test_read_from_cache_file() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir).join("../tests/texture/toc"); - let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; - let config = BackendConfigV2 { - backend_type: "localfs".to_string(), - localfs: Some(LocalFsConfig { - blob_file: "".to_string(), - dir: path.to_str().unwrap().to_string(), - alt_dirs: vec![], - }), - oss: None, - registry: None, - s3: None, - localdisk: None, - http_proxy: None, - }; - let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); - let blob = blob_mgr.get_reader(id).unwrap(); - - let tempfile = TempFile::new().unwrap(); - let path = tempfile.as_path().to_path_buf(); - let mut file = tempfile.into_file(); - file.write_all(&[0u8; 32]).unwrap(); - - let location = Default::default(); - let list = TocEntryList::read_from_cache_file(&path, blob.as_ref(), &location).unwrap(); - assert_eq!(list.entries.len(), 4); - assert_eq!(path.metadata().unwrap().len(), 0x1000); - let list = TocEntryList::read_from_cache_file(&path, blob.as_ref(), &location).unwrap(); - assert_eq!(list.entries.len(), 4); - - list.extract_from_blob(blob.clone(), Some(path.as_path()), None) - .unwrap(); - assert_eq!(path.metadata().unwrap().len(), 20480); - list.extract_from_blob(blob.clone(), Some(path.as_path()), None) - .unwrap(); - assert_eq!(path.metadata().unwrap().len(), 20480); - } - - #[test] - fn test_toc_entry_flags() { - let flags = TocEntryFlags::try_from(compress::Algorithm::None).unwrap(); - assert_eq!(flags, TocEntryFlags::COMPRESSION_NONE); - let flags = TocEntryFlags::try_from(compress::Algorithm::Lz4Block).unwrap(); - assert_eq!(flags, TocEntryFlags::COMPRESSION_LZ4_BLOCK); - let flags = TocEntryFlags::try_from(compress::Algorithm::Zstd).unwrap(); - assert_eq!(flags, TocEntryFlags::COMPRESSION_ZSTD); - let _e = TocEntryFlags::try_from(compress::Algorithm::GZip).unwrap_err(); - } - - fn extract_from_buf_with_different_flags(entry: &TocEntry, buf: &[u8]) -> Result { - let tmp_file = TempFile::new(); - let mut file = OpenOptions::new() - .write(true) - .read(true) - .open(tmp_file.unwrap().as_path()) - .unwrap(); - - entry.extract_from_buf(&buf, &mut file)?; - - let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); - let mut buffer = [0; 1024]; - loop { - let count = file.read(&mut buffer)?; - if count == 0 { - break; - } - hasher.digest_update(&buffer[..count]); - } - Ok(hasher.digest_finalize().into()) - } - - #[test] - fn test_extract_from_buf() { - let mut entry = TocEntry { - flags: 0, - reserved1: 0, - name: [0u8; 16], - uncompressed_digest: [ - 45, 15, 227, 154, 167, 87, 190, 28, 152, 93, 55, 27, 96, 217, 56, 121, 96, 131, - 226, 94, 70, 74, 193, 156, 222, 228, 46, 156, 49, 169, 143, 53, - ], - compressed_offset: 0, - compressed_size: 0, - uncompressed_size: 0, - reserved2: [0u8; 48], - }; - - let buf = [ - 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, 89, - 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, - ]; - - entry.flags = TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits(); - 
assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); - - entry.flags = (!TocEntryFlags::empty()).bits() + 1; - assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); - - entry.flags = TocEntryFlags::COMPRESSION_NONE.bits(); - assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); - entry.uncompressed_size = 32; - let s = extract_from_buf_with_different_flags(&entry, &buf); - assert!(s.is_ok()); - assert_eq!( - s.unwrap(), - String::from("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") - ); - - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir) - .join("../tests/texture/zstd") - .join("2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca.zst"); - let mut file = OpenOptions::new().read(true).open(path.as_path()).unwrap(); - let mut buffer = [0; 1024]; - let mut buf = vec![]; - loop { - let count = file.read(&mut buffer).unwrap(); - if count == 0 { - break; - } - buf.extend_from_slice(&buffer[..count]); - } - entry.flags = TocEntryFlags::COMPRESSION_ZSTD.bits(); - entry.uncompressed_size = 10034; - assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); - entry.uncompressed_digest = [ - 47, 167, 140, 173, 85, 75, 117, 172, 145, 164, 161, 37, 237, 20, 141, 13, 222, 178, 94, - 250, 74, 170, 139, 216, 14, 93, 194, 146, 105, 10, 77, 202, - ]; - let s = extract_from_buf_with_different_flags(&entry, &buf); - assert!(s.is_ok()); - assert_eq!( - s.unwrap(), - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_owned() - ); - } -} +// Copyright 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Rafs filesystem TOC entry layout and data structures. + +use std::convert::{TryFrom, TryInto}; +use std::fs::{self, File, OpenOptions}; +use std::io::{Error, ErrorKind, Read, Result, Write}; +use std::mem::size_of; +use std::path::{Path, PathBuf}; +use std::slice; +use std::sync::Arc; + +use nydus_api::ConfigV2; +use nydus_utils::compress::{self, Decoder}; +use nydus_utils::digest::{self, DigestHasher, RafsDigest}; +use serde::Serialize; +use tar::{EntryType, Header}; + +use crate::backend::{BlobBufReader, BlobReader}; +use crate::factory::BlobFactory; +use crate::utils::alloc_buf; + +/// File name for RAFS data chunks. +pub const TOC_ENTRY_BLOB_RAW: &str = "image.blob"; +/// File name for RAFS meta/bootstrap. +pub const TOC_ENTRY_BOOTSTRAP: &str = "image.boot"; +/// File name for RAFS blob compression context table. +pub const TOC_ENTRY_BLOB_META: &str = "blob.meta"; +/// File name for RAFS blob compression context table header. +pub const TOC_ENTRY_BLOB_META_HEADER: &str = "blob.meta.header"; +/// File name for RAFS chunk digest table. +pub const TOC_ENTRY_BLOB_DIGEST: &str = "blob.digest"; +/// File name for RAFS blob ToC table. +pub const TOC_ENTRY_BLOB_TOC: &str = "rafs.blob.toc"; + +bitflags! { + #[derive(Serialize)] + /// Feature flags for ToC entry. + pub struct TocEntryFlags: u32 { + /// Entry data is not compressed. + const COMPRESSION_NONE = 0x0001; + /// Entry data is compressed with zstd. + const COMPRESSION_ZSTD = 0x0002; + /// Entry data is compressed with lz4. + const COMPRESSION_LZ4_BLOCK = 0x0004; + /// Bit mask for compression algorithms. 
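+ ///
+ /// Illustrative sketch (the `flags` variable is an assumption, not part of
+ /// this crate): the low bits select the compression algorithm, so they can
+ /// be isolated with this mask.
+ ///
+ /// ```ignore
+ /// let algo_bits = flags & TocEntryFlags::COMPRESSION_MASK;
+ /// ```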
+ const COMPRESSION_MASK = 0x000f; + } +} + +impl TryFrom for TocEntryFlags { + type Error = Error; + + fn try_from(c: compress::Algorithm) -> std::result::Result { + match c { + compress::Algorithm::None => Ok(Self::COMPRESSION_NONE), + compress::Algorithm::Zstd => Ok(Self::COMPRESSION_ZSTD), + compress::Algorithm::Lz4Block => Ok(Self::COMPRESSION_LZ4_BLOCK), + _ => Err(eother!(format!("unsupported compressor {}", c,))), + } + } +} + +/// Blob ToC entry on-disk format, 128 bytes. +/// +/// The structure is designed to seek ToC data with the `name` field. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct TocEntry { + /// Possible values: COMPRESSOR + flags: u32, + reserved1: u32, + /// Name of entry file + name: [u8; 16], + /// Sha256 of uncompressed data + uncompressed_digest: [u8; 32], + /// Offset of compressed data + compressed_offset: u64, + /// Size of compressed data + compressed_size: u64, + /// Size of uncompressed data + uncompressed_size: u64, + reserved2: [u8; 48], +} + +impl Default for TocEntry { + fn default() -> Self { + TocEntry { + flags: 0, + reserved1: 0, + name: [0u8; 16], + uncompressed_digest: [0u8; 32], + compressed_offset: 0, + compressed_size: 0, + uncompressed_size: 0, + reserved2: [0u8; 48], + } + } +} + +impl TocEntry { + /// Get ToC entry name. + pub fn name(&self) -> Result { + String::from_utf8(self.name.to_vec()) + .map(|v| v.trim_end_matches('\0').to_string()) + .map_err(|_e| eother!(format!("failed to get ToC entry name"))) + } + + /// Get digest of uncompressed content. + pub fn uncompressed_digest(&self) -> RafsDigest { + RafsDigest { + data: self.uncompressed_digest, + } + } + + /// Get size of uncompressed content. + pub fn uncompressed_size(&self) -> u64 { + self.uncompressed_size + } + + /// Get offset of compressed content. + pub fn compressed_offset(&self) -> u64 { + self.compressed_offset + } + + /// Get size of compressed content. + pub fn compressed_size(&self) -> u64 { + self.compressed_size + } + + /// Get compression algorithm to process entry data. + pub fn compressor(&self) -> Result { + let flags = TocEntryFlags::from_bits(self.flags) + .ok_or_else(|| einval!("unknown compression algorithm for TOC entry"))?; + let algo = match flags & TocEntryFlags::COMPRESSION_MASK { + TocEntryFlags::COMPRESSION_ZSTD => compress::Algorithm::Zstd, + TocEntryFlags::COMPRESSION_LZ4_BLOCK => compress::Algorithm::Lz4Block, + TocEntryFlags::COMPRESSION_NONE => compress::Algorithm::None, + _ => return Err(einval!("unknown compression algorithm for TOC entry")), + }; + Ok(algo) + } + + /// Set compression algorithm to process entry data. + pub fn set_compressor(&mut self, compressor: compress::Algorithm) -> Result<()> { + let c: TocEntryFlags = compressor.try_into()?; + + self.flags &= !TocEntryFlags::COMPRESSION_MASK.bits(); + self.flags |= c.bits(); + + Ok(()) + } + + /// Extract entry data from a `BlobReader` into a writer. 
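+ ///
+ /// # Example
+ ///
+ /// A minimal usage sketch mirroring the unit tests below; it assumes a ToC
+ /// `entry` and an `Arc`-wrapped blob `reader` are already in scope.
+ ///
+ /// ```ignore
+ /// // A `Vec<u8>` implements `Write`, so the entry can be decompressed into memory.
+ /// let mut out = Vec::new();
+ /// entry.extract_from_reader(reader.clone(), &mut out)?;
+ /// assert_eq!(out.len() as u64, entry.uncompressed_size());
+ /// ```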
+ pub fn extract_from_reader( + &self, + reader: Arc, + writer: &mut W, + ) -> Result<()> { + let mut hasher = digest::RafsDigest::hasher(digest::Algorithm::Sha256); + let mut count = 0; + let buf_size = std::cmp::min(0x1000000u64, self.compressed_size) as usize; + let mut buf_reader = BlobBufReader::new( + buf_size, + reader, + self.compressed_offset, + self.compressed_size, + ); + + if self.flags & TocEntryFlags::COMPRESSION_ZSTD.bits() != 0 { + let mut decoder = Decoder::new(buf_reader, compress::Algorithm::Zstd) + .map_err(|_| eother!("failed to create decoder"))?; + let mut buf = alloc_buf(0x40000); + loop { + let sz = decoder + .read(&mut buf) + .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; + if sz == 0 { + break; + } + hasher.digest_update(&buf[..sz]); + writer + .write_all(&buf[..sz]) + .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; + count += sz as u64; + } + } else if self.flags & TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits() != 0 { + return Err(eother!("unsupported compression algorithm lz4_block.")); + } else if self.flags & TocEntryFlags::COMPRESSION_NONE.bits() != 0 { + let mut buf = alloc_buf(0x40000); + loop { + let sz = buf_reader + .read(&mut buf) + .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; + if sz == 0 { + break; + } + hasher.digest_update(&buf[..sz]); + writer + .write_all(&buf[..sz]) + .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; + count += sz as u64; + } + } else { + return Err(eother!("unsupported compression algorithm.")); + } + + if count != self.uncompressed_size { + return Err(eother!(format!( + "size of decompressed content doesn't match, expect {}, got {}", + self.uncompressed_size, count, + ))); + } + let digest = hasher.digest_finalize(); + if digest.data != self.uncompressed_digest + && self.uncompressed_digest != RafsDigest::default().data + { + return Err(eother!("digest of decompressed content doesn't match")); + } + + Ok(()) + } + + /// Extract entry data from a data buffer into a writer. 
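+ ///
+ /// # Example
+ ///
+ /// Illustrative sketch (`raw` is an assumed buffer holding the entry's
+ /// compressed bytes as read from the blob).
+ ///
+ /// ```ignore
+ /// let mut out = Vec::new();
+ /// entry.extract_from_buf(&raw, &mut out)?;
+ /// assert_eq!(out.len() as u64, entry.uncompressed_size());
+ /// ```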
+ pub fn extract_from_buf(&self, buf: &[u8], writer: &mut W) -> Result<()> { + let mut hasher = digest::RafsDigest::hasher(digest::Algorithm::Sha256); + let mut count = 0; + + if self.flags & TocEntryFlags::COMPRESSION_ZSTD.bits() != 0 { + let mut decoder = Decoder::new(buf, compress::Algorithm::Zstd) + .map_err(|_| eother!("failed to create decoder"))?; + let mut buf = alloc_buf(0x40000); + loop { + let sz = decoder + .read(&mut buf) + .map_err(|e| eother!(format!("failed to decompress data, {}", e)))?; + if sz == 0 { + break; + } + hasher.digest_update(&buf[..sz]); + writer + .write_all(&buf[..sz]) + .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; + count += sz as u64; + } + } else if self.flags & TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits() != 0 { + return Err(eother!("unsupported compression algorithm lz4_block.")); + } else if self.flags & TocEntryFlags::COMPRESSION_NONE.bits() != 0 { + hasher.digest_update(buf); + writer + .write_all(buf) + .map_err(|e| eother!(format!("failed to write decompressed data, {}", e)))?; + count = buf.len() as u64; + } else { + return Err(eother!("unsupported compression algorithm.")); + } + + if count != self.uncompressed_size { + return Err(eother!(format!( + "size of decompressed content doesn't match, expect {}, got {}", + self.uncompressed_size, count, + ))); + } + let digest = hasher.digest_finalize(); + if digest.data != self.uncompressed_digest { + return Err(eother!("digest of decompressed content doesn't match")); + } + + Ok(()) + } +} + +/// Container to host a group of ToC entries. +pub struct TocEntryList { + entries: Vec, + toc_digest: RafsDigest, + toc_size: u32, +} + +impl Default for TocEntryList { + fn default() -> Self { + Self::new() + } +} + +impl TocEntryList { + /// Create a new instance of [TocEntryList]. + pub fn new() -> Self { + Self { + entries: Vec::new(), + toc_digest: RafsDigest::default(), + toc_size: 0, + } + } + + /// Add a ToC entry into the list. + pub fn add( + &mut self, + name: &str, + compressor: compress::Algorithm, + uncompressed_digest: RafsDigest, + compressed_offset: u64, + compressed_size: u64, + uncompressed_size: u64, + ) -> Result<&mut TocEntry> { + let name_size = name.as_bytes().len(); + if name_size > 16 { + return Err(eother!(format!("invalid entry name length {}", name_size))); + } + + let last = self.entries.len(); + let target = &mut [0u8; 16]; + target[..name_size].clone_from_slice(name.as_bytes()); + let mut entry = TocEntry { + flags: 0, + reserved1: 0, + name: *target, + uncompressed_digest: uncompressed_digest.data, + compressed_offset, + compressed_size, + uncompressed_size, + reserved2: [0u8; 48], + }; + entry.set_compressor(compressor)?; + self.entries.push(entry); + + Ok(&mut self.entries[last]) + } + + /// Convert object to a byte slice. + pub fn as_bytes(&self) -> &[u8] { + let (_, data, _) = unsafe { self.entries.align_to::() }; + data + } + + /// Get ToC entry with specified name. + pub fn get_entry(&self, name: &str) -> Option<&TocEntry> { + for toc in self.entries.iter() { + if let Ok(n) = toc.name() { + if n == name { + return Some(toc); + } + } + } + + None + } + + /// Get digest of ToC content. + pub fn toc_digest(&self) -> &RafsDigest { + &self.toc_digest + } + + /// Get size of ToC content. + pub fn toc_size(&self) -> u32 { + self.toc_size + } + + /// Read a [TocEntryList] from a [BlobReader]. 
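+ ///
+ /// # Example
+ ///
+ /// A sketch mirroring the unit tests below; `blob` is assumed to be a reader
+ /// obtained from a blob backend, and no cache file is written (`None`).
+ ///
+ /// ```ignore
+ /// let location = TocLocation::default();
+ /// let toc = TocEntryList::read_from_blob::<File>(blob.as_ref(), None, &location)?;
+ /// assert!(toc.get_entry(TOC_ENTRY_BOOTSTRAP).is_some());
+ /// ```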
+ pub fn read_from_blob( + reader: &dyn BlobReader, + cache_file: Option<&mut W>, + location: &TocLocation, + ) -> Result { + let (buf, _) = Self::read_toc_header(reader, location)?; + if let Some(writer) = cache_file { + writer.write_all(&buf)?; + } + Self::parse_toc_header(&buf, location) + } + + /// Read a [TocEntryList] from cache file, and fallback to storage backend. + pub fn read_from_cache_file>( + path: P, + reader: &dyn BlobReader, + location: &TocLocation, + ) -> Result { + location.validate()?; + + if let Ok(mut file) = OpenOptions::new().read(true).open(path.as_ref()) { + let md = file.metadata()?; + let size = md.len(); + if size > 512 && size % 128 == 0 && md.len() <= 0x1000 { + let mut buf = alloc_buf(size as usize); + file.read_exact(&mut buf) + .map_err(|e| eother!(format!("failed to read ToC from cache, {}", e)))?; + if let Ok(toc) = Self::parse_toc_header(&buf, location) { + return Ok(toc); + } + } + } + + let p = path + .as_ref() + .to_path_buf() + .with_extension("toc_downloading"); + if let Ok(mut file) = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(p.as_path()) + { + match Self::read_from_blob(reader, Some(&mut file), location) { + Ok(v) => { + let _ = fs::rename(p, path.as_ref()); + Ok(v) + } + Err(e) => { + let _ = fs::remove_file(p); + Err(e) + } + } + } else { + Self::read_from_blob::(reader, None, location) + } + } + + fn read_toc_header(reader: &dyn BlobReader, location: &TocLocation) -> Result<(Vec, u64)> { + location.validate()?; + let (offset, size) = if location.auto_detect { + let blob_size = reader + .blob_size() + .map_err(|e| eio!(format!("failed to get blob size, {}", e)))?; + let size = if blob_size > 0x1000 { + 0x1000 + } else { + blob_size >> 7 << 7 + }; + (blob_size - size, size) + } else { + (location.offset, location.size) + }; + + let size = size as usize; + let mut buf = alloc_buf(size); + let sz = reader + .read(&mut buf, offset) + .map_err(|e| eother!(format!("failed to read ToC from backend, {}", e)))?; + if sz != size { + return Err(eother!(format!( + "failed to read ToC from backend, expect {}, got {} bytes", + size, sz + ))); + } + + Ok((buf, offset + 0x1000)) + } + + fn parse_toc_header(buf: &[u8], location: &TocLocation) -> Result { + if buf.len() < 512 { + return Err(Error::new( + ErrorKind::InvalidData, + format!("blob ToC size {} is too small", buf.len()), + )); + } + let size = buf.len() - 512; + let header = Header::from_byte_slice(&buf[size..]); + let entry_type = header.entry_type(); + if entry_type != EntryType::Regular { + return Err(Error::new( + ErrorKind::Other, + "Tar entry type for ToC is not a regular file", + )); + } + let entry_size = header.entry_size().map_err(|_| { + Error::new(ErrorKind::Other, "failed to get entry size from tar header") + })?; + if entry_size > size as u64 { + return Err(Error::new( + ErrorKind::Other, + format!( + "invalid toc entry size in tar header, expect {}, got {}", + size, entry_size + ), + )); + } + let name = header.path().map_err(|_| { + Error::new( + ErrorKind::Other, + "failed to get ToC file name from tar header", + ) + })?; + if name != Path::new(TOC_ENTRY_BLOB_TOC) { + return Err(Error::new( + ErrorKind::Other, + format!( + "ToC file name from tar header doesn't match, {}", + name.display() + ), + )); + } + let _header = header + .as_gnu() + .ok_or_else(|| Error::new(ErrorKind::Other, "invalid GNU tar header for ToC"))?; + + let mut pos = size - entry_size as usize; + let mut list = TocEntryList::new(); + list.toc_digest = 
digest::RafsDigest::from_buf(&buf[pos..], digest::Algorithm::Sha256); + list.toc_size = (entry_size + 512) as u32; + if location.validate_digest && list.toc_digest != location.digest { + return Err(eother!(format!( + "toc content digest value doesn't match, expect {:?}, got {:?}", + location.digest.data, list.toc_digest.data + ))); + } + + while pos < size { + let mut entry = TocEntry::default(); + let s = unsafe { + slice::from_raw_parts_mut(&mut entry as *mut _ as *mut u8, size_of::()) + }; + s.copy_from_slice(&buf[pos..pos + size_of::()]); + list.entries.push(entry); + pos += size_of::(); + } + + Ok(list) + } + + /// Extract `image.boot` and/or `blob.digest` from a [BlobReader] into files. + pub fn extract_from_blob>( + &self, + reader: Arc, + bootstrap: Option

<P>,
+ digest: Option<P>
, + ) -> Result<()> { + if let Some(path) = bootstrap { + let bootstrap = self + .get_entry(TOC_ENTRY_BOOTSTRAP) + .ok_or_else(|| enoent!("`image.boot` doesn't exist in the ToC list"))?; + let compressor = bootstrap.compressor()?; + if compressor == compress::Algorithm::None + && bootstrap.compressed_size() != bootstrap.uncompressed_size() + { + return Err(einval!("invalid ToC entry for `image.boot`")); + } + + let mut ready = false; + if path.as_ref().exists() { + let mut file = OpenOptions::new().read(true).open(path.as_ref())?; + let digest = RafsDigest::from_reader(&mut file, digest::Algorithm::Sha256)?; + if digest.data == bootstrap.uncompressed_digest { + ready = true; + } + } + if !ready { + let p = path + .as_ref() + .to_path_buf() + .with_extension("toc_downloading"); + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(p.as_path())?; + bootstrap + .extract_from_reader(reader.clone(), &mut file) + .map_err(|e| { + let _ = fs::remove_file(&p); + e + })?; + fs::rename(&p, path).map_err(|e| { + let _ = fs::remove_file(&p); + e + })?; + } + } + + if let Some(path) = digest { + let cda = self + .get_entry(TOC_ENTRY_BLOB_DIGEST) + .ok_or_else(|| enoent!("`blob.digest` doesn't exist in the ToC list"))?; + let compressor = cda.compressor()?; + if compressor == compress::Algorithm::None + && cda.compressed_size() != cda.uncompressed_size() + { + return Err(einval!("invalid ToC entry for `blob.digest`")); + } + + let mut ready = false; + if path.as_ref().exists() { + let mut file = OpenOptions::new().read(true).open(path.as_ref())?; + let digest = RafsDigest::from_reader(&mut file, digest::Algorithm::Sha256)?; + if digest.data == cda.uncompressed_digest { + ready = true; + } + } + if !ready { + let p = path + .as_ref() + .to_path_buf() + .with_extension("toc_downloading"); + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(p.as_path())?; + cda.extract_from_reader(reader.clone(), &mut file) + .map_err(|e| { + let _ = fs::remove_file(&p); + e + })?; + fs::rename(&p, path).map_err(|e| { + let _ = fs::remove_file(&p); + e + })?; + } + } + + Ok(()) + } + + /// Extract inlined RAFS metadata from data blobs. 
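+ ///
+ /// # Example
+ ///
+ /// A hedged sketch: `blob_id` and `config` (an `Arc<ConfigV2>` with backend
+ /// and cache sections) are assumptions; the returned path points at the
+ /// extracted `image.boot` file inside the cache working directory.
+ ///
+ /// ```ignore
+ /// let bootstrap_path = TocEntryList::extract_rafs_meta(blob_id, config)?;
+ /// println!("bootstrap extracted to {}", bootstrap_path.display());
+ /// ```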
+ pub fn extract_rafs_meta(id: &str, config: Arc) -> Result { + let backend_config = config.get_backend_config()?; + let workdir = config.get_cache_working_directory()?; + let path = PathBuf::from(workdir); + if !path.is_dir() { + return Err(Error::new( + ErrorKind::NotFound, + "invalid cache working directory", + )); + } + let path = path.join(id).with_extension(TOC_ENTRY_BOOTSTRAP); + + let blob_mgr = BlobFactory::new_backend(backend_config, "extract_rafs_meta")?; + let reader = blob_mgr + .get_reader(id) + .map_err(|e| eother!(format!("failed to get reader for blob {}, {}", id, e)))?; + let location = TocLocation::default(); + let (buf, blob_size) = Self::read_toc_header(reader.as_ref(), &location)?; + + if let Ok(toc) = Self::parse_toc_header(&buf, &location) { + toc.extract_from_blob(reader, Some(path.clone()), None)?; + } else { + if buf.len() < 512 { + return Err(einval!(format!("blob ToC size {} is too small", buf.len()))); + } + let header = Header::from_byte_slice(&buf[buf.len() - 512..]); + let entry_type = header.entry_type(); + if entry_type != EntryType::Regular { + return Err(eother!( + "Tar entry type for `image.boot` is not a regular file" + )); + } + let name = header + .path() + .map_err(|_| eother!("failed to get `image.boot` file name from tar header"))?; + if name != Path::new(TOC_ENTRY_BOOTSTRAP) { + return Err(eother!(format!( + "file name from tar header doesn't match `image.boot`, {}", + name.display() + ))); + } + let _header = header + .as_gnu() + .ok_or_else(|| eother!("invalid GNU tar header for ToC"))?; + let entry_size = header + .entry_size() + .map_err(|_| eother!("failed to get entry size from tar header"))?; + if entry_size > blob_size - 512 { + return Err(eother!(format!( + "invalid `image.boot` entry size in tar header, max {}, got {}", + blob_size - 512, + entry_size + ))); + } + let offset = blob_size - 512 - entry_size; + + let mut toc = TocEntryList::new(); + toc.add( + TOC_ENTRY_BOOTSTRAP, + compress::Algorithm::None, + RafsDigest::default(), + offset, + entry_size, + entry_size, + )?; + toc.extract_from_blob(reader, Some(path.clone()), None)?; + } + + Ok(path) + } +} + +/// Information to locate and validate ToC content. +#[derive(Debug)] +pub struct TocLocation { + /// Enable validating digest of the ToC content. + pub validate_digest: bool, + /// Auto detect location of ToC content. + pub auto_detect: bool, + /// Offset of the ToC content in the data blob. + pub offset: u64, + /// Size of the ToC content. + pub size: u64, + /// SHA256 digest of ToC content. + pub digest: RafsDigest, +} + +impl Default for TocLocation { + fn default() -> Self { + TocLocation { + validate_digest: false, + auto_detect: true, + offset: 0, + size: 0, + digest: RafsDigest::default(), + } + } +} + +impl TocLocation { + /// Create a [TocLocation] object with offset and size. + pub fn new(offset: u64, size: u64) -> Self { + TocLocation { + validate_digest: false, + auto_detect: false, + offset, + size, + digest: RafsDigest::default(), + } + } + + /// Create a [TocLocation] object with offset, size and digest. 
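+ ///
+ /// # Example
+ ///
+ /// A small sketch (offset, size and digest values are placeholders); the
+ /// resulting location enables digest validation when the ToC is parsed.
+ ///
+ /// ```ignore
+ /// let location = TocLocation::with_digest(9010, 1024, expected_digest);
+ /// assert!(location.validate_digest);
+ /// ```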
+ pub fn with_digest(offset: u64, size: u64, digest: RafsDigest) -> Self { + TocLocation { + validate_digest: true, + auto_detect: false, + offset, + size, + digest, + } + } + + fn validate(&self) -> Result<()> { + if !self.auto_detect && (!(512..=0x10000).contains(&self.size) || self.size % 128 != 0) { + return Err(eother!(format!("invalid size {} of blob ToC", self.size))); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::factory::BlobFactory; + use nydus_api::{BackendConfigV2, LocalFsConfig}; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_read_toc_list() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir).join("../tests/texture/toc"); + let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; + let digest = RafsDigest { + data: [ + 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, + 89, 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, + ], + }; + let config = BackendConfigV2 { + backend_type: "localfs".to_string(), + localfs: Some(LocalFsConfig { + blob_file: "".to_string(), + dir: path.to_str().unwrap().to_string(), + alt_dirs: vec![], + }), + localdisk: None, + oss: None, + registry: None, + s3: None, + http_proxy: None, + }; + let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); + let blob = blob_mgr.get_reader(id).unwrap(); + let location = TocLocation::with_digest(9010, 1024, digest); + let mut list = + TocEntryList::read_from_blob::(blob.as_ref(), None, &location).unwrap(); + assert_eq!(list.entries.len(), 4); + + assert!(list.get_entry(TOC_ENTRY_BLOB_RAW).is_some()); + assert!(list.get_entry(TOC_ENTRY_BOOTSTRAP).is_some()); + assert!(list.get_entry(TOC_ENTRY_BLOB_META).is_some()); + assert!(list.get_entry(TOC_ENTRY_BLOB_META_HEADER).is_some()); + + let mut buf = Vec::new(); + let entry = list.get_entry(TOC_ENTRY_BLOB_META).unwrap(); + assert_eq!(entry.uncompressed_size(), 0x30); + entry.extract_from_reader(blob.clone(), &mut buf).unwrap(); + assert!(!buf.is_empty()); + + let mut buf = Vec::new(); + let entry = list.get_entry(TOC_ENTRY_BLOB_META_HEADER).unwrap(); + assert_eq!(entry.uncompressed_size(), 0x1000); + entry.extract_from_reader(blob.clone(), &mut buf).unwrap(); + assert!(!buf.is_empty()); + + assert!(list + .add( + TOC_ENTRY_BLOB_DIGEST, + compress::Algorithm::Lz4Block, + digest, + 0, + 2, + 3 + ) + .is_ok()); + assert!(list.get_entry(TOC_ENTRY_BLOB_DIGEST).is_some()); + } + + #[test] + fn test_parse_toc_list() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir).join("../tests/texture/toc"); + let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; + let mut digest = RafsDigest { + data: [ + 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, + 89, 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, + ], + }; + let config = BackendConfigV2 { + backend_type: "localfs".to_string(), + localfs: Some(LocalFsConfig { + blob_file: "".to_string(), + dir: path.to_str().unwrap().to_string(), + alt_dirs: vec![], + }), + oss: None, + registry: None, + s3: None, + http_proxy: None, + localdisk: None, + }; + let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); + let blob = blob_mgr.get_reader(id).unwrap(); + + digest.data[0] = 0; + let location = TocLocation::with_digest(9010, 1024, digest); + assert!(TocEntryList::read_from_blob::(blob.as_ref(), None, 
&location).is_err()); + digest.data[0] = 79u8; + + let location = TocLocation::new(9000, 1024); + assert!(TocEntryList::read_from_blob::(blob.as_ref(), None, &location).is_err()); + + let location = Default::default(); + let list = + TocEntryList::read_from_blob::(blob.as_ref(), None, &location).unwrap(); + assert_eq!(list.entries.len(), 4); + } + + #[test] + fn test_read_from_cache_file() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir).join("../tests/texture/toc"); + let id = "2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca"; + let config = BackendConfigV2 { + backend_type: "localfs".to_string(), + localfs: Some(LocalFsConfig { + blob_file: "".to_string(), + dir: path.to_str().unwrap().to_string(), + alt_dirs: vec![], + }), + oss: None, + registry: None, + s3: None, + localdisk: None, + http_proxy: None, + }; + let blob_mgr = BlobFactory::new_backend(&config, id).unwrap(); + let blob = blob_mgr.get_reader(id).unwrap(); + + let tempfile = TempFile::new().unwrap(); + let path = tempfile.as_path().to_path_buf(); + let mut file = tempfile.into_file(); + file.write_all(&[0u8; 32]).unwrap(); + + let location = Default::default(); + let list = TocEntryList::read_from_cache_file(&path, blob.as_ref(), &location).unwrap(); + assert_eq!(list.entries.len(), 4); + assert_eq!(path.metadata().unwrap().len(), 0x1000); + let list = TocEntryList::read_from_cache_file(&path, blob.as_ref(), &location).unwrap(); + assert_eq!(list.entries.len(), 4); + + list.extract_from_blob(blob.clone(), Some(path.as_path()), None) + .unwrap(); + assert_eq!(path.metadata().unwrap().len(), 20480); + list.extract_from_blob(blob.clone(), Some(path.as_path()), None) + .unwrap(); + assert_eq!(path.metadata().unwrap().len(), 20480); + } + + #[test] + fn test_toc_entry_flags() { + let flags = TocEntryFlags::try_from(compress::Algorithm::None).unwrap(); + assert_eq!(flags, TocEntryFlags::COMPRESSION_NONE); + let flags = TocEntryFlags::try_from(compress::Algorithm::Lz4Block).unwrap(); + assert_eq!(flags, TocEntryFlags::COMPRESSION_LZ4_BLOCK); + let flags = TocEntryFlags::try_from(compress::Algorithm::Zstd).unwrap(); + assert_eq!(flags, TocEntryFlags::COMPRESSION_ZSTD); + let _e = TocEntryFlags::try_from(compress::Algorithm::GZip).unwrap_err(); + } + + fn extract_from_buf_with_different_flags(entry: &TocEntry, buf: &[u8]) -> Result { + let tmp_file = TempFile::new(); + let mut file = OpenOptions::new() + .write(true) + .read(true) + .open(tmp_file.unwrap().as_path()) + .unwrap(); + + entry.extract_from_buf(&buf, &mut file)?; + + let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256); + let mut buffer = [0; 1024]; + loop { + let count = file.read(&mut buffer)?; + if count == 0 { + break; + } + hasher.digest_update(&buffer[..count]); + } + Ok(hasher.digest_finalize().into()) + } + + #[test] + fn test_extract_from_buf() { + let mut entry = TocEntry { + flags: 0, + reserved1: 0, + name: [0u8; 16], + uncompressed_digest: [ + 45, 15, 227, 154, 167, 87, 190, 28, 152, 93, 55, 27, 96, 217, 56, 121, 96, 131, + 226, 94, 70, 74, 193, 156, 222, 228, 46, 156, 49, 169, 143, 53, + ], + compressed_offset: 0, + compressed_size: 0, + uncompressed_size: 0, + reserved2: [0u8; 48], + }; + + let buf = [ + 79u8, 223, 187, 54, 239, 116, 163, 198, 58, 40, 226, 171, 175, 165, 64, 68, 199, 89, + 65, 85, 190, 182, 221, 173, 159, 54, 130, 92, 254, 88, 40, 108, + ]; + + entry.flags = TocEntryFlags::COMPRESSION_LZ4_BLOCK.bits(); + 
assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); + + entry.flags = (!TocEntryFlags::empty()).bits() + 1; + assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); + + entry.flags = TocEntryFlags::COMPRESSION_NONE.bits(); + assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); + entry.uncompressed_size = 32; + let s = extract_from_buf_with_different_flags(&entry, &buf); + assert!(s.is_ok()); + assert_eq!( + s.unwrap(), + String::from("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") + ); + + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir) + .join("../tests/texture/zstd") + .join("2fa78cad554b75ac91a4a125ed148d0ddeb25efa4aaa8bd80e5dc292690a4dca.zst"); + let mut file = OpenOptions::new().read(true).open(path.as_path()).unwrap(); + let mut buffer = [0; 1024]; + let mut buf = vec![]; + loop { + let count = file.read(&mut buffer).unwrap(); + if count == 0 { + break; + } + buf.extend_from_slice(&buffer[..count]); + } + entry.flags = TocEntryFlags::COMPRESSION_ZSTD.bits(); + entry.uncompressed_size = 10034; + assert!(extract_from_buf_with_different_flags(&entry, &buf).is_err()); + entry.uncompressed_digest = [ + 47, 167, 140, 173, 85, 75, 117, 172, 145, 164, 161, 37, 237, 20, 141, 13, 222, 178, 94, + 250, 74, 170, 139, 216, 14, 93, 194, 146, 105, 10, 77, 202, + ]; + let s = extract_from_buf_with_different_flags(&entry, &buf); + assert!(s.is_ok()); + assert_eq!( + s.unwrap(), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_owned() + ); + } +} diff --git a/storage/src/meta/zran.rs b/storage/src/meta/zran.rs index f954e3b91a4..2cfef8dd545 100644 --- a/storage/src/meta/zran.rs +++ b/storage/src/meta/zran.rs @@ -1,266 +1,266 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::{BufReader, Read, Result}; -use std::mem::size_of; -use std::slice; - -use nydus_utils::compress::zlib_random::{ZranContext, ZranGenerator, ZranReader}; - -use crate::meta::chunk_info_v2::BlobChunkInfoV2Ondisk; -use crate::meta::{round_up_4k, BlobMetaChunkInfo}; -use crate::RAFS_DEFAULT_CHUNK_SIZE; - -/// Context information to support random access to zlib/gzip stream . -#[repr(C, packed)] -pub struct ZranInflateContext { - /// Offset in the original compression data stream. - in_offset: u64, - /// Offset in the uncompressed data stream. - out_offset: u64, - /// Offset into the dictionary table to get the inflate dictionary. - dict_offset: u64, - /// Size of original compressed data. - in_len: u32, - /// Size of uncompressed data. - out_len: u32, - /// Size of inflate dictionary. - dict_size: u32, - /// Optional previous byte in the original compressed data stream, used when `ctx_bits` is non-zero. - ctx_byte: u8, - /// Bits from previous byte to feeds into the inflate context for random access. - ctx_bits: u8, - __reserved1: u8, - __reserved2: u8, -} - -impl ZranInflateContext { - /// Get offset into the compressed stream. - pub fn in_offset(&self) -> u64 { - u64::from_le(self.in_offset) - } - - /// Get size of compressed data. - pub fn in_size(&self) -> u32 { - u32::from_le(self.in_len) - } - - /// Get offset into the decompressed stream. - pub fn out_offset(&self) -> u64 { - u64::from_le(self.out_offset) - } - - /// Get size of the decompressed data. 
- pub fn out_size(&self) -> u32 { - u32::from_le(self.out_len) - } - - /// Get offset into the dictionary table to fetch associated inflate dictionary. - pub fn dict_offset(&self) -> u64 { - u64::from_le(self.dict_offset) - } - - /// Get size of the associated inflate dictionary. - pub fn dict_size(&self) -> u32 { - u32::from_le(self.dict_size) - } - - /// Get the byte for zlib random decompression. - pub fn ctx_byte(&self) -> u8 { - self.ctx_byte - } - - /// Get the byte for zlib random decompression. - pub fn ctx_bits(&self) -> u8 { - self.ctx_bits - } - - /// Convert to an immutable u8 slice. - pub fn as_slice(&self) -> &[u8] { - unsafe { - slice::from_raw_parts( - self as *const ZranInflateContext as *const u8, - size_of::(), - ) - } - } -} - -impl From<&ZranInflateContext> for ZranContext { - fn from(ctx: &ZranInflateContext) -> Self { - ZranContext { - in_offset: ctx.in_offset(), - out_offset: ctx.out_offset(), - in_len: ctx.in_size(), - out_len: ctx.out_size(), - ctx_byte: ctx.ctx_byte(), - ctx_bits: ctx.ctx_bits(), - dict: vec![], - } - } -} - -/// Struct to generate [ZranInflateContext] objects for zlib/gzip stream. -pub struct ZranContextGenerator { - generator: ZranGenerator, - reader: ZranReader, - uncomp_pos: u64, -} - -impl ZranContextGenerator { - /// Create a new instance of [ZranContextGenerator]. - pub fn new(file: R) -> Result { - let reader = ZranReader::new(file)?; - let mut generator = ZranGenerator::new(reader.clone()); - - generator.set_min_compressed_size(RAFS_DEFAULT_CHUNK_SIZE / 2); - generator.set_max_compressed_size(RAFS_DEFAULT_CHUNK_SIZE); - generator.set_max_uncompressed_size(RAFS_DEFAULT_CHUNK_SIZE * 2); - - Ok(Self { - generator, - reader, - uncomp_pos: 0, - }) - } - - /// Create a new instance of [ZranContextGenerator] from a `BufReader`. - pub fn from_buf_reader(buf_reader: BufReader) -> Result { - let buf = buf_reader.buffer().to_vec(); - let file = buf_reader.into_inner(); - - let reader = ZranReader::new(file)?; - reader.set_initial_data(&buf); - - let mut generator = ZranGenerator::new(reader.clone()); - generator.set_min_compressed_size(RAFS_DEFAULT_CHUNK_SIZE / 2); - generator.set_max_compressed_size(RAFS_DEFAULT_CHUNK_SIZE); - generator.set_max_uncompressed_size(RAFS_DEFAULT_CHUNK_SIZE * 2); - - Ok(Self { - generator, - reader, - uncomp_pos: 0, - }) - } - - /// Get reader to read decompressed data. - pub fn reader(&self) -> ZranReader { - self.reader.clone() - } - - /// Get number of zlib/gzip inflate context entries. - pub fn len(&self) -> usize { - self.generator.get_compression_ctx_array().len() - } - - /// Check whether there's any zlib/gzip inflate context entries. - pub fn is_empty(&self) -> bool { - self.generator.get_compression_ctx_array().is_empty() - } - - /// Begin transaction to generate a data chunk for a file. - pub fn start_chunk(&mut self, chunk_size: u64) -> Result { - self.generator.begin_read(chunk_size) - } - - /// Finish the transaction to generate a data chunk and return the chunk info struct. 
- pub fn finish_chunk(&mut self) -> Result { - let info = self.generator.end_read()?; - let mut chunk = BlobChunkInfoV2Ondisk::default(); - chunk.set_compressed_offset(info.in_pos); - chunk.set_compressed_size(info.in_len); - chunk.set_uncompressed_offset(self.uncomp_pos); - chunk.set_uncompressed_size(info.ci_len); - chunk.set_zran(true); - chunk.set_zran_index(info.ci_index); - chunk.set_zran_offset(info.ci_offset); - chunk.set_compressed(true); - chunk.set_encrypted(false); - - self.uncomp_pos += round_up_4k(info.ci_len as u64); - - Ok(chunk) - } - - /// Convert all the zlib/gzip random access information to a u8 vector. - pub fn to_vec(&self) -> Result<(Vec, u32)> { - let mut data = Vec::new(); - let records = self.generator.get_compression_ctx_array(); - let mut dict_off = 0; - - for info in records { - let ctx = ZranInflateContext { - in_offset: u64::to_le(info.in_offset), - out_offset: u64::to_le(info.out_offset), - dict_offset: u64::to_le(dict_off), - in_len: u32::to_le(info.in_len), - out_len: u32::to_le(info.out_len), - dict_size: u32::to_le(info.dict.len() as u32), - ctx_byte: info.ctx_byte, - ctx_bits: info.ctx_bits, - __reserved1: 0, - __reserved2: 0, - }; - data.extend_from_slice(ctx.as_slice()); - dict_off += info.dict.len() as u64; - } - for info in records { - if !info.dict.is_empty() { - data.extend_from_slice(&info.dict); - } - } - - Ok((data, records.len() as u32)) - } -} - -impl Read for ZranContextGenerator { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.generator.read(buf) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs::OpenOptions; - use std::path::PathBuf; - use tar::{Archive, EntryType}; - - #[test] - fn test_generate_chunk_info() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - - let mut generator = ZranContextGenerator::new(file).unwrap(); - let mut tar = Archive::new(generator.reader()); - tar.set_ignore_zeros(true); - - generator.generator.set_min_compressed_size(1024); - generator.generator.set_max_compressed_size(2048); - generator.generator.set_max_uncompressed_size(4096); - - assert_eq!(generator.len(), 0); - - let entries = tar.entries().unwrap(); - for entry in entries { - let mut entry = entry.unwrap(); - if entry.header().entry_type() == EntryType::Regular { - loop { - let _start = generator.start_chunk(4096).unwrap(); - let mut buf = vec![0u8; 4096]; - let sz = entry.read(&mut buf).unwrap(); - if sz == 0 { - break; - } - let _chunk = generator.finish_chunk().unwrap(); - } - } - } - - assert_eq!(generator.len(), 3); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{BufReader, Read, Result}; +use std::mem::size_of; +use std::slice; + +use nydus_utils::compress::zlib_random::{ZranContext, ZranGenerator, ZranReader}; + +use crate::meta::chunk_info_v2::BlobChunkInfoV2Ondisk; +use crate::meta::{round_up_4k, BlobMetaChunkInfo}; +use crate::RAFS_DEFAULT_CHUNK_SIZE; + +/// Context information to support random access to zlib/gzip stream . +#[repr(C, packed)] +pub struct ZranInflateContext { + /// Offset in the original compression data stream. + in_offset: u64, + /// Offset in the uncompressed data stream. + out_offset: u64, + /// Offset into the dictionary table to get the inflate dictionary. + dict_offset: u64, + /// Size of original compressed data. 
+ in_len: u32, + /// Size of uncompressed data. + out_len: u32, + /// Size of inflate dictionary. + dict_size: u32, + /// Optional previous byte in the original compressed data stream, used when `ctx_bits` is non-zero. + ctx_byte: u8, + /// Bits from previous byte to feeds into the inflate context for random access. + ctx_bits: u8, + __reserved1: u8, + __reserved2: u8, +} + +impl ZranInflateContext { + /// Get offset into the compressed stream. + pub fn in_offset(&self) -> u64 { + u64::from_le(self.in_offset) + } + + /// Get size of compressed data. + pub fn in_size(&self) -> u32 { + u32::from_le(self.in_len) + } + + /// Get offset into the decompressed stream. + pub fn out_offset(&self) -> u64 { + u64::from_le(self.out_offset) + } + + /// Get size of the decompressed data. + pub fn out_size(&self) -> u32 { + u32::from_le(self.out_len) + } + + /// Get offset into the dictionary table to fetch associated inflate dictionary. + pub fn dict_offset(&self) -> u64 { + u64::from_le(self.dict_offset) + } + + /// Get size of the associated inflate dictionary. + pub fn dict_size(&self) -> u32 { + u32::from_le(self.dict_size) + } + + /// Get the byte for zlib random decompression. + pub fn ctx_byte(&self) -> u8 { + self.ctx_byte + } + + /// Get the byte for zlib random decompression. + pub fn ctx_bits(&self) -> u8 { + self.ctx_bits + } + + /// Convert to an immutable u8 slice. + pub fn as_slice(&self) -> &[u8] { + unsafe { + slice::from_raw_parts( + self as *const ZranInflateContext as *const u8, + size_of::(), + ) + } + } +} + +impl From<&ZranInflateContext> for ZranContext { + fn from(ctx: &ZranInflateContext) -> Self { + ZranContext { + in_offset: ctx.in_offset(), + out_offset: ctx.out_offset(), + in_len: ctx.in_size(), + out_len: ctx.out_size(), + ctx_byte: ctx.ctx_byte(), + ctx_bits: ctx.ctx_bits(), + dict: vec![], + } + } +} + +/// Struct to generate [ZranInflateContext] objects for zlib/gzip stream. +pub struct ZranContextGenerator { + generator: ZranGenerator, + reader: ZranReader, + uncomp_pos: u64, +} + +impl ZranContextGenerator { + /// Create a new instance of [ZranContextGenerator]. + pub fn new(file: R) -> Result { + let reader = ZranReader::new(file)?; + let mut generator = ZranGenerator::new(reader.clone()); + + generator.set_min_compressed_size(RAFS_DEFAULT_CHUNK_SIZE / 2); + generator.set_max_compressed_size(RAFS_DEFAULT_CHUNK_SIZE); + generator.set_max_uncompressed_size(RAFS_DEFAULT_CHUNK_SIZE * 2); + + Ok(Self { + generator, + reader, + uncomp_pos: 0, + }) + } + + /// Create a new instance of [ZranContextGenerator] from a `BufReader`. + pub fn from_buf_reader(buf_reader: BufReader) -> Result { + let buf = buf_reader.buffer().to_vec(); + let file = buf_reader.into_inner(); + + let reader = ZranReader::new(file)?; + reader.set_initial_data(&buf); + + let mut generator = ZranGenerator::new(reader.clone()); + generator.set_min_compressed_size(RAFS_DEFAULT_CHUNK_SIZE / 2); + generator.set_max_compressed_size(RAFS_DEFAULT_CHUNK_SIZE); + generator.set_max_uncompressed_size(RAFS_DEFAULT_CHUNK_SIZE * 2); + + Ok(Self { + generator, + reader, + uncomp_pos: 0, + }) + } + + /// Get reader to read decompressed data. + pub fn reader(&self) -> ZranReader { + self.reader.clone() + } + + /// Get number of zlib/gzip inflate context entries. + pub fn len(&self) -> usize { + self.generator.get_compression_ctx_array().len() + } + + /// Check whether there's any zlib/gzip inflate context entries. 
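The `ZranInflateContext` accessors above read `#[repr(C, packed)]` fields that are stored little-endian and exposed as a raw byte slice. Below is a minimal, self-contained sketch of that serialization technique only; `DiskRecord` and its fields are illustrative stand-ins, not the actual on-disk format.

```rust
use std::mem::size_of;
use std::slice;

/// Illustrative packed record; fields are stored little-endian, like ZranInflateContext.
#[repr(C, packed)]
struct DiskRecord {
    in_offset: u64,
    in_len: u32,
    __reserved: u32,
}

impl DiskRecord {
    fn new(in_offset: u64, in_len: u32) -> Self {
        DiskRecord {
            in_offset: u64::to_le(in_offset),
            in_len: u32::to_le(in_len),
            __reserved: 0,
        }
    }

    /// Copy the packed field by value before converting from LE,
    /// which avoids taking a reference to an unaligned field.
    fn in_offset(&self) -> u64 {
        let v = self.in_offset;
        u64::from_le(v)
    }

    fn in_len(&self) -> u32 {
        let v = self.in_len;
        u32::from_le(v)
    }

    /// View the record as raw bytes, mirroring ZranInflateContext::as_slice().
    fn as_slice(&self) -> &[u8] {
        // Safe: the struct is plain-old-data with a fixed, packed layout.
        unsafe {
            slice::from_raw_parts(self as *const DiskRecord as *const u8, size_of::<DiskRecord>())
        }
    }
}

fn main() {
    let rec = DiskRecord::new(0x1122_3344_5566_7788, 4096);
    let bytes = rec.as_slice().to_vec();
    assert_eq!(bytes.len(), size_of::<DiskRecord>());

    // Rebuild the record from the byte buffer, e.g. after reading it back from a blob.
    let mut copy = DiskRecord::new(0, 0);
    unsafe {
        std::ptr::copy_nonoverlapping(
            bytes.as_ptr(),
            &mut copy as *mut DiskRecord as *mut u8,
            bytes.len(),
        );
    }
    assert_eq!(copy.in_offset(), 0x1122_3344_5566_7788);
    assert_eq!(copy.in_len(), 4096);
    println!("round-tripped {} bytes", bytes.len());
}
```

Storing the fields pre-converted with `to_le` and converting back with `from_le` keeps the on-disk representation stable regardless of host endianness.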
+ pub fn is_empty(&self) -> bool { + self.generator.get_compression_ctx_array().is_empty() + } + + /// Begin transaction to generate a data chunk for a file. + pub fn start_chunk(&mut self, chunk_size: u64) -> Result { + self.generator.begin_read(chunk_size) + } + + /// Finish the transaction to generate a data chunk and return the chunk info struct. + pub fn finish_chunk(&mut self) -> Result { + let info = self.generator.end_read()?; + let mut chunk = BlobChunkInfoV2Ondisk::default(); + chunk.set_compressed_offset(info.in_pos); + chunk.set_compressed_size(info.in_len); + chunk.set_uncompressed_offset(self.uncomp_pos); + chunk.set_uncompressed_size(info.ci_len); + chunk.set_zran(true); + chunk.set_zran_index(info.ci_index); + chunk.set_zran_offset(info.ci_offset); + chunk.set_compressed(true); + chunk.set_encrypted(false); + + self.uncomp_pos += round_up_4k(info.ci_len as u64); + + Ok(chunk) + } + + /// Convert all the zlib/gzip random access information to a u8 vector. + pub fn to_vec(&self) -> Result<(Vec, u32)> { + let mut data = Vec::new(); + let records = self.generator.get_compression_ctx_array(); + let mut dict_off = 0; + + for info in records { + let ctx = ZranInflateContext { + in_offset: u64::to_le(info.in_offset), + out_offset: u64::to_le(info.out_offset), + dict_offset: u64::to_le(dict_off), + in_len: u32::to_le(info.in_len), + out_len: u32::to_le(info.out_len), + dict_size: u32::to_le(info.dict.len() as u32), + ctx_byte: info.ctx_byte, + ctx_bits: info.ctx_bits, + __reserved1: 0, + __reserved2: 0, + }; + data.extend_from_slice(ctx.as_slice()); + dict_off += info.dict.len() as u64; + } + for info in records { + if !info.dict.is_empty() { + data.extend_from_slice(&info.dict); + } + } + + Ok((data, records.len() as u32)) + } +} + +impl Read for ZranContextGenerator { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.generator.read(buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::OpenOptions; + use std::path::PathBuf; + use tar::{Archive, EntryType}; + + #[test] + fn test_generate_chunk_info() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + + let mut generator = ZranContextGenerator::new(file).unwrap(); + let mut tar = Archive::new(generator.reader()); + tar.set_ignore_zeros(true); + + generator.generator.set_min_compressed_size(1024); + generator.generator.set_max_compressed_size(2048); + generator.generator.set_max_uncompressed_size(4096); + + assert_eq!(generator.len(), 0); + + let entries = tar.entries().unwrap(); + for entry in entries { + let mut entry = entry.unwrap(); + if entry.header().entry_type() == EntryType::Regular { + loop { + let _start = generator.start_chunk(4096).unwrap(); + let mut buf = vec![0u8; 4096]; + let sz = entry.read(&mut buf).unwrap(); + if sz == 0 { + break; + } + let _chunk = generator.finish_chunk().unwrap(); + } + } + } + + assert_eq!(generator.len(), 3); + } +} diff --git a/storage/src/remote/client.rs b/storage/src/remote/client.rs index 62a20710bd0..0eefc3c6a7c 100644 --- a/storage/src/remote/client.rs +++ b/storage/src/remote/client.rs @@ -1,771 +1,771 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
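`finish_chunk` above advances `uncomp_pos` by `round_up_4k(info.ci_len)`, so each chunk's uncompressed data starts on a 4 KiB boundary. The following sketch shows just that bookkeeping in isolation, with a local `round_up_4k` stand-in and plain tuples instead of `BlobChunkInfoV2Ondisk`; the chunk sizes are made up for illustration.

```rust
/// Local stand-in for the crate's round_up_4k helper: round up to the next 4 KiB boundary.
fn round_up_4k(v: u64) -> u64 {
    (v + 0xfff) & !0xfff
}

/// Assign 4 KiB-aligned uncompressed offsets to a sequence of chunk sizes,
/// mirroring how finish_chunk advances uncomp_pos.
fn layout_chunks(uncompressed_sizes: &[u64]) -> Vec<(u64, u64)> {
    let mut uncomp_pos = 0u64;
    let mut chunks = Vec::new();
    for &len in uncompressed_sizes {
        // (uncompressed_offset, uncompressed_size) for this chunk.
        chunks.push((uncomp_pos, len));
        uncomp_pos += round_up_4k(len);
    }
    chunks
}

fn main() {
    // Hypothetical chunk sizes produced by the zran generator.
    let chunks = layout_chunks(&[4096, 100, 8192, 1]);
    assert_eq!(chunks, vec![(0, 4096), (4096, 100), (8192, 8192), (16384, 1)]);
    for (off, len) in chunks {
        println!("chunk at 0x{:x}, {} bytes", off, len);
    }
}
```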
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::fs::File; -use std::io::Result; -use std::mem; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; -use std::sync::{Arc, Condvar, Mutex, MutexGuard}; -use std::time::{Duration, Instant}; - -use nix::sys::select::{select, FdSet}; -use vm_memory::ByteValued; - -use crate::cache::state::{BlobRangeMap, RangeMap}; -use crate::device::{BlobInfo, BlobIoRange, BlobObject}; -use crate::remote::connection::Endpoint; -use crate::remote::message::{ - FetchRangeReply, FetchRangeRequest, FetchRangeResult, GetBlobReply, GetBlobRequest, HeaderFlag, - MsgHeader, MsgValidator, RequestCode, -}; - -const REQUEST_TIMEOUT_SEC: u64 = 4; -const RANGE_MAP_SHIFT: u64 = 18; -const RANGE_MAP_MASK: u64 = (1 << RANGE_MAP_SHIFT) - 1; - -/// Manager to access and cache blob objects managed by remote blob manager. -/// -/// A `RemoteBlobMgr` object may be used to access services from a remote blob manager, and cache -/// blob information to improve performance. -pub struct RemoteBlobMgr { - remote_blobs: Arc, - server_connection: Arc, - workdir: String, -} - -impl RemoteBlobMgr { - /// Create a new instance of `RemoteBlobMgr`. - pub fn new(workdir: String, sock: &str) -> Result { - let remote_blobs = Arc::new(RemoteBlobs::new()); - let conn = ServerConnection::new(sock, remote_blobs.clone()); - - Ok(RemoteBlobMgr { - remote_blobs, - server_connection: Arc::new(conn), - workdir, - }) - } - - /// Connect to remote blob manager. - pub fn connect(&self) -> Result<()> { - self.server_connection.connect().map(|_| ()) - } - - /// Start to handle communication messages. - pub fn start(&self) -> Result<()> { - ServerConnection::start(self.server_connection.clone()) - } - - /// Shutdown the `RemoteblogMgr` instance. - pub fn shutdown(&self) { - self.server_connection.close(); - self.remote_blobs.reset(); - } - - /// Ping remote blog manager server. - pub fn ping(&self) -> Result<()> { - self.server_connection.call_ping() - } - - /// Get an `BlobObject` trait object to access the specified blob. 
- pub fn get_blob_object(&self, blob_info: &Arc) -> Result> { - if let Some(blob) = self.remote_blobs.get_blob(blob_info) { - return Ok(blob); - } - - loop { - let (file, base, token) = self.server_connection.call_get_blob(blob_info)?; - let file = Arc::new(file); - let blob = RemoteBlob::new( - blob_info.clone(), - self.server_connection.clone(), - file, - base, - token, - &self.workdir, - )?; - let blob = Arc::new(blob); - if let Some(blob) = self.remote_blobs.add_blob(blob, token) { - return Ok(blob); - } - } - } -} - -struct RemoteBlobs { - generation: AtomicU32, - active_blobs: Mutex>>, -} - -impl RemoteBlobs { - fn new() -> Self { - Self { - generation: AtomicU32::new(1), - active_blobs: Mutex::new(Vec::new()), - } - } - - fn reset(&self) { - self.active_blobs.lock().unwrap().truncate(0); - } - - fn add_blob(&self, blob: Arc, token: u64) -> Option> { - let mut guard = self.active_blobs.lock().unwrap(); - for b in guard.iter() { - if blob.blob_info.blob_id() == b.blob_info.blob_id() { - return Some(b.clone()); - } - } - - if (token >> 32) as u32 == self.get_generation() { - guard.push(blob.clone()); - return Some(blob); - } - - None - } - - fn get_blob(&self, blob_info: &Arc) -> Option> { - let guard = self.active_blobs.lock().unwrap(); - - for blob in guard.iter() { - if blob.blob_info.blob_id() == blob_info.blob_id() { - return Some(blob.clone()); - } - } - - None - } - - fn get_generation(&self) -> u32 { - self.generation.load(Ordering::Acquire) - } - - fn notify_disconnect(&self) { - self.generation.fetch_add(1, Ordering::AcqRel); - for blob in self.active_blobs.lock().unwrap().iter() { - blob.token.store(0, Ordering::Release); - } - } -} - -/// Struct to access and cache blob object managed by remote blob manager. -/// -/// The `RemoteBlob` structure acts as a proxy to access a blob managed by remote blob manager. -/// It has a separate data plane and control plane. A file descriptor will be received from the -/// remote blob manager, so all data access requests will be served by directly access the file -/// descriptor. And a communication channel will be used to communicate control message between -/// the client and the remote blob manager. To improve control plane performance, it may cache -/// blob metadata and chunk map to avoid unnecessary control messages. -struct RemoteBlob { - blob_info: Arc, - conn: Arc, - map: Arc, - file: Arc, - base: u64, - token: AtomicU64, -} - -impl RemoteBlob { - /// Create a new instance of `RemoteBlob`. 
- fn new( - blob_info: Arc, - conn: Arc, - file: Arc, - base: u64, - token: u64, - work_dir: &str, - ) -> Result { - let blob_path = format!("{}/{}", work_dir, blob_info.blob_id()); - let count = (blob_info.uncompressed_size() + RANGE_MAP_MASK) >> RANGE_MAP_SHIFT; - let map = BlobRangeMap::new(&blob_path, count as u32, RANGE_MAP_SHIFT as u32)?; - debug_assert!(count <= u32::MAX as u64); - - Ok(RemoteBlob { - blob_info, - map: Arc::new(map), - conn, - file, - base, - token: AtomicU64::new(token), - }) - } -} - -impl AsRawFd for RemoteBlob { - fn as_raw_fd(&self) -> RawFd { - self.file.as_raw_fd() - } -} - -impl BlobObject for RemoteBlob { - fn base_offset(&self) -> u64 { - self.base - } - - fn is_all_data_ready(&self) -> bool { - self.map.is_range_all_ready() - } - - fn fetch_range_compressed(&self, _offset: u64, _size: u64) -> Result { - Err(enosys!()) - } - - fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> Result { - match self.map.is_range_ready(offset, size) { - Ok(true) => Ok(0), - _ => self.conn.call_fetch_range(self, offset, size), - } - } - - fn prefetch_chunks(&self, _range: &BlobIoRange) -> Result { - Err(enosys!()) - } -} - -#[derive(Debug, Eq, PartialEq)] -enum RequestStatus { - Waiting, - Reconnect, - Timeout, - Finished, -} - -#[allow(dead_code)] -enum RequestResult { - None, - Reconnect, - Noop, - GetBlob(u32, u64, u64, Option), - FetchRange(u32, u64), -} - -struct Request { - tag: u64, - condvar: Condvar, - state: Mutex<(RequestStatus, RequestResult)>, -} - -impl Request { - fn new(tag: u64) -> Self { - Request { - tag, - condvar: Condvar::new(), - state: Mutex::new((RequestStatus::Waiting, RequestResult::None)), - } - } - - fn wait_for_result(&self) { - let mut guard = self.state.lock().unwrap(); - - while guard.0 == RequestStatus::Waiting { - let res = self - .condvar - .wait_timeout(guard, Duration::from_secs(REQUEST_TIMEOUT_SEC)) - .unwrap(); - let tor = res.1; - - guard = res.0; - if guard.0 == RequestStatus::Finished || guard.0 == RequestStatus::Reconnect { - return; - } else if tor.timed_out() { - guard.0 = RequestStatus::Timeout; - } - } - } - - fn set_result(&self, result: RequestResult) { - let mut guard = self.state.lock().unwrap(); - - match guard.0 { - RequestStatus::Waiting | RequestStatus::Timeout | RequestStatus::Reconnect => { - guard.1 = result; - guard.0 = RequestStatus::Finished; - self.condvar.notify_all(); - } - RequestStatus::Finished => { - debug!("received duplicated reply"); - } - } - } -} - -/// Struct to maintain state for a connection to remote blob manager. -struct ServerConnection { - sock: String, - tag: AtomicU64, - exiting: AtomicBool, - conn: Mutex>, - ready: Condvar, - requests: Mutex>>, - remote_blobs: Arc, -} - -impl ServerConnection { - fn new(sock: &str, remote_blobs: Arc) -> Self { - ServerConnection { - sock: sock.to_owned(), - tag: AtomicU64::new(1), - exiting: AtomicBool::new(false), - conn: Mutex::new(None), - ready: Condvar::new(), - requests: Mutex::new(HashMap::new()), - remote_blobs, - } - } - - fn connect(&self) -> Result { - let mut guard = self.get_connection()?; - if guard.is_some() { - return Ok(false); - } - - match Endpoint::connect(&self.sock) { - Ok(v) => { - *guard = Some(v); - Ok(true) - } - Err(e) => { - error!("cannot connect to remote blob manager, {}", e); - Err(eio!()) - } - } - } - - fn close(&self) { - if !self.exiting.swap(true, Ordering::AcqRel) { - self.disconnect(); - } - } - - fn start(client: Arc) -> Result<()> { - std::thread::spawn(move || loop { - // Ensure connection is ready. 
- match client.get_connection() { - Ok(guard) => { - if guard.is_none() { - drop(client.ready.wait(guard)); - } else { - drop(guard); - } - } - Err(_) => continue, - } - - let _ = client.handle_reply(); - }); - - Ok(()) - } - - // Only works for single-threaded context. - fn handle_reply(&self) -> Result<()> { - let mut nr; - let mut rfd = FdSet::new(); - let mut efd = FdSet::new(); - - loop { - { - rfd.clear(); - efd.clear(); - match self.get_connection()?.as_ref() { - None => return Err(eio!()), - Some(conn) => { - rfd.insert(conn.as_raw_fd()); - efd.insert(conn.as_raw_fd()); - nr = conn.as_raw_fd() + 1; - } - } - } - let _ = select(nr, Some(&mut rfd), None, Some(&mut efd), None) - .map_err(|e| eother!(format!("{}", e)))?; - - let mut guard = self.get_connection()?; - let (hdr, files) = match guard.as_mut() { - None => return Err(eio!()), - Some(conn) => conn.recv_header().map_err(|_e| eio!())?, - }; - if !hdr.is_valid() { - return Err(einval!()); - } - let body_size = hdr.get_size() as usize; - - match hdr.get_code() { - RequestCode::MaxCommand => return Err(eother!()), - RequestCode::Noop => self.handle_result(hdr.get_tag(), RequestResult::Noop), - RequestCode::GetBlob => { - self.handle_get_blob_reply(guard, &hdr, body_size, files)?; - } - RequestCode::FetchRange => { - self.handle_fetch_range_reply(guard, &hdr, body_size, files)?; - } - } - } - } - - fn call_ping(&self) -> Result<()> { - 'next_iter: loop { - let req = self.create_request(); - let hdr = MsgHeader::new( - req.tag, - RequestCode::Noop, - HeaderFlag::NEED_REPLY.bits(), - 0u32, - ); - let msg = [0u8; 0]; - - self.send_msg(&hdr, &msg)?; - match self.wait_for_result(&req)? { - RequestResult::Noop => return Ok(()), - RequestResult::Reconnect => continue 'next_iter, - _ => return Err(eother!()), - } - } - } - - fn call_get_blob(&self, blob_info: &Arc) -> Result<(File, u64, u64)> { - if blob_info.blob_id().len() >= 256 { - return Err(einval!("blob id is too large")); - } - - 'next_iter: loop { - let req = self.create_request(); - let hdr = MsgHeader::new( - req.tag, - RequestCode::GetBlob, - HeaderFlag::NEED_REPLY.bits(), - std::mem::size_of::() as u32, - ); - let generation = self.remote_blobs.get_generation(); - let msg = GetBlobRequest::new(generation, blob_info.blob_id()); - - self.send_msg(&hdr, &msg)?; - match self.wait_for_result(&req)? { - RequestResult::GetBlob(result, token, base, file) => { - if result != 0 { - return Err(std::io::Error::from_raw_os_error(result as i32)); - } else if (token >> 32) as u32 != self.remote_blobs.get_generation() { - continue 'next_iter; - } else if let Some(file) = file { - return Ok((file, base, token)); - } else { - return Err(einval!()); - } - } - RequestResult::Reconnect => continue 'next_iter, - _ => return Err(eother!()), - } - } - } - - fn call_fetch_range(&self, blob: &RemoteBlob, start: u64, count: u64) -> Result { - 'next_iter: loop { - let token = blob.token.load(Ordering::Acquire); - if (token >> 32) as u32 != self.remote_blobs.get_generation() { - self.reopen_blob(blob)?; - continue 'next_iter; - } - - let req = self.create_request(); - let hdr = MsgHeader::new( - req.tag, - RequestCode::FetchRange, - HeaderFlag::NEED_REPLY.bits(), - std::mem::size_of::() as u32, - ); - let msg = FetchRangeRequest::new(token, start, count); - self.send_msg(&hdr, &msg)?; - match self.wait_for_result(&req)? 
{ - RequestResult::FetchRange(result, size) => { - if result == FetchRangeResult::Success as u32 { - return Ok(size as usize); - } else if result == FetchRangeResult::GenerationMismatch as u32 { - continue 'next_iter; - } else { - return Err(std::io::Error::from_raw_os_error(count as i32)); - } - } - RequestResult::Reconnect => continue 'next_iter, - _ => return Err(eother!()), - } - } - } - - fn reopen_blob(&self, blob: &RemoteBlob) -> Result<()> { - 'next_iter: loop { - let req = self.create_request(); - let hdr = MsgHeader::new( - req.tag, - RequestCode::GetBlob, - HeaderFlag::NEED_REPLY.bits(), - std::mem::size_of::() as u32, - ); - let generation = self.remote_blobs.get_generation(); - let msg = GetBlobRequest::new(generation, blob.blob_info.blob_id()); - - self.send_msg(&hdr, &msg)?; - match self.wait_for_result(&req)? { - RequestResult::GetBlob(result, token, _base, file) => { - if result != 0 { - return Err(std::io::Error::from_raw_os_error(result as i32)); - } else if (token >> 32) as u32 != self.remote_blobs.get_generation() { - continue 'next_iter; - } else if let Some(_file) = file { - blob.token.store(token, Ordering::Release); - return Ok(()); - } else { - return Err(einval!()); - } - } - RequestResult::Reconnect => continue 'next_iter, - _ => return Err(eother!()), - } - } - } - - fn get_next_tag(&self) -> u64 { - self.tag.fetch_add(1, Ordering::AcqRel) - } - - fn create_request(&self) -> Arc { - let tag = self.get_next_tag(); - let request = Arc::new(Request::new(tag)); - - self.requests.lock().unwrap().insert(tag, request.clone()); - - request - } - - fn get_connection(&self) -> Result>> { - if self.exiting.load(Ordering::Relaxed) { - Err(eio!()) - } else { - Ok(self.conn.lock().unwrap()) - } - } - - fn send_msg(&self, hdr: &MsgHeader, msg: &T) -> Result<()> { - if let Ok(mut guard) = self.get_connection() { - if let Some(conn) = guard.as_mut() { - if conn.send_message(hdr, msg, None).is_ok() { - return Ok(()); - } - } - } - - let start = Instant::now(); - self.disconnect(); - loop { - self.reconnect(); - if let Ok(mut guard) = self.get_connection() { - if let Some(conn) = guard.as_mut() { - if conn.send_message(hdr, msg, None).is_ok() { - return Ok(()); - } - } - } - - self.disconnect(); - if let Some(end) = start.checked_add(Duration::from_secs(REQUEST_TIMEOUT_SEC)) { - let now = Instant::now(); - if end < now { - return Err(eio!()); - } - } else { - return Err(eio!()); - } - std::thread::sleep(Duration::from_millis(10)); - } - } - - fn reconnect(&self) { - if let Ok(true) = self.connect() { - let guard = self.requests.lock().unwrap(); - for entry in guard.iter() { - let mut state = entry.1.state.lock().unwrap(); - if state.0 == RequestStatus::Waiting { - state.0 = RequestStatus::Reconnect; - entry.1.condvar.notify_all(); - } - } - } - } - - fn disconnect(&self) { - self.remote_blobs.notify_disconnect(); - - let mut guard = self.conn.lock().unwrap(); - if let Some(conn) = guard.as_mut() { - conn.close(); - } - *guard = None; - } - - fn wait_for_result(&self, request: &Arc) -> Result { - request.wait_for_result(); - - let mut guard = self.requests.lock().unwrap(); - match guard.remove(&request.tag) { - None => Err(enoent!()), - Some(entry) => { - let mut guard2 = entry.state.lock().unwrap(); - match guard2.0 { - RequestStatus::Waiting => panic!("should not happen"), - RequestStatus::Timeout => Err(eio!()), - RequestStatus::Reconnect => Ok(RequestResult::Reconnect), - RequestStatus::Finished => { - let mut val = RequestResult::None; - mem::swap(&mut guard2.1, &mut val); - 
Ok(val) - } - } - } - } - } - - fn handle_result(&self, tag: u64, result: RequestResult) { - let requests = self.requests.lock().unwrap(); - - match requests.get(&tag) { - None => debug!("no request for tag {} found, may have timed out", tag), - Some(request) => request.set_result(result), - } - } - - fn handle_get_blob_reply( - &self, - mut guard: MutexGuard>, - hdr: &MsgHeader, - body_size: usize, - files: Option>, - ) -> Result<()> { - if body_size != mem::size_of::() { - return Err(einval!()); - } - let (size, data) = match guard.as_mut() { - None => return Err(einval!()), - Some(conn) => conn.recv_data(body_size).map_err(|_e| eio!())?, - }; - if size != body_size { - return Err(eio!()); - } - drop(guard); - - let mut msg = GetBlobReply::new(0, 0, 0); - msg.as_mut_slice().copy_from_slice(&data); - if !msg.is_valid() { - return Err(einval!()); - } else if msg.result != 0 { - self.handle_result( - hdr.get_tag(), - RequestResult::GetBlob(msg.result, msg.token, msg.base, None), - ); - } else { - if files.is_none() { - return Err(einval!()); - } - // Safe because we have just validated files is not none. - let mut files = files.unwrap(); - if files.len() != 1 { - return Err(einval!()); - } - // Safe because we have just validated files[0] is valid. - let file = files.pop().unwrap(); - self.handle_result( - hdr.get_tag(), - RequestResult::GetBlob(msg.result, msg.token, msg.base, Some(file)), - ); - } - - Ok(()) - } - - fn handle_fetch_range_reply( - &self, - mut guard: MutexGuard>, - hdr: &MsgHeader, - body_size: usize, - files: Option>, - ) -> Result<()> { - if body_size != mem::size_of::() || files.is_some() { - return Err(einval!()); - } - let (size, data) = match guard.as_mut() { - None => return Err(einval!()), - Some(conn) => conn.recv_data(body_size).map_err(|_e| eio!())?, - }; - if size != body_size { - return Err(eio!()); - } - drop(guard); - - let mut msg = FetchRangeReply::new(0, 0, 0); - msg.as_mut_slice().copy_from_slice(&data); - if !msg.is_valid() { - return Err(einval!()); - } else { - self.handle_result( - hdr.get_tag(), - RequestResult::FetchRange(msg.result, msg.count), - ); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_request() { - let req = Arc::new(Request::new(1)); - let req1 = req.clone(); - - assert_eq!(req.tag, 1); - { - let guard = req.state.lock().unwrap(); - assert_eq!(guard.0, RequestStatus::Waiting); - matches!(guard.1, RequestResult::None); - } - - let (sender, receiver) = std::sync::mpsc::channel::(); - std::thread::spawn(move || { - let _ = receiver.recv().unwrap(); - { - let mut guard = req1.state.lock().unwrap(); - guard.0 = RequestStatus::Reconnect; - } - - let _ = receiver.recv().unwrap(); - req1.set_result(RequestResult::Reconnect); - }); - - { - req.wait_for_result(); - let mut guard = req.state.lock().unwrap(); - assert_eq!(guard.0, RequestStatus::Timeout); - guard.0 = RequestStatus::Waiting; - } - - sender.send(true).unwrap(); - { - req.wait_for_result(); - let mut guard = req.state.lock().unwrap(); - assert_eq!(guard.0, RequestStatus::Reconnect); - guard.0 = RequestStatus::Waiting; - } - - sender.send(true).unwrap(); - { - req.wait_for_result(); - let guard = req.state.lock().unwrap(); - assert_eq!(guard.0, RequestStatus::Finished); - matches!(guard.1, RequestResult::Reconnect); - } - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::fs::File; +use std::io::Result; +use std::mem; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; +use std::sync::{Arc, Condvar, Mutex, MutexGuard}; +use std::time::{Duration, Instant}; + +use nix::sys::select::{select, FdSet}; +use vm_memory::ByteValued; + +use crate::cache::state::{BlobRangeMap, RangeMap}; +use crate::device::{BlobInfo, BlobIoRange, BlobObject}; +use crate::remote::connection::Endpoint; +use crate::remote::message::{ + FetchRangeReply, FetchRangeRequest, FetchRangeResult, GetBlobReply, GetBlobRequest, HeaderFlag, + MsgHeader, MsgValidator, RequestCode, +}; + +const REQUEST_TIMEOUT_SEC: u64 = 4; +const RANGE_MAP_SHIFT: u64 = 18; +const RANGE_MAP_MASK: u64 = (1 << RANGE_MAP_SHIFT) - 1; + +/// Manager to access and cache blob objects managed by remote blob manager. +/// +/// A `RemoteBlobMgr` object may be used to access services from a remote blob manager, and cache +/// blob information to improve performance. +pub struct RemoteBlobMgr { + remote_blobs: Arc, + server_connection: Arc, + workdir: String, +} + +impl RemoteBlobMgr { + /// Create a new instance of `RemoteBlobMgr`. + pub fn new(workdir: String, sock: &str) -> Result { + let remote_blobs = Arc::new(RemoteBlobs::new()); + let conn = ServerConnection::new(sock, remote_blobs.clone()); + + Ok(RemoteBlobMgr { + remote_blobs, + server_connection: Arc::new(conn), + workdir, + }) + } + + /// Connect to remote blob manager. + pub fn connect(&self) -> Result<()> { + self.server_connection.connect().map(|_| ()) + } + + /// Start to handle communication messages. + pub fn start(&self) -> Result<()> { + ServerConnection::start(self.server_connection.clone()) + } + + /// Shutdown the `RemoteblogMgr` instance. + pub fn shutdown(&self) { + self.server_connection.close(); + self.remote_blobs.reset(); + } + + /// Ping remote blog manager server. + pub fn ping(&self) -> Result<()> { + self.server_connection.call_ping() + } + + /// Get an `BlobObject` trait object to access the specified blob. 
+ pub fn get_blob_object(&self, blob_info: &Arc) -> Result> { + if let Some(blob) = self.remote_blobs.get_blob(blob_info) { + return Ok(blob); + } + + loop { + let (file, base, token) = self.server_connection.call_get_blob(blob_info)?; + let file = Arc::new(file); + let blob = RemoteBlob::new( + blob_info.clone(), + self.server_connection.clone(), + file, + base, + token, + &self.workdir, + )?; + let blob = Arc::new(blob); + if let Some(blob) = self.remote_blobs.add_blob(blob, token) { + return Ok(blob); + } + } + } +} + +struct RemoteBlobs { + generation: AtomicU32, + active_blobs: Mutex>>, +} + +impl RemoteBlobs { + fn new() -> Self { + Self { + generation: AtomicU32::new(1), + active_blobs: Mutex::new(Vec::new()), + } + } + + fn reset(&self) { + self.active_blobs.lock().unwrap().truncate(0); + } + + fn add_blob(&self, blob: Arc, token: u64) -> Option> { + let mut guard = self.active_blobs.lock().unwrap(); + for b in guard.iter() { + if blob.blob_info.blob_id() == b.blob_info.blob_id() { + return Some(b.clone()); + } + } + + if (token >> 32) as u32 == self.get_generation() { + guard.push(blob.clone()); + return Some(blob); + } + + None + } + + fn get_blob(&self, blob_info: &Arc) -> Option> { + let guard = self.active_blobs.lock().unwrap(); + + for blob in guard.iter() { + if blob.blob_info.blob_id() == blob_info.blob_id() { + return Some(blob.clone()); + } + } + + None + } + + fn get_generation(&self) -> u32 { + self.generation.load(Ordering::Acquire) + } + + fn notify_disconnect(&self) { + self.generation.fetch_add(1, Ordering::AcqRel); + for blob in self.active_blobs.lock().unwrap().iter() { + blob.token.store(0, Ordering::Release); + } + } +} + +/// Struct to access and cache blob object managed by remote blob manager. +/// +/// The `RemoteBlob` structure acts as a proxy to access a blob managed by remote blob manager. +/// It has a separate data plane and control plane. A file descriptor will be received from the +/// remote blob manager, so all data access requests will be served by directly access the file +/// descriptor. And a communication channel will be used to communicate control message between +/// the client and the remote blob manager. To improve control plane performance, it may cache +/// blob metadata and chunk map to avoid unnecessary control messages. +struct RemoteBlob { + blob_info: Arc, + conn: Arc, + map: Arc, + file: Arc, + base: u64, + token: AtomicU64, +} + +impl RemoteBlob { + /// Create a new instance of `RemoteBlob`. 
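The blob token used by `add_blob` and `call_get_blob` carries the connection generation in its upper 32 bits, and `notify_disconnect` bumps the generation so that every token minted before a reconnect fails the `(token >> 32) as u32 == generation` check. A minimal sketch of just that scheme, with illustrative names and an `AtomicU32` generation counter:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

/// Pack a generation number into the upper 32 bits of a token.
fn make_token(generation: u32, id: u32) -> u64 {
    ((generation as u64) << 32) | id as u64
}

/// A token is only valid if it was issued under the current generation.
fn token_is_current(token: u64, generation: &AtomicU32) -> bool {
    (token >> 32) as u32 == generation.load(Ordering::Acquire)
}

fn main() {
    let generation = AtomicU32::new(1);

    let token = make_token(generation.load(Ordering::Acquire), 42);
    assert!(token_is_current(token, &generation));

    // A disconnect bumps the generation, invalidating every outstanding token.
    generation.fetch_add(1, Ordering::AcqRel);
    assert!(!token_is_current(token, &generation));

    println!("stale token detected after reconnect");
}
```

A stale token simply forces the caller back through the get-blob path, which is why `call_fetch_range` and `reopen_blob` loop until the generations line up.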
+ fn new( + blob_info: Arc, + conn: Arc, + file: Arc, + base: u64, + token: u64, + work_dir: &str, + ) -> Result { + let blob_path = format!("{}/{}", work_dir, blob_info.blob_id()); + let count = (blob_info.uncompressed_size() + RANGE_MAP_MASK) >> RANGE_MAP_SHIFT; + let map = BlobRangeMap::new(&blob_path, count as u32, RANGE_MAP_SHIFT as u32)?; + debug_assert!(count <= u32::MAX as u64); + + Ok(RemoteBlob { + blob_info, + map: Arc::new(map), + conn, + file, + base, + token: AtomicU64::new(token), + }) + } +} + +impl AsRawFd for RemoteBlob { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +impl BlobObject for RemoteBlob { + fn base_offset(&self) -> u64 { + self.base + } + + fn is_all_data_ready(&self) -> bool { + self.map.is_range_all_ready() + } + + fn fetch_range_compressed(&self, _offset: u64, _size: u64) -> Result { + Err(enosys!()) + } + + fn fetch_range_uncompressed(&self, offset: u64, size: u64) -> Result { + match self.map.is_range_ready(offset, size) { + Ok(true) => Ok(0), + _ => self.conn.call_fetch_range(self, offset, size), + } + } + + fn prefetch_chunks(&self, _range: &BlobIoRange) -> Result { + Err(enosys!()) + } +} + +#[derive(Debug, Eq, PartialEq)] +enum RequestStatus { + Waiting, + Reconnect, + Timeout, + Finished, +} + +#[allow(dead_code)] +enum RequestResult { + None, + Reconnect, + Noop, + GetBlob(u32, u64, u64, Option), + FetchRange(u32, u64), +} + +struct Request { + tag: u64, + condvar: Condvar, + state: Mutex<(RequestStatus, RequestResult)>, +} + +impl Request { + fn new(tag: u64) -> Self { + Request { + tag, + condvar: Condvar::new(), + state: Mutex::new((RequestStatus::Waiting, RequestResult::None)), + } + } + + fn wait_for_result(&self) { + let mut guard = self.state.lock().unwrap(); + + while guard.0 == RequestStatus::Waiting { + let res = self + .condvar + .wait_timeout(guard, Duration::from_secs(REQUEST_TIMEOUT_SEC)) + .unwrap(); + let tor = res.1; + + guard = res.0; + if guard.0 == RequestStatus::Finished || guard.0 == RequestStatus::Reconnect { + return; + } else if tor.timed_out() { + guard.0 = RequestStatus::Timeout; + } + } + } + + fn set_result(&self, result: RequestResult) { + let mut guard = self.state.lock().unwrap(); + + match guard.0 { + RequestStatus::Waiting | RequestStatus::Timeout | RequestStatus::Reconnect => { + guard.1 = result; + guard.0 = RequestStatus::Finished; + self.condvar.notify_all(); + } + RequestStatus::Finished => { + debug!("received duplicated reply"); + } + } + } +} + +/// Struct to maintain state for a connection to remote blob manager. +struct ServerConnection { + sock: String, + tag: AtomicU64, + exiting: AtomicBool, + conn: Mutex>, + ready: Condvar, + requests: Mutex>>, + remote_blobs: Arc, +} + +impl ServerConnection { + fn new(sock: &str, remote_blobs: Arc) -> Self { + ServerConnection { + sock: sock.to_owned(), + tag: AtomicU64::new(1), + exiting: AtomicBool::new(false), + conn: Mutex::new(None), + ready: Condvar::new(), + requests: Mutex::new(HashMap::new()), + remote_blobs, + } + } + + fn connect(&self) -> Result { + let mut guard = self.get_connection()?; + if guard.is_some() { + return Ok(false); + } + + match Endpoint::connect(&self.sock) { + Ok(v) => { + *guard = Some(v); + Ok(true) + } + Err(e) => { + error!("cannot connect to remote blob manager, {}", e); + Err(eio!()) + } + } + } + + fn close(&self) { + if !self.exiting.swap(true, Ordering::AcqRel) { + self.disconnect(); + } + } + + fn start(client: Arc) -> Result<()> { + std::thread::spawn(move || loop { + // Ensure connection is ready. 
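Each in-flight `Request` above pairs a `Mutex`-guarded `(status, result)` tuple with a `Condvar`; the waiter uses `wait_timeout` so it can tell a real reply apart from a timeout. A stripped-down, self-contained sketch of that pattern (status and result types simplified to keep it short):

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;
use std::time::Duration;

#[derive(Debug, PartialEq)]
enum Status {
    Waiting,
    Finished,
    Timeout,
}

struct Request {
    state: Mutex<(Status, Option<u32>)>,
    condvar: Condvar,
}

impl Request {
    fn new() -> Self {
        Request {
            state: Mutex::new((Status::Waiting, None)),
            condvar: Condvar::new(),
        }
    }

    /// Block until a reply arrives or the timeout fires.
    fn wait_for_result(&self, timeout: Duration) -> Option<u32> {
        let mut guard = self.state.lock().unwrap();
        while guard.0 == Status::Waiting {
            let (g, res) = self.condvar.wait_timeout(guard, timeout).unwrap();
            guard = g;
            if guard.0 == Status::Waiting && res.timed_out() {
                guard.0 = Status::Timeout;
            }
        }
        guard.1
    }

    /// Called by the reply-handling thread when a response for this tag arrives.
    fn set_result(&self, value: u32) {
        let mut guard = self.state.lock().unwrap();
        if guard.0 == Status::Waiting {
            guard.1 = Some(value);
            guard.0 = Status::Finished;
            self.condvar.notify_all();
        }
    }
}

fn main() {
    let req = Arc::new(Request::new());
    let replier = req.clone();
    thread::spawn(move || {
        thread::sleep(Duration::from_millis(50));
        replier.set_result(7);
    });

    let result = req.wait_for_result(Duration::from_secs(2));
    assert_eq!(result, Some(7));
    println!("got reply: {:?}", result);
}
```

The loop around `wait_timeout` is what makes the pattern robust against spurious wakeups: only an explicit state change or an observed timeout ends the wait.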
+ match client.get_connection() { + Ok(guard) => { + if guard.is_none() { + drop(client.ready.wait(guard)); + } else { + drop(guard); + } + } + Err(_) => continue, + } + + let _ = client.handle_reply(); + }); + + Ok(()) + } + + // Only works for single-threaded context. + fn handle_reply(&self) -> Result<()> { + let mut nr; + let mut rfd = FdSet::new(); + let mut efd = FdSet::new(); + + loop { + { + rfd.clear(); + efd.clear(); + match self.get_connection()?.as_ref() { + None => return Err(eio!()), + Some(conn) => { + rfd.insert(conn.as_raw_fd()); + efd.insert(conn.as_raw_fd()); + nr = conn.as_raw_fd() + 1; + } + } + } + let _ = select(nr, Some(&mut rfd), None, Some(&mut efd), None) + .map_err(|e| eother!(format!("{}", e)))?; + + let mut guard = self.get_connection()?; + let (hdr, files) = match guard.as_mut() { + None => return Err(eio!()), + Some(conn) => conn.recv_header().map_err(|_e| eio!())?, + }; + if !hdr.is_valid() { + return Err(einval!()); + } + let body_size = hdr.get_size() as usize; + + match hdr.get_code() { + RequestCode::MaxCommand => return Err(eother!()), + RequestCode::Noop => self.handle_result(hdr.get_tag(), RequestResult::Noop), + RequestCode::GetBlob => { + self.handle_get_blob_reply(guard, &hdr, body_size, files)?; + } + RequestCode::FetchRange => { + self.handle_fetch_range_reply(guard, &hdr, body_size, files)?; + } + } + } + } + + fn call_ping(&self) -> Result<()> { + 'next_iter: loop { + let req = self.create_request(); + let hdr = MsgHeader::new( + req.tag, + RequestCode::Noop, + HeaderFlag::NEED_REPLY.bits(), + 0u32, + ); + let msg = [0u8; 0]; + + self.send_msg(&hdr, &msg)?; + match self.wait_for_result(&req)? { + RequestResult::Noop => return Ok(()), + RequestResult::Reconnect => continue 'next_iter, + _ => return Err(eother!()), + } + } + } + + fn call_get_blob(&self, blob_info: &Arc) -> Result<(File, u64, u64)> { + if blob_info.blob_id().len() >= 256 { + return Err(einval!("blob id is too large")); + } + + 'next_iter: loop { + let req = self.create_request(); + let hdr = MsgHeader::new( + req.tag, + RequestCode::GetBlob, + HeaderFlag::NEED_REPLY.bits(), + std::mem::size_of::() as u32, + ); + let generation = self.remote_blobs.get_generation(); + let msg = GetBlobRequest::new(generation, blob_info.blob_id()); + + self.send_msg(&hdr, &msg)?; + match self.wait_for_result(&req)? { + RequestResult::GetBlob(result, token, base, file) => { + if result != 0 { + return Err(std::io::Error::from_raw_os_error(result as i32)); + } else if (token >> 32) as u32 != self.remote_blobs.get_generation() { + continue 'next_iter; + } else if let Some(file) = file { + return Ok((file, base, token)); + } else { + return Err(einval!()); + } + } + RequestResult::Reconnect => continue 'next_iter, + _ => return Err(eother!()), + } + } + } + + fn call_fetch_range(&self, blob: &RemoteBlob, start: u64, count: u64) -> Result { + 'next_iter: loop { + let token = blob.token.load(Ordering::Acquire); + if (token >> 32) as u32 != self.remote_blobs.get_generation() { + self.reopen_blob(blob)?; + continue 'next_iter; + } + + let req = self.create_request(); + let hdr = MsgHeader::new( + req.tag, + RequestCode::FetchRange, + HeaderFlag::NEED_REPLY.bits(), + std::mem::size_of::() as u32, + ); + let msg = FetchRangeRequest::new(token, start, count); + self.send_msg(&hdr, &msg)?; + match self.wait_for_result(&req)? 
{ + RequestResult::FetchRange(result, size) => { + if result == FetchRangeResult::Success as u32 { + return Ok(size as usize); + } else if result == FetchRangeResult::GenerationMismatch as u32 { + continue 'next_iter; + } else { + return Err(std::io::Error::from_raw_os_error(count as i32)); + } + } + RequestResult::Reconnect => continue 'next_iter, + _ => return Err(eother!()), + } + } + } + + fn reopen_blob(&self, blob: &RemoteBlob) -> Result<()> { + 'next_iter: loop { + let req = self.create_request(); + let hdr = MsgHeader::new( + req.tag, + RequestCode::GetBlob, + HeaderFlag::NEED_REPLY.bits(), + std::mem::size_of::() as u32, + ); + let generation = self.remote_blobs.get_generation(); + let msg = GetBlobRequest::new(generation, blob.blob_info.blob_id()); + + self.send_msg(&hdr, &msg)?; + match self.wait_for_result(&req)? { + RequestResult::GetBlob(result, token, _base, file) => { + if result != 0 { + return Err(std::io::Error::from_raw_os_error(result as i32)); + } else if (token >> 32) as u32 != self.remote_blobs.get_generation() { + continue 'next_iter; + } else if let Some(_file) = file { + blob.token.store(token, Ordering::Release); + return Ok(()); + } else { + return Err(einval!()); + } + } + RequestResult::Reconnect => continue 'next_iter, + _ => return Err(eother!()), + } + } + } + + fn get_next_tag(&self) -> u64 { + self.tag.fetch_add(1, Ordering::AcqRel) + } + + fn create_request(&self) -> Arc { + let tag = self.get_next_tag(); + let request = Arc::new(Request::new(tag)); + + self.requests.lock().unwrap().insert(tag, request.clone()); + + request + } + + fn get_connection(&self) -> Result>> { + if self.exiting.load(Ordering::Relaxed) { + Err(eio!()) + } else { + Ok(self.conn.lock().unwrap()) + } + } + + fn send_msg(&self, hdr: &MsgHeader, msg: &T) -> Result<()> { + if let Ok(mut guard) = self.get_connection() { + if let Some(conn) = guard.as_mut() { + if conn.send_message(hdr, msg, None).is_ok() { + return Ok(()); + } + } + } + + let start = Instant::now(); + self.disconnect(); + loop { + self.reconnect(); + if let Ok(mut guard) = self.get_connection() { + if let Some(conn) = guard.as_mut() { + if conn.send_message(hdr, msg, None).is_ok() { + return Ok(()); + } + } + } + + self.disconnect(); + if let Some(end) = start.checked_add(Duration::from_secs(REQUEST_TIMEOUT_SEC)) { + let now = Instant::now(); + if end < now { + return Err(eio!()); + } + } else { + return Err(eio!()); + } + std::thread::sleep(Duration::from_millis(10)); + } + } + + fn reconnect(&self) { + if let Ok(true) = self.connect() { + let guard = self.requests.lock().unwrap(); + for entry in guard.iter() { + let mut state = entry.1.state.lock().unwrap(); + if state.0 == RequestStatus::Waiting { + state.0 = RequestStatus::Reconnect; + entry.1.condvar.notify_all(); + } + } + } + } + + fn disconnect(&self) { + self.remote_blobs.notify_disconnect(); + + let mut guard = self.conn.lock().unwrap(); + if let Some(conn) = guard.as_mut() { + conn.close(); + } + *guard = None; + } + + fn wait_for_result(&self, request: &Arc) -> Result { + request.wait_for_result(); + + let mut guard = self.requests.lock().unwrap(); + match guard.remove(&request.tag) { + None => Err(enoent!()), + Some(entry) => { + let mut guard2 = entry.state.lock().unwrap(); + match guard2.0 { + RequestStatus::Waiting => panic!("should not happen"), + RequestStatus::Timeout => Err(eio!()), + RequestStatus::Reconnect => Ok(RequestResult::Reconnect), + RequestStatus::Finished => { + let mut val = RequestResult::None; + mem::swap(&mut guard2.1, &mut val); + 
Ok(val) + } + } + } + } + } + + fn handle_result(&self, tag: u64, result: RequestResult) { + let requests = self.requests.lock().unwrap(); + + match requests.get(&tag) { + None => debug!("no request for tag {} found, may have timed out", tag), + Some(request) => request.set_result(result), + } + } + + fn handle_get_blob_reply( + &self, + mut guard: MutexGuard>, + hdr: &MsgHeader, + body_size: usize, + files: Option>, + ) -> Result<()> { + if body_size != mem::size_of::() { + return Err(einval!()); + } + let (size, data) = match guard.as_mut() { + None => return Err(einval!()), + Some(conn) => conn.recv_data(body_size).map_err(|_e| eio!())?, + }; + if size != body_size { + return Err(eio!()); + } + drop(guard); + + let mut msg = GetBlobReply::new(0, 0, 0); + msg.as_mut_slice().copy_from_slice(&data); + if !msg.is_valid() { + return Err(einval!()); + } else if msg.result != 0 { + self.handle_result( + hdr.get_tag(), + RequestResult::GetBlob(msg.result, msg.token, msg.base, None), + ); + } else { + if files.is_none() { + return Err(einval!()); + } + // Safe because we have just validated files is not none. + let mut files = files.unwrap(); + if files.len() != 1 { + return Err(einval!()); + } + // Safe because we have just validated files[0] is valid. + let file = files.pop().unwrap(); + self.handle_result( + hdr.get_tag(), + RequestResult::GetBlob(msg.result, msg.token, msg.base, Some(file)), + ); + } + + Ok(()) + } + + fn handle_fetch_range_reply( + &self, + mut guard: MutexGuard>, + hdr: &MsgHeader, + body_size: usize, + files: Option>, + ) -> Result<()> { + if body_size != mem::size_of::() || files.is_some() { + return Err(einval!()); + } + let (size, data) = match guard.as_mut() { + None => return Err(einval!()), + Some(conn) => conn.recv_data(body_size).map_err(|_e| eio!())?, + }; + if size != body_size { + return Err(eio!()); + } + drop(guard); + + let mut msg = FetchRangeReply::new(0, 0, 0); + msg.as_mut_slice().copy_from_slice(&data); + if !msg.is_valid() { + return Err(einval!()); + } else { + self.handle_result( + hdr.get_tag(), + RequestResult::FetchRange(msg.result, msg.count), + ); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_request() { + let req = Arc::new(Request::new(1)); + let req1 = req.clone(); + + assert_eq!(req.tag, 1); + { + let guard = req.state.lock().unwrap(); + assert_eq!(guard.0, RequestStatus::Waiting); + matches!(guard.1, RequestResult::None); + } + + let (sender, receiver) = std::sync::mpsc::channel::(); + std::thread::spawn(move || { + let _ = receiver.recv().unwrap(); + { + let mut guard = req1.state.lock().unwrap(); + guard.0 = RequestStatus::Reconnect; + } + + let _ = receiver.recv().unwrap(); + req1.set_result(RequestResult::Reconnect); + }); + + { + req.wait_for_result(); + let mut guard = req.state.lock().unwrap(); + assert_eq!(guard.0, RequestStatus::Timeout); + guard.0 = RequestStatus::Waiting; + } + + sender.send(true).unwrap(); + { + req.wait_for_result(); + let mut guard = req.state.lock().unwrap(); + assert_eq!(guard.0, RequestStatus::Reconnect); + guard.0 = RequestStatus::Waiting; + } + + sender.send(true).unwrap(); + { + req.wait_for_result(); + let guard = req.state.lock().unwrap(); + assert_eq!(guard.0, RequestStatus::Finished); + matches!(guard.1, RequestResult::Reconnect); + } + } +} diff --git a/storage/src/remote/connection.rs b/storage/src/remote/connection.rs index 65d0d008bb0..c9c0f81694d 100644 --- a/storage/src/remote/connection.rs +++ b/storage/src/remote/connection.rs @@ -1,1049 +1,1049 @@ 
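When a send fails, `send_msg` above drops the connection and keeps retrying against a deadline of roughly `REQUEST_TIMEOUT_SEC`, computed with `Instant::checked_add`. The sketch below isolates that deadline pattern; the `try_send` closure is a hypothetical stand-in for re-establishing the connection and resending.

```rust
use std::io::{Error, ErrorKind, Result};
use std::time::{Duration, Instant};

const REQUEST_TIMEOUT_SEC: u64 = 4;

/// Retry `try_send` until it succeeds or the deadline passes.
fn send_with_deadline<F: FnMut() -> bool>(mut try_send: F) -> Result<()> {
    let start = Instant::now();
    loop {
        if try_send() {
            return Ok(());
        }
        // Mirror the original logic: give up if the deadline cannot be computed or has passed.
        match start.checked_add(Duration::from_secs(REQUEST_TIMEOUT_SEC)) {
            Some(end) if Instant::now() < end => {}
            _ => return Err(Error::from(ErrorKind::TimedOut)),
        }
        std::thread::sleep(Duration::from_millis(10));
    }
}

fn main() -> Result<()> {
    // Pretend the first two attempts fail (e.g. the peer is restarting), then succeed.
    let mut attempts = 0;
    send_with_deadline(|| {
        attempts += 1;
        attempts >= 3
    })?;
    println!("sent after {} attempts", attempts);
    Ok(())
}
```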
-// Copyright (C) 2019 Alibaba Cloud. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Structs for Unix Domain Socket listener and endpoint. -//! -//! This file is copied from vhost/src/vhost-user/connection.rs, please keep it as is when possible. - -#![allow(dead_code)] - -use std::fs::File; -use std::io::Error as IOError; -use std::io::ErrorKind; -use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use std::os::unix::net::{UnixListener, UnixStream}; -use std::path::{Path, PathBuf}; -use std::{mem, slice}; - -use libc::{c_void, iovec}; -use vm_memory::ByteValued; - -use super::message::*; -use dbs_uhttp::{ScmSocket, SysError}; -use std::net::Shutdown; - -#[allow(clippy::enum_variant_names)] -#[derive(Debug)] -pub(crate) enum Error { - /// Invalid parameters. - InvalidParam, - /// Unsupported operations due to that the protocol feature hasn't been negotiated. - InvalidOperation, - /// Invalid message format, flag or content. - InvalidMessage, - /// Only part of a message have been sent or received successfully - PartialMessage, - /// Message is too large - OversizedMsg, - /// Fd array in question is too big or too small - IncorrectFds, - /// Can't connect to peer. - SocketConnect(std::io::Error), - /// Generic socket errors. - SocketError(std::io::Error), - /// The socket is broken or has been closed. - SocketBroken(std::io::Error), - /// Should retry the socket operation again. - SocketRetry(std::io::Error), - /// Failure from the slave side. - SlaveInternalError, - /// Failure from the master side. - MasterInternalError, - /// Virtio/protocol features mismatch. - FeatureMismatch, - /// Error from request handler - ReqHandlerError(IOError), - /// memfd file creation error - MemFdCreateError, - /// File truncate error - FileTrucateError, - /// memfd file seal errors - MemFdSealError, -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Error::InvalidParam => write!(f, "invalid parameters"), - Error::InvalidOperation => write!(f, "invalid operation"), - Error::InvalidMessage => write!(f, "invalid message"), - Error::PartialMessage => write!(f, "partial message"), - Error::OversizedMsg => write!(f, "oversized message"), - Error::IncorrectFds => write!(f, "wrong number of attached fds"), - Error::SocketError(e) => write!(f, "socket error: {}", e), - Error::SocketConnect(e) => write!(f, "can't connect to peer: {}", e), - Error::SocketBroken(e) => write!(f, "socket is broken: {}", e), - Error::SocketRetry(e) => write!(f, "temporary socket error: {}", e), - Error::SlaveInternalError => write!(f, "slave internal error"), - Error::MasterInternalError => write!(f, "Master internal error"), - Error::FeatureMismatch => write!(f, "virtio/protocol features mismatch"), - Error::ReqHandlerError(e) => write!(f, "handler failed to handle request: {}", e), - Error::MemFdCreateError => { - write!(f, "handler failed to allocate memfd during get_inflight_fd") - } - Error::FileTrucateError => { - write!(f, "handler failed to trucate memfd during get_inflight_fd") - } - Error::MemFdSealError => write!( - f, - "handler failed to apply seals to memfd during get_inflight_fd" - ), - } - } -} - -impl std::error::Error for Error {} - -impl Error { - /// Determine whether to rebuild the underline communication channel. - pub fn should_reconnect(&self) -> bool { - match *self { - // Should reconnect because it may be caused by temporary network errors. 
- Error::PartialMessage => true, - // Should reconnect because the underline socket is broken. - Error::SocketBroken(_) => true, - // Slave internal error, hope it recovers on reconnect. - Error::SlaveInternalError => true, - // Master internal error, hope it recovers on reconnect. - Error::MasterInternalError => true, - // Should just retry the IO operation instead of rebuilding the underline connection. - Error::SocketRetry(_) => false, - Error::InvalidParam | Error::InvalidOperation => false, - Error::InvalidMessage | Error::IncorrectFds | Error::OversizedMsg => false, - Error::SocketError(_) | Error::SocketConnect(_) => false, - Error::FeatureMismatch => false, - Error::ReqHandlerError(_) => false, - Error::MemFdCreateError | Error::FileTrucateError | Error::MemFdSealError => false, - } - } -} - -impl std::convert::From for Error { - #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux - fn from(err: std::io::Error) -> Self { - Error::SocketError(err) - } -} - -impl std::convert::From for Error { - /// Convert raw socket errors into meaningful blob manager errors. - /// - /// The vmm_sys_util::errno::Error is a simple wrapper over the raw errno, which doesn't means - /// much to the connection manager. So convert it into meaningful errors to simplify - /// the connection manager logic. - /// - /// # Return: - /// * - Error::SocketRetry: temporary error caused by signals or short of resources. - /// * - Error::SocketBroken: the underline socket is broken. - /// * - Error::SocketError: other socket related errors. - #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux - fn from(err: SysError) -> Self { - match err.errno() { - // The socket is marked nonblocking and the requested operation would block. - libc::EAGAIN => Error::SocketRetry(IOError::from_raw_os_error(libc::EAGAIN)), - // The socket is marked nonblocking and the requested operation would block. - libc::EWOULDBLOCK => Error::SocketRetry(IOError::from_raw_os_error(libc::EWOULDBLOCK)), - // A signal occurred before any data was transmitted - libc::EINTR => Error::SocketRetry(IOError::from_raw_os_error(libc::EINTR)), - // The output queue for a network interface was full. This generally indicates - // that the interface has stopped sending, but may be caused by transient congestion. - libc::ENOBUFS => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOBUFS)), - // No memory available. - libc::ENOMEM => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOMEM)), - // Connection reset by peer. - libc::ECONNRESET => Error::SocketBroken(IOError::from_raw_os_error(libc::ECONNRESET)), - // The local end has been shut down on a connection oriented socket. In this case the - // process will also receive a SIGPIPE unless MSG_NOSIGNAL is set. - libc::EPIPE => Error::SocketBroken(IOError::from_raw_os_error(libc::EPIPE)), - // Write permission is denied on the destination socket file, or search permission is - // denied for one of the directories the path prefix. - libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)), - // Catch all other errors - e => Error::SocketError(IOError::from_raw_os_error(e)), - } - } -} - -pub(crate) type Result = std::result::Result; - -/// Unix domain socket listener for accepting incoming connections. -pub(crate) struct Listener { - fd: UnixListener, - path: Option, -} - -impl Listener { - /// Create a unix domain socket listener. - /// - /// # Return: - /// * - the new Listener object on success. 
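The `From<SysError>` conversion in this file buckets raw socket errnos into retryable, broken-connection, and generic categories, which is what `should_reconnect` later keys off. The illustration below reproduces that classification in a simplified form using only `std::io::Error` plus the `libc` errno constants; the enum is a stand-in for the crate's `Error` type, `EWOULDBLOCK` is omitted because it equals `EAGAIN` on Linux, and the connect-time `EACCES` case is folded into the generic bucket for brevity.

```rust
use std::io::Error as IOError;

/// Simplified stand-in for the connection error type.
#[derive(Debug)]
enum SocketErr {
    Retry(IOError),
    Broken(IOError),
    Other(IOError),
}

/// Classify a raw errno roughly the way the From<SysError> impl does.
fn classify(errno: i32) -> SocketErr {
    let err = IOError::from_raw_os_error(errno);
    match errno {
        // Transient conditions: retry the same operation.
        libc::EAGAIN | libc::EINTR | libc::ENOBUFS | libc::ENOMEM => SocketErr::Retry(err),
        // The peer went away: the connection must be rebuilt.
        libc::ECONNRESET | libc::EPIPE => SocketErr::Broken(err),
        // Everything else surfaces as a generic socket error.
        _ => SocketErr::Other(err),
    }
}

fn main() {
    assert!(matches!(classify(libc::EINTR), SocketErr::Retry(_)));
    assert!(matches!(classify(libc::EPIPE), SocketErr::Broken(_)));
    assert!(matches!(classify(libc::EACCES), SocketErr::Other(_)));
    println!("errno classification behaves as expected");
}
```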
- /// * - SocketError: failed to create listener socket. - pub fn new>(path: P, unlink: bool) -> Result { - if unlink { - let _ = std::fs::remove_file(&path); - } - let fd = UnixListener::bind(&path).map_err(Error::SocketError)?; - - Ok(Listener { - fd, - path: Some(path.as_ref().to_owned()), - }) - } - - /// Accept an incoming connection. - /// - /// # Return: - /// * - Some(UnixStream): new UnixStream object if new incoming connection is available. - /// * - None: no incoming connection available. - /// * - SocketError: errors from accept(). - pub fn accept(&self) -> Result> { - loop { - match self.fd.accept() { - Ok((socket, _addr)) => return Ok(Some(socket)), - Err(e) => { - match e.kind() { - // No incoming connection available. - ErrorKind::WouldBlock => return Ok(None), - // New connection closed by peer. - ErrorKind::ConnectionAborted => return Ok(None), - // Interrupted by signals, retry - ErrorKind::Interrupted => continue, - _ => return Err(Error::SocketError(e)), - } - } - } - } - } - - /// Change blocking status on the listener. - /// - /// # Return: - /// * - () on success. - /// * - SocketError: failure from set_nonblocking(). - pub fn set_nonblocking(&self, block: bool) -> Result<()> { - self.fd.set_nonblocking(block).map_err(Error::SocketError) - } -} - -impl AsRawFd for Listener { - fn as_raw_fd(&self) -> RawFd { - self.fd.as_raw_fd() - } -} - -impl FromRawFd for Listener { - unsafe fn from_raw_fd(fd: RawFd) -> Self { - Listener { - fd: UnixListener::from_raw_fd(fd), - path: None, - } - } -} - -impl Drop for Listener { - fn drop(&mut self) { - if let Some(path) = &self.path { - let _ = std::fs::remove_file(path); - } - } -} - -/// Unix domain socket endpoint. -pub(crate) struct Endpoint { - sock: UnixStream, -} - -impl Endpoint { - /// Create a new stream by connecting to server at `str`. - /// - /// # Return: - /// * - the new Endpoint object on success. - /// * - SocketConnect: failed to connect to peer. - pub fn connect>(path: P) -> Result { - let sock = UnixStream::connect(path).map_err(Error::SocketConnect)?; - Ok(Self::from_stream(sock)) - } - - /// Create an endpoint from a stream object. - pub fn from_stream(sock: UnixStream) -> Self { - Endpoint { sock } - } - - /// Close the underlying socket. - pub fn close(&self) { - let _ = self.sock.shutdown(Shutdown::Both); - } - - /// Sends bytes from scatter-gather vectors over the socket with optional attached file - /// descriptors. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - pub fn send_iovec(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { - let rfds = match fds { - Some(rfds) => rfds, - _ => &[], - }; - self.sock.send_with_fds(iovs, rfds).map_err(Into::into) - } - - /// Sends all bytes from scatter-gather vectors over the socket with optional attached file - /// descriptors. Will loop until all data has been transfered. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. 
- pub fn send_iovec_all(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { - let mut data_sent = 0; - let mut data_total = 0; - let iov_lens: Vec = iovs.iter().map(|iov| iov.len()).collect(); - for len in &iov_lens { - data_total += len; - } - - while (data_total - data_sent) > 0 { - let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_sent); - let iov = &iovs[nr_skip][offset..]; - - let data = &[&[iov], &iovs[(nr_skip + 1)..]].concat(); - let sfds = if data_sent == 0 { fds } else { None }; - - let sent = self.send_iovec(data, sfds); - match sent { - Ok(0) => return Ok(data_sent), - Ok(n) => data_sent += n, - Err(e) => match e { - Error::SocketRetry(_) => {} - _ => return Err(e), - }, - } - } - Ok(data_sent) - } - - /// Sends bytes from a slice over the socket with optional attached file descriptors. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - pub fn send_slice(&mut self, data: &[u8], fds: Option<&[RawFd]>) -> Result { - self.send_iovec(&[data], fds) - } - - /// Sends a header-only message with optional attached file descriptors. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - pub fn send_header(&mut self, hdr: &MsgHeader, fds: Option<&[RawFd]>) -> Result<()> { - // Safe because there can't be other mutable referance to hdr. - let iovs = unsafe { - [slice::from_raw_parts( - hdr as *const MsgHeader as *const u8, - mem::size_of::(), - )] - }; - let bytes = self.send_iovec_all(&iovs[..], fds)?; - if bytes != mem::size_of::() { - return Err(Error::PartialMessage); - } - Ok(()) - } - - /// Send a message with header and body. Optional file descriptors may be attached to - /// the message. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - pub fn send_message( - &mut self, - hdr: &MsgHeader, - body: &T, - fds: Option<&[RawFd]>, - ) -> Result<()> { - if mem::size_of::() > MAX_MSG_SIZE { - return Err(Error::OversizedMsg); - } - // Safe because there can't be other mutable referance to hdr and body. - let iovs = unsafe { - [ - slice::from_raw_parts( - hdr as *const MsgHeader as *const u8, - mem::size_of::(), - ), - slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), - ] - }; - let bytes = self.send_iovec_all(&iovs[..], fds)?; - if bytes != mem::size_of::() + mem::size_of::() { - return Err(Error::PartialMessage); - } - Ok(()) - } - - /// Send a message with header, body and payload. Optional file descriptors - /// may also be attached to the message. - /// - /// # Return: - /// * - number of bytes sent on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - OversizedMsg: message size is too big. - /// * - PartialMessage: received a partial message. 
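The endpoint always writes a fixed-size header first and then a body whose length the receiver takes from the header, which is why `recv_data` reads exactly `hdr.get_size()` bytes. Below is a minimal framing sketch over a plain `UnixStream` pair with a toy 8-byte header (tag plus body length) standing in for the real `MsgHeader`; it ignores file-descriptor passing entirely.

```rust
use std::io::{Read, Result, Write};
use std::os::unix::net::UnixStream;

/// Toy wire header: 4-byte tag followed by 4-byte body length, little-endian.
fn send_message(sock: &mut UnixStream, tag: u32, body: &[u8]) -> Result<()> {
    let mut hdr = [0u8; 8];
    hdr[..4].copy_from_slice(&tag.to_le_bytes());
    hdr[4..].copy_from_slice(&(body.len() as u32).to_le_bytes());
    sock.write_all(&hdr)?;
    sock.write_all(body)
}

/// Read the header first, then exactly `len` body bytes, mirroring recv_header/recv_data.
fn recv_message(sock: &mut UnixStream) -> Result<(u32, Vec<u8>)> {
    let mut hdr = [0u8; 8];
    sock.read_exact(&mut hdr)?;
    let tag = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
    let len = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;
    let mut body = vec![0u8; len];
    sock.read_exact(&mut body)?;
    Ok((tag, body))
}

fn main() -> Result<()> {
    let (mut client, mut server) = UnixStream::pair()?;

    send_message(&mut client, 7, b"fetch range please")?;
    let (tag, body) = recv_message(&mut server)?;

    assert_eq!(tag, 7);
    assert_eq!(body, b"fetch range please".to_vec());
    println!("tag {} with {} body bytes", tag, body.len());
    Ok(())
}
```

The real endpoint additionally attaches file descriptors to the header write, which is why message boundaries must be respected on the receive side: a recvmsg that crosses the packet boundary would lose the ancillary data.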
- /// * - IncorrectFds: wrong number of attached fds. - pub fn send_message_with_payload( - &mut self, - hdr: &MsgHeader, - body: &T, - payload: &[u8], - fds: Option<&[RawFd]>, - ) -> Result<()> { - let len = payload.len(); - if mem::size_of::() > MAX_MSG_SIZE { - return Err(Error::OversizedMsg); - } - if len > MAX_MSG_SIZE - mem::size_of::() { - return Err(Error::OversizedMsg); - } - if let Some(fd_arr) = fds { - if fd_arr.len() > MAX_ATTACHED_FD_ENTRIES { - return Err(Error::IncorrectFds); - } - } - - // Safe because there can't be other mutable reference to hdr, body and payload. - let iovs = unsafe { - [ - slice::from_raw_parts( - hdr as *const MsgHeader as *const u8, - mem::size_of::(), - ), - slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), - slice::from_raw_parts(payload.as_ptr() as *const u8, len), - ] - }; - let total = mem::size_of::() + mem::size_of::() + len; - let len = self.send_iovec_all(&iovs, fds)?; - if len != total { - return Err(Error::PartialMessage); - } - Ok(()) - } - - /// Reads bytes from the socket into the given scatter/gather vectors. - /// - /// # Return: - /// * - (number of bytes received, buf) on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - pub fn recv_data(&mut self, len: usize) -> Result<(usize, Vec)> { - let mut rbuf = vec![0u8; len]; - let mut iovs = [iovec { - iov_base: rbuf.as_mut_ptr() as *mut c_void, - iov_len: len, - }]; - // Safe because we own rbuf and it's safe to fill a byte array with arbitrary data. - let (bytes, _) = unsafe { self.sock.recv_with_fds(&mut iovs, &mut [])? }; - Ok((bytes, rbuf)) - } - - /// Reads bytes from the socket into the given scatter/gather vectors with optional attached - /// file. - /// - /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little - /// tricky to pass file descriptors through such a communication channel. Let's assume that a - /// sender sending a message with some file descriptors attached. To successfully receive those - /// attached file descriptors, the receiver must obey following rules: - /// 1) file descriptors are attached to a message. - /// 2) message(packet) boundaries must be respected on the receive side. - /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the - /// attached file descriptors will get lost. - /// Note that this function wraps received file descriptors as `File`. - /// - /// # Return: - /// * - (number of bytes received, [received files]) on success - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// - /// # Safety - /// - /// It is the callers responsibility to ensure it is safe for arbitrary data to be - /// written to the iovec pointers. - pub unsafe fn recv_into_iovec( - &mut self, - iovs: &mut [iovec], - ) -> Result<(usize, Option>)> { - let mut fd_array = vec![0; MAX_ATTACHED_FD_ENTRIES]; - - let (bytes, fds) = self.sock.recv_with_fds(iovs, &mut fd_array)?; - - let files = match fds { - 0 => None, - n => { - let files = fd_array - .iter() - .take(n) - .map(|fd| { - // Safe because we have the ownership of `fd`. 
- File::from_raw_fd(*fd) - }) - .collect(); - Some(files) - } - }; - - Ok((bytes, files)) - } - - /// Reads all bytes from the socket into the given scatter/gather vectors with optional - /// attached files. Will loop until all data has been transferred. - /// - /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little - /// tricky to pass file descriptors through such a communication channel. Let's assume that a - /// sender sending a message with some file descriptors attached. To successfully receive those - /// attached file descriptors, the receiver must obey following rules: - /// 1) file descriptors are attached to a message. - /// 2) message(packet) boundaries must be respected on the receive side. - /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the - /// attached file descriptors will get lost. - /// Note that this function wraps received file descriptors as `File`. - /// - /// # Return: - /// * - (number of bytes received, [received fds]) on success - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// - /// # Safety - /// - /// It is the callers responsibility to ensure it is safe for arbitrary data to be - /// written to the iovec pointers. - pub unsafe fn recv_into_iovec_all( - &mut self, - iovs: &mut [iovec], - ) -> Result<(usize, Option>)> { - let mut data_read = 0; - let mut data_total = 0; - let mut rfds = None; - let iov_lens: Vec = iovs.iter().map(|iov| iov.iov_len).collect(); - for len in &iov_lens { - data_total += len; - } - - while (data_total - data_read) > 0 { - let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_read); - let iov = &mut iovs[nr_skip]; - - let mut data = [ - &[iovec { - iov_base: (iov.iov_base as usize + offset) as *mut c_void, - iov_len: iov.iov_len - offset, - }], - &iovs[(nr_skip + 1)..], - ] - .concat(); - - let res = self.recv_into_iovec(&mut data); - match res { - Ok((0, _)) => return Ok((data_read, rfds)), - Ok((n, fds)) => { - if data_read == 0 { - rfds = fds; - } - data_read += n; - } - Err(e) => match e { - Error::SocketRetry(_) => {} - _ => return Err(e), - }, - } - } - Ok((data_read, rfds)) - } - - /// Reads bytes from the socket into a new buffer with optional attached - /// files. Received file descriptors are set close-on-exec and converted to `File`. - /// - /// # Return: - /// * - (number of bytes received, buf, [received files]) on success. - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - pub fn recv_into_buf( - &mut self, - buf_size: usize, - ) -> Result<(usize, Vec, Option>)> { - let mut buf = vec![0u8; buf_size]; - let (bytes, files) = { - let mut iovs = [iovec { - iov_base: buf.as_mut_ptr() as *mut c_void, - iov_len: buf_size, - }]; - // Safe because we own buf and it's safe to fill a byte array with arbitrary data. - unsafe { self.recv_into_iovec(&mut iovs)? } - }; - Ok((bytes, buf, files)) - } - - /// Receive a header-only message with optional attached files. - /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be - /// accepted and all other file descriptor will be discard silently. - /// - /// # Return: - /// * - (message header, [received files]) on success. - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. 
- /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - /// * - InvalidMessage: received a invalid message. - pub fn recv_header(&mut self) -> Result<(MsgHeader, Option>)> { - let mut hdr = MsgHeader::default(); - let mut iovs = [iovec { - iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, - iov_len: mem::size_of::(), - }]; - // Safe because we own hdr and it's ByteValued. - let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; - - if bytes != mem::size_of::() { - return Err(Error::PartialMessage); - } else if !hdr.is_valid() { - return Err(Error::InvalidMessage); - } - - Ok((hdr, files)) - } - - /// Receive a message with optional attached file descriptors. - /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be - /// accepted and all other file descriptor will be discard silently. - /// - /// # Return: - /// * - (message header, message body, [received files]) on success. - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - /// * - InvalidMessage: received a invalid message. - pub fn recv_body( - &mut self, - ) -> Result<(MsgHeader, T, Option>)> { - let mut hdr = MsgHeader::default(); - let mut body: T = Default::default(); - let mut iovs = [ - iovec { - iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, - iov_len: mem::size_of::(), - }, - iovec { - iov_base: (&mut body as *mut T) as *mut c_void, - iov_len: mem::size_of::(), - }, - ]; - // Safe because we own hdr and body and they're ByteValued. - let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; - - let total = mem::size_of::() + mem::size_of::(); - if bytes != total { - return Err(Error::PartialMessage); - } else if !hdr.is_valid() || !body.is_valid() { - return Err(Error::InvalidMessage); - } - - Ok((hdr, body, files)) - } - - /// Receive a message with header and optional content. Callers need to - /// pre-allocate a big enough buffer to receive the message body and - /// optional payload. If there are attached file descriptor associated - /// with the message, the first MAX_ATTACHED_FD_ENTRIES file descriptors - /// will be accepted and all other file descriptor will be discard - /// silently. - /// - /// # Return: - /// * - (message header, message size, [received files]) on success. - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - /// * - InvalidMessage: received a invalid message. - pub fn recv_body_into_buf( - &mut self, - buf: &mut [u8], - ) -> Result<(MsgHeader, usize, Option>)> { - let mut hdr = MsgHeader::default(); - let mut iovs = [ - iovec { - iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, - iov_len: mem::size_of::(), - }, - iovec { - iov_base: buf.as_mut_ptr() as *mut c_void, - iov_len: buf.len(), - }, - ]; - // Safe because we own hdr and have a mutable borrow of buf, and hdr is ByteValued - // and it's safe to fill a byte slice with arbitrary data. - let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? 
}; - - if bytes < mem::size_of::() { - return Err(Error::PartialMessage); - } else if !hdr.is_valid() { - return Err(Error::InvalidMessage); - } - - Ok((hdr, bytes - mem::size_of::(), files)) - } - - /// Receive a message with optional payload and attached file descriptors. - /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be - /// accepted and all other file descriptor will be discard silently. - /// - /// # Return: - /// * - (message header, message body, size of payload, [received files]) on success. - /// * - SocketRetry: temporary error caused by signals or short of resources. - /// * - SocketBroken: the underline socket is broken. - /// * - SocketError: other socket related errors. - /// * - PartialMessage: received a partial message. - /// * - InvalidMessage: received a invalid message. - #[cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))] - pub fn recv_payload_into_buf( - &mut self, - buf: &mut [u8], - ) -> Result<(MsgHeader, T, usize, Option>)> { - let mut hdr = MsgHeader::default(); - let mut body: T = Default::default(); - let mut iovs = [ - iovec { - iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, - iov_len: mem::size_of::(), - }, - iovec { - iov_base: (&mut body as *mut T) as *mut c_void, - iov_len: mem::size_of::(), - }, - iovec { - iov_base: buf.as_mut_ptr() as *mut c_void, - iov_len: buf.len(), - }, - ]; - // Safe because we own hdr and body and have a mutable borrow of buf, and - // hdr and body are ByteValued, and it's safe to fill a byte slice with - // arbitrary data. - let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; - - let total = mem::size_of::() + mem::size_of::(); - if bytes < total { - return Err(Error::PartialMessage); - } else if !hdr.is_valid() || !body.is_valid() { - return Err(Error::InvalidMessage); - } - - Ok((hdr, body, bytes - total, files)) - } -} - -impl AsRawFd for Endpoint { - fn as_raw_fd(&self) -> RawFd { - self.sock.as_raw_fd() - } -} - -// Given a slice of sizes and the `skip_size`, return the offset of `skip_size` in the slice. 
-// For example: -// let iov_lens = vec![4, 4, 5]; -// let size = 6; -// assert_eq!(get_sub_iovs_offset(&iov_len, size), (1, 2)); -fn get_sub_iovs_offset(iov_lens: &[usize], skip_size: usize) -> (usize, usize) { - let mut size = skip_size; - let mut nr_skip = 0; - - for len in iov_lens { - if size >= *len { - size -= *len; - nr_skip += 1; - } else { - break; - } - } - (nr_skip, size) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::{Read, Seek, SeekFrom, Write}; - use vmm_sys_util::rand::rand_alphanumerics; - use vmm_sys_util::tempfile::TempFile; - - fn temp_path() -> PathBuf { - PathBuf::from(format!( - "/tmp/blob_test_{}", - rand_alphanumerics(8).to_str().unwrap() - )) - } - - #[test] - fn create_listener() { - let path = temp_path(); - let listener = Listener::new(&path, true).unwrap(); - - assert!(listener.as_raw_fd() > 0); - } - - #[test] - fn create_listener_from_raw_fd() { - let path = temp_path(); - let file = File::create(path).unwrap(); - let listener = unsafe { Listener::from_raw_fd(file.as_raw_fd()) }; - - assert!(listener.as_raw_fd() > 0); - } - - #[test] - fn accept_connection() { - let path = temp_path(); - let listener = Listener::new(&path, true).unwrap(); - listener.set_nonblocking(true).unwrap(); - - // accept on a fd without incoming connection - let conn = listener.accept().unwrap(); - assert!(conn.is_none()); - } - - #[test] - fn send_data() { - let path = temp_path(); - let listener = Listener::new(&path, true).unwrap(); - listener.set_nonblocking(true).unwrap(); - let mut master = Endpoint::connect(&path).unwrap(); - let sock = listener.accept().unwrap().unwrap(); - let mut slave = Endpoint::from_stream(sock); - - let buf1 = vec![0x1, 0x2, 0x3, 0x4]; - let mut len = master.send_slice(&buf1[..], None).unwrap(); - assert_eq!(len, 4); - let (bytes, buf2, _) = slave.recv_into_buf(0x1000).unwrap(); - assert_eq!(bytes, 4); - assert_eq!(&buf1[..], &buf2[..bytes]); - - len = master.send_slice(&buf1[..], None).unwrap(); - assert_eq!(len, 4); - let (bytes, buf2, _) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[..2], &buf2[..]); - let (bytes, buf2, _) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[2..], &buf2[..]); - } - - #[test] - fn send_fd() { - let path = temp_path(); - let listener = Listener::new(&path, true).unwrap(); - listener.set_nonblocking(true).unwrap(); - let mut master = Endpoint::connect(&path).unwrap(); - let sock = listener.accept().unwrap().unwrap(); - let mut slave = Endpoint::from_stream(sock); - - let mut fd = TempFile::new().unwrap().into_file(); - write!(fd, "test").unwrap(); - - // Normal case for sending/receiving file descriptors - let buf1 = vec![0x1, 0x2, 0x3, 0x4]; - let len = master - .send_slice(&buf1[..], Some(&[fd.as_raw_fd()])) - .unwrap(); - assert_eq!(len, 4); - - let (bytes, buf2, files) = slave.recv_into_buf(4).unwrap(); - assert_eq!(bytes, 4); - assert_eq!(&buf1[..], &buf2[..]); - assert!(files.is_some()); - let files = files.unwrap(); - { - assert_eq!(files.len(), 1); - let mut file = &files[0]; - let mut content = String::new(); - file.seek(SeekFrom::Start(0)).unwrap(); - file.read_to_string(&mut content).unwrap(); - assert_eq!(content, "test"); - } - - // Following communication pattern should work: - // Sending side: data(header, body) with fds - // Receiving side: data(header) with fds, data(body) - let len = master - .send_slice( - &buf1[..], - Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), - ) - .unwrap(); - assert_eq!(len, 4); - - let 
(bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[..2], &buf2[..]); - assert!(files.is_some()); - let files = files.unwrap(); - { - assert_eq!(files.len(), 3); - let mut file = &files[1]; - let mut content = String::new(); - file.seek(SeekFrom::Start(0)).unwrap(); - file.read_to_string(&mut content).unwrap(); - assert_eq!(content, "test"); - } - let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[2..], &buf2[..]); - assert!(files.is_none()); - - // Following communication pattern should not work: - // Sending side: data(header, body) with fds - // Receiving side: data(header), data(body) with fds - let len = master - .send_slice( - &buf1[..], - Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), - ) - .unwrap(); - assert_eq!(len, 4); - - let (bytes, buf4) = slave.recv_data(2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[..2], &buf4[..]); - let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[2..], &buf2[..]); - assert!(files.is_none()); - - // Following communication pattern should work: - // Sending side: data, data with fds - // Receiving side: data, data with fds - let len = master.send_slice(&buf1[..], None).unwrap(); - assert_eq!(len, 4); - let len = master - .send_slice( - &buf1[..], - Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), - ) - .unwrap(); - assert_eq!(len, 4); - - let (bytes, buf2, files) = slave.recv_into_buf(0x4).unwrap(); - assert_eq!(bytes, 4); - assert_eq!(&buf1[..], &buf2[..]); - assert!(files.is_none()); - - let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[..2], &buf2[..]); - assert!(files.is_some()); - let files = files.unwrap(); - { - assert_eq!(files.len(), 3); - let mut file = &files[1]; - let mut content = String::new(); - file.seek(SeekFrom::Start(0)).unwrap(); - file.read_to_string(&mut content).unwrap(); - assert_eq!(content, "test"); - } - let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); - assert_eq!(bytes, 2); - assert_eq!(&buf1[2..], &buf2[..]); - assert!(files.is_none()); - - // Following communication pattern should not work: - // Sending side: data1, data2 with fds - // Receiving side: data + partial of data2, left of data2 with fds - let len = master.send_slice(&buf1[..], None).unwrap(); - assert_eq!(len, 4); - let len = master - .send_slice( - &buf1[..], - Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), - ) - .unwrap(); - assert_eq!(len, 4); - - let (bytes, _buf) = slave.recv_data(5).unwrap(); - #[cfg(target_os = "linux")] - assert_eq!(bytes, 5); - - #[cfg(target_os = "macos")] - assert_eq!(bytes, 4); - - let (bytes, _buf, files) = slave.recv_into_buf(0x4).unwrap(); - #[cfg(target_os = "linux")] - assert_eq!(bytes, 3); - #[cfg(target_os = "linux")] - assert!(files.is_none()); - - #[cfg(target_os = "macos")] - assert_eq!(bytes, 4); - #[cfg(target_os = "macos")] - assert!(files.is_some()); - - // If the target fd array is too small, extra file descriptors will get lost. 
- let len = master - .send_slice( - &buf1[..], - Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), - ) - .unwrap(); - assert_eq!(len, 4); - - let (bytes, _, files) = slave.recv_into_buf(0x4).unwrap(); - assert_eq!(bytes, 4); - assert!(files.is_some()); - } - - #[test] - fn send_recv() { - let path = temp_path(); - let listener = Listener::new(&path, true).unwrap(); - listener.set_nonblocking(true).unwrap(); - let mut master = Endpoint::connect(&path).unwrap(); - let sock = listener.accept().unwrap().unwrap(); - let mut slave = Endpoint::from_stream(sock); - - let mut hdr1 = MsgHeader::new(2, RequestCode::GetBlob, 0, mem::size_of::() as u32); - hdr1.set_need_reply(true); - let features1 = 0x1u64; - master.send_message(&hdr1, &features1, None).unwrap(); - - let mut features2 = 0u64; - let slice = unsafe { - slice::from_raw_parts_mut( - (&mut features2 as *mut u64) as *mut u8, - mem::size_of::(), - ) - }; - let (hdr2, bytes, files) = slave.recv_body_into_buf(slice).unwrap(); - assert_eq!(hdr1, hdr2); - assert_eq!(bytes, 8); - assert_eq!(features1, features2); - assert!(files.is_none()); - - master.send_header(&hdr1, None).unwrap(); - let (hdr2, files) = slave.recv_header().unwrap(); - assert_eq!(hdr1, hdr2); - assert!(files.is_none()); - } -} +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Structs for Unix Domain Socket listener and endpoint. +//! +//! This file is copied from vhost/src/vhost-user/connection.rs, please keep it as is when possible. + +#![allow(dead_code)] + +use std::fs::File; +use std::io::Error as IOError; +use std::io::ErrorKind; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::path::{Path, PathBuf}; +use std::{mem, slice}; + +use libc::{c_void, iovec}; +use vm_memory::ByteValued; + +use super::message::*; +use dbs_uhttp::{ScmSocket, SysError}; +use std::net::Shutdown; + +#[allow(clippy::enum_variant_names)] +#[derive(Debug)] +pub(crate) enum Error { + /// Invalid parameters. + InvalidParam, + /// Unsupported operations due to that the protocol feature hasn't been negotiated. + InvalidOperation, + /// Invalid message format, flag or content. + InvalidMessage, + /// Only part of a message have been sent or received successfully + PartialMessage, + /// Message is too large + OversizedMsg, + /// Fd array in question is too big or too small + IncorrectFds, + /// Can't connect to peer. + SocketConnect(std::io::Error), + /// Generic socket errors. + SocketError(std::io::Error), + /// The socket is broken or has been closed. + SocketBroken(std::io::Error), + /// Should retry the socket operation again. + SocketRetry(std::io::Error), + /// Failure from the slave side. + SlaveInternalError, + /// Failure from the master side. + MasterInternalError, + /// Virtio/protocol features mismatch. 
+ FeatureMismatch, + /// Error from request handler + ReqHandlerError(IOError), + /// memfd file creation error + MemFdCreateError, + /// File truncate error + FileTrucateError, + /// memfd file seal errors + MemFdSealError, +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::InvalidParam => write!(f, "invalid parameters"), + Error::InvalidOperation => write!(f, "invalid operation"), + Error::InvalidMessage => write!(f, "invalid message"), + Error::PartialMessage => write!(f, "partial message"), + Error::OversizedMsg => write!(f, "oversized message"), + Error::IncorrectFds => write!(f, "wrong number of attached fds"), + Error::SocketError(e) => write!(f, "socket error: {}", e), + Error::SocketConnect(e) => write!(f, "can't connect to peer: {}", e), + Error::SocketBroken(e) => write!(f, "socket is broken: {}", e), + Error::SocketRetry(e) => write!(f, "temporary socket error: {}", e), + Error::SlaveInternalError => write!(f, "slave internal error"), + Error::MasterInternalError => write!(f, "Master internal error"), + Error::FeatureMismatch => write!(f, "virtio/protocol features mismatch"), + Error::ReqHandlerError(e) => write!(f, "handler failed to handle request: {}", e), + Error::MemFdCreateError => { + write!(f, "handler failed to allocate memfd during get_inflight_fd") + } + Error::FileTrucateError => { + write!(f, "handler failed to trucate memfd during get_inflight_fd") + } + Error::MemFdSealError => write!( + f, + "handler failed to apply seals to memfd during get_inflight_fd" + ), + } + } +} + +impl std::error::Error for Error {} + +impl Error { + /// Determine whether to rebuild the underline communication channel. + pub fn should_reconnect(&self) -> bool { + match *self { + // Should reconnect because it may be caused by temporary network errors. + Error::PartialMessage => true, + // Should reconnect because the underline socket is broken. + Error::SocketBroken(_) => true, + // Slave internal error, hope it recovers on reconnect. + Error::SlaveInternalError => true, + // Master internal error, hope it recovers on reconnect. + Error::MasterInternalError => true, + // Should just retry the IO operation instead of rebuilding the underline connection. + Error::SocketRetry(_) => false, + Error::InvalidParam | Error::InvalidOperation => false, + Error::InvalidMessage | Error::IncorrectFds | Error::OversizedMsg => false, + Error::SocketError(_) | Error::SocketConnect(_) => false, + Error::FeatureMismatch => false, + Error::ReqHandlerError(_) => false, + Error::MemFdCreateError | Error::FileTrucateError | Error::MemFdSealError => false, + } + } +} + +impl std::convert::From for Error { + #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux + fn from(err: std::io::Error) -> Self { + Error::SocketError(err) + } +} + +impl std::convert::From for Error { + /// Convert raw socket errors into meaningful blob manager errors. + /// + /// The vmm_sys_util::errno::Error is a simple wrapper over the raw errno, which doesn't means + /// much to the connection manager. So convert it into meaningful errors to simplify + /// the connection manager logic. + /// + /// # Return: + /// * - Error::SocketRetry: temporary error caused by signals or short of resources. + /// * - Error::SocketBroken: the underline socket is broken. + /// * - Error::SocketError: other socket related errors. 
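The variants above split roughly into "retry the call", "rebuild the connection" and "give up", and `should_reconnect()` encodes the middle category. The sketch below is one plausible caller-side policy, not part of this module; `RecoveryAction` and `classify` are names invented for the illustration:

```rust
/// Illustrative outcome of classifying a connection error (not part of this module).
enum RecoveryAction {
    /// Tear down the endpoint and reconnect.
    Reconnect,
    /// Retry the same send/recv call.
    Retry,
    /// Propagate the error to the caller.
    Fail(Error),
}

fn classify(e: Error) -> RecoveryAction {
    if e.should_reconnect() {
        // PartialMessage, SocketBroken, Slave/MasterInternalError: the channel
        // state is unknown, so rebuild the whole connection.
        RecoveryAction::Reconnect
    } else if matches!(e, Error::SocketRetry(_)) {
        // Transient errno such as EAGAIN/EINTR/ENOBUFS: just retry the I/O call.
        RecoveryAction::Retry
    } else {
        // Invalid or oversized messages, programming errors, and so on.
        RecoveryAction::Fail(e)
    }
}
```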
+ #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux + fn from(err: SysError) -> Self { + match err.errno() { + // The socket is marked nonblocking and the requested operation would block. + libc::EAGAIN => Error::SocketRetry(IOError::from_raw_os_error(libc::EAGAIN)), + // The socket is marked nonblocking and the requested operation would block. + libc::EWOULDBLOCK => Error::SocketRetry(IOError::from_raw_os_error(libc::EWOULDBLOCK)), + // A signal occurred before any data was transmitted + libc::EINTR => Error::SocketRetry(IOError::from_raw_os_error(libc::EINTR)), + // The output queue for a network interface was full. This generally indicates + // that the interface has stopped sending, but may be caused by transient congestion. + libc::ENOBUFS => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOBUFS)), + // No memory available. + libc::ENOMEM => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOMEM)), + // Connection reset by peer. + libc::ECONNRESET => Error::SocketBroken(IOError::from_raw_os_error(libc::ECONNRESET)), + // The local end has been shut down on a connection oriented socket. In this case the + // process will also receive a SIGPIPE unless MSG_NOSIGNAL is set. + libc::EPIPE => Error::SocketBroken(IOError::from_raw_os_error(libc::EPIPE)), + // Write permission is denied on the destination socket file, or search permission is + // denied for one of the directories the path prefix. + libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)), + // Catch all other errors + e => Error::SocketError(IOError::from_raw_os_error(e)), + } + } +} + +pub(crate) type Result = std::result::Result; + +/// Unix domain socket listener for accepting incoming connections. +pub(crate) struct Listener { + fd: UnixListener, + path: Option, +} + +impl Listener { + /// Create a unix domain socket listener. + /// + /// # Return: + /// * - the new Listener object on success. + /// * - SocketError: failed to create listener socket. + pub fn new>(path: P, unlink: bool) -> Result { + if unlink { + let _ = std::fs::remove_file(&path); + } + let fd = UnixListener::bind(&path).map_err(Error::SocketError)?; + + Ok(Listener { + fd, + path: Some(path.as_ref().to_owned()), + }) + } + + /// Accept an incoming connection. + /// + /// # Return: + /// * - Some(UnixStream): new UnixStream object if new incoming connection is available. + /// * - None: no incoming connection available. + /// * - SocketError: errors from accept(). + pub fn accept(&self) -> Result> { + loop { + match self.fd.accept() { + Ok((socket, _addr)) => return Ok(Some(socket)), + Err(e) => { + match e.kind() { + // No incoming connection available. + ErrorKind::WouldBlock => return Ok(None), + // New connection closed by peer. + ErrorKind::ConnectionAborted => return Ok(None), + // Interrupted by signals, retry + ErrorKind::Interrupted => continue, + _ => return Err(Error::SocketError(e)), + } + } + } + } + } + + /// Change blocking status on the listener. + /// + /// # Return: + /// * - () on success. + /// * - SocketError: failure from set_nonblocking(). 
+ pub fn set_nonblocking(&self, block: bool) -> Result<()> { + self.fd.set_nonblocking(block).map_err(Error::SocketError) + } +} + +impl AsRawFd for Listener { + fn as_raw_fd(&self) -> RawFd { + self.fd.as_raw_fd() + } +} + +impl FromRawFd for Listener { + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Listener { + fd: UnixListener::from_raw_fd(fd), + path: None, + } + } +} + +impl Drop for Listener { + fn drop(&mut self) { + if let Some(path) = &self.path { + let _ = std::fs::remove_file(path); + } + } +} + +/// Unix domain socket endpoint. +pub(crate) struct Endpoint { + sock: UnixStream, +} + +impl Endpoint { + /// Create a new stream by connecting to server at `str`. + /// + /// # Return: + /// * - the new Endpoint object on success. + /// * - SocketConnect: failed to connect to peer. + pub fn connect>(path: P) -> Result { + let sock = UnixStream::connect(path).map_err(Error::SocketConnect)?; + Ok(Self::from_stream(sock)) + } + + /// Create an endpoint from a stream object. + pub fn from_stream(sock: UnixStream) -> Self { + Endpoint { sock } + } + + /// Close the underlying socket. + pub fn close(&self) { + let _ = self.sock.shutdown(Shutdown::Both); + } + + /// Sends bytes from scatter-gather vectors over the socket with optional attached file + /// descriptors. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn send_iovec(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { + let rfds = match fds { + Some(rfds) => rfds, + _ => &[], + }; + self.sock.send_with_fds(iovs, rfds).map_err(Into::into) + } + + /// Sends all bytes from scatter-gather vectors over the socket with optional attached file + /// descriptors. Will loop until all data has been transfered. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn send_iovec_all(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { + let mut data_sent = 0; + let mut data_total = 0; + let iov_lens: Vec = iovs.iter().map(|iov| iov.len()).collect(); + for len in &iov_lens { + data_total += len; + } + + while (data_total - data_sent) > 0 { + let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_sent); + let iov = &iovs[nr_skip][offset..]; + + let data = &[&[iov], &iovs[(nr_skip + 1)..]].concat(); + let sfds = if data_sent == 0 { fds } else { None }; + + let sent = self.send_iovec(data, sfds); + match sent { + Ok(0) => return Ok(data_sent), + Ok(n) => data_sent += n, + Err(e) => match e { + Error::SocketRetry(_) => {} + _ => return Err(e), + }, + } + } + Ok(data_sent) + } + + /// Sends bytes from a slice over the socket with optional attached file descriptors. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn send_slice(&mut self, data: &[u8], fds: Option<&[RawFd]>) -> Result { + self.send_iovec(&[data], fds) + } + + /// Sends a header-only message with optional attached file descriptors. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. 
+ /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + pub fn send_header(&mut self, hdr: &MsgHeader, fds: Option<&[RawFd]>) -> Result<()> { + // Safe because there can't be other mutable referance to hdr. + let iovs = unsafe { + [slice::from_raw_parts( + hdr as *const MsgHeader as *const u8, + mem::size_of::(), + )] + }; + let bytes = self.send_iovec_all(&iovs[..], fds)?; + if bytes != mem::size_of::() { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Send a message with header and body. Optional file descriptors may be attached to + /// the message. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + pub fn send_message( + &mut self, + hdr: &MsgHeader, + body: &T, + fds: Option<&[RawFd]>, + ) -> Result<()> { + if mem::size_of::() > MAX_MSG_SIZE { + return Err(Error::OversizedMsg); + } + // Safe because there can't be other mutable referance to hdr and body. + let iovs = unsafe { + [ + slice::from_raw_parts( + hdr as *const MsgHeader as *const u8, + mem::size_of::(), + ), + slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), + ] + }; + let bytes = self.send_iovec_all(&iovs[..], fds)?; + if bytes != mem::size_of::() + mem::size_of::() { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Send a message with header, body and payload. Optional file descriptors + /// may also be attached to the message. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - OversizedMsg: message size is too big. + /// * - PartialMessage: received a partial message. + /// * - IncorrectFds: wrong number of attached fds. + pub fn send_message_with_payload( + &mut self, + hdr: &MsgHeader, + body: &T, + payload: &[u8], + fds: Option<&[RawFd]>, + ) -> Result<()> { + let len = payload.len(); + if mem::size_of::() > MAX_MSG_SIZE { + return Err(Error::OversizedMsg); + } + if len > MAX_MSG_SIZE - mem::size_of::() { + return Err(Error::OversizedMsg); + } + if let Some(fd_arr) = fds { + if fd_arr.len() > MAX_ATTACHED_FD_ENTRIES { + return Err(Error::IncorrectFds); + } + } + + // Safe because there can't be other mutable reference to hdr, body and payload. + let iovs = unsafe { + [ + slice::from_raw_parts( + hdr as *const MsgHeader as *const u8, + mem::size_of::(), + ), + slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), + slice::from_raw_parts(payload.as_ptr() as *const u8, len), + ] + }; + let total = mem::size_of::() + mem::size_of::() + len; + let len = self.send_iovec_all(&iovs, fds)?; + if len != total { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Reads bytes from the socket into the given scatter/gather vectors. + /// + /// # Return: + /// * - (number of bytes received, buf) on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. 
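Putting the send helpers together: the fragment below, written in the style of the module's `send_recv` test (so `use super::*;` and two connected endpoints `master`/`slave` are assumed), frames a fixed-size `u64` body with a `MsgHeader` and reads it back on the peer:

```rust
// Build the header: the tag pairs request and reply, the size is the body length.
let mut hdr = MsgHeader::new(1, RequestCode::GetBlob, 0, mem::size_of::<u64>() as u32);
hdr.set_need_reply(true);

// Any fixed-size ByteValued body works; a u64 keeps the example small.
let body: u64 = 0x1234_5678;
master.send_message(&hdr, &body, None).unwrap();

// Receive the header plus the body bytes into a pre-allocated buffer.
let mut buf = [0u8; mem::size_of::<u64>()];
let (rhdr, nbytes, files) = slave.recv_body_into_buf(&mut buf).unwrap();
assert_eq!(rhdr, hdr);
assert_eq!(nbytes, mem::size_of::<u64>());
assert_eq!(u64::from_ne_bytes(buf), body);
assert!(files.is_none());
```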
+ pub fn recv_data(&mut self, len: usize) -> Result<(usize, Vec)> { + let mut rbuf = vec![0u8; len]; + let mut iovs = [iovec { + iov_base: rbuf.as_mut_ptr() as *mut c_void, + iov_len: len, + }]; + // Safe because we own rbuf and it's safe to fill a byte array with arbitrary data. + let (bytes, _) = unsafe { self.sock.recv_with_fds(&mut iovs, &mut [])? }; + Ok((bytes, rbuf)) + } + + /// Reads bytes from the socket into the given scatter/gather vectors with optional attached + /// file. + /// + /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little + /// tricky to pass file descriptors through such a communication channel. Let's assume that a + /// sender sending a message with some file descriptors attached. To successfully receive those + /// attached file descriptors, the receiver must obey following rules: + /// 1) file descriptors are attached to a message. + /// 2) message(packet) boundaries must be respected on the receive side. + /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the + /// attached file descriptors will get lost. + /// Note that this function wraps received file descriptors as `File`. + /// + /// # Return: + /// * - (number of bytes received, [received files]) on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// + /// # Safety + /// + /// It is the callers responsibility to ensure it is safe for arbitrary data to be + /// written to the iovec pointers. + pub unsafe fn recv_into_iovec( + &mut self, + iovs: &mut [iovec], + ) -> Result<(usize, Option>)> { + let mut fd_array = vec![0; MAX_ATTACHED_FD_ENTRIES]; + + let (bytes, fds) = self.sock.recv_with_fds(iovs, &mut fd_array)?; + + let files = match fds { + 0 => None, + n => { + let files = fd_array + .iter() + .take(n) + .map(|fd| { + // Safe because we have the ownership of `fd`. + File::from_raw_fd(*fd) + }) + .collect(); + Some(files) + } + }; + + Ok((bytes, files)) + } + + /// Reads all bytes from the socket into the given scatter/gather vectors with optional + /// attached files. Will loop until all data has been transferred. + /// + /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little + /// tricky to pass file descriptors through such a communication channel. Let's assume that a + /// sender sending a message with some file descriptors attached. To successfully receive those + /// attached file descriptors, the receiver must obey following rules: + /// 1) file descriptors are attached to a message. + /// 2) message(packet) boundaries must be respected on the receive side. + /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the + /// attached file descriptors will get lost. + /// Note that this function wraps received file descriptors as `File`. + /// + /// # Return: + /// * - (number of bytes received, [received fds]) on success + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// + /// # Safety + /// + /// It is the callers responsibility to ensure it is safe for arbitrary data to be + /// written to the iovec pointers. 
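The rule spelled out in the comments above deserves one concrete illustration: the receiver has to consume the sender's whole message in a single `recvmsg()`, or the attached descriptors ride along with the first partial read (or are dropped). A sketch modeled on the `send_fd` test further down; `master` and `slave` are assumed to be connected `Endpoint`s:

```rust
// Any open descriptor will do for the illustration.
let fd = File::open("/dev/null").unwrap();

// Sender: 4 bytes of payload with one attached descriptor.
let payload = [0x1u8, 0x2, 0x3, 0x4];
master
    .send_slice(&payload[..], Some(&[fd.as_raw_fd()]))
    .unwrap();

// Receiver: read the whole message in one call; the descriptor arrives with
// it, already wrapped as a std::fs::File.
let (nbytes, data, files) = slave.recv_into_buf(payload.len()).unwrap();
assert_eq!(nbytes, payload.len());
assert_eq!(&data[..], &payload[..]);
assert_eq!(files.unwrap().len(), 1);

// Splitting the read (two recv_into_buf(2) calls) would still deliver the
// bytes, but the descriptors only show up on the first partial read, if at all.
```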
+ pub unsafe fn recv_into_iovec_all( + &mut self, + iovs: &mut [iovec], + ) -> Result<(usize, Option>)> { + let mut data_read = 0; + let mut data_total = 0; + let mut rfds = None; + let iov_lens: Vec = iovs.iter().map(|iov| iov.iov_len).collect(); + for len in &iov_lens { + data_total += len; + } + + while (data_total - data_read) > 0 { + let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_read); + let iov = &mut iovs[nr_skip]; + + let mut data = [ + &[iovec { + iov_base: (iov.iov_base as usize + offset) as *mut c_void, + iov_len: iov.iov_len - offset, + }], + &iovs[(nr_skip + 1)..], + ] + .concat(); + + let res = self.recv_into_iovec(&mut data); + match res { + Ok((0, _)) => return Ok((data_read, rfds)), + Ok((n, fds)) => { + if data_read == 0 { + rfds = fds; + } + data_read += n; + } + Err(e) => match e { + Error::SocketRetry(_) => {} + _ => return Err(e), + }, + } + } + Ok((data_read, rfds)) + } + + /// Reads bytes from the socket into a new buffer with optional attached + /// files. Received file descriptors are set close-on-exec and converted to `File`. + /// + /// # Return: + /// * - (number of bytes received, buf, [received files]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn recv_into_buf( + &mut self, + buf_size: usize, + ) -> Result<(usize, Vec, Option>)> { + let mut buf = vec![0u8; buf_size]; + let (bytes, files) = { + let mut iovs = [iovec { + iov_base: buf.as_mut_ptr() as *mut c_void, + iov_len: buf_size, + }]; + // Safe because we own buf and it's safe to fill a byte array with arbitrary data. + unsafe { self.recv_into_iovec(&mut iovs)? } + }; + Ok((bytes, buf, files)) + } + + /// Receive a header-only message with optional attached files. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, [received files]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. + pub fn recv_header(&mut self) -> Result<(MsgHeader, Option>)> { + let mut hdr = MsgHeader::default(); + let mut iovs = [iovec { + iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, + iov_len: mem::size_of::(), + }]; + // Safe because we own hdr and it's ByteValued. + let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; + + if bytes != mem::size_of::() { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, files)) + } + + /// Receive a message with optional attached file descriptors. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, message body, [received files]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. 
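Both `send_iovec_all()` and `recv_into_iovec_all()` above resume a partially completed transfer by asking `get_sub_iovs_offset()` (defined near the end of this file) how many whole iovecs were already covered and how far into the next one the transfer stopped. A small in-crate check, matching the worked example in that helper's comment, makes the contract explicit:

```rust
#[test]
fn sub_iovs_offset_contract() {
    // Buffers of 4, 4 and 5 bytes.
    let iov_lens = [4usize, 4, 5];

    // 6 bytes done: the first iovec is fully consumed (skip 1), and the
    // transfer stopped 2 bytes into the second one.
    assert_eq!(get_sub_iovs_offset(&iov_lens, 6), (1, 2));

    // Nothing transferred yet: start at iovec 0, offset 0.
    assert_eq!(get_sub_iovs_offset(&iov_lens, 0), (0, 0));

    // Everything transferred: all three iovecs are skipped.
    assert_eq!(get_sub_iovs_offset(&iov_lens, 13), (3, 0));
}
```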
+ pub fn recv_body( + &mut self, + ) -> Result<(MsgHeader, T, Option>)> { + let mut hdr = MsgHeader::default(); + let mut body: T = Default::default(); + let mut iovs = [ + iovec { + iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, + iov_len: mem::size_of::(), + }, + iovec { + iov_base: (&mut body as *mut T) as *mut c_void, + iov_len: mem::size_of::(), + }, + ]; + // Safe because we own hdr and body and they're ByteValued. + let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; + + let total = mem::size_of::() + mem::size_of::(); + if bytes != total { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() || !body.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, body, files)) + } + + /// Receive a message with header and optional content. Callers need to + /// pre-allocate a big enough buffer to receive the message body and + /// optional payload. If there are attached file descriptor associated + /// with the message, the first MAX_ATTACHED_FD_ENTRIES file descriptors + /// will be accepted and all other file descriptor will be discard + /// silently. + /// + /// # Return: + /// * - (message header, message size, [received files]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. + pub fn recv_body_into_buf( + &mut self, + buf: &mut [u8], + ) -> Result<(MsgHeader, usize, Option>)> { + let mut hdr = MsgHeader::default(); + let mut iovs = [ + iovec { + iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, + iov_len: mem::size_of::(), + }, + iovec { + iov_base: buf.as_mut_ptr() as *mut c_void, + iov_len: buf.len(), + }, + ]; + // Safe because we own hdr and have a mutable borrow of buf, and hdr is ByteValued + // and it's safe to fill a byte slice with arbitrary data. + let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; + + if bytes < mem::size_of::() { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, bytes - mem::size_of::(), files)) + } + + /// Receive a message with optional payload and attached file descriptors. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, message body, size of payload, [received files]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. 
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))] + pub fn recv_payload_into_buf( + &mut self, + buf: &mut [u8], + ) -> Result<(MsgHeader, T, usize, Option>)> { + let mut hdr = MsgHeader::default(); + let mut body: T = Default::default(); + let mut iovs = [ + iovec { + iov_base: (&mut hdr as *mut MsgHeader) as *mut c_void, + iov_len: mem::size_of::(), + }, + iovec { + iov_base: (&mut body as *mut T) as *mut c_void, + iov_len: mem::size_of::(), + }, + iovec { + iov_base: buf.as_mut_ptr() as *mut c_void, + iov_len: buf.len(), + }, + ]; + // Safe because we own hdr and body and have a mutable borrow of buf, and + // hdr and body are ByteValued, and it's safe to fill a byte slice with + // arbitrary data. + let (bytes, files) = unsafe { self.recv_into_iovec_all(&mut iovs[..])? }; + + let total = mem::size_of::() + mem::size_of::(); + if bytes < total { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() || !body.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, body, bytes - total, files)) + } +} + +impl AsRawFd for Endpoint { + fn as_raw_fd(&self) -> RawFd { + self.sock.as_raw_fd() + } +} + +// Given a slice of sizes and the `skip_size`, return the offset of `skip_size` in the slice. +// For example: +// let iov_lens = vec![4, 4, 5]; +// let size = 6; +// assert_eq!(get_sub_iovs_offset(&iov_len, size), (1, 2)); +fn get_sub_iovs_offset(iov_lens: &[usize], skip_size: usize) -> (usize, usize) { + let mut size = skip_size; + let mut nr_skip = 0; + + for len in iov_lens { + if size >= *len { + size -= *len; + nr_skip += 1; + } else { + break; + } + } + (nr_skip, size) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Read, Seek, SeekFrom, Write}; + use vmm_sys_util::rand::rand_alphanumerics; + use vmm_sys_util::tempfile::TempFile; + + fn temp_path() -> PathBuf { + PathBuf::from(format!( + "/tmp/blob_test_{}", + rand_alphanumerics(8).to_str().unwrap() + )) + } + + #[test] + fn create_listener() { + let path = temp_path(); + let listener = Listener::new(&path, true).unwrap(); + + assert!(listener.as_raw_fd() > 0); + } + + #[test] + fn create_listener_from_raw_fd() { + let path = temp_path(); + let file = File::create(path).unwrap(); + let listener = unsafe { Listener::from_raw_fd(file.as_raw_fd()) }; + + assert!(listener.as_raw_fd() > 0); + } + + #[test] + fn accept_connection() { + let path = temp_path(); + let listener = Listener::new(&path, true).unwrap(); + listener.set_nonblocking(true).unwrap(); + + // accept on a fd without incoming connection + let conn = listener.accept().unwrap(); + assert!(conn.is_none()); + } + + #[test] + fn send_data() { + let path = temp_path(); + let listener = Listener::new(&path, true).unwrap(); + listener.set_nonblocking(true).unwrap(); + let mut master = Endpoint::connect(&path).unwrap(); + let sock = listener.accept().unwrap().unwrap(); + let mut slave = Endpoint::from_stream(sock); + + let buf1 = vec![0x1, 0x2, 0x3, 0x4]; + let mut len = master.send_slice(&buf1[..], None).unwrap(); + assert_eq!(len, 4); + let (bytes, buf2, _) = slave.recv_into_buf(0x1000).unwrap(); + assert_eq!(bytes, 4); + assert_eq!(&buf1[..], &buf2[..bytes]); + + len = master.send_slice(&buf1[..], None).unwrap(); + assert_eq!(len, 4); + let (bytes, buf2, _) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[..2], &buf2[..]); + let (bytes, buf2, _) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[2..], &buf2[..]); + } + + #[test] + fn send_fd() { + let path = 
temp_path(); + let listener = Listener::new(&path, true).unwrap(); + listener.set_nonblocking(true).unwrap(); + let mut master = Endpoint::connect(&path).unwrap(); + let sock = listener.accept().unwrap().unwrap(); + let mut slave = Endpoint::from_stream(sock); + + let mut fd = TempFile::new().unwrap().into_file(); + write!(fd, "test").unwrap(); + + // Normal case for sending/receiving file descriptors + let buf1 = vec![0x1, 0x2, 0x3, 0x4]; + let len = master + .send_slice(&buf1[..], Some(&[fd.as_raw_fd()])) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, buf2, files) = slave.recv_into_buf(4).unwrap(); + assert_eq!(bytes, 4); + assert_eq!(&buf1[..], &buf2[..]); + assert!(files.is_some()); + let files = files.unwrap(); + { + assert_eq!(files.len(), 1); + let mut file = &files[0]; + let mut content = String::new(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.read_to_string(&mut content).unwrap(); + assert_eq!(content, "test"); + } + + // Following communication pattern should work: + // Sending side: data(header, body) with fds + // Receiving side: data(header) with fds, data(body) + let len = master + .send_slice( + &buf1[..], + Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), + ) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[..2], &buf2[..]); + assert!(files.is_some()); + let files = files.unwrap(); + { + assert_eq!(files.len(), 3); + let mut file = &files[1]; + let mut content = String::new(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.read_to_string(&mut content).unwrap(); + assert_eq!(content, "test"); + } + let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[2..], &buf2[..]); + assert!(files.is_none()); + + // Following communication pattern should not work: + // Sending side: data(header, body) with fds + // Receiving side: data(header), data(body) with fds + let len = master + .send_slice( + &buf1[..], + Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), + ) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, buf4) = slave.recv_data(2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[..2], &buf4[..]); + let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[2..], &buf2[..]); + assert!(files.is_none()); + + // Following communication pattern should work: + // Sending side: data, data with fds + // Receiving side: data, data with fds + let len = master.send_slice(&buf1[..], None).unwrap(); + assert_eq!(len, 4); + let len = master + .send_slice( + &buf1[..], + Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), + ) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, buf2, files) = slave.recv_into_buf(0x4).unwrap(); + assert_eq!(bytes, 4); + assert_eq!(&buf1[..], &buf2[..]); + assert!(files.is_none()); + + let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[..2], &buf2[..]); + assert!(files.is_some()); + let files = files.unwrap(); + { + assert_eq!(files.len(), 3); + let mut file = &files[1]; + let mut content = String::new(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.read_to_string(&mut content).unwrap(); + assert_eq!(content, "test"); + } + let (bytes, buf2, files) = slave.recv_into_buf(0x2).unwrap(); + assert_eq!(bytes, 2); + assert_eq!(&buf1[2..], &buf2[..]); + assert!(files.is_none()); + + // Following communication pattern should not work: + // Sending side: data1, data2 with fds + // 
Receiving side: data + partial of data2, left of data2 with fds + let len = master.send_slice(&buf1[..], None).unwrap(); + assert_eq!(len, 4); + let len = master + .send_slice( + &buf1[..], + Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), + ) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, _buf) = slave.recv_data(5).unwrap(); + #[cfg(target_os = "linux")] + assert_eq!(bytes, 5); + + #[cfg(target_os = "macos")] + assert_eq!(bytes, 4); + + let (bytes, _buf, files) = slave.recv_into_buf(0x4).unwrap(); + #[cfg(target_os = "linux")] + assert_eq!(bytes, 3); + #[cfg(target_os = "linux")] + assert!(files.is_none()); + + #[cfg(target_os = "macos")] + assert_eq!(bytes, 4); + #[cfg(target_os = "macos")] + assert!(files.is_some()); + + // If the target fd array is too small, extra file descriptors will get lost. + let len = master + .send_slice( + &buf1[..], + Some(&[fd.as_raw_fd(), fd.as_raw_fd(), fd.as_raw_fd()]), + ) + .unwrap(); + assert_eq!(len, 4); + + let (bytes, _, files) = slave.recv_into_buf(0x4).unwrap(); + assert_eq!(bytes, 4); + assert!(files.is_some()); + } + + #[test] + fn send_recv() { + let path = temp_path(); + let listener = Listener::new(&path, true).unwrap(); + listener.set_nonblocking(true).unwrap(); + let mut master = Endpoint::connect(&path).unwrap(); + let sock = listener.accept().unwrap().unwrap(); + let mut slave = Endpoint::from_stream(sock); + + let mut hdr1 = MsgHeader::new(2, RequestCode::GetBlob, 0, mem::size_of::() as u32); + hdr1.set_need_reply(true); + let features1 = 0x1u64; + master.send_message(&hdr1, &features1, None).unwrap(); + + let mut features2 = 0u64; + let slice = unsafe { + slice::from_raw_parts_mut( + (&mut features2 as *mut u64) as *mut u8, + mem::size_of::(), + ) + }; + let (hdr2, bytes, files) = slave.recv_body_into_buf(slice).unwrap(); + assert_eq!(hdr1, hdr2); + assert_eq!(bytes, 8); + assert_eq!(features1, features2); + assert!(files.is_none()); + + master.send_header(&hdr1, None).unwrap(); + let (hdr2, files) = slave.recv_header().unwrap(); + assert_eq!(hdr1, hdr2); + assert!(files.is_none()); + } +} diff --git a/storage/src/remote/message.rs b/storage/src/remote/message.rs index 9a47a42f120..8b837a9fe9c 100644 --- a/storage/src/remote/message.rs +++ b/storage/src/remote/message.rs @@ -1,428 +1,428 @@ -// Copyright (C) 2019 Alibaba Cloud. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Define communication messages for the remote blob manager. - -#![allow(dead_code)] - -use std::fmt::Debug; - -use vm_memory::ByteValued; - -pub(crate) const MAX_MSG_SIZE: usize = 0x1000; -pub(crate) const MAX_ATTACHED_FD_ENTRIES: usize = 4; - -pub(crate) trait Req: - Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Send + Sync + Into -{ - fn is_valid(&self) -> bool; -} - -/// Type of requests sending from clients to servers. -#[repr(u32)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum RequestCode { - /// Null operation. - Noop = 0, - /// Get a reference to a blob from the blob manager. - GetBlob = 1, - /// Ask the blob manager to fetch a range of data. - FetchRange = 2, - /// Upper bound of valid commands. - MaxCommand = 3, -} - -impl From for u32 { - fn from(req: RequestCode) -> u32 { - req as u32 - } -} - -impl Req for RequestCode { - fn is_valid(&self) -> bool { - (*self >= RequestCode::Noop) && (*self < RequestCode::MaxCommand) - } -} - -/// Vhost message Validator. -pub trait MsgValidator { - /// Validate message syntax only. 
- /// It doesn't validate message semantics such as protocol version number and dependency - /// on feature flags etc. - fn is_valid(&self) -> bool { - true - } -} - -// Bit mask for common message flags. -bitflags! { - /// Common message flags for blob manager requests and replies. - pub struct HeaderFlag: u32 { - /// Bits[0..2] is message version number. - const VERSION = 0x1; - /// Mark message as reply. - const REPLY = 0x4; - /// Sender anticipates a reply message from the peer. - const NEED_REPLY = 0x8; - /// All valid bits. - const ALL_FLAGS = 0xc; - /// All reserved bits. - const RESERVED_BITS = !0xf; - } -} - -/// Common message header for blob manager. -#[repr(C, packed)] -#[derive(Copy)] -pub(crate) struct MsgHeader { - tag: u64, - request: u32, - flags: u32, - size: u32, -} - -impl Debug for MsgHeader { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MsgHeader") - .field("tag", &{ self.tag }) - .field("request", &{ self.request }) - .field("flags", &{ self.flags }) - .field("size", &{ self.size }) - .finish() - } -} - -impl Clone for MsgHeader { - fn clone(&self) -> MsgHeader { - *self - } -} - -impl PartialEq for MsgHeader { - fn eq(&self, other: &Self) -> bool { - self.tag == other.tag - && self.request == other.request - && self.flags == other.flags - && self.size == other.size - } -} - -impl MsgHeader { - /// Create a new instance of `MsgHeader`. - pub fn new(tag: u64, request: RequestCode, flags: u32, size: u32) -> Self { - // Default to protocol version 1 - let fl = (flags & HeaderFlag::ALL_FLAGS.bits()) | 0x1; - MsgHeader { - tag, - request: request.into(), - flags: fl, - size, - } - } - - /// Get message tag. - pub fn get_tag(&self) -> u64 { - self.tag - } - - /// Set message tag. - pub fn set_tag(&mut self, tag: u64) { - self.tag = tag; - } - - /// Get message type. - pub fn get_code(&self) -> RequestCode { - // It's safe because R is marked as repr(u32). - unsafe { std::mem::transmute_copy::(&{ self.request }) } - } - - /// Set message type. - pub fn set_code(&mut self, request: RequestCode) { - self.request = request.into(); - } - - /// Get message version number. - pub fn get_version(&self) -> u32 { - self.flags & 0x3 - } - - /// Set message version number. - pub fn set_version(&mut self, ver: u32) { - self.flags &= !0x3; - self.flags |= ver & 0x3; - } - - /// Check whether it's a reply message. - pub fn is_reply(&self) -> bool { - (self.flags & HeaderFlag::REPLY.bits()) != 0 - } - - /// Mark message as reply. - pub fn set_reply(&mut self, is_reply: bool) { - if is_reply { - self.flags |= HeaderFlag::REPLY.bits(); - } else { - self.flags &= !HeaderFlag::REPLY.bits(); - } - } - - /// Check whether reply for this message is requested. - pub fn is_need_reply(&self) -> bool { - (self.flags & HeaderFlag::NEED_REPLY.bits()) != 0 - } - - /// Mark that reply for this message is needed. - pub fn set_need_reply(&mut self, need_reply: bool) { - if need_reply { - self.flags |= HeaderFlag::NEED_REPLY.bits(); - } else { - self.flags &= !HeaderFlag::NEED_REPLY.bits(); - } - } - - /// Check whether it's the reply message for the request `req`. - pub fn is_reply_for(&self, req: &MsgHeader) -> bool { - self.is_reply() - && !req.is_reply() - && self.get_code() == req.get_code() - && req.tag == self.tag - } - - /// Get message size. - pub fn get_size(&self) -> u32 { - self.size - } - - /// Set message size. 
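To make the flag layout above concrete: bits 0-1 hold the protocol version (currently always 0x1, filled in by `new()`), bit 2 marks a reply and bit 3 requests one. The fragment below, in the spirit of the `msg_header_ops` test, builds a request header and its matching reply and checks the pairing with `is_reply_for()`; the tag and size values are arbitrary:

```rust
// Request: tag 7, GetBlob, no extra flags, 0x100-byte body to follow.
let mut req = MsgHeader::new(7, RequestCode::GetBlob, 0, 0x100);
req.set_need_reply(true);
assert_eq!(req.get_version(), 0x1); // version bits are set by new()
assert!(req.is_need_reply() && !req.is_reply() && req.is_valid());

// Reply: same tag and request code, REPLY flag set, its own body size.
let mut reply = MsgHeader::new(req.get_tag(), req.get_code(), 0, 0x20);
reply.set_reply(true);

// Pairing requires: reply flag on exactly one side, same code, same tag.
assert!(reply.is_reply_for(&req));
assert!(!req.is_reply_for(&reply));
```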
- pub fn set_size(&mut self, size: u32) { - self.size = size; - } -} - -impl Default for MsgHeader { - fn default() -> Self { - MsgHeader { - tag: 0, - request: 0, - flags: 0x1, - size: 0, - } - } -} - -unsafe impl ByteValued for MsgHeader {} - -impl MsgValidator for MsgHeader { - #[allow(clippy::if_same_then_else)] - fn is_valid(&self) -> bool { - if !self.get_code().is_valid() { - return false; - } else if self.tag == 0 { - return false; - } else if self.size as usize > MAX_MSG_SIZE { - return false; - } else if self.get_version() != 0x1 { - return false; - } else if (self.flags & HeaderFlag::RESERVED_BITS.bits()) != 0 { - return false; - } - true - } -} - -#[repr(C, packed)] -#[derive(Clone, Copy)] -pub(crate) struct GetBlobRequest { - pub generation: u32, - pub id: [u8; 256], -} - -impl Default for GetBlobRequest { - fn default() -> Self { - Self { - generation: 0, - id: [0u8; 256], - } - } -} - -impl GetBlobRequest { - /// Create a new instance. - pub fn new(generation: u32, id: &str) -> Self { - debug_assert!(id.len() < 256); - let mut buf = [0x0u8; 256]; - - buf.copy_from_slice(id.as_bytes()); - - GetBlobRequest { - generation, - id: buf, - } - } -} - -unsafe impl ByteValued for GetBlobRequest {} - -impl MsgValidator for GetBlobRequest { - fn is_valid(&self) -> bool { - self.id.contains(&0u8) - } -} - -#[repr(C, packed)] -#[derive(Clone, Copy, Default)] -pub(crate) struct GetBlobReply { - pub token: u64, - pub base: u64, - pub result: u32, -} - -impl GetBlobReply { - pub fn new(token: u64, base: u64, result: u32) -> Self { - Self { - token, - base, - result, - } - } -} - -unsafe impl ByteValued for GetBlobReply {} - -impl MsgValidator for GetBlobReply { - fn is_valid(&self) -> bool { - self.token != 0 || self.result != 0 - } -} - -#[repr(C, packed)] -#[derive(Copy, Clone, Default)] -pub(crate) struct FetchRangeRequest { - pub token: u64, - pub start: u64, - pub count: u64, -} - -impl FetchRangeRequest { - /// Create a new instance. - pub fn new(token: u64, start: u64, count: u64) -> Self { - FetchRangeRequest { - token, - start, - count, - } - } -} - -unsafe impl ByteValued for FetchRangeRequest {} - -impl MsgValidator for FetchRangeRequest {} - -#[repr(u32)] -pub enum FetchRangeResult { - Success = 0, - Failure = 1, - GenerationMismatch = 2, -} - -#[repr(C, packed)] -#[derive(Copy, Clone, Default)] -pub(crate) struct FetchRangeReply { - pub token: u64, - pub count: u64, - pub result: u32, -} - -impl FetchRangeReply { - /// Create a new instance. 
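
The `MsgHeader` above packs the protocol version into the low two bits of `flags` and keeps `REPLY`/`NEED_REPLY` in bits 2 and 3; `is_valid` then rejects any header whose version is not 1 or whose reserved bits are set. The following standalone sketch mirrors that layout with plain constants (it is not the crate's `HeaderFlag` type) so the bit arithmetic can be checked in isolation:

```rust
// Standalone sketch of the MsgHeader flag layout above; the constants mirror
// HeaderFlag but this is not the crate's API.
const VERSION_MASK: u32 = 0x3; // bits 0..2 carry the protocol version
const REPLY: u32 = 0x4;
const NEED_REPLY: u32 = 0x8;
const RESERVED_BITS: u32 = !0xf;

fn compose_flags(user_flags: u32) -> u32 {
    // MsgHeader::new keeps only the defined flag bits and forces version 1.
    (user_flags & (REPLY | NEED_REPLY)) | 0x1
}

fn flags_valid(flags: u32) -> bool {
    (flags & VERSION_MASK) == 0x1 && (flags & RESERVED_BITS) == 0
}

fn main() {
    let flags = compose_flags(NEED_REPLY);
    assert_eq!(flags, 0x9); // version 1 + NEED_REPLY
    assert!(flags_valid(flags));
    assert!(!flags_valid(0x2)); // unsupported version
    assert!(!flags_valid(0x1 | 0x100)); // reserved bit set
    println!("flags = {:#x}", flags);
}
```
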
- pub fn new(token: u64, count: u64, result: u32) -> Self { - FetchRangeReply { - token, - count, - result, - } - } -} - -unsafe impl ByteValued for FetchRangeReply {} - -impl MsgValidator for FetchRangeReply {} - -#[cfg(test)] -mod tests { - use super::*; - use std::mem; - - #[test] - fn check_master_request_code() { - let code = RequestCode::Noop; - assert!(code.is_valid()); - let code = RequestCode::MaxCommand; - assert!(!code.is_valid()); - assert!(code > RequestCode::Noop); - let code = RequestCode::GetBlob; - assert!(code.is_valid()); - let code = RequestCode::FetchRange; - assert!(code.is_valid()); - assert_eq!(code, code.clone()); - let code: RequestCode = unsafe { std::mem::transmute::(10000u32) }; - assert!(!code.is_valid()); - } - - #[test] - fn msg_header_ops() { - let mut hdr = MsgHeader::new(2, RequestCode::GetBlob, 0, 0x100); - assert_eq!(hdr.get_code(), RequestCode::GetBlob); - hdr.set_code(RequestCode::FetchRange); - assert_eq!(hdr.get_code(), RequestCode::FetchRange); - - assert_eq!(hdr.get_version(), 0x1); - - assert!(!hdr.is_reply()); - hdr.set_reply(true); - assert!(hdr.is_reply()); - hdr.set_reply(false); - - assert!(!hdr.is_need_reply()); - hdr.set_need_reply(true); - assert!(hdr.is_need_reply()); - hdr.set_need_reply(false); - - assert_eq!(hdr.get_size(), 0x100); - hdr.set_size(0x200); - assert_eq!(hdr.get_size(), 0x200); - - assert!(!hdr.is_need_reply()); - assert!(!hdr.is_reply()); - assert_eq!(hdr.get_version(), 0x1); - - // Check message length - assert!(hdr.is_valid()); - hdr.set_size(0x2000); - assert!(!hdr.is_valid()); - hdr.set_size(0x100); - assert_eq!(hdr.get_size(), 0x100); - assert!(hdr.is_valid()); - hdr.set_size((MAX_MSG_SIZE - mem::size_of::()) as u32); - assert!(hdr.is_valid()); - hdr.set_size(0x0); - assert!(hdr.is_valid()); - - // Check version - hdr.set_version(0x0); - assert!(!hdr.is_valid()); - hdr.set_version(0x2); - assert!(!hdr.is_valid()); - hdr.set_version(0x1); - assert!(hdr.is_valid()); - - assert_eq!(hdr.get_tag(), 2); - hdr.set_tag(200); - assert_eq!(hdr.get_tag(), 200); - - // Test Debug, Clone, PartiaEq trait - assert_eq!(hdr, hdr.clone()); - assert_eq!(hdr.clone().get_code(), hdr.get_code()); - assert_eq!(format!("{:?}", hdr.clone()), format!("{:?}", hdr)); - } -} +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Define communication messages for the remote blob manager. + +#![allow(dead_code)] + +use std::fmt::Debug; + +use vm_memory::ByteValued; + +pub(crate) const MAX_MSG_SIZE: usize = 0x1000; +pub(crate) const MAX_ATTACHED_FD_ENTRIES: usize = 4; + +pub(crate) trait Req: + Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Send + Sync + Into +{ + fn is_valid(&self) -> bool; +} + +/// Type of requests sending from clients to servers. +#[repr(u32)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum RequestCode { + /// Null operation. + Noop = 0, + /// Get a reference to a blob from the blob manager. + GetBlob = 1, + /// Ask the blob manager to fetch a range of data. + FetchRange = 2, + /// Upper bound of valid commands. + MaxCommand = 3, +} + +impl From for u32 { + fn from(req: RequestCode) -> u32 { + req as u32 + } +} + +impl Req for RequestCode { + fn is_valid(&self) -> bool { + (*self >= RequestCode::Noop) && (*self < RequestCode::MaxCommand) + } +} + +/// Vhost message Validator. +pub trait MsgValidator { + /// Validate message syntax only. 
+ /// It doesn't validate message semantics such as protocol version number and dependency + /// on feature flags etc. + fn is_valid(&self) -> bool { + true + } +} + +// Bit mask for common message flags. +bitflags! { + /// Common message flags for blob manager requests and replies. + pub struct HeaderFlag: u32 { + /// Bits[0..2] is message version number. + const VERSION = 0x1; + /// Mark message as reply. + const REPLY = 0x4; + /// Sender anticipates a reply message from the peer. + const NEED_REPLY = 0x8; + /// All valid bits. + const ALL_FLAGS = 0xc; + /// All reserved bits. + const RESERVED_BITS = !0xf; + } +} + +/// Common message header for blob manager. +#[repr(C, packed)] +#[derive(Copy)] +pub(crate) struct MsgHeader { + tag: u64, + request: u32, + flags: u32, + size: u32, +} + +impl Debug for MsgHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MsgHeader") + .field("tag", &{ self.tag }) + .field("request", &{ self.request }) + .field("flags", &{ self.flags }) + .field("size", &{ self.size }) + .finish() + } +} + +impl Clone for MsgHeader { + fn clone(&self) -> MsgHeader { + *self + } +} + +impl PartialEq for MsgHeader { + fn eq(&self, other: &Self) -> bool { + self.tag == other.tag + && self.request == other.request + && self.flags == other.flags + && self.size == other.size + } +} + +impl MsgHeader { + /// Create a new instance of `MsgHeader`. + pub fn new(tag: u64, request: RequestCode, flags: u32, size: u32) -> Self { + // Default to protocol version 1 + let fl = (flags & HeaderFlag::ALL_FLAGS.bits()) | 0x1; + MsgHeader { + tag, + request: request.into(), + flags: fl, + size, + } + } + + /// Get message tag. + pub fn get_tag(&self) -> u64 { + self.tag + } + + /// Set message tag. + pub fn set_tag(&mut self, tag: u64) { + self.tag = tag; + } + + /// Get message type. + pub fn get_code(&self) -> RequestCode { + // It's safe because R is marked as repr(u32). + unsafe { std::mem::transmute_copy::(&{ self.request }) } + } + + /// Set message type. + pub fn set_code(&mut self, request: RequestCode) { + self.request = request.into(); + } + + /// Get message version number. + pub fn get_version(&self) -> u32 { + self.flags & 0x3 + } + + /// Set message version number. + pub fn set_version(&mut self, ver: u32) { + self.flags &= !0x3; + self.flags |= ver & 0x3; + } + + /// Check whether it's a reply message. + pub fn is_reply(&self) -> bool { + (self.flags & HeaderFlag::REPLY.bits()) != 0 + } + + /// Mark message as reply. + pub fn set_reply(&mut self, is_reply: bool) { + if is_reply { + self.flags |= HeaderFlag::REPLY.bits(); + } else { + self.flags &= !HeaderFlag::REPLY.bits(); + } + } + + /// Check whether reply for this message is requested. + pub fn is_need_reply(&self) -> bool { + (self.flags & HeaderFlag::NEED_REPLY.bits()) != 0 + } + + /// Mark that reply for this message is needed. + pub fn set_need_reply(&mut self, need_reply: bool) { + if need_reply { + self.flags |= HeaderFlag::NEED_REPLY.bits(); + } else { + self.flags &= !HeaderFlag::NEED_REPLY.bits(); + } + } + + /// Check whether it's the reply message for the request `req`. + pub fn is_reply_for(&self, req: &MsgHeader) -> bool { + self.is_reply() + && !req.is_reply() + && self.get_code() == req.get_code() + && req.tag == self.tag + } + + /// Get message size. + pub fn get_size(&self) -> u32 { + self.size + } + + /// Set message size. 
+ pub fn set_size(&mut self, size: u32) { + self.size = size; + } +} + +impl Default for MsgHeader { + fn default() -> Self { + MsgHeader { + tag: 0, + request: 0, + flags: 0x1, + size: 0, + } + } +} + +unsafe impl ByteValued for MsgHeader {} + +impl MsgValidator for MsgHeader { + #[allow(clippy::if_same_then_else)] + fn is_valid(&self) -> bool { + if !self.get_code().is_valid() { + return false; + } else if self.tag == 0 { + return false; + } else if self.size as usize > MAX_MSG_SIZE { + return false; + } else if self.get_version() != 0x1 { + return false; + } else if (self.flags & HeaderFlag::RESERVED_BITS.bits()) != 0 { + return false; + } + true + } +} + +#[repr(C, packed)] +#[derive(Clone, Copy)] +pub(crate) struct GetBlobRequest { + pub generation: u32, + pub id: [u8; 256], +} + +impl Default for GetBlobRequest { + fn default() -> Self { + Self { + generation: 0, + id: [0u8; 256], + } + } +} + +impl GetBlobRequest { + /// Create a new instance. + pub fn new(generation: u32, id: &str) -> Self { + debug_assert!(id.len() < 256); + let mut buf = [0x0u8; 256]; + + buf.copy_from_slice(id.as_bytes()); + + GetBlobRequest { + generation, + id: buf, + } + } +} + +unsafe impl ByteValued for GetBlobRequest {} + +impl MsgValidator for GetBlobRequest { + fn is_valid(&self) -> bool { + self.id.contains(&0u8) + } +} + +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +pub(crate) struct GetBlobReply { + pub token: u64, + pub base: u64, + pub result: u32, +} + +impl GetBlobReply { + pub fn new(token: u64, base: u64, result: u32) -> Self { + Self { + token, + base, + result, + } + } +} + +unsafe impl ByteValued for GetBlobReply {} + +impl MsgValidator for GetBlobReply { + fn is_valid(&self) -> bool { + self.token != 0 || self.result != 0 + } +} + +#[repr(C, packed)] +#[derive(Copy, Clone, Default)] +pub(crate) struct FetchRangeRequest { + pub token: u64, + pub start: u64, + pub count: u64, +} + +impl FetchRangeRequest { + /// Create a new instance. + pub fn new(token: u64, start: u64, count: u64) -> Self { + FetchRangeRequest { + token, + start, + count, + } + } +} + +unsafe impl ByteValued for FetchRangeRequest {} + +impl MsgValidator for FetchRangeRequest {} + +#[repr(u32)] +pub enum FetchRangeResult { + Success = 0, + Failure = 1, + GenerationMismatch = 2, +} + +#[repr(C, packed)] +#[derive(Copy, Clone, Default)] +pub(crate) struct FetchRangeReply { + pub token: u64, + pub count: u64, + pub result: u32, +} + +impl FetchRangeReply { + /// Create a new instance. 
+ pub fn new(token: u64, count: u64, result: u32) -> Self { + FetchRangeReply { + token, + count, + result, + } + } +} + +unsafe impl ByteValued for FetchRangeReply {} + +impl MsgValidator for FetchRangeReply {} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem; + + #[test] + fn check_master_request_code() { + let code = RequestCode::Noop; + assert!(code.is_valid()); + let code = RequestCode::MaxCommand; + assert!(!code.is_valid()); + assert!(code > RequestCode::Noop); + let code = RequestCode::GetBlob; + assert!(code.is_valid()); + let code = RequestCode::FetchRange; + assert!(code.is_valid()); + assert_eq!(code, code.clone()); + let code: RequestCode = unsafe { std::mem::transmute::(10000u32) }; + assert!(!code.is_valid()); + } + + #[test] + fn msg_header_ops() { + let mut hdr = MsgHeader::new(2, RequestCode::GetBlob, 0, 0x100); + assert_eq!(hdr.get_code(), RequestCode::GetBlob); + hdr.set_code(RequestCode::FetchRange); + assert_eq!(hdr.get_code(), RequestCode::FetchRange); + + assert_eq!(hdr.get_version(), 0x1); + + assert!(!hdr.is_reply()); + hdr.set_reply(true); + assert!(hdr.is_reply()); + hdr.set_reply(false); + + assert!(!hdr.is_need_reply()); + hdr.set_need_reply(true); + assert!(hdr.is_need_reply()); + hdr.set_need_reply(false); + + assert_eq!(hdr.get_size(), 0x100); + hdr.set_size(0x200); + assert_eq!(hdr.get_size(), 0x200); + + assert!(!hdr.is_need_reply()); + assert!(!hdr.is_reply()); + assert_eq!(hdr.get_version(), 0x1); + + // Check message length + assert!(hdr.is_valid()); + hdr.set_size(0x2000); + assert!(!hdr.is_valid()); + hdr.set_size(0x100); + assert_eq!(hdr.get_size(), 0x100); + assert!(hdr.is_valid()); + hdr.set_size((MAX_MSG_SIZE - mem::size_of::()) as u32); + assert!(hdr.is_valid()); + hdr.set_size(0x0); + assert!(hdr.is_valid()); + + // Check version + hdr.set_version(0x0); + assert!(!hdr.is_valid()); + hdr.set_version(0x2); + assert!(!hdr.is_valid()); + hdr.set_version(0x1); + assert!(hdr.is_valid()); + + assert_eq!(hdr.get_tag(), 2); + hdr.set_tag(200); + assert_eq!(hdr.get_tag(), 200); + + // Test Debug, Clone, PartiaEq trait + assert_eq!(hdr, hdr.clone()); + assert_eq!(hdr.clone().get_code(), hdr.get_code()); + assert_eq!(format!("{:?}", hdr.clone()), format!("{:?}", hdr)); + } +} diff --git a/storage/src/remote/mod.rs b/storage/src/remote/mod.rs index 6c2f6e0ffc7..ffa157a7961 100644 --- a/storage/src/remote/mod.rs +++ b/storage/src/remote/mod.rs @@ -1,10 +1,10 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -pub use self::client::RemoteBlobMgr; -pub use self::server::Server; -mod client; -mod connection; -mod message; -mod server; +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +pub use self::client::RemoteBlobMgr; +pub use self::server::Server; +mod client; +mod connection; +mod message; +mod server; diff --git a/storage/src/remote/server.rs b/storage/src/remote/server.rs index 8d1447f6376..898ee983276 100644 --- a/storage/src/remote/server.rs +++ b/storage/src/remote/server.rs @@ -1,418 +1,418 @@ -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
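
One detail worth calling out from `message.rs` above: a response is only accepted as the answer to a request when the tags match, the request codes match, and the `REPLY` bit is set on the response but not on the request. A minimal sketch with a simplified header type (not the crate's `MsgHeader`) illustrates the check:

```rust
// Simplified header for illustration only; mirrors the is_reply_for() logic above.
#[derive(Clone, Copy)]
struct Hdr {
    tag: u64,
    code: u32,
    reply: bool,
}

fn is_reply_for(resp: &Hdr, req: &Hdr) -> bool {
    resp.reply && !req.reply && resp.code == req.code && resp.tag == req.tag
}

fn main() {
    let req = Hdr { tag: 7, code: 1 /* GetBlob */, reply: false };
    let mut resp = req;
    resp.reply = true;
    assert!(is_reply_for(&resp, &req));
    assert!(!is_reply_for(&req, &resp)); // roles reversed must not match
    resp.tag = 8;
    assert!(!is_reply_for(&resp, &req)); // tag mismatch must not match
}
```
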
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::HashMap; -use std::io::Result; -use std::mem; -use std::net::Shutdown; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::os::unix::net::UnixStream; -use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; -use std::sync::{Arc, Mutex, MutexGuard}; - -use vm_memory::ByteValued; - -use crate::remote::client::RemoteBlobMgr; -use crate::remote::connection::{Endpoint, Listener}; -use crate::remote::message::{ - FetchRangeReply, FetchRangeRequest, GetBlobReply, GetBlobRequest, MsgHeader, MsgValidator, - RequestCode, -}; - -/// Remote blob manager client connection and state. -pub struct ClientConnection { - conn: Mutex, - exiting: AtomicBool, - id: u64, - state: ServerState, - token: AtomicU32, - uds: UnixStream, -} - -impl ClientConnection { - fn new(server: ServerState, id: u64, sock: UnixStream) -> Result { - let uds = sock.try_clone()?; - - if id > u32::MAX as u64 { - return Err(einval!("ran out of connection id")); - } - - Ok(Self { - conn: Mutex::new(Endpoint::from_stream(sock)), - exiting: AtomicBool::new(false), - id, - state: server, - token: AtomicU32::new(1), - uds, - }) - } - - fn shutdown(&self) { - if !self.exiting.swap(true, Ordering::AcqRel) { - let _ = self.uds.shutdown(Shutdown::Both); - } - } - - /// Close the connection. - pub fn close(&self) { - let id = self.id; - let entry = self.state.lock_clients().remove(&id); - - if let Some(conn) = entry { - conn.shutdown(); - } - } - - /// Get a unique identifier for the client connection. - pub fn id(&self) -> u32 { - self.id as u32 - } - - fn handle_message(&self) -> Result { - if self.exiting.load(Ordering::Acquire) { - return Ok(false); - } - - let mut guard = self.lock_conn(); - let (mut hdr, _files) = guard.recv_header().map_err(|e| eio!(format!("{}", e)))?; - match hdr.get_code() { - RequestCode::Noop => self.handle_noop(&mut hdr, guard)?, - RequestCode::GetBlob => self.handle_get_blob(&mut hdr, guard)?, - RequestCode::FetchRange => self.handle_fetch_range(&mut hdr, guard)?, - cmd => { - let msg = format!("unknown request command {}", u32::from(cmd)); - return Err(einval!(msg)); - } - } - - Ok(true) - } - - fn handle_noop(&self, hdr: &mut MsgHeader, mut guard: MutexGuard) -> Result<()> { - let size = hdr.get_size() as usize; - if !hdr.is_valid() || size != 0 { - return Err(eio!("invalid noop request message")); - } - - hdr.set_reply(true); - guard.send_header(hdr, None).map_err(|_e| eio!()) - } - - fn handle_get_blob(&self, hdr: &mut MsgHeader, mut guard: MutexGuard) -> Result<()> { - let size = hdr.get_size() as usize; - if !hdr.is_valid() || size != mem::size_of::() { - return Err(eio!("invalid get blob request message")); - } - - let (sz, data) = guard.recv_data(size).map_err(|e| eio!(format!("{}", e)))?; - if sz != size || data.len() != size { - return Err(einval!("invalid get blob request message")); - } - drop(guard); - - let mut msg = GetBlobRequest::default(); - msg.as_mut_slice().copy_from_slice(&data); - - // TODO - let token = self.token.fetch_add(1, Ordering::AcqRel) as u64; - let gen = (msg.generation as u64) << 32; - let reply = GetBlobReply::new(gen | token, 0, libc::ENOSYS as u32); - - let mut guard = self.lock_conn(); - hdr.set_reply(true); - guard.send_message(hdr, &reply, None).map_err(|_e| eio!()) - } - - fn handle_fetch_range( - &self, - hdr: &mut MsgHeader, - mut guard: MutexGuard, - ) -> Result<()> { - let size = hdr.get_size() as usize; - if !hdr.is_valid() || size != mem::size_of::() { - return Err(eio!("invalid fetch 
range request message")); - } - - let (sz, data) = guard.recv_data(size).map_err(|e| eio!(format!("{}", e)))?; - if sz != size || data.len() != size { - return Err(einval!("invalid fetch range request message")); - } - drop(guard); - - // TODO - let mut msg = FetchRangeRequest::default(); - msg.as_mut_slice().copy_from_slice(&data); - - let reply = FetchRangeReply::new(0, msg.count, 0); - - let mut guard = self.lock_conn(); - hdr.set_reply(true); - guard.send_message(hdr, &reply, None).map_err(|_e| eio!()) - } - - fn lock_conn(&self) -> MutexGuard { - // Do not expect poisoned lock. - self.conn.lock().unwrap() - } -} - -impl AsRawFd for ClientConnection { - fn as_raw_fd(&self) -> RawFd { - let guard = self.lock_conn(); - - guard.as_raw_fd() - } -} - -#[derive(Clone)] -struct ServerState { - active_workers: Arc, - clients: Arc>>>, -} - -impl ServerState { - fn new() -> Self { - Self { - active_workers: Arc::new(AtomicU64::new(0)), - clients: Arc::new(Mutex::new(HashMap::new())), - } - } - - fn add(&self, id: u64, client: Arc) { - self.lock_clients().insert(id, client); - } - - fn remove(&self, id: u64) { - self.lock_clients().remove(&id); - } - - fn lock_clients(&self) -> MutexGuard>> { - // Do not expect poisoned lock here. - self.clients.lock().unwrap() - } -} - -/// Blob server to accept connections from clients. -pub struct Server { - sock: String, - next_id: AtomicU64, - exiting: AtomicBool, - listener: Listener, - state: ServerState, -} - -impl Server { - /// Create a new instance of `Server` to accept connections from clients. - pub fn new(sock: &str) -> Result { - let listener = Listener::new(sock, true).map_err(|_e| eio!())?; - - Ok(Server { - sock: sock.to_owned(), - next_id: AtomicU64::new(1024), - exiting: AtomicBool::new(false), - listener, - state: ServerState::new(), - }) - } - - /// Start a worker thread to handle incoming connections from clients. - pub fn start(server: Arc) -> Result<()> { - server - .listener - .set_nonblocking(false) - .map_err(|_e| eio!())?; - - std::thread::spawn(move || { - server.state.active_workers.fetch_add(1, Ordering::Acquire); - - 'listen: loop { - if server.exiting.load(Ordering::Acquire) { - break 'listen; - } - - match server.listener.accept() { - Ok(Some(sock)) => { - let id = server.next_id.fetch_add(1, Ordering::AcqRel); - let client = match ClientConnection::new(server.state.clone(), id, sock) { - Ok(v) => v, - Err(e) => { - warn!("failed to duplicate unix domain socket, {}", e); - break 'listen; - } - }; - let client = Arc::new(client); - - client.state.add(id, client.clone()); - std::thread::spawn(move || { - client.state.active_workers.fetch_add(1, Ordering::AcqRel); - loop { - if let Err(e) = client.handle_message() { - warn!("failed to handle request, {}", e); - break; - } - } - client.state.active_workers.fetch_sub(1, Ordering::AcqRel); - client.state.remove(client.id); - client.shutdown(); - }); - } - Ok(None) => {} - Err(e) => { - error!("failed to accept connection, {}", e); - break 'listen; - } - } - } - - server.state.active_workers.fetch_sub(1, Ordering::AcqRel); - }); - - Ok(()) - } - - /// Shutdown the listener and all active client connections. - pub fn stop(&self) { - if !self.exiting.swap(true, Ordering::AcqRel) { - if self.state.active_workers.load(Ordering::Acquire) > 0 { - // Hacky way to wake up the listener threads from accept(). 
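
`Server::start` above runs a dedicated listener thread that accepts connections and spawns one worker thread per client, with `active_workers` tracking how many threads are alive. Below is a minimal std-only sketch of the same accept-and-spawn bookkeeping over a hypothetical socket path; it uses `std::os::unix::net` directly rather than the crate's `Listener`/`Endpoint` types:

```rust
// Accept-and-spawn sketch: one listener thread, one worker thread per client,
// and an atomic counter tracking active workers (socket path is hypothetical).
use std::io::{Read, Write};
use std::os::unix::net::{UnixListener, UnixStream};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::thread;

fn main() -> std::io::Result<()> {
    let path = std::env::temp_dir().join("demo_accept.sock");
    let _ = std::fs::remove_file(&path);
    let listener = UnixListener::bind(&path)?;
    let workers = Arc::new(AtomicU64::new(0));

    let w = workers.clone();
    let acceptor = thread::spawn(move || {
        // Accept one client, then hand it off to a per-connection worker thread.
        let (mut sock, _addr) = listener.accept().unwrap();
        let w2 = w.clone();
        let worker = thread::spawn(move || {
            w2.fetch_add(1, Ordering::AcqRel);
            let mut buf = [0u8; 4];
            sock.read_exact(&mut buf).unwrap();
            sock.write_all(&buf).unwrap(); // echo the request back as the "reply"
            w2.fetch_sub(1, Ordering::AcqRel);
        });
        worker.join().unwrap();
    });

    let mut client = UnixStream::connect(&path)?;
    client.write_all(b"ping")?;
    let mut reply = [0u8; 4];
    client.read_exact(&mut reply)?;
    assert_eq!(&reply, b"ping");

    acceptor.join().unwrap();
    assert_eq!(workers.load(Ordering::Acquire), 0);
    Ok(())
}
```
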
- let client = RemoteBlobMgr::new("".to_owned(), &self.sock).unwrap(); - let _ = client.connect(); - } - - let mut guard = self.state.lock_clients(); - for (_token, client) in guard.iter() { - client.shutdown(); - } - guard.clear(); - } - } - - /// Close the client connection with `id`. - pub fn close_connection(&self, id: u32) { - let id = id as u64; - let entry = self.state.lock_clients().remove(&id); - - if let Some(conn) = entry { - conn.shutdown(); - } - } - - pub fn handle_event(&self, id: u32) -> Result<()> { - let id64 = id as u64; - let conn = self.state.lock_clients().get(&id64).cloned(); - - if let Some(c) = conn { - match c.handle_message() { - Ok(true) => Ok(()), - Ok(false) => Err(eother!("client connection is shutting down")), - Err(e) => Err(e), - } - } else { - Err(enoent!("client connect doesn't exist")) - } - } - - /// Accept one incoming connection from client. - pub fn handle_incoming_connection(&self) -> Result>> { - if self.exiting.load(Ordering::Acquire) { - return Err(eio!("server shutdown")); - } - - match self.listener.accept() { - Err(e) => Err(eio!(format!("failed to accept incoming connection, {}", e))), - Ok(None) => Ok(None), - Ok(Some(sock)) => { - let id = self.next_id.fetch_add(1, Ordering::AcqRel); - if id <= u32::MAX as u64 { - let client = Arc::new(ClientConnection::new(self.state.clone(), id, sock)?); - client.state.add(id, client.clone()); - Ok(Some(client)) - } else { - // Running out of connection id, reject the incoming connection. - Ok(None) - } - } - } - } -} - -impl AsRawFd for Server { - fn as_raw_fd(&self) -> RawFd { - self.listener.as_raw_fd() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::time::{Duration, Instant}; - use vmm_sys_util::tempdir::TempDir; - - #[test] - #[ignore] - fn test_new_server() { - let tmpdir = TempDir::new().unwrap(); - let sock = tmpdir.as_path().to_str().unwrap().to_owned() + "/test_sock1"; - let server = Arc::new(Server::new(&sock).unwrap()); - - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); - Server::start(server.clone()).unwrap(); - std::thread::sleep(Duration::from_secs(1)); - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 1); - - let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); - client.connect().unwrap(); - std::thread::sleep(Duration::from_secs(1)); - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 2); - client.shutdown(); - std::thread::sleep(Duration::from_secs(1)); - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 1); - assert_eq!(server.state.clients.lock().unwrap().len(), 0); - - let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); - client.connect().unwrap(); - std::thread::sleep(Duration::from_secs(1)); - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 2); - let client = Arc::new(client); - client.start().unwrap(); - client.ping().unwrap(); - - server.stop(); - std::thread::sleep(Duration::from_secs(1)); - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); - } - - #[test] - #[ignore] - fn test_reconnect() { - let tmpdir = TempDir::new().unwrap(); - let sock = tmpdir.as_path().to_str().unwrap().to_owned() + "/test_sock1"; - - let server = Arc::new(Server::new(&sock).unwrap()); - Server::start(server.clone()).unwrap(); - - let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); - client.connect().unwrap(); - std::thread::sleep(Duration::from_secs(4)); - client.start().unwrap(); - client.ping().unwrap(); - - server.stop(); - 
std::thread::sleep(Duration::from_secs(4)); - let starttime = Instant::now(); - /* give 10secs more to try */ - while starttime.elapsed() < Duration::from_secs(10) { - if server.state.active_workers.load(Ordering::Relaxed) == 0 { - break; - } - std::thread::sleep(Duration::from_secs(1)); - } - assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); - drop(server); - - let server = Arc::new(Server::new(&sock).unwrap()); - Server::start(server).unwrap(); - client.ping().unwrap(); - } -} +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::io::Result; +use std::mem; +use std::net::Shutdown; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::net::UnixStream; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex, MutexGuard}; + +use vm_memory::ByteValued; + +use crate::remote::client::RemoteBlobMgr; +use crate::remote::connection::{Endpoint, Listener}; +use crate::remote::message::{ + FetchRangeReply, FetchRangeRequest, GetBlobReply, GetBlobRequest, MsgHeader, MsgValidator, + RequestCode, +}; + +/// Remote blob manager client connection and state. +pub struct ClientConnection { + conn: Mutex, + exiting: AtomicBool, + id: u64, + state: ServerState, + token: AtomicU32, + uds: UnixStream, +} + +impl ClientConnection { + fn new(server: ServerState, id: u64, sock: UnixStream) -> Result { + let uds = sock.try_clone()?; + + if id > u32::MAX as u64 { + return Err(einval!("ran out of connection id")); + } + + Ok(Self { + conn: Mutex::new(Endpoint::from_stream(sock)), + exiting: AtomicBool::new(false), + id, + state: server, + token: AtomicU32::new(1), + uds, + }) + } + + fn shutdown(&self) { + if !self.exiting.swap(true, Ordering::AcqRel) { + let _ = self.uds.shutdown(Shutdown::Both); + } + } + + /// Close the connection. + pub fn close(&self) { + let id = self.id; + let entry = self.state.lock_clients().remove(&id); + + if let Some(conn) = entry { + conn.shutdown(); + } + } + + /// Get a unique identifier for the client connection. 
+ pub fn id(&self) -> u32 { + self.id as u32 + } + + fn handle_message(&self) -> Result { + if self.exiting.load(Ordering::Acquire) { + return Ok(false); + } + + let mut guard = self.lock_conn(); + let (mut hdr, _files) = guard.recv_header().map_err(|e| eio!(format!("{}", e)))?; + match hdr.get_code() { + RequestCode::Noop => self.handle_noop(&mut hdr, guard)?, + RequestCode::GetBlob => self.handle_get_blob(&mut hdr, guard)?, + RequestCode::FetchRange => self.handle_fetch_range(&mut hdr, guard)?, + cmd => { + let msg = format!("unknown request command {}", u32::from(cmd)); + return Err(einval!(msg)); + } + } + + Ok(true) + } + + fn handle_noop(&self, hdr: &mut MsgHeader, mut guard: MutexGuard) -> Result<()> { + let size = hdr.get_size() as usize; + if !hdr.is_valid() || size != 0 { + return Err(eio!("invalid noop request message")); + } + + hdr.set_reply(true); + guard.send_header(hdr, None).map_err(|_e| eio!()) + } + + fn handle_get_blob(&self, hdr: &mut MsgHeader, mut guard: MutexGuard) -> Result<()> { + let size = hdr.get_size() as usize; + if !hdr.is_valid() || size != mem::size_of::() { + return Err(eio!("invalid get blob request message")); + } + + let (sz, data) = guard.recv_data(size).map_err(|e| eio!(format!("{}", e)))?; + if sz != size || data.len() != size { + return Err(einval!("invalid get blob request message")); + } + drop(guard); + + let mut msg = GetBlobRequest::default(); + msg.as_mut_slice().copy_from_slice(&data); + + // TODO + let token = self.token.fetch_add(1, Ordering::AcqRel) as u64; + let gen = (msg.generation as u64) << 32; + let reply = GetBlobReply::new(gen | token, 0, libc::ENOSYS as u32); + + let mut guard = self.lock_conn(); + hdr.set_reply(true); + guard.send_message(hdr, &reply, None).map_err(|_e| eio!()) + } + + fn handle_fetch_range( + &self, + hdr: &mut MsgHeader, + mut guard: MutexGuard, + ) -> Result<()> { + let size = hdr.get_size() as usize; + if !hdr.is_valid() || size != mem::size_of::() { + return Err(eio!("invalid fetch range request message")); + } + + let (sz, data) = guard.recv_data(size).map_err(|e| eio!(format!("{}", e)))?; + if sz != size || data.len() != size { + return Err(einval!("invalid fetch range request message")); + } + drop(guard); + + // TODO + let mut msg = FetchRangeRequest::default(); + msg.as_mut_slice().copy_from_slice(&data); + + let reply = FetchRangeReply::new(0, msg.count, 0); + + let mut guard = self.lock_conn(); + hdr.set_reply(true); + guard.send_message(hdr, &reply, None).map_err(|_e| eio!()) + } + + fn lock_conn(&self) -> MutexGuard { + // Do not expect poisoned lock. + self.conn.lock().unwrap() + } +} + +impl AsRawFd for ClientConnection { + fn as_raw_fd(&self) -> RawFd { + let guard = self.lock_conn(); + + guard.as_raw_fd() + } +} + +#[derive(Clone)] +struct ServerState { + active_workers: Arc, + clients: Arc>>>, +} + +impl ServerState { + fn new() -> Self { + Self { + active_workers: Arc::new(AtomicU64::new(0)), + clients: Arc::new(Mutex::new(HashMap::new())), + } + } + + fn add(&self, id: u64, client: Arc) { + self.lock_clients().insert(id, client); + } + + fn remove(&self, id: u64) { + self.lock_clients().remove(&id); + } + + fn lock_clients(&self) -> MutexGuard>> { + // Do not expect poisoned lock here. + self.clients.lock().unwrap() + } +} + +/// Blob server to accept connections from clients. 
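
In `handle_get_blob` above the reply token is built by shifting the request's `generation` into the upper 32 bits and placing a per-connection counter in the lower 32 bits. A small sketch (hypothetical helper names, not the crate's API) shows the packing and how the two halves can be recovered:

```rust
// Token encoding used by handle_get_blob above: generation in the high half,
// per-connection counter in the low half.
fn pack_token(generation: u32, counter: u32) -> u64 {
    ((generation as u64) << 32) | counter as u64
}

fn unpack_token(token: u64) -> (u32, u32) {
    ((token >> 32) as u32, token as u32)
}

fn main() {
    let token = pack_token(3, 41);
    assert_eq!(token, (3u64 << 32) | 41);
    assert_eq!(unpack_token(token), (3, 41));
}
```
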
+pub struct Server { + sock: String, + next_id: AtomicU64, + exiting: AtomicBool, + listener: Listener, + state: ServerState, +} + +impl Server { + /// Create a new instance of `Server` to accept connections from clients. + pub fn new(sock: &str) -> Result { + let listener = Listener::new(sock, true).map_err(|_e| eio!())?; + + Ok(Server { + sock: sock.to_owned(), + next_id: AtomicU64::new(1024), + exiting: AtomicBool::new(false), + listener, + state: ServerState::new(), + }) + } + + /// Start a worker thread to handle incoming connections from clients. + pub fn start(server: Arc) -> Result<()> { + server + .listener + .set_nonblocking(false) + .map_err(|_e| eio!())?; + + std::thread::spawn(move || { + server.state.active_workers.fetch_add(1, Ordering::Acquire); + + 'listen: loop { + if server.exiting.load(Ordering::Acquire) { + break 'listen; + } + + match server.listener.accept() { + Ok(Some(sock)) => { + let id = server.next_id.fetch_add(1, Ordering::AcqRel); + let client = match ClientConnection::new(server.state.clone(), id, sock) { + Ok(v) => v, + Err(e) => { + warn!("failed to duplicate unix domain socket, {}", e); + break 'listen; + } + }; + let client = Arc::new(client); + + client.state.add(id, client.clone()); + std::thread::spawn(move || { + client.state.active_workers.fetch_add(1, Ordering::AcqRel); + loop { + if let Err(e) = client.handle_message() { + warn!("failed to handle request, {}", e); + break; + } + } + client.state.active_workers.fetch_sub(1, Ordering::AcqRel); + client.state.remove(client.id); + client.shutdown(); + }); + } + Ok(None) => {} + Err(e) => { + error!("failed to accept connection, {}", e); + break 'listen; + } + } + } + + server.state.active_workers.fetch_sub(1, Ordering::AcqRel); + }); + + Ok(()) + } + + /// Shutdown the listener and all active client connections. + pub fn stop(&self) { + if !self.exiting.swap(true, Ordering::AcqRel) { + if self.state.active_workers.load(Ordering::Acquire) > 0 { + // Hacky way to wake up the listener threads from accept(). + let client = RemoteBlobMgr::new("".to_owned(), &self.sock).unwrap(); + let _ = client.connect(); + } + + let mut guard = self.state.lock_clients(); + for (_token, client) in guard.iter() { + client.shutdown(); + } + guard.clear(); + } + } + + /// Close the client connection with `id`. + pub fn close_connection(&self, id: u32) { + let id = id as u64; + let entry = self.state.lock_clients().remove(&id); + + if let Some(conn) = entry { + conn.shutdown(); + } + } + + pub fn handle_event(&self, id: u32) -> Result<()> { + let id64 = id as u64; + let conn = self.state.lock_clients().get(&id64).cloned(); + + if let Some(c) = conn { + match c.handle_message() { + Ok(true) => Ok(()), + Ok(false) => Err(eother!("client connection is shutting down")), + Err(e) => Err(e), + } + } else { + Err(enoent!("client connect doesn't exist")) + } + } + + /// Accept one incoming connection from client. + pub fn handle_incoming_connection(&self) -> Result>> { + if self.exiting.load(Ordering::Acquire) { + return Err(eio!("server shutdown")); + } + + match self.listener.accept() { + Err(e) => Err(eio!(format!("failed to accept incoming connection, {}", e))), + Ok(None) => Ok(None), + Ok(Some(sock)) => { + let id = self.next_id.fetch_add(1, Ordering::AcqRel); + if id <= u32::MAX as u64 { + let client = Arc::new(ClientConnection::new(self.state.clone(), id, sock)?); + client.state.add(id, client.clone()); + Ok(Some(client)) + } else { + // Running out of connection id, reject the incoming connection. 
+ Ok(None) + } + } + } + } +} + +impl AsRawFd for Server { + fn as_raw_fd(&self) -> RawFd { + self.listener.as_raw_fd() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{Duration, Instant}; + use vmm_sys_util::tempdir::TempDir; + + #[test] + #[ignore] + fn test_new_server() { + let tmpdir = TempDir::new().unwrap(); + let sock = tmpdir.as_path().to_str().unwrap().to_owned() + "/test_sock1"; + let server = Arc::new(Server::new(&sock).unwrap()); + + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); + Server::start(server.clone()).unwrap(); + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 1); + + let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); + client.connect().unwrap(); + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 2); + client.shutdown(); + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 1); + assert_eq!(server.state.clients.lock().unwrap().len(), 0); + + let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); + client.connect().unwrap(); + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 2); + let client = Arc::new(client); + client.start().unwrap(); + client.ping().unwrap(); + + server.stop(); + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); + } + + #[test] + #[ignore] + fn test_reconnect() { + let tmpdir = TempDir::new().unwrap(); + let sock = tmpdir.as_path().to_str().unwrap().to_owned() + "/test_sock1"; + + let server = Arc::new(Server::new(&sock).unwrap()); + Server::start(server.clone()).unwrap(); + + let client = RemoteBlobMgr::new("".to_owned(), &server.sock).unwrap(); + client.connect().unwrap(); + std::thread::sleep(Duration::from_secs(4)); + client.start().unwrap(); + client.ping().unwrap(); + + server.stop(); + std::thread::sleep(Duration::from_secs(4)); + let starttime = Instant::now(); + /* give 10secs more to try */ + while starttime.elapsed() < Duration::from_secs(10) { + if server.state.active_workers.load(Ordering::Relaxed) == 0 { + break; + } + std::thread::sleep(Duration::from_secs(1)); + } + assert_eq!(server.state.active_workers.load(Ordering::Relaxed), 0); + drop(server); + + let server = Arc::new(Server::new(&sock).unwrap()); + Server::start(server).unwrap(); + client.ping().unwrap(); + } +} diff --git a/storage/src/test.rs b/storage/src/test.rs index 2c5d9f5e401..e1d2b626a68 100644 --- a/storage/src/test.rs +++ b/storage/src/test.rs @@ -1,119 +1,119 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::sync::Arc; - -use nydus_utils::digest::RafsDigest; -use nydus_utils::metrics::BackendMetrics; - -use super::impl_getter; -use crate::backend::{BackendResult, BlobBackend, BlobReader}; -use crate::device::v5::BlobV5ChunkInfo; -use crate::device::{BlobChunkFlags, BlobChunkInfo}; -use std::any::Any; - -pub(crate) struct MockBackend { - pub metrics: Arc, -} - -impl BlobReader for MockBackend { - fn blob_size(&self) -> BackendResult { - Ok(0) - } - - fn try_read(&self, buf: &mut [u8], _offset: u64) -> BackendResult { - let mut i = 0; - while i < buf.len() { - buf[i] = i as u8; - i += 1; - } - Ok(i) - } - - fn metrics(&self) -> &BackendMetrics { - // Safe because nydusd must have backend attached with id, only image builder can no id - // but use backend instance to upload blob. - &self.metrics - } -} - -impl BlobBackend for MockBackend { - fn shutdown(&self) {} - - fn metrics(&self) -> &BackendMetrics { - // Safe because nydusd must have backend attached with id, only image builder can no id - // but use backend instance to upload blob. - &self.metrics - } - - fn get_reader(&self, _blob_id: &str) -> BackendResult> { - Ok(Arc::new(MockBackend { - metrics: self.metrics.clone(), - })) - } -} - -#[derive(Default, Clone)] -pub(crate) struct MockChunkInfo { - pub block_id: RafsDigest, - pub blob_index: u32, - pub flags: BlobChunkFlags, - pub compress_size: u32, - pub uncompress_size: u32, - pub compress_offset: u64, - pub uncompress_offset: u64, - pub file_offset: u64, - pub index: u32, - #[allow(unused)] - pub reserved: u32, -} - -impl MockChunkInfo { - pub fn new() -> Self { - MockChunkInfo::default() - } -} - -impl BlobChunkInfo for MockChunkInfo { - fn chunk_id(&self) -> &RafsDigest { - &self.block_id - } - - fn id(&self) -> u32 { - self.index - } - - fn is_batch(&self) -> bool { - self.flags.contains(BlobChunkFlags::BATCH) - } - - fn is_compressed(&self) -> bool { - self.flags.contains(BlobChunkFlags::COMPRESSED) - } - - fn is_encrypted(&self) -> bool { - false - } - - fn as_any(&self) -> &dyn Any { - self - } - - impl_getter!(blob_index, blob_index, u32); - impl_getter!(compressed_offset, compress_offset, u64); - impl_getter!(compressed_size, compress_size, u32); - impl_getter!(uncompressed_offset, uncompress_offset, u64); - impl_getter!(uncompressed_size, uncompress_size, u32); -} - -impl BlobV5ChunkInfo for MockChunkInfo { - fn as_base(&self) -> &dyn BlobChunkInfo { - self - } - - impl_getter!(index, index, u32); - impl_getter!(file_offset, file_offset, u64); - impl_getter!(flags, flags, BlobChunkFlags); -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
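
`MockBackend::try_read` in `storage/src/test.rs` above fills the destination buffer with its own byte index, which makes read paths easy to assert on without a real backend. A stripped-down sketch of the same idea, using an assumed one-method trait rather than the crate's `BlobReader`:

```rust
// Minimal mock-reader sketch (assumed trait for illustration): the mock fills
// the buffer deterministically so callers can assert on exact contents.
trait Reader {
    fn try_read(&self, buf: &mut [u8], offset: u64) -> std::io::Result<usize>;
}

struct Mock;

impl Reader for Mock {
    fn try_read(&self, buf: &mut [u8], _offset: u64) -> std::io::Result<usize> {
        for (i, b) in buf.iter_mut().enumerate() {
            *b = i as u8;
        }
        Ok(buf.len())
    }
}

fn main() {
    let mut buf = [0u8; 4];
    let n = Mock.try_read(&mut buf, 0).unwrap();
    assert_eq!(n, 4);
    assert_eq!(buf, [0, 1, 2, 3]);
}
```
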
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use nydus_utils::digest::RafsDigest; +use nydus_utils::metrics::BackendMetrics; + +use super::impl_getter; +use crate::backend::{BackendResult, BlobBackend, BlobReader}; +use crate::device::v5::BlobV5ChunkInfo; +use crate::device::{BlobChunkFlags, BlobChunkInfo}; +use std::any::Any; + +pub(crate) struct MockBackend { + pub metrics: Arc, +} + +impl BlobReader for MockBackend { + fn blob_size(&self) -> BackendResult { + Ok(0) + } + + fn try_read(&self, buf: &mut [u8], _offset: u64) -> BackendResult { + let mut i = 0; + while i < buf.len() { + buf[i] = i as u8; + i += 1; + } + Ok(i) + } + + fn metrics(&self) -> &BackendMetrics { + // Safe because nydusd must have backend attached with id, only image builder can no id + // but use backend instance to upload blob. + &self.metrics + } +} + +impl BlobBackend for MockBackend { + fn shutdown(&self) {} + + fn metrics(&self) -> &BackendMetrics { + // Safe because nydusd must have backend attached with id, only image builder can no id + // but use backend instance to upload blob. + &self.metrics + } + + fn get_reader(&self, _blob_id: &str) -> BackendResult> { + Ok(Arc::new(MockBackend { + metrics: self.metrics.clone(), + })) + } +} + +#[derive(Default, Clone)] +pub(crate) struct MockChunkInfo { + pub block_id: RafsDigest, + pub blob_index: u32, + pub flags: BlobChunkFlags, + pub compress_size: u32, + pub uncompress_size: u32, + pub compress_offset: u64, + pub uncompress_offset: u64, + pub file_offset: u64, + pub index: u32, + #[allow(unused)] + pub reserved: u32, +} + +impl MockChunkInfo { + pub fn new() -> Self { + MockChunkInfo::default() + } +} + +impl BlobChunkInfo for MockChunkInfo { + fn chunk_id(&self) -> &RafsDigest { + &self.block_id + } + + fn id(&self) -> u32 { + self.index + } + + fn is_batch(&self) -> bool { + self.flags.contains(BlobChunkFlags::BATCH) + } + + fn is_compressed(&self) -> bool { + self.flags.contains(BlobChunkFlags::COMPRESSED) + } + + fn is_encrypted(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + impl_getter!(blob_index, blob_index, u32); + impl_getter!(compressed_offset, compress_offset, u64); + impl_getter!(compressed_size, compress_size, u32); + impl_getter!(uncompressed_offset, uncompress_offset, u64); + impl_getter!(uncompressed_size, uncompress_size, u32); +} + +impl BlobV5ChunkInfo for MockChunkInfo { + fn as_base(&self) -> &dyn BlobChunkInfo { + self + } + + impl_getter!(index, index, u32); + impl_getter!(file_offset, file_offset, u64); + impl_getter!(flags, flags, BlobChunkFlags); +} diff --git a/storage/src/utils.rs b/storage/src/utils.rs index 726ad921cf4..db3fa2f0dfd 100644 --- a/storage/src/utils.rs +++ b/storage/src/utils.rs @@ -1,375 +1,375 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Utility helpers to support the storage subsystem. -use std::alloc::{alloc, Layout}; -use std::cmp::{self, min}; -use std::io::{ErrorKind, IoSliceMut, Result}; -use std::os::unix::io::RawFd; -use std::slice::from_raw_parts_mut; - -use fuse_backend_rs::abi::fuse_abi::off64_t; -use fuse_backend_rs::file_buf::FileVolatileSlice; -#[cfg(target_os = "macos")] -use libc::{fcntl, radvisory}; -use nix::sys::uio::preadv; -use nydus_utils::{ - digest::{self, RafsDigest}, - round_down_4k, -}; -use vm_memory::bytes::Bytes; - -use crate::{StorageError, StorageResult}; - -/// Just a simple wrapper for posix `preadv`. Provide a slice of `IoVec` as input. 
-pub fn readv(fd: RawFd, iovec: &mut [IoSliceMut], offset: u64) -> Result { - loop { - match preadv(fd, iovec, offset as off64_t).map_err(|_| last_error!()) { - Ok(ret) => return Ok(ret), - // Retry if the IO is interrupted by signal. - Err(err) if err.kind() != ErrorKind::Interrupted => return Err(err), - _ => continue, - } - } -} - -/// Copy from buffer slice to another buffer slice. -/// -/// `offset` is where to start copy in the first buffer of source slice. -/// Up to bytes of `length` is wanted in `src`. -/// `dst_index` and `dst_slice_offset` indicate from where to start write destination. -/// Return (Total copied bytes, (Final written destination index, Final written destination offset)) -pub fn copyv>( - src: &[S], - dst: &[FileVolatileSlice], - offset: usize, - length: usize, - mut dst_index: usize, - mut dst_offset: usize, -) -> StorageResult<(usize, (usize, usize))> { - // Validate input parameters first to protect following loop block. - if src.is_empty() || length == 0 { - return Ok((0, (dst_index, dst_offset))); - } else if offset > src[0].as_ref().len() - || dst_index >= dst.len() - || dst_offset > dst[dst_index].len() - { - return Err(StorageError::MemOverflow); - } - - let mut copied = 0; - let mut src_offset = offset; - 'next_source: for s in src { - let s = s.as_ref(); - let mut buffer_len = min(s.len() - src_offset, length - copied); - - loop { - if dst_index >= dst.len() { - return Err(StorageError::MemOverflow); - } - - let dst_slice = &dst[dst_index]; - let buffer = &s[src_offset..src_offset + buffer_len]; - let written = dst_slice - .write(buffer, dst_offset) - .map_err(StorageError::VolatileSlice)?; - - copied += written; - if dst_slice.len() - dst_offset == written { - dst_index += 1; - dst_offset = 0; - } else { - dst_offset += written; - } - - // Move to next source buffer if the current source buffer has been exhausted. - if written == buffer_len { - src_offset = 0; - continue 'next_source; - } else { - src_offset += written; - buffer_len -= written; - } - } - } - - Ok((copied, (dst_index, dst_offset))) -} - -/// An memory cursor to access an `FileVolatileSlice` array. -pub struct MemSliceCursor<'a> { - pub mem_slice: &'a [FileVolatileSlice<'a>], - pub index: usize, - pub offset: usize, -} - -impl<'a> MemSliceCursor<'a> { - /// Create a new `MemSliceCursor` object. - pub fn new<'b: 'a>(slice: &'b [FileVolatileSlice]) -> Self { - Self { - mem_slice: slice, - index: 0, - offset: 0, - } - } - - /// Move cursor forward by `size`. - pub fn move_cursor(&mut self, mut size: usize) { - while size > 0 && self.index < self.mem_slice.len() { - let slice = self.mem_slice[self.index]; - let this_left = slice.len() - self.offset; - - match this_left.cmp(&size) { - cmp::Ordering::Equal => { - self.index += 1; - self.offset = 0; - return; - } - cmp::Ordering::Greater => { - self.offset += size; - return; - } - cmp::Ordering::Less => { - self.index += 1; - self.offset = 0; - size -= this_left; - continue; - } - } - } - } - - /// Consume `size` bytes of memory content from the cursor. - pub fn consume(&mut self, mut size: usize) -> Vec { - let mut vectors: Vec = Vec::with_capacity(8); - - while size > 0 && self.index < self.mem_slice.len() { - let slice = self.mem_slice[self.index]; - let this_left = slice.len() - self.offset; - - match this_left.cmp(&size) { - cmp::Ordering::Greater => { - // Safe because self.offset is valid and we have checked `size`. 
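
The `copyv` helper above scatters bytes from a list of source buffers into an array of destination slices, returning both the number of bytes copied and the (index, offset) position where the next write would land. The sketch below reimplements that bookkeeping over plain `Vec<u8>` destinations (it avoids `FileVolatileSlice`, so it is not the crate's implementation) and reproduces one of the cases asserted in the tests further down:

```rust
// Standalone scatter-copy sketch over plain byte vectors, showing the
// (copied, (index, offset)) bookkeeping that copyv above returns.
fn copy_scatter(
    src: &[&[u8]],
    dst: &mut [Vec<u8>],
    mut offset: usize, // offset into the first source buffer
    length: usize,
    mut di: usize,   // destination buffer index
    mut doff: usize, // offset inside that destination buffer
) -> (usize, (usize, usize)) {
    let mut copied = 0;
    for s in src {
        let mut pos = offset;
        offset = 0; // only the first source buffer starts at an offset
        while pos < s.len() && copied < length {
            if di >= dst.len() {
                return (copied, (di, doff));
            }
            let room = dst[di].len() - doff;
            let want = (s.len() - pos).min(length - copied).min(room);
            dst[di][doff..doff + want].copy_from_slice(&s[pos..pos + want]);
            pos += want;
            copied += want;
            doff += want;
            if doff == dst[di].len() {
                di += 1;
                doff = 0;
            }
        }
    }
    (copied, (di, doff))
}

fn main() {
    let src1 = [1u8, 2, 3];
    let src2 = [4u8, 5, 6];
    let srcs: [&[u8]; 2] = [&src1, &src2];
    let mut dst = vec![vec![0u8; 4], vec![0u8; 4]];
    // Start at byte 1 of src1 and copy 5 bytes into dst[0] from offset 0.
    let (copied, (di, doff)) = copy_scatter(&srcs, &mut dst, 1, 5, 0, 0);
    assert_eq!((copied, di, doff), (5, 1, 1));
    assert_eq!(dst[0], vec![2u8, 3, 4, 5]);
    assert_eq!(dst[1][0], 6);
}
```
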
- let p = unsafe { slice.as_ptr().add(self.offset) }; - let s = unsafe { from_raw_parts_mut(p, size) }; - vectors.push(IoSliceMut::new(s)); - self.offset += size; - break; - } - cmp::Ordering::Equal => { - // Safe because self.offset is valid and we have checked `size`. - let p = unsafe { slice.as_ptr().add(self.offset) }; - let s = unsafe { from_raw_parts_mut(p, size) }; - vectors.push(IoSliceMut::new(s)); - self.index += 1; - self.offset = 0; - break; - } - cmp::Ordering::Less => { - let p = unsafe { slice.as_ptr().add(self.offset) }; - let s = unsafe { from_raw_parts_mut(p, this_left) }; - vectors.push(IoSliceMut::new(s)); - self.index += 1; - self.offset = 0; - size -= this_left; - } - } - } - - vectors - } - - /// Get the inner `FileVolatileSlice` array. - pub fn inner_slice(&self) -> &[FileVolatileSlice] { - self.mem_slice - } -} - -/// A customized readahead function to ask kernel to fault in all pages from offset to end. -/// -/// Call libc::readahead on every 128KB range because otherwise readahead stops at kernel bdi -/// readahead size which is 128KB by default. -#[cfg(target_os = "linux")] -pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) { - offset = round_down_4k(offset); - while offset < end { - // Kernel default 128KB readahead size - let count = std::cmp::min(128 << 10, end - offset); - unsafe { libc::readahead(fd, offset as i64, count as usize) }; - offset += count; - } -} - -#[cfg(target_os = "macos")] -pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) { - offset = round_down_4k(offset); - while offset < end { - // Kernel default 128KB readahead size - let count = std::cmp::min(128 << 10, end - offset); - unsafe { - fcntl( - fd, - libc::F_RDADVISE, - radvisory { - ra_offset: offset as i64, - ra_count: count as i32, - }, - ); - } - offset += count; - } -} - -/// A customized buf allocator that avoids zeroing -pub fn alloc_buf(size: usize) -> Vec { - assert!(size < isize::MAX as usize); - let layout = Layout::from_size_align(size, 0x1000) - .unwrap() - .pad_to_align(); - let ptr = unsafe { alloc(layout) }; - unsafe { Vec::from_raw_parts(ptr, size, layout.size()) } -} - -/// Check hash of data matches provided one -pub fn check_digest(data: &[u8], digest: &RafsDigest, digester: digest::Algorithm) -> bool { - digest == &RafsDigest::from_buf(data, digester) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_copyv() { - let mut dst_buf1 = vec![0x0u8; 4]; - let mut dst_buf2 = vec![0x0u8; 4]; - let volatile_slice_1 = - unsafe { FileVolatileSlice::from_raw_ptr(dst_buf1.as_mut_ptr(), dst_buf1.len()) }; - let volatile_slice_2 = - unsafe { FileVolatileSlice::from_raw_ptr(dst_buf2.as_mut_ptr(), dst_buf2.len()) }; - let dst_bufs = [volatile_slice_1, volatile_slice_2]; - - let src_buf_1 = vec![1u8, 2u8, 3u8]; - let src_buf_2 = vec![4u8, 5u8, 6u8]; - let src_bufs = vec![src_buf_1.as_slice(), src_buf_2.as_slice()]; - - assert_eq!( - copyv(&[Vec::::new(); 0], &dst_bufs, 0, 1, 1, 1).unwrap(), - (0, (1, 1)) - ); - assert_eq!( - copyv(&src_bufs, &dst_bufs, 0, 0, 1, 1).unwrap(), - (0, (1, 1)) - ); - assert!(copyv(&src_bufs, &dst_bufs, 5, 1, 1, 1).is_err()); - assert!(copyv(&src_bufs, &dst_bufs, 0, 1, 2, 0).is_err()); - assert!(copyv(&src_bufs, &dst_bufs, 0, 1, 1, 3).is_err()); - - assert_eq!( - copyv(&src_bufs, &dst_bufs, 1, 5, 0, 0,).unwrap(), - (5, (1, 1)) - ); - assert_eq!(dst_buf1[0], 2); - assert_eq!(dst_buf1[1], 3); - assert_eq!(dst_buf1[2], 4); - assert_eq!(dst_buf1[3], 5); - assert_eq!(dst_buf2[0], 6); - - assert_eq!( - copyv(&src_bufs, 
&dst_bufs, 1, 3, 1, 0,).unwrap(), - (3, (1, 3)) - ); - assert_eq!(dst_buf2[0], 2); - assert_eq!(dst_buf2[1], 3); - assert_eq!(dst_buf2[2], 4); - - assert_eq!( - copyv(&src_bufs, &dst_bufs, 1, 3, 1, 1,).unwrap(), - (3, (2, 0)) - ); - assert_eq!(dst_buf2[1], 2); - assert_eq!(dst_buf2[2], 3); - assert_eq!(dst_buf2[3], 4); - - assert_eq!( - copyv(&src_bufs, &dst_bufs, 1, 6, 0, 3,).unwrap(), - (5, (2, 0)) - ); - assert_eq!(dst_buf1[3], 2); - assert_eq!(dst_buf2[0], 3); - assert_eq!(dst_buf2[1], 4); - assert_eq!(dst_buf2[2], 5); - assert_eq!(dst_buf2[3], 6); - } - - #[test] - fn test_mem_slice_cursor_move() { - let mut buf1 = vec![0x0u8; 2]; - let vs1 = unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) }; - let mut buf2 = vec![0x0u8; 2]; - let vs2 = unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }; - let vs = [vs1, vs2]; - - let mut cursor = MemSliceCursor::new(&vs); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 0); - - cursor.move_cursor(0); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 0); - - cursor.move_cursor(1); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 1); - - cursor.move_cursor(1); - assert_eq!(cursor.index, 1); - assert_eq!(cursor.offset, 0); - - cursor.move_cursor(1); - assert_eq!(cursor.index, 1); - assert_eq!(cursor.offset, 1); - - cursor.move_cursor(2); - assert_eq!(cursor.index, 2); - assert_eq!(cursor.offset, 0); - - cursor.move_cursor(1); - assert_eq!(cursor.index, 2); - assert_eq!(cursor.offset, 0); - } - - #[test] - fn test_mem_slice_cursor_consume() { - let mut buf1 = vec![0x0u8; 2]; - let vs1 = unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) }; - let mut buf2 = vec![0x0u8; 2]; - let vs2 = unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }; - let vs = [vs1, vs2]; - - let mut cursor = MemSliceCursor::new(&vs); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 0); - - assert_eq!(cursor.consume(0).len(), 0); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 0); - - assert_eq!(cursor.consume(1).len(), 1); - assert_eq!(cursor.index, 0); - assert_eq!(cursor.offset, 1); - - assert_eq!(cursor.consume(2).len(), 2); - assert_eq!(cursor.index, 1); - assert_eq!(cursor.offset, 1); - - assert_eq!(cursor.consume(2).len(), 1); - assert_eq!(cursor.index, 2); - assert_eq!(cursor.offset, 0); - - assert_eq!(cursor.consume(2).len(), 0); - assert_eq!(cursor.index, 2); - assert_eq!(cursor.offset, 0); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Utility helpers to support the storage subsystem. +use std::alloc::{alloc, Layout}; +use std::cmp::{self, min}; +use std::io::{ErrorKind, IoSliceMut, Result}; +use std::os::unix::io::RawFd; +use std::slice::from_raw_parts_mut; + +use fuse_backend_rs::abi::fuse_abi::off64_t; +use fuse_backend_rs::file_buf::FileVolatileSlice; +#[cfg(target_os = "macos")] +use libc::{fcntl, radvisory}; +use nix::sys::uio::preadv; +use nydus_utils::{ + digest::{self, RafsDigest}, + round_down_4k, +}; +use vm_memory::bytes::Bytes; + +use crate::{StorageError, StorageResult}; + +/// Just a simple wrapper for posix `preadv`. Provide a slice of `IoVec` as input. +pub fn readv(fd: RawFd, iovec: &mut [IoSliceMut], offset: u64) -> Result { + loop { + match preadv(fd, iovec, offset as off64_t).map_err(|_| last_error!()) { + Ok(ret) => return Ok(ret), + // Retry if the IO is interrupted by signal. 
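
The `readv` wrapper above retries `preadv` whenever the call is interrupted by a signal and returns any other error as-is. The same retry-on-`Interrupted` pattern is sketched here with plain `std::fs::File` reads instead of `preadv`; the path is only an example, any readable file works:

```rust
// Retry-on-EINTR sketch mirroring the readv() wrapper above, using a plain
// positioned read from std instead of preadv.
use std::fs::File;
use std::io::{ErrorKind, Read, Result, Seek, SeekFrom};

fn read_at_retrying(file: &mut File, offset: u64, buf: &mut [u8]) -> Result<usize> {
    file.seek(SeekFrom::Start(offset))?;
    loop {
        match file.read(buf) {
            Ok(n) => return Ok(n),
            // A signal interrupted the syscall before any data was read: retry.
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}

fn main() -> Result<()> {
    let mut file = File::open("/etc/os-release")?; // example path only
    let mut buf = [0u8; 32];
    let n = read_at_retrying(&mut file, 0, &mut buf)?;
    println!("read {} bytes", n);
    Ok(())
}
```
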
+ Err(err) if err.kind() != ErrorKind::Interrupted => return Err(err), + _ => continue, + } + } +} + +/// Copy from buffer slice to another buffer slice. +/// +/// `offset` is where to start copy in the first buffer of source slice. +/// Up to bytes of `length` is wanted in `src`. +/// `dst_index` and `dst_slice_offset` indicate from where to start write destination. +/// Return (Total copied bytes, (Final written destination index, Final written destination offset)) +pub fn copyv>( + src: &[S], + dst: &[FileVolatileSlice], + offset: usize, + length: usize, + mut dst_index: usize, + mut dst_offset: usize, +) -> StorageResult<(usize, (usize, usize))> { + // Validate input parameters first to protect following loop block. + if src.is_empty() || length == 0 { + return Ok((0, (dst_index, dst_offset))); + } else if offset > src[0].as_ref().len() + || dst_index >= dst.len() + || dst_offset > dst[dst_index].len() + { + return Err(StorageError::MemOverflow); + } + + let mut copied = 0; + let mut src_offset = offset; + 'next_source: for s in src { + let s = s.as_ref(); + let mut buffer_len = min(s.len() - src_offset, length - copied); + + loop { + if dst_index >= dst.len() { + return Err(StorageError::MemOverflow); + } + + let dst_slice = &dst[dst_index]; + let buffer = &s[src_offset..src_offset + buffer_len]; + let written = dst_slice + .write(buffer, dst_offset) + .map_err(StorageError::VolatileSlice)?; + + copied += written; + if dst_slice.len() - dst_offset == written { + dst_index += 1; + dst_offset = 0; + } else { + dst_offset += written; + } + + // Move to next source buffer if the current source buffer has been exhausted. + if written == buffer_len { + src_offset = 0; + continue 'next_source; + } else { + src_offset += written; + buffer_len -= written; + } + } + } + + Ok((copied, (dst_index, dst_offset))) +} + +/// An memory cursor to access an `FileVolatileSlice` array. +pub struct MemSliceCursor<'a> { + pub mem_slice: &'a [FileVolatileSlice<'a>], + pub index: usize, + pub offset: usize, +} + +impl<'a> MemSliceCursor<'a> { + /// Create a new `MemSliceCursor` object. + pub fn new<'b: 'a>(slice: &'b [FileVolatileSlice]) -> Self { + Self { + mem_slice: slice, + index: 0, + offset: 0, + } + } + + /// Move cursor forward by `size`. + pub fn move_cursor(&mut self, mut size: usize) { + while size > 0 && self.index < self.mem_slice.len() { + let slice = self.mem_slice[self.index]; + let this_left = slice.len() - self.offset; + + match this_left.cmp(&size) { + cmp::Ordering::Equal => { + self.index += 1; + self.offset = 0; + return; + } + cmp::Ordering::Greater => { + self.offset += size; + return; + } + cmp::Ordering::Less => { + self.index += 1; + self.offset = 0; + size -= this_left; + continue; + } + } + } + } + + /// Consume `size` bytes of memory content from the cursor. + pub fn consume(&mut self, mut size: usize) -> Vec { + let mut vectors: Vec = Vec::with_capacity(8); + + while size > 0 && self.index < self.mem_slice.len() { + let slice = self.mem_slice[self.index]; + let this_left = slice.len() - self.offset; + + match this_left.cmp(&size) { + cmp::Ordering::Greater => { + // Safe because self.offset is valid and we have checked `size`. + let p = unsafe { slice.as_ptr().add(self.offset) }; + let s = unsafe { from_raw_parts_mut(p, size) }; + vectors.push(IoSliceMut::new(s)); + self.offset += size; + break; + } + cmp::Ordering::Equal => { + // Safe because self.offset is valid and we have checked `size`. 
+ let p = unsafe { slice.as_ptr().add(self.offset) }; + let s = unsafe { from_raw_parts_mut(p, size) }; + vectors.push(IoSliceMut::new(s)); + self.index += 1; + self.offset = 0; + break; + } + cmp::Ordering::Less => { + let p = unsafe { slice.as_ptr().add(self.offset) }; + let s = unsafe { from_raw_parts_mut(p, this_left) }; + vectors.push(IoSliceMut::new(s)); + self.index += 1; + self.offset = 0; + size -= this_left; + } + } + } + + vectors + } + + /// Get the inner `FileVolatileSlice` array. + pub fn inner_slice(&self) -> &[FileVolatileSlice] { + self.mem_slice + } +} + +/// A customized readahead function to ask kernel to fault in all pages from offset to end. +/// +/// Call libc::readahead on every 128KB range because otherwise readahead stops at kernel bdi +/// readahead size which is 128KB by default. +#[cfg(target_os = "linux")] +pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) { + offset = round_down_4k(offset); + while offset < end { + // Kernel default 128KB readahead size + let count = std::cmp::min(128 << 10, end - offset); + unsafe { libc::readahead(fd, offset as i64, count as usize) }; + offset += count; + } +} + +#[cfg(target_os = "macos")] +pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) { + offset = round_down_4k(offset); + while offset < end { + // Kernel default 128KB readahead size + let count = std::cmp::min(128 << 10, end - offset); + unsafe { + fcntl( + fd, + libc::F_RDADVISE, + radvisory { + ra_offset: offset as i64, + ra_count: count as i32, + }, + ); + } + offset += count; + } +} + +/// A customized buf allocator that avoids zeroing +pub fn alloc_buf(size: usize) -> Vec { + assert!(size < isize::MAX as usize); + let layout = Layout::from_size_align(size, 0x1000) + .unwrap() + .pad_to_align(); + let ptr = unsafe { alloc(layout) }; + unsafe { Vec::from_raw_parts(ptr, size, layout.size()) } +} + +/// Check hash of data matches provided one +pub fn check_digest(data: &[u8], digest: &RafsDigest, digester: digest::Algorithm) -> bool { + digest == &RafsDigest::from_buf(data, digester) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_copyv() { + let mut dst_buf1 = vec![0x0u8; 4]; + let mut dst_buf2 = vec![0x0u8; 4]; + let volatile_slice_1 = + unsafe { FileVolatileSlice::from_raw_ptr(dst_buf1.as_mut_ptr(), dst_buf1.len()) }; + let volatile_slice_2 = + unsafe { FileVolatileSlice::from_raw_ptr(dst_buf2.as_mut_ptr(), dst_buf2.len()) }; + let dst_bufs = [volatile_slice_1, volatile_slice_2]; + + let src_buf_1 = vec![1u8, 2u8, 3u8]; + let src_buf_2 = vec![4u8, 5u8, 6u8]; + let src_bufs = vec![src_buf_1.as_slice(), src_buf_2.as_slice()]; + + assert_eq!( + copyv(&[Vec::::new(); 0], &dst_bufs, 0, 1, 1, 1).unwrap(), + (0, (1, 1)) + ); + assert_eq!( + copyv(&src_bufs, &dst_bufs, 0, 0, 1, 1).unwrap(), + (0, (1, 1)) + ); + assert!(copyv(&src_bufs, &dst_bufs, 5, 1, 1, 1).is_err()); + assert!(copyv(&src_bufs, &dst_bufs, 0, 1, 2, 0).is_err()); + assert!(copyv(&src_bufs, &dst_bufs, 0, 1, 1, 3).is_err()); + + assert_eq!( + copyv(&src_bufs, &dst_bufs, 1, 5, 0, 0,).unwrap(), + (5, (1, 1)) + ); + assert_eq!(dst_buf1[0], 2); + assert_eq!(dst_buf1[1], 3); + assert_eq!(dst_buf1[2], 4); + assert_eq!(dst_buf1[3], 5); + assert_eq!(dst_buf2[0], 6); + + assert_eq!( + copyv(&src_bufs, &dst_bufs, 1, 3, 1, 0,).unwrap(), + (3, (1, 3)) + ); + assert_eq!(dst_buf2[0], 2); + assert_eq!(dst_buf2[1], 3); + assert_eq!(dst_buf2[2], 4); + + assert_eq!( + copyv(&src_bufs, &dst_bufs, 1, 3, 1, 1,).unwrap(), + (3, (2, 0)) + ); + assert_eq!(dst_buf2[1], 2); + 
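+ // The three source bytes (2, 3, 4) land at dst_buf2[1..4]; that fills the
+ // second destination slice, so the returned cursor is (2, 0).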
assert_eq!(dst_buf2[2], 3); + assert_eq!(dst_buf2[3], 4); + + assert_eq!( + copyv(&src_bufs, &dst_bufs, 1, 6, 0, 3,).unwrap(), + (5, (2, 0)) + ); + assert_eq!(dst_buf1[3], 2); + assert_eq!(dst_buf2[0], 3); + assert_eq!(dst_buf2[1], 4); + assert_eq!(dst_buf2[2], 5); + assert_eq!(dst_buf2[3], 6); + } + + #[test] + fn test_mem_slice_cursor_move() { + let mut buf1 = vec![0x0u8; 2]; + let vs1 = unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) }; + let mut buf2 = vec![0x0u8; 2]; + let vs2 = unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }; + let vs = [vs1, vs2]; + + let mut cursor = MemSliceCursor::new(&vs); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 0); + + cursor.move_cursor(0); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 0); + + cursor.move_cursor(1); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 1); + + cursor.move_cursor(1); + assert_eq!(cursor.index, 1); + assert_eq!(cursor.offset, 0); + + cursor.move_cursor(1); + assert_eq!(cursor.index, 1); + assert_eq!(cursor.offset, 1); + + cursor.move_cursor(2); + assert_eq!(cursor.index, 2); + assert_eq!(cursor.offset, 0); + + cursor.move_cursor(1); + assert_eq!(cursor.index, 2); + assert_eq!(cursor.offset, 0); + } + + #[test] + fn test_mem_slice_cursor_consume() { + let mut buf1 = vec![0x0u8; 2]; + let vs1 = unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) }; + let mut buf2 = vec![0x0u8; 2]; + let vs2 = unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) }; + let vs = [vs1, vs2]; + + let mut cursor = MemSliceCursor::new(&vs); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 0); + + assert_eq!(cursor.consume(0).len(), 0); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 0); + + assert_eq!(cursor.consume(1).len(), 1); + assert_eq!(cursor.index, 0); + assert_eq!(cursor.offset, 1); + + assert_eq!(cursor.consume(2).len(), 2); + assert_eq!(cursor.index, 1); + assert_eq!(cursor.offset, 1); + + assert_eq!(cursor.consume(2).len(), 1); + assert_eq!(cursor.index, 2); + assert_eq!(cursor.offset, 0); + + assert_eq!(cursor.consume(2).len(), 0); + assert_eq!(cursor.index, 2); + assert_eq!(cursor.offset, 0); + } +} diff --git a/tests/bats/Makefile b/tests/bats/Makefile index c43bb98667e..e775b4b0c3b 100644 --- a/tests/bats/Makefile +++ b/tests/bats/Makefile @@ -1,15 +1,15 @@ -ifneq (,$(wildcard /usr/lib/os-release)) -include /usr/lib/os-release -else -include /etc/os-release -endif - -ci: - bash -f ./install_bats.sh - bats --formatter tap build_docker_image.bats - bats --formatter tap compile_nydusd.bats - bats --formatter tap compile_ctr_remote.bats - bats --formatter tap compile_nydus_snapshotter.bats - bats --formatter tap run_container_with_rafs.bats - bats --formatter tap run_container_with_zran.bats - bats --formatter tap run_container_with_rafs_and_compile_linux.bats +ifneq (,$(wildcard /usr/lib/os-release)) +include /usr/lib/os-release +else +include /etc/os-release +endif + +ci: + bash -f ./install_bats.sh + bats --formatter tap build_docker_image.bats + bats --formatter tap compile_nydusd.bats + bats --formatter tap compile_ctr_remote.bats + bats --formatter tap compile_nydus_snapshotter.bats + bats --formatter tap run_container_with_rafs.bats + bats --formatter tap run_container_with_zran.bats + bats --formatter tap run_container_with_rafs_and_compile_linux.bats diff --git a/tests/bats/build_docker_image.bats b/tests/bats/build_docker_image.bats index 1e0782cc58d..f106c0b3a23 100644 --- 
a/tests/bats/build_docker_image.bats +++ b/tests/bats/build_docker_image.bats @@ -1,18 +1,18 @@ -#!/usr/bin/bats - -load "${BATS_TEST_DIRNAME}/common_tests.sh" - -setup() { - dockerfile="/tmp/rust_golang_dockerfile" - generate_rust_golang_dockerfile $dockerfile -} - -@test "build rust golang image" { - yum install -y docker - docker build -f $dockerfile -t $compile_image . -} - -teardown() { - rm -f $dockerfile -} - +#!/usr/bin/bats + +load "${BATS_TEST_DIRNAME}/common_tests.sh" + +setup() { + dockerfile="/tmp/rust_golang_dockerfile" + generate_rust_golang_dockerfile $dockerfile +} + +@test "build rust golang image" { + yum install -y docker + docker build -f $dockerfile -t $compile_image . +} + +teardown() { + rm -f $dockerfile +} + diff --git a/tests/bats/common_tests.sh b/tests/bats/common_tests.sh index 0f67c03e7ac..03521347ea8 100644 --- a/tests/bats/common_tests.sh +++ b/tests/bats/common_tests.sh @@ -1,105 +1,105 @@ -parse_toml() { - local input=$1 - local key=$2 - - # Using sed to extract the value of a specified key from Toml content - local value - value=$(echo "$input" | sed -n 's/.*'"$key"' = "\(.*\)"/\1/p') - # Remove quote - # shellcheck disable=SC2001 - value=$(echo "$value" | sed 's/"//g') - - echo "$value" -} - -get_rust_toolcahin() { - local base_dir=$1 - local toml_file="${base_dir}/rust-toolchain.toml" - local legacy_toml_file="${base_dir}/rust-toolchain" - local version - - if [ -f "$toml_file" ]; then - local toml_content - toml_content=$(cat "$toml_file") - version=$(parse_toml "$toml_content" 'channel') - else - version=$(cat "$legacy_toml_file") - fi - - echo "$version" -} - -repo_base_dir="${BATS_TEST_DIRNAME}/../.." -rust_toolchain=$(get_rust_toolcahin "$repo_base_dir") -compile_image="localhost/compile-image:${rust_toolchain}" -nydus_snapshotter_repo="https://github.com/containerd/nydus-snapshotter.git" - -generate_rust_golang_dockerfile() { - local dockerfile=${1:-"/tmp/rust_golang_dockerfile"} - local rust_version=${2:-"${rust_toolchain}"} - cat > $dockerfile </tmp/nydus-erofs-config.json < $nydus_snapshotter_logfile 2>&1 & -} - -config_containerd_for_nydus() { - [ -d "/etc/containerd" ] || mkdir -p /etc/containerd - cat >/etc/containerd/config.toml < $dockerfile </tmp/nydus-erofs-config.json < $nydus_snapshotter_logfile 2>&1 & +} + +config_containerd_for_nydus() { + [ -d "/etc/containerd" ] || mkdir -p /etc/containerd + cat >/etc/containerd/config.toml < ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log - ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image - if ps -ef | grep containerd-nydus-grpc | grep -v grep; then - ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 - fi - if ps -ef | grep nydusd | grep fscache; then - ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 - fi - if mount | grep 'erofs on'; then - mount | grep 'erofs on' | awk '{print $3}' | xargs umount - fi -} +#!/usr/bin/bats + +load "${BATS_TEST_DIRNAME}/common_tests.sh" + +setup() { + nydus_rafs_image="docker.io/hsiangkao/ubuntu:20.04-rafs-v6" + run_nydus_snapshotter + config_containerd_for_nydus + ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image + ctr-remote images rpull $nydus_rafs_image +} + +@test "run container with rafs" { + ctr run --rm --snapshotter=nydus $nydus_rafs_image test_container tar cvf /tmp/foo.tar --exclude=/sys --exclude=/proc --exclude=/dev / +} + +teardown() { + dmesg -T | tail -300 > ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log + ctr images ls | 
grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image + if ps -ef | grep containerd-nydus-grpc | grep -v grep; then + ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 + fi + if ps -ef | grep nydusd | grep fscache; then + ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 + fi + if mount | grep 'erofs on'; then + mount | grep 'erofs on' | awk '{print $3}' | xargs umount + fi +} diff --git a/tests/bats/run_container_with_rafs_and_compile_linux.bats b/tests/bats/run_container_with_rafs_and_compile_linux.bats index 6a43434595a..98c568fc744 100644 --- a/tests/bats/run_container_with_rafs_and_compile_linux.bats +++ b/tests/bats/run_container_with_rafs_and_compile_linux.bats @@ -1,37 +1,37 @@ -load "${BATS_TEST_DIRNAME}/common_tests.sh" - -setup() { - nydus_rafs_image="docker.io/openanolis/bldlinux:v0.1-rafs-v6-lz4" - nydus_rafs_image_bak="ghcr.io/dragonflyoss/image-service/bldlinux:v0.1-rafs-v6-lz4" - run_nydus_snapshotter - config_containerd_for_nydus - ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image - nerdctl pull --snapshotter=nydus $nydus_rafs_image - ctr images ls | grep -q "${nydus_rafs_image_bak}" && ctr images rm $nydus_rafs_image_bak - nerdctl pull --snapshotter=nydus $nydus_rafs_image_bak -} - -@test "run container with rafs and compile linux" { - nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' - if [ $? -ne 0 ]; then - nydus_rafs_image=${nydus_rafs_image_bak} - nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' - fi - echo "drop cache and compile linux in container again" - echo 3 > /proc/sys/vm/drop_caches - nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' -} - -teardown() { - dmesg -T | tail -300 > ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log - ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image - if ps -ef | grep containerd-nydus-grpc | grep -v grep; then - ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 - fi - if ps -ef | grep nydusd | grep fscache; then - ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 - fi - if mount | grep 'erofs on'; then - mount | grep 'erofs on' | awk '{print $3}' | xargs umount - fi -} +load "${BATS_TEST_DIRNAME}/common_tests.sh" + +setup() { + nydus_rafs_image="docker.io/openanolis/bldlinux:v0.1-rafs-v6-lz4" + nydus_rafs_image_bak="ghcr.io/dragonflyoss/image-service/bldlinux:v0.1-rafs-v6-lz4" + run_nydus_snapshotter + config_containerd_for_nydus + ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image + nerdctl pull --snapshotter=nydus $nydus_rafs_image + ctr images ls | grep -q "${nydus_rafs_image_bak}" && ctr images rm $nydus_rafs_image_bak + nerdctl pull --snapshotter=nydus $nydus_rafs_image_bak +} + +@test "run container with rafs and compile linux" { + nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' + if [ $? 
-ne 0 ]; then + nydus_rafs_image=${nydus_rafs_image_bak} + nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' + fi + echo "drop cache and compile linux in container again" + echo 3 > /proc/sys/vm/drop_caches + nerdctl run --rm --net=host --snapshotter=nydus $nydus_rafs_image /bin/bash -c 'cd /linux-5.10.87; make defconfig; make -j8' +} + +teardown() { + dmesg -T | tail -300 > ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log + ctr images ls | grep -q "${nydus_rafs_image}" && ctr images rm $nydus_rafs_image + if ps -ef | grep containerd-nydus-grpc | grep -v grep; then + ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 + fi + if ps -ef | grep nydusd | grep fscache; then + ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 + fi + if mount | grep 'erofs on'; then + mount | grep 'erofs on' | awk '{print $3}' | xargs umount + fi +} diff --git a/tests/bats/run_container_with_zran.bats b/tests/bats/run_container_with_zran.bats index bee0645e08b..f922d203e42 100644 --- a/tests/bats/run_container_with_zran.bats +++ b/tests/bats/run_container_with_zran.bats @@ -1,29 +1,29 @@ -#!/usr/bin/bats - -load "${BATS_TEST_DIRNAME}/common_tests.sh" - -setup() { - nydus_zran_image="docker.io/hsiangkao/node:18-nydus-oci-ref" - run_nydus_snapshotter - config_containerd_for_nydus - ctr images ls | grep -q "${nydus_zran_image}" && ctr images rm $nydus_zran_image - ctr-remote images rpull $nydus_zran_image -} - -@test "run container with zran" { - ctr run --rm --snapshotter=nydus $nydus_zran_image test_container tar cvf /tmp/foo.tar --exclude=/sys --exclude=/proc --exclude=/dev / -} - -teardown() { - dmesg -T | tail -300 > ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log - ctr images ls | grep -q "${nydus_zran_image}" && ctr images rm $nydus_zran_image - if ps -ef | grep containerd-nydus-grpc | grep -v grep; then - ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 - fi - if ps -ef | grep nydusd | grep fscache; then - ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 - fi - if mount | grep 'erofs on'; then - mount | grep 'erofs on' | awk '{print $3}' | xargs umount - fi -} +#!/usr/bin/bats + +load "${BATS_TEST_DIRNAME}/common_tests.sh" + +setup() { + nydus_zran_image="docker.io/hsiangkao/node:18-nydus-oci-ref" + run_nydus_snapshotter + config_containerd_for_nydus + ctr images ls | grep -q "${nydus_zran_image}" && ctr images rm $nydus_zran_image + ctr-remote images rpull $nydus_zran_image +} + +@test "run container with zran" { + ctr run --rm --snapshotter=nydus $nydus_zran_image test_container tar cvf /tmp/foo.tar --exclude=/sys --exclude=/proc --exclude=/dev / +} + +teardown() { + dmesg -T | tail -300 > ${BATS_TEST_DIRNAME}/dmesg-${BATS_TEST_NAME}.log + ctr images ls | grep -q "${nydus_zran_image}" && ctr images rm $nydus_zran_image + if ps -ef | grep containerd-nydus-grpc | grep -v grep; then + ps -ef | grep containerd-nydus-grpc | grep -v grep | awk '{print $2}' | xargs kill -9 + fi + if ps -ef | grep nydusd | grep fscache; then + ps -ef | grep nydusd | grep fscache | awk '{print $2}' | xargs kill -9 + fi + if mount | grep 'erofs on'; then + mount | grep 'erofs on' | awk '{print $3}' | xargs umount + fi +} diff --git a/tests/texture/repeatable/blake3-lz4_block-non_repeatable.result b/tests/texture/repeatable/blake3-lz4_block-non_repeatable.result index e2808e64bae..30acac68537 100644 --- 
a/tests/texture/repeatable/blake3-lz4_block-non_repeatable.result +++ b/tests/texture/repeatable/blake3-lz4_block-non_repeatable.result @@ -1,104 +1,104 @@ -[ - {"type":"directory","name":"","contents":[ - {"type":"directory","name":"hardlink-test","contents":[ - {"type":"file","name":"foo"}, - {"type":"file","name":"test.sh"} - ]}, - {"type":"directory","name":"normal-file-test","contents":[ - {"type":"directory","name":"busybox","contents":[ - {"type":"directory","name":"1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475","contents":[ - {"type":"file","name":"VERSION"}, - {"type":"directory","name":"bin","contents":[ - ]}, - {"type":"directory","name":"dev","contents":[ - ]}, - {"type":"directory","name":"etc","contents":[ - {"type":"file","name":"group"}, - {"type":"file","name":"localtime"}, - {"type":"directory","name":"network","contents":[ - {"type":"directory","name":"if-down.d","contents":[ - ]}, - {"type":"directory","name":"if-post-down.d","contents":[ - ]}, - {"type":"directory","name":"if-pre-up.d","contents":[ - ]}, - {"type":"directory","name":"if-up.d","contents":[ - ]} - ]}, - {"type":"file","name":"passwd"}, - {"type":"file","name":"shadow"} - ]}, - {"type":"directory","name":"home","contents":[ - ]}, - {"type":"file","name":"json"}, - {"type":"file","name":"layer.tar"}, - {"type":"directory","name":"root","contents":[ - ]}, - {"type":"directory","name":"tmp","contents":[ - ]}, - {"type":"directory","name":"usr","contents":[ - {"type":"directory","name":"sbin","contents":[ - ]} - ]}, - {"type":"directory","name":"var","contents":[ - {"type":"directory","name":"spool","contents":[ - {"type":"directory","name":"mail","contents":[ - ]} - ]}, - {"type":"directory","name":"www","contents":[ - ]} - ]} - ]}, - {"type":"file","name":"af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json"}, - {"type":"file","name":"manifest.json"}, - {"type":"file","name":"repositories"} - ]}, - {"type":"directory","name":"holefiles","contents":[ - {"type":"file","name":"chsum"}, - {"type":"file","name":"head-hole-1"}, - {"type":"file","name":"head-hole-2"}, - {"type":"file","name":"head-hole-3"}, - {"type":"file","name":"mid-non-hole-1024"}, - {"type":"file","name":"mid-non-hole-4096"}, - {"type":"file","name":"mid-non-hole-5000"}, - {"type":"file","name":"tail-hole-1024"}, - {"type":"file","name":"tail-hole-4096"}, - {"type":"file","name":"tail-hole-5000"} - ]}, - {"type":"file","name":"test.sh"} - ]}, - {"type":"directory","name":"symlink-test","contents":[ - {"type":"link","name":"bar","target":"../hardlink-test-tt","contents":[]}, - {"type":"link","name":"foo","target":"../normal-file-test/","contents":[]}, - {"type":"link","name":"foo1","target":"../../fc/foo","contents":[]}, - {"type":"link","name":"root","target":"/","contents":[]}, - {"type":"link","name":"root-1","target":"/////////////////","contents":[]}, - {"type":"link","name":"root-2","target":"/foobar","contents":[]}, - {"type":"link","name":"test.sh","target":"../normal-file-test/test.sh","contents":[]} - ]} - ]}, - {"type":"report","directories":26,"files":27} -] -0441db85b7cdd66b6eb9322b009f3f92 /normal-file-test/holefiles/head-hole-3 -090dde7adf8e77be248f27e3ae615e46 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/group -2e076fc116332a0ce3619404d12d592e /normal-file-test/busybox/repositories -552dde9afaa7e1989b4482abf47bca6e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/layer.tar 
-5b2b424e6d107c3fcc0a8e8f3fd0feae /normal-file-test/holefiles/mid-non-hole-1024 -5e23c02b9208ee1d67fb41c95e13bae1 /normal-file-test/holefiles/tail-hole-4096 -60eac4fea7fa1cf1b68773050fc1c0e4 /normal-file-test/holefiles/mid-non-hole-4096 -7a19c32d2c75345debe9031cfa9b649a /normal-file-test/busybox/af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json -7a7e3e0c4cb86c05f2a6101f6f776eb7 /normal-file-test/busybox/manifest.json -7c04e662b37e09a98c46141cf08fb247 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/passwd -9003819f55c5a859802a2764e550fa59 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/json -91768f15d2bdcd21972564048e4fe2e2 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/shadow -92b174ad356a759b690ad51283849cb0 /normal-file-test/holefiles/chsum -9bad2bfc660b13a8742935a94c299b6a /normal-file-test/holefiles/mid-non-hole-5000 -a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/foo -a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/test.sh -a292e7c6b19c3566a7bbee6a7da632ab /normal-file-test/test.sh -cbe00d25b87d730d188245ea67c6a47e /normal-file-test/holefiles/head-hole-2 -e0d2f7f78d741d55271cb34c30c45e8b /normal-file-test/holefiles/tail-hole-1024 -e4c2e8edac362acab7123654b9e73432 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/VERSION -ea7ac8dc0ae3673040f3b1d816d972d7 /normal-file-test/holefiles/head-hole-1 -fe9ad2d5c4c79122a99b4d5ed44fda0e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/localtime -ff5ebc09868133b341c77bf223a0c350 /normal-file-test/holefiles/tail-hole-5000 +[ + {"type":"directory","name":"","contents":[ + {"type":"directory","name":"hardlink-test","contents":[ + {"type":"file","name":"foo"}, + {"type":"file","name":"test.sh"} + ]}, + {"type":"directory","name":"normal-file-test","contents":[ + {"type":"directory","name":"busybox","contents":[ + {"type":"directory","name":"1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475","contents":[ + {"type":"file","name":"VERSION"}, + {"type":"directory","name":"bin","contents":[ + ]}, + {"type":"directory","name":"dev","contents":[ + ]}, + {"type":"directory","name":"etc","contents":[ + {"type":"file","name":"group"}, + {"type":"file","name":"localtime"}, + {"type":"directory","name":"network","contents":[ + {"type":"directory","name":"if-down.d","contents":[ + ]}, + {"type":"directory","name":"if-post-down.d","contents":[ + ]}, + {"type":"directory","name":"if-pre-up.d","contents":[ + ]}, + {"type":"directory","name":"if-up.d","contents":[ + ]} + ]}, + {"type":"file","name":"passwd"}, + {"type":"file","name":"shadow"} + ]}, + {"type":"directory","name":"home","contents":[ + ]}, + {"type":"file","name":"json"}, + {"type":"file","name":"layer.tar"}, + {"type":"directory","name":"root","contents":[ + ]}, + {"type":"directory","name":"tmp","contents":[ + ]}, + {"type":"directory","name":"usr","contents":[ + {"type":"directory","name":"sbin","contents":[ + ]} + ]}, + {"type":"directory","name":"var","contents":[ + {"type":"directory","name":"spool","contents":[ + {"type":"directory","name":"mail","contents":[ + ]} + ]}, + {"type":"directory","name":"www","contents":[ + ]} + ]} + ]}, + {"type":"file","name":"af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json"}, + {"type":"file","name":"manifest.json"}, + {"type":"file","name":"repositories"} + ]}, + 
{"type":"directory","name":"holefiles","contents":[ + {"type":"file","name":"chsum"}, + {"type":"file","name":"head-hole-1"}, + {"type":"file","name":"head-hole-2"}, + {"type":"file","name":"head-hole-3"}, + {"type":"file","name":"mid-non-hole-1024"}, + {"type":"file","name":"mid-non-hole-4096"}, + {"type":"file","name":"mid-non-hole-5000"}, + {"type":"file","name":"tail-hole-1024"}, + {"type":"file","name":"tail-hole-4096"}, + {"type":"file","name":"tail-hole-5000"} + ]}, + {"type":"file","name":"test.sh"} + ]}, + {"type":"directory","name":"symlink-test","contents":[ + {"type":"link","name":"bar","target":"../hardlink-test-tt","contents":[]}, + {"type":"link","name":"foo","target":"../normal-file-test/","contents":[]}, + {"type":"link","name":"foo1","target":"../../fc/foo","contents":[]}, + {"type":"link","name":"root","target":"/","contents":[]}, + {"type":"link","name":"root-1","target":"/////////////////","contents":[]}, + {"type":"link","name":"root-2","target":"/foobar","contents":[]}, + {"type":"link","name":"test.sh","target":"../normal-file-test/test.sh","contents":[]} + ]} + ]}, + {"type":"report","directories":26,"files":27} +] +0441db85b7cdd66b6eb9322b009f3f92 /normal-file-test/holefiles/head-hole-3 +090dde7adf8e77be248f27e3ae615e46 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/group +2e076fc116332a0ce3619404d12d592e /normal-file-test/busybox/repositories +552dde9afaa7e1989b4482abf47bca6e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/layer.tar +5b2b424e6d107c3fcc0a8e8f3fd0feae /normal-file-test/holefiles/mid-non-hole-1024 +5e23c02b9208ee1d67fb41c95e13bae1 /normal-file-test/holefiles/tail-hole-4096 +60eac4fea7fa1cf1b68773050fc1c0e4 /normal-file-test/holefiles/mid-non-hole-4096 +7a19c32d2c75345debe9031cfa9b649a /normal-file-test/busybox/af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json +7a7e3e0c4cb86c05f2a6101f6f776eb7 /normal-file-test/busybox/manifest.json +7c04e662b37e09a98c46141cf08fb247 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/passwd +9003819f55c5a859802a2764e550fa59 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/json +91768f15d2bdcd21972564048e4fe2e2 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/shadow +92b174ad356a759b690ad51283849cb0 /normal-file-test/holefiles/chsum +9bad2bfc660b13a8742935a94c299b6a /normal-file-test/holefiles/mid-non-hole-5000 +a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/foo +a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/test.sh +a292e7c6b19c3566a7bbee6a7da632ab /normal-file-test/test.sh +cbe00d25b87d730d188245ea67c6a47e /normal-file-test/holefiles/head-hole-2 +e0d2f7f78d741d55271cb34c30c45e8b /normal-file-test/holefiles/tail-hole-1024 +e4c2e8edac362acab7123654b9e73432 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/VERSION +ea7ac8dc0ae3673040f3b1d816d972d7 /normal-file-test/holefiles/head-hole-1 +fe9ad2d5c4c79122a99b4d5ed44fda0e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/localtime +ff5ebc09868133b341c77bf223a0c350 /normal-file-test/holefiles/tail-hole-5000 diff --git a/tests/texture/repeatable/sha256-nocompress-repeatable.result b/tests/texture/repeatable/sha256-nocompress-repeatable.result index e2808e64bae..30acac68537 100644 --- a/tests/texture/repeatable/sha256-nocompress-repeatable.result 
+++ b/tests/texture/repeatable/sha256-nocompress-repeatable.result @@ -1,104 +1,104 @@ -[ - {"type":"directory","name":"","contents":[ - {"type":"directory","name":"hardlink-test","contents":[ - {"type":"file","name":"foo"}, - {"type":"file","name":"test.sh"} - ]}, - {"type":"directory","name":"normal-file-test","contents":[ - {"type":"directory","name":"busybox","contents":[ - {"type":"directory","name":"1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475","contents":[ - {"type":"file","name":"VERSION"}, - {"type":"directory","name":"bin","contents":[ - ]}, - {"type":"directory","name":"dev","contents":[ - ]}, - {"type":"directory","name":"etc","contents":[ - {"type":"file","name":"group"}, - {"type":"file","name":"localtime"}, - {"type":"directory","name":"network","contents":[ - {"type":"directory","name":"if-down.d","contents":[ - ]}, - {"type":"directory","name":"if-post-down.d","contents":[ - ]}, - {"type":"directory","name":"if-pre-up.d","contents":[ - ]}, - {"type":"directory","name":"if-up.d","contents":[ - ]} - ]}, - {"type":"file","name":"passwd"}, - {"type":"file","name":"shadow"} - ]}, - {"type":"directory","name":"home","contents":[ - ]}, - {"type":"file","name":"json"}, - {"type":"file","name":"layer.tar"}, - {"type":"directory","name":"root","contents":[ - ]}, - {"type":"directory","name":"tmp","contents":[ - ]}, - {"type":"directory","name":"usr","contents":[ - {"type":"directory","name":"sbin","contents":[ - ]} - ]}, - {"type":"directory","name":"var","contents":[ - {"type":"directory","name":"spool","contents":[ - {"type":"directory","name":"mail","contents":[ - ]} - ]}, - {"type":"directory","name":"www","contents":[ - ]} - ]} - ]}, - {"type":"file","name":"af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json"}, - {"type":"file","name":"manifest.json"}, - {"type":"file","name":"repositories"} - ]}, - {"type":"directory","name":"holefiles","contents":[ - {"type":"file","name":"chsum"}, - {"type":"file","name":"head-hole-1"}, - {"type":"file","name":"head-hole-2"}, - {"type":"file","name":"head-hole-3"}, - {"type":"file","name":"mid-non-hole-1024"}, - {"type":"file","name":"mid-non-hole-4096"}, - {"type":"file","name":"mid-non-hole-5000"}, - {"type":"file","name":"tail-hole-1024"}, - {"type":"file","name":"tail-hole-4096"}, - {"type":"file","name":"tail-hole-5000"} - ]}, - {"type":"file","name":"test.sh"} - ]}, - {"type":"directory","name":"symlink-test","contents":[ - {"type":"link","name":"bar","target":"../hardlink-test-tt","contents":[]}, - {"type":"link","name":"foo","target":"../normal-file-test/","contents":[]}, - {"type":"link","name":"foo1","target":"../../fc/foo","contents":[]}, - {"type":"link","name":"root","target":"/","contents":[]}, - {"type":"link","name":"root-1","target":"/////////////////","contents":[]}, - {"type":"link","name":"root-2","target":"/foobar","contents":[]}, - {"type":"link","name":"test.sh","target":"../normal-file-test/test.sh","contents":[]} - ]} - ]}, - {"type":"report","directories":26,"files":27} -] -0441db85b7cdd66b6eb9322b009f3f92 /normal-file-test/holefiles/head-hole-3 -090dde7adf8e77be248f27e3ae615e46 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/group -2e076fc116332a0ce3619404d12d592e /normal-file-test/busybox/repositories -552dde9afaa7e1989b4482abf47bca6e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/layer.tar -5b2b424e6d107c3fcc0a8e8f3fd0feae /normal-file-test/holefiles/mid-non-hole-1024 
-5e23c02b9208ee1d67fb41c95e13bae1 /normal-file-test/holefiles/tail-hole-4096 -60eac4fea7fa1cf1b68773050fc1c0e4 /normal-file-test/holefiles/mid-non-hole-4096 -7a19c32d2c75345debe9031cfa9b649a /normal-file-test/busybox/af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json -7a7e3e0c4cb86c05f2a6101f6f776eb7 /normal-file-test/busybox/manifest.json -7c04e662b37e09a98c46141cf08fb247 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/passwd -9003819f55c5a859802a2764e550fa59 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/json -91768f15d2bdcd21972564048e4fe2e2 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/shadow -92b174ad356a759b690ad51283849cb0 /normal-file-test/holefiles/chsum -9bad2bfc660b13a8742935a94c299b6a /normal-file-test/holefiles/mid-non-hole-5000 -a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/foo -a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/test.sh -a292e7c6b19c3566a7bbee6a7da632ab /normal-file-test/test.sh -cbe00d25b87d730d188245ea67c6a47e /normal-file-test/holefiles/head-hole-2 -e0d2f7f78d741d55271cb34c30c45e8b /normal-file-test/holefiles/tail-hole-1024 -e4c2e8edac362acab7123654b9e73432 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/VERSION -ea7ac8dc0ae3673040f3b1d816d972d7 /normal-file-test/holefiles/head-hole-1 -fe9ad2d5c4c79122a99b4d5ed44fda0e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/localtime -ff5ebc09868133b341c77bf223a0c350 /normal-file-test/holefiles/tail-hole-5000 +[ + {"type":"directory","name":"","contents":[ + {"type":"directory","name":"hardlink-test","contents":[ + {"type":"file","name":"foo"}, + {"type":"file","name":"test.sh"} + ]}, + {"type":"directory","name":"normal-file-test","contents":[ + {"type":"directory","name":"busybox","contents":[ + {"type":"directory","name":"1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475","contents":[ + {"type":"file","name":"VERSION"}, + {"type":"directory","name":"bin","contents":[ + ]}, + {"type":"directory","name":"dev","contents":[ + ]}, + {"type":"directory","name":"etc","contents":[ + {"type":"file","name":"group"}, + {"type":"file","name":"localtime"}, + {"type":"directory","name":"network","contents":[ + {"type":"directory","name":"if-down.d","contents":[ + ]}, + {"type":"directory","name":"if-post-down.d","contents":[ + ]}, + {"type":"directory","name":"if-pre-up.d","contents":[ + ]}, + {"type":"directory","name":"if-up.d","contents":[ + ]} + ]}, + {"type":"file","name":"passwd"}, + {"type":"file","name":"shadow"} + ]}, + {"type":"directory","name":"home","contents":[ + ]}, + {"type":"file","name":"json"}, + {"type":"file","name":"layer.tar"}, + {"type":"directory","name":"root","contents":[ + ]}, + {"type":"directory","name":"tmp","contents":[ + ]}, + {"type":"directory","name":"usr","contents":[ + {"type":"directory","name":"sbin","contents":[ + ]} + ]}, + {"type":"directory","name":"var","contents":[ + {"type":"directory","name":"spool","contents":[ + {"type":"directory","name":"mail","contents":[ + ]} + ]}, + {"type":"directory","name":"www","contents":[ + ]} + ]} + ]}, + {"type":"file","name":"af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json"}, + {"type":"file","name":"manifest.json"}, + {"type":"file","name":"repositories"} + ]}, + {"type":"directory","name":"holefiles","contents":[ + {"type":"file","name":"chsum"}, + 
{"type":"file","name":"head-hole-1"}, + {"type":"file","name":"head-hole-2"}, + {"type":"file","name":"head-hole-3"}, + {"type":"file","name":"mid-non-hole-1024"}, + {"type":"file","name":"mid-non-hole-4096"}, + {"type":"file","name":"mid-non-hole-5000"}, + {"type":"file","name":"tail-hole-1024"}, + {"type":"file","name":"tail-hole-4096"}, + {"type":"file","name":"tail-hole-5000"} + ]}, + {"type":"file","name":"test.sh"} + ]}, + {"type":"directory","name":"symlink-test","contents":[ + {"type":"link","name":"bar","target":"../hardlink-test-tt","contents":[]}, + {"type":"link","name":"foo","target":"../normal-file-test/","contents":[]}, + {"type":"link","name":"foo1","target":"../../fc/foo","contents":[]}, + {"type":"link","name":"root","target":"/","contents":[]}, + {"type":"link","name":"root-1","target":"/////////////////","contents":[]}, + {"type":"link","name":"root-2","target":"/foobar","contents":[]}, + {"type":"link","name":"test.sh","target":"../normal-file-test/test.sh","contents":[]} + ]} + ]}, + {"type":"report","directories":26,"files":27} +] +0441db85b7cdd66b6eb9322b009f3f92 /normal-file-test/holefiles/head-hole-3 +090dde7adf8e77be248f27e3ae615e46 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/group +2e076fc116332a0ce3619404d12d592e /normal-file-test/busybox/repositories +552dde9afaa7e1989b4482abf47bca6e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/layer.tar +5b2b424e6d107c3fcc0a8e8f3fd0feae /normal-file-test/holefiles/mid-non-hole-1024 +5e23c02b9208ee1d67fb41c95e13bae1 /normal-file-test/holefiles/tail-hole-4096 +60eac4fea7fa1cf1b68773050fc1c0e4 /normal-file-test/holefiles/mid-non-hole-4096 +7a19c32d2c75345debe9031cfa9b649a /normal-file-test/busybox/af2f74c517aac1d26793a6ed05ff45b299a037e1a9eefeae5eacda133e70a825.json +7a7e3e0c4cb86c05f2a6101f6f776eb7 /normal-file-test/busybox/manifest.json +7c04e662b37e09a98c46141cf08fb247 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/passwd +9003819f55c5a859802a2764e550fa59 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/json +91768f15d2bdcd21972564048e4fe2e2 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/shadow +92b174ad356a759b690ad51283849cb0 /normal-file-test/holefiles/chsum +9bad2bfc660b13a8742935a94c299b6a /normal-file-test/holefiles/mid-non-hole-5000 +a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/foo +a292e7c6b19c3566a7bbee6a7da632ab /hardlink-test/test.sh +a292e7c6b19c3566a7bbee6a7da632ab /normal-file-test/test.sh +cbe00d25b87d730d188245ea67c6a47e /normal-file-test/holefiles/head-hole-2 +e0d2f7f78d741d55271cb34c30c45e8b /normal-file-test/holefiles/tail-hole-1024 +e4c2e8edac362acab7123654b9e73432 /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/VERSION +ea7ac8dc0ae3673040f3b1d816d972d7 /normal-file-test/holefiles/head-hole-1 +fe9ad2d5c4c79122a99b4d5ed44fda0e /normal-file-test/busybox/1f777dbdd68d1c4d554bc0d20e027bfef9ed2dfe2d8d5f029e958552a0e12475/etc/localtime +ff5ebc09868133b341c77bf223a0c350 /normal-file-test/holefiles/tail-hole-5000 diff --git a/tests/texture/stargz/estargz_sample.json b/tests/texture/stargz/estargz_sample.json index fd86c5a7ee6..e349bb94b98 100644 --- a/tests/texture/stargz/estargz_sample.json +++ b/tests/texture/stargz/estargz_sample.json @@ -1,50 +1,50 @@ -{ - "version": 1, - "entries": [ - { - "name": "bin/", - "type": "dir", - "modtime": 
"2019-08-20T10:30:43Z", - "mode": 16877, - "NumLink": 0 - }, - { - "name": "bin/busybox", - "type": "reg", - "size": 833104, - "modtime": "2019-06-12T17:52:45Z", - "mode": 33261, - "offset": 126, - "NumLink": 0, - "digest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f", - "chunkDigest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f" - }, - { - "name": "bin/busybox2", - "type": "hardlink", - "linkName": "bin/busybox", - "modtime": "2019-06-12T17:52:45Z", - "mode": 33261 - }, - { - "name": "lib/ld-musl-x86_64.so.1", - "type": "reg", - "size": 580144, - "modtime": "2019-08-07T07:15:30Z", - "mode": 33261, - "offset": 512427, - "NumLink": 0, - "digest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e", - "chunkDigest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e" - }, - { - "name": ".prefetch.landmark", - "type": "reg", - "size": 1, - "offset": 886633, - "NumLink": 0, - "digest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8", - "chunkDigest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8" - } - ] -} +{ + "version": 1, + "entries": [ + { + "name": "bin/", + "type": "dir", + "modtime": "2019-08-20T10:30:43Z", + "mode": 16877, + "NumLink": 0 + }, + { + "name": "bin/busybox", + "type": "reg", + "size": 833104, + "modtime": "2019-06-12T17:52:45Z", + "mode": 33261, + "offset": 126, + "NumLink": 0, + "digest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f", + "chunkDigest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f" + }, + { + "name": "bin/busybox2", + "type": "hardlink", + "linkName": "bin/busybox", + "modtime": "2019-06-12T17:52:45Z", + "mode": 33261 + }, + { + "name": "lib/ld-musl-x86_64.so.1", + "type": "reg", + "size": 580144, + "modtime": "2019-08-07T07:15:30Z", + "mode": 33261, + "offset": 512427, + "NumLink": 0, + "digest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e", + "chunkDigest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e" + }, + { + "name": ".prefetch.landmark", + "type": "reg", + "size": 1, + "offset": 886633, + "NumLink": 0, + "digest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8", + "chunkDigest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8" + } + ] +} diff --git a/upgrade/Cargo.toml b/upgrade/Cargo.toml index 0d57759692e..d05beb54642 100644 --- a/upgrade/Cargo.toml +++ b/upgrade/Cargo.toml @@ -1,16 +1,16 @@ -[package] -name = "nydus-upgrade" -version = "0.1.0" -description = "Nydus Daemon Upgrade" -authors = ["The Nydus Developers"] -license = "Apache-2.0" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -sendfd = "0.4.3" -dbs-snapshot = "1.5.1" -thiserror = "1" -versionize_derive = "0.1.6" -versionize = "0.1.10" +[package] +name = "nydus-upgrade" +version = "0.1.0" +description = "Nydus Daemon Upgrade" +authors = ["The Nydus Developers"] +license = "Apache-2.0" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +sendfd = "0.4.3" +dbs-snapshot = "1.5.1" +thiserror = "1" +versionize_derive = "0.1.6" +versionize = "0.1.10" diff --git a/upgrade/src/backend/mod.rs b/upgrade/src/backend/mod.rs index 2135ba2b04c..bc9b3868c4c 100644 --- a/upgrade/src/backend/mod.rs +++ b/upgrade/src/backend/mod.rs @@ -1,78 +1,78 
@@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::{io, os::fd::RawFd}; - -pub mod unix_domain_socket; - -#[derive(thiserror::Error, Debug)] -pub enum StorageBackendErr { - #[error("failed to create UnixStream, {0}")] - CreateUnixStream(io::Error), - #[error("failed to send fd over UnixStream, {0}")] - SendFd(io::Error), - #[error("failed to receive fd over UnixStream, {0}")] - RecvFd(io::Error), - #[error("no enough fds")] - NoEnoughFds, -} - -pub type Result = std::result::Result; - -/// StorageBackend trait is used to save and restore the dev fds and daemon state data for online upgrade. -pub trait StorageBackend: Send + Sync { - /// Save the dev fds and daemon state data for online upgrade. - /// Returns the length of bytes of state data. - fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result; - - /// Restore the dev fds and daemon state data for online upgrade. - /// Returns the fds and state data - fn restore(&mut self) -> Result<(Vec, Vec)>; -} - -#[cfg(test)] -mod test { - - #[test] - fn test_storage_backend() { - use std::os::fd::RawFd; - - use crate::backend::{Result, StorageBackend}; - - #[derive(Default)] - struct TestStorageBackend { - fds: Vec, - data: Vec, - } - - impl StorageBackend for TestStorageBackend { - fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result { - self.fds = Vec::new(); - fds.iter().for_each(|fd| self.fds.push(*fd)); - - self.data = vec![0u8; data.len()]; - self.data.clone_from_slice(data); - - Ok(self.data.len()) - } - - fn restore(&mut self) -> Result<(Vec, Vec)> { - Ok((self.fds.clone(), self.data.clone())) - } - } - - const FDS_LEN: usize = 10; - const DATA_LEN: usize = 5; - let fds = [5 as RawFd; FDS_LEN]; - let data: [u8; DATA_LEN] = [7, 8, 9, 10, 12]; - - let mut backend: Box = Box::::default(); - let saved_data_len = backend.save(&fds, &data).unwrap(); - assert_eq!(saved_data_len, DATA_LEN); - - let (restored_fds, restored_data) = backend.restore().unwrap(); - assert_eq!(restored_data, data); - assert_eq!(restored_fds, fds); - } -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::{io, os::fd::RawFd}; + +pub mod unix_domain_socket; + +#[derive(thiserror::Error, Debug)] +pub enum StorageBackendErr { + #[error("failed to create UnixStream, {0}")] + CreateUnixStream(io::Error), + #[error("failed to send fd over UnixStream, {0}")] + SendFd(io::Error), + #[error("failed to receive fd over UnixStream, {0}")] + RecvFd(io::Error), + #[error("no enough fds")] + NoEnoughFds, +} + +pub type Result = std::result::Result; + +/// StorageBackend trait is used to save and restore the dev fds and daemon state data for online upgrade. +pub trait StorageBackend: Send + Sync { + /// Save the dev fds and daemon state data for online upgrade. + /// Returns the length of bytes of state data. + fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result; + + /// Restore the dev fds and daemon state data for online upgrade. 
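+ /// During a live upgrade the replacement daemon calls this to take over the fds
+ /// and state blob previously handed over via `save()`. A hypothetical caller
+ /// sketch (the socket path is illustrative only):
+ /// ```ignore
+ /// let mut backend = UdsStorageBackend::new(PathBuf::from("/tmp/nydus-upgrade.sock"));
+ /// let (fds, state) = backend.restore()?;
+ /// ```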
+ /// Returns the fds and state data + fn restore(&mut self) -> Result<(Vec, Vec)>; +} + +#[cfg(test)] +mod test { + + #[test] + fn test_storage_backend() { + use std::os::fd::RawFd; + + use crate::backend::{Result, StorageBackend}; + + #[derive(Default)] + struct TestStorageBackend { + fds: Vec, + data: Vec, + } + + impl StorageBackend for TestStorageBackend { + fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result { + self.fds = Vec::new(); + fds.iter().for_each(|fd| self.fds.push(*fd)); + + self.data = vec![0u8; data.len()]; + self.data.clone_from_slice(data); + + Ok(self.data.len()) + } + + fn restore(&mut self) -> Result<(Vec, Vec)> { + Ok((self.fds.clone(), self.data.clone())) + } + } + + const FDS_LEN: usize = 10; + const DATA_LEN: usize = 5; + let fds = [5 as RawFd; FDS_LEN]; + let data: [u8; DATA_LEN] = [7, 8, 9, 10, 12]; + + let mut backend: Box = Box::::default(); + let saved_data_len = backend.save(&fds, &data).unwrap(); + assert_eq!(saved_data_len, DATA_LEN); + + let (restored_fds, restored_data) = backend.restore().unwrap(); + assert_eq!(restored_data, data); + assert_eq!(restored_fds, fds); + } +} diff --git a/upgrade/src/backend/unix_domain_socket.rs b/upgrade/src/backend/unix_domain_socket.rs index c6bd63ca35e..ca304c8989b 100644 --- a/upgrade/src/backend/unix_domain_socket.rs +++ b/upgrade/src/backend/unix_domain_socket.rs @@ -1,56 +1,56 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::{ - os::{fd::RawFd, unix::net::UnixStream}, - path::PathBuf, -}; - -use sendfd::{RecvWithFd, SendWithFd}; - -use super::{Result, StorageBackend, StorageBackendErr}; - -pub struct UdsStorageBackend { - socket_path: PathBuf, -} - -impl UdsStorageBackend { - pub fn new(socket_path: PathBuf) -> Self { - UdsStorageBackend { socket_path } - } -} - -const MAX_STATE_DATA_LENGTH: usize = 1024 * 32; - -impl StorageBackend for UdsStorageBackend { - fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result { - if fds.is_empty() { - return Err(StorageBackendErr::NoEnoughFds); - } - - let socket = - UnixStream::connect(&self.socket_path).map_err(StorageBackendErr::CreateUnixStream)?; - let len = socket - .send_with_fd(data, fds) - .map_err(StorageBackendErr::SendFd)?; - - Ok(len) - } - - fn restore(&mut self) -> Result<(Vec, Vec)> { - let mut data = vec![0u8; MAX_STATE_DATA_LENGTH]; - let mut fds = vec![0i32; 16]; - let socket = - UnixStream::connect(&self.socket_path).map_err(StorageBackendErr::CreateUnixStream)?; - let (_, fds_cnt) = socket - .recv_with_fd(data.as_mut_slice(), fds.as_mut_slice()) - .map_err(StorageBackendErr::RecvFd)?; - - if fds.is_empty() { - return Err(StorageBackendErr::NoEnoughFds); - } - fds.truncate(fds_cnt); - Ok((fds, data)) - } -} +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::{ + os::{fd::RawFd, unix::net::UnixStream}, + path::PathBuf, +}; + +use sendfd::{RecvWithFd, SendWithFd}; + +use super::{Result, StorageBackend, StorageBackendErr}; + +pub struct UdsStorageBackend { + socket_path: PathBuf, +} + +impl UdsStorageBackend { + pub fn new(socket_path: PathBuf) -> Self { + UdsStorageBackend { socket_path } + } +} + +const MAX_STATE_DATA_LENGTH: usize = 1024 * 32; + +impl StorageBackend for UdsStorageBackend { + fn save(&mut self, fds: &[RawFd], data: &[u8]) -> Result { + if fds.is_empty() { + return Err(StorageBackendErr::NoEnoughFds); + } + + let socket = + UnixStream::connect(&self.socket_path).map_err(StorageBackendErr::CreateUnixStream)?; + let len = socket + .send_with_fd(data, fds) + .map_err(StorageBackendErr::SendFd)?; + + Ok(len) + } + + fn restore(&mut self) -> Result<(Vec, Vec)> { + let mut data = vec![0u8; MAX_STATE_DATA_LENGTH]; + let mut fds = vec![0i32; 16]; + let socket = + UnixStream::connect(&self.socket_path).map_err(StorageBackendErr::CreateUnixStream)?; + let (_, fds_cnt) = socket + .recv_with_fd(data.as_mut_slice(), fds.as_mut_slice()) + .map_err(StorageBackendErr::RecvFd)?; + + if fds.is_empty() { + return Err(StorageBackendErr::NoEnoughFds); + } + fds.truncate(fds_cnt); + Ok((fds, data)) + } +} diff --git a/upgrade/src/lib.rs b/upgrade/src/lib.rs index 2436b487aaf..08dd83fdd9f 100644 --- a/upgrade/src/lib.rs +++ b/upgrade/src/lib.rs @@ -1,6 +1,6 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -pub mod backend; -pub mod persist; +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +pub mod backend; +pub mod persist; diff --git a/upgrade/src/persist.rs b/upgrade/src/persist.rs index 8a86ecb18fe..54d24932ef1 100644 --- a/upgrade/src/persist.rs +++ b/upgrade/src/persist.rs @@ -1,67 +1,67 @@ -// Copyright 2023 Nydus Developers. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fmt::Debug; -use std::io::{Error as IoError, ErrorKind, Result}; -use std::{any::TypeId, collections::HashMap}; - -use dbs_snapshot::Snapshot; -use versionize::{VersionMap, Versionize}; - -/// A list of versions. -type Versions = Vec>; - -/// A trait for snapshotting. -/// This trait is used to save and restore a struct -/// which implements `versionize::Versionize`. -pub trait Snapshotter: Versionize + Sized + Debug { - /// Returns a list of versions. - fn get_versions() -> Versions; - - /// Returns a `VersionMap` with the versions defined by `get_versions`. - fn new_version_map() -> VersionMap { - let mut version_map = VersionMap::new(); - for (idx, map) in Self::get_versions().into_iter().enumerate() { - if idx > 0 { - version_map.new_version(); - } - for (type_id, version) in map { - version_map.set_type_version(type_id, version); - } - } - version_map - } - - /// Returns a new `Snapshot` with the versions defined by `get_versions`. - fn new_snapshot() -> Snapshot { - let vm = Self::new_version_map(); - let target_version = vm.latest_version(); - Snapshot::new(vm, target_version) - } - - /// Saves the struct to a `Vec`. - fn save(&self) -> Result> { - let mut buf = Vec::new(); - let mut snapshot = Self::new_snapshot(); - snapshot.save(&mut buf, self).map_err(|e| { - IoError::new( - ErrorKind::Other, - format!("Failed to save snapshot: {:?}", e), - ) - })?; - - Ok(buf) - } - - /// Restores the struct from a `Vec`. 
- fn restore(buf: &mut Vec) -> Result { - match Snapshot::load(&mut buf.as_slice(), buf.len(), Self::new_version_map()) { - Ok((o, _)) => Ok(o), - Err(e) => Err(IoError::new( - ErrorKind::Other, - format!("Failed to load snapshot: {:?}", e), - )), - } - } -} +// Copyright 2023 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::Debug; +use std::io::{Error as IoError, ErrorKind, Result}; +use std::{any::TypeId, collections::HashMap}; + +use dbs_snapshot::Snapshot; +use versionize::{VersionMap, Versionize}; + +/// A list of versions. +type Versions = Vec>; + +/// A trait for snapshotting. +/// This trait is used to save and restore a struct +/// which implements `versionize::Versionize`. +pub trait Snapshotter: Versionize + Sized + Debug { + /// Returns a list of versions. + fn get_versions() -> Versions; + + /// Returns a `VersionMap` with the versions defined by `get_versions`. + fn new_version_map() -> VersionMap { + let mut version_map = VersionMap::new(); + for (idx, map) in Self::get_versions().into_iter().enumerate() { + if idx > 0 { + version_map.new_version(); + } + for (type_id, version) in map { + version_map.set_type_version(type_id, version); + } + } + version_map + } + + /// Returns a new `Snapshot` with the versions defined by `get_versions`. + fn new_snapshot() -> Snapshot { + let vm = Self::new_version_map(); + let target_version = vm.latest_version(); + Snapshot::new(vm, target_version) + } + + /// Saves the struct to a `Vec`. + fn save(&self) -> Result> { + let mut buf = Vec::new(); + let mut snapshot = Self::new_snapshot(); + snapshot.save(&mut buf, self).map_err(|e| { + IoError::new( + ErrorKind::Other, + format!("Failed to save snapshot: {:?}", e), + ) + })?; + + Ok(buf) + } + + /// Restores the struct from a `Vec`. 
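+ /// The buffer is expected to hold a snapshot produced by `save()`; it is decoded
+ /// with the same `VersionMap` built from `get_versions()`.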
+ fn restore(buf: &mut Vec) -> Result { + match Snapshot::load(&mut buf.as_slice(), buf.len(), Self::new_version_map()) { + Ok((o, _)) => Ok(o), + Err(e) => Err(IoError::new( + ErrorKind::Other, + format!("Failed to load snapshot: {:?}", e), + )), + } + } +} diff --git a/utils/CHANGELOG.md b/utils/CHANGELOG.md index 57826d660bb..dad8b98dc2a 100644 --- a/utils/CHANGELOG.md +++ b/utils/CHANGELOG.md @@ -1,22 +1,22 @@ -# Changelog -## [Unreleased] - -## [v0.3.1] -- Add async io helpers - -## [v0.3] - -### Added -- Asynchronous multi-producer multi-consumer channel - -### Fixed -- Refine metrics APIs - -### Deprecated -- Remove dependency on fuse-backend-rs crate - -## [v0.1.0] - -### Added - -- Initial release +# Changelog +## [Unreleased] + +## [v0.3.1] +- Add async io helpers + +## [v0.3] + +### Added +- Asynchronous multi-producer multi-consumer channel + +### Fixed +- Refine metrics APIs + +### Deprecated +- Remove dependency on fuse-backend-rs crate + +## [v0.1.0] + +### Added + +- Initial release diff --git a/utils/Cargo.toml b/utils/Cargo.toml index dd02c8544ac..de99b28c695 100644 --- a/utils/Cargo.toml +++ b/utils/Cargo.toml @@ -1,48 +1,48 @@ -[package] -name = "nydus-utils" -version = "0.4.3" -description = "Utilities and helpers for Nydus Image Service" -authors = ["The Nydus Developers"] -license = "Apache-2.0 OR BSD-3-Clause" -homepage = "https://nydus.dev/" -repository = "https://github.com/dragonflyoss/nydus" -edition = "2021" - -[dependencies] -thiserror = "1.0.30" -blake3 = "1.3" -httpdate = "1.0" -lazy_static = "1.4" -libc = "0.2" -log = "0.4" -lz4-sys = "1.9.4" -lz4 = "1.24.0" -openssl = { version = "0.10.48", features = ["vendored"], optional = true } -serde = { version = ">=1.0.27", features = ["serde_derive", "rc"] } -serde_json = ">=1.0.9" -sha2 = "0.10.0" -tokio = { version = "1.19.0", features = ["rt", "sync"] } -zstd = "0.11" -nix = "0.24" - -nydus-api = { version = "0.3", path = "../api" } - -# libz-ng-sys doesn't compile on ppc64. 
Have to fallback to stock zlib-sys -[target.'cfg(target_arch = "powerpc64")'.dependencies] -libz-sys = { version = "1.1.12", features = ["stock-zlib"], default-features = false, optional = true } -flate2 = { version = "1.0.28", features = ["zlib"], default-features = false } -[target.'cfg(not(target_arch = "powerpc64"))'.dependencies] -libz-sys = { version = "1.1.8", features = ["zlib-ng"], default-features = false, optional = true } -flate2 = { version = "1.0.28", features = ["zlib-ng-compat"], default-features = false } - -[dev-dependencies] -vmm-sys-util = "0.11.0" -tar = "0.4.40" - -[features] -zran = ["libz-sys"] -encryption = ["openssl"] - -[package.metadata.docs.rs] -all-features = true -targets = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu", "aarch64-apple-darwin"] +[package] +name = "nydus-utils" +version = "0.4.3" +description = "Utilities and helpers for Nydus Image Service" +authors = ["The Nydus Developers"] +license = "Apache-2.0 OR BSD-3-Clause" +homepage = "https://nydus.dev/" +repository = "https://github.com/dragonflyoss/nydus" +edition = "2021" + +[dependencies] +thiserror = "1.0.30" +blake3 = "1.3" +httpdate = "1.0" +lazy_static = "1.4" +libc = "0.2" +log = "0.4" +lz4-sys = "1.9.4" +lz4 = "1.24.0" +openssl = { version = "0.10.48", features = ["vendored"], optional = true } +serde = { version = ">=1.0.27", features = ["serde_derive", "rc"] } +serde_json = ">=1.0.9" +sha2 = "0.10.0" +tokio = { version = "1.19.0", features = ["rt", "sync"] } +zstd = "0.11" +nix = "0.24" + +nydus-api = { version = "0.3", path = "../api" } + +# libz-ng-sys doesn't compile on ppc64. Have to fallback to stock zlib-sys +[target.'cfg(target_arch = "powerpc64")'.dependencies] +libz-sys = { version = "1.1.12", features = ["stock-zlib"], default-features = false, optional = true } +flate2 = { version = "1.0.28", features = ["zlib"], default-features = false } +[target.'cfg(not(target_arch = "powerpc64"))'.dependencies] +libz-sys = { version = "1.1.8", features = ["zlib-ng"], default-features = false, optional = true } +flate2 = { version = "1.0.28", features = ["zlib-ng-compat"], default-features = false } + +[dev-dependencies] +vmm-sys-util = "0.11.0" +tar = "0.4.40" + +[features] +zran = ["libz-sys"] +encryption = ["openssl"] + +[package.metadata.docs.rs] +all-features = true +targets = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu", "aarch64-apple-darwin"] diff --git a/utils/README.md b/utils/README.md index 794c7d59316..ca5decaba0b 100644 --- a/utils/README.md +++ b/utils/README.md @@ -1,25 +1,25 @@ -# nydus-utils - -A collection of utilities to support [Nydus Image Service](https://nydus.dev/). -It provides: -- Asynchronous Multi-Producer Multi-Consumer channel -- Blake3 and SHA256 message digest algorithms -- LZ4 and zstd compression algorithms -- `InodeBitmap`: a bitmap implementation to manage inode numbers -- Per-thread async runtime of type tokio current thread Runtime. -- exec() helper -- metric helpers - -## Support - -**Platforms**: -- x86_64 -- aarch64 - -**Operating Systems**: -- Linux -- MacOS - -## License - -This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). +# nydus-utils + +A collection of utilities to support [Nydus Image Service](https://nydus.dev/). 
+It provides: +- Asynchronous Multi-Producer Multi-Consumer channel +- Blake3 and SHA256 message digest algorithms +- LZ4 and zstd compression algorithms +- `InodeBitmap`: a bitmap implementation to manage inode numbers +- Per-thread async runtime of type tokio current thread Runtime. +- exec() helper +- metric helpers + +## Support + +**Platforms**: +- x86_64 +- aarch64 + +**Operating Systems**: +- Linux +- MacOS + +## License + +This code is licensed under [Apache-2.0](LICENSE-APACHE) or [BSD-3-Clause](LICENSE-BSD-3-Clause). diff --git a/utils/src/async_helper.rs b/utils/src/async_helper.rs index 54aa6154751..a6c37bb99e1 100644 --- a/utils/src/async_helper.rs +++ b/utils/src/async_helper.rs @@ -1,33 +1,33 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 -use tokio::runtime::{Builder, Runtime}; - -std::thread_local! { - static CURRENT_THREAD_RT: Runtime = Builder::new_current_thread() - .enable_all() - .build() - .expect("utils: failed to create tokio runtime for current thread"); -} - -/// Run the callback with a tokio current thread runtime instance. -pub fn with_runtime(f: F) -> R -where - F: FnOnce(&Runtime) -> R, -{ - CURRENT_THREAD_RT.with(f) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_with_runtime() { - let res = with_runtime(|rt| rt.block_on(async { 1 })); - assert_eq!(res, 1); - - let res = with_runtime(|rt| rt.block_on(async { 3 })); - assert_eq!(res, 3); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 +use tokio::runtime::{Builder, Runtime}; + +std::thread_local! { + static CURRENT_THREAD_RT: Runtime = Builder::new_current_thread() + .enable_all() + .build() + .expect("utils: failed to create tokio runtime for current thread"); +} + +/// Run the callback with a tokio current thread runtime instance. +pub fn with_runtime(f: F) -> R +where + F: FnOnce(&Runtime) -> R, +{ + CURRENT_THREAD_RT.with(f) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_with_runtime() { + let res = with_runtime(|rt| rt.block_on(async { 1 })); + assert_eq!(res, 1); + + let res = with_runtime(|rt| rt.block_on(async { 3 })); + assert_eq!(res, 3); + } +} diff --git a/utils/src/compact.rs b/utils/src/compact.rs index 48f93598650..a6b91f27813 100644 --- a/utils/src/compact.rs +++ b/utils/src/compact.rs @@ -1,55 +1,55 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 -use nix::sys::stat::dev_t; - -// makedev calculations compact with macos -// view https://opensource.apple.com/source/xnu/xnu-201/bsd/sys/types.h.auto.html -pub fn makedev(major: u64, minor: u64) -> dev_t { - #[cfg(target_os = "linux")] - { - nix::sys::stat::makedev(major, minor) - } - #[cfg(target_os = "macos")] - { - ((major & 0xff << 24) | (minor & 0xffffff)) as dev_t - } -} - -pub fn major_dev(dev: u64) -> u64 { - #[cfg(target_os = "linux")] - { - nix::sys::stat::major(dev) - } - #[cfg(target_os = "macos")] - { - (dev >> 24) & 0xff - } -} - -pub fn minor_dev(dev: u64) -> u64 { - #[cfg(target_os = "linux")] - { - nix::sys::stat::minor(dev) - } - #[cfg(target_os = "macos")] - { - dev & 0xffffff - } -} - -#[cfg(all(test, target_os = "linux"))] -mod tests { - - use super::*; - - #[test] - fn test_dev() { - let major: u64 = 0xffff_ffff_ffff_abcd; - let minor: u64 = 0xffff_ffff_abcd_ffff; - let dev = nix::sys::stat::makedev(major, minor); - assert_eq!(major_dev(dev), 0xffff_abcd); - assert_eq!(minor_dev(dev), 0xabcd_ffff); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 +use nix::sys::stat::dev_t; + +// makedev calculations compact with macos +// view https://opensource.apple.com/source/xnu/xnu-201/bsd/sys/types.h.auto.html +pub fn makedev(major: u64, minor: u64) -> dev_t { + #[cfg(target_os = "linux")] + { + nix::sys::stat::makedev(major, minor) + } + #[cfg(target_os = "macos")] + { + ((major & 0xff << 24) | (minor & 0xffffff)) as dev_t + } +} + +pub fn major_dev(dev: u64) -> u64 { + #[cfg(target_os = "linux")] + { + nix::sys::stat::major(dev) + } + #[cfg(target_os = "macos")] + { + (dev >> 24) & 0xff + } +} + +pub fn minor_dev(dev: u64) -> u64 { + #[cfg(target_os = "linux")] + { + nix::sys::stat::minor(dev) + } + #[cfg(target_os = "macos")] + { + dev & 0xffffff + } +} + +#[cfg(all(test, target_os = "linux"))] +mod tests { + + use super::*; + + #[test] + fn test_dev() { + let major: u64 = 0xffff_ffff_ffff_abcd; + let minor: u64 = 0xffff_ffff_abcd_ffff; + let dev = nix::sys::stat::makedev(major, minor); + assert_eq!(major_dev(dev), 0xffff_abcd); + assert_eq!(minor_dev(dev), 0xabcd_ffff); + } +} diff --git a/utils/src/compress/lz4_standard.rs b/utils/src/compress/lz4_standard.rs index bd4e51d940f..ed92396eb8b 100644 --- a/utils/src/compress/lz4_standard.rs +++ b/utils/src/compress/lz4_standard.rs @@ -1,73 +1,73 @@ -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
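For reference, here is a minimal standalone sketch of the macOS-style device-number layout that `makedev`, `major_dev` and `minor_dev` in `utils/src/compact.rs` operate on (major number in the top 8 bits, minor number in the low 24 bits, per the xnu header cited in the hunk above). The helper names below are illustrative only, not part of nydus-utils, and the bit operations are written with explicit parentheses.

```rust
// Illustrative macOS-style dev_t packing: major in bits 24..32, minor in
// bits 0..24. Helper names are hypothetical, not nydus-utils APIs.
fn pack_dev_macos(major: u64, minor: u64) -> u64 {
    ((major & 0xff) << 24) | (minor & 0x00ff_ffff)
}

fn unpack_dev_macos(dev: u64) -> (u64, u64) {
    ((dev >> 24) & 0xff, dev & 0x00ff_ffff)
}

fn main() {
    let dev = pack_dev_macos(0xab, 0x0012_3456);
    assert_eq!(unpack_dev_macos(dev), (0xab, 0x0012_3456));
    println!("packed dev: {:#010x}", dev);
}
```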
-// SPDX-License-Identifier: Apache-2.0 - -use std::io::Result; - -use libc::c_char; -use lz4_sys::{LZ4_compressBound, LZ4_compress_default, LZ4_decompress_safe}; - -pub(super) fn lz4_compress(src: &[u8]) -> Result> { - // 0 iff src too large - let compress_bound: i32 = unsafe { LZ4_compressBound(src.len() as i32) }; - - if src.len() > (i32::max_value() as usize) || compress_bound <= 0 { - return Err(einval!("compression input data is too big")); - } - - let mut dst_buf = Vec::with_capacity(compress_bound as usize); - let cmp_size = unsafe { - LZ4_compress_default( - src.as_ptr() as *const c_char, - dst_buf.as_mut_ptr() as *mut c_char, - src.len() as i32, - compress_bound, - ) - }; - if cmp_size <= 0 { - return Err(eio!("compression failed")); - } - - assert!(cmp_size as usize <= dst_buf.capacity()); - unsafe { dst_buf.set_len(cmp_size as usize) }; - - Ok(dst_buf) -} - -pub(super) fn lz4_decompress(src: &[u8], dst: &mut [u8]) -> Result { - if dst.len() >= std::i32::MAX as usize { - return Err(einval!("the destination buffer is big than i32::MAX")); - } - let size = dst.len() as i32; - - if unsafe { LZ4_compressBound(size) } <= 0 { - return Err(einval!("given size parameter is too big")); - } - - let dec_bytes = unsafe { - LZ4_decompress_safe( - src.as_ptr() as *const c_char, - dst.as_mut_ptr() as *mut c_char, - src.len() as i32, - size, - ) - }; - - if dec_bytes < 0 { - return Err(eio!("decompression failed")); - } - - Ok(dec_bytes as usize) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_error_input() { - let mut big_buf = vec![0x0u8; u32::MAX as usize]; - let mock_compressed = vec![0x0u8; 32]; - assert!(lz4_compress(&big_buf).is_err()); - assert!(lz4_decompress(&mock_compressed, big_buf.as_mut_slice()).is_err()); - } -} +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::io::Result; + +use libc::c_char; +use lz4_sys::{LZ4_compressBound, LZ4_compress_default, LZ4_decompress_safe}; + +pub(super) fn lz4_compress(src: &[u8]) -> Result> { + // 0 iff src too large + let compress_bound: i32 = unsafe { LZ4_compressBound(src.len() as i32) }; + + if src.len() > (i32::max_value() as usize) || compress_bound <= 0 { + return Err(einval!("compression input data is too big")); + } + + let mut dst_buf = Vec::with_capacity(compress_bound as usize); + let cmp_size = unsafe { + LZ4_compress_default( + src.as_ptr() as *const c_char, + dst_buf.as_mut_ptr() as *mut c_char, + src.len() as i32, + compress_bound, + ) + }; + if cmp_size <= 0 { + return Err(eio!("compression failed")); + } + + assert!(cmp_size as usize <= dst_buf.capacity()); + unsafe { dst_buf.set_len(cmp_size as usize) }; + + Ok(dst_buf) +} + +pub(super) fn lz4_decompress(src: &[u8], dst: &mut [u8]) -> Result { + if dst.len() >= std::i32::MAX as usize { + return Err(einval!("the destination buffer is big than i32::MAX")); + } + let size = dst.len() as i32; + + if unsafe { LZ4_compressBound(size) } <= 0 { + return Err(einval!("given size parameter is too big")); + } + + let dec_bytes = unsafe { + LZ4_decompress_safe( + src.as_ptr() as *const c_char, + dst.as_mut_ptr() as *mut c_char, + src.len() as i32, + size, + ) + }; + + if dec_bytes < 0 { + return Err(eio!("decompression failed")); + } + + Ok(dec_bytes as usize) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_input() { + let mut big_buf = vec![0x0u8; u32::MAX as usize]; + let mock_compressed = vec![0x0u8; 32]; + assert!(lz4_compress(&big_buf).is_err()); + assert!(lz4_decompress(&mock_compressed, big_buf.as_mut_slice()).is_err()); + } +} diff --git a/utils/src/compress/mod.rs b/utils/src/compress/mod.rs index 665ae8b6486..a8b4b38b449 100644 --- a/utils/src/compress/mod.rs +++ b/utils/src/compress/mod.rs @@ -1,596 +1,596 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::borrow::Cow; -use std::convert::TryFrom; -use std::fmt; -use std::io::{BufReader, Error, Read, Result, Write}; -use std::str::FromStr; - -mod lz4_standard; -use self::lz4_standard::*; - -#[cfg(feature = "zran")] -pub mod zlib_random; - -const COMPRESSION_MINIMUM_RATIO: usize = 100; - -/// Supported compression algorithms. 
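As a usage note for the two helpers in the hunk above, the following is a minimal round-trip sketch, assuming it lives inside the `compress` module where the `pub(super)` functions are visible. The key point is that the raw LZ4 block format carries no size header, so the caller must already know the uncompressed length and size the destination buffer before calling `lz4_decompress`.

```rust
// Round-trip sketch for lz4_compress/lz4_decompress (assumed to be called
// from within the `compress` module, since both helpers are pub(super)).
fn lz4_roundtrip() -> std::io::Result<()> {
    let data = vec![0x5au8; 4096];
    let compressed = lz4_compress(&data)?;
    // The raw LZ4 block stores no uncompressed size, so the caller sizes
    // the output buffer up front.
    let mut restored = vec![0u8; data.len()];
    let n = lz4_decompress(&compressed, &mut restored)?;
    assert_eq!(n, data.len());
    assert_eq!(restored, data);
    Ok(())
}
```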
-#[repr(u32)] -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub enum Algorithm { - #[default] - None = 0, - Lz4Block = 1, - GZip = 2, - Zstd = 3, -} - -impl fmt::Display for Algorithm { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -impl FromStr for Algorithm { - type Err = Error; - - fn from_str(s: &str) -> std::result::Result { - match s { - "none" => Ok(Self::None), - "lz4_block" => Ok(Self::Lz4Block), - "gzip" => Ok(Self::GZip), - "zstd" => Ok(Self::Zstd), - _ => Err(einval!("compression algorithm should be none or lz4_block")), - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u32) -> std::result::Result { - if value == Algorithm::None as u32 { - Ok(Algorithm::None) - } else if value == Algorithm::Lz4Block as u32 { - Ok(Algorithm::Lz4Block) - } else if value == Algorithm::GZip as u32 { - Ok(Algorithm::GZip) - } else if value == Algorithm::Zstd as u32 { - Ok(Algorithm::Zstd) - } else { - Err(()) - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u64) -> std::result::Result { - if value == Algorithm::None as u64 { - Ok(Algorithm::None) - } else if value == Algorithm::Lz4Block as u64 { - Ok(Algorithm::Lz4Block) - } else if value == Algorithm::GZip as u64 { - Ok(Algorithm::GZip) - } else if value == Algorithm::Zstd as u64 { - Ok(Algorithm::Zstd) - } else { - Err(()) - } - } -} - -impl Algorithm { - /// Check whether the compression algorithm is none. - pub fn is_none(self) -> bool { - self == Self::None - } -} - -/// Compress data with the specified compression algorithm. -pub fn compress(src: &[u8], algorithm: Algorithm) -> Result<(Cow<[u8]>, bool)> { - let src_size = src.len(); - if src_size == 0 { - return Ok((Cow::Borrowed(src), false)); - } - - let compressed = match algorithm { - Algorithm::None => return Ok((Cow::Borrowed(src), false)), - Algorithm::Lz4Block => lz4_compress(src)?, - Algorithm::GZip => { - let dst: Vec = Vec::new(); - let mut gz = flate2::write::GzEncoder::new(dst, flate2::Compression::default()); - gz.write_all(src)?; - gz.finish()? - } - Algorithm::Zstd => zstd_compress(src)?, - }; - - // Abandon compressed data when compression ratio greater than COMPRESSION_MINIMUM_RATIO - if (COMPRESSION_MINIMUM_RATIO == 100 && compressed.len() >= src_size) - || ((100 * compressed.len() / src_size) >= COMPRESSION_MINIMUM_RATIO) - { - Ok((Cow::Borrowed(src), false)) - } else { - Ok((Cow::Owned(compressed), true)) - } -} - -/// Decompress a source slice or file stream into destination slice, with provided compression algorithm. -/// Use the file as decompress source if provided. -pub fn decompress(src: &[u8], dst: &mut [u8], algorithm: Algorithm) -> Result { - match algorithm { - Algorithm::None => { - assert_eq!(src.len(), dst.len()); - dst.copy_from_slice(src); - Ok(dst.len()) - } - Algorithm::Lz4Block => lz4_decompress(src, dst), - Algorithm::GZip => { - let mut gz = flate2::bufread::GzDecoder::new(src); - gz.read_exact(dst)?; - Ok(dst.len()) - } - Algorithm::Zstd => zstd::bulk::decompress_to_buffer(src, dst), - } -} - -#[allow(clippy::large_enum_variant)] -/// Stream decoder for gzip/lz4/zstd. -pub enum Decoder<'a, R: Read> { - None(R), - Gzip(flate2::bufread::MultiGzDecoder>), - Zstd(zstd::stream::Decoder<'a, BufReader>), -} - -impl<'a, R: Read> Decoder<'a, R> { - /// Create a new instance of `Decoder`. 
- pub fn new(reader: R, algorithm: Algorithm) -> Result { - let decoder = match algorithm { - Algorithm::None => Decoder::None(reader), - Algorithm::GZip => { - Decoder::Gzip(flate2::bufread::MultiGzDecoder::new(BufReader::new(reader))) - } - Algorithm::Lz4Block => panic!("Decoder doesn't support lz4_block"), - Algorithm::Zstd => Decoder::Zstd(zstd::stream::Decoder::new(reader)?), - }; - Ok(decoder) - } -} - -impl<'a, R: Read> Read for Decoder<'a, R> { - fn read(&mut self, buf: &mut [u8]) -> Result { - match self { - Decoder::None(r) => r.read(buf), - Decoder::Gzip(r) => r.read(buf), - Decoder::Zstd(r) => r.read(buf), - } - } -} - -/// Stream decoder for zlib/gzip. -pub struct ZlibDecoder { - stream: flate2::bufread::MultiGzDecoder>, -} - -impl ZlibDecoder { - /// Create a new instance of `ZlibDecoder`. - pub fn new(reader: R) -> Self { - ZlibDecoder { - stream: flate2::bufread::MultiGzDecoder::new(BufReader::new(reader)), - } - } -} - -impl Read for ZlibDecoder { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.stream.read(buf) - } -} - -/// Estimate the maximum compressed data size from uncompressed data size. -/// -/// Gzip is special that it doesn't carry compress_size. We need to read the maximum possible size -/// of compressed data for `chunk_decompress_size`, and try to decompress `chunk_decompress_size` -/// bytes of data out of it. -// -// Per man(1) gzip -// The worst case expansion is a few bytes for the gzip file header, plus 5 bytes every 32K block, -// or an expansion ratio of 0.015% for large files. -// -// Per http://www.zlib.org/rfc-gzip.html#header-trailer, each member has the following structure: -// +---+---+---+---+---+---+---+---+---+---+ -// |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) -// +---+---+---+---+---+---+---+---+---+---+ -// (if FLG.FEXTRA set) -// +---+---+=================================+ -// | XLEN |...XLEN bytes of "extra field"...| (more-->) -// +---+---+=================================+ -// (if FLG.FNAME set) -// +=========================================+ -// |...original file name, zero-terminated...| (more-->) -// +=========================================+ -// (if FLG.FCOMMENT set) -// +===================================+ -// |...file comment, zero-terminated...| (more-->) -// +===================================+ -// (if FLG.FHCRC set) -// +---+---+ -// | CRC16 | -// +---+---+ -// +=======================+ -// |...compressed blocks...| (more-->) -// +=======================+ -// 0 1 2 3 4 5 6 7 -// +---+---+---+---+---+---+---+---+ -// | CRC32 | ISIZE | -// +---+---+---+---+---+---+---+---+ -// gzip head+footer is at least 10+8 bytes, stargz header doesn't include any flags -// so it's 18 bytes. Let's read at least 128 bytes more, to allow the decompressor to -// find out end of the gzip stream. -// -// Ideally we should introduce a streaming cache for stargz that maintains internal -// chunks and expose stream APIs. 
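To make the estimate concrete, here is a small worked example that simply evaluates the same formula as `compute_compressed_gzip_size` (gzip header and trailer bytes, per-block overhead, plus the 128-byte safety margin) and then clamps it to a caller-supplied maximum. The numbers follow the function's formula, not an independent bound.

```rust
// Worked example of the gzip worst-case estimate: for a 64 KiB chunk the
// overhead is 10 + 8 + 5 + (65536 / 16384) * 5 + 128 = 171 bytes.
fn main() {
    let size = 64 * 1024usize;
    let estimate = size + 10 + 8 + 5 + (size / (16 << 10)) * 5 + 128;
    assert_eq!(estimate, 65_707);
    // The function clamps the estimate to `max_size`, e.g. the number of
    // compressed bytes actually remaining in the blob.
    assert_eq!(estimate.min(60_000), 60_000);
}
```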
-pub fn compute_compressed_gzip_size(size: usize, max_size: usize) -> usize { - let size = size + 10 + 8 + 5 + (size / (16 << 10)) * 5 + 128; - - std::cmp::min(size, max_size) -} - -fn zstd_compress(src: &[u8]) -> Result> { - zstd::bulk::compress(src, zstd::DEFAULT_COMPRESSION_LEVEL) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs::OpenOptions; - use std::io::{Seek, SeekFrom}; - use std::path::Path; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_compress_algorithm_gzip() { - let buf = vec![0x2u8; 4095]; - let compressed = compress(&buf, Algorithm::GZip).unwrap(); - assert!(compressed.1); - let (compressed, _) = compressed; - assert_ne!(compressed.len(), 0); - - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::GZip).unwrap(); - assert_eq!(sz, 4095); - assert_eq!(buf, decompressed); - - let mut tmp_file = TempFile::new().unwrap().into_file(); - tmp_file.write_all(&compressed).unwrap(); - tmp_file.seek(SeekFrom::Start(0)).unwrap(); - - let mut decompressed = vec![0; buf.len()]; - let mut decoder = Decoder::new(tmp_file, Algorithm::GZip).unwrap(); - decoder.read_exact(decompressed.as_mut_slice()).unwrap(); - assert_eq!(sz, 4095); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_compress_algorithm_none() { - let buf = [ - 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, - 0x1u8, 0x2u8, 0x3u8, 0x4u8, - ]; - let mut dst = [0x0u8; 16]; - let (compressed, _) = compress(&buf, Algorithm::None).unwrap(); - assert_eq!(buf.to_vec(), compressed.to_vec()); - let _len = decompress(&buf, &mut dst, Algorithm::None).unwrap(); - assert_eq!(dst.to_vec(), compressed.to_vec()); - } - - #[test] - fn test_compress_algorithm_ztsd() { - let buf = vec![0x2u8; 4097]; - let mut decompressed = vec![0; buf.len()]; - let (compressed, _) = compress(&buf, Algorithm::Zstd).unwrap(); - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - assert_eq!(sz, 4097); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_compress_algorithm_lz4() { - let buf = [ - 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, - 0x1u8, 0x2u8, 0x3u8, 0x4u8, - ]; - let mut decompressed = vec![0; buf.len()]; - let (compressed, _) = compress(&buf, Algorithm::Lz4Block).unwrap(); - let _len = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - assert_eq!(decompressed.to_vec(), buf.to_vec()); - } - - #[test] - fn test_lz4_compress_decompress_1_byte() { - let buf = vec![0x1u8]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 1); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_lz4_compress_decompress_2_bytes() { - let buf = vec![0x2u8, 0x3u8]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 2); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_lz4_compress_decompress_16_bytes() { - let buf = [ - 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, - 0x1u8, 0x2u8, 0x3u8, 0x4u8, - ]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - 
&compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 16); - assert_eq!(&buf, decompressed.as_slice()); - } - - #[test] - fn test_lz4_compress_decompress_4095_bytes() { - let buf = vec![0x2u8; 4095]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 4095); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_lz4_compress_decompress_4096_bytes() { - let buf = vec![0x2u8; 4096]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 4096); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_lz4_compress_decompress_4097_bytes() { - let buf = vec![0x2u8; 4097]; - let compressed = lz4_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress( - &compressed, - decompressed.as_mut_slice(), - Algorithm::Lz4Block, - ) - .unwrap(); - - assert_eq!(sz, 4097); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_zstd_compress_decompress_1_byte() { - let buf = vec![0x1u8]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 1); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_zstd_compress_decompress_2_bytes() { - let buf = vec![0x2u8, 0x3u8]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 2); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_zstd_compress_decompress_16_bytes() { - let buf = [ - 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, - 0x1u8, 0x2u8, 0x3u8, 0x4u8, - ]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 16); - assert_eq!(&buf, decompressed.as_slice()); - } - - #[test] - fn test_zstd_compress_decompress_4095_bytes() { - let buf = vec![0x2u8; 4095]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 4095); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_zstd_compress_decompress_4096_bytes() { - let buf = vec![0x2u8; 4096]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 4096); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_zstd_compress_decompress_4097_bytes() { - let buf = vec![0x2u8; 4097]; - let compressed = zstd_compress(&buf).unwrap(); - let mut decompressed = vec![0; buf.len()]; - let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); - - assert_eq!(sz, 4097); - assert_eq!(buf, decompressed); - } - - #[test] - fn test_new_decoder_none() { - let buf = b"This is a test"; - let mut decoder = Decoder::new(buf.as_slice(), Algorithm::None).unwrap(); - let mut buf2 = vec![0u8; 1024]; - 
let res = decoder.read(&mut buf2).unwrap(); - assert_eq!(res, 14); - assert_eq!(&buf2[0..14], buf.as_slice()); - } - - #[test] - fn test_gzip_decoder() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir).join("../tests/texture/zran/zlib_sample.txt.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - let mut decoder = Decoder::new(file, Algorithm::GZip).unwrap(); - let mut buf = [0u8; 8]; - - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "This is "); - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "a test f"); - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "ile for "); - let ret = decoder.read(&mut buf).unwrap(); - assert_eq!(ret, 6); - assert_eq!(&String::from_utf8_lossy(&buf[0..6]), "zlib.\n"); - let ret = decoder.read(&mut buf).unwrap(); - assert_eq!(ret, 0); - } - - #[test] - fn test_zlib_decoder() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = Path::new(root_dir).join("../tests/texture/zran/zlib_sample.txt.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - let mut decoder = ZlibDecoder::new(file); - let mut buf = [0u8; 8]; - - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "This is "); - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "a test f"); - decoder.read_exact(&mut buf).unwrap(); - assert_eq!(&String::from_utf8_lossy(&buf), "ile for "); - let ret = decoder.read(&mut buf).unwrap(); - assert_eq!(ret, 6); - assert_eq!(&String::from_utf8_lossy(&buf[0..6]), "zlib.\n"); - let ret = decoder.read(&mut buf).unwrap(); - assert_eq!(ret, 0); - print!( - "{:?}, {:?}, {:?}, {:?},", - Algorithm::GZip, - Algorithm::Lz4Block, - Algorithm::Zstd, - Algorithm::None - ) - } - - #[test] - fn test_algorithm_from() { - assert_eq!(Algorithm::from_str("none").unwrap(), Algorithm::None); - assert_eq!( - Algorithm::from_str("lz4_block").unwrap(), - Algorithm::Lz4Block - ); - assert_eq!(Algorithm::from_str("gzip").unwrap(), Algorithm::GZip); - assert_eq!(Algorithm::from_str("zstd").unwrap(), Algorithm::Zstd); - assert!(Algorithm::from_str("foo").is_err()); - assert_eq!( - Algorithm::try_from(Algorithm::None as u32).unwrap(), - Algorithm::None - ); - assert_eq!( - Algorithm::try_from(Algorithm::Lz4Block as u32).unwrap(), - Algorithm::Lz4Block - ); - assert_eq!( - Algorithm::try_from(Algorithm::GZip as u32).unwrap(), - Algorithm::GZip - ); - assert_eq!( - Algorithm::try_from(Algorithm::Zstd as u32).unwrap(), - Algorithm::Zstd - ); - assert!(Algorithm::try_from(u32::MAX).is_err()); - - assert_eq!( - Algorithm::try_from(Algorithm::None as u64).unwrap(), - Algorithm::None - ); - assert_eq!( - Algorithm::try_from(Algorithm::Lz4Block as u64).unwrap(), - Algorithm::Lz4Block - ); - assert_eq!( - Algorithm::try_from(Algorithm::GZip as u64).unwrap(), - Algorithm::GZip - ); - assert_eq!( - Algorithm::try_from(Algorithm::Zstd as u64).unwrap(), - Algorithm::Zstd - ); - assert!(Algorithm::try_from(u64::MAX).is_err()); - assert!(Algorithm::None.is_none()); - assert!(!Algorithm::Lz4Block.is_none()); - assert!(!Algorithm::GZip.is_none()); - assert!(!Algorithm::Zstd.is_none()); - } -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::borrow::Cow; +use std::convert::TryFrom; +use std::fmt; +use std::io::{BufReader, Error, Read, Result, Write}; +use std::str::FromStr; + +mod lz4_standard; +use self::lz4_standard::*; + +#[cfg(feature = "zran")] +pub mod zlib_random; + +const COMPRESSION_MINIMUM_RATIO: usize = 100; + +/// Supported compression algorithms. +#[repr(u32)] +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum Algorithm { + #[default] + None = 0, + Lz4Block = 1, + GZip = 2, + Zstd = 3, +} + +impl fmt::Display for Algorithm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl FromStr for Algorithm { + type Err = Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "none" => Ok(Self::None), + "lz4_block" => Ok(Self::Lz4Block), + "gzip" => Ok(Self::GZip), + "zstd" => Ok(Self::Zstd), + _ => Err(einval!("compression algorithm should be none or lz4_block")), + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u32) -> std::result::Result { + if value == Algorithm::None as u32 { + Ok(Algorithm::None) + } else if value == Algorithm::Lz4Block as u32 { + Ok(Algorithm::Lz4Block) + } else if value == Algorithm::GZip as u32 { + Ok(Algorithm::GZip) + } else if value == Algorithm::Zstd as u32 { + Ok(Algorithm::Zstd) + } else { + Err(()) + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u64) -> std::result::Result { + if value == Algorithm::None as u64 { + Ok(Algorithm::None) + } else if value == Algorithm::Lz4Block as u64 { + Ok(Algorithm::Lz4Block) + } else if value == Algorithm::GZip as u64 { + Ok(Algorithm::GZip) + } else if value == Algorithm::Zstd as u64 { + Ok(Algorithm::Zstd) + } else { + Err(()) + } + } +} + +impl Algorithm { + /// Check whether the compression algorithm is none. + pub fn is_none(self) -> bool { + self == Self::None + } +} + +/// Compress data with the specified compression algorithm. +pub fn compress(src: &[u8], algorithm: Algorithm) -> Result<(Cow<[u8]>, bool)> { + let src_size = src.len(); + if src_size == 0 { + return Ok((Cow::Borrowed(src), false)); + } + + let compressed = match algorithm { + Algorithm::None => return Ok((Cow::Borrowed(src), false)), + Algorithm::Lz4Block => lz4_compress(src)?, + Algorithm::GZip => { + let dst: Vec = Vec::new(); + let mut gz = flate2::write::GzEncoder::new(dst, flate2::Compression::default()); + gz.write_all(src)?; + gz.finish()? + } + Algorithm::Zstd => zstd_compress(src)?, + }; + + // Abandon compressed data when compression ratio greater than COMPRESSION_MINIMUM_RATIO + if (COMPRESSION_MINIMUM_RATIO == 100 && compressed.len() >= src_size) + || ((100 * compressed.len() / src_size) >= COMPRESSION_MINIMUM_RATIO) + { + Ok((Cow::Borrowed(src), false)) + } else { + Ok((Cow::Owned(compressed), true)) + } +} + +/// Decompress a source slice or file stream into destination slice, with provided compression algorithm. +/// Use the file as decompress source if provided. 
+pub fn decompress(src: &[u8], dst: &mut [u8], algorithm: Algorithm) -> Result { + match algorithm { + Algorithm::None => { + assert_eq!(src.len(), dst.len()); + dst.copy_from_slice(src); + Ok(dst.len()) + } + Algorithm::Lz4Block => lz4_decompress(src, dst), + Algorithm::GZip => { + let mut gz = flate2::bufread::GzDecoder::new(src); + gz.read_exact(dst)?; + Ok(dst.len()) + } + Algorithm::Zstd => zstd::bulk::decompress_to_buffer(src, dst), + } +} + +#[allow(clippy::large_enum_variant)] +/// Stream decoder for gzip/lz4/zstd. +pub enum Decoder<'a, R: Read> { + None(R), + Gzip(flate2::bufread::MultiGzDecoder>), + Zstd(zstd::stream::Decoder<'a, BufReader>), +} + +impl<'a, R: Read> Decoder<'a, R> { + /// Create a new instance of `Decoder`. + pub fn new(reader: R, algorithm: Algorithm) -> Result { + let decoder = match algorithm { + Algorithm::None => Decoder::None(reader), + Algorithm::GZip => { + Decoder::Gzip(flate2::bufread::MultiGzDecoder::new(BufReader::new(reader))) + } + Algorithm::Lz4Block => panic!("Decoder doesn't support lz4_block"), + Algorithm::Zstd => Decoder::Zstd(zstd::stream::Decoder::new(reader)?), + }; + Ok(decoder) + } +} + +impl<'a, R: Read> Read for Decoder<'a, R> { + fn read(&mut self, buf: &mut [u8]) -> Result { + match self { + Decoder::None(r) => r.read(buf), + Decoder::Gzip(r) => r.read(buf), + Decoder::Zstd(r) => r.read(buf), + } + } +} + +/// Stream decoder for zlib/gzip. +pub struct ZlibDecoder { + stream: flate2::bufread::MultiGzDecoder>, +} + +impl ZlibDecoder { + /// Create a new instance of `ZlibDecoder`. + pub fn new(reader: R) -> Self { + ZlibDecoder { + stream: flate2::bufread::MultiGzDecoder::new(BufReader::new(reader)), + } + } +} + +impl Read for ZlibDecoder { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.stream.read(buf) + } +} + +/// Estimate the maximum compressed data size from uncompressed data size. +/// +/// Gzip is special that it doesn't carry compress_size. We need to read the maximum possible size +/// of compressed data for `chunk_decompress_size`, and try to decompress `chunk_decompress_size` +/// bytes of data out of it. +// +// Per man(1) gzip +// The worst case expansion is a few bytes for the gzip file header, plus 5 bytes every 32K block, +// or an expansion ratio of 0.015% for large files. +// +// Per http://www.zlib.org/rfc-gzip.html#header-trailer, each member has the following structure: +// +---+---+---+---+---+---+---+---+---+---+ +// |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) +// +---+---+---+---+---+---+---+---+---+---+ +// (if FLG.FEXTRA set) +// +---+---+=================================+ +// | XLEN |...XLEN bytes of "extra field"...| (more-->) +// +---+---+=================================+ +// (if FLG.FNAME set) +// +=========================================+ +// |...original file name, zero-terminated...| (more-->) +// +=========================================+ +// (if FLG.FCOMMENT set) +// +===================================+ +// |...file comment, zero-terminated...| (more-->) +// +===================================+ +// (if FLG.FHCRC set) +// +---+---+ +// | CRC16 | +// +---+---+ +// +=======================+ +// |...compressed blocks...| (more-->) +// +=======================+ +// 0 1 2 3 4 5 6 7 +// +---+---+---+---+---+---+---+---+ +// | CRC32 | ISIZE | +// +---+---+---+---+---+---+---+---+ +// gzip head+footer is at least 10+8 bytes, stargz header doesn't include any flags +// so it's 18 bytes. Let's read at least 128 bytes more, to allow the decompressor to +// find out end of the gzip stream. 
+// +// Ideally we should introduce a streaming cache for stargz that maintains internal +// chunks and expose stream APIs. +pub fn compute_compressed_gzip_size(size: usize, max_size: usize) -> usize { + let size = size + 10 + 8 + 5 + (size / (16 << 10)) * 5 + 128; + + std::cmp::min(size, max_size) +} + +fn zstd_compress(src: &[u8]) -> Result> { + zstd::bulk::compress(src, zstd::DEFAULT_COMPRESSION_LEVEL) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::OpenOptions; + use std::io::{Seek, SeekFrom}; + use std::path::Path; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_compress_algorithm_gzip() { + let buf = vec![0x2u8; 4095]; + let compressed = compress(&buf, Algorithm::GZip).unwrap(); + assert!(compressed.1); + let (compressed, _) = compressed; + assert_ne!(compressed.len(), 0); + + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::GZip).unwrap(); + assert_eq!(sz, 4095); + assert_eq!(buf, decompressed); + + let mut tmp_file = TempFile::new().unwrap().into_file(); + tmp_file.write_all(&compressed).unwrap(); + tmp_file.seek(SeekFrom::Start(0)).unwrap(); + + let mut decompressed = vec![0; buf.len()]; + let mut decoder = Decoder::new(tmp_file, Algorithm::GZip).unwrap(); + decoder.read_exact(decompressed.as_mut_slice()).unwrap(); + assert_eq!(sz, 4095); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_compress_algorithm_none() { + let buf = [ + 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, + 0x1u8, 0x2u8, 0x3u8, 0x4u8, + ]; + let mut dst = [0x0u8; 16]; + let (compressed, _) = compress(&buf, Algorithm::None).unwrap(); + assert_eq!(buf.to_vec(), compressed.to_vec()); + let _len = decompress(&buf, &mut dst, Algorithm::None).unwrap(); + assert_eq!(dst.to_vec(), compressed.to_vec()); + } + + #[test] + fn test_compress_algorithm_ztsd() { + let buf = vec![0x2u8; 4097]; + let mut decompressed = vec![0; buf.len()]; + let (compressed, _) = compress(&buf, Algorithm::Zstd).unwrap(); + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + assert_eq!(sz, 4097); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_compress_algorithm_lz4() { + let buf = [ + 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, + 0x1u8, 0x2u8, 0x3u8, 0x4u8, + ]; + let mut decompressed = vec![0; buf.len()]; + let (compressed, _) = compress(&buf, Algorithm::Lz4Block).unwrap(); + let _len = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + assert_eq!(decompressed.to_vec(), buf.to_vec()); + } + + #[test] + fn test_lz4_compress_decompress_1_byte() { + let buf = vec![0x1u8]; + let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 1); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_lz4_compress_decompress_2_bytes() { + let buf = vec![0x2u8, 0x3u8]; + let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 2); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_lz4_compress_decompress_16_bytes() { + let buf = [ + 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, + 0x1u8, 0x2u8, 0x3u8, 0x4u8, + ]; + 
let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 16); + assert_eq!(&buf, decompressed.as_slice()); + } + + #[test] + fn test_lz4_compress_decompress_4095_bytes() { + let buf = vec![0x2u8; 4095]; + let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 4095); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_lz4_compress_decompress_4096_bytes() { + let buf = vec![0x2u8; 4096]; + let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 4096); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_lz4_compress_decompress_4097_bytes() { + let buf = vec![0x2u8; 4097]; + let compressed = lz4_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress( + &compressed, + decompressed.as_mut_slice(), + Algorithm::Lz4Block, + ) + .unwrap(); + + assert_eq!(sz, 4097); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_zstd_compress_decompress_1_byte() { + let buf = vec![0x1u8]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 1); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_zstd_compress_decompress_2_bytes() { + let buf = vec![0x2u8, 0x3u8]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 2); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_zstd_compress_decompress_16_bytes() { + let buf = [ + 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x1u8, 0x2u8, 0x3u8, 0x4u8, + 0x1u8, 0x2u8, 0x3u8, 0x4u8, + ]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 16); + assert_eq!(&buf, decompressed.as_slice()); + } + + #[test] + fn test_zstd_compress_decompress_4095_bytes() { + let buf = vec![0x2u8; 4095]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 4095); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_zstd_compress_decompress_4096_bytes() { + let buf = vec![0x2u8; 4096]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 4096); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_zstd_compress_decompress_4097_bytes() { + let buf = vec![0x2u8; 4097]; + let compressed = zstd_compress(&buf).unwrap(); + let mut decompressed = vec![0; buf.len()]; + let sz = decompress(&compressed, decompressed.as_mut_slice(), Algorithm::Zstd).unwrap(); + + assert_eq!(sz, 4097); + assert_eq!(buf, decompressed); + } + + #[test] + fn test_new_decoder_none() { + let buf = b"This is a 
test"; + let mut decoder = Decoder::new(buf.as_slice(), Algorithm::None).unwrap(); + let mut buf2 = vec![0u8; 1024]; + let res = decoder.read(&mut buf2).unwrap(); + assert_eq!(res, 14); + assert_eq!(&buf2[0..14], buf.as_slice()); + } + + #[test] + fn test_gzip_decoder() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir).join("../tests/texture/zran/zlib_sample.txt.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + let mut decoder = Decoder::new(file, Algorithm::GZip).unwrap(); + let mut buf = [0u8; 8]; + + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "This is "); + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "a test f"); + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "ile for "); + let ret = decoder.read(&mut buf).unwrap(); + assert_eq!(ret, 6); + assert_eq!(&String::from_utf8_lossy(&buf[0..6]), "zlib.\n"); + let ret = decoder.read(&mut buf).unwrap(); + assert_eq!(ret, 0); + } + + #[test] + fn test_zlib_decoder() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = Path::new(root_dir).join("../tests/texture/zran/zlib_sample.txt.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + let mut decoder = ZlibDecoder::new(file); + let mut buf = [0u8; 8]; + + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "This is "); + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "a test f"); + decoder.read_exact(&mut buf).unwrap(); + assert_eq!(&String::from_utf8_lossy(&buf), "ile for "); + let ret = decoder.read(&mut buf).unwrap(); + assert_eq!(ret, 6); + assert_eq!(&String::from_utf8_lossy(&buf[0..6]), "zlib.\n"); + let ret = decoder.read(&mut buf).unwrap(); + assert_eq!(ret, 0); + print!( + "{:?}, {:?}, {:?}, {:?},", + Algorithm::GZip, + Algorithm::Lz4Block, + Algorithm::Zstd, + Algorithm::None + ) + } + + #[test] + fn test_algorithm_from() { + assert_eq!(Algorithm::from_str("none").unwrap(), Algorithm::None); + assert_eq!( + Algorithm::from_str("lz4_block").unwrap(), + Algorithm::Lz4Block + ); + assert_eq!(Algorithm::from_str("gzip").unwrap(), Algorithm::GZip); + assert_eq!(Algorithm::from_str("zstd").unwrap(), Algorithm::Zstd); + assert!(Algorithm::from_str("foo").is_err()); + assert_eq!( + Algorithm::try_from(Algorithm::None as u32).unwrap(), + Algorithm::None + ); + assert_eq!( + Algorithm::try_from(Algorithm::Lz4Block as u32).unwrap(), + Algorithm::Lz4Block + ); + assert_eq!( + Algorithm::try_from(Algorithm::GZip as u32).unwrap(), + Algorithm::GZip + ); + assert_eq!( + Algorithm::try_from(Algorithm::Zstd as u32).unwrap(), + Algorithm::Zstd + ); + assert!(Algorithm::try_from(u32::MAX).is_err()); + + assert_eq!( + Algorithm::try_from(Algorithm::None as u64).unwrap(), + Algorithm::None + ); + assert_eq!( + Algorithm::try_from(Algorithm::Lz4Block as u64).unwrap(), + Algorithm::Lz4Block + ); + assert_eq!( + Algorithm::try_from(Algorithm::GZip as u64).unwrap(), + Algorithm::GZip + ); + assert_eq!( + Algorithm::try_from(Algorithm::Zstd as u64).unwrap(), + Algorithm::Zstd + ); + assert!(Algorithm::try_from(u64::MAX).is_err()); + assert!(Algorithm::None.is_none()); + assert!(!Algorithm::Lz4Block.is_none()); + assert!(!Algorithm::GZip.is_none()); + assert!(!Algorithm::Zstd.is_none()); + } +} diff --git a/utils/src/compress/zlib_random.rs b/utils/src/compress/zlib_random.rs 
index cccc0d24f50..35aeda988dd 100644 --- a/utils/src/compress/zlib_random.rs +++ b/utils/src/compress/zlib_random.rs @@ -1,1013 +1,1013 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Generate context information to randomly access gzip/zlib stream. - -use std::alloc::{self, Layout}; -use std::convert::TryFrom; -use std::io::{Read, Result}; -use std::ops::DerefMut; -use std::os::raw::{c_int, c_void}; -use std::sync::{Arc, Mutex}; -use std::{mem, ptr}; - -use libz_sys::{ - inflate, inflateEnd, inflateInit2_, inflatePrime, inflateReset, inflateReset2, - inflateSetDictionary, uInt, z_stream, zlibVersion, Z_BLOCK, Z_BUF_ERROR, Z_OK, Z_STREAM_END, -}; -use sha2::{Digest, Sha256}; - -/// Size of inflate dictionary to support random access. -pub const ZRAN_DICT_WIN_SIZE: usize = 1 << 15; -/// Maximum number of random access slices per compression object. -pub const ZRAN_MAX_CI_ENTRIES: usize = 1 << 24; -/// Buffer size for ZRAN reader. -pub const ZRAN_READER_BUF_SIZE: usize = 64 * 1024; - -const ZRAN_MIN_COMP_SIZE: u64 = 768 * 1024; -const ZRAN_MAX_COMP_SIZE: u64 = 2048 * 1024; -const ZRAN_MAX_UNCOMP_SIZE: u64 = 2048 * 1024; -const ZLIB_ALIGN: usize = std::mem::align_of::(); - -/// Information to retrieve a data chunk from an associated random access slice. -#[derive(Debug, Eq, PartialEq)] -pub struct ZranChunkInfo { - /// Index into the inflate context array for the associated inflate context. - pub ci_index: u32, - /// Offset to get data chunk from the uncompressed content. - pub ci_offset: u32, - /// Size of the uncompressed chunk data. - pub ci_len: u32, - /// Position in the compressed data stream. - pub in_pos: u64, - /// Size of compressed data in input stream. - pub in_len: u32, -} - -/// Context information to decode data from a random access slice. -pub struct ZranContext { - /// Offset in the original compression data stream. - pub in_offset: u64, - /// Offset in the uncompression data stream. - pub out_offset: u64, - /// Size of original compressed data. - pub in_len: u32, - /// Size of uncompressed data. - pub out_len: u32, - /// Optional previous byte in the original compressed data stream, used when `ctx_bits` is non-zero. - pub ctx_byte: u8, - /// Bits from previous byte to feeds into the inflate context for random access. - pub ctx_bits: u8, - /// Inflate dictionary for random access. - pub dict: Vec, -} - -impl ZranContext { - fn new(info: &ZranCompInfo, dict: Vec) -> Self { - ZranContext { - in_offset: info.in_pos, - out_offset: info.out_pos, - in_len: 0, - out_len: 0, - ctx_byte: info.previous_byte, - ctx_bits: info.pending_bits, - dict, - } - } -} - -/// Gzip/zlib decoder to randomly uncompress Gzip/zlib stream. -pub struct ZranDecoder { - stream: ZranStream, -} - -impl ZranDecoder { - /// Create a new instance of `ZranDecoder`. - pub fn new() -> Result { - let stream = ZranStream::new(true)?; - Ok(Self { stream }) - } - - /// Uncompress gzip/zlib compressed data chunk. - /// - /// # Arguments - /// - ctx: context to random access compressed stream. 
- /// - dict: use this dictionary instead of `ctx.dict` to decode data - /// - input: input compressed data stream - /// - output: buffer to receive uncompressed data - pub fn uncompress( - &mut self, - ctx: &ZranContext, - dict: Option<&[u8]>, - input: &[u8], - output: &mut [u8], - ) -> Result { - if input.len() != ctx.in_len as usize { - return Err(einval!("size of input buffer doesn't match")); - } else if ctx.out_len as usize > output.len() { - return Err(einval!("buffer to receive decompressed data is too small")); - } - - self.stream.reset()?; - if ctx.ctx_bits != 0 { - let bits = ctx.ctx_bits & 0x7; - self.stream.set_prime(bits, ctx.ctx_byte)?; - } - let dict = dict.unwrap_or(ctx.dict.as_slice()); - self.stream.set_dict(dict)?; - - self.stream.set_next_in(input); - - let mut left = ctx.out_len; - loop { - let used = (ctx.out_len - left) as usize; - self.stream.set_next_out(&mut output[used..]); - self.stream.set_avail_out(left as uInt); - let mut got = self.stream.avail_out(); - let mut ret = self.stream.raw_inflate(0); - got -= self.stream.avail_out(); - left -= got; - - match ret { - Z_OK => { - let count = self.stream.next_out() as usize - output.as_ptr() as usize; - if count != ctx.out_len as usize { - return Err(eio!("failed to decode data from stream, size mismatch")); - } else { - return Ok(count); - } - } - Z_STREAM_END => { - // Discard the gzip trailer. - let drop = 8; - if self.stream.avail_in() >= drop { - let avail_in = self.stream.avail_in(); - let used = input.len() - avail_in as usize + drop as usize; - self.stream.set_next_in(&input[used..]); - } else { - // The input does not have a complete trailer. - return Err(eio!("the input does not have a complete gzip trailer")); - } - // Use inflate to skip the gzip header and resume the raw inflate there. - self.stream.reset2(true)?; - let mut discard = vec![0u8; ZRAN_DICT_WIN_SIZE]; - loop { - self.stream.set_next_out(&mut discard); - self.stream.set_avail_out(ZRAN_DICT_WIN_SIZE as u32); - ret = self.stream.raw_inflate(Z_BLOCK); // stop at end of header - if ret == Z_OK && (self.stream.data_type() & 0x80) == 0 { - continue; - } - - if ret != Z_OK { - return Err(eio!(format!( - "failed to handle gzip multi member, ret: {:?}", - ret - ))); - } - self.stream.reset2(false)?; - break; - } - } - e => { - return Err(eio!(format!( - "failed to decode data from compressed data stream, ret: {}", - e - ))) - } - } - } - } -} - -/// Struct to generate random access information for OCIv1 image tarballs. -/// -/// `ZranGenerator` generates decompression context information to support random access to the -/// tarball later. It only tracks information related to Tar file content, and ignores all other -/// tar headers and zlib headers when possible. The work flow is: -/// 1) create a `ZranGenerator` object `zran`. -/// 2) create a tar::Archive object from `zran`. 
-/// 3) walk all entries in the tarball, for each tar regular file: -/// 3.1) get file size and split it into chunks, for each file data chunk -/// 3.2) call zran.begin_data_chunk() -/// 3.3) read file content from the tar Entry object -/// 3.4) call zran.end_data_chunk() to get chunk decompression information -/// 4) call zran.get_compression_info_array() to get all decompression context information for -/// random access later -pub struct ZranGenerator { - reader: ZranReader, - min_comp_size: u64, - max_comp_size: u64, - max_uncomp_size: u64, - curr_block_start: u64, - curr_ci_offset: u64, - curr_in_offset: u64, - curr_ci_idx: Option, - ci_array: Vec, -} - -impl ZranGenerator { - /// Create a new instance of `ZranGenerator` from a reader. - pub fn new(reader: ZranReader) -> Self { - Self { - reader, - min_comp_size: ZRAN_MIN_COMP_SIZE, - max_comp_size: ZRAN_MAX_COMP_SIZE, - max_uncomp_size: ZRAN_MAX_UNCOMP_SIZE, - curr_block_start: 0, - curr_ci_offset: 0, - curr_in_offset: 0, - curr_ci_idx: None, - ci_array: Vec::new(), - } - } - - /// Begin a transaction to read data from the zlib stream. - /// - /// # Arguments - /// - `chunk_size`: size of data to be read from the zlib stream. - #[allow(clippy::if_same_then_else)] - pub fn begin_read(&mut self, chunk_size: u64) -> Result { - let info = self.reader.get_current_ctx_info(); - let ci_idx = if let Some(idx) = self.curr_ci_idx { - let ctx = &self.ci_array[idx]; - let comp_size = info.in_pos - ctx.in_offset; - let uncomp_size = info.out_pos - ctx.out_offset; - let first = self.is_first_block(); - let enough = !first - && (comp_size >= self.max_comp_size / 2 - || uncomp_size + chunk_size >= self.max_uncomp_size); - if info.stream_switched != 0 || enough { - // The slice becomes too big after merging current data chunk. - self.new_ci_entry()? - } else if !first - && comp_size > 2 * ctx.in_len as u64 - && ctx.in_len as u64 > self.min_comp_size - { - // The gap between current chunk and last chunk is too big. - self.new_ci_entry()? - } else { - idx - } - } else { - self.new_ci_entry()? - }; - - if ci_idx > ZRAN_MAX_CI_ENTRIES { - Err(einval!("too many compression information entries")) - } else { - self.curr_ci_idx = Some(ci_idx); - self.curr_ci_offset = info.out_pos; - self.curr_in_offset = info.in_pos; - Ok(ci_idx as u32) - } - } - - /// Mark end of a data read operation and returns information to decode data from the random - /// access slice. - pub fn end_read(&mut self) -> Result { - let info = self.reader.get_current_ctx_info(); - if let Some(idx) = self.curr_ci_idx { - let ctx = &mut self.ci_array[idx]; - let comp_size = info.in_pos - ctx.in_offset; - let uncomp_size = info.out_pos - ctx.out_offset; - let ci = ZranChunkInfo { - ci_index: idx as u32, - ci_offset: (self.curr_ci_offset - ctx.out_offset) as u32, - ci_len: (info.out_pos - self.curr_ci_offset) as u32, - in_pos: self.curr_in_offset, - in_len: (info.in_pos - self.curr_in_offset) as u32, - }; - ctx.out_len = uncomp_size as u32; - ctx.in_len = comp_size as u32; - Ok(ci) - } else { - Err(einval!("invalid compression state")) - } - } - - /// Get an immutable reference to the random access context information array. - pub fn get_compression_ctx_array(&self) -> &[ZranContext] { - &self.ci_array - } - - /// Set minimal compressed size to emit an random access slice. - /// - /// Please ensure "min_compressed_size * 2 <= max_compressed_size". 
- pub fn set_min_compressed_size(&mut self, sz: u64) { - self.min_comp_size = sz; - } - - /// Set maximum compressed size to emit an random access slice. - /// - /// Please ensure "min_compressed_size * 2 <= max_compressed_size". - pub fn set_max_compressed_size(&mut self, sz: u64) { - self.max_comp_size = sz; - } - - /// Set maximum uncompressed size to emit an random access slice. - /// - /// Please ensure "min_compressed_size * 2 < max_compressed_size". - pub fn set_max_uncompressed_size(&mut self, sz: u64) { - self.max_uncomp_size = sz; - } - - fn new_ci_entry(&mut self) -> Result { - let info = self.reader.get_block_ctx_info(); - let dict = self.reader.get_block_ctx_dict(); - self.ci_array.push(ZranContext::new(&info, dict)); - self.curr_block_start = info.in_pos; - Ok(self.ci_array.len() - 1) - } - - fn is_first_block(&self) -> bool { - let info = self.reader.get_block_ctx_info(); - info.in_pos == self.curr_block_start - } -} - -impl Read for ZranGenerator { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.reader.read(buf) - } -} - -/// A specialized gzip reader for OCI image tarballs. -/// -/// This reader assumes that the compressed file is a tar file, and restricts access patterns. -pub struct ZranReader { - inner: Arc>>, -} - -impl ZranReader { - /// Create a `ZranReader` from a reader. - pub fn new(reader: R) -> Result { - let inner = ZranReaderState::new(reader)?; - Ok(Self { - inner: Arc::new(Mutex::new(inner)), - }) - } - - /// Copy data from the buffer into the internal input buffer. - pub fn set_initial_data(&self, buf: &[u8]) { - let mut state = self.inner.lock().unwrap(); - assert_eq!(state.stream.avail_in(), 0); - assert!(buf.len() <= state.input.len()); - let ptr = state.input.as_mut_ptr(); - assert_eq!(state.stream.stream.next_in, ptr); - - state.input[..buf.len()].copy_from_slice(buf); - state.reader_hash.update(buf); - state.reader_size += buf.len() as u64; - state.stream.set_avail_in(buf.len() as u32); - } - - /// Get size of data read from the reader. - pub fn get_data_size(&self) -> u64 { - self.inner.lock().unwrap().reader_size - } - - /// Get sha256 hash value of data read from the reader. - pub fn get_data_digest(&self) -> Sha256 { - self.inner.lock().unwrap().reader_hash.clone() - } - - /// Get inflate context information for current inflate position. - fn get_current_ctx_info(&self) -> ZranCompInfo { - self.inner.lock().unwrap().get_compression_info() - } - - /// Get inflate context information for current inflate block. - fn get_block_ctx_info(&self) -> ZranCompInfo { - self.inner.lock().unwrap().block_ctx_info - } - - /// Get inflate dictionary for current inflate block. 
- fn get_block_ctx_dict(&self) -> Vec { - let state = self.inner.lock().unwrap(); - state.block_ctx_dict[..state.block_ctx_dict_size].to_vec() - } -} - -impl Read for ZranReader { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.inner.lock().unwrap().read(buf) - } -} - -impl Clone for ZranReader { - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - } - } -} - -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -struct ZranCompInfo { - in_pos: u64, - out_pos: u64, - flags: u32, - previous_byte: u8, - pending_bits: u8, - stream_switched: u8, -} - -struct ZranReaderState { - stream: ZranStream, - input: Vec, - reader: R, - reader_hash: Sha256, - reader_size: u64, - block_ctx_info: ZranCompInfo, - block_ctx_dict: Vec, - block_ctx_dict_size: usize, - stream_switched: u8, -} - -impl ZranReaderState { - fn new(reader: R) -> Result { - let mut stream = ZranStream::new(false)?; - let input = vec![0u8; ZRAN_READER_BUF_SIZE]; - stream.set_next_in(&input[0..0]); - - Ok(ZranReaderState { - stream, - input, - reader, - reader_hash: Sha256::new(), - reader_size: 0, - block_ctx_info: ZranCompInfo::default(), - block_ctx_dict: vec![0u8; ZRAN_DICT_WIN_SIZE], - block_ctx_dict_size: 0, - stream_switched: 0, - }) - } - - /// Get decompression information about the stream. - fn get_compression_info(&mut self) -> ZranCompInfo { - let stream_switched = self.stream_switched; - self.stream_switched = 0; - self.stream - .get_compression_info(&self.input, stream_switched) - } - - fn get_compression_dict(&mut self) -> Result<()> { - self.block_ctx_dict_size = self.stream.get_compression_dict(&mut self.block_ctx_dict)?; - Ok(()) - } -} - -impl Read for ZranReaderState { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.stream.set_next_out(buf); - self.stream.set_avail_out(buf.len() as u32); - - loop { - // Reload the input buffer when needed. - if self.stream.avail_in() == 0 { - if self.stream.stream.next_in > self.input.as_mut_ptr() { - self.stream.last_byte = unsafe { *self.stream.stream.next_in.sub(1) }; - } - let sz = self.reader.read(self.input.as_mut_slice())?; - if sz == 0 { - return Ok(0); - } - self.reader_hash.update(&self.input[0..sz]); - self.reader_size += sz as u64; - self.stream.set_next_in(&self.input[..sz]); - } - - match self.stream.inflate(false) { - Z_STREAM_END => { - self.stream.reset()?; - self.stream_switched = 1; - continue; - } - Z_OK => { - let count = self.stream.next_out() as usize - buf.as_ptr() as usize; - let info = self.get_compression_info(); - if info.flags & 0x80 != 0 { - self.get_compression_dict()?; - self.block_ctx_info = info; - } - if count == 0 { - // zlib/gzip compression header, continue for next data block. - continue; - } else { - return Ok(count); - } - } - Z_BUF_ERROR => { - if self.stream.avail_in() == 0 { - // Need more input data, continue to feed data into the input buffer. 
- continue; - } else { - return Err(eio!("failed to decode data from compressed data stream")); - } - } - e => { - return Err(eio!(format!( - "failed to decode data from compressed data stream, error code {}", - e - ))); - } - } - } - } -} - -struct ZranStream { - stream: Box, - total_in: u64, - total_out: u64, - last_byte: u8, -} - -impl ZranStream { - fn new(decode: bool) -> Result { - let mut stream = Box::new(z_stream { - next_in: ptr::null_mut(), - avail_in: 0, - total_in: 0, - next_out: ptr::null_mut(), - avail_out: 0, - total_out: 0, - msg: ptr::null_mut(), - adler: 0, - data_type: 0, - reserved: 0, - opaque: ptr::null_mut(), - state: ptr::null_mut(), - zalloc, - zfree, - }); - // windowBits can also be greater than 15 for optional gzip decoding. - // Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection, - // or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR). - // -15 means raw mode. - let mode = if decode { -15 } else { 31 }; - let ret = unsafe { - inflateInit2_( - stream.deref_mut() as *mut z_stream, - mode, - zlibVersion(), - mem::size_of::() as c_int, - ) - }; - if ret != Z_OK { - return Err(einval!("failed to initialize zlib inflate context")); - } - - Ok(Self { - stream, - total_in: 0, - total_out: 0, - last_byte: 0, - }) - } - - fn inflate(&mut self, decode: bool) -> i32 { - // Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block - // boundary. When decoding the zlib or gzip format, this will cause inflate() to return - // immediately after the header and before the first block. When doing a raw inflate, - // inflate() will go ahead and process the first block, and will return when it gets to - // the end of that block, or when it runs out of data. 
- let mode = if decode { 0 } else { Z_BLOCK }; - self.total_in += self.stream.avail_in as u64; - self.total_out += self.stream.avail_out as u64; - let ret = self.raw_inflate(mode); - self.total_in -= self.stream.avail_in as u64; - self.total_out -= self.stream.avail_out as u64; - ret - } - - fn raw_inflate(&mut self, mode: i32) -> i32 { - unsafe { inflate(self.stream.deref_mut() as *mut z_stream, mode) } - } - - fn reset(&mut self) -> Result<()> { - let ret = unsafe { inflateReset(self.stream.deref_mut() as *mut z_stream) }; - if ret != Z_OK { - return Err(einval!("failed to reset zlib inflate context")); - } - Ok(()) - } - - fn reset2(&mut self, is_gzip: bool) -> Result<()> { - let winodw_bits = if is_gzip { 31 } else { -15 }; - let ret = unsafe { inflateReset2(self.stream.deref_mut() as *mut z_stream, winodw_bits) }; - if ret != Z_OK { - return Err(einval!("failed to reset zlib inflate context")); - } - Ok(()) - } - - fn get_compression_info(&mut self, buf: &[u8], stream_switched: u8) -> ZranCompInfo { - let previous_byte = if self.stream.data_type & 0x7 != 0 { - assert!(self.stream.next_in as usize >= buf.as_ptr() as usize); - if self.stream.next_in as usize == buf.as_ptr() as usize { - self.last_byte - } else { - unsafe { *self.stream.next_in.sub(1) } - } - } else { - 0 - }; - ZranCompInfo { - in_pos: self.total_in, - out_pos: self.total_out, - flags: self.stream.data_type as u32, - previous_byte, - pending_bits: self.stream.data_type as u8 & 0x7, - stream_switched, - } - } - - fn get_compression_dict(&mut self, buf: &mut [u8]) -> Result { - let mut len: uInt = 0; - assert_eq!(buf.len(), ZRAN_DICT_WIN_SIZE); - - let ret = unsafe { - inflateGetDictionary( - self.stream.deref_mut() as *mut z_stream, - buf.as_mut_ptr(), - &mut len as *mut uInt, - ) - }; - - if ret != Z_OK { - Err(einval!("failed to get inflate dictionary")) - } else { - Ok(len as usize) - } - } - - fn set_dict(&mut self, dict: &[u8]) -> Result<()> { - let ret = unsafe { - inflateSetDictionary(self.stream.deref_mut(), dict.as_ptr(), dict.len() as uInt) - }; - if ret != Z_OK { - return Err(einval!("failed to reset zlib inflate context")); - } - Ok(()) - } - - fn set_prime(&mut self, bits: u8, prime: u8) -> Result<()> { - let ret = unsafe { - inflatePrime( - self.stream.deref_mut(), - bits as c_int, - prime as c_int >> (8 - bits), - ) - }; - if ret != Z_OK { - return Err(einval!("failed to reset zlib inflate context")); - } - Ok(()) - } - - fn set_next_in(&mut self, buf: &[u8]) { - self.stream.next_in = buf.as_ptr() as *mut u8; - self.set_avail_in(buf.len() as u32); - } - - fn avail_in(&self) -> u32 { - self.stream.avail_in - } - - fn avail_out(&self) -> u32 { - self.stream.avail_out - } - - fn data_type(&self) -> i32 { - self.stream.data_type - } - - fn set_avail_in(&mut self, avail_in: u32) { - self.stream.avail_in = avail_in; - } - - fn next_out(&self) -> *mut u8 { - self.stream.next_out - } - - fn set_next_out(&mut self, buf: &mut [u8]) { - self.stream.next_out = buf.as_mut_ptr(); - } - - fn set_avail_out(&mut self, avail_out: u32) { - self.stream.avail_out = avail_out; - } -} - -impl Drop for ZranStream { - fn drop(&mut self) { - unsafe { inflateEnd(self.stream.deref_mut() as *mut z_stream) }; - } -} - -// Code from https://github.com/rust-lang/flate2-rs/blob/main/src/ffi/c.rs with modification. 
-fn align_up(size: usize, align: usize) -> usize { - (size + align - 1) & !(align - 1) -} - -#[allow(unused)] -extern "C" fn zalloc(_ptr: *mut c_void, items: uInt, item_size: uInt) -> *mut c_void { - // We need to multiply `items` and `item_size` to get the actual desired - // allocation size. Since `zfree` doesn't receive a size argument we - // also need to allocate space for a `usize` as a header so we can store - // how large the allocation is to deallocate later. - let size = match items - .checked_mul(item_size) - .and_then(|i| usize::try_from(i).ok()) - .map(|size| align_up(size, ZLIB_ALIGN)) - .and_then(|i| i.checked_add(std::mem::size_of::())) - { - Some(i) => i, - None => return ptr::null_mut(), - }; - - // Make sure the `size` isn't too big to fail `Layout`'s restrictions - let layout = match Layout::from_size_align(size, ZLIB_ALIGN) { - Ok(layout) => layout, - Err(_) => return ptr::null_mut(), - }; - - unsafe { - // Allocate the data, and if successful store the size we allocated - // at the beginning and then return an offset pointer. - let ptr = alloc::alloc(layout) as *mut usize; - if ptr.is_null() { - return ptr as *mut c_void; - } - *ptr = size; - ptr.add(1) as *mut c_void - } -} - -#[allow(unused)] -extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) { - unsafe { - // Move our address being freed back one pointer, read the size we - // stored in `zalloc`, and then free it using the standard Rust - // allocator. - let ptr = (address as *mut usize).offset(-1); - let size = *ptr; - let layout = Layout::from_size_align_unchecked(size, ZLIB_ALIGN); - alloc::dealloc(ptr as *mut u8, layout) - } -} - -extern "system" { - pub fn inflateGetDictionary( - strm: *mut z_stream, - dictionary: *mut u8, - dictLength: *mut uInt, - ) -> c_int; -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs::OpenOptions; - use std::io::{Seek, SeekFrom}; - use std::path::PathBuf; - use tar::{Archive, EntryType}; - - #[test] - fn test_parse_single_gzip_object() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-single-stream.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - - let mut files = 0; - let mut objects = 0; - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader); - let entries = tar.entries().unwrap(); - for entry in entries { - let entry = entry.unwrap(); - objects += 1; - if entry.header().entry_type() == EntryType::Regular { - files += 1; - } - } - - assert_eq!(objects, 7); - assert_eq!(files, 3); - } - - #[test] - fn test_parse_first_gzip_object() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - - let mut files = 0; - let mut objects = 0; - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader); - - let entries = tar.entries().unwrap(); - for entry in entries { - let entry = entry.unwrap(); - objects += 1; - if entry.header().entry_type() == EntryType::Regular { - files += 1; - } - } - - assert_eq!(objects, 7); - assert_eq!(files, 3); - } - - #[test] - fn test_parse_two_gzip_objects() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = 
OpenOptions::new().read(true).open(path).unwrap(); - - let mut files = 0; - let mut objects = 0; - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader); - tar.set_ignore_zeros(true); - - let entries = tar.entries().unwrap(); - for entry in entries { - let entry = entry.unwrap(); - objects += 1; - if entry.header().entry_type() == EntryType::Regular { - files += 1; - } - } - - assert_eq!(objects, 10); - assert_eq!(files, 5); - } - - #[test] - fn test_parse_gzip_with_big_zero() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-zero-file.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader.clone()); - let entries = tar.entries().unwrap(); - - let mut last: Option = None; - for entry in entries { - let mut entry = entry.unwrap(); - assert_eq!(entry.header().entry_type(), EntryType::Regular); - loop { - let mut buf = vec![0u8; 512]; - let sz = entry.read(&mut buf).unwrap(); - if sz == 0 { - break; - } - - let info = reader.get_current_ctx_info(); - if let Some(prev) = last { - assert_ne!(prev, info); - } - last = Some(info); - } - } - } - - #[test] - fn test_generate_comp_info() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader.clone()); - tar.set_ignore_zeros(true); - let mut generator = ZranGenerator::new(reader); - generator.set_min_compressed_size(1024); - generator.set_max_compressed_size(2048); - generator.set_max_uncompressed_size(4096); - - let entries = tar.entries().unwrap(); - for entry in entries { - let mut entry = entry.unwrap(); - if entry.header().entry_type() == EntryType::Regular { - loop { - let _start = generator.begin_read(512).unwrap(); - let mut buf = vec![0u8; 512]; - let sz = entry.read(&mut buf).unwrap(); - if sz == 0 { - break; - } - let _info = generator.end_read().unwrap(); - } - } - } - - let ctx = generator.get_compression_ctx_array(); - assert_eq!(ctx.len(), 3); - } - - #[test] - fn test_zran_bgzip() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/bgzip.tar.gz"); - let file = OpenOptions::new().read(true).open(&path).unwrap(); - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader.clone()); - tar.set_ignore_zeros(true); - let mut generator = ZranGenerator::new(reader); - generator.set_min_compressed_size(1024); - generator.set_max_compressed_size(2048); - generator.set_max_uncompressed_size(4096); - - let entries = tar.entries().unwrap(); - for entry in entries { - let mut entry = entry.unwrap(); - if entry.header().entry_type() == EntryType::Regular { - loop { - let _start = generator.begin_read(512).unwrap(); - let mut buf = vec![0u8; 512]; - let sz = entry.read(&mut buf).unwrap(); - let _info = generator.end_read().unwrap(); - if sz == 0 { - break; - } - } - } - } - - let ctx_array = generator.get_compression_ctx_array(); - for ctx in ctx_array.iter() { - let mut c_buf = vec![0u8; ctx.in_len as usize]; - let mut file = OpenOptions::new().read(true).open(&path).unwrap(); - file.seek(SeekFrom::Start(ctx.in_offset)).unwrap(); 
- file.read_exact(&mut c_buf).unwrap(); - - let mut d_buf = vec![0u8; ctx.out_len as usize]; - let mut decoder = ZranDecoder::new().unwrap(); - decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap(); - } - } - - #[test] - fn test_zran_decoder() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = OpenOptions::new().read(true).open(&path).unwrap(); - - let reader = ZranReader::new(file).unwrap(); - let mut tar = Archive::new(reader.clone()); - tar.set_ignore_zeros(true); - let mut generator = ZranGenerator::new(reader); - generator.set_min_compressed_size(1024); - generator.set_max_compressed_size(2048); - generator.set_max_uncompressed_size(4096); - - let entries = tar.entries().unwrap(); - for entry in entries { - let mut entry = entry.unwrap(); - if entry.header().entry_type() == EntryType::Regular { - loop { - let _start = generator.begin_read(512).unwrap(); - let mut buf = vec![0u8; 512]; - let sz = entry.read(&mut buf).unwrap(); - let _info = generator.end_read().unwrap(); - if sz == 0 { - break; - } - } - } - } - - let ctx_array = generator.get_compression_ctx_array(); - assert_eq!(ctx_array.len(), 3); - for ctx in ctx_array.iter().take(3) { - let mut c_buf = vec![0u8; ctx.in_len as usize]; - let mut file = OpenOptions::new().read(true).open(&path).unwrap(); - file.seek(SeekFrom::Start(ctx.in_offset)).unwrap(); - file.read_exact(&mut c_buf).unwrap(); - - let mut d_buf = vec![0u8; ctx.out_len as usize]; - let mut decoder = ZranDecoder::new().unwrap(); - decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap(); - } - } - - #[test] - fn test_zran_reader() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); - let file = OpenOptions::new().read(true).open(path).unwrap(); - - let reader = ZranReader::new(file).unwrap(); - assert_eq!(reader.get_data_size(), 0); - - let buf = vec![0x0u8; 32]; - reader.set_initial_data(&buf); - assert_eq!(reader.get_data_size(), 32); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Generate context information to randomly access gzip/zlib stream. + +use std::alloc::{self, Layout}; +use std::convert::TryFrom; +use std::io::{Read, Result}; +use std::ops::DerefMut; +use std::os::raw::{c_int, c_void}; +use std::sync::{Arc, Mutex}; +use std::{mem, ptr}; + +use libz_sys::{ + inflate, inflateEnd, inflateInit2_, inflatePrime, inflateReset, inflateReset2, + inflateSetDictionary, uInt, z_stream, zlibVersion, Z_BLOCK, Z_BUF_ERROR, Z_OK, Z_STREAM_END, +}; +use sha2::{Digest, Sha256}; + +/// Size of inflate dictionary to support random access. +pub const ZRAN_DICT_WIN_SIZE: usize = 1 << 15; +/// Maximum number of random access slices per compression object. +pub const ZRAN_MAX_CI_ENTRIES: usize = 1 << 24; +/// Buffer size for ZRAN reader. +pub const ZRAN_READER_BUF_SIZE: usize = 64 * 1024; + +const ZRAN_MIN_COMP_SIZE: u64 = 768 * 1024; +const ZRAN_MAX_COMP_SIZE: u64 = 2048 * 1024; +const ZRAN_MAX_UNCOMP_SIZE: u64 = 2048 * 1024; +const ZLIB_ALIGN: usize = std::mem::align_of::(); + +/// Information to retrieve a data chunk from an associated random access slice. +#[derive(Debug, Eq, PartialEq)] +pub struct ZranChunkInfo { + /// Index into the inflate context array for the associated inflate context. 
+ pub ci_index: u32, + /// Offset to get data chunk from the uncompressed content. + pub ci_offset: u32, + /// Size of the uncompressed chunk data. + pub ci_len: u32, + /// Position in the compressed data stream. + pub in_pos: u64, + /// Size of compressed data in input stream. + pub in_len: u32, +} + +/// Context information to decode data from a random access slice. +pub struct ZranContext { + /// Offset in the original compression data stream. + pub in_offset: u64, + /// Offset in the uncompression data stream. + pub out_offset: u64, + /// Size of original compressed data. + pub in_len: u32, + /// Size of uncompressed data. + pub out_len: u32, + /// Optional previous byte in the original compressed data stream, used when `ctx_bits` is non-zero. + pub ctx_byte: u8, + /// Bits from previous byte to feeds into the inflate context for random access. + pub ctx_bits: u8, + /// Inflate dictionary for random access. + pub dict: Vec, +} + +impl ZranContext { + fn new(info: &ZranCompInfo, dict: Vec) -> Self { + ZranContext { + in_offset: info.in_pos, + out_offset: info.out_pos, + in_len: 0, + out_len: 0, + ctx_byte: info.previous_byte, + ctx_bits: info.pending_bits, + dict, + } + } +} + +/// Gzip/zlib decoder to randomly uncompress Gzip/zlib stream. +pub struct ZranDecoder { + stream: ZranStream, +} + +impl ZranDecoder { + /// Create a new instance of `ZranDecoder`. + pub fn new() -> Result { + let stream = ZranStream::new(true)?; + Ok(Self { stream }) + } + + /// Uncompress gzip/zlib compressed data chunk. + /// + /// # Arguments + /// - ctx: context to random access compressed stream. + /// - dict: use this dictionary instead of `ctx.dict` to decode data + /// - input: input compressed data stream + /// - output: buffer to receive uncompressed data + pub fn uncompress( + &mut self, + ctx: &ZranContext, + dict: Option<&[u8]>, + input: &[u8], + output: &mut [u8], + ) -> Result { + if input.len() != ctx.in_len as usize { + return Err(einval!("size of input buffer doesn't match")); + } else if ctx.out_len as usize > output.len() { + return Err(einval!("buffer to receive decompressed data is too small")); + } + + self.stream.reset()?; + if ctx.ctx_bits != 0 { + let bits = ctx.ctx_bits & 0x7; + self.stream.set_prime(bits, ctx.ctx_byte)?; + } + let dict = dict.unwrap_or(ctx.dict.as_slice()); + self.stream.set_dict(dict)?; + + self.stream.set_next_in(input); + + let mut left = ctx.out_len; + loop { + let used = (ctx.out_len - left) as usize; + self.stream.set_next_out(&mut output[used..]); + self.stream.set_avail_out(left as uInt); + let mut got = self.stream.avail_out(); + let mut ret = self.stream.raw_inflate(0); + got -= self.stream.avail_out(); + left -= got; + + match ret { + Z_OK => { + let count = self.stream.next_out() as usize - output.as_ptr() as usize; + if count != ctx.out_len as usize { + return Err(eio!("failed to decode data from stream, size mismatch")); + } else { + return Ok(count); + } + } + Z_STREAM_END => { + // Discard the gzip trailer. + let drop = 8; + if self.stream.avail_in() >= drop { + let avail_in = self.stream.avail_in(); + let used = input.len() - avail_in as usize + drop as usize; + self.stream.set_next_in(&input[used..]); + } else { + // The input does not have a complete trailer. + return Err(eio!("the input does not have a complete gzip trailer")); + } + // Use inflate to skip the gzip header and resume the raw inflate there. 
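The `uncompress()` method above consumes a `ZranContext` recorded earlier plus exactly `in_len` bytes of the original compressed stream, and inflates them independently of everything else. Below is a minimal sketch of that call pattern, modeled on the `test_zran_decoder`/`test_zran_bgzip` tests further down; it assumes the zran types are in scope, that `ctx` came from `ZranGenerator::get_compression_ctx_array()`, and the helper name and path handling are illustrative.

```rust
use std::fs::OpenOptions;
use std::io::{Read, Seek, SeekFrom};

// Hypothetical helper: decode one random-access slice described by `ctx`.
// `path` must point at the same compressed file the index was built from.
fn read_slice(path: &str, ctx: &ZranContext) -> std::io::Result<Vec<u8>> {
    // Fetch exactly the compressed bytes covered by this slice.
    let mut file = OpenOptions::new().read(true).open(path)?;
    file.seek(SeekFrom::Start(ctx.in_offset))?;
    let mut c_buf = vec![0u8; ctx.in_len as usize];
    file.read_exact(&mut c_buf)?;

    // Inflate the slice on its own, using the dictionary and bit-level context
    // recorded in `ctx` when the index was generated.
    let mut d_buf = vec![0u8; ctx.out_len as usize];
    let mut decoder = ZranDecoder::new()?;
    decoder.uncompress(ctx, None, &c_buf, &mut d_buf)?;
    Ok(d_buf)
}
```

The `dict`, `ctx_byte` and `ctx_bits` fields are what allow the inflate state to be re-primed so decompression can resume in the middle of a deflate stream, even when a slice starts mid-byte.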
+ self.stream.reset2(true)?; + let mut discard = vec![0u8; ZRAN_DICT_WIN_SIZE]; + loop { + self.stream.set_next_out(&mut discard); + self.stream.set_avail_out(ZRAN_DICT_WIN_SIZE as u32); + ret = self.stream.raw_inflate(Z_BLOCK); // stop at end of header + if ret == Z_OK && (self.stream.data_type() & 0x80) == 0 { + continue; + } + + if ret != Z_OK { + return Err(eio!(format!( + "failed to handle gzip multi member, ret: {:?}", + ret + ))); + } + self.stream.reset2(false)?; + break; + } + } + e => { + return Err(eio!(format!( + "failed to decode data from compressed data stream, ret: {}", + e + ))) + } + } + } + } +} + +/// Struct to generate random access information for OCIv1 image tarballs. +/// +/// `ZranGenerator` generates decompression context information to support random access to the +/// tarball later. It only tracks information related to Tar file content, and ignores all other +/// tar headers and zlib headers when possible. The work flow is: +/// 1) create a `ZranGenerator` object `zran`. +/// 2) create a tar::Archive object from `zran`. +/// 3) walk all entries in the tarball, for each tar regular file: +/// 3.1) get file size and split it into chunks, for each file data chunk +/// 3.2) call zran.begin_data_chunk() +/// 3.3) read file content from the tar Entry object +/// 3.4) call zran.end_data_chunk() to get chunk decompression information +/// 4) call zran.get_compression_info_array() to get all decompression context information for +/// random access later +pub struct ZranGenerator { + reader: ZranReader, + min_comp_size: u64, + max_comp_size: u64, + max_uncomp_size: u64, + curr_block_start: u64, + curr_ci_offset: u64, + curr_in_offset: u64, + curr_ci_idx: Option, + ci_array: Vec, +} + +impl ZranGenerator { + /// Create a new instance of `ZranGenerator` from a reader. + pub fn new(reader: ZranReader) -> Self { + Self { + reader, + min_comp_size: ZRAN_MIN_COMP_SIZE, + max_comp_size: ZRAN_MAX_COMP_SIZE, + max_uncomp_size: ZRAN_MAX_UNCOMP_SIZE, + curr_block_start: 0, + curr_ci_offset: 0, + curr_in_offset: 0, + curr_ci_idx: None, + ci_array: Vec::new(), + } + } + + /// Begin a transaction to read data from the zlib stream. + /// + /// # Arguments + /// - `chunk_size`: size of data to be read from the zlib stream. + #[allow(clippy::if_same_then_else)] + pub fn begin_read(&mut self, chunk_size: u64) -> Result { + let info = self.reader.get_current_ctx_info(); + let ci_idx = if let Some(idx) = self.curr_ci_idx { + let ctx = &self.ci_array[idx]; + let comp_size = info.in_pos - ctx.in_offset; + let uncomp_size = info.out_pos - ctx.out_offset; + let first = self.is_first_block(); + let enough = !first + && (comp_size >= self.max_comp_size / 2 + || uncomp_size + chunk_size >= self.max_uncomp_size); + if info.stream_switched != 0 || enough { + // The slice becomes too big after merging current data chunk. + self.new_ci_entry()? + } else if !first + && comp_size > 2 * ctx.in_len as u64 + && ctx.in_len as u64 > self.min_comp_size + { + // The gap between current chunk and last chunk is too big. + self.new_ci_entry()? + } else { + idx + } + } else { + self.new_ci_entry()? + }; + + if ci_idx > ZRAN_MAX_CI_ENTRIES { + Err(einval!("too many compression information entries")) + } else { + self.curr_ci_idx = Some(ci_idx); + self.curr_ci_offset = info.out_pos; + self.curr_in_offset = info.in_pos; + Ok(ci_idx as u32) + } + } + + /// Mark end of a data read operation and returns information to decode data from the random + /// access slice. 
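The four-step workflow in the `ZranGenerator` doc comment above boils down to wrapping each chunk-sized read in `begin_read()`/`end_read()`. Here is a hedged sketch of that loop, modeled on `test_generate_comp_info` below; the helper name, path and 512-byte chunk size are illustrative, and the zran types are assumed to be in scope.

```rust
use std::fs::OpenOptions;
use std::io::Read;
use tar::{Archive, EntryType};

// Hypothetical helper showing the 4-step workflow documented above.
fn build_zran_index(path: &str) -> std::io::Result<Vec<ZranChunkInfo>> {
    let file = OpenOptions::new().read(true).open(path)?;
    let reader = ZranReader::new(file)?; // 1) wrap the compressed stream
    let mut tar = Archive::new(reader.clone()); // 2) let tar walk the decompressed content
    tar.set_ignore_zeros(true);

    let mut generator = ZranGenerator::new(reader);
    generator.set_min_compressed_size(1024);
    generator.set_max_compressed_size(2048);
    generator.set_max_uncompressed_size(4096);

    let mut chunks = Vec::new();
    for entry in tar.entries()? {
        let mut entry = entry?;
        if entry.header().entry_type() != EntryType::Regular {
            continue; // only regular file content is chunked
        }
        loop {
            generator.begin_read(512)?; // 3.2) open a read transaction
            let mut buf = [0u8; 512];
            let sz = entry.read(&mut buf)?; // 3.3) pull file content through the reader
            if sz == 0 {
                break;
            }
            chunks.push(generator.end_read()?); // 3.4) record the chunk's slice location
        }
    }

    // 4) the per-slice decompression contexts are now available.
    let slices = generator.get_compression_ctx_array();
    println!("built {} random-access slices for {} chunks", slices.len(), chunks.len());
    Ok(chunks)
}
```

Each returned `ZranChunkInfo` records which slice a chunk lives in and where, while `get_compression_ctx_array()` yields the per-slice `ZranContext` entries needed later by `ZranDecoder`.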
+ pub fn end_read(&mut self) -> Result { + let info = self.reader.get_current_ctx_info(); + if let Some(idx) = self.curr_ci_idx { + let ctx = &mut self.ci_array[idx]; + let comp_size = info.in_pos - ctx.in_offset; + let uncomp_size = info.out_pos - ctx.out_offset; + let ci = ZranChunkInfo { + ci_index: idx as u32, + ci_offset: (self.curr_ci_offset - ctx.out_offset) as u32, + ci_len: (info.out_pos - self.curr_ci_offset) as u32, + in_pos: self.curr_in_offset, + in_len: (info.in_pos - self.curr_in_offset) as u32, + }; + ctx.out_len = uncomp_size as u32; + ctx.in_len = comp_size as u32; + Ok(ci) + } else { + Err(einval!("invalid compression state")) + } + } + + /// Get an immutable reference to the random access context information array. + pub fn get_compression_ctx_array(&self) -> &[ZranContext] { + &self.ci_array + } + + /// Set minimal compressed size to emit an random access slice. + /// + /// Please ensure "min_compressed_size * 2 <= max_compressed_size". + pub fn set_min_compressed_size(&mut self, sz: u64) { + self.min_comp_size = sz; + } + + /// Set maximum compressed size to emit an random access slice. + /// + /// Please ensure "min_compressed_size * 2 <= max_compressed_size". + pub fn set_max_compressed_size(&mut self, sz: u64) { + self.max_comp_size = sz; + } + + /// Set maximum uncompressed size to emit an random access slice. + /// + /// Please ensure "min_compressed_size * 2 < max_compressed_size". + pub fn set_max_uncompressed_size(&mut self, sz: u64) { + self.max_uncomp_size = sz; + } + + fn new_ci_entry(&mut self) -> Result { + let info = self.reader.get_block_ctx_info(); + let dict = self.reader.get_block_ctx_dict(); + self.ci_array.push(ZranContext::new(&info, dict)); + self.curr_block_start = info.in_pos; + Ok(self.ci_array.len() - 1) + } + + fn is_first_block(&self) -> bool { + let info = self.reader.get_block_ctx_info(); + info.in_pos == self.curr_block_start + } +} + +impl Read for ZranGenerator { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.reader.read(buf) + } +} + +/// A specialized gzip reader for OCI image tarballs. +/// +/// This reader assumes that the compressed file is a tar file, and restricts access patterns. +pub struct ZranReader { + inner: Arc>>, +} + +impl ZranReader { + /// Create a `ZranReader` from a reader. + pub fn new(reader: R) -> Result { + let inner = ZranReaderState::new(reader)?; + Ok(Self { + inner: Arc::new(Mutex::new(inner)), + }) + } + + /// Copy data from the buffer into the internal input buffer. + pub fn set_initial_data(&self, buf: &[u8]) { + let mut state = self.inner.lock().unwrap(); + assert_eq!(state.stream.avail_in(), 0); + assert!(buf.len() <= state.input.len()); + let ptr = state.input.as_mut_ptr(); + assert_eq!(state.stream.stream.next_in, ptr); + + state.input[..buf.len()].copy_from_slice(buf); + state.reader_hash.update(buf); + state.reader_size += buf.len() as u64; + state.stream.set_avail_in(buf.len() as u32); + } + + /// Get size of data read from the reader. + pub fn get_data_size(&self) -> u64 { + self.inner.lock().unwrap().reader_size + } + + /// Get sha256 hash value of data read from the reader. + pub fn get_data_digest(&self) -> Sha256 { + self.inner.lock().unwrap().reader_hash.clone() + } + + /// Get inflate context information for current inflate position. + fn get_current_ctx_info(&self) -> ZranCompInfo { + self.inner.lock().unwrap().get_compression_info() + } + + /// Get inflate context information for current inflate block. 
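Besides feeding decompressed bytes to `tar`, `ZranReader` keeps a running byte count and SHA-256 over the compressed data it consumes, which is useful for validating the original blob. A small sketch mirroring `test_zran_reader` below; the file name is a placeholder and the zran types are assumed to be in scope.

```rust
use std::fs::OpenOptions;
use sha2::Digest;

// Illustrative only: shows the byte/digest accounting of ZranReader.
fn inspect_reader_accounting() -> std::io::Result<()> {
    let file = OpenOptions::new().read(true).open("layer.tar.gz")?;
    let reader = ZranReader::new(file)?;
    assert_eq!(reader.get_data_size(), 0);

    // Bytes that are already available (e.g. prefetched while probing the blob)
    // can be pushed into the internal input buffer before decompression starts.
    let prefetched = vec![0u8; 32];
    reader.set_initial_data(&prefetched);
    assert_eq!(reader.get_data_size(), 32);

    // The running SHA-256 covers every compressed byte the reader has seen so far.
    let _digest = reader.get_data_digest().finalize();
    Ok(())
}
```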
+ fn get_block_ctx_info(&self) -> ZranCompInfo { + self.inner.lock().unwrap().block_ctx_info + } + + /// Get inflate dictionary for current inflate block. + fn get_block_ctx_dict(&self) -> Vec { + let state = self.inner.lock().unwrap(); + state.block_ctx_dict[..state.block_ctx_dict_size].to_vec() + } +} + +impl Read for ZranReader { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.lock().unwrap().read(buf) + } +} + +impl Clone for ZranReader { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +struct ZranCompInfo { + in_pos: u64, + out_pos: u64, + flags: u32, + previous_byte: u8, + pending_bits: u8, + stream_switched: u8, +} + +struct ZranReaderState { + stream: ZranStream, + input: Vec, + reader: R, + reader_hash: Sha256, + reader_size: u64, + block_ctx_info: ZranCompInfo, + block_ctx_dict: Vec, + block_ctx_dict_size: usize, + stream_switched: u8, +} + +impl ZranReaderState { + fn new(reader: R) -> Result { + let mut stream = ZranStream::new(false)?; + let input = vec![0u8; ZRAN_READER_BUF_SIZE]; + stream.set_next_in(&input[0..0]); + + Ok(ZranReaderState { + stream, + input, + reader, + reader_hash: Sha256::new(), + reader_size: 0, + block_ctx_info: ZranCompInfo::default(), + block_ctx_dict: vec![0u8; ZRAN_DICT_WIN_SIZE], + block_ctx_dict_size: 0, + stream_switched: 0, + }) + } + + /// Get decompression information about the stream. + fn get_compression_info(&mut self) -> ZranCompInfo { + let stream_switched = self.stream_switched; + self.stream_switched = 0; + self.stream + .get_compression_info(&self.input, stream_switched) + } + + fn get_compression_dict(&mut self) -> Result<()> { + self.block_ctx_dict_size = self.stream.get_compression_dict(&mut self.block_ctx_dict)?; + Ok(()) + } +} + +impl Read for ZranReaderState { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.stream.set_next_out(buf); + self.stream.set_avail_out(buf.len() as u32); + + loop { + // Reload the input buffer when needed. + if self.stream.avail_in() == 0 { + if self.stream.stream.next_in > self.input.as_mut_ptr() { + self.stream.last_byte = unsafe { *self.stream.stream.next_in.sub(1) }; + } + let sz = self.reader.read(self.input.as_mut_slice())?; + if sz == 0 { + return Ok(0); + } + self.reader_hash.update(&self.input[0..sz]); + self.reader_size += sz as u64; + self.stream.set_next_in(&self.input[..sz]); + } + + match self.stream.inflate(false) { + Z_STREAM_END => { + self.stream.reset()?; + self.stream_switched = 1; + continue; + } + Z_OK => { + let count = self.stream.next_out() as usize - buf.as_ptr() as usize; + let info = self.get_compression_info(); + if info.flags & 0x80 != 0 { + self.get_compression_dict()?; + self.block_ctx_info = info; + } + if count == 0 { + // zlib/gzip compression header, continue for next data block. + continue; + } else { + return Ok(count); + } + } + Z_BUF_ERROR => { + if self.stream.avail_in() == 0 { + // Need more input data, continue to feed data into the input buffer. 
+ continue; + } else { + return Err(eio!("failed to decode data from compressed data stream")); + } + } + e => { + return Err(eio!(format!( + "failed to decode data from compressed data stream, error code {}", + e + ))); + } + } + } + } +} + +struct ZranStream { + stream: Box, + total_in: u64, + total_out: u64, + last_byte: u8, +} + +impl ZranStream { + fn new(decode: bool) -> Result { + let mut stream = Box::new(z_stream { + next_in: ptr::null_mut(), + avail_in: 0, + total_in: 0, + next_out: ptr::null_mut(), + avail_out: 0, + total_out: 0, + msg: ptr::null_mut(), + adler: 0, + data_type: 0, + reserved: 0, + opaque: ptr::null_mut(), + state: ptr::null_mut(), + zalloc, + zfree, + }); + // windowBits can also be greater than 15 for optional gzip decoding. + // Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection, + // or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR). + // -15 means raw mode. + let mode = if decode { -15 } else { 31 }; + let ret = unsafe { + inflateInit2_( + stream.deref_mut() as *mut z_stream, + mode, + zlibVersion(), + mem::size_of::() as c_int, + ) + }; + if ret != Z_OK { + return Err(einval!("failed to initialize zlib inflate context")); + } + + Ok(Self { + stream, + total_in: 0, + total_out: 0, + last_byte: 0, + }) + } + + fn inflate(&mut self, decode: bool) -> i32 { + // Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block + // boundary. When decoding the zlib or gzip format, this will cause inflate() to return + // immediately after the header and before the first block. When doing a raw inflate, + // inflate() will go ahead and process the first block, and will return when it gets to + // the end of that block, or when it runs out of data. 
+ let mode = if decode { 0 } else { Z_BLOCK }; + self.total_in += self.stream.avail_in as u64; + self.total_out += self.stream.avail_out as u64; + let ret = self.raw_inflate(mode); + self.total_in -= self.stream.avail_in as u64; + self.total_out -= self.stream.avail_out as u64; + ret + } + + fn raw_inflate(&mut self, mode: i32) -> i32 { + unsafe { inflate(self.stream.deref_mut() as *mut z_stream, mode) } + } + + fn reset(&mut self) -> Result<()> { + let ret = unsafe { inflateReset(self.stream.deref_mut() as *mut z_stream) }; + if ret != Z_OK { + return Err(einval!("failed to reset zlib inflate context")); + } + Ok(()) + } + + fn reset2(&mut self, is_gzip: bool) -> Result<()> { + let winodw_bits = if is_gzip { 31 } else { -15 }; + let ret = unsafe { inflateReset2(self.stream.deref_mut() as *mut z_stream, winodw_bits) }; + if ret != Z_OK { + return Err(einval!("failed to reset zlib inflate context")); + } + Ok(()) + } + + fn get_compression_info(&mut self, buf: &[u8], stream_switched: u8) -> ZranCompInfo { + let previous_byte = if self.stream.data_type & 0x7 != 0 { + assert!(self.stream.next_in as usize >= buf.as_ptr() as usize); + if self.stream.next_in as usize == buf.as_ptr() as usize { + self.last_byte + } else { + unsafe { *self.stream.next_in.sub(1) } + } + } else { + 0 + }; + ZranCompInfo { + in_pos: self.total_in, + out_pos: self.total_out, + flags: self.stream.data_type as u32, + previous_byte, + pending_bits: self.stream.data_type as u8 & 0x7, + stream_switched, + } + } + + fn get_compression_dict(&mut self, buf: &mut [u8]) -> Result { + let mut len: uInt = 0; + assert_eq!(buf.len(), ZRAN_DICT_WIN_SIZE); + + let ret = unsafe { + inflateGetDictionary( + self.stream.deref_mut() as *mut z_stream, + buf.as_mut_ptr(), + &mut len as *mut uInt, + ) + }; + + if ret != Z_OK { + Err(einval!("failed to get inflate dictionary")) + } else { + Ok(len as usize) + } + } + + fn set_dict(&mut self, dict: &[u8]) -> Result<()> { + let ret = unsafe { + inflateSetDictionary(self.stream.deref_mut(), dict.as_ptr(), dict.len() as uInt) + }; + if ret != Z_OK { + return Err(einval!("failed to reset zlib inflate context")); + } + Ok(()) + } + + fn set_prime(&mut self, bits: u8, prime: u8) -> Result<()> { + let ret = unsafe { + inflatePrime( + self.stream.deref_mut(), + bits as c_int, + prime as c_int >> (8 - bits), + ) + }; + if ret != Z_OK { + return Err(einval!("failed to reset zlib inflate context")); + } + Ok(()) + } + + fn set_next_in(&mut self, buf: &[u8]) { + self.stream.next_in = buf.as_ptr() as *mut u8; + self.set_avail_in(buf.len() as u32); + } + + fn avail_in(&self) -> u32 { + self.stream.avail_in + } + + fn avail_out(&self) -> u32 { + self.stream.avail_out + } + + fn data_type(&self) -> i32 { + self.stream.data_type + } + + fn set_avail_in(&mut self, avail_in: u32) { + self.stream.avail_in = avail_in; + } + + fn next_out(&self) -> *mut u8 { + self.stream.next_out + } + + fn set_next_out(&mut self, buf: &mut [u8]) { + self.stream.next_out = buf.as_mut_ptr(); + } + + fn set_avail_out(&mut self, avail_out: u32) { + self.stream.avail_out = avail_out; + } +} + +impl Drop for ZranStream { + fn drop(&mut self) { + unsafe { inflateEnd(self.stream.deref_mut() as *mut z_stream) }; + } +} + +// Code from https://github.com/rust-lang/flate2-rs/blob/main/src/ffi/c.rs with modification. 
+fn align_up(size: usize, align: usize) -> usize { + (size + align - 1) & !(align - 1) +} + +#[allow(unused)] +extern "C" fn zalloc(_ptr: *mut c_void, items: uInt, item_size: uInt) -> *mut c_void { + // We need to multiply `items` and `item_size` to get the actual desired + // allocation size. Since `zfree` doesn't receive a size argument we + // also need to allocate space for a `usize` as a header so we can store + // how large the allocation is to deallocate later. + let size = match items + .checked_mul(item_size) + .and_then(|i| usize::try_from(i).ok()) + .map(|size| align_up(size, ZLIB_ALIGN)) + .and_then(|i| i.checked_add(std::mem::size_of::())) + { + Some(i) => i, + None => return ptr::null_mut(), + }; + + // Make sure the `size` isn't too big to fail `Layout`'s restrictions + let layout = match Layout::from_size_align(size, ZLIB_ALIGN) { + Ok(layout) => layout, + Err(_) => return ptr::null_mut(), + }; + + unsafe { + // Allocate the data, and if successful store the size we allocated + // at the beginning and then return an offset pointer. + let ptr = alloc::alloc(layout) as *mut usize; + if ptr.is_null() { + return ptr as *mut c_void; + } + *ptr = size; + ptr.add(1) as *mut c_void + } +} + +#[allow(unused)] +extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) { + unsafe { + // Move our address being freed back one pointer, read the size we + // stored in `zalloc`, and then free it using the standard Rust + // allocator. + let ptr = (address as *mut usize).offset(-1); + let size = *ptr; + let layout = Layout::from_size_align_unchecked(size, ZLIB_ALIGN); + alloc::dealloc(ptr as *mut u8, layout) + } +} + +extern "system" { + pub fn inflateGetDictionary( + strm: *mut z_stream, + dictionary: *mut u8, + dictLength: *mut uInt, + ) -> c_int; +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::OpenOptions; + use std::io::{Seek, SeekFrom}; + use std::path::PathBuf; + use tar::{Archive, EntryType}; + + #[test] + fn test_parse_single_gzip_object() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-single-stream.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + + let mut files = 0; + let mut objects = 0; + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader); + let entries = tar.entries().unwrap(); + for entry in entries { + let entry = entry.unwrap(); + objects += 1; + if entry.header().entry_type() == EntryType::Regular { + files += 1; + } + } + + assert_eq!(objects, 7); + assert_eq!(files, 3); + } + + #[test] + fn test_parse_first_gzip_object() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + + let mut files = 0; + let mut objects = 0; + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader); + + let entries = tar.entries().unwrap(); + for entry in entries { + let entry = entry.unwrap(); + objects += 1; + if entry.header().entry_type() == EntryType::Regular { + files += 1; + } + } + + assert_eq!(objects, 7); + assert_eq!(files, 3); + } + + #[test] + fn test_parse_two_gzip_objects() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = 
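`zalloc()` above has to remember each allocation's size itself, because `zfree()` only receives a pointer; it does so by over-allocating one `usize` header, storing the total size there, and handing zlib the address just past the header. A small arithmetic sketch of the size computation follows; `align_up` is re-declared locally so the sketch is self-contained, and the concrete numbers assume a 64-bit target where `ZLIB_ALIGN` is 8.

```rust
// align_up() re-declared locally for self-containment; matches the definition above.
fn align_up(size: usize, align: usize) -> usize {
    (size + align - 1) & !(align - 1)
}

fn main() {
    // Matches ZLIB_ALIGN above; 8 on common 64-bit targets (the numbers below assume that).
    const ZLIB_ALIGN: usize = std::mem::align_of::<usize>();

    // zlib asks for items * item_size bytes; zalloc() rounds that up and reserves
    // one extra usize in front to remember the total size for zfree().
    let items: usize = 3;
    let item_size: usize = 5;
    let payload = align_up(items * item_size, ZLIB_ALIGN); // 15 -> 16
    let total = payload + std::mem::size_of::<usize>(); // 16 + 8 = 24 on 64-bit

    assert_eq!(payload, 16);
    assert_eq!(total, 24);
    println!("a {}-byte request becomes a {}-byte allocation", items * item_size, total);
}
```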
OpenOptions::new().read(true).open(path).unwrap(); + + let mut files = 0; + let mut objects = 0; + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader); + tar.set_ignore_zeros(true); + + let entries = tar.entries().unwrap(); + for entry in entries { + let entry = entry.unwrap(); + objects += 1; + if entry.header().entry_type() == EntryType::Regular { + files += 1; + } + } + + assert_eq!(objects, 10); + assert_eq!(files, 5); + } + + #[test] + fn test_parse_gzip_with_big_zero() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-zero-file.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader.clone()); + let entries = tar.entries().unwrap(); + + let mut last: Option = None; + for entry in entries { + let mut entry = entry.unwrap(); + assert_eq!(entry.header().entry_type(), EntryType::Regular); + loop { + let mut buf = vec![0u8; 512]; + let sz = entry.read(&mut buf).unwrap(); + if sz == 0 { + break; + } + + let info = reader.get_current_ctx_info(); + if let Some(prev) = last { + assert_ne!(prev, info); + } + last = Some(info); + } + } + } + + #[test] + fn test_generate_comp_info() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader.clone()); + tar.set_ignore_zeros(true); + let mut generator = ZranGenerator::new(reader); + generator.set_min_compressed_size(1024); + generator.set_max_compressed_size(2048); + generator.set_max_uncompressed_size(4096); + + let entries = tar.entries().unwrap(); + for entry in entries { + let mut entry = entry.unwrap(); + if entry.header().entry_type() == EntryType::Regular { + loop { + let _start = generator.begin_read(512).unwrap(); + let mut buf = vec![0u8; 512]; + let sz = entry.read(&mut buf).unwrap(); + if sz == 0 { + break; + } + let _info = generator.end_read().unwrap(); + } + } + } + + let ctx = generator.get_compression_ctx_array(); + assert_eq!(ctx.len(), 3); + } + + #[test] + fn test_zran_bgzip() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/bgzip.tar.gz"); + let file = OpenOptions::new().read(true).open(&path).unwrap(); + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader.clone()); + tar.set_ignore_zeros(true); + let mut generator = ZranGenerator::new(reader); + generator.set_min_compressed_size(1024); + generator.set_max_compressed_size(2048); + generator.set_max_uncompressed_size(4096); + + let entries = tar.entries().unwrap(); + for entry in entries { + let mut entry = entry.unwrap(); + if entry.header().entry_type() == EntryType::Regular { + loop { + let _start = generator.begin_read(512).unwrap(); + let mut buf = vec![0u8; 512]; + let sz = entry.read(&mut buf).unwrap(); + let _info = generator.end_read().unwrap(); + if sz == 0 { + break; + } + } + } + } + + let ctx_array = generator.get_compression_ctx_array(); + for ctx in ctx_array.iter() { + let mut c_buf = vec![0u8; ctx.in_len as usize]; + let mut file = OpenOptions::new().read(true).open(&path).unwrap(); + file.seek(SeekFrom::Start(ctx.in_offset)).unwrap(); 
+ file.read_exact(&mut c_buf).unwrap(); + + let mut d_buf = vec![0u8; ctx.out_len as usize]; + let mut decoder = ZranDecoder::new().unwrap(); + decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap(); + } + } + + #[test] + fn test_zran_decoder() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = OpenOptions::new().read(true).open(&path).unwrap(); + + let reader = ZranReader::new(file).unwrap(); + let mut tar = Archive::new(reader.clone()); + tar.set_ignore_zeros(true); + let mut generator = ZranGenerator::new(reader); + generator.set_min_compressed_size(1024); + generator.set_max_compressed_size(2048); + generator.set_max_uncompressed_size(4096); + + let entries = tar.entries().unwrap(); + for entry in entries { + let mut entry = entry.unwrap(); + if entry.header().entry_type() == EntryType::Regular { + loop { + let _start = generator.begin_read(512).unwrap(); + let mut buf = vec![0u8; 512]; + let sz = entry.read(&mut buf).unwrap(); + let _info = generator.end_read().unwrap(); + if sz == 0 { + break; + } + } + } + } + + let ctx_array = generator.get_compression_ctx_array(); + assert_eq!(ctx_array.len(), 3); + for ctx in ctx_array.iter().take(3) { + let mut c_buf = vec![0u8; ctx.in_len as usize]; + let mut file = OpenOptions::new().read(true).open(&path).unwrap(); + file.seek(SeekFrom::Start(ctx.in_offset)).unwrap(); + file.read_exact(&mut c_buf).unwrap(); + + let mut d_buf = vec![0u8; ctx.out_len as usize]; + let mut decoder = ZranDecoder::new().unwrap(); + decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap(); + } + } + + #[test] + fn test_zran_reader() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz"); + let file = OpenOptions::new().read(true).open(path).unwrap(); + + let reader = ZranReader::new(file).unwrap(); + assert_eq!(reader.get_data_size(), 0); + + let buf = vec![0x0u8; 32]; + reader.set_initial_data(&buf); + assert_eq!(reader.get_data_size(), 32); + } +} diff --git a/utils/src/crypt.rs b/utils/src/crypt.rs index f37b40670fe..acdf505f0ce 100644 --- a/utils/src/crypt.rs +++ b/utils/src/crypt.rs @@ -1,809 +1,809 @@ -// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::alloc::{alloc, Layout}; -use std::borrow::Cow; -use std::convert::TryFrom; -use std::fmt::{self, Debug, Formatter}; -use std::io::Error; -use std::str::FromStr; -use std::sync::Arc; - -use openssl::{rand, symm}; - -// The length of the data unit to be encrypted. -pub const DATA_UNIT_LENGTH: usize = 16; -// The length of thd iv (Initialization Vector) to do AES-XTS encryption. -pub const AES_XTS_IV_LENGTH: usize = 16; -// The length of the key to do AES-128-XTS encryption. -pub const AES_128_XTS_KEY_LENGTH: usize = 32; -// The length of the key to do AES-256-XTS encryption. -pub const AES_256_XTS_KEY_LENGTH: usize = 64; -// The length of the key to do AES-256-GCM encryption. -pub const AES_256_GCM_KEY_LENGTH: usize = 32; - -// The padding magic end. -pub const PADDING_MAGIC_END: [u8; 2] = [0x78, 0x90]; -// DATA_UNIT_LENGTH + length of PADDING_MAGIC_END. -pub const PADDING_LENGTH: usize = 18; -// Openssl rejects keys with identical first and second halves for xts. -// Use a default key for such cases. 
-const DEFAULT_CE_KEY: [u8; 32] = [ - 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, - 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, -]; -const DEFAULT_CE_KEY_64: [u8; 64] = [ - 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, - 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, - 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, - 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, -]; - -/// Supported cipher algorithms. -#[repr(u32)] -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] -pub enum Algorithm { - #[default] - None = 0, - Aes128Xts = 1, - Aes256Xts = 2, - Aes256Gcm = 3, -} - -impl Algorithm { - /// Create a new cipher object. - pub fn new_cipher(&self) -> Result { - match self { - Algorithm::None => Ok(Cipher::None), - Algorithm::Aes128Xts => { - let cipher = symm::Cipher::aes_128_xts(); - Ok(Cipher::Aes128Xts(cipher)) - } - Algorithm::Aes256Xts => { - let cipher = symm::Cipher::aes_256_xts(); - Ok(Cipher::Aes256Xts(cipher)) - } - Algorithm::Aes256Gcm => { - let cipher = symm::Cipher::aes_256_gcm(); - Ok(Cipher::Aes256Gcm(cipher)) - } - } - } - - /// Check whether data encryption is enabled or not. - pub fn is_encryption_enabled(&self) -> bool { - *self != Algorithm::None - } - - /// Check whether algorithm is AEAD. - pub fn is_aead(&self) -> bool { - match self { - Algorithm::None => false, - Algorithm::Aes128Xts => false, - Algorithm::Aes256Xts => false, - Algorithm::Aes256Gcm => true, - } - } - - /// Get size of tag associated with encrypted data. - pub fn tag_size(&self) -> usize { - match self { - Algorithm::None => 0, - Algorithm::Aes128Xts => 0, - Algorithm::Aes256Xts => 0, - Algorithm::Aes256Gcm => 12, - } - } - - /// Get key size of the encryption algorithm. 
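The `Algorithm` enum above is normally selected by name (for example from configuration) and queried for its properties before a `Cipher` object is built. A minimal sketch, assuming the crypt types are in scope; the helper name is illustrative.

```rust
use std::str::FromStr;

// Hypothetical helper: pick an algorithm by name and inspect its properties
// before building the cipher object.
fn pick_cipher(name: &str) -> Result<Cipher, std::io::Error> {
    let algo = Algorithm::from_str(name)?; // "none", "aes128xts", "aes256xts" or "aes256gcm"
    if algo.is_aead() {
        // Only AES-256-GCM is AEAD here; it carries a 12-byte tag per encrypted chunk.
        assert_eq!(algo.tag_size(), 12);
    }
    println!("{} uses a {}-byte key", algo, algo.key_length());
    algo.new_cipher()
}
```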
- pub fn key_length(&self) -> usize { - match self { - Algorithm::None => 0, - Algorithm::Aes128Xts => AES_128_XTS_KEY_LENGTH, - Algorithm::Aes256Xts => AES_256_XTS_KEY_LENGTH, - Algorithm::Aes256Gcm => AES_256_GCM_KEY_LENGTH, - } - } -} - -impl fmt::Display for Algorithm { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -impl FromStr for Algorithm { - type Err = Error; - - fn from_str(s: &str) -> Result { - match s { - "none" => Ok(Self::None), - "aes128xts" => Ok(Self::Aes128Xts), - "aes256xts" => Ok(Self::Aes256Xts), - "aes256gcm" => Ok(Self::Aes256Gcm), - _ => Err(einval!("cypher algorithm should be none or aes_gcm")), - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u32) -> Result { - if value == Algorithm::None as u32 { - Ok(Algorithm::None) - } else if value == Algorithm::Aes128Xts as u32 { - Ok(Algorithm::Aes128Xts) - } else if value == Algorithm::Aes256Xts as u32 { - Ok(Algorithm::Aes256Xts) - } else if value == Algorithm::Aes256Gcm as u32 { - Ok(Algorithm::Aes256Gcm) - } else { - Err(()) - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u64) -> Result { - if value == Algorithm::None as u64 { - Ok(Algorithm::None) - } else if value == Algorithm::Aes128Xts as u64 { - Ok(Algorithm::Aes128Xts) - } else if value == Algorithm::Aes256Xts as u64 { - Ok(Algorithm::Aes256Xts) - } else if value == Algorithm::Aes256Gcm as u64 { - Ok(Algorithm::Aes256Gcm) - } else { - Err(()) - } - } -} - -/// Cipher object to encrypt/decrypt data. -#[derive(Default)] -pub enum Cipher { - #[default] - None, - Aes128Xts(symm::Cipher), - Aes256Xts(symm::Cipher), - Aes256Gcm(symm::Cipher), -} - -impl Debug for Cipher { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Cipher::None => write!(f, "cipher: none"), - Cipher::Aes128Xts(_) => write!(f, "cypher: aes128_xts"), - Cipher::Aes256Xts(_) => write!(f, "cypher: aes256_xts"), - Cipher::Aes256Gcm(_) => write!(f, "cipher: aes256_gcm"), - } - } -} - -impl Cipher { - /// Encrypt plaintext with optional IV and return the encrypted data. - /// - /// For XTS, the caller needs to ensure that the top half of key is not identical to the - /// bottom half of the key, otherwise the encryption will fail. - pub fn encrypt<'a>( - &self, - key: &[u8], - iv: Option<&[u8]>, - data: &'a [u8], - ) -> Result, Error> { - match self { - Cipher::None => Ok(Cow::from(data)), - Cipher::Aes128Xts(cipher) => { - assert_eq!(key.len(), AES_128_XTS_KEY_LENGTH); - let mut buf; - let data = if data.len() >= DATA_UNIT_LENGTH { - data - } else { - // CMS (Cryptographic Message Syntax). - // This pads with the same value as the number of padding bytes - // and appends the magic padding end. 
- let val = (DATA_UNIT_LENGTH - data.len()) as u8; - buf = [val; PADDING_LENGTH]; - buf[..data.len()].copy_from_slice(data); - buf[DATA_UNIT_LENGTH..PADDING_LENGTH].copy_from_slice(&PADDING_MAGIC_END); - &buf - }; - Self::cipher(*cipher, symm::Mode::Encrypt, key, iv, data) - .map(Cow::from) - .map_err(|e| eother!(format!("failed to encrypt data, {}", e))) - } - Cipher::Aes256Xts(cipher) => { - assert_eq!(key.len(), AES_256_XTS_KEY_LENGTH); - let mut buf; - let data = if data.len() >= DATA_UNIT_LENGTH { - data - } else { - let val = (DATA_UNIT_LENGTH - data.len()) as u8; - buf = [val; PADDING_LENGTH]; - buf[..data.len()].copy_from_slice(data); - buf[DATA_UNIT_LENGTH..PADDING_LENGTH].copy_from_slice(&PADDING_MAGIC_END); - &buf - }; - Self::cipher(*cipher, symm::Mode::Encrypt, key, iv, data) - .map(Cow::from) - .map_err(|e| eother!(format!("failed to encrypt data, {}", e))) - } - Cipher::Aes256Gcm(_cipher) => { - Err(einval!("Cipher::encrypt() doesn't support Aes256Gcm")) - } - } - } - - /// Decrypt encrypted data with optional IV and return the decrypted data. - pub fn decrypt(&self, key: &[u8], iv: Option<&[u8]>, data: &[u8]) -> Result, Error> { - let mut data = match self { - Cipher::None => Ok(data.to_vec()), - Cipher::Aes128Xts(cipher) => Self::cipher(*cipher, symm::Mode::Decrypt, key, iv, data) - .map_err(|e| eother!(format!("failed to decrypt data, {}", e))), - Cipher::Aes256Xts(cipher) => Self::cipher(*cipher, symm::Mode::Decrypt, key, iv, data) - .map_err(|e| eother!(format!("failed to decrypt data, {}", e))), - Cipher::Aes256Gcm(_cipher) => { - Err(einval!("Cipher::decrypt() doesn't support Aes256Gcm")) - } - }?; - - // Trim possible padding. - if data.len() == PADDING_LENGTH - && data[PADDING_LENGTH - PADDING_MAGIC_END.len()..PADDING_LENGTH] == PADDING_MAGIC_END - { - let val = data[DATA_UNIT_LENGTH - 1] as usize; - if val < DATA_UNIT_LENGTH { - data.truncate(DATA_UNIT_LENGTH - val); - } else { - return Err(einval!(format!( - "Cipher::decrypt: invalid padding data, value {}", - val, - ))); - } - }; - - Ok(data) - } - - /// Encrypt plaintext and return the ciphertext with authentication tag. - pub fn encrypt_aead( - &self, - key: &[u8], - iv: Option<&[u8]>, - data: &[u8], - tag: &mut [u8], - ) -> Result, Error> { - match self { - Cipher::Aes256Gcm(cipher) => symm::encrypt_aead(*cipher, key, iv, &[], data, tag) - .map_err(|e| eother!(format!("failed to encrypt data, {}", e))), - _ => Err(einval!("invalid algorithm for encrypt_aead()")), - } - } - - /// Decrypt plaintext and return the encrypted data with authentication tag. - pub fn decrypt_aead( - &self, - key: &[u8], - iv: Option<&[u8]>, - data: &[u8], - tag: &[u8], - ) -> Result, Error> { - match self { - Cipher::Aes256Gcm(cipher) => symm::decrypt_aead(*cipher, key, iv, &[], data, tag) - .map_err(|e| eother!(format!("failed to encrypt data, {}", e))), - _ => Err(einval!("invalid algorithm for decrypt_aead()")), - } - } - - /// Get size of tag associated with encrypted data. - pub fn tag_size(&self) -> usize { - match self { - Cipher::Aes256Gcm(_) => 12, - _ => 0, - } - } - - /// Get size of ciphertext from size of plaintext. - pub fn encrypted_size(&self, plaintext_size: usize) -> usize { - match self { - Cipher::None => plaintext_size, - Cipher::Aes128Xts(_) | Cipher::Aes256Xts(_) => { - if plaintext_size < DATA_UNIT_LENGTH { - DATA_UNIT_LENGTH - } else { - plaintext_size - } - } - Cipher::Aes256Gcm(_) => { - assert!(plaintext_size.checked_add(12).is_some()); - plaintext_size + 12 - } - } - } - - /// Tweak key for XTS mode. 
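The padding logic above means any plaintext shorter than DATA_UNIT_LENGTH (16 bytes) is encrypted as an 18-byte unit: the data, pad bytes whose value equals the pad count, and the 2-byte PADDING_MAGIC_END, which `decrypt()` recognizes and strips again. A short round-trip sketch mirroring `test_aes_128_xts_encrypt`/`test_aes_128_xts_decrypt` below; the key and IV values are illustrative.

```rust
// Round-trip of a short plaintext through AES-128-XTS, showing the CMS-style
// padding and its removal on decryption.
fn short_block_round_trip() -> Result<(), std::io::Error> {
    let mut key = [0xcu8; 32];
    key[31] = 0xa; // the two key halves must differ for XTS
    let iv = [0u8; 16];

    let cipher = Algorithm::Aes128Xts.new_cipher()?;
    let ciphertext = cipher.encrypt(key.as_slice(), Some(iv.as_slice()), b"1")?;
    assert_eq!(ciphertext.len(), PADDING_LENGTH); // a 1-byte input grows to 18 bytes

    let plaintext = cipher.decrypt(key.as_slice(), Some(iv.as_slice()), &ciphertext)?;
    assert_eq!(&plaintext, b"1"); // the padding is detected and stripped again
    Ok(())
}
```

The AEAD variant, `encrypt_aead()`/`decrypt_aead()`, does not use this padding; it instead carries a 12-byte authentication tag alongside the ciphertext.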
- pub fn tweak_key_for_xts(key: &[u8]) -> Cow<[u8]> { - let len = key.len() >> 1; - if key[..len] == key[len..] { - let mut buf = if key[len] == 0xa5 { - vec![0x5a; key.len()] - } else { - vec![0xa5; key.len()] - }; - buf[len..].copy_from_slice(&key[len..]); - Cow::from(buf) - } else { - Cow::from(key) - } - } - - fn cipher( - t: symm::Cipher, - mode: symm::Mode, - key: &[u8], - iv: Option<&[u8]>, - data: &[u8], - ) -> Result, Error> { - let mut c = symm::Crypter::new(t, mode, key, iv)?; - let mut out = alloc_buf(data.len() + t.block_size()); - let count = c.update(data, &mut out)?; - let rest = c.finalize(&mut out[count..])?; - out.truncate(count + rest); - Ok(out) - } - - pub fn generate_random_key(cipher_algo: Algorithm) -> Result, Error> { - let length = cipher_algo.key_length(); - let mut buf = vec![0u8; length]; - if let Err(e) = rand::rand_bytes(&mut buf) { - Err(eother!(format!( - "failed to generate key for {}, {}", - cipher_algo, e - ))) - } else { - Ok(Self::tweak_key_for_xts(&buf).to_vec()) - } - } - - pub fn generate_random_iv() -> Result, Error> { - let mut buf = vec![0u8; AES_XTS_IV_LENGTH]; - if let Err(e) = rand::rand_bytes(&mut buf) { - Err(eother!(format!("failed to generate iv, {}", e))) - } else { - Ok(buf) - } - } -} - -/// Struct to provide context information for data encryption/decryption. -#[derive(Default, Debug, Clone)] -pub struct CipherContext { - key: Vec, - iv: Vec, - convergent_encryption: bool, - cipher_algo: Algorithm, -} - -impl CipherContext { - /// Create a new instance of [CipherContext]. - pub fn new( - key: Vec, - iv: Vec, - convergent_encryption: bool, - cipher_algo: Algorithm, - ) -> Result { - let key_length = key.len(); - if key_length != cipher_algo.key_length() { - return Err(einval!(format!( - "invalid key length {} for {} encryption", - key_length, cipher_algo - ))); - } else if key[0..key_length >> 1] == key[key_length >> 1..key_length] { - return Err(einval!("invalid symmetry key for encryption")); - } - - Ok(CipherContext { - key, - iv, - convergent_encryption, - cipher_algo, - }) - } - - /// Generate context information from data for encryption/decryption. - pub fn generate_cipher_meta<'a>(&'a self, data: &'a [u8]) -> (&'a [u8], Vec) { - let length = data.len(); - assert_eq!(length, self.cipher_algo.key_length()); - let iv = vec![0u8; AES_XTS_IV_LENGTH]; - if self.convergent_encryption { - if length == AES_128_XTS_KEY_LENGTH && data[0..length >> 1] == data[length >> 1..length] - { - (&DEFAULT_CE_KEY, iv) - } else if length == AES_256_XTS_KEY_LENGTH - && data[0..length >> 1] == data[length >> 1..length] - { - (&DEFAULT_CE_KEY_64, iv) - } else { - (data, iv) - } - } else { - (&self.key, iv) - } - } - - /// Get context information for meta data encryption/decryption. - pub fn get_cipher_meta(&self) -> (&[u8], &[u8]) { - (&self.key, &self.iv) - } -} - -/// A customized buf allocator that avoids zeroing -fn alloc_buf(size: usize) -> Vec { - assert!(size < isize::MAX as usize); - let layout = Layout::from_size_align(size, 0x1000) - .unwrap() - .pad_to_align(); - let ptr = unsafe { alloc(layout) }; - unsafe { Vec::from_raw_parts(ptr, size, layout.size()) } -} - -// Encrypt data with Cipher and CipherContext. -pub fn encrypt_with_context<'a>( - data: &'a [u8], - cipher_obj: &Arc, - cipher_ctx: &Option, - encrypted: bool, -) -> Result, Error> { - if encrypted { - if let Some(cipher_ctx) = cipher_ctx { - let (key, iv) = cipher_ctx.get_cipher_meta(); - Ok(cipher_obj.encrypt(key, Some(iv), data)?) 
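Because OpenSSL refuses XTS keys whose two halves are identical, both `CipherContext::new()` above and the key-generation helpers guard against such keys. A small sketch of those guard rails, mirroring `test_tweak_key_for_xts` below; the all-zero key is purely illustrative and the crypt types are assumed to be in scope.

```rust
// Guard rails around XTS keys: reject or tweak keys with identical halves.
fn prepare_xts_key() -> Result<Vec<u8>, std::io::Error> {
    // A key whose halves are identical is refused outright by CipherContext::new().
    let bad_key = vec![0u8; AES_128_XTS_KEY_LENGTH];
    let iv = vec![0u8; AES_XTS_IV_LENGTH];
    assert!(CipherContext::new(bad_key.clone(), iv, true, Algorithm::Aes128Xts).is_err());

    // tweak_key_for_xts() rewrites the first half so the two halves differ.
    let tweaked = Cipher::tweak_key_for_xts(&bad_key);
    assert_ne!(&tweaked[..16], &tweaked[16..]);

    // Freshly generated keys are already tweaked when necessary.
    Cipher::generate_random_key(Algorithm::Aes128Xts)
}
```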
- } else { - Err(einval!("the encrypt context can not be none")) - } - } else { - Ok(Cow::Borrowed(data)) - } -} - -// Decrypt data with Cipher and CipherContext. -pub fn decrypt_with_context<'a>( - data: &'a [u8], - cipher_obj: &Arc, - cipher_ctx: &Option, - encrypted: bool, -) -> Result, Error> { - if encrypted { - if let Some(cipher_ctx) = cipher_ctx { - let (key, iv) = cipher_ctx.get_cipher_meta(); - Ok(Cow::from(cipher_obj.decrypt(key, Some(iv), data)?)) - } else { - Err(einval!("the decrypt context can not be none")) - } - } else { - Ok(Cow::Borrowed(data)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_aes_128_xts_encrypt() { - let mut key = [0xcu8; 32]; - key[31] = 0xa; - - let cipher = Algorithm::Aes128Xts.new_cipher().unwrap(); - assert_eq!(cipher.encrypted_size(1), 16); - assert_eq!(cipher.encrypted_size(16), 16); - assert_eq!(cipher.encrypted_size(17), 17); - - let ciphertext1 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - let ciphertext2 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - assert_eq!(ciphertext1, ciphertext2); - assert_eq!(ciphertext2.len(), PADDING_LENGTH); - - let ciphertext3 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") - .unwrap(); - assert_eq!(ciphertext3.len(), 17); - - let ciphertext4 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") - .unwrap(); - assert_eq!(ciphertext4.len(), 17); - assert_ne!(ciphertext4, ciphertext3); - - let ciphertext5 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"21111111111111111") - .unwrap(); - assert_eq!(ciphertext5.len(), 17); - assert_ne!(ciphertext5, ciphertext4); - } - - #[test] - fn test_aes_256_xts_encrypt() { - let mut key = [0xcu8; 64]; - key[31] = 0xa; - - let cipher = Algorithm::Aes256Xts.new_cipher().unwrap(); - let ciphertext1 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - let ciphertext2 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - assert_eq!(ciphertext1, ciphertext2); - assert_eq!(ciphertext2.len(), PADDING_LENGTH); - - let ciphertext3 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") - .unwrap(); - assert_eq!(ciphertext3.len(), 17); - - let ciphertext4 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") - .unwrap(); - assert_eq!(ciphertext4.len(), 17); - assert_ne!(ciphertext4, ciphertext3); - - let ciphertext5 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"21111111111111111") - .unwrap(); - assert_eq!(ciphertext5.len(), 17); - assert_ne!(ciphertext5, ciphertext4); - } - - #[test] - fn test_aes_128_xts_decrypt() { - let mut key = [0xcu8; 32]; - key[31] = 0xa; - - let cipher = Algorithm::Aes128Xts.new_cipher().unwrap(); - let ciphertext1 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - let plaintext1 = cipher - .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext1) - .unwrap(); - assert_eq!(&plaintext1, b"1"); - - let ciphertext2 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") - .unwrap(); - let plaintext2 = cipher - .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext2) - .unwrap(); - assert_eq!(&plaintext2, b"11111111111111111"); - - let ciphertext3 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") - .unwrap(); - let plaintext3 = cipher - .decrypt(key.as_slice(), Some(&[1u8; 16]), &ciphertext3) - .unwrap(); - assert_eq!(&plaintext3, b"11111111111111111"); - } - - #[test] - 
fn test_aes_256_xts_decrypt() { - let mut key = [0xcu8; 64]; - key[31] = 0xa; - - let cipher = Algorithm::Aes256Xts.new_cipher().unwrap(); - let ciphertext1 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") - .unwrap(); - let plaintext1 = cipher - .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext1) - .unwrap(); - assert_eq!(&plaintext1, b"1"); - - let ciphertext2 = cipher - .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") - .unwrap(); - let plaintext2 = cipher - .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext2) - .unwrap(); - assert_eq!(&plaintext2, b"11111111111111111"); - - let ciphertext3 = cipher - .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") - .unwrap(); - let plaintext3 = cipher - .decrypt(key.as_slice(), Some(&[1u8; 16]), &ciphertext3) - .unwrap(); - assert_eq!(&plaintext3, b"11111111111111111"); - } - - #[test] - fn test_aes_256_gcm() { - let key = [0xcu8; 32]; - let mut tag = vec![0u8; 12]; - - let cipher = Algorithm::Aes256Gcm.new_cipher().unwrap(); - assert_eq!(cipher.tag_size(), 12); - assert_eq!(cipher.encrypted_size(1), 13); - - let ciphertext1 = cipher - .encrypt_aead(key.as_slice(), Some(&[0u8; 16]), b"1", &mut tag) - .unwrap(); - assert_eq!(ciphertext1.len(), 1); - assert_eq!(tag.len(), 12); - let plaintext1 = cipher - .decrypt_aead(key.as_slice(), Some(&[0u8; 16]), &ciphertext1, &tag) - .unwrap(); - assert_eq!(&plaintext1, b"1"); - - let ciphertext2 = cipher - .encrypt_aead( - key.as_slice(), - Some(&[0u8; 16]), - b"11111111111111111", - &mut tag, - ) - .unwrap(); - assert_eq!(ciphertext2.len(), 17); - assert_eq!(tag.len(), 12); - let plaintext2 = cipher - .decrypt_aead(key.as_slice(), Some(&[0u8; 16]), &ciphertext2, &tag) - .unwrap(); - assert_eq!(&plaintext2, b"11111111111111111"); - - let ciphertext3 = cipher - .encrypt_aead( - key.as_slice(), - Some(&[1u8; 16]), - b"11111111111111111", - &mut tag, - ) - .unwrap(); - assert_ne!(ciphertext3, ciphertext2); - assert_eq!(ciphertext3.len(), 17); - assert_eq!(tag.len(), 12); - let plaintext3 = cipher - .decrypt_aead(key.as_slice(), Some(&[1u8; 16]), &ciphertext3, &tag) - .unwrap(); - assert_eq!(&plaintext3, b"11111111111111111"); - } - - #[test] - fn test_tweak_key_for_xts() { - let buf = vec![0x0; 32]; - let buf2 = Cipher::tweak_key_for_xts(&buf); - assert_eq!(buf2[0], 0xa5); - assert_eq!(buf2[16], 0x0); - - let buf = vec![0xa5; 32]; - let buf2 = Cipher::tweak_key_for_xts(&buf); - assert_eq!(buf2[0], 0x5a); - assert_eq!(buf2[16], 0xa5); - } - - #[test] - fn test_attribute() { - let none = Algorithm::None.new_cipher().unwrap(); - let aes128xts = Algorithm::Aes128Xts.new_cipher().unwrap(); - let aes256xts = Algorithm::Aes256Xts.new_cipher().unwrap(); - let aes256gcm = Algorithm::Aes256Gcm.new_cipher().unwrap(); - - assert!(!Algorithm::None.is_encryption_enabled()); - assert!(Algorithm::Aes128Xts.is_encryption_enabled()); - assert!(Algorithm::Aes256Xts.is_encryption_enabled()); - assert!(Algorithm::Aes256Gcm.is_encryption_enabled()); - - assert!(!Algorithm::None.is_aead()); - assert!(!Algorithm::Aes128Xts.is_aead()); - assert!(!Algorithm::Aes256Xts.is_aead()); - assert!(Algorithm::Aes256Gcm.is_aead()); - - assert_eq!(Algorithm::None.tag_size(), 0); - assert_eq!(Algorithm::Aes128Xts.tag_size(), 0); - assert_eq!(Algorithm::Aes256Xts.tag_size(), 0); - assert_eq!(Algorithm::Aes256Gcm.tag_size(), 12); - - assert_eq!(Algorithm::None.key_length(), 0); - assert_eq!(Algorithm::Aes128Xts.key_length(), AES_128_XTS_KEY_LENGTH); - assert_eq!(Algorithm::Aes256Xts.key_length(), 
AES_256_XTS_KEY_LENGTH); - assert_eq!(Algorithm::Aes256Gcm.key_length(), AES_256_GCM_KEY_LENGTH); - - print!("{}", Algorithm::Aes128Xts); - assert!(Algorithm::from_str("none").is_ok()); - assert!(Algorithm::from_str("aes128xts").is_ok()); - assert!(Algorithm::from_str("aes256xts").is_ok()); - assert!(Algorithm::from_str("aes256gcm").is_ok()); - assert!(Algorithm::from_str("non-exist").is_err()); - - assert!(Algorithm::try_from(Algorithm::None as u32).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes128Xts as u32).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes256Xts as u32).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes256Gcm as u32).is_ok()); - assert!(Algorithm::try_from(u32::MAX).is_err()); - - assert!(Algorithm::try_from(Algorithm::None as u64).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes128Xts as u64).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes256Xts as u64).is_ok()); - assert!(Algorithm::try_from(Algorithm::Aes256Gcm as u64).is_ok()); - assert!(Algorithm::try_from(u64::MAX).is_err()); - - println!("{:?},{:?},{:?},{:?}", none, aes128xts, aes256xts, aes256gcm); - } - - #[test] - fn test_crypt_with_context() { - let error_key = [0xcu8, 64]; - let symmetry_key = [0xcu8, 32]; - let mut key = [0xcu8; 32]; - key[31] = 0xa; - let iv = [0u8; 16]; - let data = b"11111111111111111"; - // create with mismatch key length and algo - assert!( - CipherContext::new(error_key.to_vec(), iv.to_vec(), true, Algorithm::Aes128Xts) - .is_err() - ); - // create with symmetry key - assert!(CipherContext::new( - symmetry_key.to_vec(), - iv.to_vec(), - true, - Algorithm::Aes128Xts - ) - .is_err()); - - // test context is none - let ctx = - CipherContext::new(key.to_vec(), iv.to_vec(), false, Algorithm::Aes128Xts).unwrap(); - let obj = Arc::new(Algorithm::Aes128Xts.new_cipher().unwrap()); - assert!(encrypt_with_context(data, &obj, &None, true).is_err()); - assert!(decrypt_with_context(b"somedata", &obj, &None, true).is_err()); - - // test encrypted is false - let no_change = encrypt_with_context(data, &obj, &Some(ctx.clone()), false).unwrap(); - assert_eq!(no_change.clone().into_owned(), data); - let bind = no_change.into_owned(); - let plain_text_no_change = - decrypt_with_context(&bind, &obj, &Some(ctx.clone()), false).unwrap(); - assert_eq!(plain_text_no_change.into_owned(), data); - - // test normal encrypt and decrypt - let encrypt_text = encrypt_with_context(data, &obj, &Some(ctx.clone()), true).unwrap(); - let bind = encrypt_text.into_owned(); - let plain_text = decrypt_with_context(&bind, &obj, &Some(ctx), true).unwrap(); - assert_eq!(&plain_text.into_owned(), data); - } - - fn test_gen_key(convergent_encryption: bool) { - let mut key = [0xcu8; 32]; - key[31] = 0xa; - let iv = [0u8; 16]; - let data = b"11111111111111111"; - let ctx = CipherContext::new( - key.to_vec(), - iv.to_vec(), - convergent_encryption, - Algorithm::Aes128Xts, - ) - .unwrap(); - let obj = Arc::new(Algorithm::Aes128Xts.new_cipher().unwrap()); - let (gen_key, gen_iv) = ctx.generate_cipher_meta(&key); - let ciphertext = obj.encrypt(gen_key, Some(&gen_iv), data).unwrap(); - let plaintext = obj.decrypt(gen_key, Some(&gen_iv), &ciphertext).unwrap(); - assert_eq!(&plaintext, data); - } - - #[test] - fn test_generate_cipher_meta() { - test_gen_key(true); - test_gen_key(false); - } -} +// Copyright (C) 2022-2023 Alibaba Cloud. All rights reserved. 
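The `encrypt_with_context()`/`decrypt_with_context()` helpers above only touch the payload when `encrypted` is true, and in that case a `CipherContext` must be supplied. A minimal round-trip sketch, assuming this module's items are in scope; the helper name and the fixed test-style key/IV are illustrative (`Cipher::generate_random_key()`/`generate_random_iv()` would be used for real keys):

```rust
use std::sync::Arc;

fn context_roundtrip() -> Result<(), std::io::Error> {
    // 32-byte AES-128-XTS key; the two 16-byte halves must differ.
    let mut key = vec![0xcu8; 32];
    key[31] = 0xa;
    let iv = vec![0u8; 16];

    let ctx = CipherContext::new(key, iv, false, Algorithm::Aes128Xts)?;
    let cipher = Arc::new(Algorithm::Aes128Xts.new_cipher()?);

    let plain: &[u8] = b"some chunk data";
    let enc = encrypt_with_context(plain, &cipher, &Some(ctx.clone()), true)?;
    let dec = decrypt_with_context(&enc, &cipher, &Some(ctx), true)?;
    assert_eq!(dec.as_ref(), plain);

    // With `encrypted == false` the data is passed through untouched and
    // no context is required.
    let raw = encrypt_with_context(plain, &cipher, &None, false)?;
    assert_eq!(raw.as_ref(), plain);
    Ok(())
}
```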
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::alloc::{alloc, Layout}; +use std::borrow::Cow; +use std::convert::TryFrom; +use std::fmt::{self, Debug, Formatter}; +use std::io::Error; +use std::str::FromStr; +use std::sync::Arc; + +use openssl::{rand, symm}; + +// The length of the data unit to be encrypted. +pub const DATA_UNIT_LENGTH: usize = 16; +// The length of thd iv (Initialization Vector) to do AES-XTS encryption. +pub const AES_XTS_IV_LENGTH: usize = 16; +// The length of the key to do AES-128-XTS encryption. +pub const AES_128_XTS_KEY_LENGTH: usize = 32; +// The length of the key to do AES-256-XTS encryption. +pub const AES_256_XTS_KEY_LENGTH: usize = 64; +// The length of the key to do AES-256-GCM encryption. +pub const AES_256_GCM_KEY_LENGTH: usize = 32; + +// The padding magic end. +pub const PADDING_MAGIC_END: [u8; 2] = [0x78, 0x90]; +// DATA_UNIT_LENGTH + length of PADDING_MAGIC_END. +pub const PADDING_LENGTH: usize = 18; +// Openssl rejects keys with identical first and second halves for xts. +// Use a default key for such cases. +const DEFAULT_CE_KEY: [u8; 32] = [ + 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, + 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, +]; +const DEFAULT_CE_KEY_64: [u8; 64] = [ + 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, + 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, + 0xfd, 0xbb, 0x3f, 0xac, 0xdd, 0xc7, 0xd9, 0xee, 0x83, 0xf6, 0x5c, 0xd9, 0x3c, 0xaa, 0x28, 0x7c, + 0xac, 0xed, 0x14, 0x69, 0x94, 0x23, 0x1e, 0xca, 0x44, 0x8c, 0xed, 0x2f, 0x6b, 0x40, 0x0c, 0x00, +]; + +/// Supported cipher algorithms. +#[repr(u32)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub enum Algorithm { + #[default] + None = 0, + Aes128Xts = 1, + Aes256Xts = 2, + Aes256Gcm = 3, +} + +impl Algorithm { + /// Create a new cipher object. + pub fn new_cipher(&self) -> Result { + match self { + Algorithm::None => Ok(Cipher::None), + Algorithm::Aes128Xts => { + let cipher = symm::Cipher::aes_128_xts(); + Ok(Cipher::Aes128Xts(cipher)) + } + Algorithm::Aes256Xts => { + let cipher = symm::Cipher::aes_256_xts(); + Ok(Cipher::Aes256Xts(cipher)) + } + Algorithm::Aes256Gcm => { + let cipher = symm::Cipher::aes_256_gcm(); + Ok(Cipher::Aes256Gcm(cipher)) + } + } + } + + /// Check whether data encryption is enabled or not. + pub fn is_encryption_enabled(&self) -> bool { + *self != Algorithm::None + } + + /// Check whether algorithm is AEAD. + pub fn is_aead(&self) -> bool { + match self { + Algorithm::None => false, + Algorithm::Aes128Xts => false, + Algorithm::Aes256Xts => false, + Algorithm::Aes256Gcm => true, + } + } + + /// Get size of tag associated with encrypted data. + pub fn tag_size(&self) -> usize { + match self { + Algorithm::None => 0, + Algorithm::Aes128Xts => 0, + Algorithm::Aes256Xts => 0, + Algorithm::Aes256Gcm => 12, + } + } + + /// Get key size of the encryption algorithm. 
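A short sketch of querying the `Algorithm` attributes above before any data is handled; the helper name is illustrative, and the string names are the lower-case forms accepted by `from_str()` further down ("none", "aes128xts", "aes256xts", "aes256gcm"):

```rust
use std::str::FromStr;

fn pick_cipher(name: &str) -> Result<Cipher, std::io::Error> {
    let algo = Algorithm::from_str(name)?; // e.g. "aes256gcm"
    // Key/tag geometry can be inspected without creating a cipher object.
    assert_eq!(Algorithm::Aes128Xts.key_length(), AES_128_XTS_KEY_LENGTH);
    assert_eq!(Algorithm::Aes256Gcm.tag_size(), 12);
    assert!(Algorithm::Aes256Gcm.is_aead());
    assert!(!Algorithm::None.is_encryption_enabled());
    // The matching `Cipher` object performs the actual encrypt/decrypt calls.
    algo.new_cipher()
}
```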
+ pub fn key_length(&self) -> usize { + match self { + Algorithm::None => 0, + Algorithm::Aes128Xts => AES_128_XTS_KEY_LENGTH, + Algorithm::Aes256Xts => AES_256_XTS_KEY_LENGTH, + Algorithm::Aes256Gcm => AES_256_GCM_KEY_LENGTH, + } + } +} + +impl fmt::Display for Algorithm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl FromStr for Algorithm { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "none" => Ok(Self::None), + "aes128xts" => Ok(Self::Aes128Xts), + "aes256xts" => Ok(Self::Aes256Xts), + "aes256gcm" => Ok(Self::Aes256Gcm), + _ => Err(einval!("cypher algorithm should be none or aes_gcm")), + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u32) -> Result { + if value == Algorithm::None as u32 { + Ok(Algorithm::None) + } else if value == Algorithm::Aes128Xts as u32 { + Ok(Algorithm::Aes128Xts) + } else if value == Algorithm::Aes256Xts as u32 { + Ok(Algorithm::Aes256Xts) + } else if value == Algorithm::Aes256Gcm as u32 { + Ok(Algorithm::Aes256Gcm) + } else { + Err(()) + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u64) -> Result { + if value == Algorithm::None as u64 { + Ok(Algorithm::None) + } else if value == Algorithm::Aes128Xts as u64 { + Ok(Algorithm::Aes128Xts) + } else if value == Algorithm::Aes256Xts as u64 { + Ok(Algorithm::Aes256Xts) + } else if value == Algorithm::Aes256Gcm as u64 { + Ok(Algorithm::Aes256Gcm) + } else { + Err(()) + } + } +} + +/// Cipher object to encrypt/decrypt data. +#[derive(Default)] +pub enum Cipher { + #[default] + None, + Aes128Xts(symm::Cipher), + Aes256Xts(symm::Cipher), + Aes256Gcm(symm::Cipher), +} + +impl Debug for Cipher { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Cipher::None => write!(f, "cipher: none"), + Cipher::Aes128Xts(_) => write!(f, "cypher: aes128_xts"), + Cipher::Aes256Xts(_) => write!(f, "cypher: aes256_xts"), + Cipher::Aes256Gcm(_) => write!(f, "cipher: aes256_gcm"), + } + } +} + +impl Cipher { + /// Encrypt plaintext with optional IV and return the encrypted data. + /// + /// For XTS, the caller needs to ensure that the top half of key is not identical to the + /// bottom half of the key, otherwise the encryption will fail. + pub fn encrypt<'a>( + &self, + key: &[u8], + iv: Option<&[u8]>, + data: &'a [u8], + ) -> Result, Error> { + match self { + Cipher::None => Ok(Cow::from(data)), + Cipher::Aes128Xts(cipher) => { + assert_eq!(key.len(), AES_128_XTS_KEY_LENGTH); + let mut buf; + let data = if data.len() >= DATA_UNIT_LENGTH { + data + } else { + // CMS (Cryptographic Message Syntax). + // This pads with the same value as the number of padding bytes + // and appends the magic padding end. 
+ let val = (DATA_UNIT_LENGTH - data.len()) as u8; + buf = [val; PADDING_LENGTH]; + buf[..data.len()].copy_from_slice(data); + buf[DATA_UNIT_LENGTH..PADDING_LENGTH].copy_from_slice(&PADDING_MAGIC_END); + &buf + }; + Self::cipher(*cipher, symm::Mode::Encrypt, key, iv, data) + .map(Cow::from) + .map_err(|e| eother!(format!("failed to encrypt data, {}", e))) + } + Cipher::Aes256Xts(cipher) => { + assert_eq!(key.len(), AES_256_XTS_KEY_LENGTH); + let mut buf; + let data = if data.len() >= DATA_UNIT_LENGTH { + data + } else { + let val = (DATA_UNIT_LENGTH - data.len()) as u8; + buf = [val; PADDING_LENGTH]; + buf[..data.len()].copy_from_slice(data); + buf[DATA_UNIT_LENGTH..PADDING_LENGTH].copy_from_slice(&PADDING_MAGIC_END); + &buf + }; + Self::cipher(*cipher, symm::Mode::Encrypt, key, iv, data) + .map(Cow::from) + .map_err(|e| eother!(format!("failed to encrypt data, {}", e))) + } + Cipher::Aes256Gcm(_cipher) => { + Err(einval!("Cipher::encrypt() doesn't support Aes256Gcm")) + } + } + } + + /// Decrypt encrypted data with optional IV and return the decrypted data. + pub fn decrypt(&self, key: &[u8], iv: Option<&[u8]>, data: &[u8]) -> Result, Error> { + let mut data = match self { + Cipher::None => Ok(data.to_vec()), + Cipher::Aes128Xts(cipher) => Self::cipher(*cipher, symm::Mode::Decrypt, key, iv, data) + .map_err(|e| eother!(format!("failed to decrypt data, {}", e))), + Cipher::Aes256Xts(cipher) => Self::cipher(*cipher, symm::Mode::Decrypt, key, iv, data) + .map_err(|e| eother!(format!("failed to decrypt data, {}", e))), + Cipher::Aes256Gcm(_cipher) => { + Err(einval!("Cipher::decrypt() doesn't support Aes256Gcm")) + } + }?; + + // Trim possible padding. + if data.len() == PADDING_LENGTH + && data[PADDING_LENGTH - PADDING_MAGIC_END.len()..PADDING_LENGTH] == PADDING_MAGIC_END + { + let val = data[DATA_UNIT_LENGTH - 1] as usize; + if val < DATA_UNIT_LENGTH { + data.truncate(DATA_UNIT_LENGTH - val); + } else { + return Err(einval!(format!( + "Cipher::decrypt: invalid padding data, value {}", + val, + ))); + } + }; + + Ok(data) + } + + /// Encrypt plaintext and return the ciphertext with authentication tag. + pub fn encrypt_aead( + &self, + key: &[u8], + iv: Option<&[u8]>, + data: &[u8], + tag: &mut [u8], + ) -> Result, Error> { + match self { + Cipher::Aes256Gcm(cipher) => symm::encrypt_aead(*cipher, key, iv, &[], data, tag) + .map_err(|e| eother!(format!("failed to encrypt data, {}", e))), + _ => Err(einval!("invalid algorithm for encrypt_aead()")), + } + } + + /// Decrypt plaintext and return the encrypted data with authentication tag. + pub fn decrypt_aead( + &self, + key: &[u8], + iv: Option<&[u8]>, + data: &[u8], + tag: &[u8], + ) -> Result, Error> { + match self { + Cipher::Aes256Gcm(cipher) => symm::decrypt_aead(*cipher, key, iv, &[], data, tag) + .map_err(|e| eother!(format!("failed to encrypt data, {}", e))), + _ => Err(einval!("invalid algorithm for decrypt_aead()")), + } + } + + /// Get size of tag associated with encrypted data. + pub fn tag_size(&self) -> usize { + match self { + Cipher::Aes256Gcm(_) => 12, + _ => 0, + } + } + + /// Get size of ciphertext from size of plaintext. + pub fn encrypted_size(&self, plaintext_size: usize) -> usize { + match self { + Cipher::None => plaintext_size, + Cipher::Aes128Xts(_) | Cipher::Aes256Xts(_) => { + if plaintext_size < DATA_UNIT_LENGTH { + DATA_UNIT_LENGTH + } else { + plaintext_size + } + } + Cipher::Aes256Gcm(_) => { + assert!(plaintext_size.checked_add(12).is_some()); + plaintext_size + 12 + } + } + } + + /// Tweak key for XTS mode. 
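To make the padding behaviour of `encrypt()`/`decrypt()` above concrete: plaintext shorter than `DATA_UNIT_LENGTH` is padded out to `PADDING_LENGTH` bytes (padding value plus `PADDING_MAGIC_END`), and `decrypt()` strips it again. A sketch with a test-style key and IV:

```rust
fn short_input_roundtrip() -> Result<(), std::io::Error> {
    let mut key = [0xcu8; 32]; // AES-128-XTS key; halves must differ
    key[31] = 0xa;
    let iv = [0u8; 16];

    let cipher = Algorithm::Aes128Xts.new_cipher()?;
    let ct = cipher.encrypt(key.as_slice(), Some(&iv), b"1")?;
    // One byte of plaintext travels as an 18-byte padded unit.
    assert_eq!(ct.len(), PADDING_LENGTH);

    let pt = cipher.decrypt(key.as_slice(), Some(&iv), &ct)?;
    assert_eq!(&pt, b"1"); // padding and magic end have been trimmed
    Ok(())
}
```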
+ pub fn tweak_key_for_xts(key: &[u8]) -> Cow<[u8]> { + let len = key.len() >> 1; + if key[..len] == key[len..] { + let mut buf = if key[len] == 0xa5 { + vec![0x5a; key.len()] + } else { + vec![0xa5; key.len()] + }; + buf[len..].copy_from_slice(&key[len..]); + Cow::from(buf) + } else { + Cow::from(key) + } + } + + fn cipher( + t: symm::Cipher, + mode: symm::Mode, + key: &[u8], + iv: Option<&[u8]>, + data: &[u8], + ) -> Result, Error> { + let mut c = symm::Crypter::new(t, mode, key, iv)?; + let mut out = alloc_buf(data.len() + t.block_size()); + let count = c.update(data, &mut out)?; + let rest = c.finalize(&mut out[count..])?; + out.truncate(count + rest); + Ok(out) + } + + pub fn generate_random_key(cipher_algo: Algorithm) -> Result, Error> { + let length = cipher_algo.key_length(); + let mut buf = vec![0u8; length]; + if let Err(e) = rand::rand_bytes(&mut buf) { + Err(eother!(format!( + "failed to generate key for {}, {}", + cipher_algo, e + ))) + } else { + Ok(Self::tweak_key_for_xts(&buf).to_vec()) + } + } + + pub fn generate_random_iv() -> Result, Error> { + let mut buf = vec![0u8; AES_XTS_IV_LENGTH]; + if let Err(e) = rand::rand_bytes(&mut buf) { + Err(eother!(format!("failed to generate iv, {}", e))) + } else { + Ok(buf) + } + } +} + +/// Struct to provide context information for data encryption/decryption. +#[derive(Default, Debug, Clone)] +pub struct CipherContext { + key: Vec, + iv: Vec, + convergent_encryption: bool, + cipher_algo: Algorithm, +} + +impl CipherContext { + /// Create a new instance of [CipherContext]. + pub fn new( + key: Vec, + iv: Vec, + convergent_encryption: bool, + cipher_algo: Algorithm, + ) -> Result { + let key_length = key.len(); + if key_length != cipher_algo.key_length() { + return Err(einval!(format!( + "invalid key length {} for {} encryption", + key_length, cipher_algo + ))); + } else if key[0..key_length >> 1] == key[key_length >> 1..key_length] { + return Err(einval!("invalid symmetry key for encryption")); + } + + Ok(CipherContext { + key, + iv, + convergent_encryption, + cipher_algo, + }) + } + + /// Generate context information from data for encryption/decryption. + pub fn generate_cipher_meta<'a>(&'a self, data: &'a [u8]) -> (&'a [u8], Vec) { + let length = data.len(); + assert_eq!(length, self.cipher_algo.key_length()); + let iv = vec![0u8; AES_XTS_IV_LENGTH]; + if self.convergent_encryption { + if length == AES_128_XTS_KEY_LENGTH && data[0..length >> 1] == data[length >> 1..length] + { + (&DEFAULT_CE_KEY, iv) + } else if length == AES_256_XTS_KEY_LENGTH + && data[0..length >> 1] == data[length >> 1..length] + { + (&DEFAULT_CE_KEY_64, iv) + } else { + (data, iv) + } + } else { + (&self.key, iv) + } + } + + /// Get context information for meta data encryption/decryption. + pub fn get_cipher_meta(&self) -> (&[u8], &[u8]) { + (&self.key, &self.iv) + } +} + +/// A customized buf allocator that avoids zeroing +fn alloc_buf(size: usize) -> Vec { + assert!(size < isize::MAX as usize); + let layout = Layout::from_size_align(size, 0x1000) + .unwrap() + .pad_to_align(); + let ptr = unsafe { alloc(layout) }; + unsafe { Vec::from_raw_parts(ptr, size, layout.size()) } +} + +// Encrypt data with Cipher and CipherContext. +pub fn encrypt_with_context<'a>( + data: &'a [u8], + cipher_obj: &Arc, + cipher_ctx: &Option, + encrypted: bool, +) -> Result, Error> { + if encrypted { + if let Some(cipher_ctx) = cipher_ctx { + let (key, iv) = cipher_ctx.get_cipher_meta(); + Ok(cipher_obj.encrypt(key, Some(iv), data)?) 
+ } else { + Err(einval!("the encrypt context can not be none")) + } + } else { + Ok(Cow::Borrowed(data)) + } +} + +// Decrypt data with Cipher and CipherContext. +pub fn decrypt_with_context<'a>( + data: &'a [u8], + cipher_obj: &Arc, + cipher_ctx: &Option, + encrypted: bool, +) -> Result, Error> { + if encrypted { + if let Some(cipher_ctx) = cipher_ctx { + let (key, iv) = cipher_ctx.get_cipher_meta(); + Ok(Cow::from(cipher_obj.decrypt(key, Some(iv), data)?)) + } else { + Err(einval!("the decrypt context can not be none")) + } + } else { + Ok(Cow::Borrowed(data)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_aes_128_xts_encrypt() { + let mut key = [0xcu8; 32]; + key[31] = 0xa; + + let cipher = Algorithm::Aes128Xts.new_cipher().unwrap(); + assert_eq!(cipher.encrypted_size(1), 16); + assert_eq!(cipher.encrypted_size(16), 16); + assert_eq!(cipher.encrypted_size(17), 17); + + let ciphertext1 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + let ciphertext2 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + assert_eq!(ciphertext1, ciphertext2); + assert_eq!(ciphertext2.len(), PADDING_LENGTH); + + let ciphertext3 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") + .unwrap(); + assert_eq!(ciphertext3.len(), 17); + + let ciphertext4 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") + .unwrap(); + assert_eq!(ciphertext4.len(), 17); + assert_ne!(ciphertext4, ciphertext3); + + let ciphertext5 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"21111111111111111") + .unwrap(); + assert_eq!(ciphertext5.len(), 17); + assert_ne!(ciphertext5, ciphertext4); + } + + #[test] + fn test_aes_256_xts_encrypt() { + let mut key = [0xcu8; 64]; + key[31] = 0xa; + + let cipher = Algorithm::Aes256Xts.new_cipher().unwrap(); + let ciphertext1 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + let ciphertext2 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + assert_eq!(ciphertext1, ciphertext2); + assert_eq!(ciphertext2.len(), PADDING_LENGTH); + + let ciphertext3 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") + .unwrap(); + assert_eq!(ciphertext3.len(), 17); + + let ciphertext4 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") + .unwrap(); + assert_eq!(ciphertext4.len(), 17); + assert_ne!(ciphertext4, ciphertext3); + + let ciphertext5 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"21111111111111111") + .unwrap(); + assert_eq!(ciphertext5.len(), 17); + assert_ne!(ciphertext5, ciphertext4); + } + + #[test] + fn test_aes_128_xts_decrypt() { + let mut key = [0xcu8; 32]; + key[31] = 0xa; + + let cipher = Algorithm::Aes128Xts.new_cipher().unwrap(); + let ciphertext1 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + let plaintext1 = cipher + .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext1) + .unwrap(); + assert_eq!(&plaintext1, b"1"); + + let ciphertext2 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") + .unwrap(); + let plaintext2 = cipher + .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext2) + .unwrap(); + assert_eq!(&plaintext2, b"11111111111111111"); + + let ciphertext3 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") + .unwrap(); + let plaintext3 = cipher + .decrypt(key.as_slice(), Some(&[1u8; 16]), &ciphertext3) + .unwrap(); + assert_eq!(&plaintext3, b"11111111111111111"); + } + + #[test] + 
fn test_aes_256_xts_decrypt() { + let mut key = [0xcu8; 64]; + key[31] = 0xa; + + let cipher = Algorithm::Aes256Xts.new_cipher().unwrap(); + let ciphertext1 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"1") + .unwrap(); + let plaintext1 = cipher + .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext1) + .unwrap(); + assert_eq!(&plaintext1, b"1"); + + let ciphertext2 = cipher + .encrypt(key.as_slice(), Some(&[0u8; 16]), b"11111111111111111") + .unwrap(); + let plaintext2 = cipher + .decrypt(key.as_slice(), Some(&[0u8; 16]), &ciphertext2) + .unwrap(); + assert_eq!(&plaintext2, b"11111111111111111"); + + let ciphertext3 = cipher + .encrypt(key.as_slice(), Some(&[1u8; 16]), b"11111111111111111") + .unwrap(); + let plaintext3 = cipher + .decrypt(key.as_slice(), Some(&[1u8; 16]), &ciphertext3) + .unwrap(); + assert_eq!(&plaintext3, b"11111111111111111"); + } + + #[test] + fn test_aes_256_gcm() { + let key = [0xcu8; 32]; + let mut tag = vec![0u8; 12]; + + let cipher = Algorithm::Aes256Gcm.new_cipher().unwrap(); + assert_eq!(cipher.tag_size(), 12); + assert_eq!(cipher.encrypted_size(1), 13); + + let ciphertext1 = cipher + .encrypt_aead(key.as_slice(), Some(&[0u8; 16]), b"1", &mut tag) + .unwrap(); + assert_eq!(ciphertext1.len(), 1); + assert_eq!(tag.len(), 12); + let plaintext1 = cipher + .decrypt_aead(key.as_slice(), Some(&[0u8; 16]), &ciphertext1, &tag) + .unwrap(); + assert_eq!(&plaintext1, b"1"); + + let ciphertext2 = cipher + .encrypt_aead( + key.as_slice(), + Some(&[0u8; 16]), + b"11111111111111111", + &mut tag, + ) + .unwrap(); + assert_eq!(ciphertext2.len(), 17); + assert_eq!(tag.len(), 12); + let plaintext2 = cipher + .decrypt_aead(key.as_slice(), Some(&[0u8; 16]), &ciphertext2, &tag) + .unwrap(); + assert_eq!(&plaintext2, b"11111111111111111"); + + let ciphertext3 = cipher + .encrypt_aead( + key.as_slice(), + Some(&[1u8; 16]), + b"11111111111111111", + &mut tag, + ) + .unwrap(); + assert_ne!(ciphertext3, ciphertext2); + assert_eq!(ciphertext3.len(), 17); + assert_eq!(tag.len(), 12); + let plaintext3 = cipher + .decrypt_aead(key.as_slice(), Some(&[1u8; 16]), &ciphertext3, &tag) + .unwrap(); + assert_eq!(&plaintext3, b"11111111111111111"); + } + + #[test] + fn test_tweak_key_for_xts() { + let buf = vec![0x0; 32]; + let buf2 = Cipher::tweak_key_for_xts(&buf); + assert_eq!(buf2[0], 0xa5); + assert_eq!(buf2[16], 0x0); + + let buf = vec![0xa5; 32]; + let buf2 = Cipher::tweak_key_for_xts(&buf); + assert_eq!(buf2[0], 0x5a); + assert_eq!(buf2[16], 0xa5); + } + + #[test] + fn test_attribute() { + let none = Algorithm::None.new_cipher().unwrap(); + let aes128xts = Algorithm::Aes128Xts.new_cipher().unwrap(); + let aes256xts = Algorithm::Aes256Xts.new_cipher().unwrap(); + let aes256gcm = Algorithm::Aes256Gcm.new_cipher().unwrap(); + + assert!(!Algorithm::None.is_encryption_enabled()); + assert!(Algorithm::Aes128Xts.is_encryption_enabled()); + assert!(Algorithm::Aes256Xts.is_encryption_enabled()); + assert!(Algorithm::Aes256Gcm.is_encryption_enabled()); + + assert!(!Algorithm::None.is_aead()); + assert!(!Algorithm::Aes128Xts.is_aead()); + assert!(!Algorithm::Aes256Xts.is_aead()); + assert!(Algorithm::Aes256Gcm.is_aead()); + + assert_eq!(Algorithm::None.tag_size(), 0); + assert_eq!(Algorithm::Aes128Xts.tag_size(), 0); + assert_eq!(Algorithm::Aes256Xts.tag_size(), 0); + assert_eq!(Algorithm::Aes256Gcm.tag_size(), 12); + + assert_eq!(Algorithm::None.key_length(), 0); + assert_eq!(Algorithm::Aes128Xts.key_length(), AES_128_XTS_KEY_LENGTH); + assert_eq!(Algorithm::Aes256Xts.key_length(), 
AES_256_XTS_KEY_LENGTH); + assert_eq!(Algorithm::Aes256Gcm.key_length(), AES_256_GCM_KEY_LENGTH); + + print!("{}", Algorithm::Aes128Xts); + assert!(Algorithm::from_str("none").is_ok()); + assert!(Algorithm::from_str("aes128xts").is_ok()); + assert!(Algorithm::from_str("aes256xts").is_ok()); + assert!(Algorithm::from_str("aes256gcm").is_ok()); + assert!(Algorithm::from_str("non-exist").is_err()); + + assert!(Algorithm::try_from(Algorithm::None as u32).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes128Xts as u32).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes256Xts as u32).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes256Gcm as u32).is_ok()); + assert!(Algorithm::try_from(u32::MAX).is_err()); + + assert!(Algorithm::try_from(Algorithm::None as u64).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes128Xts as u64).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes256Xts as u64).is_ok()); + assert!(Algorithm::try_from(Algorithm::Aes256Gcm as u64).is_ok()); + assert!(Algorithm::try_from(u64::MAX).is_err()); + + println!("{:?},{:?},{:?},{:?}", none, aes128xts, aes256xts, aes256gcm); + } + + #[test] + fn test_crypt_with_context() { + let error_key = [0xcu8, 64]; + let symmetry_key = [0xcu8, 32]; + let mut key = [0xcu8; 32]; + key[31] = 0xa; + let iv = [0u8; 16]; + let data = b"11111111111111111"; + // create with mismatch key length and algo + assert!( + CipherContext::new(error_key.to_vec(), iv.to_vec(), true, Algorithm::Aes128Xts) + .is_err() + ); + // create with symmetry key + assert!(CipherContext::new( + symmetry_key.to_vec(), + iv.to_vec(), + true, + Algorithm::Aes128Xts + ) + .is_err()); + + // test context is none + let ctx = + CipherContext::new(key.to_vec(), iv.to_vec(), false, Algorithm::Aes128Xts).unwrap(); + let obj = Arc::new(Algorithm::Aes128Xts.new_cipher().unwrap()); + assert!(encrypt_with_context(data, &obj, &None, true).is_err()); + assert!(decrypt_with_context(b"somedata", &obj, &None, true).is_err()); + + // test encrypted is false + let no_change = encrypt_with_context(data, &obj, &Some(ctx.clone()), false).unwrap(); + assert_eq!(no_change.clone().into_owned(), data); + let bind = no_change.into_owned(); + let plain_text_no_change = + decrypt_with_context(&bind, &obj, &Some(ctx.clone()), false).unwrap(); + assert_eq!(plain_text_no_change.into_owned(), data); + + // test normal encrypt and decrypt + let encrypt_text = encrypt_with_context(data, &obj, &Some(ctx.clone()), true).unwrap(); + let bind = encrypt_text.into_owned(); + let plain_text = decrypt_with_context(&bind, &obj, &Some(ctx), true).unwrap(); + assert_eq!(&plain_text.into_owned(), data); + } + + fn test_gen_key(convergent_encryption: bool) { + let mut key = [0xcu8; 32]; + key[31] = 0xa; + let iv = [0u8; 16]; + let data = b"11111111111111111"; + let ctx = CipherContext::new( + key.to_vec(), + iv.to_vec(), + convergent_encryption, + Algorithm::Aes128Xts, + ) + .unwrap(); + let obj = Arc::new(Algorithm::Aes128Xts.new_cipher().unwrap()); + let (gen_key, gen_iv) = ctx.generate_cipher_meta(&key); + let ciphertext = obj.encrypt(gen_key, Some(&gen_iv), data).unwrap(); + let plaintext = obj.decrypt(gen_key, Some(&gen_iv), &ciphertext).unwrap(); + assert_eq!(&plaintext, data); + } + + #[test] + fn test_generate_cipher_meta() { + test_gen_key(true); + test_gen_key(false); + } +} diff --git a/utils/src/digest.rs b/utils/src/digest.rs index 12e74486f3b..6d180c4223b 100644 --- a/utils/src/digest.rs +++ b/utils/src/digest.rs @@ -1,338 +1,338 @@ -// Copyright 2020 Ant Group. All rights reserved. 
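For the AEAD path exercised by `test_aes_256_gcm` above, the caller supplies the tag buffer and its length must match `tag_size()`. A sketch with an illustrative key, IV and payload:

```rust
fn gcm_roundtrip() -> Result<(), std::io::Error> {
    let key = [0xcu8; 32]; // AES_256_GCM_KEY_LENGTH bytes
    let iv = [0u8; 16];
    let cipher = Algorithm::Aes256Gcm.new_cipher()?;

    let mut tag = vec![0u8; cipher.tag_size()]; // 12 bytes for AES-256-GCM
    let ct = cipher.encrypt_aead(key.as_slice(), Some(&iv), b"chunk payload", &mut tag)?;
    let pt = cipher.decrypt_aead(key.as_slice(), Some(&iv), &ct, &tag)?;
    assert_eq!(&pt, b"chunk payload");
    Ok(())
}
```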
-// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Fast message digest algorithms for Rafs and Nydus, including Blake3 and SHA256. - -use std::convert::TryFrom; -use std::fmt; -use std::io::{Error, Read}; -use std::str::FromStr; - -use sha2::digest::Digest; -use sha2::Sha256; - -/// Size in bytes of chunk digest value. -pub const RAFS_DIGEST_LENGTH: usize = 32; - -/// Type alias for digest data. -pub type DigestData = [u8; RAFS_DIGEST_LENGTH]; - -#[repr(u32)] -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] -pub enum Algorithm { - #[default] - Blake3 = 0, - Sha256 = 1, -} - -impl fmt::Display for Algorithm { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -impl FromStr for Algorithm { - type Err = Error; - - fn from_str(s: &str) -> Result { - match s { - "blake3" => Ok(Self::Blake3), - "sha256" => Ok(Self::Sha256), - _ => Err(einval!("digest algorithm should be blake3 or sha256")), - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u32) -> Result { - if value == Algorithm::Sha256 as u32 { - Ok(Algorithm::Sha256) - } else if value == Algorithm::Blake3 as u32 { - Ok(Algorithm::Blake3) - } else { - Err(()) - } - } -} - -impl TryFrom for Algorithm { - type Error = (); - - fn try_from(value: u64) -> Result { - if value == Algorithm::Sha256 as u64 { - Ok(Algorithm::Sha256) - } else if value == Algorithm::Blake3 as u64 { - Ok(Algorithm::Blake3) - } else { - Err(()) - } - } -} - -pub trait DigestHasher { - fn digest_update(&mut self, buf: &[u8]); - fn digest_finalize(self) -> RafsDigest; -} - -/// Fast message digest algorithm. -/// -/// The size of Hasher struct is a little big, say -/// blake3::Hasher: 1912 bytes -/// Sha256: 112 bytes -/// RafsDigestHasher: 1920 -/// -/// So we should avoid any unnecessary clone() operation. Add we prefer allocation on stack -/// instead of allocation on heap. -/// -/// If allocating memory for blake3::Hasher is preferred over using the stack, please try: -/// Blake3(Box). But be careful, this will cause one extra memory allocation/free -/// for each digest. 
-#[derive(Clone, Debug)] -pub enum RafsDigestHasher { - Blake3(Box), - Sha256(Sha256), -} - -impl DigestHasher for RafsDigestHasher { - fn digest_update(&mut self, buf: &[u8]) { - match self { - RafsDigestHasher::Blake3(hasher) => { - hasher.update(buf); - } - RafsDigestHasher::Sha256(hasher) => { - hasher.update(buf); - } - } - } - - fn digest_finalize(self) -> RafsDigest { - let data = match self { - RafsDigestHasher::Blake3(hasher) => hasher.finalize().into(), - RafsDigestHasher::Sha256(hasher) => hasher.finalize().into(), - }; - - RafsDigest { data } - } -} - -impl DigestHasher for blake3::Hasher { - fn digest_update(&mut self, buf: &[u8]) { - self.update(buf); - } - - fn digest_finalize(self) -> RafsDigest { - RafsDigest { - data: self.finalize().into(), - } - } -} - -impl DigestHasher for Sha256 { - fn digest_update(&mut self, buf: &[u8]) { - self.update(buf); - } - - fn digest_finalize(self) -> RafsDigest { - RafsDigest { - data: self.finalize().into(), - } - } -} - -#[repr(C)] -#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default, Ord, PartialOrd)] -pub struct RafsDigest { - pub data: DigestData, -} - -impl RafsDigest { - pub fn from_buf(buf: &[u8], algorithm: Algorithm) -> Self { - let data: DigestData = match algorithm { - Algorithm::Blake3 => blake3::hash(buf).into(), - Algorithm::Sha256 => { - let mut hasher = Sha256::new(); - hasher.update(buf); - hasher.finalize().into() - } - }; - - RafsDigest { data } - } - - /// Compute message digest with the given algorithm by read data from the reader. - pub fn from_reader(reader: &mut R, algorithm: Algorithm) -> std::io::Result { - let mut digester = Self::hasher(algorithm); - let mut buf = vec![0u8; 16384]; - loop { - let sz = reader.read(&mut buf)?; - if sz == 0 { - return Ok(digester.digest_finalize()); - } - digester.digest_update(&buf[..sz]); - } - } - - /// According to the format of sha256. 
- pub fn from_string(input: &str) -> Self { - let mut digest = RafsDigest::default(); - - for (i, byte) in input.as_bytes().chunks(2).enumerate() { - let hex_str = std::str::from_utf8(byte).unwrap(); - digest.data[i] = u8::from_str_radix(hex_str, 16).unwrap(); - } - - digest - } - - pub fn hasher(algorithm: Algorithm) -> RafsDigestHasher { - match algorithm { - Algorithm::Blake3 => RafsDigestHasher::Blake3(Box::new(blake3::Hasher::new())), - Algorithm::Sha256 => RafsDigestHasher::Sha256(Sha256::new()), - } - } -} - -impl From for RafsDigest { - fn from(data: DigestData) -> Self { - Self { data } - } -} - -impl From<&DigestData> for &RafsDigest { - fn from(data: &DigestData) -> Self { - unsafe { &*(data as *const DigestData as *const u8 as *const RafsDigest) } - } -} - -impl AsRef<[u8]> for RafsDigest { - fn as_ref(&self) -> &[u8] { - &self.data - } -} - -impl fmt::Display for RafsDigest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for c in &self.data { - write!(f, "{:02x}", c).unwrap() - } - Ok(()) - } -} - -impl From for String { - fn from(d: RafsDigest) -> Self { - format!("{}", d) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_algorithm() { - assert_eq!(Algorithm::from_str("blake3").unwrap(), Algorithm::Blake3); - assert_eq!(Algorithm::from_str("sha256").unwrap(), Algorithm::Sha256); - Algorithm::from_str("Blake3").unwrap_err(); - Algorithm::from_str("SHA256").unwrap_err(); - } - - #[test] - fn test_hash_from_buf() { - let text = b"The quick brown fox jumps over the lazy dog"; - - let blake3 = RafsDigest::from_buf(text, Algorithm::Blake3); - let str: String = blake3.into(); - assert_eq!( - str.as_bytes(), - b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" - ); - - let sha256 = RafsDigest::from_buf(text, Algorithm::Sha256); - let str: String = sha256.into(); - assert_eq!( - str.as_bytes(), - b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" - ); - } - - #[test] - fn test_hasher() { - let text = b"The quick brown fox jumps "; - let text2 = b"over the lazy dog"; - - let mut hasher = RafsDigest::hasher(Algorithm::Blake3); - hasher.digest_update(text); - hasher.digest_update(text2); - let blake3 = hasher.digest_finalize(); - let str: String = blake3.into(); - assert_eq!( - str.as_bytes(), - b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" - ); - - let mut hasher = RafsDigest::hasher(Algorithm::Sha256); - hasher.digest_update(text); - hasher.digest_update(text2); - let sha256 = hasher.digest_finalize(); - let str: String = sha256.into(); - assert_eq!( - str.as_bytes(), - b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" - ); - } - - #[test] - fn test_try_from() { - assert!(Algorithm::try_from(Algorithm::Sha256 as u32).is_ok()); - assert!(Algorithm::try_from(Algorithm::Blake3 as u32).is_ok()); - assert!(Algorithm::try_from(0xffff_abcd as u32).is_err()); - - assert!(Algorithm::try_from(Algorithm::Sha256 as u64).is_ok()); - assert!(Algorithm::try_from(Algorithm::Blake3 as u64).is_ok()); - assert!(Algorithm::try_from(0xffff_abcd as u64).is_err()); - } - - #[test] - fn test_spec_hasher_new() { - let text = b"The quick brown fox jumps "; - let text2 = b"over the lazy dog"; - - let mut hasher: blake3::Hasher = blake3::Hasher::new(); - hasher.digest_update(text); - hasher.digest_update(text2); - let blake3 = hasher.digest_finalize(); - let str: String = blake3.into(); - assert_eq!( - str.as_bytes(), - b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" - ); - - let 
mut hasher = RafsDigestHasher::Sha256(Sha256::new()); - hasher.digest_update(text); - hasher.digest_update(text2); - let sha256 = hasher.digest_finalize(); - let str: String = sha256.into(); - assert_eq!( - str.as_bytes(), - b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" - ); - } - - #[test] - fn test_rafs_digest_try_from() { - let text = b"The quick brown fox jumps over the lazy dog"; - - let d1 = RafsDigest::from_buf(text, Algorithm::Blake3); - let d2 = RafsDigest::try_from(d1.data).unwrap(); - let s1: String = d1.into(); - let s2: String = d2.into(); - print!("{:?}", d1); - assert_eq!(s1, s2); - print!("{:?}, {:?}", Algorithm::Blake3, Algorithm::Sha256); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Fast message digest algorithms for Rafs and Nydus, including Blake3 and SHA256. + +use std::convert::TryFrom; +use std::fmt; +use std::io::{Error, Read}; +use std::str::FromStr; + +use sha2::digest::Digest; +use sha2::Sha256; + +/// Size in bytes of chunk digest value. +pub const RAFS_DIGEST_LENGTH: usize = 32; + +/// Type alias for digest data. +pub type DigestData = [u8; RAFS_DIGEST_LENGTH]; + +#[repr(u32)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub enum Algorithm { + #[default] + Blake3 = 0, + Sha256 = 1, +} + +impl fmt::Display for Algorithm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl FromStr for Algorithm { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "blake3" => Ok(Self::Blake3), + "sha256" => Ok(Self::Sha256), + _ => Err(einval!("digest algorithm should be blake3 or sha256")), + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u32) -> Result { + if value == Algorithm::Sha256 as u32 { + Ok(Algorithm::Sha256) + } else if value == Algorithm::Blake3 as u32 { + Ok(Algorithm::Blake3) + } else { + Err(()) + } + } +} + +impl TryFrom for Algorithm { + type Error = (); + + fn try_from(value: u64) -> Result { + if value == Algorithm::Sha256 as u64 { + Ok(Algorithm::Sha256) + } else if value == Algorithm::Blake3 as u64 { + Ok(Algorithm::Blake3) + } else { + Err(()) + } + } +} + +pub trait DigestHasher { + fn digest_update(&mut self, buf: &[u8]); + fn digest_finalize(self) -> RafsDigest; +} + +/// Fast message digest algorithm. +/// +/// The size of Hasher struct is a little big, say +/// blake3::Hasher: 1912 bytes +/// Sha256: 112 bytes +/// RafsDigestHasher: 1920 +/// +/// So we should avoid any unnecessary clone() operation. Add we prefer allocation on stack +/// instead of allocation on heap. +/// +/// If allocating memory for blake3::Hasher is preferred over using the stack, please try: +/// Blake3(Box). But be careful, this will cause one extra memory allocation/free +/// for each digest. 
+#[derive(Clone, Debug)] +pub enum RafsDigestHasher { + Blake3(Box), + Sha256(Sha256), +} + +impl DigestHasher for RafsDigestHasher { + fn digest_update(&mut self, buf: &[u8]) { + match self { + RafsDigestHasher::Blake3(hasher) => { + hasher.update(buf); + } + RafsDigestHasher::Sha256(hasher) => { + hasher.update(buf); + } + } + } + + fn digest_finalize(self) -> RafsDigest { + let data = match self { + RafsDigestHasher::Blake3(hasher) => hasher.finalize().into(), + RafsDigestHasher::Sha256(hasher) => hasher.finalize().into(), + }; + + RafsDigest { data } + } +} + +impl DigestHasher for blake3::Hasher { + fn digest_update(&mut self, buf: &[u8]) { + self.update(buf); + } + + fn digest_finalize(self) -> RafsDigest { + RafsDigest { + data: self.finalize().into(), + } + } +} + +impl DigestHasher for Sha256 { + fn digest_update(&mut self, buf: &[u8]) { + self.update(buf); + } + + fn digest_finalize(self) -> RafsDigest { + RafsDigest { + data: self.finalize().into(), + } + } +} + +#[repr(C)] +#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug, Default, Ord, PartialOrd)] +pub struct RafsDigest { + pub data: DigestData, +} + +impl RafsDigest { + pub fn from_buf(buf: &[u8], algorithm: Algorithm) -> Self { + let data: DigestData = match algorithm { + Algorithm::Blake3 => blake3::hash(buf).into(), + Algorithm::Sha256 => { + let mut hasher = Sha256::new(); + hasher.update(buf); + hasher.finalize().into() + } + }; + + RafsDigest { data } + } + + /// Compute message digest with the given algorithm by read data from the reader. + pub fn from_reader(reader: &mut R, algorithm: Algorithm) -> std::io::Result { + let mut digester = Self::hasher(algorithm); + let mut buf = vec![0u8; 16384]; + loop { + let sz = reader.read(&mut buf)?; + if sz == 0 { + return Ok(digester.digest_finalize()); + } + digester.digest_update(&buf[..sz]); + } + } + + /// According to the format of sha256. 
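A sketch of the two ways to produce a `RafsDigest` defined above, one-shot via `from_buf()` and streaming via `hasher()` (assuming this module's items, including the `DigestHasher` trait, are in scope); the sample text and its Blake3 hex value come from the tests in this file, and `from_string()` below parses the hex form back into a digest:

```rust
fn digest_two_ways() {
    let text: &[u8] = b"The quick brown fox jumps over the lazy dog";

    // One-shot digest of a complete buffer.
    let d1 = RafsDigest::from_buf(text, Algorithm::Blake3);

    // Streaming digest over the same bytes, fed in two pieces.
    let mut hasher = RafsDigest::hasher(Algorithm::Blake3);
    hasher.digest_update(&text[..26]);
    hasher.digest_update(&text[26..]);
    let d2 = hasher.digest_finalize();
    assert_eq!(d1, d2);

    // Digests render as lower-case hex and can be parsed back.
    let hex: String = d1.into();
    assert_eq!(
        hex,
        "2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a"
    );
    assert_eq!(RafsDigest::from_string(&hex), d1);
}
```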
+ pub fn from_string(input: &str) -> Self { + let mut digest = RafsDigest::default(); + + for (i, byte) in input.as_bytes().chunks(2).enumerate() { + let hex_str = std::str::from_utf8(byte).unwrap(); + digest.data[i] = u8::from_str_radix(hex_str, 16).unwrap(); + } + + digest + } + + pub fn hasher(algorithm: Algorithm) -> RafsDigestHasher { + match algorithm { + Algorithm::Blake3 => RafsDigestHasher::Blake3(Box::new(blake3::Hasher::new())), + Algorithm::Sha256 => RafsDigestHasher::Sha256(Sha256::new()), + } + } +} + +impl From for RafsDigest { + fn from(data: DigestData) -> Self { + Self { data } + } +} + +impl From<&DigestData> for &RafsDigest { + fn from(data: &DigestData) -> Self { + unsafe { &*(data as *const DigestData as *const u8 as *const RafsDigest) } + } +} + +impl AsRef<[u8]> for RafsDigest { + fn as_ref(&self) -> &[u8] { + &self.data + } +} + +impl fmt::Display for RafsDigest { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for c in &self.data { + write!(f, "{:02x}", c).unwrap() + } + Ok(()) + } +} + +impl From for String { + fn from(d: RafsDigest) -> Self { + format!("{}", d) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_algorithm() { + assert_eq!(Algorithm::from_str("blake3").unwrap(), Algorithm::Blake3); + assert_eq!(Algorithm::from_str("sha256").unwrap(), Algorithm::Sha256); + Algorithm::from_str("Blake3").unwrap_err(); + Algorithm::from_str("SHA256").unwrap_err(); + } + + #[test] + fn test_hash_from_buf() { + let text = b"The quick brown fox jumps over the lazy dog"; + + let blake3 = RafsDigest::from_buf(text, Algorithm::Blake3); + let str: String = blake3.into(); + assert_eq!( + str.as_bytes(), + b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" + ); + + let sha256 = RafsDigest::from_buf(text, Algorithm::Sha256); + let str: String = sha256.into(); + assert_eq!( + str.as_bytes(), + b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" + ); + } + + #[test] + fn test_hasher() { + let text = b"The quick brown fox jumps "; + let text2 = b"over the lazy dog"; + + let mut hasher = RafsDigest::hasher(Algorithm::Blake3); + hasher.digest_update(text); + hasher.digest_update(text2); + let blake3 = hasher.digest_finalize(); + let str: String = blake3.into(); + assert_eq!( + str.as_bytes(), + b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" + ); + + let mut hasher = RafsDigest::hasher(Algorithm::Sha256); + hasher.digest_update(text); + hasher.digest_update(text2); + let sha256 = hasher.digest_finalize(); + let str: String = sha256.into(); + assert_eq!( + str.as_bytes(), + b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" + ); + } + + #[test] + fn test_try_from() { + assert!(Algorithm::try_from(Algorithm::Sha256 as u32).is_ok()); + assert!(Algorithm::try_from(Algorithm::Blake3 as u32).is_ok()); + assert!(Algorithm::try_from(0xffff_abcd as u32).is_err()); + + assert!(Algorithm::try_from(Algorithm::Sha256 as u64).is_ok()); + assert!(Algorithm::try_from(Algorithm::Blake3 as u64).is_ok()); + assert!(Algorithm::try_from(0xffff_abcd as u64).is_err()); + } + + #[test] + fn test_spec_hasher_new() { + let text = b"The quick brown fox jumps "; + let text2 = b"over the lazy dog"; + + let mut hasher: blake3::Hasher = blake3::Hasher::new(); + hasher.digest_update(text); + hasher.digest_update(text2); + let blake3 = hasher.digest_finalize(); + let str: String = blake3.into(); + assert_eq!( + str.as_bytes(), + b"2f1514181aadccd913abd94cfa592701a5686ab23f8df1dff1b74710febc6d4a" + ); + + let 
mut hasher = RafsDigestHasher::Sha256(Sha256::new()); + hasher.digest_update(text); + hasher.digest_update(text2); + let sha256 = hasher.digest_finalize(); + let str: String = sha256.into(); + assert_eq!( + str.as_bytes(), + b"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" + ); + } + + #[test] + fn test_rafs_digest_try_from() { + let text = b"The quick brown fox jumps over the lazy dog"; + + let d1 = RafsDigest::from_buf(text, Algorithm::Blake3); + let d2 = RafsDigest::try_from(d1.data).unwrap(); + let s1: String = d1.into(); + let s2: String = d2.into(); + print!("{:?}", d1); + assert_eq!(s1, s2); + print!("{:?}, {:?}", Algorithm::Blake3, Algorithm::Sha256); + } +} diff --git a/utils/src/exec.rs b/utils/src/exec.rs index 14f775044bc..0a22017337f 100644 --- a/utils/src/exec.rs +++ b/utils/src/exec.rs @@ -1,68 +1,68 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::io::{Result, Write}; -use std::process::{Command, Stdio}; - -pub fn exec(cmd: &str, output: bool, input: &[u8]) -> Result { - debug!("exec `{}`", cmd); - let has_input = !input.is_empty(); - let mut basic_cmd = Command::new("sh"); - let mut cmd_object = basic_cmd.arg("-c").arg(cmd).env("RUST_BACKTRACE", "1"); - - if has_input { - cmd_object = cmd_object.stdin(Stdio::piped()); - } else { - cmd_object = cmd_object.stdin(Stdio::null()); - } - - if output { - cmd_object = cmd_object.stdout(Stdio::piped()).stderr(Stdio::piped()); - } else { - cmd_object = cmd_object.stdout(Stdio::inherit()).stderr(Stdio::inherit()) - } - let mut child = cmd_object.spawn()?; - - if has_input { - let mut input_stream = child.stdin.take().unwrap(); - input_stream.write_all(input)?; - drop(input_stream); - } - - if output { - let output = child.wait_with_output()?; - if !output.status.success() { - return Err(eother!("exit with non-zero status")); - } - let stdout = std::str::from_utf8(&output.stdout).map_err(|e| einval!(e))?; - return Ok(stdout.to_string()); - } - - let status = child.wait()?; - if !status.success() { - return Err(eother!("exit with non-zero status")); - } - - Ok(String::from("")) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_exec() { - let val = exec("echo hello", true, b"").unwrap(); - assert_eq!(val, "hello\n"); - - let val = exec("echo hello", false, b"").unwrap(); - assert_eq!(val, ""); - - let val = exec("cat -", true, b"test").unwrap(); - assert_eq!(val, "test"); - - let val = exec("cat -", false, b"test").unwrap(); - assert_eq!(val, ""); - } -} +// Copyright 2020 Ant Group. All rights reserved. 
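The `exec()` helper above runs its command through `sh -c`, optionally feeding stdin and optionally capturing stdout, and turns a non-zero exit status into an error. A usage sketch mirroring the tests:

```rust
fn exec_examples() -> std::io::Result<()> {
    // Capture stdout.
    let out = exec("echo hello", true, b"")?;
    assert_eq!(out, "hello\n");

    // Feed stdin and capture what the command writes back.
    let out = exec("cat -", true, b"bootstrap data")?;
    assert_eq!(out, "bootstrap data");

    // With `output == false`, stdout/stderr are inherited and "" is returned.
    let out = exec("true", false, b"")?;
    assert_eq!(out, "");
    Ok(())
}
```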
+// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{Result, Write}; +use std::process::{Command, Stdio}; + +pub fn exec(cmd: &str, output: bool, input: &[u8]) -> Result { + debug!("exec `{}`", cmd); + let has_input = !input.is_empty(); + let mut basic_cmd = Command::new("sh"); + let mut cmd_object = basic_cmd.arg("-c").arg(cmd).env("RUST_BACKTRACE", "1"); + + if has_input { + cmd_object = cmd_object.stdin(Stdio::piped()); + } else { + cmd_object = cmd_object.stdin(Stdio::null()); + } + + if output { + cmd_object = cmd_object.stdout(Stdio::piped()).stderr(Stdio::piped()); + } else { + cmd_object = cmd_object.stdout(Stdio::inherit()).stderr(Stdio::inherit()) + } + let mut child = cmd_object.spawn()?; + + if has_input { + let mut input_stream = child.stdin.take().unwrap(); + input_stream.write_all(input)?; + drop(input_stream); + } + + if output { + let output = child.wait_with_output()?; + if !output.status.success() { + return Err(eother!("exit with non-zero status")); + } + let stdout = std::str::from_utf8(&output.stdout).map_err(|e| einval!(e))?; + return Ok(stdout.to_string()); + } + + let status = child.wait()?; + if !status.success() { + return Err(eother!("exit with non-zero status")); + } + + Ok(String::from("")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_exec() { + let val = exec("echo hello", true, b"").unwrap(); + assert_eq!(val, "hello\n"); + + let val = exec("echo hello", false, b"").unwrap(); + assert_eq!(val, ""); + + let val = exec("cat -", true, b"test").unwrap(); + assert_eq!(val, "test"); + + let val = exec("cat -", false, b"test").unwrap(); + assert_eq!(val, ""); + } +} diff --git a/utils/src/filemap.rs b/utils/src/filemap.rs index efa935d13c3..c0216656ead 100644 --- a/utils/src/filemap.rs +++ b/utils/src/filemap.rs @@ -1,278 +1,278 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fs::File; -use std::io::Result; -use std::mem::size_of; -use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; - -/// Struct to manage memory range mapped from file objects. -/// -/// It maps a region from a file into current process by using libc::mmap(). -/// Then it provides safe interfaces to access the memory mapped region. -pub struct FileMapState { - base: *const u8, - end: *const u8, - size: usize, - fd: RawFd, -} - -// Safe to Send/Sync because the underlying data structures are readonly -unsafe impl Send for FileMapState {} -unsafe impl Sync for FileMapState {} - -impl Default for FileMapState { - fn default() -> Self { - FileMapState { - fd: -1, - base: std::ptr::null(), - end: std::ptr::null(), - size: 0, - } - } -} - -impl Drop for FileMapState { - fn drop(&mut self) { - if !self.base.is_null() { - unsafe { libc::munmap(self.base as *mut u8 as *mut libc::c_void, self.size) }; - self.base = std::ptr::null(); - self.end = std::ptr::null(); - self.size = 0; - } - if self.fd >= 0 { - let _ = nix::unistd::close(self.fd); - self.fd = -1; - } - } -} - -impl FileMapState { - /// Memory map a region of the file object into current process. - /// - /// It takes ownership of the file object and will close it when the returned object is dropped. 
- pub fn new(file: File, offset: libc::off_t, size: usize, writable: bool) -> Result { - let prot = if writable { - libc::PROT_READ | libc::PROT_WRITE - } else { - libc::PROT_READ - }; - let base = unsafe { - libc::mmap( - std::ptr::null_mut(), - size, - prot, - libc::MAP_NORESERVE | libc::MAP_SHARED, - file.as_raw_fd(), - offset, - ) - } as *const u8; - if base as *mut core::ffi::c_void == libc::MAP_FAILED { - return Err(last_error!( - "failed to memory map file region into current process" - )); - } else if base.is_null() { - return Err(last_error!( - "failed to memory map file region into current process" - )); - } - // Safe because the mmap area should covered the range [start, end) - let end = unsafe { base.add(size) }; - - Ok(Self { - fd: file.into_raw_fd(), - base, - end, - size, - }) - } - - /// Get size of mapped region. - pub fn size(&self) -> usize { - self.size - } - - /// Cast a subregion of the mapped area to an object reference. - pub fn get_ref(&self, offset: usize) -> Result<&T> { - let start = self.base.wrapping_add(offset); - let end = start.wrapping_add(size_of::()); - - if start > end - || start < self.base - || end < self.base - || end > self.end - || start as usize & (std::mem::align_of::() - 1) != 0 - { - return Err(einval!("invalid mmap offset")); - } - - Ok(unsafe { &*(start as *const T) }) - } - - /// Cast a subregion of the mapped area to an mutable object reference. - pub fn get_mut(&mut self, offset: usize) -> Result<&mut T> { - let start = self.base.wrapping_add(offset); - let end = start.wrapping_add(size_of::()); - - if start > end - || start < self.base - || end < self.base - || end > self.end - || start as usize & (std::mem::align_of::() - 1) != 0 - { - return Err(einval!("invalid mmap offset")); - } - - Ok(unsafe { &mut *(start as *const T as *mut T) }) - } - - /// Get an immutable slice of 'T' at 'offset' with 'count' entries. - pub fn get_slice(&self, offset: usize, count: usize) -> Result<&[T]> { - let start = self.base.wrapping_add(offset); - if count.checked_mul(size_of::()).is_none() { - bail_einval!("count 0x{count:x} to validate_slice() is too big"); - } - let size = count * size_of::(); - if size.checked_add(start as usize).is_none() { - bail_einval!( - "invalid parameter to validate_slice(), offset 0x{offset:x}, count 0x{count:x}" - ); - } - let end = start.wrapping_add(size); - if start > end || start < self.base || end < self.base || end > self.end { - bail_einval!( - "invalid range in validate_slice, base 0x{:p}, start 0x{start:p}, end 0x{end:p}", - self.base - ); - } - Ok(unsafe { std::slice::from_raw_parts(start as *const T, count) }) - } - - /// Get a mutable slice of 'T' at 'offset' with 'count' entries. - pub fn get_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { - let start = self.base.wrapping_add(offset); - if count.checked_mul(size_of::()).is_none() { - bail_einval!("count 0x{count:x} to validate_slice() is too big"); - } - let size = count * size_of::(); - if size.checked_add(start as usize).is_none() { - bail_einval!( - "invalid parameter to validate_slice(), offset 0x{offset:x}, count 0x{count:x}" - ); - } - let end = start.wrapping_add(size); - if start > end || start < self.base || end < self.base || end > self.end { - bail_einval!( - "invalid range in validate_slice, base 0x{:p}, start 0x{start:p}, end 0x{end:p}", - self.base - ); - } - Ok(unsafe { std::slice::from_raw_parts_mut(start as *mut T, count) }) - } - - /// Check whether the range [offset, offset + size) is valid and return the start address. 
- pub fn validate_range(&self, offset: usize, size: usize) -> Result<*const u8> { - let start = self.base.wrapping_add(offset); - let end = start.wrapping_add(size); - - if start > end || start < self.base || end < self.base || end > self.end { - return Err(einval!("invalid range")); - } - - Ok(start) - } - - /// Add `offset` to the base pointer. - /// - /// # Safety - /// The caller should ensure that `offset` is within range. - pub unsafe fn offset(&self, offset: usize) -> *const u8 { - self.base.wrapping_add(offset) - } - - /// Sync mapped file data into disk. - pub fn sync_data(&self) -> Result<()> { - let file = unsafe { File::from_raw_fd(self.fd) }; - let result = file.sync_data(); - std::mem::forget(file); - result - } -} - -/// Duplicate a file object by `libc::dup()`. -pub fn clone_file(fd: RawFd) -> Result { - unsafe { - let fd = libc::dup(fd); - if fd < 0 { - return Err(last_error!("failed to dup bootstrap file fd")); - } - Ok(File::from_raw_fd(fd)) - } -} - -#[cfg(test)] -mod tests { - use vmm_sys_util::tempfile::TempFile; - - use super::*; - use std::fs::OpenOptions; - use std::path::PathBuf; - - #[test] - fn create_file_map_object() { - let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); - let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); - let file = OpenOptions::new() - .read(true) - .write(false) - .open(path) - .unwrap(); - let map = FileMapState::new(file, 0, 4096, false).unwrap(); - - let magic = map.get_ref::(0).unwrap(); - assert_eq!(u32::from_le(*magic), 0x52414653); - - map.get_ref::(4096).unwrap_err(); - let _ = map.get_ref::(4092).unwrap(); - let _ = map.get_ref::(0).unwrap(); - map.validate_range(4096, 1).unwrap_err(); - let _ = map.validate_range(4095, 1).unwrap(); - let _ = map.validate_range(0, 1).unwrap(); - drop(map); - } - - #[test] - fn create_default_file_map_object() { - let map = FileMapState::default(); - drop(map); - } - - #[test] - fn test_file_map_error() { - let temp = TempFile::new().unwrap(); - let file = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - assert!(FileMapState::new(file, 0, 4096, true).is_err()); - - let temp = TempFile::new().unwrap(); - let file = OpenOptions::new() - .read(true) - .write(false) - .open(temp.as_path()) - .unwrap(); - let mut map = FileMapState::new(file, 0, 4096, false).unwrap(); - assert!(map.get_slice::(0, usize::MAX).is_err()); - assert!(map.get_slice::(usize::MAX, 1).is_err()); - assert!(map.get_slice::(4096, 4096).is_err()); - assert!(map.get_slice::(0, 128).is_ok()); - - assert!(map.get_slice_mut::(0, usize::MAX).is_err()); - assert!(map.get_slice_mut::(usize::MAX, 1).is_err()); - assert!(map.get_slice_mut::(4096, 4096).is_err()); - assert!(map.get_slice_mut::(0, 128).is_ok()); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; +use std::io::Result; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; + +/// Struct to manage memory range mapped from file objects. +/// +/// It maps a region from a file into current process by using libc::mmap(). +/// Then it provides safe interfaces to access the memory mapped region. 
+pub struct FileMapState { + base: *const u8, + end: *const u8, + size: usize, + fd: RawFd, +} + +// Safe to Send/Sync because the underlying data structures are readonly +unsafe impl Send for FileMapState {} +unsafe impl Sync for FileMapState {} + +impl Default for FileMapState { + fn default() -> Self { + FileMapState { + fd: -1, + base: std::ptr::null(), + end: std::ptr::null(), + size: 0, + } + } +} + +impl Drop for FileMapState { + fn drop(&mut self) { + if !self.base.is_null() { + unsafe { libc::munmap(self.base as *mut u8 as *mut libc::c_void, self.size) }; + self.base = std::ptr::null(); + self.end = std::ptr::null(); + self.size = 0; + } + if self.fd >= 0 { + let _ = nix::unistd::close(self.fd); + self.fd = -1; + } + } +} + +impl FileMapState { + /// Memory map a region of the file object into current process. + /// + /// It takes ownership of the file object and will close it when the returned object is dropped. + pub fn new(file: File, offset: libc::off_t, size: usize, writable: bool) -> Result { + let prot = if writable { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + let base = unsafe { + libc::mmap( + std::ptr::null_mut(), + size, + prot, + libc::MAP_NORESERVE | libc::MAP_SHARED, + file.as_raw_fd(), + offset, + ) + } as *const u8; + if base as *mut core::ffi::c_void == libc::MAP_FAILED { + return Err(last_error!( + "failed to memory map file region into current process" + )); + } else if base.is_null() { + return Err(last_error!( + "failed to memory map file region into current process" + )); + } + // Safe because the mmap area should covered the range [start, end) + let end = unsafe { base.add(size) }; + + Ok(Self { + fd: file.into_raw_fd(), + base, + end, + size, + }) + } + + /// Get size of mapped region. + pub fn size(&self) -> usize { + self.size + } + + /// Cast a subregion of the mapped area to an object reference. + pub fn get_ref(&self, offset: usize) -> Result<&T> { + let start = self.base.wrapping_add(offset); + let end = start.wrapping_add(size_of::()); + + if start > end + || start < self.base + || end < self.base + || end > self.end + || start as usize & (std::mem::align_of::() - 1) != 0 + { + return Err(einval!("invalid mmap offset")); + } + + Ok(unsafe { &*(start as *const T) }) + } + + /// Cast a subregion of the mapped area to an mutable object reference. + pub fn get_mut(&mut self, offset: usize) -> Result<&mut T> { + let start = self.base.wrapping_add(offset); + let end = start.wrapping_add(size_of::()); + + if start > end + || start < self.base + || end < self.base + || end > self.end + || start as usize & (std::mem::align_of::() - 1) != 0 + { + return Err(einval!("invalid mmap offset")); + } + + Ok(unsafe { &mut *(start as *const T as *mut T) }) + } + + /// Get an immutable slice of 'T' at 'offset' with 'count' entries. 
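// Editor's note: a minimal, self-contained sketch of the bounds and overflow checks that
// `get_ref` above and `get_slice` below perform before casting into the mapped region.
// The helper name `checked_region_offset` and the plain `region_len` parameter are made up
// for illustration; only the checking logic mirrors the real code. The region base is
// assumed to be page aligned (as mmap guarantees), so checking the offset alignment is enough.
use std::mem::{align_of, size_of};

// Returns Some(offset) when `count` items of `T` starting at `offset` fit inside a region
// of `region_len` bytes and the start is properly aligned for `T`, otherwise None.
fn checked_region_offset<T>(offset: usize, count: usize, region_len: usize) -> Option<usize> {
    let bytes = count.checked_mul(size_of::<T>())?;        // reject count * size_of::<T>() overflow
    let end = offset.checked_add(bytes)?;                   // reject offset + size overflow
    if end > region_len || offset % align_of::<T>() != 0 {  // reject out-of-range or misaligned access
        return None;
    }
    Some(offset)
}

fn main() {
    // A 4 KiB region, like the mapping used in the tests below.
    assert!(checked_region_offset::<u32>(0, 1, 4096).is_some());
    assert!(checked_region_offset::<u32>(4092, 1, 4096).is_some());
    assert!(checked_region_offset::<u32>(4096, 1, 4096).is_none()); // past the end
    assert!(checked_region_offset::<u32>(1, 1, 4096).is_none());    // misaligned for u32
    assert!(checked_region_offset::<u8>(0, usize::MAX, 4096).is_none()); // overflow guard
}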
+ pub fn get_slice(&self, offset: usize, count: usize) -> Result<&[T]> { + let start = self.base.wrapping_add(offset); + if count.checked_mul(size_of::()).is_none() { + bail_einval!("count 0x{count:x} to validate_slice() is too big"); + } + let size = count * size_of::(); + if size.checked_add(start as usize).is_none() { + bail_einval!( + "invalid parameter to validate_slice(), offset 0x{offset:x}, count 0x{count:x}" + ); + } + let end = start.wrapping_add(size); + if start > end || start < self.base || end < self.base || end > self.end { + bail_einval!( + "invalid range in validate_slice, base 0x{:p}, start 0x{start:p}, end 0x{end:p}", + self.base + ); + } + Ok(unsafe { std::slice::from_raw_parts(start as *const T, count) }) + } + + /// Get a mutable slice of 'T' at 'offset' with 'count' entries. + pub fn get_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { + let start = self.base.wrapping_add(offset); + if count.checked_mul(size_of::()).is_none() { + bail_einval!("count 0x{count:x} to validate_slice() is too big"); + } + let size = count * size_of::(); + if size.checked_add(start as usize).is_none() { + bail_einval!( + "invalid parameter to validate_slice(), offset 0x{offset:x}, count 0x{count:x}" + ); + } + let end = start.wrapping_add(size); + if start > end || start < self.base || end < self.base || end > self.end { + bail_einval!( + "invalid range in validate_slice, base 0x{:p}, start 0x{start:p}, end 0x{end:p}", + self.base + ); + } + Ok(unsafe { std::slice::from_raw_parts_mut(start as *mut T, count) }) + } + + /// Check whether the range [offset, offset + size) is valid and return the start address. + pub fn validate_range(&self, offset: usize, size: usize) -> Result<*const u8> { + let start = self.base.wrapping_add(offset); + let end = start.wrapping_add(size); + + if start > end || start < self.base || end < self.base || end > self.end { + return Err(einval!("invalid range")); + } + + Ok(start) + } + + /// Add `offset` to the base pointer. + /// + /// # Safety + /// The caller should ensure that `offset` is within range. + pub unsafe fn offset(&self, offset: usize) -> *const u8 { + self.base.wrapping_add(offset) + } + + /// Sync mapped file data into disk. + pub fn sync_data(&self) -> Result<()> { + let file = unsafe { File::from_raw_fd(self.fd) }; + let result = file.sync_data(); + std::mem::forget(file); + result + } +} + +/// Duplicate a file object by `libc::dup()`. 
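// Editor's note: a small sketch of the "borrow a File from a RawFd" pattern that
// `sync_data()` above relies on: temporarily wrap the raw fd in a `File` to call a std
// method, then `mem::forget` the wrapper so the fd is not closed twice. The helper name
// `flush_fd_to_disk` and the temp path used in `main` are illustrative only.
use std::fs::File;
use std::io::{Result, Write};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};

fn flush_fd_to_disk(fd: RawFd) -> Result<()> {
    // Safety: the caller keeps `fd` open for the duration of this call; we must not let the
    // temporary `File` close it, hence the `mem::forget` below.
    let file = unsafe { File::from_raw_fd(fd) };
    let result = file.sync_data();
    std::mem::forget(file);
    result
}

fn main() -> Result<()> {
    let mut file = File::create("/tmp/filemap-sync-demo")?;
    file.write_all(b"hello")?;
    flush_fd_to_disk(file.as_raw_fd())?; // `file` still owns the fd and closes it on drop
    Ok(())
}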
+pub fn clone_file(fd: RawFd) -> Result { + unsafe { + let fd = libc::dup(fd); + if fd < 0 { + return Err(last_error!("failed to dup bootstrap file fd")); + } + Ok(File::from_raw_fd(fd)) + } +} + +#[cfg(test)] +mod tests { + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use std::fs::OpenOptions; + use std::path::PathBuf; + + #[test] + fn create_file_map_object() { + let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR"); + let path = PathBuf::from(root_dir).join("../tests/texture/bootstrap/rafs-v5.boot"); + let file = OpenOptions::new() + .read(true) + .write(false) + .open(path) + .unwrap(); + let map = FileMapState::new(file, 0, 4096, false).unwrap(); + + let magic = map.get_ref::(0).unwrap(); + assert_eq!(u32::from_le(*magic), 0x52414653); + + map.get_ref::(4096).unwrap_err(); + let _ = map.get_ref::(4092).unwrap(); + let _ = map.get_ref::(0).unwrap(); + map.validate_range(4096, 1).unwrap_err(); + let _ = map.validate_range(4095, 1).unwrap(); + let _ = map.validate_range(0, 1).unwrap(); + drop(map); + } + + #[test] + fn create_default_file_map_object() { + let map = FileMapState::default(); + drop(map); + } + + #[test] + fn test_file_map_error() { + let temp = TempFile::new().unwrap(); + let file = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + assert!(FileMapState::new(file, 0, 4096, true).is_err()); + + let temp = TempFile::new().unwrap(); + let file = OpenOptions::new() + .read(true) + .write(false) + .open(temp.as_path()) + .unwrap(); + let mut map = FileMapState::new(file, 0, 4096, false).unwrap(); + assert!(map.get_slice::(0, usize::MAX).is_err()); + assert!(map.get_slice::(usize::MAX, 1).is_err()); + assert!(map.get_slice::(4096, 4096).is_err()); + assert!(map.get_slice::(0, 128).is_ok()); + + assert!(map.get_slice_mut::(0, usize::MAX).is_err()); + assert!(map.get_slice_mut::(usize::MAX, 1).is_err()); + assert!(map.get_slice_mut::(4096, 4096).is_err()); + assert!(map.get_slice_mut::(0, 128).is_ok()); + } +} diff --git a/utils/src/inode_bitmap.rs b/utils/src/inode_bitmap.rs index 8ee283a72d4..f12712799ad 100644 --- a/utils/src/inode_bitmap.rs +++ b/utils/src/inode_bitmap.rs @@ -1,192 +1,192 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2021 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::BTreeMap; -use std::fmt::{Debug, Display, Formatter}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::RwLock; - -#[derive(Default)] -pub struct InodeBitmap { - map: RwLock>, -} - -impl Debug for InodeBitmap { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str(self.to_string().as_str()) - } -} - -impl Display for InodeBitmap { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str( - serde_json::json!({"inode_range": self.bitmap_to_array()}) - .to_string() - .as_str(), - ) - } -} - -impl InodeBitmap { - pub fn new() -> Self { - Self::default() - } - - #[inline(always)] - fn get_index_and_mask(ino: u64) -> (u64, u64) { - (ino >> 6, 1_u64 << (ino & 0x3f_u64)) - } - - #[inline(always)] - fn range_to_vec(start: u64, end: u64) -> Vec { - if start == end { - vec![start] - } else { - vec![start, end] - } - } - - pub fn set(&self, ino: u64) { - let (index, mask) = Self::get_index_and_mask(ino); - - let m = self.map.read().unwrap(); - if let Some(v) = m.get(&index) { - v.fetch_or(mask, Ordering::Relaxed); - return; - } - drop(m); - - let mut m = self.map.write().unwrap(); - m.entry(index) - .or_insert_with(|| AtomicU64::new(0)) - .fetch_or(mask, Ordering::Relaxed); - } - - pub fn is_set(&self, ino: u64) -> bool { - let (index, mask) = InodeBitmap::get_index_and_mask(ino); - self.map - .read() - .unwrap() - .get(&index) - .map_or(false, |v| v.load(Ordering::Relaxed) & mask != 0) - } - - pub fn clear(&self, ino: u64) { - let (index, mask) = InodeBitmap::get_index_and_mask(ino); - let m = self.map.read().unwrap(); - - if let Some(v) = m.get(&index) { - v.fetch_and(!mask, Ordering::Relaxed); - } - } - - pub fn clear_all(&self) { - let m = self.map.read().unwrap(); - - for it in m.values() { - it.store(0_u64, Ordering::Relaxed); - } - } - - /// "[[1,5],[8],[10],[100,199],...]" - fn bitmap_to_vec(&self, load: fn(&AtomicU64) -> u64) -> Vec> { - let m = self.map.read().unwrap(); - let mut ret: Vec> = Vec::new(); - let mut start: Option = None; - // 0 is an invalid inode number - let mut last: u64 = 0; - - for it in m.iter() { - let base = it.0 << 6; - let mut v = load(it.1); - - while v != 0 { - // trailing_zeros need rustup version >= 1.46 - let ino = base + v.trailing_zeros() as u64; - v &= v - 1; - start = match start { - None => Some(ino), - Some(s) => { - if ino != last + 1 { - ret.push(InodeBitmap::range_to_vec(s, last)); - Some(ino) - } else { - Some(s) - } - } - }; - last = ino; - } - } - if let Some(s) = start { - ret.push(InodeBitmap::range_to_vec(s, last)); - } - - ret - } - - pub fn bitmap_to_array(&self) -> Vec> { - self.bitmap_to_vec(|v| v.load(Ordering::Relaxed)) - } - - pub fn bitmap_to_array_and_clear(&self) -> Vec> { - self.bitmap_to_vec(|v| v.fetch_and(0_u64, Ordering::Relaxed)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_inode_bitmap() { - let empty: Vec> = Vec::new(); - let m = InodeBitmap::new(); - m.set(1); - m.set(2); - m.set(5); - assert_eq!(m.bitmap_to_array(), [vec![1, 2], vec![5]]); - - assert!(m.is_set(2)); - m.clear(2); - assert!(!m.is_set(2)); - assert_eq!(m.bitmap_to_array(), [[1], [5]]); - - m.set(65); - m.set(66); - m.set(4000); - m.set(40001); - m.set(40002); - m.set(40003); - assert_eq!( - m.bitmap_to_array(), - [ - vec![1], - vec![5], - vec![65, 66], - vec![4000], - vec![40001, 40003] - ] - ); - - m.clear_all(); - assert_eq!(m.bitmap_to_array(), empty); - - m.set(65); - m.set(40001); - 
assert_eq!(m.bitmap_to_array(), [vec![65], vec![40001]]); - - for i in 0..100000 { - m.set(i); - } - m.set(100002); - assert_eq!( - m.bitmap_to_array_and_clear(), - [vec![0, 99999], vec![100002]] - ); - assert!(!m.is_set(9000)); - assert_eq!(m.bitmap_to_array(), empty); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::BTreeMap; +use std::fmt::{Debug, Display, Formatter}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::RwLock; + +#[derive(Default)] +pub struct InodeBitmap { + map: RwLock>, +} + +impl Debug for InodeBitmap { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(self.to_string().as_str()) + } +} + +impl Display for InodeBitmap { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str( + serde_json::json!({"inode_range": self.bitmap_to_array()}) + .to_string() + .as_str(), + ) + } +} + +impl InodeBitmap { + pub fn new() -> Self { + Self::default() + } + + #[inline(always)] + fn get_index_and_mask(ino: u64) -> (u64, u64) { + (ino >> 6, 1_u64 << (ino & 0x3f_u64)) + } + + #[inline(always)] + fn range_to_vec(start: u64, end: u64) -> Vec { + if start == end { + vec![start] + } else { + vec![start, end] + } + } + + pub fn set(&self, ino: u64) { + let (index, mask) = Self::get_index_and_mask(ino); + + let m = self.map.read().unwrap(); + if let Some(v) = m.get(&index) { + v.fetch_or(mask, Ordering::Relaxed); + return; + } + drop(m); + + let mut m = self.map.write().unwrap(); + m.entry(index) + .or_insert_with(|| AtomicU64::new(0)) + .fetch_or(mask, Ordering::Relaxed); + } + + pub fn is_set(&self, ino: u64) -> bool { + let (index, mask) = InodeBitmap::get_index_and_mask(ino); + self.map + .read() + .unwrap() + .get(&index) + .map_or(false, |v| v.load(Ordering::Relaxed) & mask != 0) + } + + pub fn clear(&self, ino: u64) { + let (index, mask) = InodeBitmap::get_index_and_mask(ino); + let m = self.map.read().unwrap(); + + if let Some(v) = m.get(&index) { + v.fetch_and(!mask, Ordering::Relaxed); + } + } + + pub fn clear_all(&self) { + let m = self.map.read().unwrap(); + + for it in m.values() { + it.store(0_u64, Ordering::Relaxed); + } + } + + /// "[[1,5],[8],[10],[100,199],...]" + fn bitmap_to_vec(&self, load: fn(&AtomicU64) -> u64) -> Vec> { + let m = self.map.read().unwrap(); + let mut ret: Vec> = Vec::new(); + let mut start: Option = None; + // 0 is an invalid inode number + let mut last: u64 = 0; + + for it in m.iter() { + let base = it.0 << 6; + let mut v = load(it.1); + + while v != 0 { + // trailing_zeros need rustup version >= 1.46 + let ino = base + v.trailing_zeros() as u64; + v &= v - 1; + start = match start { + None => Some(ino), + Some(s) => { + if ino != last + 1 { + ret.push(InodeBitmap::range_to_vec(s, last)); + Some(ino) + } else { + Some(s) + } + } + }; + last = ino; + } + } + if let Some(s) = start { + ret.push(InodeBitmap::range_to_vec(s, last)); + } + + ret + } + + pub fn bitmap_to_array(&self) -> Vec> { + self.bitmap_to_vec(|v| v.load(Ordering::Relaxed)) + } + + pub fn bitmap_to_array_and_clear(&self) -> Vec> { + self.bitmap_to_vec(|v| v.fetch_and(0_u64, Ordering::Relaxed)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_inode_bitmap() { + let empty: Vec> = Vec::new(); + let m = InodeBitmap::new(); + m.set(1); + m.set(2); + m.set(5); + assert_eq!(m.bitmap_to_array(), [vec![1, 2], vec![5]]); + + assert!(m.is_set(2)); + m.clear(2); + 
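// Editor's note: a worked example of the bit layout used by `InodeBitmap` above. Each u64
// word covers 64 consecutive inode numbers: `ino >> 6` selects the word and
// `1 << (ino & 0x3f)` the bit within it; `trailing_zeros()` plus `v &= v - 1` walks the set
// bits back out in ascending order. Plain u64 values stand in for the AtomicU64 map here.
fn main() {
    let ino: u64 = 65;
    assert_eq!(ino >> 6, 1);             // stored in the second word
    assert_eq!(1u64 << (ino & 0x3f), 2); // as bit 1 inside that word

    // Word index 1 with bits 1 and 2 set corresponds to inodes 65 and 66.
    let (index, mut word) = (1u64, 0b110u64);
    let mut inodes = Vec::new();
    while word != 0 {
        inodes.push((index << 6) + word.trailing_zeros() as u64);
        word &= word - 1; // clear the lowest set bit
    }
    assert_eq!(inodes, vec![65, 66]);
}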
assert!(!m.is_set(2)); + assert_eq!(m.bitmap_to_array(), [[1], [5]]); + + m.set(65); + m.set(66); + m.set(4000); + m.set(40001); + m.set(40002); + m.set(40003); + assert_eq!( + m.bitmap_to_array(), + [ + vec![1], + vec![5], + vec![65, 66], + vec![4000], + vec![40001, 40003] + ] + ); + + m.clear_all(); + assert_eq!(m.bitmap_to_array(), empty); + + m.set(65); + m.set(40001); + assert_eq!(m.bitmap_to_array(), [vec![65], vec![40001]]); + + for i in 0..100000 { + m.set(i); + } + m.set(100002); + assert_eq!( + m.bitmap_to_array_and_clear(), + [vec![0, 99999], vec![100002]] + ); + assert!(!m.is_set(9000)); + assert_eq!(m.bitmap_to_array(), empty); + } +} diff --git a/utils/src/lib.rs b/utils/src/lib.rs index 9a1c4d1f901..a32c65c8836 100644 --- a/utils/src/lib.rs +++ b/utils/src/lib.rs @@ -1,174 +1,174 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -#[macro_use] -extern crate log; -#[macro_use] -extern crate serde; -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate nydus_api; - -use std::convert::{Into, TryFrom, TryInto}; -use std::time::Duration; - -pub use self::exec::*; -pub use self::inode_bitmap::InodeBitmap; -pub use self::reader::*; -pub use self::types::*; - -pub mod async_helper; -pub mod compact; -pub mod compress; -#[cfg(feature = "encryption")] -pub mod crypt; -pub mod digest; -pub mod exec; -pub mod filemap; -pub mod inode_bitmap; -pub mod logger; -pub mod metrics; -pub mod mpmc; -pub mod reader; -pub mod trace; -pub mod types; -pub mod verity; - -/// Round up and divide the value `n` by `d`. -pub fn div_round_up(n: u64, d: u64) -> u64 { - debug_assert!(d != 0); - debug_assert!(d.is_power_of_two()); - (n + d - 1) / d -} - -/// Round up the value `n` to by `d`. -pub fn round_up(n: u64, d: u64) -> u64 { - debug_assert!(d != 0); - debug_assert!(d.is_power_of_two()); - (n + d - 1) / d * d -} - -/// Round up the value `n` to by `d`. -pub fn round_up_usize(n: usize, d: usize) -> usize { - debug_assert!(d != 0); - debug_assert!(d.is_power_of_two()); - (n + d - 1) / d * d -} - -/// Overflow can fail this rounder if the base value is large enough with 4095 added. -pub fn try_round_up_4k, T: Into>(x: T) -> Option { - let t = 4095u64; - if let Some(v) = x.into().checked_add(t) { - let z = v & (!t); - z.try_into().ok() - } else { - None - } -} - -pub fn round_down_4k(x: u64) -> u64 { - x & (!4095u64) -} - -/// Round down the value `n` to by `d`. -pub fn round_down(n: u64, d: u64) -> u64 { - debug_assert!(d != 0); - debug_assert!(d.is_power_of_two()); - n / d * d -} - -pub enum DelayType { - Fixed, - // an exponential delay between each attempts - BackOff, -} - -pub struct Delayer { - r#type: DelayType, - attempts: u32, - time: Duration, -} - -impl Delayer { - pub fn new(t: DelayType, time: Duration) -> Self { - Delayer { - r#type: t, - attempts: 0, - time, - } - } - - pub fn delay(&mut self) { - use std::thread::sleep; - - match self.r#type { - DelayType::Fixed => sleep(self.time), - DelayType::BackOff => sleep((1 << self.attempts) * self.time), - } - self.attempts += 1; - } -} - -struct LazyDrop { - v: T, -} - -unsafe impl Send for LazyDrop {} - -/// Lazy drop of object. 
-pub fn lazy_drop(v: T) { - let v = LazyDrop { v }; - std::thread::spawn(move || { - std::thread::sleep(Duration::from_secs(600)); - let _ = v.v; - }); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_rounders() { - assert_eq!(round_down_4k(0), 0); - assert_eq!(round_down_4k(100), 0); - assert_eq!(round_down_4k(4300), 4096); - assert_eq!(round_down_4k(4096), 4096); - assert_eq!(round_down_4k(4095), 0); - assert_eq!(round_down_4k(4097), 4096); - assert_eq!(round_down_4k(u64::MAX - 1), u64::MAX - 4095); - assert_eq!(round_down_4k(u64::MAX - 4095), u64::MAX - 4095); - // zero is rounded up to zero - assert_eq!(try_round_up_4k::(0u32), Some(0i32)); - assert_eq!(try_round_up_4k::(0u32), Some(0u32)); - assert_eq!(try_round_up_4k::(1u32), Some(4096u32)); - assert_eq!(try_round_up_4k::(100u32), Some(4096u32)); - assert_eq!(try_round_up_4k::(4100u32), Some(8192u32)); - assert_eq!(try_round_up_4k::(4096u32), Some(4096u32)); - assert_eq!(try_round_up_4k::(4095u32), Some(4096u32)); - assert_eq!(try_round_up_4k::(4097u32), Some(8192u32)); - assert_eq!(try_round_up_4k::(u32::MAX), None); - assert_eq!(try_round_up_4k::(u32::MAX), Some(0x1_0000_0000u64)); - assert_eq!(try_round_up_4k::(u64::MAX - 1), None); - assert_eq!(try_round_up_4k::(u64::MAX), None); - assert_eq!(try_round_up_4k::(u64::MAX - 4097), None); - // success - assert_eq!( - try_round_up_4k::(u64::MAX - 4096), - Some(u64::MAX - 4095) - ); - // overflow - assert_eq!(try_round_up_4k::(u64::MAX - 1), None); - // fail to convert u64 to u32 - assert_eq!(try_round_up_4k::(u64::MAX - 4096), None); - } - - #[test] - fn test_round_up_usize() { - assert_eq!(round_up_usize(10, 8), 16); - assert_eq!(round_up_usize(100, 8), 104); - assert_eq!(round_up_usize(1000, 8), 1000); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#[macro_use] +extern crate log; +#[macro_use] +extern crate serde; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate nydus_api; + +use std::convert::{Into, TryFrom, TryInto}; +use std::time::Duration; + +pub use self::exec::*; +pub use self::inode_bitmap::InodeBitmap; +pub use self::reader::*; +pub use self::types::*; + +pub mod async_helper; +pub mod compact; +pub mod compress; +#[cfg(feature = "encryption")] +pub mod crypt; +pub mod digest; +pub mod exec; +pub mod filemap; +pub mod inode_bitmap; +pub mod logger; +pub mod metrics; +pub mod mpmc; +pub mod reader; +pub mod trace; +pub mod types; +pub mod verity; + +/// Round up and divide the value `n` by `d`. +pub fn div_round_up(n: u64, d: u64) -> u64 { + debug_assert!(d != 0); + debug_assert!(d.is_power_of_two()); + (n + d - 1) / d +} + +/// Round up the value `n` to by `d`. +pub fn round_up(n: u64, d: u64) -> u64 { + debug_assert!(d != 0); + debug_assert!(d.is_power_of_two()); + (n + d - 1) / d * d +} + +/// Round up the value `n` to by `d`. +pub fn round_up_usize(n: usize, d: usize) -> usize { + debug_assert!(d != 0); + debug_assert!(d.is_power_of_two()); + (n + d - 1) / d * d +} + +/// Overflow can fail this rounder if the base value is large enough with 4095 added. +pub fn try_round_up_4k, T: Into>(x: T) -> Option { + let t = 4095u64; + if let Some(v) = x.into().checked_add(t) { + let z = v & (!t); + z.try_into().ok() + } else { + None + } +} + +pub fn round_down_4k(x: u64) -> u64 { + x & (!4095u64) +} + +/// Round down the value `n` to by `d`. 
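// Editor's note: a worked example of the add-then-mask trick behind `try_round_up_4k` and
// `round_down_4k` above: adding 4095 and clearing the low 12 bits rounds up, clearing them
// directly rounds down, and `checked_add` catches the overflow case near u64::MAX, which is
// why the fallible variant returns an Option.
fn main() {
    let mask = !4095u64;
    assert_eq!((0u64 + 4095) & mask, 0);        // 0 rounds up to 0
    assert_eq!((1u64 + 4095) & mask, 4096);     // 1..=4096 round up to 4096
    assert_eq!((4097u64 + 4095) & mask, 8192);
    assert_eq!(4097u64 & mask, 4096);           // rounding down just clears the low bits
    assert!(u64::MAX.checked_add(4095).is_none()); // the overflow case try_round_up_4k guards against
}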
+pub fn round_down(n: u64, d: u64) -> u64 { + debug_assert!(d != 0); + debug_assert!(d.is_power_of_two()); + n / d * d +} + +pub enum DelayType { + Fixed, + // an exponential delay between each attempts + BackOff, +} + +pub struct Delayer { + r#type: DelayType, + attempts: u32, + time: Duration, +} + +impl Delayer { + pub fn new(t: DelayType, time: Duration) -> Self { + Delayer { + r#type: t, + attempts: 0, + time, + } + } + + pub fn delay(&mut self) { + use std::thread::sleep; + + match self.r#type { + DelayType::Fixed => sleep(self.time), + DelayType::BackOff => sleep((1 << self.attempts) * self.time), + } + self.attempts += 1; + } +} + +struct LazyDrop { + v: T, +} + +unsafe impl Send for LazyDrop {} + +/// Lazy drop of object. +pub fn lazy_drop(v: T) { + let v = LazyDrop { v }; + std::thread::spawn(move || { + std::thread::sleep(Duration::from_secs(600)); + let _ = v.v; + }); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rounders() { + assert_eq!(round_down_4k(0), 0); + assert_eq!(round_down_4k(100), 0); + assert_eq!(round_down_4k(4300), 4096); + assert_eq!(round_down_4k(4096), 4096); + assert_eq!(round_down_4k(4095), 0); + assert_eq!(round_down_4k(4097), 4096); + assert_eq!(round_down_4k(u64::MAX - 1), u64::MAX - 4095); + assert_eq!(round_down_4k(u64::MAX - 4095), u64::MAX - 4095); + // zero is rounded up to zero + assert_eq!(try_round_up_4k::(0u32), Some(0i32)); + assert_eq!(try_round_up_4k::(0u32), Some(0u32)); + assert_eq!(try_round_up_4k::(1u32), Some(4096u32)); + assert_eq!(try_round_up_4k::(100u32), Some(4096u32)); + assert_eq!(try_round_up_4k::(4100u32), Some(8192u32)); + assert_eq!(try_round_up_4k::(4096u32), Some(4096u32)); + assert_eq!(try_round_up_4k::(4095u32), Some(4096u32)); + assert_eq!(try_round_up_4k::(4097u32), Some(8192u32)); + assert_eq!(try_round_up_4k::(u32::MAX), None); + assert_eq!(try_round_up_4k::(u32::MAX), Some(0x1_0000_0000u64)); + assert_eq!(try_round_up_4k::(u64::MAX - 1), None); + assert_eq!(try_round_up_4k::(u64::MAX), None); + assert_eq!(try_round_up_4k::(u64::MAX - 4097), None); + // success + assert_eq!( + try_round_up_4k::(u64::MAX - 4096), + Some(u64::MAX - 4095) + ); + // overflow + assert_eq!(try_round_up_4k::(u64::MAX - 1), None); + // fail to convert u64 to u32 + assert_eq!(try_round_up_4k::(u64::MAX - 4096), None); + } + + #[test] + fn test_round_up_usize() { + assert_eq!(round_up_usize(10, 8), 16); + assert_eq!(round_up_usize(100, 8), 104); + assert_eq!(round_up_usize(1000, 8), 1000); + } +} diff --git a/utils/src/logger.rs b/utils/src/logger.rs index 1ab187b97b9..3b9b75e72b7 100644 --- a/utils/src/logger.rs +++ b/utils/src/logger.rs @@ -1,110 +1,110 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::VecDeque; -use std::sync::Mutex; -use std::time::SystemTime; - -use serde::Serialize; -use serde_json::Error as SerdeError; - -/// Error codes for `ErrorHolder`. -#[derive(Debug)] -pub enum ErrorHolderError { - TooLarge(usize), - Serde(SerdeError), -} - -/// `Result` specialized for `ErrorHolder`. -pub type Result = std::result::Result; - -/// Struct to record important or critical events or errors in circular buffer mode. -#[derive(Serialize, Default, Debug)] -pub struct ErrorHolder { - max_errors: usize, - total_errors: usize, - max_size: usize, - total_size: usize, - errors: Mutex>, -} - -impl ErrorHolder { - /// Create a `ErrorHolder` object. 
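// Editor's note: a minimal retry loop showing how a `Delayer` in `BackOff` mode (defined
// above) behaves: the wait is `(1 << attempts) * base`, so the sleep doubles on every
// attempt. The `flaky_operation` function and the retry limit are invented for the example.
use std::thread::sleep;
use std::time::Duration;

fn flaky_operation(attempt: u32) -> bool {
    attempt >= 2 // pretend the third try succeeds
}

fn main() {
    let base = Duration::from_millis(10);
    for attempt in 0..5u32 {
        if flaky_operation(attempt) {
            println!("succeeded on attempt {}", attempt);
            break;
        }
        // Same schedule as DelayType::BackOff: 10ms, 20ms, 40ms, ...
        sleep((1u32 << attempt) * base);
    }
}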
- pub fn new(max_errors: usize, max_size: usize) -> Self { - Self { - max_errors, - max_size, - total_errors: 0, - total_size: 0, - errors: Mutex::new(VecDeque::with_capacity(max_errors)), - } - } - - /// Push an error into the circular buffer. - pub fn push(&mut self, error: &str) -> Result<()> { - let mut guard = self.errors.lock().unwrap(); - let formatted_error = format!("{} - {}", httpdate::fmt_http_date(SystemTime::now()), error); - - loop { - if formatted_error.len() + self.total_size > self.max_size - || self.total_errors >= self.max_errors - { - let victim = guard.pop_front(); - match victim { - Some(v) => { - self.total_size -= v.len(); - self.total_errors -= 1; - } - None => return Err(ErrorHolderError::TooLarge(error.len())), - } - } else { - break; - } - } - - self.total_size += formatted_error.len(); - self.total_errors += 1; - guard.push_back(formatted_error); - Ok(()) - } - - /// Export all errors in the circular buffer as an `JSON` string. - pub fn export(&self) -> Result { - let _guard = self.errors.lock().unwrap(); - serde_json::to_string(self).map_err(ErrorHolderError::Serde) - } -} - -#[cfg(test)] -mod tests { - use super::{ErrorHolder, ErrorHolderError}; - - #[test] - fn test_overflow() { - let mut holder = ErrorHolder::new(10, 80); - let error_msg = "123456789"; - let mut left = 16; - while left >= 0 { - let r = holder.push(error_msg); - assert!(r.is_ok()); - left -= 1; - } - - assert!(holder.total_errors <= 10); - assert!(holder.total_size <= 80); - - let mut multi = 10; - let mut error_msg_long = "".to_string(); - while multi >= 0 { - multi -= 1; - error_msg_long.push_str("123456789"); - } - - let r = holder.push(&error_msg_long); - match r { - Err(ErrorHolderError::TooLarge(len)) => assert_eq!(len, error_msg_long.len()), - _ => panic!(), - } - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::VecDeque; +use std::sync::Mutex; +use std::time::SystemTime; + +use serde::Serialize; +use serde_json::Error as SerdeError; + +/// Error codes for `ErrorHolder`. +#[derive(Debug)] +pub enum ErrorHolderError { + TooLarge(usize), + Serde(SerdeError), +} + +/// `Result` specialized for `ErrorHolder`. +pub type Result = std::result::Result; + +/// Struct to record important or critical events or errors in circular buffer mode. +#[derive(Serialize, Default, Debug)] +pub struct ErrorHolder { + max_errors: usize, + total_errors: usize, + max_size: usize, + total_size: usize, + errors: Mutex>, +} + +impl ErrorHolder { + /// Create a `ErrorHolder` object. + pub fn new(max_errors: usize, max_size: usize) -> Self { + Self { + max_errors, + max_size, + total_errors: 0, + total_size: 0, + errors: Mutex::new(VecDeque::with_capacity(max_errors)), + } + } + + /// Push an error into the circular buffer. + pub fn push(&mut self, error: &str) -> Result<()> { + let mut guard = self.errors.lock().unwrap(); + let formatted_error = format!("{} - {}", httpdate::fmt_http_date(SystemTime::now()), error); + + loop { + if formatted_error.len() + self.total_size > self.max_size + || self.total_errors >= self.max_errors + { + let victim = guard.pop_front(); + match victim { + Some(v) => { + self.total_size -= v.len(); + self.total_errors -= 1; + } + None => return Err(ErrorHolderError::TooLarge(error.len())), + } + } else { + break; + } + } + + self.total_size += formatted_error.len(); + self.total_errors += 1; + guard.push_back(formatted_error); + Ok(()) + } + + /// Export all errors in the circular buffer as an `JSON` string. 
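// Editor's note: a stripped-down sketch of the eviction loop in `ErrorHolder::push` above:
// old entries are popped from the front until the new message fits under both the entry
// count and the byte budget. A plain VecDeque stands in for the real holder, without the
// Mutex, timestamp formatting or error type.
use std::collections::VecDeque;

fn push_bounded(buf: &mut VecDeque<String>, total: &mut usize, msg: String,
                max_entries: usize, max_bytes: usize) {
    while *total + msg.len() > max_bytes || buf.len() >= max_entries {
        match buf.pop_front() {
            Some(old) => *total -= old.len(),
            None => return, // the message alone exceeds the budget; drop it
        }
    }
    *total += msg.len();
    buf.push_back(msg);
}

fn main() {
    let (mut buf, mut total) = (VecDeque::new(), 0usize);
    for i in 0..20 {
        push_bounded(&mut buf, &mut total, format!("error {}", i), 5, 80);
    }
    assert!(buf.len() <= 5 && total <= 80);
    assert_eq!(buf.back().map(String::as_str), Some("error 19")); // newest entries survive
}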
+ pub fn export(&self) -> Result { + let _guard = self.errors.lock().unwrap(); + serde_json::to_string(self).map_err(ErrorHolderError::Serde) + } +} + +#[cfg(test)] +mod tests { + use super::{ErrorHolder, ErrorHolderError}; + + #[test] + fn test_overflow() { + let mut holder = ErrorHolder::new(10, 80); + let error_msg = "123456789"; + let mut left = 16; + while left >= 0 { + let r = holder.push(error_msg); + assert!(r.is_ok()); + left -= 1; + } + + assert!(holder.total_errors <= 10); + assert!(holder.total_size <= 80); + + let mut multi = 10; + let mut error_msg_long = "".to_string(); + while multi >= 0 { + multi -= 1; + error_msg_long.push_str("123456789"); + } + + let r = holder.push(&error_msg_long); + match r { + Err(ErrorHolderError::TooLarge(len)) => assert_eq!(len, error_msg_long.len()), + _ => panic!(), + } + } +} diff --git a/utils/src/metrics.rs b/utils/src/metrics.rs index 5810b542e7c..1b921dab056 100644 --- a/utils/src/metrics.rs +++ b/utils/src/metrics.rs @@ -1,1093 +1,1093 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Nydus error events and performance related metrics. -//! -//! There are several types of metrics supported: -//! - Global error events of type [`ErrorHolder`] -//! - Storage backend metrics of type ['BackendMetrics'] -//! - Blobcache metrics of type ['BlobcacheMetrics'] -//! - Filesystem metrics of type ['FsIoStats`], supported by Rafs in fuse/virtiofs only. - -use std::collections::{HashMap, HashSet}; -use std::ops::{Deref, Drop}; -use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex, RwLock}; -use std::time::{Duration, SystemTime}; - -use nydus_api::http::MetricsError; - -use crate::logger::ErrorHolder; -use crate::InodeBitmap; - -/// Type of `inode`. -pub type Inode = u64; - -/// Type of file operation statistics counter. -#[derive(PartialEq, Copy, Clone)] -pub enum StatsFop { - Getattr, - Readlink, - Open, - Release, - Read, - Statfs, - Getxattr, - Listxattr, - Opendir, - Lookup, - Readdir, - Readdirplus, - Access, - Forget, - BatchForget, - Max, -} - -type IoStatsResult = Result; - -// Block size separated counters. -// [0-3]: <1K;1K~;4K~;16K~; -// [5-7]: 64K~;128K~;512K~;1M~ -const BLOCK_READ_SIZES_MAX: usize = 8; - -#[inline] -fn request_size_index(size: usize) -> usize { - let ceil = (size >> 10).leading_zeros(); - let shift = (std::cmp::max(ceil, 53) - 53) << 2; - - (0x0112_2334_5567u64 >> shift) as usize & 0xf -} - -// <=1ms, <=20ms, <=50ms, <=100ms, <=500ms, <=1s, <=2s, >2s -const READ_LATENCY_RANGE_MAX: usize = 8; - -fn latency_millis_range_index(elapsed: u64) -> usize { - match elapsed { - _ if elapsed <= 1 => 0, - _ if elapsed <= 20 => 1, - _ if elapsed <= 50 => 2, - _ if elapsed <= 100 => 3, - _ if elapsed <= 500 => 4, - _ if elapsed <= 1000 => 5, - _ if elapsed <= 2000 => 6, - _ => 7, - } -} - -// <=200us, <=1ms, <=20ms, <=50ms, <=500ms, <=1s, <=2s, >2s -fn latency_micros_range_index(elapsed: u64) -> usize { - match elapsed { - _ if elapsed <= 200 => 0, - _ if elapsed <= 1_000 => 1, - _ if elapsed <= 20_000 => 2, - _ if elapsed <= 50_000 => 3, - _ if elapsed <= 500_000 => 4, - _ if elapsed <= 1_000_000 => 5, - _ if elapsed <= 2_000_000 => 6, - _ => 7, - } -} - -// Defining below global static metrics set so that a specific metrics counter can -// be found as per the rafs backend mountpoint/id. Remind that nydusd can have -// multiple backends mounted. -lazy_static! 
{ - static ref FS_METRICS: RwLock>> = Default::default(); -} - -lazy_static! { - static ref BACKEND_METRICS: RwLock>> = Default::default(); -} - -lazy_static! { - static ref BLOBCACHE_METRICS: RwLock>> = - Default::default(); -} - -lazy_static! { - pub static ref ERROR_HOLDER: Arc> = - Arc::new(Mutex::new(ErrorHolder::new(500, 50 * 1024))); -} - -/// Trait to manipulate per inode statistics metrics. -pub trait InodeStatsCounter { - fn stats_fop_inc(&self, fop: StatsFop); - fn stats_fop_err_inc(&self, fop: StatsFop); - fn stats_cumulative(&self, fop: StatsFop, value: usize); -} - -/// Per inode io statistics metrics. -#[derive(Default, Debug, Serialize)] -pub struct InodeIoStats { - total_fops: BasicMetric, - data_read: BasicMetric, - // Cumulative bytes for different block size. - block_count_read: [BasicMetric; BLOCK_READ_SIZES_MAX], - fop_hits: [BasicMetric; StatsFop::Max as usize], - fop_errors: [BasicMetric; StatsFop::Max as usize], -} - -impl InodeStatsCounter for InodeIoStats { - fn stats_fop_inc(&self, fop: StatsFop) { - self.fop_hits[fop as usize].inc(); - self.total_fops.inc(); - } - - fn stats_fop_err_inc(&self, fop: StatsFop) { - self.fop_errors[fop as usize].inc(); - } - - fn stats_cumulative(&self, fop: StatsFop, value: usize) { - if fop == StatsFop::Read { - self.data_read.add(value as u64); - // Put counters into $BLOCK_READ_COUNT_MAX catagories - // 1K; 4K; 16K; 64K, 128K, 512K, 1M - let idx = request_size_index(value); - self.block_count_read[idx].inc(); - } - } -} - -/// Records how a file is accessed. -/// For security sake, each file can associate an access pattern recorder, which -/// is globally configured through nydusd configuration file. -/// For now, the pattern is composed of: -/// 1. How many times a file is read regardless of io block size and request offset. -/// And this counter can not be cleared. -/// 2. First time point at which this file is read. It's wall-time in unit of seconds. -/// 3. File path relative to current rafs root. -/// -/// Yes, we now don't have an abundant pattern recorder now. It can be negotiated in the -/// future about how to enrich it. -/// -#[derive(Default, Debug, Serialize)] -pub struct AccessPattern { - ino: u64, - nr_read: BasicMetric, - /// In unit of seconds. - first_access_time_secs: AtomicU64, - first_access_time_nanos: AtomicU32, -} - -impl AccessPattern { - fn record_access_time(&self) { - if self.first_access_time_secs.load(Ordering::Relaxed) == 0 { - let t = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap(); - self.first_access_time_secs - .store(t.as_secs(), Ordering::Relaxed); - self.first_access_time_nanos - .store(t.subsec_nanos(), Ordering::Relaxed); - } - } -} - -/// Filesystem level statistics and metrics. -/// -/// Currently only Rafs in Fuse/Virtiofs mode supports filesystem level statistics and metrics. -#[derive(Default, Debug, Serialize)] -pub struct FsIoStats { - // Whether to enable each file accounting switch. - // As fop accounting might consume much memory space, it is disabled by default. - // But global fop accounting is always working within each Rafs. - files_account_enabled: AtomicBool, - access_pattern_enabled: AtomicBool, - record_latest_read_files_enabled: AtomicBool, - // Flag to enable record operation latency. - measure_latency: AtomicBool, - - id: String, - // Total number of files that are currently open. - nr_opens: BasicMetric, - // Total bytes read against the filesystem. - data_read: BasicMetric, - // Cumulative bytes for different block size. 
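// Editor's note: a sketch of the global registry pattern used by FS_METRICS,
// BACKEND_METRICS and BLOBCACHE_METRICS above: each constructor inserts an Arc into a
// process-wide RwLock<HashMap> keyed by id so the export functions can look the counters up
// later by name. std's OnceLock stands in for the lazy_static! macro the crate uses, and the
// `Counter` type is made up for the example.
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, OnceLock, RwLock};

#[derive(Default)]
struct Counter {
    hits: AtomicU64,
}

fn registry() -> &'static RwLock<HashMap<String, Arc<Counter>>> {
    static REGISTRY: OnceLock<RwLock<HashMap<String, Arc<Counter>>>> = OnceLock::new();
    REGISTRY.get_or_init(Default::default)
}

fn new_counter(id: &str) -> Arc<Counter> {
    let c = Arc::new(Counter::default());
    registry().write().unwrap().insert(id.to_string(), c.clone());
    c
}

fn main() {
    let c = new_counter("fs-0");
    c.hits.fetch_add(1, Ordering::Relaxed);
    // An export path only needs the id, much like export_global_stats above.
    let looked_up = registry().read().unwrap().get("fs-0").cloned().unwrap();
    assert_eq!(looked_up.hits.load(Ordering::Relaxed), 1);
}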
- block_count_read: [BasicMetric; BLOCK_READ_SIZES_MAX], - // Counters for successful various file operations. - fop_hits: [BasicMetric; StatsFop::Max as usize], - // Counters for failed file operations. - fop_errors: [BasicMetric; StatsFop::Max as usize], - - // Cumulative latency's life cycle is equivalent to Rafs, unlike incremental - // latency which will be cleared each time dumped. Unit as micro-seconds. - // * @total means io_stats simply adds every fop latency to the counter which is never cleared. - // It is useful for other tools to calculate their metrics report. - fop_cumulative_latency_total: [BasicMetric; StatsFop::Max as usize], - // Record how many times read latency drops to the ranges. - // This helps us to understand the io service time stability. - read_latency_dist: [BasicMetric; READ_LATENCY_RANGE_MAX], - - // Rwlock closes the race that more than one threads are creating counters concurrently. - #[serde(skip_serializing, skip_deserializing)] - file_counters: RwLock>>, - #[serde(skip_serializing, skip_deserializing)] - access_patterns: RwLock>>, - // record regular file read - #[serde(skip_serializing, skip_deserializing)] - recent_read_files: InodeBitmap, -} - -macro_rules! impl_iostat_option { - ($get:ident, $set:ident, $opt:ident) => { - #[inline] - fn $get(&self) -> bool { - self.$opt.load(Ordering::Relaxed) - } - - #[inline] - pub fn $set(&self, switch: bool) { - self.$opt.store(switch, Ordering::Relaxed) - } - }; -} - -impl FsIoStats { - /// Create a new instance of [`FsIoStats`] for filesystem `id`. - pub fn new(id: &str) -> Arc { - let c = Arc::new(FsIoStats { - id: id.to_string(), - ..Default::default() - }); - FS_METRICS - .write() - .unwrap() - .insert(id.to_string(), c.clone()); - c.init(); - c - } - - /// Initialize the [`FsIoStats`] object. - pub fn init(&self) { - self.files_account_enabled.store(false, Ordering::Relaxed); - self.measure_latency.store(true, Ordering::Relaxed); - } - - impl_iostat_option!(files_enabled, toggle_files_recording, files_account_enabled); - impl_iostat_option!( - access_pattern_enabled, - toggle_access_pattern, - access_pattern_enabled - ); - impl_iostat_option!( - record_latest_read_files_enabled, - toggle_latest_read_files_recording, - record_latest_read_files_enabled - ); - - /// Prepare for recording statistics information about `ino`. 
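// Editor's note: a self-contained version of the `impl_iostat_option!` pattern above: a
// declarative macro that stamps out a getter and a setter for each AtomicBool switch, so
// new options need no hand-written boilerplate. Field and method names are shortened for
// the example.
use std::sync::atomic::{AtomicBool, Ordering};

#[derive(Default)]
struct Switches {
    files_enabled: AtomicBool,
    latency_enabled: AtomicBool,
}

macro_rules! impl_switch {
    ($get:ident, $set:ident, $field:ident) => {
        fn $get(&self) -> bool {
            self.$field.load(Ordering::Relaxed)
        }
        fn $set(&self, on: bool) {
            self.$field.store(on, Ordering::Relaxed)
        }
    };
}

impl Switches {
    impl_switch!(files_on, toggle_files, files_enabled);
    impl_switch!(latency_on, toggle_latency, latency_enabled);
}

fn main() {
    let s = Switches::default();
    assert!(!s.files_on());
    s.toggle_files(true);
    assert!(s.files_on() && !s.latency_on());
}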
- pub fn new_file_counter(&self, ino: Inode) { - if self.files_enabled() { - let mut counters = self.file_counters.write().unwrap(); - if counters.get(&ino).is_none() { - counters.insert(ino, Arc::new(InodeIoStats::default())); - } - } - - if self.access_pattern_enabled() { - let mut records = self.access_patterns.write().unwrap(); - if records.get(&ino).is_none() { - records.insert( - ino, - Arc::new(AccessPattern { - ino, - ..Default::default() - }), - ); - } - } - } - - fn file_stats_update(&self, ino: Inode, fop: StatsFop, bsize: usize, success: bool) { - self.fop_update(fop, bsize, success); - - if self.files_enabled() { - let counters = self.file_counters.read().unwrap(); - match counters.get(&ino) { - Some(c) => { - c.stats_fop_inc(fop); - c.stats_cumulative(fop, bsize); - } - None => warn!("No iostats counter for file {}", ino), - } - } - - if self.access_pattern_enabled() && fop == StatsFop::Read { - let records = self.access_patterns.read().unwrap(); - match records.get(&ino) { - Some(r) => { - r.nr_read.inc(); - r.record_access_time(); - } - None => warn!("No pattern record for file {}", ino), - } - } - - if self.record_latest_read_files_enabled() && fop == StatsFop::Read && success { - self.recent_read_files.set(ino); - } - } - - fn fop_update(&self, fop: StatsFop, value: usize, success: bool) { - // Linux kernel no longer splits IO into sizes smaller than 128K. - // So 512K and 1M is added. - // We put block count into 5 catagories e.g. 1K; 4K; 16K; 64K; 128K; 512K; 1M - if fop == StatsFop::Read { - let idx = request_size_index(value); - self.block_count_read[idx].inc() - } - - if success { - self.fop_hits[fop as usize].inc(); - match fop { - StatsFop::Read => self.data_read.add(value as u64), - StatsFop::Open => self.nr_opens.inc(), - StatsFop::Release => self.nr_opens.dec(), - _ => (), - }; - } else { - self.fop_errors[fop as usize].inc(); - } - } - - /// Mark starting of filesystem operation. - pub fn latency_start(&self) -> Option { - if !self.measure_latency.load(Ordering::Relaxed) { - return None; - } - - Some(SystemTime::now()) - } - - /// Mark ending of filesystem operation and record statistics. - pub fn latency_end(&self, start: &Option, fop: StatsFop) { - if let Some(start) = start { - if let Ok(d) = SystemTime::elapsed(start) { - let elapsed = saturating_duration_micros(&d); - self.read_latency_dist[latency_micros_range_index(elapsed)].inc(); - self.fop_cumulative_latency_total[fop as usize].add(elapsed); - } - } - } - - fn export_files_stats(&self) -> Result { - serde_json::to_string( - self.file_counters - .read() - .expect("Not expect poisoned lock") - .deref(), - ) - .map_err(MetricsError::Serialize) - } - - fn export_latest_read_files(&self) -> String { - serde_json::json!(self.recent_read_files.bitmap_to_array_and_clear()).to_string() - } - - fn export_files_access_patterns(&self) -> Result { - serde_json::to_string( - &self - .access_patterns - .read() - .expect("Not poisoned lock") - .deref() - .values() - .filter(|r| r.nr_read.count() != 0) - .collect::>>(), - ) - .map_err(MetricsError::Serialize) - } - - fn export_fs_stats(&self) -> Result { - serde_json::to_string(self).map_err(MetricsError::Serialize) - } -} - -/// Guard object to record file operation metrics associated with an inode. -/// -/// Call its `settle()` method to generate an on-stack recorder. -/// If the operation succeeds, call `mark_success()` to change the recorder's internal state. -/// If the operation fails, its internal state will not be changed. 
-/// Finally, when the recorder is being destroyed, iostats counter will be updated. -pub struct FopRecorder<'a> { - fop: StatsFop, - inode: u64, - success: bool, - // Now, the size only makes sense for `Read` FOP. - size: usize, - ios: &'a FsIoStats, -} - -impl<'a> Drop for FopRecorder<'a> { - fn drop(&mut self) { - self.ios - .file_stats_update(self.inode, self.fop, self.size, self.success); - } -} - -impl<'a> FopRecorder<'a> { - /// Create a guard object for filesystem operation `fop` associated with `inode`. - pub fn settle(fop: StatsFop, inode: u64, ios: &'a T) -> Self - where - T: AsRef, - { - FopRecorder { - fop, - inode, - success: false, - size: 0, - ios: ios.as_ref(), - } - } - - /// Mark operation as success. - pub fn mark_success(&mut self, size: usize) { - self.success = true; - self.size = size; - } -} - -/// Export file metrics of a filesystem. -pub fn export_files_stats( - name: &Option, - latest_read_files: bool, -) -> Result { - let fs_metrics = FS_METRICS.read().unwrap(); - - match name { - Some(k) => fs_metrics.get(k).ok_or(MetricsError::NoCounter).map(|v| { - if !latest_read_files { - v.export_files_stats() - } else { - Ok(v.export_latest_read_files()) - } - })?, - None => { - if fs_metrics.len() == 1 { - if let Some(ios) = fs_metrics.values().next() { - return if !latest_read_files { - ios.export_files_stats() - } else { - Ok(ios.export_latest_read_files()) - }; - } - } - Err(MetricsError::NoCounter) - } - } -} - -/// Export file access pattern of a filesystem. -pub fn export_files_access_pattern(name: &Option) -> Result { - let fs_metrics = FS_METRICS.read().unwrap(); - match name { - Some(k) => fs_metrics - .get(k) - .ok_or(MetricsError::NoCounter) - .map(|v| v.export_files_access_patterns())?, - None => { - if fs_metrics.len() == 1 { - if let Some(ios) = fs_metrics.values().next() { - return ios.export_files_access_patterns(); - } - } - Err(MetricsError::NoCounter) - } - } -} - -/// Export filesystem metrics. -pub fn export_global_stats(name: &Option) -> Result { - // With only one rafs instance, we allow caller to ask for an unknown ios name. - let fs_metrics = FS_METRICS.read().unwrap(); - - match name { - Some(k) => fs_metrics - .get(k) - .ok_or(MetricsError::NoCounter) - .map(|v| v.export_fs_stats())?, - None => { - if fs_metrics.len() == 1 { - if let Some(ios) = fs_metrics.values().next() { - return ios.export_fs_stats(); - } - } - Err(MetricsError::NoCounter) - } - } -} - -/// Export storage backend metrics. -pub fn export_backend_metrics(name: &Option) -> IoStatsResult { - let metrics = BACKEND_METRICS.read().unwrap(); - - match name { - Some(k) => metrics - .get(k) - .ok_or(MetricsError::NoCounter) - .map(|v| v.export_metrics())?, - None => { - if metrics.len() == 1 { - if let Some(m) = metrics.values().next() { - return m.export_metrics(); - } - } - Err(MetricsError::NoCounter) - } - } -} - -/// Export blob cache metircs. -pub fn export_blobcache_metrics(id: &Option) -> IoStatsResult { - let metrics = BLOBCACHE_METRICS.read().unwrap(); - - match id { - Some(k) => metrics - .get(k) - .ok_or(MetricsError::NoCounter) - .map(|v| v.export_metrics())?, - None => { - if metrics.len() == 1 { - if let Some(m) = metrics.values().next() { - return m.export_metrics(); - } - } - Err(MetricsError::NoCounter) - } - } -} - -/// Export global error events. -pub fn export_events() -> IoStatsResult { - serde_json::to_string(ERROR_HOLDER.lock().unwrap().deref()).map_err(MetricsError::Serialize) -} - -/// Trait to manipulate metric counters. 
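// Editor's note: a minimal sketch of the drop-guard idea behind `FopRecorder` above: the
// guard is created when an operation starts, `mark_success`-style state is flipped on the
// happy path, and the counters are updated exactly once when the guard goes out of scope,
// whichever exit path the function takes. Two plain AtomicU64 counters replace the full
// FsIoStats tables here.
use std::sync::atomic::{AtomicU64, Ordering};

#[derive(Default)]
struct Counters {
    hits: AtomicU64,
    errors: AtomicU64,
}

struct OpGuard<'a> {
    success: bool,
    counters: &'a Counters,
}

impl Drop for OpGuard<'_> {
    fn drop(&mut self) {
        let c = if self.success { &self.counters.hits } else { &self.counters.errors };
        c.fetch_add(1, Ordering::Relaxed);
    }
}

fn read_op(counters: &Counters, fail: bool) -> Result<(), ()> {
    let mut guard = OpGuard { success: false, counters };
    if fail {
        return Err(()); // early return: the guard still records an error on drop
    }
    guard.success = true; // the equivalent of FopRecorder::mark_success
    Ok(())
}

fn main() {
    let counters = Counters::default();
    let _ = read_op(&counters, false);
    let _ = read_op(&counters, true);
    assert_eq!(counters.hits.load(Ordering::Relaxed), 1);
    assert_eq!(counters.errors.load(Ordering::Relaxed), 1);
}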
-pub trait Metric { - /// Adds `value` to the current counter. - fn add(&self, value: u64); - /// Increments by 1 unit the current counter. - fn inc(&self) { - self.add(1); - } - /// Returns current value of the counter. - fn count(&self) -> u64; - /// Subtract `value` from the current counter. - fn sub(&self, value: u64); - /// Decrease the current counter. - fn dec(&self) { - self.sub(1); - } - - fn set(&self, value: u64); -} - -/// Basic 64-bit metric counter. -#[derive(Default, Serialize, Debug)] -pub struct BasicMetric(AtomicU64); - -impl Metric for BasicMetric { - fn add(&self, value: u64) { - self.0.fetch_add(value, Ordering::Relaxed); - } - - fn count(&self) -> u64 { - self.0.load(Ordering::Relaxed) - } - - fn sub(&self, value: u64) { - self.0.fetch_sub(value, Ordering::Relaxed); - } - - fn set(&self, value: u64) { - self.0.store(value, Ordering::Relaxed); - } -} - -/// Metrics for storage backends. -#[derive(Default, Serialize, Debug)] -pub struct BackendMetrics { - #[serde(skip_serializing, skip_deserializing)] - id: String, - // type of storage backend. - backend_type: String, - // Cumulative count of read request to backend - read_count: BasicMetric, - // Cumulative count of read failure to backend - read_errors: BasicMetric, - // Cumulative amount of data from to backend in unit of Byte. External tools - // are responsible for calculating BPS from this field. - read_amount_total: BasicMetric, - // In unit of millisecond - read_cumulative_latency_millis_total: BasicMetric, - read_cumulative_latency_millis_dist: [BasicMetric; BLOCK_READ_SIZES_MAX], - read_count_block_size_dist: [BasicMetric; BLOCK_READ_SIZES_MAX], - // Categorize metrics as per their latency and request size - read_latency_sizes_dist: [[BasicMetric; READ_LATENCY_RANGE_MAX]; BLOCK_READ_SIZES_MAX], -} - -impl BackendMetrics { - /// Create a [`BackendMetrics`] object for a storage backend. - pub fn new(id: &str, backend_type: &str) -> Arc { - let backend_metrics = Arc::new(Self { - id: id.to_string(), - backend_type: backend_type.to_string(), - ..Default::default() - }); - - BACKEND_METRICS - .write() - .unwrap() - .insert(id.to_string(), backend_metrics.clone()); - - backend_metrics - } - - /// Release a [`BackendMetrics`] object for a storage backend. - pub fn release(&self) -> IoStatsResult<()> { - BACKEND_METRICS - .write() - .unwrap() - .remove(&self.id) - .map(|_| ()) - .ok_or(MetricsError::NoCounter) - } - - /// Mark starting of an IO operations. - pub fn begin(&self) -> SystemTime { - SystemTime::now() - } - - /// Mark ending of an IO operations. - pub fn end(&self, begin: &SystemTime, size: usize, error: bool) { - if let Ok(d) = SystemTime::elapsed(begin) { - let elapsed = saturating_duration_millis(&d); - - self.read_count.inc(); - if error { - self.read_errors.inc(); - } - - self.read_cumulative_latency_millis_total.add(elapsed); - self.read_amount_total.add(size as u64); - let lat_idx = latency_millis_range_index(elapsed); - let size_idx = request_size_index(size); - self.read_cumulative_latency_millis_dist[size_idx].add(elapsed); - self.read_count_block_size_dist[size_idx].inc(); - self.read_latency_sizes_dist[size_idx][lat_idx].inc(); - } - } - - fn export_metrics(&self) -> IoStatsResult { - serde_json::to_string(self).map_err(MetricsError::Serialize) - } -} - -// This function assumes that the counted duration won't be too long. 
-fn saturating_duration_millis(d: &Duration) -> u64 { - let d_secs = d.as_secs(); - if d_secs == 0 { - d.subsec_millis() as u64 - } else { - d_secs - .saturating_mul(1000) - .saturating_add(d.subsec_millis() as u64) - } -} - -fn saturating_duration_micros(d: &Duration) -> u64 { - let d_secs = d.as_secs(); - if d_secs == 0 { - d.subsec_micros() as u64 - } else { - d_secs - .saturating_mul(1_000_000) - .saturating_add(d.subsec_micros() as u64) - } -} - -#[derive(Debug, Default, Serialize)] -pub struct BlobcacheMetrics { - #[serde(skip_serializing, skip_deserializing)] - id: String, - // Prefer to let external tool get file's state like file size and disk usage. - // Because stat(2) file may get blocked. - // It should include the real blob cache file names, so that the external GC - // process can handle it directly. - pub underlying_files: Mutex>, - pub store_path: String, - // Cache hit percentage = (partial_hits + whole_hits) / total - pub partial_hits: BasicMetric, - pub whole_hits: BasicMetric, - // How many `read` requests are processed by the blobcache instance. - // This metric will be helpful when comparing with cache hits times. - pub total: BasicMetric, - // Scale of blobcache. Blobcache does not evict entries. - // Means the number of chunks in ready status. - pub entries_count: BasicMetric, - // Together with below two fields, we can figure out average merging size thus - // to estimate the possibility to merge backend IOs. - // In unit of Bytes - pub prefetch_data_amount: BasicMetric, - // Total prefetch requests issued from storage/blobs or rafs filesystem layer for each file that needs prefetch - pub prefetch_requests_count: BasicMetric, - pub prefetch_workers: AtomicUsize, - pub prefetch_unmerged_chunks: BasicMetric, - // Cumulative time latencies of each prefetch request which can be handled in parallel. - // It starts when the request is born including nydusd processing and schedule and end when the chunk is downloaded and stored. - // Then the average prefetch latency can be calculated by - // `prefetch_cumulative_time_millis / prefetch_requests_count` - pub prefetch_cumulative_time_millis: BasicMetric, - // The time seconds part when nydusd begins to prefetch - // We can calculate prefetch average bandwidth by - // `prefetch_data_amount / (prefetch_end_time_secs - prefetch_begin_time_secs)`. Note, it does not take milliseconds into account yet.s - pub prefetch_begin_time_secs: BasicMetric, - // The time milliseconds part when nydusd begins to prefetch - pub prefetch_begin_time_millis: BasicMetric, - // The time seconds part when nydusd ends prefetching - pub prefetch_end_time_secs: BasicMetric, - // The time milliseconds part when nydusd ends prefetching - pub prefetch_end_time_millis: BasicMetric, - pub buffered_backend_size: BasicMetric, - pub data_all_ready: AtomicBool, -} - -impl BlobcacheMetrics { - /// Create a [`BlobcacheMetrics`] object for a blob cache manager. - pub fn new(id: &str, store_path: &str) -> Arc { - let metrics = Arc::new(Self { - id: id.to_string(), - store_path: store_path.to_string(), - ..Default::default() - }); - - // Old metrics will be dropped when BlobCache is swapped. So we don't - // have to worry about swapping its metrics either which means it's - // not necessary to release metrics recorder when blobcache is dropped due to swapping. - BLOBCACHE_METRICS - .write() - .unwrap() - .insert(id.to_string(), metrics.clone()); - - metrics - } - - /// Release a [`BlobcacheMetrics`] object for a blob cache manager. 
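// Editor's note: a worked example of the saturating millisecond conversion defined above.
// The seconds part is multiplied by 1000 with saturating arithmetic so a pathological
// Duration clamps to u64::MAX instead of panicking, while sub-second durations take the
// cheap path that only reads the millisecond component.
use std::time::Duration;

fn saturating_millis(d: &Duration) -> u64 {
    let secs = d.as_secs();
    if secs == 0 {
        d.subsec_millis() as u64
    } else {
        secs.saturating_mul(1000).saturating_add(d.subsec_millis() as u64)
    }
}

fn main() {
    assert_eq!(saturating_millis(&Duration::from_millis(999)), 999);
    assert_eq!(saturating_millis(&Duration::from_millis(1234)), 1234);
    assert_eq!(saturating_millis(&Duration::new(u64::MAX, 999_000_000)), u64::MAX);
}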
- pub fn release(&self) -> IoStatsResult<()> { - BLOBCACHE_METRICS - .write() - .unwrap() - .remove(&self.id) - .map(|_| ()) - .ok_or(MetricsError::NoCounter) - } - - /// Export blobcache metric information. - pub fn export_metrics(&self) -> IoStatsResult { - serde_json::to_string(self).map_err(MetricsError::Serialize) - } - - pub fn calculate_prefetch_metrics(&self, begin_time: SystemTime) { - let now = SystemTime::now(); - if let Ok(ref t) = now.duration_since(SystemTime::UNIX_EPOCH) { - self.prefetch_end_time_secs.set(t.as_secs()); - self.prefetch_end_time_millis.set(t.subsec_millis() as u64); - } - if let Ok(ref t) = now.duration_since(begin_time) { - let elapsed = saturating_duration_millis(t); - self.prefetch_cumulative_time_millis.add(elapsed); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_request_size_index() { - assert_eq!(request_size_index(0x0), 0); - assert_eq!(request_size_index(0x3ff), 0); - assert_eq!(request_size_index(0x400), 1); - assert_eq!(request_size_index(0xfff), 1); - assert_eq!(request_size_index(0x1000), 2); - assert_eq!(request_size_index(0x3fff), 2); - assert_eq!(request_size_index(0x4000), 3); - assert_eq!(request_size_index(0xffff), 3); - assert_eq!(request_size_index(0x1_0000), 4); - assert_eq!(request_size_index(0x1_ffff), 4); - assert_eq!(request_size_index(0x2_0000), 5); - assert_eq!(request_size_index(0x7_ffff), 5); - assert_eq!(request_size_index(0x8_0000), 6); - assert_eq!(request_size_index(0xf_ffff), 6); - assert_eq!(request_size_index(0x10_0000), 7); - assert_eq!(request_size_index(usize::MAX), 7); - } - - #[test] - fn test_block_read_count() { - let g = FsIoStats::default(); - g.init(); - g.fop_update(StatsFop::Read, 4000, true); - assert_eq!(g.block_count_read[1].count(), 1); - - g.fop_update(StatsFop::Read, 4096, true); - assert_eq!(g.block_count_read[2].count(), 1); - - g.fop_update(StatsFop::Read, 65535, true); - assert_eq!(g.block_count_read[3].count(), 1); - - g.fop_update(StatsFop::Read, 131072, true); - assert_eq!(g.block_count_read[5].count(), 1); - - g.fop_update(StatsFop::Read, 65520, true); - assert_eq!(g.block_count_read[3].count(), 2); - - g.fop_update(StatsFop::Read, 2015520, true); - assert_eq!(g.block_count_read[3].count(), 2); - } - - #[test] - fn test_latency_millis_range_index() { - assert_eq!(latency_millis_range_index(0), 0); - assert_eq!(latency_millis_range_index(1), 0); - assert_eq!(latency_millis_range_index(10), 1); - assert_eq!(latency_millis_range_index(20), 1); - assert_eq!(latency_millis_range_index(40), 2); - assert_eq!(latency_millis_range_index(80), 3); - assert_eq!(latency_millis_range_index(160), 4); - assert_eq!(latency_millis_range_index(320), 4); - assert_eq!(latency_millis_range_index(640), 5); - assert_eq!(latency_millis_range_index(1280), 6); - assert_eq!(latency_millis_range_index(2560), 7); - } - - #[test] - fn test_latency_micros_range_index() { - assert_eq!(latency_micros_range_index(100), 0); - assert_eq!(latency_micros_range_index(500), 1); - assert_eq!(latency_micros_range_index(10_000), 2); - assert_eq!(latency_micros_range_index(30_000), 3); - assert_eq!(latency_micros_range_index(100_000), 4); - assert_eq!(latency_micros_range_index(1_000_000), 5); - assert_eq!(latency_micros_range_index(1_500_000), 6); - assert_eq!(latency_micros_range_index(3_000_000), 7); - } - - #[test] - fn test_inode_stats() { - let stat = InodeIoStats::default(); - stat.stats_fop_inc(StatsFop::Read); - stat.stats_fop_inc(StatsFop::Open); - assert_eq!(stat.fop_hits[StatsFop::Read as 
usize].count(), 1); - assert_eq!(stat.total_fops.count(), 2); - - stat.stats_cumulative(StatsFop::Open, 1000); - stat.stats_cumulative(StatsFop::Read, 4000); - stat.stats_cumulative(StatsFop::Read, 5000); - - assert_eq!(stat.block_count_read[0].count(), 0); - assert_eq!(stat.block_count_read[1].count(), 1); - assert_eq!(stat.block_count_read[2].count(), 1); - } - - #[test] - fn test_access_pattern() { - let ap = AccessPattern::default(); - ap.record_access_time(); - assert_ne!(ap.first_access_time_secs.load(Ordering::Relaxed), 0); - assert_ne!(ap.first_access_time_nanos.load(Ordering::Relaxed), 0); - } - - #[test] - fn test_file_stats_update() { - let f = FsIoStats::default(); - let node1: Inode = 1; - let node2: Inode = 2; - let node3: Inode = 3; - - f.new_file_counter(node1); - f.new_file_counter(node2); - assert!(f.access_patterns.read().unwrap().is_empty()); - assert!(f.file_counters.read().unwrap().is_empty()); - - f.access_pattern_enabled.store(true, Ordering::Relaxed); - f.files_account_enabled.store(true, Ordering::Relaxed); - f.record_latest_read_files_enabled - .store(true, Ordering::Relaxed); - f.new_file_counter(node1); - f.new_file_counter(node2); - f.file_stats_update(node1, StatsFop::Read, 4000, true); - f.file_stats_update(node1, StatsFop::Read, 5000, true); - f.file_stats_update(node1, StatsFop::Open, 0, true); - f.file_stats_update(node3, StatsFop::Open, 0, true); - assert_eq!( - f.access_patterns - .read() - .unwrap() - .get(&node1) - .unwrap() - .nr_read - .count(), - 2 - ); - assert_eq!( - f.file_counters - .read() - .unwrap() - .get(&node1) - .unwrap() - .fop_hits[StatsFop::Read as usize] - .count(), - 2 - ); - assert!(f.recent_read_files.is_set(node1 as u64)); - } - - #[test] - fn test_fop_update() { - let f = FsIoStats::default(); - assert_eq!(f.nr_opens.count(), 0); - f.fop_update(StatsFop::Open, 0, true); - assert_eq!(f.nr_opens.count(), 1); - f.fop_update(StatsFop::Release, 0, true); - assert_eq!(f.nr_opens.count(), 0); - f.fop_update(StatsFop::Opendir, 0, true); - assert_eq!(f.fop_errors[StatsFop::Opendir as usize].count(), 0); - f.fop_update(StatsFop::Opendir, 0, false); - assert_eq!(f.fop_errors[StatsFop::Opendir as usize].count(), 1); - } - - #[test] - fn test_latecny() { - let f = FsIoStats::default(); - assert_eq!(f.latency_start(), None); - f.measure_latency.store(true, Ordering::Relaxed); - let s = f.latency_start().unwrap(); - let d = Duration::new(1, 500_000_000); - /* because of the timer resolution, the elapsed maybe greater than 1.5s gentlely*/ - f.latency_end(&s.checked_sub(d), StatsFop::Read); - assert_eq!( - f.read_latency_dist[latency_micros_range_index(1_500_000)].count(), - 1 - ); - /* we think if the latency delta error no more 1ms, the test is successful. 
*/ - assert!( - f.fop_cumulative_latency_total[StatsFop::Read as usize].count() - - saturating_duration_micros(&d) - <= 1000 - ); - } - - #[test] - fn test_fs_io_stats_new_and_export() { - let id0: Option = Some("id-0".to_string()); - let id1: Option = Some("id-1".to_string()); - let none: Option = None; - - let _f1 = FsIoStats::new("id-0"); - assert!(export_files_stats(&id0, true).is_ok()); - assert!(export_files_stats(&none, true).is_ok()); - assert!(export_global_stats(&id0).is_ok()); - assert!(export_global_stats(&id1).is_err()); - assert!(export_global_stats(&none).is_ok()); - - let _f2 = FsIoStats::new("id-1"); - assert!(export_files_stats(&none, false).is_err()); - assert!(export_files_stats(&id0, true).is_ok()); - assert!(export_files_stats(&id0, false).is_ok()); - assert!(export_global_stats(&none).is_err()); - assert!(export_files_access_pattern(&id0).is_ok()); - assert!(export_files_access_pattern(&none).is_err()); - - let ios = FsIoStats::default(); - assert!(ios.export_files_access_patterns().is_ok()); - assert!(ios.export_files_stats().is_ok()); - assert!(ios.export_fs_stats().is_ok()); - ios.export_latest_read_files(); - - test_fop_record(); - } - - fn test_fop_record() { - let ios = FsIoStats::new("0"); - let mut recorder = FopRecorder::settle(StatsFop::Read, 0, &ios); - assert!(!recorder.success); - assert_eq!(recorder.size, 0); - - recorder.mark_success(10); - assert!(recorder.success); - assert_eq!(recorder.size, 10); - drop(recorder); - } - - #[test] - fn test_saturating_duration() { - assert_eq!( - saturating_duration_millis(&Duration::from_millis(1234)), - 1234 - ); - assert_eq!( - saturating_duration_micros(&Duration::from_millis(888)), - 888_000 - ); - assert_eq!( - saturating_duration_micros(&Duration::from_millis(1888)), - 1_888_000 - ); - } - - #[test] - fn test_blob_cache_metric() { - let m1: Arc = BlobcacheMetrics::new("id", "path"); - { - let metrics = BLOBCACHE_METRICS.read().unwrap(); - assert_eq!(metrics.len(), 1); - } - assert!(m1.export_metrics().is_ok()); - assert!(m1.release().is_ok()); - { - let metrics = BLOBCACHE_METRICS.read().unwrap(); - assert_eq!(metrics.len(), 0); - } - - let now = SystemTime::now(); - let prev = now.checked_sub(Duration::new(10, 0)).unwrap(); - m1.calculate_prefetch_metrics(prev); - assert_eq!(m1.prefetch_cumulative_time_millis.count(), 10_000); - assert_eq!( - m1.prefetch_end_time_secs.count(), - now.duration_since(SystemTime::UNIX_EPOCH) - .expect("No error") - .as_secs() - ); - - let id0: Option = Some("id-0".to_string()); - let none: Option = None; - BlobcacheMetrics::new("id-0", "t0"); - assert!(export_blobcache_metrics(&id0).is_ok()); - assert!(export_blobcache_metrics(&none).is_ok()); - BlobcacheMetrics::new("id-1", "t1"); - assert!(export_blobcache_metrics(&none).is_err()); - assert!(export_events().is_ok()); - } - - #[test] - fn test_backend_metric() { - let id0: Option = Some("id-0".to_string()); - let id1: Option = Some("id-1".to_string()); - let none: Option = None; - let b0 = BackendMetrics::new("id-0", "t0"); - assert!(export_backend_metrics(&id0).is_ok()); - assert!(export_backend_metrics(&id1).is_err()); - assert!(export_backend_metrics(&none).is_ok()); - let b1 = BackendMetrics::new("id-1", "t1"); - assert!(export_backend_metrics(&id0).is_ok()); - assert!(export_backend_metrics(&id1).is_ok()); - assert!(export_backend_metrics(&none).is_err()); - assert!(b0.release().is_ok()); - assert!(b1.release().is_ok()); - } -} +// Copyright 2020 Ant Group. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +//! Nydus error events and performance related metrics. +//! +//! There are several types of metrics supported: +//! - Global error events of type [`ErrorHolder`] +//! - Storage backend metrics of type ['BackendMetrics'] +//! - Blobcache metrics of type ['BlobcacheMetrics'] +//! - Filesystem metrics of type ['FsIoStats`], supported by Rafs in fuse/virtiofs only. + +use std::collections::{HashMap, HashSet}; +use std::ops::{Deref, Drop}; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex, RwLock}; +use std::time::{Duration, SystemTime}; + +use nydus_api::http::MetricsError; + +use crate::logger::ErrorHolder; +use crate::InodeBitmap; + +/// Type of `inode`. +pub type Inode = u64; + +/// Type of file operation statistics counter. +#[derive(PartialEq, Copy, Clone)] +pub enum StatsFop { + Getattr, + Readlink, + Open, + Release, + Read, + Statfs, + Getxattr, + Listxattr, + Opendir, + Lookup, + Readdir, + Readdirplus, + Access, + Forget, + BatchForget, + Max, +} + +type IoStatsResult = Result; + +// Block size separated counters. +// [0-3]: <1K;1K~;4K~;16K~; +// [5-7]: 64K~;128K~;512K~;1M~ +const BLOCK_READ_SIZES_MAX: usize = 8; + +#[inline] +fn request_size_index(size: usize) -> usize { + let ceil = (size >> 10).leading_zeros(); + let shift = (std::cmp::max(ceil, 53) - 53) << 2; + + (0x0112_2334_5567u64 >> shift) as usize & 0xf +} + +// <=1ms, <=20ms, <=50ms, <=100ms, <=500ms, <=1s, <=2s, >2s +const READ_LATENCY_RANGE_MAX: usize = 8; + +fn latency_millis_range_index(elapsed: u64) -> usize { + match elapsed { + _ if elapsed <= 1 => 0, + _ if elapsed <= 20 => 1, + _ if elapsed <= 50 => 2, + _ if elapsed <= 100 => 3, + _ if elapsed <= 500 => 4, + _ if elapsed <= 1000 => 5, + _ if elapsed <= 2000 => 6, + _ => 7, + } +} + +// <=200us, <=1ms, <=20ms, <=50ms, <=500ms, <=1s, <=2s, >2s +fn latency_micros_range_index(elapsed: u64) -> usize { + match elapsed { + _ if elapsed <= 200 => 0, + _ if elapsed <= 1_000 => 1, + _ if elapsed <= 20_000 => 2, + _ if elapsed <= 50_000 => 3, + _ if elapsed <= 500_000 => 4, + _ if elapsed <= 1_000_000 => 5, + _ if elapsed <= 2_000_000 => 6, + _ => 7, + } +} + +// Defining below global static metrics set so that a specific metrics counter can +// be found as per the rafs backend mountpoint/id. Remind that nydusd can have +// multiple backends mounted. +lazy_static! { + static ref FS_METRICS: RwLock>> = Default::default(); +} + +lazy_static! { + static ref BACKEND_METRICS: RwLock>> = Default::default(); +} + +lazy_static! { + static ref BLOBCACHE_METRICS: RwLock>> = + Default::default(); +} + +lazy_static! { + pub static ref ERROR_HOLDER: Arc> = + Arc::new(Mutex::new(ErrorHolder::new(500, 50 * 1024))); +} + +/// Trait to manipulate per inode statistics metrics. +pub trait InodeStatsCounter { + fn stats_fop_inc(&self, fop: StatsFop); + fn stats_fop_err_inc(&self, fop: StatsFop); + fn stats_cumulative(&self, fop: StatsFop, value: usize); +} + +/// Per inode io statistics metrics. +#[derive(Default, Debug, Serialize)] +pub struct InodeIoStats { + total_fops: BasicMetric, + data_read: BasicMetric, + // Cumulative bytes for different block size. 
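The `request_size_index` helper above packs the eight size buckets into one shifted lookup constant, which is compact but opaque. As a reading aid only, here is a more literal sketch of the same mapping (the `request_size_index_naive` name is made up for illustration); the expected bucket indices match the module's `test_request_size_index` cases:

```rust
/// Illustrative re-statement of the size-bucket mapping used by
/// `request_size_index`: <1K, 1K~, 4K~, 16K~, 64K~, 128K~, 512K~, >=1M.
fn request_size_index_naive(size: usize) -> usize {
    match size {
        s if s < 0x400 => 0,     // < 1 KiB
        s if s < 0x1000 => 1,    // 1 KiB  .. 4 KiB
        s if s < 0x4000 => 2,    // 4 KiB  .. 16 KiB
        s if s < 0x1_0000 => 3,  // 16 KiB .. 64 KiB
        s if s < 0x2_0000 => 4,  // 64 KiB .. 128 KiB
        s if s < 0x8_0000 => 5,  // 128 KiB .. 512 KiB
        s if s < 0x10_0000 => 6, // 512 KiB .. 1 MiB
        _ => 7,                  // >= 1 MiB
    }
}

fn main() {
    // Same expectations as the module's `test_request_size_index` unit test.
    assert_eq!(request_size_index_naive(0x3ff), 0);
    assert_eq!(request_size_index_naive(0x1000), 2);
    assert_eq!(request_size_index_naive(0xffff), 3);
    assert_eq!(request_size_index_naive(usize::MAX), 7);
}
```

The bit-trick version avoids branches; both return the bucket index used for `block_count_read` and the per-size latency distributions.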
+ block_count_read: [BasicMetric; BLOCK_READ_SIZES_MAX], + fop_hits: [BasicMetric; StatsFop::Max as usize], + fop_errors: [BasicMetric; StatsFop::Max as usize], +} + +impl InodeStatsCounter for InodeIoStats { + fn stats_fop_inc(&self, fop: StatsFop) { + self.fop_hits[fop as usize].inc(); + self.total_fops.inc(); + } + + fn stats_fop_err_inc(&self, fop: StatsFop) { + self.fop_errors[fop as usize].inc(); + } + + fn stats_cumulative(&self, fop: StatsFop, value: usize) { + if fop == StatsFop::Read { + self.data_read.add(value as u64); + // Put counters into $BLOCK_READ_COUNT_MAX catagories + // 1K; 4K; 16K; 64K, 128K, 512K, 1M + let idx = request_size_index(value); + self.block_count_read[idx].inc(); + } + } +} + +/// Records how a file is accessed. +/// For security sake, each file can associate an access pattern recorder, which +/// is globally configured through nydusd configuration file. +/// For now, the pattern is composed of: +/// 1. How many times a file is read regardless of io block size and request offset. +/// And this counter can not be cleared. +/// 2. First time point at which this file is read. It's wall-time in unit of seconds. +/// 3. File path relative to current rafs root. +/// +/// Yes, we now don't have an abundant pattern recorder now. It can be negotiated in the +/// future about how to enrich it. +/// +#[derive(Default, Debug, Serialize)] +pub struct AccessPattern { + ino: u64, + nr_read: BasicMetric, + /// In unit of seconds. + first_access_time_secs: AtomicU64, + first_access_time_nanos: AtomicU32, +} + +impl AccessPattern { + fn record_access_time(&self) { + if self.first_access_time_secs.load(Ordering::Relaxed) == 0 { + let t = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); + self.first_access_time_secs + .store(t.as_secs(), Ordering::Relaxed); + self.first_access_time_nanos + .store(t.subsec_nanos(), Ordering::Relaxed); + } + } +} + +/// Filesystem level statistics and metrics. +/// +/// Currently only Rafs in Fuse/Virtiofs mode supports filesystem level statistics and metrics. +#[derive(Default, Debug, Serialize)] +pub struct FsIoStats { + // Whether to enable each file accounting switch. + // As fop accounting might consume much memory space, it is disabled by default. + // But global fop accounting is always working within each Rafs. + files_account_enabled: AtomicBool, + access_pattern_enabled: AtomicBool, + record_latest_read_files_enabled: AtomicBool, + // Flag to enable record operation latency. + measure_latency: AtomicBool, + + id: String, + // Total number of files that are currently open. + nr_opens: BasicMetric, + // Total bytes read against the filesystem. + data_read: BasicMetric, + // Cumulative bytes for different block size. + block_count_read: [BasicMetric; BLOCK_READ_SIZES_MAX], + // Counters for successful various file operations. + fop_hits: [BasicMetric; StatsFop::Max as usize], + // Counters for failed file operations. + fop_errors: [BasicMetric; StatsFop::Max as usize], + + // Cumulative latency's life cycle is equivalent to Rafs, unlike incremental + // latency which will be cleared each time dumped. Unit as micro-seconds. + // * @total means io_stats simply adds every fop latency to the counter which is never cleared. + // It is useful for other tools to calculate their metrics report. + fop_cumulative_latency_total: [BasicMetric; StatsFop::Max as usize], + // Record how many times read latency drops to the ranges. + // This helps us to understand the io service time stability. 
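The comment on the cumulative latency counters notes that they are never cleared precisely so that external tools can derive their own metrics from successive scrapes. A minimal sketch of such a derivation, using a made-up `Sample` struct whose fields stand in for `fop_cumulative_latency_total[Read]` and `fop_hits[Read]` as read from the exported JSON:

```rust
/// Two scrapes of the exported counters; field names are illustrative.
struct Sample {
    read_latency_total_micros: u64,
    read_hits: u64,
}

/// Average per-read latency (microseconds) over the interval between scrapes,
/// the kind of derived metric the cumulative counters are meant to support.
fn avg_read_latency_micros(prev: &Sample, curr: &Sample) -> Option<f64> {
    let ops = curr.read_hits.checked_sub(prev.read_hits)?;
    if ops == 0 {
        return None;
    }
    let lat = curr.read_latency_total_micros - prev.read_latency_total_micros;
    Some(lat as f64 / ops as f64)
}

fn main() {
    let prev = Sample { read_latency_total_micros: 10_000, read_hits: 100 };
    let curr = Sample { read_latency_total_micros: 22_000, read_hits: 160 };
    // (22_000 - 10_000) / (160 - 100) = 200 us per read over the interval.
    assert_eq!(avg_read_latency_micros(&prev, &curr), Some(200.0));
}
```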
+ read_latency_dist: [BasicMetric; READ_LATENCY_RANGE_MAX], + + // Rwlock closes the race that more than one threads are creating counters concurrently. + #[serde(skip_serializing, skip_deserializing)] + file_counters: RwLock>>, + #[serde(skip_serializing, skip_deserializing)] + access_patterns: RwLock>>, + // record regular file read + #[serde(skip_serializing, skip_deserializing)] + recent_read_files: InodeBitmap, +} + +macro_rules! impl_iostat_option { + ($get:ident, $set:ident, $opt:ident) => { + #[inline] + fn $get(&self) -> bool { + self.$opt.load(Ordering::Relaxed) + } + + #[inline] + pub fn $set(&self, switch: bool) { + self.$opt.store(switch, Ordering::Relaxed) + } + }; +} + +impl FsIoStats { + /// Create a new instance of [`FsIoStats`] for filesystem `id`. + pub fn new(id: &str) -> Arc { + let c = Arc::new(FsIoStats { + id: id.to_string(), + ..Default::default() + }); + FS_METRICS + .write() + .unwrap() + .insert(id.to_string(), c.clone()); + c.init(); + c + } + + /// Initialize the [`FsIoStats`] object. + pub fn init(&self) { + self.files_account_enabled.store(false, Ordering::Relaxed); + self.measure_latency.store(true, Ordering::Relaxed); + } + + impl_iostat_option!(files_enabled, toggle_files_recording, files_account_enabled); + impl_iostat_option!( + access_pattern_enabled, + toggle_access_pattern, + access_pattern_enabled + ); + impl_iostat_option!( + record_latest_read_files_enabled, + toggle_latest_read_files_recording, + record_latest_read_files_enabled + ); + + /// Prepare for recording statistics information about `ino`. + pub fn new_file_counter(&self, ino: Inode) { + if self.files_enabled() { + let mut counters = self.file_counters.write().unwrap(); + if counters.get(&ino).is_none() { + counters.insert(ino, Arc::new(InodeIoStats::default())); + } + } + + if self.access_pattern_enabled() { + let mut records = self.access_patterns.write().unwrap(); + if records.get(&ino).is_none() { + records.insert( + ino, + Arc::new(AccessPattern { + ino, + ..Default::default() + }), + ); + } + } + } + + fn file_stats_update(&self, ino: Inode, fop: StatsFop, bsize: usize, success: bool) { + self.fop_update(fop, bsize, success); + + if self.files_enabled() { + let counters = self.file_counters.read().unwrap(); + match counters.get(&ino) { + Some(c) => { + c.stats_fop_inc(fop); + c.stats_cumulative(fop, bsize); + } + None => warn!("No iostats counter for file {}", ino), + } + } + + if self.access_pattern_enabled() && fop == StatsFop::Read { + let records = self.access_patterns.read().unwrap(); + match records.get(&ino) { + Some(r) => { + r.nr_read.inc(); + r.record_access_time(); + } + None => warn!("No pattern record for file {}", ino), + } + } + + if self.record_latest_read_files_enabled() && fop == StatsFop::Read && success { + self.recent_read_files.set(ino); + } + } + + fn fop_update(&self, fop: StatsFop, value: usize, success: bool) { + // Linux kernel no longer splits IO into sizes smaller than 128K. + // So 512K and 1M is added. + // We put block count into 5 catagories e.g. 1K; 4K; 16K; 64K; 128K; 512K; 1M + if fop == StatsFop::Read { + let idx = request_size_index(value); + self.block_count_read[idx].inc() + } + + if success { + self.fop_hits[fop as usize].inc(); + match fop { + StatsFop::Read => self.data_read.add(value as u64), + StatsFop::Open => self.nr_opens.inc(), + StatsFop::Release => self.nr_opens.dec(), + _ => (), + }; + } else { + self.fop_errors[fop as usize].inc(); + } + } + + /// Mark starting of filesystem operation. 
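For readers unfamiliar with declarative macros, a single `impl_iostat_option!(files_enabled, toggle_files_recording, files_account_enabled)` invocation expands to a private getter plus a public setter over the named `AtomicBool`. A hand-written approximation on a stand-alone struct (illustrative only, not the actual expansion site):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

struct Switches {
    files_account_enabled: AtomicBool,
}

impl Switches {
    // Generated getter: read the switch.
    #[inline]
    fn files_enabled(&self) -> bool {
        self.files_account_enabled.load(Ordering::Relaxed)
    }

    // Generated setter: flip the switch at runtime.
    #[inline]
    pub fn toggle_files_recording(&self, switch: bool) {
        self.files_account_enabled.store(switch, Ordering::Relaxed)
    }
}

fn main() {
    let s = Switches { files_account_enabled: AtomicBool::new(false) };
    s.toggle_files_recording(true);
    assert!(s.files_enabled());
}
```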
+ pub fn latency_start(&self) -> Option { + if !self.measure_latency.load(Ordering::Relaxed) { + return None; + } + + Some(SystemTime::now()) + } + + /// Mark ending of filesystem operation and record statistics. + pub fn latency_end(&self, start: &Option, fop: StatsFop) { + if let Some(start) = start { + if let Ok(d) = SystemTime::elapsed(start) { + let elapsed = saturating_duration_micros(&d); + self.read_latency_dist[latency_micros_range_index(elapsed)].inc(); + self.fop_cumulative_latency_total[fop as usize].add(elapsed); + } + } + } + + fn export_files_stats(&self) -> Result { + serde_json::to_string( + self.file_counters + .read() + .expect("Not expect poisoned lock") + .deref(), + ) + .map_err(MetricsError::Serialize) + } + + fn export_latest_read_files(&self) -> String { + serde_json::json!(self.recent_read_files.bitmap_to_array_and_clear()).to_string() + } + + fn export_files_access_patterns(&self) -> Result { + serde_json::to_string( + &self + .access_patterns + .read() + .expect("Not poisoned lock") + .deref() + .values() + .filter(|r| r.nr_read.count() != 0) + .collect::>>(), + ) + .map_err(MetricsError::Serialize) + } + + fn export_fs_stats(&self) -> Result { + serde_json::to_string(self).map_err(MetricsError::Serialize) + } +} + +/// Guard object to record file operation metrics associated with an inode. +/// +/// Call its `settle()` method to generate an on-stack recorder. +/// If the operation succeeds, call `mark_success()` to change the recorder's internal state. +/// If the operation fails, its internal state will not be changed. +/// Finally, when the recorder is being destroyed, iostats counter will be updated. +pub struct FopRecorder<'a> { + fop: StatsFop, + inode: u64, + success: bool, + // Now, the size only makes sense for `Read` FOP. + size: usize, + ios: &'a FsIoStats, +} + +impl<'a> Drop for FopRecorder<'a> { + fn drop(&mut self) { + self.ios + .file_stats_update(self.inode, self.fop, self.size, self.success); + } +} + +impl<'a> FopRecorder<'a> { + /// Create a guard object for filesystem operation `fop` associated with `inode`. + pub fn settle(fop: StatsFop, inode: u64, ios: &'a T) -> Self + where + T: AsRef, + { + FopRecorder { + fop, + inode, + success: false, + size: 0, + ios: ios.as_ref(), + } + } + + /// Mark operation as success. + pub fn mark_success(&mut self, size: usize) { + self.success = true; + self.size = size; + } +} + +/// Export file metrics of a filesystem. +pub fn export_files_stats( + name: &Option, + latest_read_files: bool, +) -> Result { + let fs_metrics = FS_METRICS.read().unwrap(); + + match name { + Some(k) => fs_metrics.get(k).ok_or(MetricsError::NoCounter).map(|v| { + if !latest_read_files { + v.export_files_stats() + } else { + Ok(v.export_latest_read_files()) + } + })?, + None => { + if fs_metrics.len() == 1 { + if let Some(ios) = fs_metrics.values().next() { + return if !latest_read_files { + ios.export_files_stats() + } else { + Ok(ios.export_latest_read_files()) + }; + } + } + Err(MetricsError::NoCounter) + } + } +} + +/// Export file access pattern of a filesystem. +pub fn export_files_access_pattern(name: &Option) -> Result { + let fs_metrics = FS_METRICS.read().unwrap(); + match name { + Some(k) => fs_metrics + .get(k) + .ok_or(MetricsError::NoCounter) + .map(|v| v.export_files_access_patterns())?, + None => { + if fs_metrics.len() == 1 { + if let Some(ios) = fs_metrics.values().next() { + return ios.export_files_access_patterns(); + } + } + Err(MetricsError::NoCounter) + } + } +} + +/// Export filesystem metrics. 
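A usage sketch for the `FopRecorder` drop guard described above, assuming the types are reachable as `nydus_utils::metrics::*` and using a hypothetical `read_from_backend` helper as the wrapped operation:

```rust
use std::sync::Arc;

use nydus_utils::metrics::{FopRecorder, FsIoStats, StatsFop};

// Hypothetical data path; only here to give the recorder something to wrap.
fn read_from_backend(_ino: u64, buf: &mut [u8]) -> std::io::Result<usize> {
    Ok(buf.len())
}

fn do_read(ios: &Arc<FsIoStats>, ino: u64, buf: &mut [u8]) -> std::io::Result<usize> {
    // The recorder starts in the "failed" state with size 0.
    let mut rec = FopRecorder::settle(StatsFop::Read, ino, ios);
    let nr = read_from_backend(ino, buf)?;
    // Flip to success and record the size read.
    rec.mark_success(nr);
    Ok(nr) // counters are updated when `rec` goes out of scope
}

fn main() -> std::io::Result<()> {
    let ios = FsIoStats::new("example-fs");
    let mut buf = [0u8; 16];
    do_read(&ios, 1, &mut buf)?;
    Ok(())
}
```

Because the counter update happens in `Drop`, the error path needs no extra bookkeeping: returning early simply records one failed `Read`.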
+pub fn export_global_stats(name: &Option) -> Result { + // With only one rafs instance, we allow caller to ask for an unknown ios name. + let fs_metrics = FS_METRICS.read().unwrap(); + + match name { + Some(k) => fs_metrics + .get(k) + .ok_or(MetricsError::NoCounter) + .map(|v| v.export_fs_stats())?, + None => { + if fs_metrics.len() == 1 { + if let Some(ios) = fs_metrics.values().next() { + return ios.export_fs_stats(); + } + } + Err(MetricsError::NoCounter) + } + } +} + +/// Export storage backend metrics. +pub fn export_backend_metrics(name: &Option) -> IoStatsResult { + let metrics = BACKEND_METRICS.read().unwrap(); + + match name { + Some(k) => metrics + .get(k) + .ok_or(MetricsError::NoCounter) + .map(|v| v.export_metrics())?, + None => { + if metrics.len() == 1 { + if let Some(m) = metrics.values().next() { + return m.export_metrics(); + } + } + Err(MetricsError::NoCounter) + } + } +} + +/// Export blob cache metircs. +pub fn export_blobcache_metrics(id: &Option) -> IoStatsResult { + let metrics = BLOBCACHE_METRICS.read().unwrap(); + + match id { + Some(k) => metrics + .get(k) + .ok_or(MetricsError::NoCounter) + .map(|v| v.export_metrics())?, + None => { + if metrics.len() == 1 { + if let Some(m) = metrics.values().next() { + return m.export_metrics(); + } + } + Err(MetricsError::NoCounter) + } + } +} + +/// Export global error events. +pub fn export_events() -> IoStatsResult { + serde_json::to_string(ERROR_HOLDER.lock().unwrap().deref()).map_err(MetricsError::Serialize) +} + +/// Trait to manipulate metric counters. +pub trait Metric { + /// Adds `value` to the current counter. + fn add(&self, value: u64); + /// Increments by 1 unit the current counter. + fn inc(&self) { + self.add(1); + } + /// Returns current value of the counter. + fn count(&self) -> u64; + /// Subtract `value` from the current counter. + fn sub(&self, value: u64); + /// Decrease the current counter. + fn dec(&self) { + self.sub(1); + } + + fn set(&self, value: u64); +} + +/// Basic 64-bit metric counter. +#[derive(Default, Serialize, Debug)] +pub struct BasicMetric(AtomicU64); + +impl Metric for BasicMetric { + fn add(&self, value: u64) { + self.0.fetch_add(value, Ordering::Relaxed); + } + + fn count(&self) -> u64 { + self.0.load(Ordering::Relaxed) + } + + fn sub(&self, value: u64) { + self.0.fetch_sub(value, Ordering::Relaxed); + } + + fn set(&self, value: u64) { + self.0.store(value, Ordering::Relaxed); + } +} + +/// Metrics for storage backends. +#[derive(Default, Serialize, Debug)] +pub struct BackendMetrics { + #[serde(skip_serializing, skip_deserializing)] + id: String, + // type of storage backend. + backend_type: String, + // Cumulative count of read request to backend + read_count: BasicMetric, + // Cumulative count of read failure to backend + read_errors: BasicMetric, + // Cumulative amount of data from to backend in unit of Byte. External tools + // are responsible for calculating BPS from this field. + read_amount_total: BasicMetric, + // In unit of millisecond + read_cumulative_latency_millis_total: BasicMetric, + read_cumulative_latency_millis_dist: [BasicMetric; BLOCK_READ_SIZES_MAX], + read_count_block_size_dist: [BasicMetric; BLOCK_READ_SIZES_MAX], + // Categorize metrics as per their latency and request size + read_latency_sizes_dist: [[BasicMetric; READ_LATENCY_RANGE_MAX]; BLOCK_READ_SIZES_MAX], +} + +impl BackendMetrics { + /// Create a [`BackendMetrics`] object for a storage backend. 
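The `export_*` helpers all resolve the optional instance id the same way: an explicit id must name a registered instance, and `None` is accepted only while exactly one instance exists. A small sketch mirroring the module's own tests (the `nydus_utils::metrics` path is assumed):

```rust
use nydus_utils::metrics::{export_global_stats, FsIoStats};

fn main() {
    let _fs1 = FsIoStats::new("rafs-1");

    // Explicit id: resolves whenever the instance is registered.
    assert!(export_global_stats(&Some("rafs-1".to_string())).is_ok());
    // Unknown id: rejected.
    assert!(export_global_stats(&Some("no-such-fs".to_string())).is_err());
    // No id: fine while there is a single instance...
    assert!(export_global_stats(&None).is_ok());

    let _fs2 = FsIoStats::new("rafs-2");
    // ...but ambiguous, hence an error, once a second instance is registered.
    assert!(export_global_stats(&None).is_err());
}
```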
+ pub fn new(id: &str, backend_type: &str) -> Arc { + let backend_metrics = Arc::new(Self { + id: id.to_string(), + backend_type: backend_type.to_string(), + ..Default::default() + }); + + BACKEND_METRICS + .write() + .unwrap() + .insert(id.to_string(), backend_metrics.clone()); + + backend_metrics + } + + /// Release a [`BackendMetrics`] object for a storage backend. + pub fn release(&self) -> IoStatsResult<()> { + BACKEND_METRICS + .write() + .unwrap() + .remove(&self.id) + .map(|_| ()) + .ok_or(MetricsError::NoCounter) + } + + /// Mark starting of an IO operations. + pub fn begin(&self) -> SystemTime { + SystemTime::now() + } + + /// Mark ending of an IO operations. + pub fn end(&self, begin: &SystemTime, size: usize, error: bool) { + if let Ok(d) = SystemTime::elapsed(begin) { + let elapsed = saturating_duration_millis(&d); + + self.read_count.inc(); + if error { + self.read_errors.inc(); + } + + self.read_cumulative_latency_millis_total.add(elapsed); + self.read_amount_total.add(size as u64); + let lat_idx = latency_millis_range_index(elapsed); + let size_idx = request_size_index(size); + self.read_cumulative_latency_millis_dist[size_idx].add(elapsed); + self.read_count_block_size_dist[size_idx].inc(); + self.read_latency_sizes_dist[size_idx][lat_idx].inc(); + } + } + + fn export_metrics(&self) -> IoStatsResult { + serde_json::to_string(self).map_err(MetricsError::Serialize) + } +} + +// This function assumes that the counted duration won't be too long. +fn saturating_duration_millis(d: &Duration) -> u64 { + let d_secs = d.as_secs(); + if d_secs == 0 { + d.subsec_millis() as u64 + } else { + d_secs + .saturating_mul(1000) + .saturating_add(d.subsec_millis() as u64) + } +} + +fn saturating_duration_micros(d: &Duration) -> u64 { + let d_secs = d.as_secs(); + if d_secs == 0 { + d.subsec_micros() as u64 + } else { + d_secs + .saturating_mul(1_000_000) + .saturating_add(d.subsec_micros() as u64) + } +} + +#[derive(Debug, Default, Serialize)] +pub struct BlobcacheMetrics { + #[serde(skip_serializing, skip_deserializing)] + id: String, + // Prefer to let external tool get file's state like file size and disk usage. + // Because stat(2) file may get blocked. + // It should include the real blob cache file names, so that the external GC + // process can handle it directly. + pub underlying_files: Mutex>, + pub store_path: String, + // Cache hit percentage = (partial_hits + whole_hits) / total + pub partial_hits: BasicMetric, + pub whole_hits: BasicMetric, + // How many `read` requests are processed by the blobcache instance. + // This metric will be helpful when comparing with cache hits times. + pub total: BasicMetric, + // Scale of blobcache. Blobcache does not evict entries. + // Means the number of chunks in ready status. + pub entries_count: BasicMetric, + // Together with below two fields, we can figure out average merging size thus + // to estimate the possibility to merge backend IOs. + // In unit of Bytes + pub prefetch_data_amount: BasicMetric, + // Total prefetch requests issued from storage/blobs or rafs filesystem layer for each file that needs prefetch + pub prefetch_requests_count: BasicMetric, + pub prefetch_workers: AtomicUsize, + pub prefetch_unmerged_chunks: BasicMetric, + // Cumulative time latencies of each prefetch request which can be handled in parallel. + // It starts when the request is born including nydusd processing and schedule and end when the chunk is downloaded and stored. 
+ // Then the average prefetch latency can be calculated by + // `prefetch_cumulative_time_millis / prefetch_requests_count` + pub prefetch_cumulative_time_millis: BasicMetric, + // The time seconds part when nydusd begins to prefetch + // We can calculate prefetch average bandwidth by + // `prefetch_data_amount / (prefetch_end_time_secs - prefetch_begin_time_secs)`. Note, it does not take milliseconds into account yet.s + pub prefetch_begin_time_secs: BasicMetric, + // The time milliseconds part when nydusd begins to prefetch + pub prefetch_begin_time_millis: BasicMetric, + // The time seconds part when nydusd ends prefetching + pub prefetch_end_time_secs: BasicMetric, + // The time milliseconds part when nydusd ends prefetching + pub prefetch_end_time_millis: BasicMetric, + pub buffered_backend_size: BasicMetric, + pub data_all_ready: AtomicBool, +} + +impl BlobcacheMetrics { + /// Create a [`BlobcacheMetrics`] object for a blob cache manager. + pub fn new(id: &str, store_path: &str) -> Arc { + let metrics = Arc::new(Self { + id: id.to_string(), + store_path: store_path.to_string(), + ..Default::default() + }); + + // Old metrics will be dropped when BlobCache is swapped. So we don't + // have to worry about swapping its metrics either which means it's + // not necessary to release metrics recorder when blobcache is dropped due to swapping. + BLOBCACHE_METRICS + .write() + .unwrap() + .insert(id.to_string(), metrics.clone()); + + metrics + } + + /// Release a [`BlobcacheMetrics`] object for a blob cache manager. + pub fn release(&self) -> IoStatsResult<()> { + BLOBCACHE_METRICS + .write() + .unwrap() + .remove(&self.id) + .map(|_| ()) + .ok_or(MetricsError::NoCounter) + } + + /// Export blobcache metric information. + pub fn export_metrics(&self) -> IoStatsResult { + serde_json::to_string(self).map_err(MetricsError::Serialize) + } + + pub fn calculate_prefetch_metrics(&self, begin_time: SystemTime) { + let now = SystemTime::now(); + if let Ok(ref t) = now.duration_since(SystemTime::UNIX_EPOCH) { + self.prefetch_end_time_secs.set(t.as_secs()); + self.prefetch_end_time_millis.set(t.subsec_millis() as u64); + } + if let Ok(ref t) = now.duration_since(begin_time) { + let elapsed = saturating_duration_millis(t); + self.prefetch_cumulative_time_millis.add(elapsed); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_request_size_index() { + assert_eq!(request_size_index(0x0), 0); + assert_eq!(request_size_index(0x3ff), 0); + assert_eq!(request_size_index(0x400), 1); + assert_eq!(request_size_index(0xfff), 1); + assert_eq!(request_size_index(0x1000), 2); + assert_eq!(request_size_index(0x3fff), 2); + assert_eq!(request_size_index(0x4000), 3); + assert_eq!(request_size_index(0xffff), 3); + assert_eq!(request_size_index(0x1_0000), 4); + assert_eq!(request_size_index(0x1_ffff), 4); + assert_eq!(request_size_index(0x2_0000), 5); + assert_eq!(request_size_index(0x7_ffff), 5); + assert_eq!(request_size_index(0x8_0000), 6); + assert_eq!(request_size_index(0xf_ffff), 6); + assert_eq!(request_size_index(0x10_0000), 7); + assert_eq!(request_size_index(usize::MAX), 7); + } + + #[test] + fn test_block_read_count() { + let g = FsIoStats::default(); + g.init(); + g.fop_update(StatsFop::Read, 4000, true); + assert_eq!(g.block_count_read[1].count(), 1); + + g.fop_update(StatsFop::Read, 4096, true); + assert_eq!(g.block_count_read[2].count(), 1); + + g.fop_update(StatsFop::Read, 65535, true); + assert_eq!(g.block_count_read[3].count(), 1); + + g.fop_update(StatsFop::Read, 131072, 
true); + assert_eq!(g.block_count_read[5].count(), 1); + + g.fop_update(StatsFop::Read, 65520, true); + assert_eq!(g.block_count_read[3].count(), 2); + + g.fop_update(StatsFop::Read, 2015520, true); + assert_eq!(g.block_count_read[3].count(), 2); + } + + #[test] + fn test_latency_millis_range_index() { + assert_eq!(latency_millis_range_index(0), 0); + assert_eq!(latency_millis_range_index(1), 0); + assert_eq!(latency_millis_range_index(10), 1); + assert_eq!(latency_millis_range_index(20), 1); + assert_eq!(latency_millis_range_index(40), 2); + assert_eq!(latency_millis_range_index(80), 3); + assert_eq!(latency_millis_range_index(160), 4); + assert_eq!(latency_millis_range_index(320), 4); + assert_eq!(latency_millis_range_index(640), 5); + assert_eq!(latency_millis_range_index(1280), 6); + assert_eq!(latency_millis_range_index(2560), 7); + } + + #[test] + fn test_latency_micros_range_index() { + assert_eq!(latency_micros_range_index(100), 0); + assert_eq!(latency_micros_range_index(500), 1); + assert_eq!(latency_micros_range_index(10_000), 2); + assert_eq!(latency_micros_range_index(30_000), 3); + assert_eq!(latency_micros_range_index(100_000), 4); + assert_eq!(latency_micros_range_index(1_000_000), 5); + assert_eq!(latency_micros_range_index(1_500_000), 6); + assert_eq!(latency_micros_range_index(3_000_000), 7); + } + + #[test] + fn test_inode_stats() { + let stat = InodeIoStats::default(); + stat.stats_fop_inc(StatsFop::Read); + stat.stats_fop_inc(StatsFop::Open); + assert_eq!(stat.fop_hits[StatsFop::Read as usize].count(), 1); + assert_eq!(stat.total_fops.count(), 2); + + stat.stats_cumulative(StatsFop::Open, 1000); + stat.stats_cumulative(StatsFop::Read, 4000); + stat.stats_cumulative(StatsFop::Read, 5000); + + assert_eq!(stat.block_count_read[0].count(), 0); + assert_eq!(stat.block_count_read[1].count(), 1); + assert_eq!(stat.block_count_read[2].count(), 1); + } + + #[test] + fn test_access_pattern() { + let ap = AccessPattern::default(); + ap.record_access_time(); + assert_ne!(ap.first_access_time_secs.load(Ordering::Relaxed), 0); + assert_ne!(ap.first_access_time_nanos.load(Ordering::Relaxed), 0); + } + + #[test] + fn test_file_stats_update() { + let f = FsIoStats::default(); + let node1: Inode = 1; + let node2: Inode = 2; + let node3: Inode = 3; + + f.new_file_counter(node1); + f.new_file_counter(node2); + assert!(f.access_patterns.read().unwrap().is_empty()); + assert!(f.file_counters.read().unwrap().is_empty()); + + f.access_pattern_enabled.store(true, Ordering::Relaxed); + f.files_account_enabled.store(true, Ordering::Relaxed); + f.record_latest_read_files_enabled + .store(true, Ordering::Relaxed); + f.new_file_counter(node1); + f.new_file_counter(node2); + f.file_stats_update(node1, StatsFop::Read, 4000, true); + f.file_stats_update(node1, StatsFop::Read, 5000, true); + f.file_stats_update(node1, StatsFop::Open, 0, true); + f.file_stats_update(node3, StatsFop::Open, 0, true); + assert_eq!( + f.access_patterns + .read() + .unwrap() + .get(&node1) + .unwrap() + .nr_read + .count(), + 2 + ); + assert_eq!( + f.file_counters + .read() + .unwrap() + .get(&node1) + .unwrap() + .fop_hits[StatsFop::Read as usize] + .count(), + 2 + ); + assert!(f.recent_read_files.is_set(node1 as u64)); + } + + #[test] + fn test_fop_update() { + let f = FsIoStats::default(); + assert_eq!(f.nr_opens.count(), 0); + f.fop_update(StatsFop::Open, 0, true); + assert_eq!(f.nr_opens.count(), 1); + f.fop_update(StatsFop::Release, 0, true); + assert_eq!(f.nr_opens.count(), 0); + f.fop_update(StatsFop::Opendir, 0, 
true); + assert_eq!(f.fop_errors[StatsFop::Opendir as usize].count(), 0); + f.fop_update(StatsFop::Opendir, 0, false); + assert_eq!(f.fop_errors[StatsFop::Opendir as usize].count(), 1); + } + + #[test] + fn test_latecny() { + let f = FsIoStats::default(); + assert_eq!(f.latency_start(), None); + f.measure_latency.store(true, Ordering::Relaxed); + let s = f.latency_start().unwrap(); + let d = Duration::new(1, 500_000_000); + /* because of the timer resolution, the elapsed maybe greater than 1.5s gentlely*/ + f.latency_end(&s.checked_sub(d), StatsFop::Read); + assert_eq!( + f.read_latency_dist[latency_micros_range_index(1_500_000)].count(), + 1 + ); + /* we think if the latency delta error no more 1ms, the test is successful. */ + assert!( + f.fop_cumulative_latency_total[StatsFop::Read as usize].count() + - saturating_duration_micros(&d) + <= 1000 + ); + } + + #[test] + fn test_fs_io_stats_new_and_export() { + let id0: Option = Some("id-0".to_string()); + let id1: Option = Some("id-1".to_string()); + let none: Option = None; + + let _f1 = FsIoStats::new("id-0"); + assert!(export_files_stats(&id0, true).is_ok()); + assert!(export_files_stats(&none, true).is_ok()); + assert!(export_global_stats(&id0).is_ok()); + assert!(export_global_stats(&id1).is_err()); + assert!(export_global_stats(&none).is_ok()); + + let _f2 = FsIoStats::new("id-1"); + assert!(export_files_stats(&none, false).is_err()); + assert!(export_files_stats(&id0, true).is_ok()); + assert!(export_files_stats(&id0, false).is_ok()); + assert!(export_global_stats(&none).is_err()); + assert!(export_files_access_pattern(&id0).is_ok()); + assert!(export_files_access_pattern(&none).is_err()); + + let ios = FsIoStats::default(); + assert!(ios.export_files_access_patterns().is_ok()); + assert!(ios.export_files_stats().is_ok()); + assert!(ios.export_fs_stats().is_ok()); + ios.export_latest_read_files(); + + test_fop_record(); + } + + fn test_fop_record() { + let ios = FsIoStats::new("0"); + let mut recorder = FopRecorder::settle(StatsFop::Read, 0, &ios); + assert!(!recorder.success); + assert_eq!(recorder.size, 0); + + recorder.mark_success(10); + assert!(recorder.success); + assert_eq!(recorder.size, 10); + drop(recorder); + } + + #[test] + fn test_saturating_duration() { + assert_eq!( + saturating_duration_millis(&Duration::from_millis(1234)), + 1234 + ); + assert_eq!( + saturating_duration_micros(&Duration::from_millis(888)), + 888_000 + ); + assert_eq!( + saturating_duration_micros(&Duration::from_millis(1888)), + 1_888_000 + ); + } + + #[test] + fn test_blob_cache_metric() { + let m1: Arc = BlobcacheMetrics::new("id", "path"); + { + let metrics = BLOBCACHE_METRICS.read().unwrap(); + assert_eq!(metrics.len(), 1); + } + assert!(m1.export_metrics().is_ok()); + assert!(m1.release().is_ok()); + { + let metrics = BLOBCACHE_METRICS.read().unwrap(); + assert_eq!(metrics.len(), 0); + } + + let now = SystemTime::now(); + let prev = now.checked_sub(Duration::new(10, 0)).unwrap(); + m1.calculate_prefetch_metrics(prev); + assert_eq!(m1.prefetch_cumulative_time_millis.count(), 10_000); + assert_eq!( + m1.prefetch_end_time_secs.count(), + now.duration_since(SystemTime::UNIX_EPOCH) + .expect("No error") + .as_secs() + ); + + let id0: Option = Some("id-0".to_string()); + let none: Option = None; + BlobcacheMetrics::new("id-0", "t0"); + assert!(export_blobcache_metrics(&id0).is_ok()); + assert!(export_blobcache_metrics(&none).is_ok()); + BlobcacheMetrics::new("id-1", "t1"); + assert!(export_blobcache_metrics(&none).is_err()); + 
assert!(export_events().is_ok()); + } + + #[test] + fn test_backend_metric() { + let id0: Option = Some("id-0".to_string()); + let id1: Option = Some("id-1".to_string()); + let none: Option = None; + let b0 = BackendMetrics::new("id-0", "t0"); + assert!(export_backend_metrics(&id0).is_ok()); + assert!(export_backend_metrics(&id1).is_err()); + assert!(export_backend_metrics(&none).is_ok()); + let b1 = BackendMetrics::new("id-1", "t1"); + assert!(export_backend_metrics(&id0).is_ok()); + assert!(export_backend_metrics(&id1).is_ok()); + assert!(export_backend_metrics(&none).is_err()); + assert!(b0.release().is_ok()); + assert!(b1.release().is_ok()); + } +} diff --git a/utils/src/mpmc.rs b/utils/src/mpmc.rs index ba5da1eafba..ba74ea55d8f 100644 --- a/utils/src/mpmc.rs +++ b/utils/src/mpmc.rs @@ -1,172 +1,172 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Asynchronous Multi-Producer Multi-Consumer channel. -//! -//! This module provides an asynchronous multi-producer multi-consumer channel based on [tokio::sync::Notify]. - -use std::collections::VecDeque; -use std::io::{Error, ErrorKind, Result}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Mutex, MutexGuard}; -use tokio::sync::Notify; - -/// An asynchronous multi-producer multi-consumer channel based on [tokio::sync::Notify]. -pub struct Channel { - closed: AtomicBool, - notifier: Notify, - requests: Mutex>, -} - -impl Default for Channel { - fn default() -> Self { - Self::new() - } -} - -impl Channel { - /// Create a new instance of [`Channel`]. - pub fn new() -> Self { - Channel { - closed: AtomicBool::new(false), - notifier: Notify::new(), - requests: Mutex::new(VecDeque::new()), - } - } - - /// Close the channel. - pub fn close(&self) { - self.closed.store(true, Ordering::Release); - self.notifier.notify_waiters(); - } - - /// Send a message to the channel. - /// - /// The message object will be returned on error, to ease the lifecycle management. - pub fn send(&self, msg: T) -> std::result::Result<(), T> { - if self.closed.load(Ordering::Acquire) { - Err(msg) - } else { - self.requests.lock().unwrap().push_back(msg); - self.notifier.notify_one(); - Ok(()) - } - } - - /// Try to receive a message from the channel. - pub fn try_recv(&self) -> Option { - self.requests.lock().unwrap().pop_front() - } - - /// Receive message from the channel in asynchronous mode. - pub async fn recv(&self) -> Result { - let future = self.notifier.notified(); - tokio::pin!(future); - - loop { - // Make sure that no wakeup is lost if we get `None` from `try_recv`. - future.as_mut().enable(); - - if let Some(msg) = self.try_recv() { - return Ok(msg); - } else if self.closed.load(Ordering::Acquire) { - return Err(Error::new(ErrorKind::BrokenPipe, "channel has been closed")); - } - - // Wait for a call to `notify_one`. - // - // This uses `.as_mut()` to avoid consuming the future, - // which lets us call `Pin::set` below. - future.as_mut().await; - - // Reset the future in case another call to `try_recv` got the message before us. - future.set(self.notifier.notified()); - } - } - - /// Flush all pending requests specified by the predicator. - /// - pub fn flush_pending_prefetch_requests(&self, mut f: F) - where - F: FnMut(&T) -> bool, - { - self.requests.lock().unwrap().retain(|t| !f(t)); - } - - /// Lock the channel to block all queue operations. - pub fn lock_channel(&self) -> MutexGuard> { - self.requests.lock().unwrap() - } - - /// Notify all waiters. 
- pub fn notify_waiters(&self) { - self.notifier.notify_waiters(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::sync::Arc; - - #[test] - fn test_new_channel() { - let channel = Channel::new(); - - channel.send(1u32).unwrap(); - channel.send(2u32).unwrap(); - assert_eq!(channel.try_recv().unwrap(), 1); - assert_eq!(channel.try_recv().unwrap(), 2); - - channel.close(); - channel.send(2u32).unwrap_err(); - } - - #[test] - fn test_flush_channel() { - let channel = Channel::new(); - - channel.send(1u32).unwrap(); - channel.send(2u32).unwrap(); - channel.flush_pending_prefetch_requests(|_| true); - assert!(channel.try_recv().is_none()); - - channel.notify_waiters(); - let _guard = channel.lock_channel(); - } - - #[test] - fn test_async_recv() { - let channel = Arc::new(Channel::new()); - let channel2 = channel.clone(); - - let t = std::thread::spawn(move || { - channel2.send(1u32).unwrap(); - }); - - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - rt.block_on(async { - let msg = channel.recv().await.unwrap(); - assert_eq!(msg, 1); - }); - - t.join().unwrap(); - } - - #[test] - fn test_default_channel_send_and_recv() { - let channel = Channel::default(); - - channel.send(0x1u32).unwrap(); - channel.send(0x2u32).unwrap(); - assert_eq!(channel.try_recv().unwrap(), 0x1); - assert_eq!(channel.try_recv().unwrap(), 0x2); - - channel.close(); - channel.send(2u32).unwrap_err(); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Asynchronous Multi-Producer Multi-Consumer channel. +//! +//! This module provides an asynchronous multi-producer multi-consumer channel based on [tokio::sync::Notify]. + +use std::collections::VecDeque; +use std::io::{Error, ErrorKind, Result}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Mutex, MutexGuard}; +use tokio::sync::Notify; + +/// An asynchronous multi-producer multi-consumer channel based on [tokio::sync::Notify]. +pub struct Channel { + closed: AtomicBool, + notifier: Notify, + requests: Mutex>, +} + +impl Default for Channel { + fn default() -> Self { + Self::new() + } +} + +impl Channel { + /// Create a new instance of [`Channel`]. + pub fn new() -> Self { + Channel { + closed: AtomicBool::new(false), + notifier: Notify::new(), + requests: Mutex::new(VecDeque::new()), + } + } + + /// Close the channel. + pub fn close(&self) { + self.closed.store(true, Ordering::Release); + self.notifier.notify_waiters(); + } + + /// Send a message to the channel. + /// + /// The message object will be returned on error, to ease the lifecycle management. + pub fn send(&self, msg: T) -> std::result::Result<(), T> { + if self.closed.load(Ordering::Acquire) { + Err(msg) + } else { + self.requests.lock().unwrap().push_back(msg); + self.notifier.notify_one(); + Ok(()) + } + } + + /// Try to receive a message from the channel. + pub fn try_recv(&self) -> Option { + self.requests.lock().unwrap().pop_front() + } + + /// Receive message from the channel in asynchronous mode. + pub async fn recv(&self) -> Result { + let future = self.notifier.notified(); + tokio::pin!(future); + + loop { + // Make sure that no wakeup is lost if we get `None` from `try_recv`. + future.as_mut().enable(); + + if let Some(msg) = self.try_recv() { + return Ok(msg); + } else if self.closed.load(Ordering::Acquire) { + return Err(Error::new(ErrorKind::BrokenPipe, "channel has been closed")); + } + + // Wait for a call to `notify_one`. 
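A minimal end-to-end sketch of the channel in use, modeled on the module's unit tests: synchronous producers on plain threads, one asynchronous consumer on a current-thread tokio runtime (the same `tokio` dependency this module already uses), and `close()` to reject further sends. The `nydus_utils::mpmc::Channel` path is an assumption:

```rust
use std::sync::Arc;

use nydus_utils::mpmc::Channel;

fn main() {
    let channel = Arc::new(Channel::new());

    // Two producers on ordinary threads; `send` is synchronous.
    let producers: Vec<_> = (0..2)
        .map(|i| {
            let ch = channel.clone();
            std::thread::spawn(move || ch.send(i as u32).unwrap())
        })
        .collect();

    // One async consumer; `recv` waits until a message or `close()` arrives.
    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .unwrap();
    rt.block_on(async {
        let mut got = vec![channel.recv().await.unwrap(), channel.recv().await.unwrap()];
        got.sort();
        assert_eq!(got, vec![0, 1]);
    });

    for p in producers {
        p.join().unwrap();
    }

    // After close(), send() hands the message back instead of queueing it.
    channel.close();
    assert!(channel.send(42u32).is_err());
}
```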
+ // + // This uses `.as_mut()` to avoid consuming the future, + // which lets us call `Pin::set` below. + future.as_mut().await; + + // Reset the future in case another call to `try_recv` got the message before us. + future.set(self.notifier.notified()); + } + } + + /// Flush all pending requests specified by the predicator. + /// + pub fn flush_pending_prefetch_requests(&self, mut f: F) + where + F: FnMut(&T) -> bool, + { + self.requests.lock().unwrap().retain(|t| !f(t)); + } + + /// Lock the channel to block all queue operations. + pub fn lock_channel(&self) -> MutexGuard> { + self.requests.lock().unwrap() + } + + /// Notify all waiters. + pub fn notify_waiters(&self) { + self.notifier.notify_waiters(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + #[test] + fn test_new_channel() { + let channel = Channel::new(); + + channel.send(1u32).unwrap(); + channel.send(2u32).unwrap(); + assert_eq!(channel.try_recv().unwrap(), 1); + assert_eq!(channel.try_recv().unwrap(), 2); + + channel.close(); + channel.send(2u32).unwrap_err(); + } + + #[test] + fn test_flush_channel() { + let channel = Channel::new(); + + channel.send(1u32).unwrap(); + channel.send(2u32).unwrap(); + channel.flush_pending_prefetch_requests(|_| true); + assert!(channel.try_recv().is_none()); + + channel.notify_waiters(); + let _guard = channel.lock_channel(); + } + + #[test] + fn test_async_recv() { + let channel = Arc::new(Channel::new()); + let channel2 = channel.clone(); + + let t = std::thread::spawn(move || { + channel2.send(1u32).unwrap(); + }); + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rt.block_on(async { + let msg = channel.recv().await.unwrap(); + assert_eq!(msg, 1); + }); + + t.join().unwrap(); + } + + #[test] + fn test_default_channel_send_and_recv() { + let channel = Channel::default(); + + channel.send(0x1u32).unwrap(); + channel.send(0x2u32).unwrap(); + assert_eq!(channel.try_recv().unwrap(), 0x1); + assert_eq!(channel.try_recv().unwrap(), 0x2); + + channel.close(); + channel.send(2u32).unwrap_err(); + } +} diff --git a/utils/src/reader.rs b/utils/src/reader.rs index 21419f450de..0ed986fd924 100644 --- a/utils/src/reader.rs +++ b/utils/src/reader.rs @@ -1,136 +1,136 @@ -// Copyright (C) 2022 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -use std::fs::File; -use std::io::{BufReader, Read, Seek, SeekFrom}; -use std::marker::PhantomData; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::{Arc, Mutex}; - -use sha2::Sha256; - -use crate::digest::DigestHasher; - -/// A wrapper reader to read a range of data from a file. -pub struct FileRangeReader<'a> { - fd: RawFd, - offset: u64, - size: u64, - r: PhantomData<&'a u8>, -} - -impl<'a> FileRangeReader<'a> { - /// Create a wrapper reader to read a range of data from the file. - pub fn new(f: &File, offset: u64, size: u64) -> Self { - Self { - fd: f.as_raw_fd(), - offset, - size, - r: PhantomData, - } - } -} - -impl<'a> Read for FileRangeReader<'a> { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let size = std::cmp::min(self.size as usize, buf.len()); - let nr_read = nix::sys::uio::pread(self.fd, &mut buf[0..size], self.offset as i64) - .map_err(|_| last_error!())?; - self.offset += nr_read as u64; - self.size -= nr_read as u64; - Ok(nr_read) - } -} - -struct BufReaderState { - reader: BufReader, - pos: u64, - hash: Sha256, -} - -/// A wrapper over `BufReader` to track current position. 
-pub struct BufReaderInfo { - calc_digest: bool, - state: Arc>>, -} - -impl BufReaderInfo { - /// Create a new instance of `BufReaderPos` from a `BufReader`. - pub fn from_buf_reader(buf_reader: BufReader) -> Self { - let state = BufReaderState { - reader: buf_reader, - pos: 0, - hash: Sha256::default(), - }; - Self { - calc_digest: true, - state: Arc::new(Mutex::new(state)), - } - } - - /// Get current position of the reader. - pub fn position(&self) -> u64 { - self.state.lock().unwrap().pos - } - - /// Get the hash object. - pub fn get_hash_object(&self) -> Sha256 { - self.state.lock().unwrap().hash.clone() - } - - /// Enable or disable blob digest calculation. - pub fn enable_digest_calculation(&mut self, enable: bool) { - self.calc_digest = enable; - } -} - -impl Read for BufReaderInfo { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let mut state = self.state.lock().unwrap(); - state.reader.read(buf).map(|v| { - state.pos += v as u64; - if v > 0 && self.calc_digest { - state.hash.digest_update(&buf[..v]); - } - v - }) - } -} - -impl Seek for BufReaderInfo { - fn seek(&mut self, pos: SeekFrom) -> std::io::Result { - let mut state = self.state.lock().unwrap(); - let pos = state.reader.seek(pos)?; - state.pos = pos; - Ok(pos) - } -} - -impl Clone for BufReaderInfo { - fn clone(&self) -> Self { - Self { - calc_digest: self.calc_digest, - state: self.state.clone(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_file_range_reader() { - let file = TempFile::new().unwrap(); - std::fs::write(file.as_path(), b"This is a test").unwrap(); - let mut reader = FileRangeReader::new(file.as_file(), 4, 6); - let mut buf = vec![0u8; 128]; - let res = reader.read(&mut buf).unwrap(); - assert_eq!(res, 6); - assert_eq!(&buf[..6], b" is a ".as_slice()); - let res = reader.read(&mut buf).unwrap(); - assert_eq!(res, 0); - } -} +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use std::marker::PhantomData; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::{Arc, Mutex}; + +use sha2::Sha256; + +use crate::digest::DigestHasher; + +/// A wrapper reader to read a range of data from a file. +pub struct FileRangeReader<'a> { + fd: RawFd, + offset: u64, + size: u64, + r: PhantomData<&'a u8>, +} + +impl<'a> FileRangeReader<'a> { + /// Create a wrapper reader to read a range of data from the file. + pub fn new(f: &File, offset: u64, size: u64) -> Self { + Self { + fd: f.as_raw_fd(), + offset, + size, + r: PhantomData, + } + } +} + +impl<'a> Read for FileRangeReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let size = std::cmp::min(self.size as usize, buf.len()); + let nr_read = nix::sys::uio::pread(self.fd, &mut buf[0..size], self.offset as i64) + .map_err(|_| last_error!())?; + self.offset += nr_read as u64; + self.size -= nr_read as u64; + Ok(nr_read) + } +} + +struct BufReaderState { + reader: BufReader, + pos: u64, + hash: Sha256, +} + +/// A wrapper over `BufReader` to track current position. +pub struct BufReaderInfo { + calc_digest: bool, + state: Arc>>, +} + +impl BufReaderInfo { + /// Create a new instance of `BufReaderPos` from a `BufReader`. 
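A usage sketch for the two reader wrappers, assuming they are reachable as `nydus_utils::reader::*`: `FileRangeReader` reads a byte range (mirroring the module's unit test), and `BufReaderInfo` tracks the read position plus a running SHA-256. The temp-file handling is illustrative only:

```rust
use std::fs::File;
use std::io::{BufReader, Read};

use nydus_utils::reader::{BufReaderInfo, FileRangeReader};

fn main() -> std::io::Result<()> {
    let path = std::env::temp_dir().join("reader-example.txt");
    std::fs::write(&path, b"This is a test")?;

    // Read 6 bytes starting at offset 4, as in the module's unit test.
    let f = File::open(&path)?;
    let mut range = FileRangeReader::new(&f, 4, 6);
    let mut buf = [0u8; 16];
    let n = range.read(&mut buf)?;
    assert_eq!(&buf[..n], b" is a ");

    // Wrap a BufReader to track the read position and a running SHA-256.
    let mut info = BufReaderInfo::from_buf_reader(BufReader::new(File::open(&path)?));
    let mut all = Vec::new();
    info.read_to_end(&mut all)?;
    assert_eq!(info.position(), all.len() as u64);
    let _sha256 = info.get_hash_object(); // finalize elsewhere to obtain the digest

    std::fs::remove_file(&path)?;
    Ok(())
}
```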
+ pub fn from_buf_reader(buf_reader: BufReader) -> Self { + let state = BufReaderState { + reader: buf_reader, + pos: 0, + hash: Sha256::default(), + }; + Self { + calc_digest: true, + state: Arc::new(Mutex::new(state)), + } + } + + /// Get current position of the reader. + pub fn position(&self) -> u64 { + self.state.lock().unwrap().pos + } + + /// Get the hash object. + pub fn get_hash_object(&self) -> Sha256 { + self.state.lock().unwrap().hash.clone() + } + + /// Enable or disable blob digest calculation. + pub fn enable_digest_calculation(&mut self, enable: bool) { + self.calc_digest = enable; + } +} + +impl Read for BufReaderInfo { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let mut state = self.state.lock().unwrap(); + state.reader.read(buf).map(|v| { + state.pos += v as u64; + if v > 0 && self.calc_digest { + state.hash.digest_update(&buf[..v]); + } + v + }) + } +} + +impl Seek for BufReaderInfo { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + let mut state = self.state.lock().unwrap(); + let pos = state.reader.seek(pos)?; + state.pos = pos; + Ok(pos) + } +} + +impl Clone for BufReaderInfo { + fn clone(&self) -> Self { + Self { + calc_digest: self.calc_digest, + state: self.state.clone(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_file_range_reader() { + let file = TempFile::new().unwrap(); + std::fs::write(file.as_path(), b"This is a test").unwrap(); + let mut reader = FileRangeReader::new(file.as_file(), 4, 6); + let mut buf = vec![0u8; 128]; + let res = reader.read(&mut buf).unwrap(); + assert_eq!(res, 6); + assert_eq!(&buf[..6], b" is a ".as_slice()); + let res = reader.read(&mut buf).unwrap(); + assert_eq!(res, 0); + } +} diff --git a/utils/src/trace.rs b/utils/src/trace.rs index d083cc99e0b..4d88440cca6 100644 --- a/utils/src/trace.rs +++ b/utils/src/trace.rs @@ -1,342 +1,342 @@ -// Copyright 2020 Ant Group. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Trace image building procedure - -use std::any::Any; -use std::cmp::{Eq, PartialEq}; -use std::collections::HashMap; -use std::fmt::{Display, Formatter, Result as FmtResult}; -use std::sync::{atomic::AtomicU64, Arc, Mutex, RwLock}; -use std::time::SystemTime; - -use serde::Serialize; -use serde_json::{error::Error, value::Value}; -use thiserror::Error; - -impl Display for TraceClass { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - match self { - TraceClass::Timing => write!(f, "consumed_time"), - TraceClass::Event => write!(f, "registered_events"), - } - } -} - -macro_rules! enum_str { - ($m:meta - pub enum $name:ident { - $($variant:ident = $val:expr),*, - }) => { - #[$m] - pub enum $name { - $($variant = $val),* - } - - impl $name { - fn name(&self) -> String { - match self { - $($name::$variant => format!("{}", $name::$variant)),* - } - } - } - }; -} - -enum_str! { -derive(Hash, Eq, PartialEq) -pub enum TraceClass { - Timing = 1, - Event = 2, -} -} - -#[derive(Error, Debug)] -pub enum TraceError { - #[error("serialize error: {0}")] - Serde(Error), -} - -type Result = std::result::Result; - -/// Used to measure time consuming and gather all tracing points when building image. -#[derive(Serialize, Default)] -pub struct TimingTracerClass { - // Generally speaking, we won't have many timing tracers act from multiple points. - // So `Mutex` should fill our requirements. 
- #[serde(flatten)] - records: Mutex>, -} - -pub trait TracerClass: Send + Sync + 'static { - fn release(&self) -> Result; - fn as_any(&self) -> &dyn Any; -} - -impl TracerClass for TimingTracerClass { - fn release(&self) -> Result { - serde_json::to_value(self).map_err(TraceError::Serde) - } - fn as_any(&self) -> &dyn Any { - self - } -} - -pub fn trace_timing T, T>( - point: &str, - tracer: Option<&TimingTracerClass>, - f: F, -) -> T { - let begin = SystemTime::now(); - let r = f(); - let elapsed = SystemTime::now().duration_since(begin).unwrap(); - - // Not expect poisoned lock. - if let Some(t) = tracer { - t.records - .lock() - .unwrap() - .insert(point.to_string(), elapsed.as_secs_f32()); - } - - r -} - -/// The root tracer manages all kinds of tracers registered to it. -/// The statistics/events/records can be printed out or persisted from the root -/// tracer. When building procedure is finished, root tracer can dump all tracing -/// points to specified output file. -pub struct BuildRootTracer { - tracers: RwLock>>, -} - -impl BuildRootTracer { - pub fn register(&self, class: TraceClass, tracer: Arc) { - let mut guard = self.tracers.write().unwrap(); - // In case a certain class is registered multiple times, e.g. from several - // concurrently running test cases. - if guard.get(&class).is_none() { - guard.insert(class, tracer); - } - } - - pub fn tracer(&self, class: TraceClass) -> Option> { - let g = self.tracers.read().unwrap(); - // Safe to unwrap because tracers should always be enabled - (&g).get(&class).cloned() - } - - pub fn dump_summary_map(&self) -> Result> { - let mut map = serde_json::Map::new(); - for c in self.tracers.write().unwrap().iter() { - map.insert(c.0.name(), c.1.release()?); - } - Ok(map) - } -} - -#[derive(Serialize)] -#[serde(untagged)] -#[allow(dead_code)] -pub enum TraceEvent { - Counter(AtomicU64), - Fixed(u64), - Desc(String), -} - -#[derive(Serialize, Default)] -pub struct EventTracerClass { - #[serde(flatten)] - pub events: RwLock>, -} - -impl TracerClass for EventTracerClass { - fn release(&self) -> Result { - serde_json::to_value(self).map_err(TraceError::Serde) - } - fn as_any(&self) -> &dyn Any { - self - } -} - -lazy_static! { - pub static ref BUILDING_RECORDER: BuildRootTracer = BuildRootTracer { - tracers: RwLock::new(HashMap::default()) - }; -} - -#[macro_export] -macro_rules! root_tracer { - () => { - &$crate::trace::BUILDING_RECORDER as &$crate::trace::BuildRootTracer - }; -} - -#[macro_export] -macro_rules! timing_tracer { - () => { - root_tracer!() - .tracer($crate::trace::TraceClass::Timing) - .as_ref() - .map(|t| { - t.as_any() - .downcast_ref::<$crate::trace::TimingTracerClass>() - .unwrap() - }) - }; - ($f:block, $key:expr) => { - $crate::trace::trace_timing($key, timing_tracer!(), || $f) - }; - ($f:block, $key:expr, $t:ty) => { - $crate::trace::trace_timing::<_, $t>($key, timing_tracer!(), || $f) - }; -} - -#[macro_export] -macro_rules! register_tracer { - ($class:expr, $r:ty) => { - root_tracer!().register($class, std::sync::Arc::new(<$r>::default())); - }; -} - -#[macro_export] -macro_rules! 
event_tracer { - () => { - root_tracer!() - .tracer($crate::trace::TraceClass::Event) - .as_ref() - .map(|t| { - t.as_any() - .downcast_ref::<$crate::trace::EventTracerClass>() - .unwrap() - }) - }; - ($event:expr, $desc:expr) => { - event_tracer!().events.write().unwrap().insert( - $event.to_string(), - $crate::trace::TraceEvent::Fixed($desc as u64), - ) - }; - ($event:expr, +$value:expr) => { - let mut new: bool = true; - - if let Some(t) = event_tracer!() { - if let Some($crate::trace::TraceEvent::Counter(ref e)) = - t.events.read().unwrap().get($event) - { - e.fetch_add($value as u64, std::sync::atomic::Ordering::Relaxed); - new = false; - } - - if new { - // Double check to close the race that another thread has already inserted. - // Cast integer to u64 should be reliable for most cases. - if let Ok(ref mut guard) = t.events.write() { - if let Some($crate::trace::TraceEvent::Counter(ref e)) = guard.get($event) { - e.fetch_add($value as u64, std::sync::atomic::Ordering::Relaxed); - } else { - guard.insert( - $event.to_string(), - $crate::trace::TraceEvent::Counter(std::sync::atomic::AtomicU64::new( - $value as u64, - )), - ); - } - } - } - } - }; - ($event:expr, $format:expr, $value:expr) => { - if let Some(t) = event_tracer!() { - if let Ok(ref mut guard) = t.events.write() { - guard.insert( - $event.to_string(), - $crate::trace::TraceEvent::Desc(format!($format, $value)), - ); - } - } - }; -} - -#[cfg(test)] -pub mod tests { - use crate::trace::TimingTracerClass; - - use super::{EventTracerClass, TraceClass}; - use std::thread; - - #[test] - fn test_event_trace() { - register_tracer!(TraceClass::Event, EventTracerClass); - - let t1 = thread::Builder::new() - .spawn(move || { - for _i in 0..100 { - event_tracer!("event_1", +2); - event_tracer!("event_2", +3); - } - }) - .unwrap(); - - let t2 = thread::Builder::new() - .spawn(move || { - for _i in 0..100 { - event_tracer!("event_1", +2); - event_tracer!("event_2", +3); - } - }) - .unwrap(); - - let t3 = thread::Builder::new() - .spawn(move || { - for _i in 0..100 { - event_tracer!("event_1", +2); - event_tracer!("event_2", +3); - } - }) - .unwrap(); - - t1.join().unwrap(); - t2.join().unwrap(); - t3.join().unwrap(); - - let map = root_tracer!().dump_summary_map().unwrap(); - assert_eq!(map["registered_events"]["event_1"].as_u64(), Some(600)); - assert_eq!(map["registered_events"]["event_2"].as_u64(), Some(900)); - } - - #[test] - fn test_timing_trace() { - register_tracer!(TraceClass::Timing, TimingTracerClass); - let f = || (); - - let t1 = thread::Builder::new() - .spawn(move || { - for i in 0..100 { - timing_tracer!({ f() }, format!("t1.{}", i).as_str()); - } - }) - .unwrap(); - - let t2 = thread::Builder::new() - .spawn(move || { - for i in 0..100 { - timing_tracer!({ f() }, format!("t2.{}", i).as_str()); - } - }) - .unwrap(); - let t3 = thread::Builder::new() - .spawn(move || { - for i in 0..100 { - timing_tracer!({ f() }, format!("t3.{}", i).as_str()); - } - }) - .unwrap(); - - t1.join().unwrap(); - t2.join().unwrap(); - t3.join().unwrap(); - assert_eq!(timing_tracer!().unwrap().records.lock().unwrap().len(), 300); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! 
Trace image building procedure
+
+use std::any::Any;
+use std::cmp::{Eq, PartialEq};
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter, Result as FmtResult};
+use std::sync::{atomic::AtomicU64, Arc, Mutex, RwLock};
+use std::time::SystemTime;
+
+use serde::Serialize;
+use serde_json::{error::Error, value::Value};
+use thiserror::Error;
+
+impl Display for TraceClass {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            TraceClass::Timing => write!(f, "consumed_time"),
+            TraceClass::Event => write!(f, "registered_events"),
+        }
+    }
+}
+
+macro_rules! enum_str {
+    ($m:meta
+    pub enum $name:ident {
+        $($variant:ident = $val:expr),*,
+    }) => {
+        #[$m]
+        pub enum $name {
+            $($variant = $val),*
+        }
+
+        impl $name {
+            fn name(&self) -> String {
+                match self {
+                    $($name::$variant => format!("{}", $name::$variant)),*
+                }
+            }
+        }
+    };
+}
+
+enum_str! {
+derive(Hash, Eq, PartialEq)
+pub enum TraceClass {
+    Timing = 1,
+    Event = 2,
+}
+}
+
+#[derive(Error, Debug)]
+pub enum TraceError {
+    #[error("serialize error: {0}")]
+    Serde(Error),
+}
+
+type Result<T> = std::result::Result<T, TraceError>;
+
+/// Used to measure time consuming and gather all tracing points when building image.
+#[derive(Serialize, Default)]
+pub struct TimingTracerClass {
+    // Generally speaking, we won't have many timing tracers act from multiple points.
+    // So `Mutex` should fill our requirements.
+    #[serde(flatten)]
+    records: Mutex<HashMap<String, f32>>,
+}
+
+pub trait TracerClass: Send + Sync + 'static {
+    fn release(&self) -> Result<Value>;
+    fn as_any(&self) -> &dyn Any;
+}
+
+impl TracerClass for TimingTracerClass {
+    fn release(&self) -> Result<Value> {
+        serde_json::to_value(self).map_err(TraceError::Serde)
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+pub fn trace_timing<F: FnOnce() -> T, T>(
+    point: &str,
+    tracer: Option<&TimingTracerClass>,
+    f: F,
+) -> T {
+    let begin = SystemTime::now();
+    let r = f();
+    let elapsed = SystemTime::now().duration_since(begin).unwrap();
+
+    // Not expect poisoned lock.
+    if let Some(t) = tracer {
+        t.records
+            .lock()
+            .unwrap()
+            .insert(point.to_string(), elapsed.as_secs_f32());
+    }
+
+    r
+}
+
+/// The root tracer manages all kinds of tracers registered to it.
+/// The statistics/events/records can be printed out or persisted from the root
+/// tracer. When building procedure is finished, root tracer can dump all tracing
+/// points to specified output file.
+pub struct BuildRootTracer {
+    tracers: RwLock<HashMap<TraceClass, Arc<dyn TracerClass>>>,
+}
+
+impl BuildRootTracer {
+    pub fn register(&self, class: TraceClass, tracer: Arc<dyn TracerClass>) {
+        let mut guard = self.tracers.write().unwrap();
+        // In case a certain class is registered multiple times, e.g. from several
+        // concurrently running test cases.
+        if guard.get(&class).is_none() {
+            guard.insert(class, tracer);
+        }
+    }
+
+    pub fn tracer(&self, class: TraceClass) -> Option<Arc<dyn TracerClass>> {
+        let g = self.tracers.read().unwrap();
+        // Safe to unwrap because tracers should always be enabled
+        (&g).get(&class).cloned()
+    }
+
+    pub fn dump_summary_map(&self) -> Result<serde_json::Map<String, Value>> {
+        let mut map = serde_json::Map::new();
+        for c in self.tracers.write().unwrap().iter() {
+            map.insert(c.0.name(), c.1.release()?);
+        }
+        Ok(map)
+    }
+}
+
+#[derive(Serialize)]
+#[serde(untagged)]
+#[allow(dead_code)]
+pub enum TraceEvent {
+    Counter(AtomicU64),
+    Fixed(u64),
+    Desc(String),
+}
+
+#[derive(Serialize, Default)]
+pub struct EventTracerClass {
+    #[serde(flatten)]
+    pub events: RwLock<HashMap<String, TraceEvent>>,
+}
+
+impl TracerClass for EventTracerClass {
+    fn release(&self) -> Result<Value> {
+        serde_json::to_value(self).map_err(TraceError::Serde)
+    }
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+lazy_static! {
+    pub static ref BUILDING_RECORDER: BuildRootTracer = BuildRootTracer {
+        tracers: RwLock::new(HashMap::default())
+    };
+}
+
+#[macro_export]
+macro_rules! root_tracer {
+    () => {
+        &$crate::trace::BUILDING_RECORDER as &$crate::trace::BuildRootTracer
+    };
+}
+
+#[macro_export]
+macro_rules! timing_tracer {
+    () => {
+        root_tracer!()
+            .tracer($crate::trace::TraceClass::Timing)
+            .as_ref()
+            .map(|t| {
+                t.as_any()
+                    .downcast_ref::<$crate::trace::TimingTracerClass>()
+                    .unwrap()
+            })
+    };
+    ($f:block, $key:expr) => {
+        $crate::trace::trace_timing($key, timing_tracer!(), || $f)
+    };
+    ($f:block, $key:expr, $t:ty) => {
+        $crate::trace::trace_timing::<_, $t>($key, timing_tracer!(), || $f)
+    };
+}
+
+#[macro_export]
+macro_rules! register_tracer {
+    ($class:expr, $r:ty) => {
+        root_tracer!().register($class, std::sync::Arc::new(<$r>::default()));
+    };
+}
+
+#[macro_export]
+macro_rules! event_tracer {
+    () => {
+        root_tracer!()
+            .tracer($crate::trace::TraceClass::Event)
+            .as_ref()
+            .map(|t| {
+                t.as_any()
+                    .downcast_ref::<$crate::trace::EventTracerClass>()
+                    .unwrap()
+            })
+    };
+    ($event:expr, $desc:expr) => {
+        event_tracer!().events.write().unwrap().insert(
+            $event.to_string(),
+            $crate::trace::TraceEvent::Fixed($desc as u64),
+        )
+    };
+    ($event:expr, +$value:expr) => {
+        let mut new: bool = true;
+
+        if let Some(t) = event_tracer!() {
+            if let Some($crate::trace::TraceEvent::Counter(ref e)) =
+                t.events.read().unwrap().get($event)
+            {
+                e.fetch_add($value as u64, std::sync::atomic::Ordering::Relaxed);
+                new = false;
+            }
+
+            if new {
+                // Double check to close the race that another thread has already inserted.
+                // Cast integer to u64 should be reliable for most cases.
+ if let Ok(ref mut guard) = t.events.write() { + if let Some($crate::trace::TraceEvent::Counter(ref e)) = guard.get($event) { + e.fetch_add($value as u64, std::sync::atomic::Ordering::Relaxed); + } else { + guard.insert( + $event.to_string(), + $crate::trace::TraceEvent::Counter(std::sync::atomic::AtomicU64::new( + $value as u64, + )), + ); + } + } + } + } + }; + ($event:expr, $format:expr, $value:expr) => { + if let Some(t) = event_tracer!() { + if let Ok(ref mut guard) = t.events.write() { + guard.insert( + $event.to_string(), + $crate::trace::TraceEvent::Desc(format!($format, $value)), + ); + } + } + }; +} + +#[cfg(test)] +pub mod tests { + use crate::trace::TimingTracerClass; + + use super::{EventTracerClass, TraceClass}; + use std::thread; + + #[test] + fn test_event_trace() { + register_tracer!(TraceClass::Event, EventTracerClass); + + let t1 = thread::Builder::new() + .spawn(move || { + for _i in 0..100 { + event_tracer!("event_1", +2); + event_tracer!("event_2", +3); + } + }) + .unwrap(); + + let t2 = thread::Builder::new() + .spawn(move || { + for _i in 0..100 { + event_tracer!("event_1", +2); + event_tracer!("event_2", +3); + } + }) + .unwrap(); + + let t3 = thread::Builder::new() + .spawn(move || { + for _i in 0..100 { + event_tracer!("event_1", +2); + event_tracer!("event_2", +3); + } + }) + .unwrap(); + + t1.join().unwrap(); + t2.join().unwrap(); + t3.join().unwrap(); + + let map = root_tracer!().dump_summary_map().unwrap(); + assert_eq!(map["registered_events"]["event_1"].as_u64(), Some(600)); + assert_eq!(map["registered_events"]["event_2"].as_u64(), Some(900)); + } + + #[test] + fn test_timing_trace() { + register_tracer!(TraceClass::Timing, TimingTracerClass); + let f = || (); + + let t1 = thread::Builder::new() + .spawn(move || { + for i in 0..100 { + timing_tracer!({ f() }, format!("t1.{}", i).as_str()); + } + }) + .unwrap(); + + let t2 = thread::Builder::new() + .spawn(move || { + for i in 0..100 { + timing_tracer!({ f() }, format!("t2.{}", i).as_str()); + } + }) + .unwrap(); + let t3 = thread::Builder::new() + .spawn(move || { + for i in 0..100 { + timing_tracer!({ f() }, format!("t3.{}", i).as_str()); + } + }) + .unwrap(); + + t1.join().unwrap(); + t2.join().unwrap(); + t3.join().unwrap(); + assert_eq!(timing_tracer!().unwrap().records.lock().unwrap().len(), 300); + } +} diff --git a/utils/src/types.rs b/utils/src/types.rs index 5edc5114e51..9c763e480f3 100644 --- a/utils/src/types.rs +++ b/utils/src/types.rs @@ -1,74 +1,74 @@ -// Copyright 2020 Ant Group. All rights reserved. -// Copyright (C) 2020 Alibaba Cloud. All rights reserved. 
-// -// SPDX-License-Identifier: Apache-2.0 - -use std::ffi::{OsStr, OsString}; -use std::os::unix::ffi::OsStrExt; -use std::path::PathBuf; - -pub trait ByteSize { - fn byte_size(&self) -> usize; -} - -impl ByteSize for OsString { - fn byte_size(&self) -> usize { - self.as_bytes().len() - } -} - -impl ByteSize for OsStr { - fn byte_size(&self) -> usize { - self.as_bytes().len() - } -} - -impl ByteSize for PathBuf { - fn byte_size(&self) -> usize { - self.as_os_str().byte_size() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_os_string_empty() { - let os_str = OsStr::new(""); - let os_string = OsString::from(""); - - assert_eq!(os_str.len(), 0); - assert_eq!(os_str.byte_size(), 0); - assert_eq!(os_string.len(), 0); - assert_eq!(os_string.byte_size(), 0); - } - - #[test] - fn test_os_string_size() { - let os_str = OsStr::new("foo"); - let os_string = OsString::from("foo"); - - assert_eq!(os_str.len(), 3); - assert_eq!(os_str.byte_size(), 3); - assert_eq!(os_string.len(), 3); - assert_eq!(os_string.byte_size(), 3); - } - - #[test] - fn test_pathbuf_size() { - let mut path = PathBuf::new(); - - assert_eq!(path.byte_size(), 0); - - path.push("/"); - assert_eq!(path.byte_size(), 1); - - path.push("test"); - assert_eq!(path.byte_size(), 5); - - // "/test/a" - path.push("a"); - assert_eq!(path.byte_size(), 7); - } -} +// Copyright 2020 Ant Group. All rights reserved. +// Copyright (C) 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::ffi::{OsStr, OsString}; +use std::os::unix::ffi::OsStrExt; +use std::path::PathBuf; + +pub trait ByteSize { + fn byte_size(&self) -> usize; +} + +impl ByteSize for OsString { + fn byte_size(&self) -> usize { + self.as_bytes().len() + } +} + +impl ByteSize for OsStr { + fn byte_size(&self) -> usize { + self.as_bytes().len() + } +} + +impl ByteSize for PathBuf { + fn byte_size(&self) -> usize { + self.as_os_str().byte_size() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_os_string_empty() { + let os_str = OsStr::new(""); + let os_string = OsString::from(""); + + assert_eq!(os_str.len(), 0); + assert_eq!(os_str.byte_size(), 0); + assert_eq!(os_string.len(), 0); + assert_eq!(os_string.byte_size(), 0); + } + + #[test] + fn test_os_string_size() { + let os_str = OsStr::new("foo"); + let os_string = OsString::from("foo"); + + assert_eq!(os_str.len(), 3); + assert_eq!(os_str.byte_size(), 3); + assert_eq!(os_string.len(), 3); + assert_eq!(os_string.byte_size(), 3); + } + + #[test] + fn test_pathbuf_size() { + let mut path = PathBuf::new(); + + assert_eq!(path.byte_size(), 0); + + path.push("/"); + assert_eq!(path.byte_size(), 1); + + path.push("test"); + assert_eq!(path.byte_size(), 5); + + // "/test/a" + path.push("a"); + assert_eq!(path.byte_size(), 7); + } +} diff --git a/utils/src/verity.rs b/utils/src/verity.rs index 92c321391b1..2266796b16f 100644 --- a/utils/src/verity.rs +++ b/utils/src/verity.rs @@ -1,457 +1,457 @@ -// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. -// -// SPDX-License-Identifier: Apache-2.0 - -//! Utilities to generate Merkle trees for data integrity verification. 
- -use std::fs::File; -use std::io::Result; -use std::mem::size_of; -use std::sync::Mutex; - -use crate::digest::{Algorithm, DigestData, RafsDigest}; -use crate::div_round_up; -use crate::filemap::FileMapState; - -const NON_EXIST_ENTRY_DIGEST: RafsDigest = RafsDigest { - data: [ - 173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, - 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167, - ], -}; - -/// Struct to maintain and compute Merkle Tree topology and layout. -pub struct MerkleTree { - digest_algo: Algorithm, - digest_per_page: u32, - digest_size: usize, - data_pages: u32, - page_size: u32, - max_levels: u32, -} - -impl MerkleTree { - /// Create a new instance of `MerkleTree`. - pub fn new(page_size: u32, data_pages: u32, digest_algo: Algorithm) -> Self { - assert_eq!(page_size, 4096); - assert_eq!(digest_algo, Algorithm::Sha256); - let digest_size = 32; - let digest_shift = u32::trailing_zeros(page_size / digest_size); - let digest_per_page = 1u32 << digest_shift; - - let mut max_levels = 0; - let mut tmp_pages = data_pages as u64; - while tmp_pages > 1 { - tmp_pages = div_round_up(tmp_pages, digest_per_page as u64); - max_levels += 1; - } - - MerkleTree { - digest_algo, - digest_per_page: 1 << digest_shift, - digest_size: digest_size as usize, - page_size, - data_pages, - max_levels, - } - } - - /// Get digest algorithm used to generate the Merkle tree. - pub fn digest_algorithm(&self) -> Algorithm { - self.digest_algo - } - - /// Get height of the Merkle tree, 0 means there is only a root digest for one data page. - pub fn max_levels(&self) -> u32 { - self.max_levels - } - - /// Get number of pages to store digest at specified Merkle tree level. - pub fn level_pages(&self, mut level: u32) -> u32 { - if level > self.max_levels { - 0 - } else { - let mut pages = self.data_pages as u64; - while level > 0 && pages > 0 { - pages = div_round_up(pages, self.digest_per_page as u64); - level -= 1; - } - pages as u32 - } - } - - /// Get number of digest entries at specified Merkle tree level. - pub fn level_entries(&self, level: u32) -> u32 { - if self.data_pages == 0 || level > self.max_levels { - 0 - } else { - self.level_index(level, self.data_pages - 1) + 1 - } - } - - /// Get entry index at the specified level covering the data page with index `page_index`. - pub fn level_index(&self, mut level: u32, mut page_index: u32) -> u32 { - if level <= 1 { - page_index - } else { - level -= 1; - while level > 0 { - page_index /= self.digest_per_page; - level -= 1; - } - page_index - } - } - - /// Get base position of digest array for the specified Merkle tree level. - pub fn level_base(&self, level: u32) -> u64 { - if level >= self.max_levels { - 0 - } else { - let mut offset = 0; - let mut curr = self.max_levels; - while curr > level { - let pages = self.level_pages(curr); - offset += pages as u64 * self.page_size as u64; - curr -= 1; - } - offset - } - } - - /// Get total pages needed to store the Merkle Tree. - pub fn total_pages(&self) -> u32 { - let mut pages = 0; - for idx in 1..=self.max_levels { - pages += self.level_pages(idx); - } - pages - } -} - -/// Merkle tree generator for data integrity verification. -pub struct VerityGenerator { - mkl_tree: MerkleTree, - file_map: Mutex, - root_digest: RafsDigest, -} - -impl VerityGenerator { - /// Create a new instance [VerityGenerator]. 
- pub fn new(file: File, offset: u64, data_pages: u32) -> Result { - let mkl_tree = MerkleTree::new(4096, data_pages, Algorithm::Sha256); - let total_size = mkl_tree.total_pages() as usize * 4096; - let file_map = if data_pages > 1 { - if offset.checked_add(total_size as u64).is_none() { - return Err(einval!(format!( - "verity data offset 0x{:x} and size 0x{:x} is too big", - offset, total_size - ))); - } - - let md = file.metadata()?; - if md.len() < total_size as u64 + offset { - file.set_len(total_size as u64 + offset)?; - } - FileMapState::new(file, offset as libc::off_t, total_size, true)? - } else { - FileMapState::default() - }; - - Ok(VerityGenerator { - mkl_tree, - file_map: Mutex::new(file_map), - root_digest: NON_EXIST_ENTRY_DIGEST, - }) - } - - /// Initialize all digest values. - pub fn initialize(&mut self) -> Result<()> { - let total_size = self.mkl_tree.total_pages() as usize * 4096; - let mut offset = 0; - let mut map = self.file_map.lock().unwrap(); - - while offset < total_size { - let digest = map.get_mut::(offset)?; - digest.copy_from_slice(&NON_EXIST_ENTRY_DIGEST.data); - offset += size_of::(); - } - - Ok(()) - } - - /// Set digest value for Merkle entry at `level` with `index`. - /// - /// Digests for data pages must be set by calling this method. It can also be used to set - /// digest values for intermediate digest pages. - pub fn set_digest(&mut self, level: u32, index: u32, digest: &[u8]) -> Result<()> { - let digest_size = self.mkl_tree.digest_size; - if digest.len() != digest_size { - return Err(einval!(format!( - "size of digest data is not {}", - digest_size - ))); - } - - // Handle special case of zero-level Merkle tree. - if self.mkl_tree.data_pages == 1 && level == 1 && index == 0 { - self.root_digest.data.copy_from_slice(digest); - return Ok(()); - } - - if level > self.mkl_tree.max_levels() || level == 0 { - return Err(einval!(format!( - "level {} is out of range, max {}", - level, - self.mkl_tree.max_levels() - ))); - } else if index >= self.mkl_tree.level_entries(level) { - return Err(einval!(format!( - "index {} is out of range, max {}", - index, - self.mkl_tree.level_entries(level) - 1 - ))); - } - - let base = self.mkl_tree.level_base(level) as usize; - let offset = base + index as usize * digest_size; - let mut guard = self.file_map.lock().unwrap(); - let buf = guard.get_mut::(offset)?; - buf.copy_from_slice(digest); - - Ok(()) - } - - /// Generate digest values from lower level digest pages. - pub fn generate_level_digests(&mut self, level: u32) -> Result<()> { - assert!(level > 1 && level <= self.mkl_tree.max_levels); - let page_size = self.mkl_tree.page_size as usize; - let count = self.mkl_tree.level_entries(level) as usize; - let mut digest_base = self.mkl_tree.level_base(level) as usize; - let mut data_base = self.mkl_tree.level_base(level - 1) as usize; - let mut guard = self.file_map.lock().unwrap(); - - for _ in 0..count { - let data = guard.get_slice::(data_base, page_size)?; - let digest = RafsDigest::from_buf(data, self.mkl_tree.digest_algo); - let buf = guard.get_mut::(digest_base)?; - buf.copy_from_slice(digest.as_ref()); - data_base += page_size; - digest_base += self.mkl_tree.digest_size; - } - - Ok(()) - } - - /// Generate Merkle root digest. 
- /// - /// The returned Merkle tree root digest will be: - /// - `NON_EXIST_ENTRY_DIGEST` if there's no data page - /// - digest of the data page if there's only one data page - /// - digest of the intermediate digest page if there's more than one data pages - pub fn generate_root_digest(&mut self) -> Result { - if self.mkl_tree.max_levels == 0 { - Ok(self.root_digest) - } else { - let guard = self.file_map.lock().unwrap(); - let data = guard.get_slice::(0, self.mkl_tree.page_size as usize)?; - Ok(RafsDigest::from_buf(data, self.mkl_tree.digest_algo)) - } - } - - /// Generate all intermediate and root digests for the Merkle tree. - /// - /// Digests for data pages at level 1 must be set up by calling [set_digest()] before this - /// function to generate intermediate and root digests. - pub fn generate_all_digests(&mut self) -> Result { - for level in 2..=self.mkl_tree.max_levels { - self.generate_level_digests(level)?; - } - self.generate_root_digest() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use vmm_sys_util::tempfile::TempFile; - - #[test] - fn test_max_levels() { - let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 0); - assert_eq!(mkl.level_pages(0), 1); - assert_eq!(mkl.level_pages(1), 0); - assert_eq!(mkl.level_base(0), 0); - assert_eq!(mkl.level_base(1), 0); - assert_eq!(mkl.level_entries(0), 1); - assert_eq!(mkl.level_entries(1), 0); - assert_eq!(mkl.total_pages(), 0); - - let mkl = MerkleTree::new(4096, 2, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 1); - assert_eq!(mkl.level_pages(0), 2); - assert_eq!(mkl.level_pages(1), 1); - assert_eq!(mkl.level_pages(2), 0); - assert_eq!(mkl.level_entries(0), 2); - assert_eq!(mkl.level_entries(1), 2); - assert_eq!(mkl.level_entries(2), 0); - assert_eq!(mkl.level_base(0), 4096); - assert_eq!(mkl.level_base(1), 0); - assert_eq!(mkl.level_base(2), 0); - assert_eq!(mkl.total_pages(), 1); - - let mkl = MerkleTree::new(4096, 128, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 1); - assert_eq!(mkl.level_pages(0), 128); - assert_eq!(mkl.level_pages(1), 1); - assert_eq!(mkl.level_pages(2), 0); - assert_eq!(mkl.level_entries(0), 128); - assert_eq!(mkl.level_entries(1), 128); - assert_eq!(mkl.level_entries(2), 0); - assert_eq!(mkl.level_base(0), 4096); - assert_eq!(mkl.level_base(1), 0); - assert_eq!(mkl.level_base(2), 0); - assert_eq!(mkl.total_pages(), 1); - - let mkl = MerkleTree::new(4096, 129, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 2); - assert_eq!(mkl.level_pages(0), 129); - assert_eq!(mkl.level_pages(1), 2); - assert_eq!(mkl.level_pages(2), 1); - assert_eq!(mkl.level_pages(3), 0); - assert_eq!(mkl.level_entries(0), 129); - assert_eq!(mkl.level_entries(1), 129); - assert_eq!(mkl.level_entries(2), 2); - assert_eq!(mkl.level_entries(3), 0); - assert_eq!(mkl.level_base(0), 4096 * 3); - assert_eq!(mkl.level_base(1), 4096); - assert_eq!(mkl.level_base(2), 0); - assert_eq!(mkl.level_base(3), 0); - assert_eq!(mkl.total_pages(), 3); - - let mkl = MerkleTree::new(4096, 128 * 128, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 2); - assert_eq!(mkl.level_pages(0), 128 * 128); - assert_eq!(mkl.level_pages(1), 128); - assert_eq!(mkl.level_pages(2), 1); - assert_eq!(mkl.level_pages(3), 0); - assert_eq!(mkl.level_base(0), 4096 * 129); - assert_eq!(mkl.level_base(1), 4096); - assert_eq!(mkl.level_base(2), 0); - assert_eq!(mkl.level_base(3), 0); - assert_eq!(mkl.total_pages(), 129); - - let mkl = MerkleTree::new(4096, 128 * 128 + 1, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 3); - 
assert_eq!(mkl.level_pages(0), 128 * 128 + 1); - assert_eq!(mkl.level_pages(1), 129); - assert_eq!(mkl.level_pages(2), 2); - assert_eq!(mkl.level_pages(3), 1); - assert_eq!(mkl.level_pages(4), 0); - assert_eq!(mkl.level_entries(0), 128 * 128 + 1); - assert_eq!(mkl.level_entries(1), 128 * 128 + 1); - assert_eq!(mkl.level_entries(2), 129); - assert_eq!(mkl.level_entries(3), 2); - assert_eq!(mkl.level_entries(4), 0); - assert_eq!(mkl.level_base(0), 4096 * 132); - assert_eq!(mkl.level_base(1), 4096 * 3); - assert_eq!(mkl.level_base(2), 4096); - assert_eq!(mkl.level_base(3), 0); - assert_eq!(mkl.level_base(4), 0); - assert_eq!(mkl.total_pages(), 132); - - let mkl = MerkleTree::new(4096, u32::MAX, Algorithm::Sha256); - assert_eq!(mkl.max_levels(), 5); - } - - #[test] - fn test_generate_mkl_tree_zero_entry() { - let digest = RafsDigest::from_buf(&[0u8; 4096], Algorithm::Sha256); - assert_eq!(digest, NON_EXIST_ENTRY_DIGEST); - - let file = TempFile::new().unwrap(); - let mut generator = VerityGenerator::new(file.into_file(), 0, 0).unwrap(); - - assert!(generator - .set_digest(0, 0, &NON_EXIST_ENTRY_DIGEST.data) - .is_err()); - assert!(generator - .set_digest(1, 0, &NON_EXIST_ENTRY_DIGEST.data) - .is_err()); - - let root_digest = generator.generate_all_digests().unwrap(); - assert_eq!(root_digest, NON_EXIST_ENTRY_DIGEST); - } - - #[test] - fn test_generate_mkl_tree_one_entry() { - let file = TempFile::new().unwrap(); - let mut generator = VerityGenerator::new(file.into_file(), 0, 1).unwrap(); - - let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); - assert!(generator.set_digest(0, 0, &digest.data).is_err()); - assert!(generator.set_digest(2, 0, &digest.data).is_err()); - assert!(generator.set_digest(1, 1, &digest.data).is_err()); - generator.set_digest(1, 0, &digest.data).unwrap(); - - let root_digest = generator.generate_all_digests().unwrap(); - assert_eq!(root_digest, digest); - } - - #[test] - fn test_generate_mkl_tree_two_entries() { - let file = TempFile::new().unwrap(); - let mut generator = VerityGenerator::new(file.into_file(), 0, 2).unwrap(); - - let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); - assert!(generator.set_digest(0, 0, &digest.data).is_err()); - assert!(generator.set_digest(2, 0, &digest.data).is_err()); - assert!(generator.set_digest(1, 2, &digest.data).is_err()); - generator.set_digest(1, 0, &digest.data).unwrap(); - generator.set_digest(1, 1, &digest.data).unwrap(); - - let root_digest = generator.generate_all_digests().unwrap(); - assert_ne!(root_digest, digest); - } - - #[test] - fn test_generate_mkl_tree_4097_entries() { - let file = TempFile::new().unwrap(); - let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); - - let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); - assert!(generator.set_digest(0, 0, &digest.data).is_err()); - generator.set_digest(2, 0, &digest.data).unwrap(); - for idx in 0..4097 { - generator.set_digest(1, idx, &digest.data).unwrap(); - } - - let root_digest = generator.generate_all_digests().unwrap(); - assert_ne!(root_digest, digest); - assert_eq!(generator.mkl_tree.max_levels, 2); - } - - #[test] - fn test_merkle_tree_digest_algo() { - let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256); - assert_eq!(mkl.digest_algorithm(), Algorithm::Sha256); - } - - #[test] - fn test_verity_generator_error() { - let file = TempFile::new().unwrap(); - assert!(VerityGenerator::new(file.into_file(), u64::MAX, u32::MAX).is_err()); - - let file = TempFile::new().unwrap(); - let mut 
generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); - assert!(generator.set_digest(1, 0, &[1u8; 64]).is_err()); - } - - #[test] - fn test_verity_initialize() { - let file = TempFile::new().unwrap(); - let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); - assert!(generator.initialize().is_ok()); - } -} +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Utilities to generate Merkle trees for data integrity verification. + +use std::fs::File; +use std::io::Result; +use std::mem::size_of; +use std::sync::Mutex; + +use crate::digest::{Algorithm, DigestData, RafsDigest}; +use crate::div_round_up; +use crate::filemap::FileMapState; + +const NON_EXIST_ENTRY_DIGEST: RafsDigest = RafsDigest { + data: [ + 173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, + 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167, + ], +}; + +/// Struct to maintain and compute Merkle Tree topology and layout. +pub struct MerkleTree { + digest_algo: Algorithm, + digest_per_page: u32, + digest_size: usize, + data_pages: u32, + page_size: u32, + max_levels: u32, +} + +impl MerkleTree { + /// Create a new instance of `MerkleTree`. + pub fn new(page_size: u32, data_pages: u32, digest_algo: Algorithm) -> Self { + assert_eq!(page_size, 4096); + assert_eq!(digest_algo, Algorithm::Sha256); + let digest_size = 32; + let digest_shift = u32::trailing_zeros(page_size / digest_size); + let digest_per_page = 1u32 << digest_shift; + + let mut max_levels = 0; + let mut tmp_pages = data_pages as u64; + while tmp_pages > 1 { + tmp_pages = div_round_up(tmp_pages, digest_per_page as u64); + max_levels += 1; + } + + MerkleTree { + digest_algo, + digest_per_page: 1 << digest_shift, + digest_size: digest_size as usize, + page_size, + data_pages, + max_levels, + } + } + + /// Get digest algorithm used to generate the Merkle tree. + pub fn digest_algorithm(&self) -> Algorithm { + self.digest_algo + } + + /// Get height of the Merkle tree, 0 means there is only a root digest for one data page. + pub fn max_levels(&self) -> u32 { + self.max_levels + } + + /// Get number of pages to store digest at specified Merkle tree level. + pub fn level_pages(&self, mut level: u32) -> u32 { + if level > self.max_levels { + 0 + } else { + let mut pages = self.data_pages as u64; + while level > 0 && pages > 0 { + pages = div_round_up(pages, self.digest_per_page as u64); + level -= 1; + } + pages as u32 + } + } + + /// Get number of digest entries at specified Merkle tree level. + pub fn level_entries(&self, level: u32) -> u32 { + if self.data_pages == 0 || level > self.max_levels { + 0 + } else { + self.level_index(level, self.data_pages - 1) + 1 + } + } + + /// Get entry index at the specified level covering the data page with index `page_index`. + pub fn level_index(&self, mut level: u32, mut page_index: u32) -> u32 { + if level <= 1 { + page_index + } else { + level -= 1; + while level > 0 { + page_index /= self.digest_per_page; + level -= 1; + } + page_index + } + } + + /// Get base position of digest array for the specified Merkle tree level. + pub fn level_base(&self, level: u32) -> u64 { + if level >= self.max_levels { + 0 + } else { + let mut offset = 0; + let mut curr = self.max_levels; + while curr > level { + let pages = self.level_pages(curr); + offset += pages as u64 * self.page_size as u64; + curr -= 1; + } + offset + } + } + + /// Get total pages needed to store the Merkle Tree. 
+    pub fn total_pages(&self) -> u32 {
+        let mut pages = 0;
+        for idx in 1..=self.max_levels {
+            pages += self.level_pages(idx);
+        }
+        pages
+    }
+}
+
+/// Merkle tree generator for data integrity verification.
+pub struct VerityGenerator {
+    mkl_tree: MerkleTree,
+    file_map: Mutex<FileMapState>,
+    root_digest: RafsDigest,
+}
+
+impl VerityGenerator {
+    /// Create a new instance [VerityGenerator].
+    pub fn new(file: File, offset: u64, data_pages: u32) -> Result<Self> {
+        let mkl_tree = MerkleTree::new(4096, data_pages, Algorithm::Sha256);
+        let total_size = mkl_tree.total_pages() as usize * 4096;
+        let file_map = if data_pages > 1 {
+            if offset.checked_add(total_size as u64).is_none() {
+                return Err(einval!(format!(
+                    "verity data offset 0x{:x} and size 0x{:x} is too big",
+                    offset, total_size
+                )));
+            }
+
+            let md = file.metadata()?;
+            if md.len() < total_size as u64 + offset {
+                file.set_len(total_size as u64 + offset)?;
+            }
+            FileMapState::new(file, offset as libc::off_t, total_size, true)?
+        } else {
+            FileMapState::default()
+        };
+
+        Ok(VerityGenerator {
+            mkl_tree,
+            file_map: Mutex::new(file_map),
+            root_digest: NON_EXIST_ENTRY_DIGEST,
+        })
+    }
+
+    /// Initialize all digest values.
+    pub fn initialize(&mut self) -> Result<()> {
+        let total_size = self.mkl_tree.total_pages() as usize * 4096;
+        let mut offset = 0;
+        let mut map = self.file_map.lock().unwrap();
+
+        while offset < total_size {
+            let digest = map.get_mut::<DigestData>(offset)?;
+            digest.copy_from_slice(&NON_EXIST_ENTRY_DIGEST.data);
+            offset += size_of::<DigestData>();
+        }
+
+        Ok(())
+    }
+
+    /// Set digest value for Merkle entry at `level` with `index`.
+    ///
+    /// Digests for data pages must be set by calling this method. It can also be used to set
+    /// digest values for intermediate digest pages.
+    pub fn set_digest(&mut self, level: u32, index: u32, digest: &[u8]) -> Result<()> {
+        let digest_size = self.mkl_tree.digest_size;
+        if digest.len() != digest_size {
+            return Err(einval!(format!(
+                "size of digest data is not {}",
+                digest_size
+            )));
+        }
+
+        // Handle special case of zero-level Merkle tree.
+        if self.mkl_tree.data_pages == 1 && level == 1 && index == 0 {
+            self.root_digest.data.copy_from_slice(digest);
+            return Ok(());
+        }
+
+        if level > self.mkl_tree.max_levels() || level == 0 {
+            return Err(einval!(format!(
+                "level {} is out of range, max {}",
+                level,
+                self.mkl_tree.max_levels()
+            )));
+        } else if index >= self.mkl_tree.level_entries(level) {
+            return Err(einval!(format!(
+                "index {} is out of range, max {}",
+                index,
+                self.mkl_tree.level_entries(level) - 1
+            )));
+        }
+
+        let base = self.mkl_tree.level_base(level) as usize;
+        let offset = base + index as usize * digest_size;
+        let mut guard = self.file_map.lock().unwrap();
+        let buf = guard.get_mut::<DigestData>(offset)?;
+        buf.copy_from_slice(digest);
+
+        Ok(())
+    }
+
+    /// Generate digest values from lower level digest pages.
+    pub fn generate_level_digests(&mut self, level: u32) -> Result<()> {
+        assert!(level > 1 && level <= self.mkl_tree.max_levels);
+        let page_size = self.mkl_tree.page_size as usize;
+        let count = self.mkl_tree.level_entries(level) as usize;
+        let mut digest_base = self.mkl_tree.level_base(level) as usize;
+        let mut data_base = self.mkl_tree.level_base(level - 1) as usize;
+        let mut guard = self.file_map.lock().unwrap();
+
+        for _ in 0..count {
+            let data = guard.get_slice::<u8>(data_base, page_size)?;
+            let digest = RafsDigest::from_buf(data, self.mkl_tree.digest_algo);
+            let buf = guard.get_mut::<DigestData>(digest_base)?;
+            buf.copy_from_slice(digest.as_ref());
+            data_base += page_size;
+            digest_base += self.mkl_tree.digest_size;
+        }
+
+        Ok(())
+    }
+
+    /// Generate Merkle root digest.
+    ///
+    /// The returned Merkle tree root digest will be:
+    /// - `NON_EXIST_ENTRY_DIGEST` if there's no data page
+    /// - digest of the data page if there's only one data page
+    /// - digest of the intermediate digest page if there's more than one data pages
+    pub fn generate_root_digest(&mut self) -> Result<RafsDigest> {
+        if self.mkl_tree.max_levels == 0 {
+            Ok(self.root_digest)
+        } else {
+            let guard = self.file_map.lock().unwrap();
+            let data = guard.get_slice::<u8>(0, self.mkl_tree.page_size as usize)?;
+            Ok(RafsDigest::from_buf(data, self.mkl_tree.digest_algo))
+        }
+    }
+
+    /// Generate all intermediate and root digests for the Merkle tree.
+    ///
+    /// Digests for data pages at level 1 must be set up by calling [set_digest()] before this
+    /// function to generate intermediate and root digests.
+    pub fn generate_all_digests(&mut self) -> Result<RafsDigest> {
+        for level in 2..=self.mkl_tree.max_levels {
+            self.generate_level_digests(level)?;
+        }
+        self.generate_root_digest()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use vmm_sys_util::tempfile::TempFile;
+
+    #[test]
+    fn test_max_levels() {
+        let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256);
+        assert_eq!(mkl.max_levels(), 0);
+        assert_eq!(mkl.level_pages(0), 1);
+        assert_eq!(mkl.level_pages(1), 0);
+        assert_eq!(mkl.level_base(0), 0);
+        assert_eq!(mkl.level_base(1), 0);
+        assert_eq!(mkl.level_entries(0), 1);
+        assert_eq!(mkl.level_entries(1), 0);
+        assert_eq!(mkl.total_pages(), 0);
+
+        let mkl = MerkleTree::new(4096, 2, Algorithm::Sha256);
+        assert_eq!(mkl.max_levels(), 1);
+        assert_eq!(mkl.level_pages(0), 2);
+        assert_eq!(mkl.level_pages(1), 1);
+        assert_eq!(mkl.level_pages(2), 0);
+        assert_eq!(mkl.level_entries(0), 2);
+        assert_eq!(mkl.level_entries(1), 2);
+        assert_eq!(mkl.level_entries(2), 0);
+        assert_eq!(mkl.level_base(0), 4096);
+        assert_eq!(mkl.level_base(1), 0);
+        assert_eq!(mkl.level_base(2), 0);
+        assert_eq!(mkl.total_pages(), 1);
+
+        let mkl = MerkleTree::new(4096, 128, Algorithm::Sha256);
+        assert_eq!(mkl.max_levels(), 1);
+        assert_eq!(mkl.level_pages(0), 128);
+        assert_eq!(mkl.level_pages(1), 1);
+        assert_eq!(mkl.level_pages(2), 0);
+        assert_eq!(mkl.level_entries(0), 128);
+        assert_eq!(mkl.level_entries(1), 128);
+        assert_eq!(mkl.level_entries(2), 0);
+        assert_eq!(mkl.level_base(0), 4096);
+        assert_eq!(mkl.level_base(1), 0);
+        assert_eq!(mkl.level_base(2), 0);
+        assert_eq!(mkl.total_pages(), 1);
+
+        let mkl = MerkleTree::new(4096, 129, Algorithm::Sha256);
+        assert_eq!(mkl.max_levels(), 2);
+        assert_eq!(mkl.level_pages(0), 129);
+        assert_eq!(mkl.level_pages(1), 2);
+        assert_eq!(mkl.level_pages(2), 1);
+        assert_eq!(mkl.level_pages(3), 0);
+        assert_eq!(mkl.level_entries(0), 129);
+        assert_eq!(mkl.level_entries(1), 129);
+        assert_eq!(mkl.level_entries(2), 2);
+
assert_eq!(mkl.level_entries(3), 0); + assert_eq!(mkl.level_base(0), 4096 * 3); + assert_eq!(mkl.level_base(1), 4096); + assert_eq!(mkl.level_base(2), 0); + assert_eq!(mkl.level_base(3), 0); + assert_eq!(mkl.total_pages(), 3); + + let mkl = MerkleTree::new(4096, 128 * 128, Algorithm::Sha256); + assert_eq!(mkl.max_levels(), 2); + assert_eq!(mkl.level_pages(0), 128 * 128); + assert_eq!(mkl.level_pages(1), 128); + assert_eq!(mkl.level_pages(2), 1); + assert_eq!(mkl.level_pages(3), 0); + assert_eq!(mkl.level_base(0), 4096 * 129); + assert_eq!(mkl.level_base(1), 4096); + assert_eq!(mkl.level_base(2), 0); + assert_eq!(mkl.level_base(3), 0); + assert_eq!(mkl.total_pages(), 129); + + let mkl = MerkleTree::new(4096, 128 * 128 + 1, Algorithm::Sha256); + assert_eq!(mkl.max_levels(), 3); + assert_eq!(mkl.level_pages(0), 128 * 128 + 1); + assert_eq!(mkl.level_pages(1), 129); + assert_eq!(mkl.level_pages(2), 2); + assert_eq!(mkl.level_pages(3), 1); + assert_eq!(mkl.level_pages(4), 0); + assert_eq!(mkl.level_entries(0), 128 * 128 + 1); + assert_eq!(mkl.level_entries(1), 128 * 128 + 1); + assert_eq!(mkl.level_entries(2), 129); + assert_eq!(mkl.level_entries(3), 2); + assert_eq!(mkl.level_entries(4), 0); + assert_eq!(mkl.level_base(0), 4096 * 132); + assert_eq!(mkl.level_base(1), 4096 * 3); + assert_eq!(mkl.level_base(2), 4096); + assert_eq!(mkl.level_base(3), 0); + assert_eq!(mkl.level_base(4), 0); + assert_eq!(mkl.total_pages(), 132); + + let mkl = MerkleTree::new(4096, u32::MAX, Algorithm::Sha256); + assert_eq!(mkl.max_levels(), 5); + } + + #[test] + fn test_generate_mkl_tree_zero_entry() { + let digest = RafsDigest::from_buf(&[0u8; 4096], Algorithm::Sha256); + assert_eq!(digest, NON_EXIST_ENTRY_DIGEST); + + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 0).unwrap(); + + assert!(generator + .set_digest(0, 0, &NON_EXIST_ENTRY_DIGEST.data) + .is_err()); + assert!(generator + .set_digest(1, 0, &NON_EXIST_ENTRY_DIGEST.data) + .is_err()); + + let root_digest = generator.generate_all_digests().unwrap(); + assert_eq!(root_digest, NON_EXIST_ENTRY_DIGEST); + } + + #[test] + fn test_generate_mkl_tree_one_entry() { + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 1).unwrap(); + + let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); + assert!(generator.set_digest(0, 0, &digest.data).is_err()); + assert!(generator.set_digest(2, 0, &digest.data).is_err()); + assert!(generator.set_digest(1, 1, &digest.data).is_err()); + generator.set_digest(1, 0, &digest.data).unwrap(); + + let root_digest = generator.generate_all_digests().unwrap(); + assert_eq!(root_digest, digest); + } + + #[test] + fn test_generate_mkl_tree_two_entries() { + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 2).unwrap(); + + let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); + assert!(generator.set_digest(0, 0, &digest.data).is_err()); + assert!(generator.set_digest(2, 0, &digest.data).is_err()); + assert!(generator.set_digest(1, 2, &digest.data).is_err()); + generator.set_digest(1, 0, &digest.data).unwrap(); + generator.set_digest(1, 1, &digest.data).unwrap(); + + let root_digest = generator.generate_all_digests().unwrap(); + assert_ne!(root_digest, digest); + } + + #[test] + fn test_generate_mkl_tree_4097_entries() { + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); + + let digest = 
RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256); + assert!(generator.set_digest(0, 0, &digest.data).is_err()); + generator.set_digest(2, 0, &digest.data).unwrap(); + for idx in 0..4097 { + generator.set_digest(1, idx, &digest.data).unwrap(); + } + + let root_digest = generator.generate_all_digests().unwrap(); + assert_ne!(root_digest, digest); + assert_eq!(generator.mkl_tree.max_levels, 2); + } + + #[test] + fn test_merkle_tree_digest_algo() { + let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256); + assert_eq!(mkl.digest_algorithm(), Algorithm::Sha256); + } + + #[test] + fn test_verity_generator_error() { + let file = TempFile::new().unwrap(); + assert!(VerityGenerator::new(file.into_file(), u64::MAX, u32::MAX).is_err()); + + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); + assert!(generator.set_digest(1, 0, &[1u8; 64]).is_err()); + } + + #[test] + fn test_verity_initialize() { + let file = TempFile::new().unwrap(); + let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap(); + assert!(generator.initialize().is_ok()); + } +}
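For reference, below is a minimal sketch of how the `VerityGenerator` API in `utils/src/verity.rs` might be driven end to end, modeled on the unit tests above. The `nydus_utils` crate path and the `build_verity_tree` helper are assumptions for illustration only and are not part of this change.

```rust
use vmm_sys_util::tempfile::TempFile;

// Assumed import paths; the utils crate is referenced here as `nydus_utils`.
use nydus_utils::digest::{Algorithm, RafsDigest};
use nydus_utils::verity::VerityGenerator;

/// Hypothetical helper: hash each 4 KiB data page, record it at level 1,
/// then let the generator derive the upper levels and the Merkle root.
fn build_verity_tree(pages: &[[u8; 4096]]) -> std::io::Result<RafsDigest> {
    // Scratch file that will hold the digest pages; offset 0 keeps the sketch simple.
    let file = TempFile::new().unwrap().into_file();
    let mut generator = VerityGenerator::new(file, 0, pages.len() as u32)?;

    // Level 1 carries one SHA-256 digest per data page and must be filled by the caller.
    for (idx, page) in pages.iter().enumerate() {
        let digest = RafsDigest::from_buf(page, Algorithm::Sha256);
        generator.set_digest(1, idx as u32, &digest.data)?;
    }

    // Levels 2..=max_levels and the root digest are derived from the level-1 digests.
    generator.generate_all_digests()
}
```

Note that `generate_all_digests()` only computes levels 2 and above, so the caller owns the level-1 digests; this mirrors `test_generate_mkl_tree_two_entries` above.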