diff --git a/.asf.yaml b/.asf.yaml
index bd063d4bbf4a..366c719597aa 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -38,7 +38,7 @@ github:
   features:
     issues: true
   protected_branches:
-    master:
+    main:
       required_status_checks:
         # require branches to be up-to-date before merging
         strict: true
diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml
index aa1d1d9c14da..20da777ec0e5 100644
--- a/.github/actions/setup-builder/action.yaml
+++ b/.github/actions/setup-builder/action.yaml
@@ -30,7 +30,7 @@ runs:
   using: "composite"
   steps:
     - name: Cache Cargo
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        # these represent dependencies downloaded by cargo
        # and thus do not depend on the OS, arch nor rust version.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index ffde5378da93..b22c01f8a1b9 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,14 +5,14 @@ updates:
    schedule:
      interval: daily
    open-pull-requests-limit: 10
-    target-branch: master
+    target-branch: main
    labels: [ auto-dependencies, arrow ]
  - package-ecosystem: cargo
    directory: "/object_store"
    schedule:
      interval: daily
    open-pull-requests-limit: 10
-    target-branch: master
+    target-branch: main
    labels: [ auto-dependencies, object_store ]
  - package-ecosystem: "github-actions"
    directory: "/"
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 679ccc956a20..08bdf123f4d6 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -20,6 +20,6 @@ The CI is structured so most tests are run in specific workflows:
 `arrow.yml` for `arrow`, `parquet.yml` for `parquet` and so on.
-The basic idea is to run all tests on pushes to master (to ensure we
-keep master green) but run only the individual workflows on PRs that
+The basic idea is to run all tests on pushes to main (to ensure we
+keep main green) but run only the individual workflows on PRs that
 change files that could affect them.
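The trigger pattern that README describes repeats across the per-crate workflow diffs below. A minimal sketch of that pattern, with an illustrative path filter rather than the exact lists used by the arrow-rs workflows:

```yaml
# Run the full workflow on every push to main (keeps main green),
# but for pull requests only run it when files that could affect
# this crate change. The paths below are illustrative placeholders.
on:
  push:
    branches:
      - main
  pull_request:
    paths:
      - .github/**
      - arrow/**
```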
diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml
index d3b2526740fa..daf38f2523fc 100644
--- a/.github/workflows/arrow.yml
+++ b/.github/workflows/arrow.yml
@@ -26,7 +26,7 @@ on:
   # always trigger
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - .github/**
@@ -61,39 +61,39 @@ jobs:
           submodules: true
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-      - name: Test arrow-buffer with all features
+      - name: Test arrow-buffer
         run: cargo test -p arrow-buffer --all-features
-      - name: Test arrow-data with all features
+      - name: Test arrow-data
         run: cargo test -p arrow-data --all-features
-      - name: Test arrow-schema with all features
+      - name: Test arrow-schema
         run: cargo test -p arrow-schema --all-features
-      - name: Test arrow-array with all features
+      - name: Test arrow-array
         run: cargo test -p arrow-array --all-features
-      - name: Test arrow-select with all features
+      - name: Test arrow-select
         run: cargo test -p arrow-select --all-features
-      - name: Test arrow-cast with all features
+      - name: Test arrow-cast
         run: cargo test -p arrow-cast --all-features
-      - name: Test arrow-ipc with all features
+      - name: Test arrow-ipc
         run: cargo test -p arrow-ipc --all-features
-      - name: Test arrow-csv with all features
+      - name: Test arrow-csv
         run: cargo test -p arrow-csv --all-features
-      - name: Test arrow-json with all features
+      - name: Test arrow-json
         run: cargo test -p arrow-json --all-features
-      - name: Test arrow-avro with all features
+      - name: Test arrow-avro
         run: cargo test -p arrow-avro --all-features
-      - name: Test arrow-string with all features
+      - name: Test arrow-string
         run: cargo test -p arrow-string --all-features
-      - name: Test arrow-ord with all features
+      - name: Test arrow-ord
         run: cargo test -p arrow-ord --all-features
-      - name: Test arrow-arith with all features
+      - name: Test arrow-arith
         run: cargo test -p arrow-arith --all-features
-      - name: Test arrow-row with all features
+      - name: Test arrow-row
         run: cargo test -p arrow-row --all-features
-      - name: Test arrow-integration-test with all features
+      - name: Test arrow-integration-test
         run: cargo test -p arrow-integration-test --all-features
       - name: Test arrow with default features
         run: cargo test -p arrow
-      - name: Test arrow with all features except pyarrow
+      - name: Test arrow except pyarrow
         run: cargo test -p arrow --features=force_validate,prettyprint,ipc_compression,ffi,chrono-tz
       - name: Run examples
         run: |
@@ -163,37 +163,139 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
         run: rustup component add clippy
-      - name: Clippy arrow-buffer with all features
-        run: cargo clippy -p arrow-buffer --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-data with all features
-        run: cargo clippy -p arrow-data --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-schema with all features
-        run: cargo clippy -p arrow-schema --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-array with all features
-        run: cargo clippy -p arrow-array --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-select with all features
-        run: cargo clippy -p arrow-select --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-cast with all features
-        run: cargo clippy -p arrow-cast --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-ipc with all features
-        run: cargo clippy -p arrow-ipc --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-csv with all features
-        run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-json with all features
-        run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-avro with all features
-        run: cargo clippy -p arrow-avro --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-string with all features
-        run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-ord with all features
-        run: cargo clippy -p arrow-ord --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-arith with all features
-        run: cargo clippy -p arrow-arith --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-row with all features
-        run: cargo clippy -p arrow-row --all-targets --all-features -- -D warnings
-      - name: Clippy arrow with all features
-        run: cargo clippy -p arrow --all-features --all-targets -- -D warnings
-      - name: Clippy arrow-integration-test with all features
-        run: cargo clippy -p arrow-integration-test --all-targets --all-features -- -D warnings
-      - name: Clippy arrow-integration-testing with all features
-        run: cargo clippy -p arrow-integration-testing --all-targets --all-features -- -D warnings
+      - name: Clippy arrow-buffer
+        run: |
+          mod=arrow-buffer
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-data
+        run: |
+          mod=arrow-data
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-schema
+        run: |
+          mod=arrow-schema
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-array
+        run: |
+          mod=arrow-array
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-select
+        run: |
+          mod=arrow-select
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-cast
+        run: |
+          mod=arrow-cast
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-ipc
+        run: |
+          mod=arrow-ipc
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-csv
+        run: |
+          mod=arrow-csv
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-json
+        run: |
+          mod=arrow-json
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-avro
+        run: |
+          mod=arrow-avro
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-string
+        run: |
+          mod=arrow-string
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-ord
+        run: |
+          mod=arrow-ord
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-arith
+        run: |
+          mod=arrow-arith
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-row
+        run: |
+          mod=arrow-row
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow
+        run: |
+          mod=arrow
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-integration-test
+        run: |
+          mod=arrow-integration-test
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
+      - name: Clippy arrow-integration-testing
+        run: |
+          mod=arrow-integration-testing
+          cargo clippy -p "$mod" --all-targets --all-features -- -D warnings
+          # Dependency checks excluding tests & benches.
+          cargo clippy -p "$mod" -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --all-features -- -D unused_crate_dependencies
+          cargo clippy -p "$mod" --no-default-features -- -D unused_crate_dependencies
diff --git a/.github/workflows/arrow_flight.yml b/.github/workflows/arrow_flight.yml
index 242e0f2a3b0d..79627448ca40 100644
--- a/.github/workflows/arrow_flight.yml
+++ b/.github/workflows/arrow_flight.yml
@@ -23,11 +23,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - arrow-array/**
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 2c1dcdfd2100..e6254ea24a58 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -21,11 +21,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - '**/Cargo.toml'
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 2026e257ab29..b28e8c20cfe7 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -21,11 +21,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs and changes to master
+# trigger for all PRs and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
 env:
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 08d287bcceb2..d6ec0622f6ed 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -21,11 +21,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs and changes to master
+# trigger for all PRs and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
 jobs:
@@ -70,8 +70,8 @@ jobs:
           path: target/doc
   deploy:
-    # Only deploy if a push to master
-    if: github.ref_name == 'master' && github.event_name == 'push'
+    # Only deploy if a push to main
+    if: github.ref_name == 'main' && github.event_name == 'push'
     needs: docs
     permissions:
       contents: write
@@ -90,7 +90,7 @@ jobs:
           cp .asf.yaml ./website/build/.asf.yaml
       - name: Deploy to gh-pages
        uses: peaceiris/actions-gh-pages@v4.0.0
-        if: github.event_name == 'push' && github.ref_name == 'master'
+        if: github.event_name == 'push' && github.ref_name == 'main'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: website/build
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 868729a168e8..9b23b1b5ad2e 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -21,11 +21,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - .github/**
diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml
index 19b432121b6f..ce67546a104b 100644
--- a/.github/workflows/miri.yaml
+++ b/.github/workflows/miri.yaml
@@ -21,11 +21,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - .github/**
diff --git a/.github/workflows/object_store.yml b/.github/workflows/object_store.yml
index 1857b330326a..93f809aaabd4 100644
--- a/.github/workflows/object_store.yml
+++ b/.github/workflows/object_store.yml
@@ -23,11 +23,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - object_store/**
@@ -138,9 +138,10 @@ jobs:
       - name: Setup LocalStack (AWS emulation)
        run: |
-          echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:3.8.1)" >> $GITHUB_ENV
+          echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:4.0.3)" >> $GITHUB_ENV
          echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV
          aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket
+          aws --endpoint-url=http://localhost:4566 s3api create-bucket --bucket test-object-lock --object-lock-enabled-for-bucket
          aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5
          KMS_KEY=$(aws --endpoint-url=http://localhost:4566 kms create-key --description "test key")
@@ -164,7 +165,7 @@ jobs:
      - name: Run object_store tests (AWS native conditional put)
        run: cargo test --features=aws
        env:
-          AWS_CONDITIONAL_PUT: etag-put-if-not-exists
+          AWS_CONDITIONAL_PUT: etag
          AWS_COPY_IF_NOT_EXISTS: multipart
      - name: GCS Output
diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index a4e654892662..2269950fd235 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -23,11 +23,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - arrow/**
diff --git a/.github/workflows/parquet_derive.yml b/.github/workflows/parquet_derive.yml
index d8b02f73a8aa..17aec724a820 100644
--- a/.github/workflows/parquet_derive.yml
+++ b/.github/workflows/parquet_derive.yml
@@ -23,11 +23,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs that touch certain files and changes to master
+# trigger for all PRs that touch certain files and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
     paths:
       - parquet/**
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 1b65c5057de1..ff5040fd2947 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -22,11 +22,11 @@ concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
-# trigger for all PRs and changes to master
+# trigger for all PRs and changes to main
 on:
   push:
     branches:
-      - master
+      - main
   pull_request:
 jobs:
diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md
index 5b3a3255ffcd..376da6277114 100644
--- a/CHANGELOG-old.md
+++ b/CHANGELOG-old.md
@@ -19,6 +19,69 @@
 # Historical Changelog
+## [53.2.0](https://github.com/apache/arrow-rs/tree/53.2.0) (2024-10-21)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/53.1.0...53.2.0)
+
+**Implemented enhancements:**
+
+- Implement arrow\_json encoder for Decimal128 & Decimal256 DataTypes [\#6605](https://github.com/apache/arrow-rs/issues/6605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support DataType::FixedSizeList in make\_builder within struct\_builder.rs [\#6594](https://github.com/apache/arrow-rs/issues/6594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support DataType::Dictionary in `make_builder` within struct\_builder.rs [\#6589](https://github.com/apache/arrow-rs/issues/6589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Interval parsing from string - accept "mon" and "mons" token [\#6548](https://github.com/apache/arrow-rs/issues/6548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `AsyncArrowWriter` API to get the total size of a written parquet file [\#6530](https://github.com/apache/arrow-rs/issues/6530) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `append_many` for Dictionary builders [\#6529](https://github.com/apache/arrow-rs/issues/6529) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Missing tonic `GRPC_STATUS` with tonic 0.12.1 [\#6515](https://github.com/apache/arrow-rs/issues/6515) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Add example of how to use parquet metadata reader APIs for a local cache [\#6504](https://github.com/apache/arrow-rs/issues/6504) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Remove reliance on `raw-entry` feature of Hashbrown [\#6498](https://github.com/apache/arrow-rs/issues/6498) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Improve page index metadata loading in `SerializedFileReader::new_with_options` [\#6491](https://github.com/apache/arrow-rs/issues/6491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Release arrow-rs / parquet minor version `53.1.0` \(October 2024\) [\#6340](https://github.com/apache/arrow-rs/issues/6340) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+
+**Fixed bugs:**
+
+- Compilation fail where `c_char = u8` [\#6571](https://github.com/apache/arrow-rs/issues/6571) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Arrow flight CI test failing on `master` [\#6568](https://github.com/apache/arrow-rs/issues/6568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+
+**Documentation updates:**
+
+- Minor: Document SIMD rationale and tips [\#6554](https://github.com/apache/arrow-rs/pull/6554) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+
+**Closed issues:**
+
+- Casting to and from unions [\#6247](https://github.com/apache/arrow-rs/issues/6247) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+
+**Merged pull requests:**
+
+- Minor: more comments for `RecordBatch.get_array_memory_size()` [\#6607](https://github.com/apache/arrow-rs/pull/6607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([2010YOUY01](https://github.com/2010YOUY01))
+- Implement arrow\_json encoder for Decimal128 & Decimal256 [\#6606](https://github.com/apache/arrow-rs/pull/6606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([phillipleblanc](https://github.com/phillipleblanc))
+- Add support for building FixedSizeListBuilder in struct\_builder's mak… [\#6595](https://github.com/apache/arrow-rs/pull/6595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszlim](https://github.com/kszlim))
+- Add limited support for dictionary builders in `make_builders` for stru… [\#6593](https://github.com/apache/arrow-rs/pull/6593) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszlim](https://github.com/kszlim))
+- Fix CI with new valid certificates and add script for future usage [\#6585](https://github.com/apache/arrow-rs/pull/6585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))
+- Update proc-macro2 requirement from =1.0.87 to =1.0.88 [\#6579](https://github.com/apache/arrow-rs/pull/6579) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix clippy complaints [\#6573](https://github.com/apache/arrow-rs/pull/6573) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))
+- Use c\_char instead of i8 to compile on platforms where c\_char = u8 [\#6572](https://github.com/apache/arrow-rs/pull/6572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([itsjunetime](https://github.com/itsjunetime))
+- Bump pyspark from 3.3.1 to 3.3.2 in /parquet/pytest [\#6564](https://github.com/apache/arrow-rs/pull/6564) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- `unsafe` improvements [\#6551](https://github.com/apache/arrow-rs/pull/6551) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ssbr](https://github.com/ssbr))
+- Update README.md [\#6550](https://github.com/apache/arrow-rs/pull/6550) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([Abdullahsab3](https://github.com/Abdullahsab3))
+- Fix string '0' cast to decimal with scale 0 [\#6547](https://github.com/apache/arrow-rs/pull/6547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Add finish to `AsyncArrowWriter::finish` [\#6543](https://github.com/apache/arrow-rs/pull/6543) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Add append\_nulls to dictionary builders [\#6542](https://github.com/apache/arrow-rs/pull/6542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
+- Improve UnionArray::is\_nullable [\#6540](https://github.com/apache/arrow-rs/pull/6540) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
+- Allow to read parquet binary column as UTF8 type [\#6539](https://github.com/apache/arrow-rs/pull/6539) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([goldmedal](https://github.com/goldmedal))
+- Use HashTable instead of raw\_entry\_mut [\#6537](https://github.com/apache/arrow-rs/pull/6537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
+- Add append\_many to dictionary arrays to allow adding repeated values [\#6534](https://github.com/apache/arrow-rs/pull/6534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
+- Adds documentation and example recommending Vec\ over ChunkedArray [\#6527](https://github.com/apache/arrow-rs/pull/6527) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([efredine](https://github.com/efredine))
+- Update proc-macro2 requirement from =1.0.86 to =1.0.87 [\#6526](https://github.com/apache/arrow-rs/pull/6526) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add `ColumnChunkMetadataBuilder` clear APIs [\#6523](https://github.com/apache/arrow-rs/pull/6523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Update sysinfo requirement from 0.31.2 to 0.32.0 [\#6521](https://github.com/apache/arrow-rs/pull/6521) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Update Tonic to 0.12.3 [\#6517](https://github.com/apache/arrow-rs/pull/6517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([cisaacson](https://github.com/cisaacson))
+- Detect missing page indexes while reading Parquet metadata [\#6507](https://github.com/apache/arrow-rs/pull/6507) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Use ParquetMetaDataReader to load page indexes in `SerializedFileReader::new_with_options` [\#6506](https://github.com/apache/arrow-rs/pull/6506) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Improve parquet `MetadataFetch` and `AsyncFileReader` docs [\#6505](https://github.com/apache/arrow-rs/pull/6505) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- fix arrow-json encoding with dictionary including nulls [\#6503](https://github.com/apache/arrow-rs/pull/6503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([samuelcolvin](https://github.com/samuelcolvin))
+- Update brotli requirement from 6.0 to 7.0 [\#6499](https://github.com/apache/arrow-rs/pull/6499) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Benchmark both scenarios, with records skipped and without skipping, for delta-bin-packed primitive arrays with half nulls. [\#6489](https://github.com/apache/arrow-rs/pull/6489) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([wiedld](https://github.com/wiedld))
+- Add round trip tests for reading/writing parquet metadata [\#6463](https://github.com/apache/arrow-rs/pull/6463) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
 ## [53.1.0](https://github.com/apache/arrow-rs/tree/53.1.0) (2024-10-02)
 [Full Changelog](https://github.com/apache/arrow-rs/compare/53.0.0...53.1.0)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fdf9b6dd95c..3b729360608b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,69 +19,101 @@
 # Changelog
-## [53.2.0](https://github.com/apache/arrow-rs/tree/53.2.0) (2024-10-21)
+## [53.3.0](https://github.com/apache/arrow-rs/tree/53.3.0) (2024-11-17)
-[Full Changelog](https://github.com/apache/arrow-rs/compare/53.1.0...53.2.0)
+[Full Changelog](https://github.com/apache/arrow-rs/compare/53.2.0...53.3.0)
 **Implemented enhancements:**
-- Implement arrow\_json encoder for Decimal128 & Decimal256 DataTypes [\#6605](https://github.com/apache/arrow-rs/issues/6605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Support DataType::FixedSizeList in make\_builder within struct\_builder.rs [\#6594](https://github.com/apache/arrow-rs/issues/6594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Support DataType::Dictionary in `make_builder` within struct\_builder.rs [\#6589](https://github.com/apache/arrow-rs/issues/6589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Interval parsing from string - accept "mon" and "mons" token [\#6548](https://github.com/apache/arrow-rs/issues/6548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- `AsyncArrowWriter` API to get the total size of a written parquet file [\#6530](https://github.com/apache/arrow-rs/issues/6530) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- `append_many` for Dictionary builders [\#6529](https://github.com/apache/arrow-rs/issues/6529) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Missing tonic `GRPC_STATUS` with tonic 0.12.1 [\#6515](https://github.com/apache/arrow-rs/issues/6515) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
-- Add example of how to use parquet metadata reader APIs for a local cache [\#6504](https://github.com/apache/arrow-rs/issues/6504) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Remove reliance on `raw-entry` feature of Hashbrown [\#6498](https://github.com/apache/arrow-rs/issues/6498) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
-- Improve page index metadata loading in `SerializedFileReader::new_with_options` [\#6491](https://github.com/apache/arrow-rs/issues/6491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Release arrow-rs / parquet minor version `53.1.0` \(October 2024\) [\#6340](https://github.com/apache/arrow-rs/issues/6340) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `PartialEq` of GenericByteViewArray \(StringViewArray / ByteViewArray\) that compares on equality rather than logical value [\#6679](https://github.com/apache/arrow-rs/issues/6679) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Need a mechanism to handle schema changes due to dictionary hydration in FlightSQL server implementations [\#6672](https://github.com/apache/arrow-rs/issues/6672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Support encoding Utf8View columns to JSON [\#6642](https://github.com/apache/arrow-rs/issues/6642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement `append_n` for `BooleanBuilder` [\#6634](https://github.com/apache/arrow-rs/issues/6634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Some take optimizations [\#6621](https://github.com/apache/arrow-rs/issues/6621) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Error Instead of Panic On Attempting to Write More Than 32769 Row Groups [\#6591](https://github.com/apache/arrow-rs/issues/6591) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Make casting from a timestamp without timezone to a timestamp with timezone configurable [\#6555](https://github.com/apache/arrow-rs/issues/6555)
+- Add `record_batch!` macro for easy record batch creation [\#6553](https://github.com/apache/arrow-rs/issues/6553) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support `Binary` --\> `Utf8View` casting [\#6531](https://github.com/apache/arrow-rs/issues/6531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `downcast_primitive_array` and `downcast_dictionary_array` are not hygienic wrt imports [\#6400](https://github.com/apache/arrow-rs/issues/6400) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement interleave\_record\_batch [\#6731](https://github.com/apache/arrow-rs/pull/6731) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([waynexia](https://github.com/waynexia))
+- feat: `record_batch!` macro [\#6588](https://github.com/apache/arrow-rs/pull/6588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ByteBaker](https://github.com/ByteBaker))
 **Fixed bugs:**
-- Compilation fail where `c_char = u8` [\#6571](https://github.com/apache/arrow-rs/issues/6571) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Arrow flight CI test failing on `master` [\#6568](https://github.com/apache/arrow-rs/issues/6568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Signed decimal e-notation parsing bug [\#6728](https://github.com/apache/arrow-rs/issues/6728) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add support for Utf8View -\> numeric in can\_cast\_types [\#6715](https://github.com/apache/arrow-rs/issues/6715)
+- IPC file writer produces incorrect footer when not preserving dict ID [\#6710](https://github.com/apache/arrow-rs/issues/6710) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- parquet from\_thrift\_helper incorrectly checks index [\#6693](https://github.com/apache/arrow-rs/issues/6693) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Primitive REPEATED fields not contained in LIST annotated groups aren't read as lists by record reader [\#6648](https://github.com/apache/arrow-rs/issues/6648) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- DictionaryHandling does not recurse into Map fields [\#6644](https://github.com/apache/arrow-rs/issues/6644) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Array writer output empty when no record is written [\#6613](https://github.com/apache/arrow-rs/issues/6613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Archery Integration Test with c\# failing on main [\#6577](https://github.com/apache/arrow-rs/issues/6577) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Potential unsoundness in `filter_run_end_array` [\#6569](https://github.com/apache/arrow-rs/issues/6569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Parquet reader can generate incorrect validity buffer information for nested structures [\#6510](https://github.com/apache/arrow-rs/issues/6510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- arrow-array ffi: FFI\_ArrowArray.null\_count is always interpreted as unsigned and initialized during conversion from C to Rust. [\#6497](https://github.com/apache/arrow-rs/issues/6497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
 **Documentation updates:**
-- Minor: Document SIMD rationale and tips [\#6554](https://github.com/apache/arrow-rs/pull/6554) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Minor: Document pattern for accessing views in StringView [\#6673](https://github.com/apache/arrow-rs/pull/6673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve Array::is\_nullable documentation [\#6615](https://github.com/apache/arrow-rs/pull/6615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Minor: improve docs for ByteViewArray-\>ByteArray From impl [\#6610](https://github.com/apache/arrow-rs/pull/6610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+
+**Performance improvements:**
+
+- Speed up `filter_run_end_array` [\#6712](https://github.com/apache/arrow-rs/pull/6712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
 **Closed issues:**
-- Casting to and from unions [\#6247](https://github.com/apache/arrow-rs/issues/6247) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Incorrect like results for pattern starting/ending with `%` percent and containing escape characters [\#6702](https://github.com/apache/arrow-rs/issues/6702) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
 **Merged pull requests:**
-- Minor: more comments for `RecordBatch.get_array_memory_size()` [\#6607](https://github.com/apache/arrow-rs/pull/6607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([2010YOUY01](https://github.com/2010YOUY01))
-- Implement arrow\_json encoder for Decimal128 & Decimal256 [\#6606](https://github.com/apache/arrow-rs/pull/6606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([phillipleblanc](https://github.com/phillipleblanc))
-- Add support for building FixedSizeListBuilder in struct\_builder's mak… [\#6595](https://github.com/apache/arrow-rs/pull/6595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszlim](https://github.com/kszlim))
-- Add limited support for dictionary builders in `make_builders` for stru… [\#6593](https://github.com/apache/arrow-rs/pull/6593) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszlim](https://github.com/kszlim))
-- Fix CI with new valid certificates and add script for future usage [\#6585](https://github.com/apache/arrow-rs/pull/6585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))
-- Update proc-macro2 requirement from =1.0.87 to =1.0.88 [\#6579](https://github.com/apache/arrow-rs/pull/6579) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- Fix clippy complaints [\#6573](https://github.com/apache/arrow-rs/pull/6573) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))
-- Use c\_char instead of i8 to compile on platforms where c\_char = u8 [\#6572](https://github.com/apache/arrow-rs/pull/6572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([itsjunetime](https://github.com/itsjunetime))
-- Bump pyspark from 3.3.1 to 3.3.2 in /parquet/pytest [\#6564](https://github.com/apache/arrow-rs/pull/6564) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- `unsafe` improvements [\#6551](https://github.com/apache/arrow-rs/pull/6551) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ssbr](https://github.com/ssbr))
-- Update README.md [\#6550](https://github.com/apache/arrow-rs/pull/6550) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([Abdullahsab3](https://github.com/Abdullahsab3))
-- Fix string '0' cast to decimal with scale 0 [\#6547](https://github.com/apache/arrow-rs/pull/6547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
-- Add finish to `AsyncArrowWriter::finish` [\#6543](https://github.com/apache/arrow-rs/pull/6543) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Add append\_nulls to dictionary builders [\#6542](https://github.com/apache/arrow-rs/pull/6542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
-- Improve UnionArray::is\_nullable [\#6540](https://github.com/apache/arrow-rs/pull/6540) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
-- Allow to read parquet binary column as UTF8 type [\#6539](https://github.com/apache/arrow-rs/pull/6539) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([goldmedal](https://github.com/goldmedal))
-- Use HashTable instead of raw\_entry\_mut [\#6537](https://github.com/apache/arrow-rs/pull/6537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
-- Add append\_many to dictionary arrays to allow adding repeated values [\#6534](https://github.com/apache/arrow-rs/pull/6534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
-- Adds documentation and example recommending Vec\ over ChunkedArray [\#6527](https://github.com/apache/arrow-rs/pull/6527) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([efredine](https://github.com/efredine))
-- Update proc-macro2 requirement from =1.0.86 to =1.0.87 [\#6526](https://github.com/apache/arrow-rs/pull/6526) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- Add `ColumnChunkMetadataBuilder` clear APIs [\#6523](https://github.com/apache/arrow-rs/pull/6523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Update sysinfo requirement from 0.31.2 to 0.32.0 [\#6521](https://github.com/apache/arrow-rs/pull/6521) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- Update Tonic to 0.12.3 [\#6517](https://github.com/apache/arrow-rs/pull/6517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([cisaacson](https://github.com/cisaacson))
-- Detect missing page indexes while reading Parquet metadata [\#6507](https://github.com/apache/arrow-rs/pull/6507) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Use ParquetMetaDataReader to load page indexes in `SerializedFileReader::new_with_options` [\#6506](https://github.com/apache/arrow-rs/pull/6506) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Improve parquet `MetadataFetch` and `AsyncFileReader` docs [\#6505](https://github.com/apache/arrow-rs/pull/6505) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- fix arrow-json encoding with dictionary including nulls [\#6503](https://github.com/apache/arrow-rs/pull/6503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([samuelcolvin](https://github.com/samuelcolvin))
-- Update brotli requirement from 6.0 to 7.0 [\#6499](https://github.com/apache/arrow-rs/pull/6499) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- Benchmark both scenarios, with records skipped and without skipping, for delta-bin-packed primitive arrays with half nulls. [\#6489](https://github.com/apache/arrow-rs/pull/6489) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([wiedld](https://github.com/wiedld))
-- Add round trip tests for reading/writing parquet metadata [\#6463](https://github.com/apache/arrow-rs/pull/6463) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix signed decimal e-notation parsing [\#6729](https://github.com/apache/arrow-rs/pull/6729) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gruuya](https://github.com/gruuya))
+- Clean up some arrow-flight tests and duplicated code [\#6725](https://github.com/apache/arrow-rs/pull/6725) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))
+- Update PR template section about API breaking changes [\#6723](https://github.com/apache/arrow-rs/pull/6723) ([findepi](https://github.com/findepi))
+- Support for casting `StringViewArray` to `DecimalArray` [\#6720](https://github.com/apache/arrow-rs/pull/6720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365))
+- File writer preserve dict bug [\#6711](https://github.com/apache/arrow-rs/pull/6711) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Add filter\_kernel benchmark for run array [\#6706](https://github.com/apache/arrow-rs/pull/6706) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3))
+- Fix string view ILIKE checks with NULL values [\#6705](https://github.com/apache/arrow-rs/pull/6705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Implement logical\_null\_count for more array types [\#6704](https://github.com/apache/arrow-rs/pull/6704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Fix LIKE with escapes [\#6703](https://github.com/apache/arrow-rs/pull/6703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Speed up `filter_bytes` [\#6699](https://github.com/apache/arrow-rs/pull/6699) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Minor: fix misleading comment in byte view [\#6695](https://github.com/apache/arrow-rs/pull/6695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jayzhan211](https://github.com/jayzhan211))
+- minor fix on checking index [\#6694](https://github.com/apache/arrow-rs/pull/6694) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jp0317](https://github.com/jp0317))
+- Undo run end filter performance regression [\#6691](https://github.com/apache/arrow-rs/pull/6691) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3))
+- Reimplement `PartialEq` of `GenericByteViewArray` compares by logical value [\#6689](https://github.com/apache/arrow-rs/pull/6689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365))
+- feat: expose known\_schema from FlightDataEncoder [\#6688](https://github.com/apache/arrow-rs/pull/6688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc))
+- Update hashbrown requirement from 0.14.2 to 0.15.1 [\#6684](https://github.com/apache/arrow-rs/pull/6684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Support Duration in JSON Reader [\#6683](https://github.com/apache/arrow-rs/pull/6683) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([simonvandel](https://github.com/simonvandel))
+- Check predicate and values are the same length for run end array filter safety [\#6675](https://github.com/apache/arrow-rs/pull/6675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3))
+- \[ffi\] Fix arrow-array null\_count error during conversion from C to Rust [\#6674](https://github.com/apache/arrow-rs/pull/6674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adbmal](https://github.com/adbmal))
+- Support `Utf8View` for `bit_length` kernel [\#6671](https://github.com/apache/arrow-rs/pull/6671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([austin362667](https://github.com/austin362667))
+- Fix string view LIKE checks with NULL values [\#6662](https://github.com/apache/arrow-rs/pull/6662) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Improve documentation for `nullif` kernel [\#6658](https://github.com/apache/arrow-rs/pull/6658) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve test\_auth error message when contains\(\) fails [\#6657](https://github.com/apache/arrow-rs/pull/6657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi))
+- Let std::fmt::Debug for StructArray output Null/Validity info [\#6655](https://github.com/apache/arrow-rs/pull/6655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([XinyuZeng](https://github.com/XinyuZeng))
+- Include offending line number when processing CSV file fails [\#6653](https://github.com/apache/arrow-rs/pull/6653) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- feat: add write\_bytes for GenericBinaryBuilder [\#6652](https://github.com/apache/arrow-rs/pull/6652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tisonkun](https://github.com/tisonkun))
+- feat: Support Utf8View in JSON serialization [\#6651](https://github.com/apache/arrow-rs/pull/6651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonmmease](https://github.com/jonmmease))
+- fix: include chrono-tz in flight sql cli [\#6650](https://github.com/apache/arrow-rs/pull/6650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- Handle primitive REPEATED field not contained in LIST annotated group [\#6649](https://github.com/apache/arrow-rs/pull/6649) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm))
+- Implement `append_n` for `BooleanBuilder` [\#6646](https://github.com/apache/arrow-rs/pull/6646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3))
+- fix: recurse into Map datatype when hydrating dictionaries [\#6645](https://github.com/apache/arrow-rs/pull/6645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc))
+- fix: enable TLS roots for flight CLI client [\#6640](https://github.com/apache/arrow-rs/pull/6640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- doc: Clarify take kernel semantics [\#6632](https://github.com/apache/arrow-rs/pull/6632) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya))
+- Return error rather than panic when too many row groups are written [\#6629](https://github.com/apache/arrow-rs/pull/6629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix test feature selection so all feature combinations work as expected [\#6626](https://github.com/apache/arrow-rs/pull/6626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([itsjunetime](https://github.com/itsjunetime))
+- Add Parquet RowSelection benchmark [\#6623](https://github.com/apache/arrow-rs/pull/6623) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- Optimize `take_bits` to optimize `take_boolean` / `take_primitive` / `take_byte_view`: up to -25% [\#6622](https://github.com/apache/arrow-rs/pull/6622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Make downcast macros hygenic \(\#6400\) [\#6620](https://github.com/apache/arrow-rs/pull/6620) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
+- Update proc-macro2 requirement from =1.0.88 to =1.0.89 [\#6618](https://github.com/apache/arrow-rs/pull/6618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix arrow-json writer empty [\#6614](https://github.com/apache/arrow-rs/pull/6614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gwik](https://github.com/gwik))
+- Add `ParquetObjectReader::with_runtime` [\#6612](https://github.com/apache/arrow-rs/pull/6612) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([itsjunetime](https://github.com/itsjunetime))
+- Re-enable `C#` arrow flight integration test [\#6611](https://github.com/apache/arrow-rs/pull/6611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add Array::logical\_null\_count for inspecting number of null values [\#6608](https://github.com/apache/arrow-rs/pull/6608) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Added casting from Binary/LargeBinary to Utf8View [\#6592](https://github.com/apache/arrow-rs/pull/6592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ngli-me](https://github.com/ngli-me))
+- Parquet AsyncReader: Don't panic when empty offset\_index is Some\(\[\]\) [\#6582](https://github.com/apache/arrow-rs/pull/6582) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jroddev](https://github.com/jroddev))
+- Skip writing down null buffers for non-nullable primitive arrays [\#6524](https://github.com/apache/arrow-rs/pull/6524) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([bkirwi](https://github.com/bkirwi))
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2dea0b2cca64..38236ee39125 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -138,7 +138,7 @@ cargo test
 cargo test -p arrow
 ```
-For some changes, you may want to run additional tests. You can find up-to-date information on the current CI tests in [.github/workflows](https://github.com/apache/arrow-rs/tree/master/.github/workflows). Here are some examples of additional tests you may want to run:
+For some changes, you may want to run additional tests. You can find up-to-date information on the current CI tests in [.github/workflows](https://github.com/apache/arrow-rs/tree/main/.github/workflows). Here are some examples of additional tests you may want to run:
 ```bash
 # run tests for the parquet crate
@@ -217,13 +217,13 @@ cargo bench -p arrow-cast --bench parse_time
 To set the baseline for your benchmarks, use the --save-baseline flag:
 ```bash
-git checkout master
+git checkout main
-cargo bench --bench parse_time -- --save-baseline master
+cargo bench --bench parse_time -- --save-baseline main
 git checkout feature
-cargo bench --bench parse_time -- --baseline master
+cargo bench --bench parse_time -- --baseline main
 ```
 ## Git Pre-Commit Hook
diff --git a/Cargo.toml b/Cargo.toml
index f210ae210012..375a4efac551 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,7 @@ exclude = [
 ]
 [workspace.package]
-version = "53.2.0"
+version = "53.3.0"
 homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
@@ -77,20 +77,20 @@ edition = "2021"
 rust-version = "1.62"
 [workspace.dependencies]
-arrow = { version = "53.2.0", path = "./arrow", default-features = false }
-arrow-arith = { version = "53.2.0", path = "./arrow-arith" }
-arrow-array = { version = "53.2.0", path = "./arrow-array" }
-arrow-buffer = { version = "53.2.0", path = "./arrow-buffer" }
-arrow-cast = { version = "53.2.0", path = "./arrow-cast" }
-arrow-csv = { version = "53.2.0", path = "./arrow-csv" }
-arrow-data = { version = "53.2.0", path = "./arrow-data" }
-arrow-ipc = { version = "53.2.0", path = "./arrow-ipc" }
-arrow-json = { version = "53.2.0", path = "./arrow-json" }
-arrow-ord = { version = "53.2.0", path = "./arrow-ord" }
-arrow-row = { version = "53.2.0", path = "./arrow-row" }
-arrow-schema = { version = "53.2.0", path = "./arrow-schema" }
-arrow-select = { version = "53.2.0", path = "./arrow-select" }
-arrow-string = { version = "53.2.0", path = "./arrow-string" }
-parquet = { version = "53.2.0", path = "./parquet", default-features = false }
+arrow = { version = "53.3.0", path = "./arrow", default-features = false }
+arrow-arith = { version = "53.3.0", path = "./arrow-arith" }
+arrow-array = { version = "53.3.0", path = "./arrow-array" }
+arrow-buffer = { version = "53.3.0", path = "./arrow-buffer" }
+arrow-cast = { version = "53.3.0", path = "./arrow-cast" }
+arrow-csv = { version = "53.3.0", path = "./arrow-csv" }
+arrow-data = { version = "53.3.0", path = "./arrow-data" }
+arrow-ipc = { version = "53.3.0", path = "./arrow-ipc" }
+arrow-json = { version = "53.3.0", path = "./arrow-json" }
+arrow-ord = { version = "53.3.0", path = "./arrow-ord" }
+arrow-row = { version = "53.3.0", path = "./arrow-row" }
+arrow-schema = { version = "53.3.0", path = "./arrow-schema" }
+arrow-select = { version = "53.3.0", path = "./arrow-select" }
+arrow-string = { version = "53.3.0", path = "./arrow-string" }
+parquet = { version = "53.3.0", path = "./parquet", default-features = false } chrono = { version = "0.4.34", default-features = false, features = ["clock"] } diff --git a/README.md b/README.md index 98c0a6615d9d..57794b1d6a46 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,6 @@ # Native Rust implementation of Apache Arrow and Apache Parquet -[![Coverage Status](https://codecov.io/gh/apache/arrow-rs/rust/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow-rs?branch=master) - Welcome to the [Rust][rust] implementation of [Apache Arrow], the popular in-memory columnar format. This repo contains the following main components: @@ -58,7 +56,7 @@ breaking API changes) at most once a quarter, and release incremental minor versions in the intervening months. See [this ticket] for more details. To keep our maintenance burden down, we do regularly scheduled releases (major -and minor) from the `master` branch. How we handle PRs with breaking API changes +and minor) from the `main` branch. How we handle PRs with breaking API changes is described in the [contributing] guide. [contributing]: CONTRIBUTING.md#breaking-changes @@ -67,11 +65,11 @@ Planned Release Schedule | Approximate Date | Version | Notes | | ---------------- | -------- | --------------------------------------- | -| Sep 2024 | `53.0.0` | Major, potentially breaking API changes | -| Oct 2024 | `53.1.0` | Minor, NO breaking API changes | -| Oct 2024 | `53.2.0` | Minor, NO breaking API changes | | Nov 2024 | `53.3.0` | Minor, NO breaking API changes | | Dec 2024 | `54.0.0` | Major, potentially breaking API changes | +| Jan 2025 | `54.1.0` | Minor, NO breaking API changes | +| Feb 2025 | `54.2.0` | Minor, NO breaking API changes | +| Mar 2025 | `55.0.0` | Major, potentially breaking API changes | [this ticket]: https://github.com/apache/arrow-rs/issues/5368 [semantic versioning]: https://semver.org/ diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml index d2ee0b9e2c72..66696df8aa04 100644 --- a/arrow-arith/Cargo.toml +++ b/arrow-arith/Cargo.toml @@ -39,7 +39,6 @@ arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } -half = { version = "2.1", default-features = false } num = { version = "0.4", default-features = false, features = ["std"] } [dev-dependencies] diff --git a/arrow-arith/LICENSE.txt b/arrow-arith/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-arith/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-arith/NOTICE.txt b/arrow-arith/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-arith/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs index bb983e1225ac..9b3272abb617 100644 --- a/arrow-arith/src/arity.rs +++ b/arrow-arith/src/arity.rs @@ -18,14 +18,12 @@ //! 
Kernels for operating on [`PrimitiveArray`]s use arrow_array::builder::BufferBuilder; -use arrow_array::types::ArrowDictionaryKeyType; use arrow_array::*; use arrow_buffer::buffer::NullBuffer; use arrow_buffer::ArrowNativeType; use arrow_buffer::{Buffer, MutableBuffer}; use arrow_data::ArrayData; use arrow_schema::ArrowError; -use std::sync::Arc; /// See [`PrimitiveArray::unary`] pub fn unary(array: &PrimitiveArray, op: F) -> PrimitiveArray @@ -71,97 +69,6 @@ where array.try_unary_mut(op) } -/// A helper function that applies an infallible unary function to a dictionary array with primitive value type. -fn unary_dict(array: &DictionaryArray, op: F) -> Result -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowPrimitiveType, - F: Fn(T::Native) -> T::Native, -{ - let dict_values = array.values().as_any().downcast_ref().unwrap(); - let values = unary::(dict_values, op); - Ok(Arc::new(array.with_values(Arc::new(values)))) -} - -/// A helper function that applies a fallible unary function to a dictionary array with primitive value type. -fn try_unary_dict(array: &DictionaryArray, op: F) -> Result -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowPrimitiveType, - F: Fn(T::Native) -> Result, -{ - if !PrimitiveArray::::is_compatible(&array.value_type()) { - return Err(ArrowError::CastError(format!( - "Cannot perform the unary operation of type {} on dictionary array of value type {}", - T::DATA_TYPE, - array.value_type() - ))); - } - - let dict_values = array.values().as_any().downcast_ref().unwrap(); - let values = try_unary::(dict_values, op)?; - Ok(Arc::new(array.with_values(Arc::new(values)))) -} - -/// Applies an infallible unary function to an array with primitive values. -#[deprecated(note = "Use arrow_array::AnyDictionaryArray")] -pub fn unary_dyn(array: &dyn Array, op: F) -> Result -where - T: ArrowPrimitiveType, - F: Fn(T::Native) -> T::Native, -{ - downcast_dictionary_array! { - array => unary_dict::<_, F, T>(array, op), - t => { - if PrimitiveArray::::is_compatible(t) { - Ok(Arc::new(unary::( - array.as_any().downcast_ref::>().unwrap(), - op, - ))) - } else { - Err(ArrowError::NotYetImplemented(format!( - "Cannot perform unary operation of type {} on array of type {}", - T::DATA_TYPE, - t - ))) - } - } - } -} - -/// Applies a fallible unary function to an array with primitive values. -#[deprecated(note = "Use arrow_array::AnyDictionaryArray")] -pub fn try_unary_dyn(array: &dyn Array, op: F) -> Result -where - T: ArrowPrimitiveType, - F: Fn(T::Native) -> Result, -{ - downcast_dictionary_array! 
{ - array => if array.values().data_type() == &T::DATA_TYPE { - try_unary_dict::<_, F, T>(array, op) - } else { - Err(ArrowError::NotYetImplemented(format!( - "Cannot perform unary operation on dictionary array of type {}", - array.data_type() - ))) - }, - t => { - if PrimitiveArray::::is_compatible(t) { - Ok(Arc::new(try_unary::( - array.as_any().downcast_ref::>().unwrap(), - op, - )?)) - } else { - Err(ArrowError::NotYetImplemented(format!( - "Cannot perform unary operation of type {} on array of type {}", - T::DATA_TYPE, - t - ))) - } - } - } -} - /// Allies a binary infallable function to two [`PrimitiveArray`]s, /// producing a new [`PrimitiveArray`] /// @@ -510,8 +417,8 @@ where #[cfg(test)] mod tests { use super::*; - use arrow_array::builder::*; use arrow_array::types::*; + use std::sync::Arc; #[test] #[allow(deprecated)] @@ -523,53 +430,6 @@ mod tests { result, Float64Array::from(vec![None, Some(7.0), None, Some(7.0)]) ); - - let result = unary_dyn::<_, Float64Type>(&input_slice, |n| n + 1.0).unwrap(); - - assert_eq!( - result.as_any().downcast_ref::().unwrap(), - &Float64Array::from(vec![None, Some(7.8), None, Some(8.2)]) - ); - } - - #[test] - #[allow(deprecated)] - fn test_unary_dict_and_unary_dyn() { - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(5).unwrap(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append_null(); - builder.append(9).unwrap(); - let dictionary_array = builder.finish(); - - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append(9).unwrap(); - builder.append_null(); - builder.append(10).unwrap(); - let expected = builder.finish(); - - let result = unary_dict::<_, _, Int32Type>(&dictionary_array, |n| n + 1).unwrap(); - assert_eq!( - result - .as_any() - .downcast_ref::>() - .unwrap(), - &expected - ); - - let result = unary_dyn::<_, Int32Type>(&dictionary_array, |n| n + 1).unwrap(); - assert_eq!( - result - .as_any() - .downcast_ref::>() - .unwrap(), - &expected - ); } #[test] diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs index 09d690d3237c..3458669a6fd1 100644 --- a/arrow-arith/src/temporal.rs +++ b/arrow-arith/src/temporal.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use cast::as_primitive_array; -use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc}; +use chrono::{Datelike, TimeZone, Timelike, Utc}; use arrow_array::temporal_conversions::{ date32_to_datetime, date64_to_datetime, timestamp_ms_to_datetime, timestamp_ns_to_datetime, @@ -82,6 +82,7 @@ impl std::fmt::Display for DatePart { /// Returns function to extract relevant [`DatePart`] from types like a /// [`NaiveDateTime`] or [`DateTime`]. /// +/// [`NaiveDateTime`]: chrono::NaiveDateTime /// [`DateTime`]: chrono::DateTime fn get_date_time_part_extract_fn(part: DatePart) -> fn(T) -> i32 where @@ -664,20 +665,6 @@ impl ChronoDateExt for T { } } -/// Parse the given string into a string representing fixed-offset that is correct as of the given -/// UTC NaiveDateTime. -/// -/// Note that the offset is function of time and can vary depending on whether daylight savings is -/// in effect or not. e.g. Australia/Sydney is +10:00 or +11:00 depending on DST. 
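The helper removed in this hunk carried a deprecation note pointing at `arrow_array::timezone::Tz`. A minimal sketch of that replacement, mirroring the removed body and assuming the `chrono-tz` feature is enabled so named zones parse:

```rust
use arrow_array::timezone::Tz;
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, Offset, TimeZone};

/// Resolve the fixed UTC offset of a named time zone at a given UTC instant,
/// the same operation the deprecated helper performed.
fn fixed_offset_for(tz: &str, utc: NaiveDateTime) -> Option<FixedOffset> {
    let tz: Tz = tz.parse().ok()?;
    Some(tz.offset_from_utc_datetime(&utc).fix())
}

fn main() {
    let utc = NaiveDate::from_ymd_opt(2024, 1, 1)
        .unwrap()
        .and_hms_opt(0, 0, 0)
        .unwrap();
    // +11:00 in January because Sydney observes DST then.
    let offset = fixed_offset_for("Australia/Sydney", utc).unwrap();
    println!("{offset}");
}
```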
-#[deprecated(note = "Use arrow_array::timezone::Tz instead")] -pub fn using_chrono_tz_and_utc_naive_date_time( - tz: &str, - utc: NaiveDateTime, -) -> Option { - let tz: Tz = tz.parse().ok()?; - Some(tz.offset_from_utc_datetime(&utc).fix()) -} - /// Extracts the hours of a given array as an array of integers within /// the range of [0, 23]. If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. diff --git a/arrow-array/LICENSE.txt b/arrow-array/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-array/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-array/NOTICE.txt b/arrow-array/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-array/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs index 5f001a4f3d3a..5270a4a5def3 100644 --- a/arrow-array/benches/fixed_size_list_array.rs +++ b/arrow-array/benches/fixed_size_list_array.rs @@ -26,7 +26,7 @@ fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray { let values = Arc::new(Int32Array::from( (0..len).map(|_| rng.gen::()).collect::>(), )); - let field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); FixedSizeListArray::new(field, value_len as i32, values, None) } diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index 8f8a39b2093f..0e8a7a7cb618 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -24,12 +24,6 @@ use arrow_schema::DataType; pub type GenericBinaryArray = GenericByteArray>; impl GenericBinaryArray { - /// Get the data type of the array. 
- #[deprecated(note = "please use `Self::DATA_TYPE` instead")] - pub const fn get_data_type() -> DataType { - Self::DATA_TYPE - } - /// Creates a [GenericBinaryArray] from a vector of byte slices /// /// See also [`Self::from_iter_values`] @@ -358,7 +352,7 @@ mod tests { let values = b"helloparquet"; let child_data = ArrayData::builder(DataType::UInt8) .len(12) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .build() .unwrap(); let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap()); @@ -372,11 +366,9 @@ mod tests { .unwrap(); let binary_array1 = GenericBinaryArray::::from(array_data1); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); let array_data2 = ArrayData::builder(data_type) .len(3) @@ -415,17 +407,15 @@ mod tests { let child_data = ArrayData::builder(DataType::UInt8) .len(15) .offset(5) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .build() .unwrap(); let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -460,17 +450,15 @@ mod tests { let values = b"HelloArrow"; let child_data = ArrayData::builder(DataType::UInt8) .len(10) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010]))) .build() .unwrap(); let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - true, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, true), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -558,7 +546,7 @@ mod tests { .unwrap(); let offsets: [i32; 4] = [0, 5, 5, 12]; - let data_type = DataType::List(Arc::new(Field::new("item", DataType::UInt32, false))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false))); let array_data = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(offsets)) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 0f95adacf10c..9c2d4af8c454 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -308,6 +308,13 @@ impl Array for BooleanArray { self.values.is_empty() } + fn shrink_to_fit(&mut self) { + self.values.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { self.values.offset() } diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index bec0caab1045..f2b22507081d 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -453,6 +453,14 @@ impl Array for GenericByteArray { self.value_offsets.len() <= 1 } + fn shrink_to_fit(&mut self) { + self.value_offsets.shrink_to_fit(); + self.value_data.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + 
nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { 0 } diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index 81bb6a38550b..9d2d396a5266 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -430,31 +430,31 @@ impl GenericByteViewArray { /// /// Before GC: /// ```text - /// ┌──────┐ - /// │......│ - /// │......│ - /// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer + /// ┌──────┐ + /// │......│ + /// │......│ + /// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer /// │ View 1 │─ ─ ─ ─ │......│ with data that /// ├────────────────────┤ │......│ is not referred /// │ View 2 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or - /// └────────────────────┘ │......│ View 2 - /// │......│ - /// 2 views, refer to │......│ - /// small portions of a └──────┘ - /// large buffer + /// └────────────────────┘ │......│ View 2 + /// │......│ + /// 2 views, refer to │......│ + /// small portions of a └──────┘ + /// large buffer /// ``` - /// + /// /// After GC: /// /// ```text /// ┌────────────────────┐ ┌─────┐ After gc, only - /// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is - /// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by - /// │ View 2 │─ ─ ─ ─ └─────┘ the views is - /// └────────────────────┘ left - /// - /// - /// 2 views + /// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is + /// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by + /// │ View 2 │─ ─ ─ ─ └─────┘ the views is + /// └────────────────────┘ left + /// + /// + /// 2 views /// ``` /// This method will compact the data buffers by recreating the view array and only include the data /// that is pointed to by the views. @@ -575,6 +575,15 @@ impl Array for GenericByteViewArray { self.views.is_empty() } + fn shrink_to_fit(&mut self) { + self.views.shrink_to_fit(); + self.buffers.iter_mut().for_each(|b| b.shrink_to_fit()); + self.buffers.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { 0 } diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 1187e16769a0..f852b57fb65e 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -249,7 +249,7 @@ pub struct DictionaryArray { /// map to the real values. keys: PrimitiveArray, - /// Array of dictionary values (can by any DataType). + /// Array of dictionary values (can be any DataType). values: ArrayRef, /// Values are ordered. 
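The before/after diagrams above describe `GenericByteViewArray::gc`. A small illustrative sketch (hypothetical values, not taken from the diff) of how compaction is typically triggered after slicing:

```rust
use arrow_array::StringViewArray;

fn main() {
    // Strings longer than 12 bytes are stored out-of-line in a data buffer.
    let array = StringViewArray::from(vec![
        "this is a fairly long string that lives in a data buffer",
        "another fairly long string sharing the same data buffer",
    ]);

    // Keep only the first view; the large shared buffer is still referenced.
    let sliced = array.slice(0, 1);

    // gc() rebuilds the array so only data pointed to by the remaining views
    // is copied into fresh, compact buffers.
    let compacted = sliced.gc();
    assert_eq!(sliced.value(0), compacted.value(0));
}
```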
@@ -720,6 +720,11 @@ impl Array for DictionaryArray { self.keys.is_empty() } + fn shrink_to_fit(&mut self) { + self.keys.shrink_to_fit(); + self.values.shrink_to_fit(); + } + fn offset(&self) -> usize { self.keys.offset() } @@ -729,7 +734,7 @@ impl Array for DictionaryArray { } fn logical_nulls(&self) -> Option { - match self.values.nulls() { + match self.values.logical_nulls() { None => self.nulls().cloned(), Some(value_nulls) => { let mut builder = BooleanBufferBuilder::new(self.len()); @@ -749,6 +754,26 @@ impl Array for DictionaryArray { } } + fn logical_null_count(&self) -> usize { + match (self.keys.nulls(), self.values.logical_nulls()) { + (None, None) => 0, + (Some(key_nulls), None) => key_nulls.null_count(), + (None, Some(value_nulls)) => self + .keys + .values() + .iter() + .filter(|k| value_nulls.is_null(k.as_usize())) + .count(), + (Some(key_nulls), Some(value_nulls)) => self + .keys + .values() + .iter() + .enumerate() + .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize())) + .count(), + } + } + fn is_nullable(&self) -> bool { !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable()) } @@ -1020,7 +1045,7 @@ impl AnyDictionaryArray for DictionaryArray { mod tests { use super::*; use crate::cast::as_dictionary_array; - use crate::{Int16Array, Int32Array, Int8Array}; + use crate::{Int16Array, Int32Array, Int8Array, RunArray}; use arrow_buffer::{Buffer, ToByteSlice}; #[test] @@ -1445,6 +1470,54 @@ mod tests { assert_eq!(values, &[Some(50), None, None, Some(2)]) } + #[test] + fn test_logical_nulls() -> Result<(), ArrowError> { + let values = Arc::new(RunArray::try_new( + &Int32Array::from(vec![1, 3, 7]), + &Int32Array::from(vec![Some(1), None, Some(3)]), + )?) as ArrayRef; + + // For this test to be meaningful, the values array need to have different nulls and logical nulls + assert_eq!(values.null_count(), 0); + assert_eq!(values.logical_null_count(), 2); + + // Construct a trivial dictionary with 1-1 mapping to underlying array + let dictionary = DictionaryArray::::try_new( + Int8Array::from((0..values.len()).map(|i| i as i8).collect::>()), + Arc::clone(&values), + )?; + + // No keys are null + assert_eq!(dictionary.null_count(), 0); + // Dictionary array values are logically nullable + assert_eq!(dictionary.logical_null_count(), values.logical_null_count()); + assert_eq!(dictionary.logical_nulls(), values.logical_nulls()); + assert!(dictionary.is_nullable()); + + // Construct a trivial dictionary with 1-1 mapping to underlying array except that key 0 is nulled out + let dictionary = DictionaryArray::::try_new( + Int8Array::from( + (0..values.len()) + .map(|i| i as i8) + .map(|i| if i == 0 { None } else { Some(i) }) + .collect::>(), + ), + Arc::clone(&values), + )?; + + // One key is null + assert_eq!(dictionary.null_count(), 1); + + // Dictionary array values are logically nullable + assert_eq!( + dictionary.logical_null_count(), + values.logical_null_count() + 1 + ); + assert!(dictionary.is_nullable()); + + Ok(()) + } + #[test] fn test_normalized_keys() { let values = vec![132, 0, 1].into(); diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 8f1489ee4c3c..576b8012491b 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -237,6 +237,7 @@ impl FixedSizeBinaryArray { /// /// Returns error if argument has length zero, or sizes of nested slices don't match. 
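The `logical_null_count` override for `DictionaryArray` added above distinguishes physical null keys from keys that point at null dictionary values. A condensed sketch of the idea, a simpler variant of the test added in that hunk:

```rust
use std::sync::Arc;
use arrow_array::{types::Int8Type, Array, ArrayRef, DictionaryArray, Int32Array, Int8Array};

fn main() {
    // Values: [10, null]; all keys are valid, but two keys point at the null value.
    let values: ArrayRef = Arc::new(Int32Array::from(vec![Some(10), None]));
    let keys = Int8Array::from(vec![0i8, 1, 1]);
    let dict = DictionaryArray::<Int8Type>::try_new(keys, values).unwrap();

    assert_eq!(dict.null_count(), 0);         // physical: no null keys
    assert_eq!(dict.logical_null_count(), 2); // logical: two entries resolve to null
}
```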
#[deprecated( + since = "28.0.0", note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`" )] pub fn try_from_sparse_iter(mut iter: T) -> Result @@ -602,6 +603,13 @@ impl Array for FixedSizeBinaryArray { self.len == 0 } + fn shrink_to_fit(&mut self) { + self.value_data.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { 0 } @@ -662,7 +670,7 @@ mod tests { let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) .len(3) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .build() .unwrap(); let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data); @@ -691,7 +699,7 @@ mod tests { let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) .len(2) .offset(1) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .build() .unwrap(); let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data); @@ -721,7 +729,7 @@ mod tests { // [null, [10, 11, 12, 13]] let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, false)), + Arc::new(Field::new_list_field(DataType::UInt8, false)), 4, )) .len(2) @@ -757,7 +765,7 @@ mod tests { let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Binary, false)), + Arc::new(Field::new_list_field(DataType::Binary, false)), 4, )) .len(3) @@ -781,7 +789,7 @@ mod tests { let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, false)), + Arc::new(Field::new_list_field(DataType::UInt8, false)), 4, )) .len(3) @@ -798,7 +806,7 @@ mod tests { let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) .len(3) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .build() .unwrap(); let arr = FixedSizeBinaryArray::from(array_data); diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 00a3144a87ad..44be442c9f85 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -95,7 +95,7 @@ use std::sync::Arc; /// .build() /// .unwrap(); /// let list_data_type = DataType::FixedSizeList( -/// Arc::new(Field::new("item", DataType::Int32, false)), +/// Arc::new(Field::new_list_field(DataType::Int32, false)), /// 3, /// ); /// let list_data = ArrayData::builder(list_data_type.clone()) @@ -401,6 +401,13 @@ impl Array for FixedSizeListArray { self.len == 0 } + fn shrink_to_fit(&mut self) { + self.values.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { 0 } @@ -487,7 +494,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_child_data(value_data.clone()) @@ -540,7 +547,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -569,7 +576,7 @@ mod tests { // 
Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data.clone()) @@ -627,7 +634,7 @@ mod tests { // Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data) @@ -650,7 +657,7 @@ mod tests { Some(4), ])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), None); assert_eq!(list.len(), 3); @@ -674,7 +681,7 @@ mod tests { let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2"); - let field = Arc::new(Field::new("item", DataType::Int32, false)); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""); @@ -682,14 +689,14 @@ mod tests { let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3)); FixedSizeListArray::new(field, 2, values.clone(), Some(nulls)); - let field = Arc::new(Field::new("item", DataType::Int64, true)); + let field = Arc::new(Field::new_list_field(DataType::Int64, true)); let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""); } #[test] fn empty_fixed_size_list() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let nulls = NullBuffer::new_null(2); let values = new_empty_array(&DataType::Int32); let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls)); diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 1fab0009f2cc..bed0bdf889b2 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -485,6 +485,14 @@ impl Array for GenericListArray { self.value_offsets.len() <= 1 } + fn shrink_to_fit(&mut self) { + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + self.values.shrink_to_fit(); + self.value_offsets.shrink_to_fit(); + } + fn offset(&self) -> usize { 0 } @@ -565,7 +573,7 @@ mod tests { // [[0, 1, 2], [3, 4, 5], [6, 7]] let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); ListArray::new(field, offsets, Arc::new(values), None) } @@ -595,7 +603,8 @@ mod tests { let value_offsets = Buffer::from([]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, 
false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(0) .add_buffer(value_offsets) @@ -621,7 +630,8 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_buffer(value_offsets.clone()) @@ -766,7 +776,8 @@ mod tests { bit_util::set_bit(&mut null_bits, 8); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(9) .add_buffer(value_offsets) @@ -917,7 +928,8 @@ mod tests { .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) .build_unchecked() }; - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -934,7 +946,8 @@ mod tests { #[cfg(not(feature = "force_validate"))] fn test_list_array_invalid_child_array_len() { let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -964,7 +977,8 @@ mod tests { let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -1010,7 +1024,8 @@ mod tests { .build_unchecked() }; - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .add_buffer(buf2) diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index 4e949a642701..7e52a6f3e457 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -326,6 +326,15 @@ impl Array for GenericListViewArray { self.value_sizes.is_empty() } + fn shrink_to_fit(&mut self) { + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + self.values.shrink_to_fit(); + self.value_offsets.shrink_to_fit(); + self.value_sizes.shrink_to_fit(); + } + fn offset(&self) -> usize { 0 } @@ -490,7 +499,7 @@ mod tests { fn test_empty_list_view_array() { // Construct an empty value array let vec: Vec = vec![]; - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![]); let offsets = ScalarBuffer::from(vec![]); let values = Int32Array::from(vec); @@ -508,7 +517,7 @@ mod tests { .build() .unwrap(); - let field = Arc::new(Field::new("item", 
DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![3i32, 3, 2]); let offsets = ScalarBuffer::from(vec![0i32, 3, 6]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -544,7 +553,7 @@ mod tests { .build() .unwrap(); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![3i64, 3, 2]); let offsets = ScalarBuffer::from(vec![0i64, 3, 6]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -590,7 +599,7 @@ mod tests { let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); let null_buffer = NullBuffer::new(buffer); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -656,7 +665,7 @@ mod tests { let null_buffer = NullBuffer::new(buffer); // Construct a large list view array from the above two - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -718,7 +727,7 @@ mod tests { // Construct a buffer for value offsets, for the nested array: // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] // Construct a list array from the above two - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -741,7 +750,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -759,7 +768,7 @@ mod tests { fn test_list_view_array_invalid_child_array_len() { let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -771,7 +780,7 @@ mod tests { #[test] fn test_list_view_array_offsets_need_not_start_at_zero() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![0i32, 0, 3]); let offsets = ScalarBuffer::from(vec![2i32, 2, 5]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -800,7 +809,7 @@ mod tests { }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .add_buffer(offset_buf2) @@ -942,7 +951,7 @@ mod tests { .build_unchecked() }; let 
list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(2) @@ -976,7 +985,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -1015,7 +1024,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 254437630a44..18a7c491aa16 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -372,6 +372,14 @@ impl Array for MapArray { self.value_offsets.len() <= 1 } + fn shrink_to_fit(&mut self) { + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + self.entries.shrink_to_fit(); + self.value_offsets.shrink_to_fit(); + } + fn offset(&self) -> usize { 0 } diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 4a9e54a60789..23b3cb628aaf 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -76,6 +76,8 @@ mod list_view_array; pub use list_view_array::*; +use crate::iterator::ArrayIter; + /// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html) pub trait Array: std::fmt::Debug + Send + Sync { /// Returns the array as [`Any`] so that it can be @@ -165,6 +167,12 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// ``` fn is_empty(&self) -> bool; + /// Shrinks the capacity of any exclusively owned buffer as much as possible + /// + /// Shared or externally allocated buffers will be ignored, and + /// any buffer offsets will be preserved. + fn shrink_to_fit(&mut self) {} + /// Returns the offset into the underlying data used by this array(-slice). /// Note that the underlying data can be shared by many arrays. /// This defaults to `0`. @@ -315,8 +323,7 @@ pub trait Array: std::fmt::Debug + Send + Sync { /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key, /// and therefore would not appear in [`Array::logical_nulls`]. fn is_nullable(&self) -> bool { - // TODO this is not necessarily perfect default implementation, since null_count() and logical_null_count() are not always equivalent - self.null_count() != 0 + self.logical_null_count() != 0 } /// Returns the total number of bytes of memory pointed to by this array. @@ -364,6 +371,15 @@ impl Array for ArrayRef { self.as_ref().is_empty() } + /// For shared buffers, this is a no-op. + fn shrink_to_fit(&mut self) { + if let Some(slf) = Arc::get_mut(self) { + slf.shrink_to_fit(); + } else { + // We ignore shared buffers. 
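The new `Array::shrink_to_fit` documented above only touches exclusively owned buffers. A minimal usage sketch (hypothetical capacity, assuming the trait method as declared in this hunk):

```rust
use arrow_array::{Array, Int32Array};

fn main() {
    // Build with far more capacity than the final array needs.
    let mut builder = Int32Array::builder(1_000_000);
    builder.append_slice(&[1, 2, 3]);
    let mut array = builder.finish();

    // Shrinks only exclusively owned buffers; shared or externally allocated
    // buffers are ignored, and buffer offsets are preserved.
    array.shrink_to_fit();
    assert_eq!(array.len(), 3);
    assert_eq!(array.value(0), 1);
}
```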
+ } + } + fn offset(&self) -> usize { self.as_ref().offset() } @@ -570,6 +586,40 @@ pub trait ArrayAccessor: Array { unsafe fn value_unchecked(&self, index: usize) -> Self::Item; } +/// A trait for Arrow String Arrays, currently three types are supported: +/// - `StringArray` +/// - `LargeStringArray` +/// - `StringViewArray` +/// +/// This trait helps to abstract over the different types of string arrays +/// so that we don't need to duplicate the implementation for each type. +pub trait StringArrayType<'a>: ArrayAccessor + Sized { + /// Returns true if all data within this string array is ASCII + fn is_ascii(&self) -> bool; + + /// Constructs a new iterator + fn iter(&self) -> ArrayIter; +} + +impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { + fn is_ascii(&self) -> bool { + GenericStringArray::::is_ascii(self) + } + + fn iter(&self) -> ArrayIter { + GenericStringArray::::iter(self) + } +} +impl<'a> StringArrayType<'a> for &'a StringViewArray { + fn is_ascii(&self) -> bool { + StringViewArray::is_ascii(self) + } + + fn iter(&self) -> ArrayIter { + StringViewArray::iter(self) + } +} + impl PartialEq for dyn Array + '_ { fn eq(&self, other: &Self) -> bool { self.to_data().eq(&other.to_data()) @@ -876,7 +926,7 @@ mod tests { #[test] fn test_empty_list_primitive() { - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let array = new_empty_array(&data_type); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 0); @@ -934,7 +984,7 @@ mod tests { #[test] fn test_null_list_primitive() { - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let array = new_null_array(&data_type, 9); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 9); diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 7b0d6c5ca1b6..57aa23bf9040 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1152,6 +1152,13 @@ impl Array for PrimitiveArray { self.values.is_empty() } + fn shrink_to_fit(&mut self) { + self.values.shrink_to_fit(); + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + } + fn offset(&self) -> usize { 0 } @@ -1480,24 +1487,6 @@ def_numeric_from_vec!(TimestampMicrosecondType); def_numeric_from_vec!(TimestampNanosecondType); impl PrimitiveArray { - /// Construct a timestamp array from a vec of i64 values and an optional timezone - #[deprecated(note = "Use with_timezone_opt instead")] - pub fn from_vec(data: Vec, timezone: Option) -> Self - where - Self: From>, - { - Self::from(data).with_timezone_opt(timezone) - } - - /// Construct a timestamp array from a vec of `Option` values and an optional timezone - #[deprecated(note = "Use with_timezone_opt instead")] - pub fn from_opt_vec(data: Vec>, timezone: Option) -> Self - where - Self: From>>, - { - Self::from(data).with_timezone_opt(timezone) - } - /// Returns the timezone of this array if any pub fn timezone(&self) -> Option<&str> { match self.data_type() { @@ -2296,7 +2285,7 @@ mod tests { ]; let array_data = ArrayData::builder(DataType::Decimal128(38, 6)) .len(2) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .build() .unwrap(); let decimal_array = Decimal128Array::from(array_data); diff --git 
a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index dc4e6c96d9da..b340bf9a9065 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -330,6 +330,11 @@ impl Array for RunArray { self.run_ends.is_empty() } + fn shrink_to_fit(&mut self) { + self.run_ends.shrink_to_fit(); + self.values.shrink_to_fit(); + } + fn offset(&self) -> usize { self.run_ends.offset() } diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 25581cfaa49d..ed70e5744fff 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -17,18 +17,12 @@ use crate::types::GenericStringType; use crate::{GenericBinaryArray, GenericByteArray, GenericListArray, OffsetSizeTrait}; -use arrow_schema::{ArrowError, DataType}; +use arrow_schema::ArrowError; /// A [`GenericByteArray`] for storing `str` pub type GenericStringArray = GenericByteArray>; impl GenericStringArray { - /// Get the data type of the array. - #[deprecated(note = "please use `Self::DATA_TYPE` instead")] - pub const fn get_data_type() -> DataType { - Self::DATA_TYPE - } - /// Returns the number of `Unicode Scalar Value` in the string at index `i`. /// # Performance /// This function has `O(n)` time complexity where `n` is the string length. @@ -167,7 +161,7 @@ mod tests { use crate::Array; use arrow_buffer::Buffer; use arrow_data::ArrayData; - use arrow_schema::Field; + use arrow_schema::{DataType, Field}; use std::sync::Arc; #[test] @@ -382,17 +376,15 @@ mod tests { let child_data = ArrayData::builder(DataType::UInt8) .len(15) .offset(5) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .build() .unwrap(); let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); // [None, Some("Parquet")] let array_data = ArrayData::builder(data_type) @@ -427,7 +419,7 @@ mod tests { let values = b"HelloArrow"; let child_data = ArrayData::builder(DataType::UInt8) .len(10) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010]))) .build() .unwrap(); @@ -436,11 +428,9 @@ mod tests { // It is possible to create a null struct containing a non-nullable child // see https://github.com/apache/arrow-rs/pull/3244 for details - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - true, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, true), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -469,16 +459,14 @@ mod tests { let values = b"HelloArrow"; let child_data = ArrayData::builder(DataType::UInt16) .len(5) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .build() .unwrap(); let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt16, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt16, false), + )); let array_data = ArrayData::builder(data_type) .len(2) diff --git 
a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 41eb8235e540..de6d9c699d22 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -239,12 +239,6 @@ impl StructArray { &self.fields } - /// Returns child array refs of the struct array - #[deprecated(note = "Use columns().to_vec()")] - pub fn columns_ref(&self) -> Vec { - self.columns().to_vec() - } - /// Return field names in this struct array pub fn column_names(&self) -> Vec<&str> { match self.data_type() { @@ -370,6 +364,13 @@ impl Array for StructArray { self.len == 0 } + fn shrink_to_fit(&mut self) { + if let Some(nulls) = &mut self.nulls { + nulls.shrink_to_fit(); + } + self.fields.iter_mut().for_each(|n| n.shrink_to_fit()); + } + fn offset(&self) -> usize { 0 } diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 3c6da5a7b5c0..b442395b4978 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -653,6 +653,17 @@ impl UnionArray { } } } + + /// Returns a vector of tuples containing each field's type_id and its logical null buffer. + /// Only fields with non-zero null counts are included. + fn fields_logical_nulls(&self) -> Vec<(i8, NullBuffer)> { + self.fields + .iter() + .enumerate() + .filter_map(|(type_id, field)| Some((type_id as i8, field.as_ref()?.logical_nulls()?))) + .filter(|(_, nulls)| nulls.null_count() > 0) + .collect() + } } impl From for UnionArray { @@ -744,6 +755,17 @@ impl Array for UnionArray { self.type_ids.is_empty() } + fn shrink_to_fit(&mut self) { + self.type_ids.shrink_to_fit(); + if let Some(offsets) = &mut self.offsets { + offsets.shrink_to_fit(); + } + for array in self.fields.iter_mut().flatten() { + array.shrink_to_fit(); + } + self.fields.shrink_to_fit(); + } + fn offset(&self) -> usize { 0 } @@ -768,11 +790,7 @@ impl Array for UnionArray { .flatten(); } - let logical_nulls = fields - .iter() - .filter_map(|(type_id, _)| Some((type_id, self.child(type_id).logical_nulls()?))) - .filter(|(_, nulls)| nulls.null_count() > 0) - .collect::>(); + let logical_nulls = self.fields_logical_nulls(); if logical_nulls.is_empty() { return None; @@ -1941,15 +1959,14 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, Some(offsets), children).unwrap(); - let result = array.logical_nulls(); + let expected = BooleanBuffer::from(vec![true, true, true, false, false, false]); - let expected = NullBuffer::from(vec![true, true, true, false, false, false]); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!(expected, array.gather_nulls(array.fields_logical_nulls())); } #[test] fn test_sparse_union_logical_nulls_mask_all_nulls_skip_one() { - // If we used union_fields() (3 fields with nulls), the choosen strategy would be Gather on x86 without any specified target feature e.g CI runtime let fields: UnionFields = [ (1, Arc::new(Field::new("A", DataType::Int32, true))), (3, Arc::new(Field::new("B", DataType::Float64, true))), @@ -1966,10 +1983,13 @@ mod tests { let array = UnionArray::try_new(fields.clone(), type_ids, None, children).unwrap(); - let result = array.logical_nulls(); + let expected = BooleanBuffer::from(vec![false, false, true, false]); - let expected = NullBuffer::from(vec![false, false, true, false]); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + 
array.mask_sparse_all_with_nulls_skip_one(array.fields_logical_nulls()) + ); //like above, but repeated to genereate two exact bitmasks and a non empty remainder let len = 2 * 64 + 32; @@ -1986,12 +2006,15 @@ mod tests { ) .unwrap(); - let result = array.logical_nulls(); - let expected = - NullBuffer::from_iter([false, false, true, false].into_iter().cycle().take(len)); + BooleanBuffer::from_iter([false, false, true, false].into_iter().cycle().take(len)); + assert_eq!(array.len(), len); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + array.mask_sparse_all_with_nulls_skip_one(array.fields_logical_nulls()) + ); } #[test] @@ -2010,10 +2033,13 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); - let result = array.logical_nulls(); + let expected = BooleanBuffer::from(vec![true, true, true, true, false, false]); - let expected = NullBuffer::from(vec![true, true, true, true, false, false]); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + array.mask_sparse_skip_without_nulls(array.fields_logical_nulls()) + ); //like above, but repeated to genereate two exact bitmasks and a non empty remainder let len = 2 * 64 + 32; @@ -2031,16 +2057,19 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); - let result = array.logical_nulls(); - - let expected = NullBuffer::from_iter( + let expected = BooleanBuffer::from_iter( [true, true, true, true, false, true] .into_iter() .cycle() .take(len), ); + assert_eq!(array.len(), len); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + array.mask_sparse_skip_without_nulls(array.fields_logical_nulls()) + ); } #[test] @@ -2059,10 +2088,13 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); - let result = array.logical_nulls(); + let expected = BooleanBuffer::from(vec![false, false, true, true, false, false]); - let expected = NullBuffer::from(vec![false, false, true, true, false, false]); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + array.mask_sparse_skip_fully_null(array.fields_logical_nulls()) + ); //like above, but repeated to genereate two exact bitmasks and a non empty remainder let len = 2 * 64 + 32; @@ -2080,16 +2112,19 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); - let result = array.logical_nulls(); - - let expected = NullBuffer::from_iter( + let expected = BooleanBuffer::from_iter( [false, false, true, true, false, false] .into_iter() .cycle() .take(len), ); + assert_eq!(array.len(), len); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!( + expected, + array.mask_sparse_skip_fully_null(array.fields_logical_nulls()) + ); } #[test] @@ -2125,11 +2160,10 @@ mod tests { ) .unwrap(); - let result = array.logical_nulls(); - - let expected = NullBuffer::from(vec![true, false, true, false]); + let expected = BooleanBuffer::from(vec![true, false, true, false]); - assert_eq!(Some(expected), result); + assert_eq!(expected, array.logical_nulls().unwrap().into_inner()); + assert_eq!(expected, array.gather_nulls(array.fields_logical_nulls())); } fn union_fields() -> 
UnionFields { diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 5dff67650687..5c142b277d14 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -182,7 +182,7 @@ where let field = self .field .clone() - .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); + .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true))); FixedSizeListArray::new(field, self.list_len, values, nulls) } @@ -204,7 +204,7 @@ where let field = self .field .clone() - .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); + .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true))); FixedSizeListArray::new(field, self.list_len, values, nulls) } diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index d12c2b7db468..7268e751b149 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -136,7 +136,7 @@ impl GenericByteViewBuilder { /// Override the size of buffers to allocate for holding string data /// Use `with_fixed_block_size` instead. - #[deprecated(note = "Use `with_fixed_block_size` instead")] + #[deprecated(since = "53.0.0", note = "Use `with_fixed_block_size` instead")] pub fn with_block_size(self, block_size: u32) -> Self { self.with_fixed_block_size(block_size) } diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index a7d16f45f53b..a9c88ec6c586 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -49,7 +49,6 @@ use std::sync::Arc; /// builder.append(true); /// /// // Null -/// builder.values().append_value("?"); // irrelevant /// builder.append(false); /// /// // [D] @@ -70,15 +69,14 @@ use std::sync::Arc; /// array.values().as_ref(), /// &StringArray::from(vec![ /// Some("A"), Some("B"), Some("C"), -/// Some("?"), Some("D"), None, -/// Some("F") +/// Some("D"), None, Some("F") /// ]) /// ); /// /// // Offsets are indexes into the values array /// assert_eq!( /// array.value_offsets(), -/// &[0, 3, 3, 4, 5, 7] +/// &[0, 3, 3, 3, 4, 6] /// ); /// ``` /// @@ -299,7 +297,7 @@ where let field = match &self.field { Some(f) => f.clone(), - None => Arc::new(Field::new("item", values.data_type().clone(), true)), + None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), }; GenericListArray::new(field, offsets, values, nulls) @@ -316,7 +314,7 @@ where let field = match &self.field { Some(f) => f.clone(), - None => Arc::new(Field::new("item", values.data_type().clone(), true)), + None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), }; GenericListArray::new(field, offsets, values, nulls) @@ -586,7 +584,7 @@ mod tests { fn test_boxed_list_list_array_builder() { // This test is same as `test_list_list_array_builder` but uses boxed builders. let values_builder = make_builder( - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), 10, ); test_boxed_generic_list_generic_list_array_builder::(values_builder); @@ -596,7 +594,7 @@ mod tests { fn test_boxed_large_list_large_list_array_builder() { // This test is same as `test_list_list_array_builder` but uses boxed builders. 
let values_builder = make_builder( - &DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))), 10, ); test_boxed_generic_list_generic_list_array_builder::(values_builder); @@ -791,7 +789,7 @@ mod tests { #[test] #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")] fn test_checks_nullability() { - let field = Arc::new(Field::new("item", DataType::Int32, false)); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone()); builder.append_value([Some(1), None]); builder.finish(); @@ -800,7 +798,7 @@ mod tests { #[test] #[should_panic(expected = "ListArray expected data type Int64 got Int32")] fn test_checks_data_type() { - let field = Arc::new(Field::new("item", DataType::Int64, false)); + let field = Arc::new(Field::new_list_field(DataType::Int64, false)); let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone()); builder.append_value([Some(1)]); builder.finish(); diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index dd1a5c3ae722..89a96280eb87 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -123,7 +123,7 @@ //! let string_field = Arc::new(Field::new("i32", DataType::Utf8, false)); //! //! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef; -//! let value_field = Arc::new(Field::new("item", DataType::Int32, true)); +//! let value_field = Arc::new(Field::new_list_field(DataType::Int32, true)); //! let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true)); //! //! StructArray::from(vec![ diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 396ab2fed851..2b288445c74b 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. 
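The `Field::new_list_field` replacements above are mechanical: the helper produces the same conventional `"item"` child field that `Field::new("item", ...)` spelled out explicitly. A minimal equivalence sketch (not part of this diff, using the `arrow_schema` API referenced above):

```rust
use arrow_schema::{DataType, Field};

fn main() {
    // `new_list_field` is shorthand for the default "item" child used by List/LargeList
    let a = Field::new_list_field(DataType::Int32, true);
    let b = Field::new("item", DataType::Int32, true);
    assert_eq!(a, b);
    assert_eq!(Field::LIST_FIELD_DEFAULT_NAME, "item");
}
```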
-use crate::builder::*; -use crate::types::Int32Type; use crate::StructArray; +use crate::{ + builder::*, + types::{Int16Type, Int32Type, Int64Type, Int8Type}, +}; use arrow_buffer::NullBufferBuilder; use arrow_schema::{DataType, Fields, IntervalUnit, SchemaBuilder, TimeUnit}; use std::sync::Arc; @@ -46,8 +48,7 @@ use std::sync::Arc; /// let mut example_col = ListBuilder::new(StructBuilder::from_fields( /// vec![Field::new( /// "value_list", -/// DataType::List(Arc::new(Field::new( -/// "item", +/// DataType::List(Arc::new(Field::new_list_field( /// DataType::Struct(Fields::from(vec![ /// Field::new("key", DataType::Utf8, true), /// Field::new("value", DataType::Utf8, true), @@ -291,29 +292,42 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box panic!("The field of Map data type {t:?} should has a child Struct field"), }, DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)), - DataType::Dictionary(key_type, value_type) if **key_type == DataType::Int32 => { - match &**value_type { - DataType::Utf8 => { - let dict_builder: StringDictionaryBuilder = - StringDictionaryBuilder::with_capacity(capacity, 256, 1024); - Box::new(dict_builder) - } - DataType::LargeUtf8 => { - let dict_builder: LargeStringDictionaryBuilder = - LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024); - Box::new(dict_builder) - } - DataType::Binary => { - let dict_builder: BinaryDictionaryBuilder = - BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024); - Box::new(dict_builder) - } - DataType::LargeBinary => { - let dict_builder: LargeBinaryDictionaryBuilder = - LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024); - Box::new(dict_builder) + t @ DataType::Dictionary(key_type, value_type) => { + macro_rules! 
dict_builder { + ($key_type:ty) => { + match &**value_type { + DataType::Utf8 => { + let dict_builder: StringDictionaryBuilder<$key_type> = + StringDictionaryBuilder::with_capacity(capacity, 256, 1024); + Box::new(dict_builder) + } + DataType::LargeUtf8 => { + let dict_builder: LargeStringDictionaryBuilder<$key_type> = + LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024); + Box::new(dict_builder) + } + DataType::Binary => { + let dict_builder: BinaryDictionaryBuilder<$key_type> = + BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024); + Box::new(dict_builder) + } + DataType::LargeBinary => { + let dict_builder: LargeBinaryDictionaryBuilder<$key_type> = + LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024); + Box::new(dict_builder) + } + t => panic!("Dictionary value type {t:?} is not currently supported"), + } + }; + } + match &**key_type { + DataType::Int8 => dict_builder!(Int8Type), + DataType::Int16 => dict_builder!(Int16Type), + DataType::Int32 => dict_builder!(Int32Type), + DataType::Int64 => dict_builder!(Int64Type), + _ => { + panic!("Data type {t:?} with key type {key_type:?} is not currently supported") } - t => panic!("Unsupported dictionary value type {t:?} is not currently supported"), } } t => panic!("Data type {t:?} is not currently supported"), @@ -431,12 +445,14 @@ impl StructBuilder { #[cfg(test)] mod tests { + use std::any::type_name; + use super::*; use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::Field; - use crate::array::Array; + use crate::{array::Array, types::ArrowDictionaryKeyType}; #[test] fn test_struct_array_builder() { @@ -691,10 +707,31 @@ mod tests { } #[test] - fn test_struct_array_builder_from_dictionary_type() { + fn test_struct_array_builder_from_dictionary_type_int8_key() { + test_struct_array_builder_from_dictionary_type_inner::(DataType::Int8); + } + + #[test] + fn test_struct_array_builder_from_dictionary_type_int16_key() { + test_struct_array_builder_from_dictionary_type_inner::(DataType::Int16); + } + + #[test] + fn test_struct_array_builder_from_dictionary_type_int32_key() { + test_struct_array_builder_from_dictionary_type_inner::(DataType::Int32); + } + + #[test] + fn test_struct_array_builder_from_dictionary_type_int64_key() { + test_struct_array_builder_from_dictionary_type_inner::(DataType::Int64); + } + + fn test_struct_array_builder_from_dictionary_type_inner( + key_type: DataType, + ) { let dict_field = Field::new( "f1", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)), false, ); let fields = vec![dict_field.clone()]; @@ -702,10 +739,14 @@ mod tests { let cloned_dict_field = dict_field.clone(); let expected_child_dtype = dict_field.data_type(); let mut struct_builder = StructBuilder::from_fields(vec![cloned_dict_field], 5); - struct_builder - .field_builder::>(0) - .expect("Builder should be StringDictionaryBuilder") - .append_value("dict string"); + let Some(dict_builder) = struct_builder.field_builder::>(0) + else { + panic!( + "Builder should be StringDictionaryBuilder<{}>", + type_name::() + ) + }; + dict_builder.append_value("dict string"); struct_builder.append(true); let array = struct_builder.finish(); @@ -715,13 +756,15 @@ mod tests { } #[test] - #[should_panic(expected = "Data type Dictionary(Int16, Utf8) is not currently supported")] + #[should_panic( + expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported" + )] fn 
test_struct_array_builder_from_schema_unsupported_type() { let fields = vec![ - Field::new("f1", DataType::Int16, false), + Field::new("f1", DataType::UInt64, false), Field::new( "f2", - DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)), + DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)), false, ), ]; @@ -730,7 +773,7 @@ mod tests { } #[test] - #[should_panic(expected = "Unsupported dictionary value type Int32 is not currently supported")] + #[should_panic(expected = "Dictionary value type Int32 is not currently supported")] fn test_struct_array_builder_from_dict_with_unsupported_value_type() { let fields = vec![Field::new( "f1", diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index 232b29560cbf..fc657f94c6a6 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -689,12 +689,6 @@ array_downcast_fn!(as_struct_array, StructArray); array_downcast_fn!(as_union_array, UnionArray); array_downcast_fn!(as_map_array, MapArray); -/// Force downcast of an Array, such as an ArrayRef to Decimal128Array, panic’ing on failure. -#[deprecated(note = "please use `as_primitive_array::` instead")] -pub fn as_decimal_array(arr: &dyn Array) -> &PrimitiveArray { - as_primitive_array::(arr) -} - /// Downcasts a `dyn Array` to a concrete type /// /// ``` diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 4426e0986409..144f2a21afec 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -121,7 +121,10 @@ type Result = std::result::Result; /// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and /// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. /// Usually the raw pointers are provided by the array data consumer. -#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")] +#[deprecated( + since = "52.0.0", + note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from" +)] pub unsafe fn export_array_into_raw( src: ArrayRef, out_array: *mut FFI_ArrowArray, @@ -719,7 +722,7 @@ mod tests_to_then_from_ffi { // Construct a list array from the above two let list_data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::Int32, false), + Field::new_list_field(DataType::Int32, false), )); let list_data = ArrayData::builder(list_data_type) @@ -1478,7 +1481,7 @@ mod tests_from_ffi { let offsets: Vec = vec![0, 2, 4, 6, 8, 10, 12, 14, 16]; let value_offsets = Buffer::from_slice_ref(offsets); let inner_list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let inner_list_data = ArrayData::builder(inner_list_data_type.clone()) .len(8) .add_buffer(value_offsets) diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs index 34f0cd7cfc74..3d4e89e80b89 100644 --- a/arrow-array/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -379,21 +379,6 @@ impl RecordBatchReader for ArrowArrayStreamReader { } } -/// Exports a record batch reader to raw pointer of the C Stream Interface provided by the consumer. -/// -/// # Safety -/// Assumes that the pointer represents valid C Stream Interfaces, both in memory -/// representation and lifetime via the `release` mechanism. 
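With the macro above, `make_builder` accepts dictionaries keyed by any of `Int8`/`Int16`/`Int32`/`Int64` rather than only `Int32`. A usage sketch (not part of this diff; it assumes the public `make_builder` and dictionary builder APIs exercised by the tests above):

```rust
use arrow_array::builder::{make_builder, ArrayBuilder, StringDictionaryBuilder};
use arrow_array::types::Int16Type;
use arrow_array::Array;
use arrow_schema::DataType;

fn main() {
    // Before this change, any key type other than Int32 panicked here
    let dt = DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8));
    let mut builder = make_builder(&dt, 10);

    let dict = builder
        .as_any_mut()
        .downcast_mut::<StringDictionaryBuilder<Int16Type>>()
        .expect("Int16-keyed string dictionary builder");
    dict.append_value("dict string");
    assert_eq!(dict.finish().len(), 1);
}
```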
-#[deprecated(note = "Use FFI_ArrowArrayStream::new")] -pub unsafe fn export_reader_into_raw( - reader: Box, - out_stream: *mut FFI_ArrowArrayStream, -) { - let stream = FFI_ArrowArrayStream::new(reader); - - std::ptr::write_unaligned(out_stream, stream); -} - #[cfg(test)] mod tests { use super::*; diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 78108d441b05..8958ca6fae62 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -32,15 +32,6 @@ pub trait RecordBatchReader: Iterator> { /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this /// reader should have the same schema as returned from this method. fn schema(&self) -> SchemaRef; - - /// Reads the next `RecordBatch`. - #[deprecated( - since = "2.0.0", - note = "This method is deprecated in favour of `next` from the trait Iterator." - )] - fn next_batch(&mut self) -> Result, ArrowError> { - self.next().transpose() - } } impl RecordBatchReader for Box { @@ -58,6 +49,129 @@ pub trait RecordBatchWriter { fn close(self) -> Result<(), ArrowError>; } +/// Creates an array from a literal slice of values, +/// suitable for rapid testing and development. +/// +/// Example: +/// +/// ```rust +/// +/// use arrow_array::create_array; +/// +/// let array = create_array!(Int32, [1, 2, 3, 4, 5]); +/// let array = create_array!(Utf8, [Some("a"), Some("b"), None, Some("e")]); +/// ``` +/// Support for limited data types is available. The macro will return a compile error if an unsupported data type is used. +/// Presently supported data types are: +/// - `Boolean`, `Null` +/// - `Decimal128`, `Decimal256` +/// - `Float16`, `Float32`, `Float64` +/// - `Int8`, `Int16`, `Int32`, `Int64` +/// - `UInt8`, `UInt16`, `UInt32`, `UInt64` +/// - `IntervalDayTime`, `IntervalYearMonth` +/// - `Second`, `Millisecond`, `Microsecond`, `Nanosecond` +/// - `Second32`, `Millisecond32`, `Microsecond64`, `Nanosecond64` +/// - `DurationSecond`, `DurationMillisecond`, `DurationMicrosecond`, `DurationNanosecond` +/// - `TimestampSecond`, `TimestampMillisecond`, `TimestampMicrosecond`, `TimestampNanosecond` +/// - `Utf8`, `Utf8View`, `LargeUtf8`, `Binary`, `LargeBinary` +#[macro_export] +macro_rules! 
create_array { + // `@from` is used for those types that have a common method `::from` + (@from Boolean) => { $crate::BooleanArray }; + (@from Int8) => { $crate::Int8Array }; + (@from Int16) => { $crate::Int16Array }; + (@from Int32) => { $crate::Int32Array }; + (@from Int64) => { $crate::Int64Array }; + (@from UInt8) => { $crate::UInt8Array }; + (@from UInt16) => { $crate::UInt16Array }; + (@from UInt32) => { $crate::UInt32Array }; + (@from UInt64) => { $crate::UInt64Array }; + (@from Float16) => { $crate::Float16Array }; + (@from Float32) => { $crate::Float32Array }; + (@from Float64) => { $crate::Float64Array }; + (@from Utf8) => { $crate::StringArray }; + (@from Utf8View) => { $crate::StringViewArray }; + (@from LargeUtf8) => { $crate::LargeStringArray }; + (@from IntervalDayTime) => { $crate::IntervalDayTimeArray }; + (@from IntervalYearMonth) => { $crate::IntervalYearMonthArray }; + (@from Second) => { $crate::TimestampSecondArray }; + (@from Millisecond) => { $crate::TimestampMillisecondArray }; + (@from Microsecond) => { $crate::TimestampMicrosecondArray }; + (@from Nanosecond) => { $crate::TimestampNanosecondArray }; + (@from Second32) => { $crate::Time32SecondArray }; + (@from Millisecond32) => { $crate::Time32MillisecondArray }; + (@from Microsecond64) => { $crate::Time64MicrosecondArray }; + (@from Nanosecond64) => { $crate::Time64Nanosecond64Array }; + (@from DurationSecond) => { $crate::DurationSecondArray }; + (@from DurationMillisecond) => { $crate::DurationMillisecondArray }; + (@from DurationMicrosecond) => { $crate::DurationMicrosecondArray }; + (@from DurationNanosecond) => { $crate::DurationNanosecondArray }; + (@from Decimal128) => { $crate::Decimal128Array }; + (@from Decimal256) => { $crate::Decimal256Array }; + (@from TimestampSecond) => { $crate::TimestampSecondArray }; + (@from TimestampMillisecond) => { $crate::TimestampMillisecondArray }; + (@from TimestampMicrosecond) => { $crate::TimestampMicrosecondArray }; + (@from TimestampNanosecond) => { $crate::TimestampNanosecondArray }; + + (@from $ty: ident) => { + compile_error!(concat!("Unsupported data type: ", stringify!($ty))) + }; + + (Null, $size: expr) => { + std::sync::Arc::new($crate::NullArray::new($size)) + }; + + (Binary, [$($values: expr),*]) => { + std::sync::Arc::new($crate::BinaryArray::from_vec(vec![$($values),*])) + }; + + (LargeBinary, [$($values: expr),*]) => { + std::sync::Arc::new($crate::LargeBinaryArray::from_vec(vec![$($values),*])) + }; + + ($ty: tt, [$($values: expr),*]) => { + std::sync::Arc::new(<$crate::create_array!(@from $ty)>::from(vec![$($values),*])) + }; +} + +/// Creates a record batch from literal slice of values, suitable for rapid +/// testing and development. +/// +/// Example: +/// +/// ```rust +/// use arrow_array::record_batch; +/// use arrow_schema; +/// +/// let batch = record_batch!( +/// ("a", Int32, [1, 2, 3]), +/// ("b", Float64, [Some(4.0), None, Some(5.0)]), +/// ("c", Utf8, ["alpha", "beta", "gamma"]) +/// ); +/// ``` +/// Due to limitation of [`create_array!`] macro, support for limited data types is available. +#[macro_export] +macro_rules! 
record_batch { + ($(($name: expr, $type: ident, [$($values: expr),*])),*) => { + { + let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ + $( + arrow_schema::Field::new($name, arrow_schema::DataType::$type, true), + )* + ])); + + let batch = $crate::RecordBatch::try_new( + schema, + vec![$( + $crate::create_array!($type, [$($values),*]), + )*] + ); + + batch + } + } +} + /// A two-dimensional batch of column-oriented data with a defined /// [schema](arrow_schema::Schema). /// @@ -68,6 +182,19 @@ pub trait RecordBatchWriter { /// /// Record batches are a convenient unit of work for various /// serialization and computation functions, possibly incremental. +/// +/// Use the [`record_batch!`] macro to create a [`RecordBatch`] from +/// literal slice of values, useful for rapid prototyping and testing. +/// +/// Example: +/// ```rust +/// use arrow_array::record_batch; +/// let batch = record_batch!( +/// ("a", Int32, [1, 2, 3]), +/// ("b", Float64, [Some(4.0), None, Some(5.0)]), +/// ("c", Utf8, ["alpha", "beta", "gamma"]) +/// ); +/// ``` #[derive(Clone, Debug, PartialEq)] pub struct RecordBatch { schema: SchemaRef, @@ -411,6 +538,19 @@ impl RecordBatch { /// ("b", b), /// ]); /// ``` + /// Another way to quickly create a [`RecordBatch`] is to use the [`record_batch!`] macro, + /// which is particularly helpful for rapid prototyping and testing. + /// + /// Example: + /// + /// ```rust + /// use arrow_array::record_batch; + /// let batch = record_batch!( + /// ("a", Int32, [1, 2, 3]), + /// ("b", Float64, [Some(4.0), None, Some(5.0)]), + /// ("c", Utf8, ["alpha", "beta", "gamma"]) + /// ); + /// ``` pub fn try_from_iter(value: I) -> Result where I: IntoIterator, @@ -806,7 +946,7 @@ mod tests { fn create_record_batch_field_name_mismatch() { let fields = vec![ Field::new("a1", DataType::Int32, false), - Field::new_list("a2", Field::new("item", DataType::Int8, false), false), + Field::new_list("a2", Field::new_list_field(DataType::Int8, false), false), ]; let schema = Arc::new(Schema::new(vec![Field::new_struct("a", fields, true)])); diff --git a/arrow-array/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs index 8d238b3a196c..23f950d55048 100644 --- a/arrow-array/src/temporal_conversions.rs +++ b/arrow-array/src/temporal_conversions.rs @@ -37,8 +37,18 @@ pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS; pub const MICROSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MICROSECONDS; /// Number of nanoseconds in a day pub const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS; -/// Number of days between 0001-01-01 and 1970-01-01 -pub const EPOCH_DAYS_FROM_CE: i32 = 719_163; + +/// Constant from chrono crate +/// +/// Number of days between Januari 1, 1970 and December 31, 1 BCE which we define to be day 0. 
+/// 4 full leap year cycles until December 31, 1600 4 * 146097 = 584388 +/// 1 day until January 1, 1601 1 +/// 369 years until Januari 1, 1970 369 * 365 = 134685 +/// of which floor(369 / 4) are leap years floor(369 / 4) = 92 +/// except for 1700, 1800 and 1900 -3 + +/// -------- +/// 719163 +pub const UNIX_EPOCH_DAY: i64 = 719_163; /// converts a `i32` representing a `date32` to [`NaiveDateTime`] #[inline] @@ -134,6 +144,31 @@ pub fn timestamp_s_to_datetime(v: i64) -> Option { Some(DateTime::from_timestamp(v, 0)?.naive_utc()) } +/// Similar to timestamp_s_to_datetime but only compute `date` +#[inline] +pub fn timestamp_s_to_date(secs: i64) -> Option { + let days = secs.div_euclid(86_400) + UNIX_EPOCH_DAY; + if days < i32::MIN as i64 || days > i32::MAX as i64 { + return None; + } + let date = NaiveDate::from_num_days_from_ce_opt(days as i32)?; + Some(date.and_time(NaiveTime::default()).and_utc().naive_utc()) +} + +/// Similar to timestamp_s_to_datetime but only compute `time` +#[inline] +pub fn timestamp_s_to_time(secs: i64) -> Option { + let secs = secs.rem_euclid(86_400); + let time = NaiveTime::from_num_seconds_from_midnight_opt(secs as u32, 0)?; + Some( + DateTime::::from_naive_utc_and_offset( + NaiveDateTime::new(NaiveDate::default(), time), + Utc, + ) + .naive_utc(), + ) +} + /// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`] #[inline] pub fn timestamp_ms_to_datetime(v: i64) -> Option { @@ -274,10 +309,28 @@ pub fn as_duration(v: i64) -> Option { mod tests { use crate::temporal_conversions::{ date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime, + timestamp_s_to_date, timestamp_s_to_datetime, timestamp_s_to_time, timestamp_us_to_datetime, NANOSECONDS, }; use chrono::DateTime; + #[test] + fn test_timestamp_func() { + let timestamp = 1234; + let datetime = timestamp_s_to_datetime(timestamp).unwrap(); + let expected_date = datetime.date(); + let expected_time = datetime.time(); + + assert_eq!( + timestamp_s_to_date(timestamp).unwrap().date(), + expected_date + ); + assert_eq!( + timestamp_s_to_time(timestamp).unwrap().time(), + expected_time + ); + } + #[test] fn negative_input_timestamp_ns_to_datetime() { assert_eq!( diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 92262fc04a57..3d8cfcdb112b 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -69,7 +69,7 @@ pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static { const DATA_TYPE: DataType; /// Returns the byte width of this primitive type. - #[deprecated(note = "Use ArrowNativeType::get_byte_width")] + #[deprecated(since = "52.0.0", note = "Use ArrowNativeType::get_byte_width")] fn get_byte_width() -> usize { std::mem::size_of::() } @@ -324,12 +324,6 @@ pub trait ArrowTimestampType: ArrowTemporalType { /// The [`TimeUnit`] of this timestamp. const UNIT: TimeUnit; - /// Returns the `TimeUnit` of this timestamp. 
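The new `timestamp_s_to_date`/`timestamp_s_to_time` helpers above avoid constructing a full `NaiveDateTime` when only one half is needed: whole days, shifted by `UNIX_EPOCH_DAY` onto chrono's day numbering, give the date, and the Euclidean remainder gives the time of day. A standalone sketch of that split (not part of this diff, reusing the same chrono calls):

```rust
use chrono::{NaiveDate, NaiveTime};

/// Days from chrono's day 0 (December 31, 1 BCE) to 1970-01-01
const UNIX_EPOCH_DAY: i64 = 719_163;

fn split_timestamp(secs: i64) -> Option<(NaiveDate, NaiveTime)> {
    let days = secs.div_euclid(86_400) + UNIX_EPOCH_DAY;
    let date = NaiveDate::from_num_days_from_ce_opt(i32::try_from(days).ok()?)?;
    let time = NaiveTime::from_num_seconds_from_midnight_opt(secs.rem_euclid(86_400) as u32, 0)?;
    Some((date, time))
}

fn main() {
    // 86_400 + 3_661 seconds after the epoch is 1970-01-02T01:01:01
    let (date, time) = split_timestamp(86_400 + 3_661).unwrap();
    assert_eq!(date, NaiveDate::from_ymd_opt(1970, 1, 2).unwrap());
    assert_eq!(time, NaiveTime::from_hms_opt(1, 1, 1).unwrap());
}
```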
- #[deprecated(note = "Use Self::UNIT")] - fn get_time_unit() -> TimeUnit { - Self::UNIT - } - /// Creates a ArrowTimestampType::Native from the provided [`NaiveDateTime`] /// /// See [`DataType::Timestamp`] for more information on timezone handling diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index d2436f0c15de..c103c2ecc0f3 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -39,11 +39,9 @@ deflate = ["flate2"] snappy = ["snap", "crc"] [dependencies] -arrow-array = { workspace = true } -arrow-buffer = { workspace = true } -arrow-cast = { workspace = true } -arrow-data = { workspace = true } -arrow-schema = { workspace = true } +arrow-schema = { workspace = true } +arrow-buffer = { workspace = true } +arrow-array = { workspace = true } serde_json = { version = "1.0", default-features = false, features = ["std"] } serde = { version = "1.0.188", features = ["derive"] } flate2 = { version = "1.0", default-features = false, features = ["rust_backend"], optional = true } @@ -53,4 +51,5 @@ crc = { version = "3.0", optional = true } [dev-dependencies] +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } diff --git a/arrow-avro/LICENSE.txt b/arrow-avro/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-avro/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-avro/NOTICE.txt b/arrow-avro/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-avro/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index 1e2acd99d828..2ac1ad038bd7 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -29,7 +29,7 @@ use std::sync::Arc; /// To accommodate this we special case two-variant unions where one of the /// variants is the null type, and use this to derive arrow's notion of nullability #[derive(Debug, Copy, Clone)] -enum Nulls { +pub enum Nullability { /// The nulls are encoded as the first union variant NullFirst, /// The nulls are encoded as the second union variant @@ -39,7 +39,7 @@ enum Nulls { /// An Avro datatype mapped to the arrow data model #[derive(Debug, Clone)] pub struct AvroDataType { - nulls: Option, + nullability: Option, metadata: HashMap, codec: Codec, } @@ -48,7 +48,15 @@ impl AvroDataType { /// Returns an arrow [`Field`] with the given name pub fn field_with_name(&self, name: &str) -> Field { let d = self.codec.data_type(); - Field::new(name, d, self.nulls.is_some()).with_metadata(self.metadata.clone()) + Field::new(name, d, self.nullability.is_some()).with_metadata(self.metadata.clone()) + } + + pub fn codec(&self) -> &Codec { + &self.codec + } + + pub fn nullability(&self) -> Option { + self.nullability } } @@ -65,9 +73,13 @@ impl AvroField { self.data_type.field_with_name(&self.name) } - /// Returns the [`Codec`] - pub fn codec(&self) -> &Codec { - &self.data_type.codec + /// Returns the [`AvroDataType`] + pub fn data_type(&self) -> &AvroDataType { + &self.data_type + } + + pub fn name(&self) -> &str { + &self.name } } @@ -114,7 +126,7 @@ pub enum Codec { Fixed(i32), List(Arc), Struct(Arc<[AvroField]>), - Duration, + Interval, } impl Codec { @@ -137,9 +149,11 @@ impl Codec { Self::TimestampMicros(is_utc) => { DataType::Timestamp(TimeUnit::Microsecond, is_utc.then(|| "+00:00".into())) } - Self::Duration => DataType::Interval(IntervalUnit::MonthDayNano), + Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano), 
Self::Fixed(size) => DataType::FixedSizeBinary(*size), - Self::List(f) => DataType::List(Arc::new(f.field_with_name("item"))), + Self::List(f) => { + DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME))) + } Self::Struct(f) => DataType::Struct(f.iter().map(|x| x.field()).collect()), } } @@ -198,7 +212,7 @@ fn make_data_type<'a>( ) -> Result { match schema { Schema::TypeName(TypeName::Primitive(p)) => Ok(AvroDataType { - nulls: None, + nullability: None, metadata: Default::default(), codec: (*p).into(), }), @@ -211,12 +225,12 @@ fn make_data_type<'a>( match (f.len() == 2, null) { (true, Some(0)) => { let mut field = make_data_type(&f[1], namespace, resolver)?; - field.nulls = Some(Nulls::NullFirst); + field.nullability = Some(Nullability::NullFirst); Ok(field) } (true, Some(1)) => { let mut field = make_data_type(&f[0], namespace, resolver)?; - field.nulls = Some(Nulls::NullSecond); + field.nullability = Some(Nullability::NullSecond); Ok(field) } _ => Err(ArrowError::NotYetImplemented(format!( @@ -239,7 +253,7 @@ fn make_data_type<'a>( .collect::>()?; let field = AvroDataType { - nulls: None, + nullability: None, codec: Codec::Struct(fields), metadata: r.attributes.field_metadata(), }; @@ -249,7 +263,7 @@ fn make_data_type<'a>( ComplexType::Array(a) => { let mut field = make_data_type(a.items.as_ref(), namespace, resolver)?; Ok(AvroDataType { - nulls: None, + nullability: None, metadata: a.attributes.field_metadata(), codec: Codec::List(Arc::new(field)), }) @@ -260,7 +274,7 @@ fn make_data_type<'a>( })?; let field = AvroDataType { - nulls: None, + nullability: None, metadata: f.attributes.field_metadata(), codec: Codec::Fixed(size), }; @@ -296,7 +310,7 @@ fn make_data_type<'a>( (Some("local-timestamp-micros"), c @ Codec::Int64) => { *c = Codec::TimestampMicros(false) } - (Some("duration"), c @ Codec::Fixed(12)) => *c = Codec::Duration, + (Some("duration"), c @ Codec::Fixed(12)) => *c = Codec::Interval, (Some(logical), _) => { // Insert unrecognized logical type into metadata map field.metadata.insert("logicalType".into(), logical.into()); diff --git a/arrow-avro/src/compression.rs b/arrow-avro/src/compression.rs index c5c7a6dabc33..f29b8dd07606 100644 --- a/arrow-avro/src/compression.rs +++ b/arrow-avro/src/compression.rs @@ -16,7 +16,6 @@ // under the License. use arrow_schema::ArrowError; -use flate2::read; use std::io; use std::io::Read; @@ -35,7 +34,7 @@ impl CompressionCodec { match self { #[cfg(feature = "deflate")] CompressionCodec::Deflate => { - let mut decoder = read::DeflateDecoder::new(block); + let mut decoder = flate2::read::DeflateDecoder::new(block); let mut out = Vec::new(); decoder.read_to_end(&mut out)?; Ok(out) diff --git a/arrow-avro/src/reader/cursor.rs b/arrow-avro/src/reader/cursor.rs new file mode 100644 index 000000000000..4b6a5a4d65db --- /dev/null +++ b/arrow-avro/src/reader/cursor.rs @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::reader::vlq::read_varint; +use arrow_schema::ArrowError; + +/// A wrapper around a byte slice, providing low-level decoding for Avro +/// +/// +#[derive(Debug)] +pub(crate) struct AvroCursor<'a> { + buf: &'a [u8], + start_len: usize, +} + +impl<'a> AvroCursor<'a> { + pub(crate) fn new(buf: &'a [u8]) -> Self { + Self { + buf, + start_len: buf.len(), + } + } + + /// Returns the current cursor position + #[inline] + pub(crate) fn position(&self) -> usize { + self.start_len - self.buf.len() + } + + /// Read a single `u8` + #[inline] + pub(crate) fn get_u8(&mut self) -> Result { + match self.buf.first().copied() { + Some(x) => { + self.buf = &self.buf[1..]; + Ok(x) + } + None => Err(ArrowError::ParseError("Unexpected EOF".to_string())), + } + } + + #[inline] + pub(crate) fn get_bool(&mut self) -> Result { + Ok(self.get_u8()? != 0) + } + + pub(crate) fn read_vlq(&mut self) -> Result { + let (val, offset) = read_varint(self.buf) + .ok_or_else(|| ArrowError::ParseError("bad varint".to_string()))?; + self.buf = &self.buf[offset..]; + Ok(val) + } + + #[inline] + pub(crate) fn get_int(&mut self) -> Result { + let varint = self.read_vlq()?; + let val: u32 = varint + .try_into() + .map_err(|_| ArrowError::ParseError("varint overflow".to_string()))?; + Ok((val >> 1) as i32 ^ -((val & 1) as i32)) + } + + #[inline] + pub(crate) fn get_long(&mut self) -> Result { + let val = self.read_vlq()?; + Ok((val >> 1) as i64 ^ -((val & 1) as i64)) + } + + pub(crate) fn get_bytes(&mut self) -> Result<&'a [u8], ArrowError> { + let len: usize = self.get_long()?.try_into().map_err(|_| { + ArrowError::ParseError("offset overflow reading avro bytes".to_string()) + })?; + + if (self.buf.len() < len) { + return Err(ArrowError::ParseError( + "Unexpected EOF reading bytes".to_string(), + )); + } + let ret = &self.buf[..len]; + self.buf = &self.buf[len..]; + Ok(ret) + } + + #[inline] + pub(crate) fn get_float(&mut self) -> Result { + if (self.buf.len() < 4) { + return Err(ArrowError::ParseError( + "Unexpected EOF reading float".to_string(), + )); + } + let ret = f32::from_le_bytes(self.buf[..4].try_into().unwrap()); + self.buf = &self.buf[4..]; + Ok(ret) + } + + #[inline] + pub(crate) fn get_double(&mut self) -> Result { + if (self.buf.len() < 8) { + return Err(ArrowError::ParseError( + "Unexpected EOF reading float".to_string(), + )); + } + let ret = f64::from_le_bytes(self.buf[..8].try_into().unwrap()); + self.buf = &self.buf[8..]; + Ok(ret) + } +} diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs index 19d48d1f89a1..98c285171bf3 100644 --- a/arrow-avro/src/reader/header.rs +++ b/arrow-avro/src/reader/header.rs @@ -19,7 +19,7 @@ use crate::compression::{CompressionCodec, CODEC_METADATA_KEY}; use crate::reader::vlq::VLQDecoder; -use crate::schema::Schema; +use crate::schema::{Schema, SCHEMA_METADATA_KEY}; use arrow_schema::ArrowError; #[derive(Debug)] @@ -89,6 +89,17 @@ impl Header { ))), } } + + /// Returns the [`Schema`] if any + pub fn schema(&self) -> Result>, ArrowError> { + self.get(SCHEMA_METADATA_KEY) + .map(|x| { + serde_json::from_slice(x).map_err(|e| { + 
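For context on `get_int`/`get_long` above: Avro stores integers as little-endian base-128 varints carrying a zigzag-mapped value, so small negative numbers stay short on the wire. A compact standalone sketch of that decoding (illustrative only, not the optimized paths in `vlq.rs`):

```rust
/// Decode an unsigned LEB128-style varint, returning the value and bytes consumed
fn read_uvarint(buf: &[u8]) -> Option<(u64, usize)> {
    let mut value = 0u64;
    for (i, &byte) in buf.iter().enumerate().take(10) {
        value |= u64::from(byte & 0x7F) << (7 * i);
        if byte & 0x80 == 0 {
            return Some((value, i + 1));
        }
    }
    None
}

/// Undo the zigzag mapping (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...)
fn zigzag_decode(v: u64) -> i64 {
    (v >> 1) as i64 ^ -((v & 1) as i64)
}

fn main() {
    // -151 zigzag-encodes to 301, whose varint bytes are [0xAD, 0x02]
    let (raw, read) = read_uvarint(&[0xAD, 0x02]).unwrap();
    assert_eq!((raw, read), (301, 2));
    assert_eq!(zigzag_decode(raw), -151);
}
```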
ArrowError::ParseError(format!("Failed to parse Avro schema JSON: {e}")) + }) + }) + .transpose() + } } /// A decoder for [`Header`] diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index 0151db7f855a..12fa67d9c8e3 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -26,6 +26,8 @@ mod header; mod block; +mod cursor; +mod record; mod vlq; /// Read a [`Header`] from the provided [`BufRead`] @@ -73,35 +75,144 @@ fn read_blocks(mut reader: R) -> impl Iterator RecordBatch { + let file = File::open(file).unwrap(); + let mut reader = BufReader::new(file); + let header = read_header(&mut reader).unwrap(); + let compression = header.compression().unwrap(); + let schema = header.schema().unwrap().unwrap(); + let root = AvroField::try_from(&schema).unwrap(); + let mut decoder = RecordDecoder::try_new(root.data_type()).unwrap(); + + for result in read_blocks(reader) { + let block = result.unwrap(); + assert_eq!(block.sync, header.sync()); + if let Some(c) = compression { + let decompressed = c.decompress(&block.data).unwrap(); + + let mut offset = 0; + let mut remaining = block.count; + while remaining > 0 { + let to_read = remaining.max(batch_size); + offset += decoder + .decode(&decompressed[offset..], block.count) + .unwrap(); + + remaining -= to_read; + } + assert_eq!(offset, decompressed.len()); + } + } + decoder.flush().unwrap() + } #[test] - fn test_mux() { + fn test_alltypes() { let files = [ "avro/alltypes_plain.avro", "avro/alltypes_plain.snappy.avro", "avro/alltypes_plain.zstandard.avro", - "avro/alltypes_nulls_plain.avro", ]; + let expected = RecordBatch::try_from_iter_with_nullable([ + ( + "id", + Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1])) as _, + true, + ), + ( + "bool_col", + Arc::new(BooleanArray::from_iter((0..8).map(|x| Some(x % 2 == 0)))) as _, + true, + ), + ( + "tinyint_col", + Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _, + true, + ), + ( + "smallint_col", + Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _, + true, + ), + ( + "int_col", + Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _, + true, + ), + ( + "bigint_col", + Arc::new(Int64Array::from_iter_values((0..8).map(|x| (x % 2) * 10))) as _, + true, + ), + ( + "float_col", + Arc::new(Float32Array::from_iter_values( + (0..8).map(|x| (x % 2) as f32 * 1.1), + )) as _, + true, + ), + ( + "double_col", + Arc::new(Float64Array::from_iter_values( + (0..8).map(|x| (x % 2) as f64 * 10.1), + )) as _, + true, + ), + ( + "date_string_col", + Arc::new(BinaryArray::from_iter_values([ + [48, 51, 47, 48, 49, 47, 48, 57], + [48, 51, 47, 48, 49, 47, 48, 57], + [48, 52, 47, 48, 49, 47, 48, 57], + [48, 52, 47, 48, 49, 47, 48, 57], + [48, 50, 47, 48, 49, 47, 48, 57], + [48, 50, 47, 48, 49, 47, 48, 57], + [48, 49, 47, 48, 49, 47, 48, 57], + [48, 49, 47, 48, 49, 47, 48, 57], + ])) as _, + true, + ), + ( + "string_col", + Arc::new(BinaryArray::from_iter_values((0..8).map(|x| [48 + x % 2]))) as _, + true, + ), + ( + "timestamp_col", + Arc::new( + TimestampMicrosecondArray::from_iter_values([ + 1235865600000000, // 2009-03-01T00:00:00.000 + 1235865660000000, // 2009-03-01T00:01:00.000 + 1238544000000000, // 2009-04-01T00:00:00.000 + 1238544060000000, // 2009-04-01T00:01:00.000 + 1233446400000000, // 2009-02-01T00:00:00.000 + 1233446460000000, // 2009-02-01T00:01:00.000 + 1230768000000000, // 2009-01-01T00:00:00.000 + 1230768060000000, // 2009-01-01T00:01:00.000 + ]) + .with_timezone("+00:00"), + ) as _, + true, + ), + ]) + 
.unwrap(); + for file in files { - println!("file: {file}"); - let file = File::open(arrow_test_data(file)).unwrap(); - let mut reader = BufReader::new(file); - let header = read_header(&mut reader).unwrap(); - let compression = header.compression().unwrap(); - println!("compression: {compression:?}"); - for result in read_blocks(reader) { - let block = result.unwrap(); - assert_eq!(block.sync, header.sync()); - if let Some(c) = compression { - c.decompress(&block.data).unwrap(); - } - } + let file = arrow_test_data(file); + + assert_eq!(read_file(&file, 8), expected); + assert_eq!(read_file(&file, 3), expected); } } } diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs new file mode 100644 index 000000000000..52a58cf63303 --- /dev/null +++ b/arrow-avro/src/reader/record.rs @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::codec::{AvroDataType, Codec, Nullability}; +use crate::reader::block::{Block, BlockDecoder}; +use crate::reader::cursor::AvroCursor; +use crate::reader::header::Header; +use crate::schema::*; +use arrow_array::types::*; +use arrow_array::*; +use arrow_buffer::*; +use arrow_schema::{ + ArrowError, DataType, Field as ArrowField, FieldRef, Fields, Schema as ArrowSchema, SchemaRef, +}; +use std::collections::HashMap; +use std::io::Read; +use std::sync::Arc; + +/// Decodes avro encoded data into [`RecordBatch`] +pub struct RecordDecoder { + schema: SchemaRef, + fields: Vec, +} + +impl RecordDecoder { + pub fn try_new(data_type: &AvroDataType) -> Result { + match Decoder::try_new(data_type)? 
{ + Decoder::Record(fields, encodings) => Ok(Self { + schema: Arc::new(ArrowSchema::new(fields)), + fields: encodings, + }), + encoding => Err(ArrowError::ParseError(format!( + "Expected record got {encoding:?}" + ))), + } + } + + pub fn schema(&self) -> &SchemaRef { + &self.schema + } + + /// Decode `count` records from `buf` + pub fn decode(&mut self, buf: &[u8], count: usize) -> Result { + let mut cursor = AvroCursor::new(buf); + for _ in 0..count { + for field in &mut self.fields { + field.decode(&mut cursor)?; + } + } + Ok(cursor.position()) + } + + /// Flush the decoded records into a [`RecordBatch`] + pub fn flush(&mut self) -> Result { + let arrays = self + .fields + .iter_mut() + .map(|x| x.flush(None)) + .collect::, _>>()?; + + RecordBatch::try_new(self.schema.clone(), arrays) + } +} + +#[derive(Debug)] +enum Decoder { + Null(usize), + Boolean(BooleanBufferBuilder), + Int32(Vec), + Int64(Vec), + Float32(Vec), + Float64(Vec), + Date32(Vec), + TimeMillis(Vec), + TimeMicros(Vec), + TimestampMillis(bool, Vec), + TimestampMicros(bool, Vec), + Binary(OffsetBufferBuilder, Vec), + String(OffsetBufferBuilder, Vec), + List(FieldRef, OffsetBufferBuilder, Box), + Record(Fields, Vec), + Nullable(Nullability, NullBufferBuilder, Box), +} + +impl Decoder { + fn try_new(data_type: &AvroDataType) -> Result { + let nyi = |s: &str| Err(ArrowError::NotYetImplemented(s.to_string())); + + let decoder = match data_type.codec() { + Codec::Null => Self::Null(0), + Codec::Boolean => Self::Boolean(BooleanBufferBuilder::new(DEFAULT_CAPACITY)), + Codec::Int32 => Self::Int32(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::Int64 => Self::Int64(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::Float32 => Self::Float32(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::Float64 => Self::Float64(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::Binary => Self::Binary( + OffsetBufferBuilder::new(DEFAULT_CAPACITY), + Vec::with_capacity(DEFAULT_CAPACITY), + ), + Codec::Utf8 => Self::String( + OffsetBufferBuilder::new(DEFAULT_CAPACITY), + Vec::with_capacity(DEFAULT_CAPACITY), + ), + Codec::Date32 => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::TimeMillis => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::TimeMicros => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)), + Codec::TimestampMillis(is_utc) => { + Self::TimestampMillis(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + } + Codec::TimestampMicros(is_utc) => { + Self::TimestampMicros(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + } + Codec::Fixed(_) => return nyi("decoding fixed"), + Codec::Interval => return nyi("decoding interval"), + Codec::List(item) => { + let decoder = Self::try_new(item)?; + Self::List( + Arc::new(item.field_with_name("item")), + OffsetBufferBuilder::new(DEFAULT_CAPACITY), + Box::new(decoder), + ) + } + Codec::Struct(fields) => { + let mut arrow_fields = Vec::with_capacity(fields.len()); + let mut encodings = Vec::with_capacity(fields.len()); + for avro_field in fields.iter() { + let encoding = Self::try_new(avro_field.data_type())?; + arrow_fields.push(avro_field.field()); + encodings.push(encoding); + } + Self::Record(arrow_fields.into(), encodings) + } + }; + + Ok(match data_type.nullability() { + Some(nullability) => Self::Nullable( + nullability, + NullBufferBuilder::new(DEFAULT_CAPACITY), + Box::new(decoder), + ), + None => decoder, + }) + } + + /// Append a null record + fn append_null(&mut self) { + match self { + Self::Null(count) => *count += 1, + Self::Boolean(b) => b.append(false), + 
Self::Int32(v) | Self::Date32(v) | Self::TimeMillis(v) => v.push(0), + Self::Int64(v) + | Self::TimeMicros(v) + | Self::TimestampMillis(_, v) + | Self::TimestampMicros(_, v) => v.push(0), + Self::Float32(v) => v.push(0.), + Self::Float64(v) => v.push(0.), + Self::Binary(offsets, _) | Self::String(offsets, _) => offsets.push_length(0), + Self::List(_, offsets, e) => { + offsets.push_length(0); + e.append_null(); + } + Self::Record(_, e) => e.iter_mut().for_each(|e| e.append_null()), + Self::Nullable(_, _, _) => unreachable!("Nulls cannot be nested"), + } + } + + /// Decode a single record from `buf` + fn decode(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), ArrowError> { + match self { + Self::Null(x) => *x += 1, + Self::Boolean(values) => values.append(buf.get_bool()?), + Self::Int32(values) | Self::Date32(values) | Self::TimeMillis(values) => { + values.push(buf.get_int()?) + } + Self::Int64(values) + | Self::TimeMicros(values) + | Self::TimestampMillis(_, values) + | Self::TimestampMicros(_, values) => values.push(buf.get_long()?), + Self::Float32(values) => values.push(buf.get_float()?), + Self::Float64(values) => values.push(buf.get_double()?), + Self::Binary(offsets, values) | Self::String(offsets, values) => { + let data = buf.get_bytes()?; + offsets.push_length(data.len()); + values.extend_from_slice(data); + } + Self::List(_, _, _) => { + return Err(ArrowError::NotYetImplemented( + "Decoding ListArray".to_string(), + )) + } + Self::Record(_, encodings) => { + for encoding in encodings { + encoding.decode(buf)?; + } + } + Self::Nullable(nullability, nulls, e) => { + let is_valid = buf.get_bool()? == matches!(nullability, Nullability::NullFirst); + nulls.append(is_valid); + match is_valid { + true => e.decode(buf)?, + false => e.append_null(), + } + } + } + Ok(()) + } + + /// Flush decoded records to an [`ArrayRef`] + fn flush(&mut self, nulls: Option) -> Result { + Ok(match self { + Self::Nullable(_, n, e) => e.flush(n.finish())?, + Self::Null(size) => Arc::new(NullArray::new(std::mem::replace(size, 0))), + Self::Boolean(b) => Arc::new(BooleanArray::new(b.finish(), nulls)), + Self::Int32(values) => Arc::new(flush_primitive::(values, nulls)), + Self::Date32(values) => Arc::new(flush_primitive::(values, nulls)), + Self::Int64(values) => Arc::new(flush_primitive::(values, nulls)), + Self::TimeMillis(values) => { + Arc::new(flush_primitive::(values, nulls)) + } + Self::TimeMicros(values) => { + Arc::new(flush_primitive::(values, nulls)) + } + Self::TimestampMillis(is_utc, values) => Arc::new( + flush_primitive::(values, nulls) + .with_timezone_opt(is_utc.then(|| "+00:00")), + ), + Self::TimestampMicros(is_utc, values) => Arc::new( + flush_primitive::(values, nulls) + .with_timezone_opt(is_utc.then(|| "+00:00")), + ), + Self::Float32(values) => Arc::new(flush_primitive::(values, nulls)), + Self::Float64(values) => Arc::new(flush_primitive::(values, nulls)), + + Self::Binary(offsets, values) => { + let offsets = flush_offsets(offsets); + let values = flush_values(values).into(); + Arc::new(BinaryArray::new(offsets, values, nulls)) + } + Self::String(offsets, values) => { + let offsets = flush_offsets(offsets); + let values = flush_values(values).into(); + Arc::new(StringArray::new(offsets, values, nulls)) + } + Self::List(field, offsets, values) => { + let values = values.flush(None)?; + let offsets = flush_offsets(offsets); + Arc::new(ListArray::new(field.clone(), offsets, values, nulls)) + } + Self::Record(fields, encodings) => { + let arrays = encodings + .iter_mut() + .map(|x| 
x.flush(None)) + .collect::, _>>()?; + Arc::new(StructArray::new(fields.clone(), arrays, nulls)) + } + }) + } +} + +#[inline] +fn flush_values(values: &mut Vec) -> Vec { + std::mem::replace(values, Vec::with_capacity(DEFAULT_CAPACITY)) +} + +#[inline] +fn flush_offsets(offsets: &mut OffsetBufferBuilder) -> OffsetBuffer { + std::mem::replace(offsets, OffsetBufferBuilder::new(DEFAULT_CAPACITY)).finish() +} + +#[inline] +fn flush_primitive( + values: &mut Vec, + nulls: Option, +) -> PrimitiveArray { + PrimitiveArray::new(flush_values(values).into(), nulls) +} + +const DEFAULT_CAPACITY: usize = 1024; diff --git a/arrow-avro/src/reader/vlq.rs b/arrow-avro/src/reader/vlq.rs index 80f1c60eec7d..b198a0d66f24 100644 --- a/arrow-avro/src/reader/vlq.rs +++ b/arrow-avro/src/reader/vlq.rs @@ -44,3 +44,91 @@ impl VLQDecoder { None } } + +/// Read a varint from `buf` returning the decoded `u64` and the number of bytes read +#[inline] +pub(crate) fn read_varint(buf: &[u8]) -> Option<(u64, usize)> { + let first = *buf.first()?; + if first < 0x80 { + return Some((first as u64, 1)); + } + + if let Some(array) = buf.get(..10) { + return read_varint_array(array.try_into().unwrap()); + } + + read_varint_slow(buf) +} + +/// Based on +/// - +/// - +/// - +#[inline] +fn read_varint_array(buf: [u8; 10]) -> Option<(u64, usize)> { + let mut in_progress = 0_u64; + for (idx, b) in buf.into_iter().take(9).enumerate() { + in_progress += (b as u64) << (7 * idx); + if b < 0x80 { + return Some((in_progress, idx + 1)); + } + in_progress -= 0x80 << (7 * idx); + } + + let b = buf[9] as u64; + in_progress += b << (7 * 9); + (b < 0x02).then_some((in_progress, 10)) +} + +#[inline(never)] +#[cold] +fn read_varint_slow(buf: &[u8]) -> Option<(u64, usize)> { + let mut value = 0; + for (count, byte) in buf.iter().take(10).enumerate() { + let byte = buf[count]; + value |= u64::from(byte & 0x7F) << (count * 7); + if byte <= 0x7F { + // Check for u64::MAX overflow. See [`ConsumeVarint`][1] for details. 
+ // [1]: https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/encoding/protowire/wire.go#L358 + return (count != 9 || byte < 2).then_some((value, count + 1)); + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + fn encode_var(mut n: u64, dst: &mut [u8]) -> usize { + let mut i = 0; + + while n >= 0x80 { + dst[i] = 0x80 | (n as u8); + i += 1; + n >>= 7; + } + + dst[i] = n as u8; + i + 1 + } + + fn varint_test(a: u64) { + let mut buf = [0_u8; 10]; + let len = encode_var(a, &mut buf); + assert_eq!(read_varint(&buf[..len]).unwrap(), (a, len)); + assert_eq!(read_varint(&buf).unwrap(), (a, len)); + } + + #[test] + fn test_varint() { + varint_test(0); + varint_test(4395932); + varint_test(u64::MAX); + + for _ in 0..1000 { + varint_test(rand::random()); + } + } +} diff --git a/arrow-buffer/LICENSE.txt b/arrow-buffer/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-buffer/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-buffer/NOTICE.txt b/arrow-buffer/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-buffer/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index 49a75b468dbe..aaa86832f692 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -52,8 +52,12 @@ impl BooleanBuffer { /// This method will panic if `buffer` is not large enough pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { let total_len = offset.saturating_add(len); - let bit_len = buffer.len().saturating_mul(8); - assert!(total_len <= bit_len); + let buffer_len = buffer.len(); + let bit_len = buffer_len.saturating_mul(8); + assert!( + total_len <= bit_len, + "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})" + ); Self { buffer, offset, @@ -96,17 +100,6 @@ impl BooleanBuffer { BitChunks::new(self.values(), self.offset, self.len) } - /// Returns `true` if the bit at index `i` is set - /// - /// # Panics - /// - /// Panics if `i >= self.len()` - #[inline] - #[deprecated(note = "use BooleanBuffer::value")] - pub fn is_set(&self, i: usize) -> bool { - self.value(i) - } - /// Returns the offset of this [`BooleanBuffer`] in bits #[inline] pub fn offset(&self) -> usize { @@ -125,6 +118,12 @@ impl BooleanBuffer { self.len == 0 } + /// Free up unused memory. + pub fn shrink_to_fit(&mut self) { + // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer + self.buffer.shrink_to_fit(); + } + /// Returns the boolean value at index `i`. 
/// /// # Panics diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 8d1a46583fca..d0c8ffa39783 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -20,10 +20,10 @@ use std::fmt::Debug; use std::ptr::NonNull; use std::sync::Arc; -use crate::alloc::{Allocation, Deallocation, ALIGNMENT}; +use crate::alloc::{Allocation, Deallocation}; use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; use crate::BufferBuilder; -use crate::{bytes::Bytes, native::ArrowNativeType}; +use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; @@ -99,26 +99,6 @@ impl Buffer { buffer.into() } - /// Creates a buffer from an existing aligned memory region (must already be byte-aligned), this - /// `Buffer` will free this piece of memory when dropped. - /// - /// # Arguments - /// - /// * `ptr` - Pointer to raw parts - /// * `len` - Length of raw parts in **bytes** - /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes** - /// - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` - /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed. - #[deprecated(note = "Use Buffer::from_vec")] - pub unsafe fn from_raw_parts(ptr: NonNull, len: usize, capacity: usize) -> Self { - assert!(len <= capacity); - let layout = Layout::from_size_align(capacity, ALIGNMENT).unwrap(); - Buffer::build_with_arguments(ptr, len, Deallocation::Standard(layout)) - } - /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. /// @@ -167,6 +147,41 @@ impl Buffer { self.data.capacity() } + /// Tried to shrink the capacity of the buffer as much as possible, freeing unused memory. + /// + /// If the buffer is shared, this is a no-op. + /// + /// If the memory was allocated with a custom allocator, this is a no-op. + /// + /// If the capacity is already less than or equal to the desired capacity, this is a no-op. + /// + /// The memory region will be reallocated using `std::alloc::realloc`. + pub fn shrink_to_fit(&mut self) { + let offset = self.ptr_offset(); + let is_empty = self.is_empty(); + let desired_capacity = if is_empty { + 0 + } else { + // For realloc to work, we cannot free the elements before the offset + offset + self.len() + }; + if desired_capacity < self.capacity() { + if let Some(bytes) = Arc::get_mut(&mut self.data) { + if bytes.try_realloc(desired_capacity).is_ok() { + // Realloc complete - update our pointer into `bytes`: + self.ptr = if is_empty { + bytes.as_ptr() + } else { + // SAFETY: we kept all elements leading up to the offset + unsafe { bytes.as_ptr().add(offset) } + } + } else { + // Failure to reallocate is fine; we just failed to free up memory. + } + } + } + } + /// Returns whether the buffer is empty. #[inline] pub fn is_empty(&self) -> bool { @@ -265,7 +280,7 @@ impl Buffer { /// otherwise a new buffer is allocated and filled with a copy of the bits in the range. 
pub fn bit_slice(&self, offset: usize, len: usize) -> Self { if offset % 8 == 0 { - return self.slice(offset / 8); + return self.slice_with_length(offset / 8, bit_util::ceil(len, 8)); } bitwise_unary_op_helper(self, offset, len, |a| a) @@ -278,14 +293,6 @@ impl Buffer { BitChunks::new(self.as_slice(), offset, len) } - /// Returns the number of 1-bits in this buffer. - #[deprecated(note = "use count_set_bits_offset instead")] - pub fn count_set_bits(&self) -> usize { - let len_in_bits = self.len() * 8; - // self.offset is already taken into consideration by the bit_chunks implementation - self.count_set_bits_offset(0, len_in_bits) - } - /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits /// inspected. Note that both `offset` and `length` are measured in bits. pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize { @@ -295,6 +302,8 @@ impl Buffer { /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared. /// Returns `Err` if this is shared or its allocation is from an external source or /// it is not allocated with alignment [`ALIGNMENT`] + /// + /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT pub fn into_mutable(self) -> Result { let ptr = self.ptr; let length = self.length; @@ -562,6 +571,34 @@ mod tests { assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]); } + #[test] + fn test_shrink_to_fit() { + let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(original.capacity(), 64); + + let slice = original.slice_with_length(2, 3); + drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work) + assert_eq!(slice.as_slice(), &[2, 3, 4]); + assert_eq!(slice.capacity(), 64); + + let mut shrunk = slice; + shrunk.shrink_to_fit(); + assert_eq!(shrunk.as_slice(), &[2, 3, 4]); + assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset + + // Test that we can handle empty slices: + let empty_slice = shrunk.slice_with_length(1, 0); + drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work) + assert_eq!(empty_slice.as_slice(), &[]); + assert_eq!(empty_slice.capacity(), 5); + + let mut shrunk_empty = empty_slice; + shrunk_empty.shrink_to_fit(); + assert_eq!(shrunk_empty.as_slice(), &[]); + assert_eq!(shrunk_empty.capacity(), 0); + } + #[test] #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn test_slice_offset_out_of_bound() { @@ -860,4 +897,37 @@ mod tests { let iter_len = usize::MAX / std::mem::size_of::() + 1; let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len)); } + + #[test] + fn bit_slice_length_preserved() { + // Create a boring buffer + let buf = Buffer::from_iter(std::iter::repeat(true).take(64)); + + let assert_preserved = |offset: usize, len: usize| { + let new_buf = buf.bit_slice(offset, len); + assert_eq!(new_buf.len(), bit_util::ceil(len, 8)); + + // if the offset is not byte-aligned, we have to create a deep copy to a new buffer + // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so + // checking the offset should always return 0 if so. If the offset IS byte-aligned, we + // want to make sure it doesn't unnecessarily create a deep copy. 
+ if offset % 8 == 0 { + assert_eq!(new_buf.ptr_offset(), offset / 8); + } else { + assert_eq!(new_buf.ptr_offset(), 0); + } + }; + + // go through every available value for offset + for o in 0..=64 { + // and go through every length that could accompany that offset - we can't have a + // situation where offset + len > 64, because that would go past the end of the buffer, + // so we use the map to ensure it's in range. + for l in (o..=64).map(|l| l - o) { + // and we just want to make sure every one of these keeps its offset and length + // when neeeded + assert_preserved(o, l); + } + } + } } diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index 7fcbd89dd262..c4315a1d64cd 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -118,13 +118,6 @@ impl MutableBuffer { Self { data, len, layout } } - /// Create a [`MutableBuffer`] from the provided [`Vec`] without copying - #[inline] - #[deprecated(note = "Use From>")] - pub fn from_vec(vec: Vec) -> Self { - Self::from(vec) - } - /// Allocates a new [MutableBuffer] from given `Bytes`. pub(crate) fn from_bytes(bytes: Bytes) -> Result { let layout = match bytes.deallocation() { @@ -331,15 +324,6 @@ impl MutableBuffer { self.data.as_ptr() } - #[deprecated( - since = "2.0.0", - note = "This method is deprecated in favour of `into` from the trait `Into`." - )] - /// Freezes this buffer and return an immutable version of it. - pub fn freeze(self) -> Buffer { - self.into_buffer() - } - #[inline] pub(super) fn into_buffer(self) -> Buffer { let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) }; @@ -483,10 +467,13 @@ impl MutableBuffer { } } +/// Creates a non-null pointer with alignment of [`ALIGNMENT`] +/// +/// This is similar to [`NonNull::dangling`] #[inline] -fn dangling_ptr() -> NonNull { - // SAFETY: ALIGNMENT is a non-zero usize which is then casted - // to a *mut T. Therefore, `ptr` is not null and the conditions for +pub(crate) fn dangling_ptr() -> NonNull { + // SAFETY: ALIGNMENT is a non-zero usize which is then cast + // to a *mut u8. Therefore, `ptr` is not null and the conditions for // calling new_unchecked() are respected. #[cfg(miri)] { diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index c79aef398059..ec12b885eb5a 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -130,6 +130,11 @@ impl NullBuffer { self.buffer.is_empty() } + /// Free up unused memory. + pub fn shrink_to_fit(&mut self) { + self.buffer.shrink_to_fit(); + } + /// Returns the null count for this [`NullBuffer`] #[inline] pub fn null_count(&self) -> usize { @@ -235,6 +240,12 @@ impl From<&[bool]> for NullBuffer { } } +impl From<&[bool; N]> for NullBuffer { + fn from(value: &[bool; N]) -> Self { + value[..].into() + } +} + impl From> for NullBuffer { fn from(value: Vec) -> Self { BooleanBuffer::from(value).into() diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index e9087d30098c..a6be2b67af84 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -133,6 +133,11 @@ impl OffsetBuffer { Self(out.into()) } + /// Free up unused memory. 
+ pub fn shrink_to_fit(&mut self) { + self.0.shrink_to_fit(); + } + /// Returns the inner [`ScalarBuffer`] pub fn inner(&self) -> &ScalarBuffer { &self.0 diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs index 3dbbe344a025..cc6d19044feb 100644 --- a/arrow-buffer/src/buffer/run.rs +++ b/arrow-buffer/src/buffer/run.rs @@ -136,6 +136,12 @@ where self.len == 0 } + /// Free up unused memory. + pub fn shrink_to_fit(&mut self) { + // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer + self.run_ends.shrink_to_fit(); + } + /// Returns the values of this [`RunEndBuffer`] not including any offset #[inline] pub fn values(&self) -> &[E] { diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 343b8549e93d..ab6c87168e5c 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -72,6 +72,11 @@ impl ScalarBuffer { buffer.slice_with_length(byte_offset, byte_len).into() } + /// Free up unused memory. + pub fn shrink_to_fit(&mut self) { + self.buffer.shrink_to_fit(); + } + /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` pub fn slice(&self, offset: usize, len: usize) -> Self { Self::new(self.buffer.clone(), offset, len) diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs index ba61342d8e39..77724137aef7 100644 --- a/arrow-buffer/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -24,6 +24,7 @@ use std::ptr::NonNull; use std::{fmt::Debug, fmt::Formatter}; use crate::alloc::Deallocation; +use crate::buffer::dangling_ptr; /// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself. /// @@ -96,6 +97,48 @@ impl Bytes { } } + /// Try to reallocate the underlying memory region to a new size (smaller or larger). + /// + /// Only works for bytes allocated with the standard allocator. + /// Returns `Err` if the memory was allocated with a custom allocator, + /// or the call to `realloc` failed, for whatever reason. + /// In case of `Err`, the [`Bytes`] will remain as it was (i.e. have the old size). 
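// Standalone sketch of the reallocation pattern that `try_realloc` uses below,
// hand-rolled here with std::alloc for illustration; `resize_region` and
// `realloc_sketch` are hypothetical helpers, not APIs of this crate. A zero-sized
// target frees the block and hands back a dangling pointer, mirroring the
// `dangling_ptr()` branch of the real implementation.
use std::alloc::{alloc, dealloc, realloc, Layout};
use std::ptr::NonNull;

unsafe fn resize_region(
    ptr: NonNull<u8>,
    old: Layout,
    new_len: usize,
) -> Option<(NonNull<u8>, Layout)> {
    let new = Layout::from_size_align(new_len, old.align()).ok()?;
    if new_len == 0 {
        // `realloc` must not be called with a zero size: deallocate and return a
        // dangling pointer (never dereferenced, never freed) instead.
        dealloc(ptr.as_ptr(), old);
        return Some((NonNull::dangling(), new));
    }
    // On success the block keeps its alignment and takes on the new size.
    NonNull::new(realloc(ptr.as_ptr(), old, new_len)).map(|p| (p, new))
}

fn realloc_sketch() {
    let layout = Layout::from_size_align(64, 8).unwrap();
    unsafe {
        let ptr = NonNull::new(alloc(layout)).expect("allocation failed");
        let (ptr, layout) = resize_region(ptr, layout, 5).expect("realloc failed");
        dealloc(ptr.as_ptr(), layout); // freed with the post-realloc layout
    }
}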
+ pub fn try_realloc(&mut self, new_len: usize) -> Result<(), ()> { + if let Deallocation::Standard(old_layout) = self.deallocation { + if old_layout.size() == new_len { + return Ok(()); // Nothing to do + } + + if let Ok(new_layout) = std::alloc::Layout::from_size_align(new_len, old_layout.align()) + { + let old_ptr = self.ptr.as_ptr(); + + let new_ptr = match new_layout.size() { + 0 => { + // SAFETY: Verified that old_layout.size != new_len (0) + unsafe { std::alloc::dealloc(self.ptr.as_ptr(), old_layout) }; + Some(dangling_ptr()) + } + // SAFETY: the call to `realloc` is safe if all the following hold (from https://doc.rust-lang.org/stable/std/alloc/trait.GlobalAlloc.html#method.realloc): + // * `old_ptr` must be currently allocated via this allocator (guaranteed by the invariant/contract of `Bytes`) + // * `old_layout` must be the same layout that was used to allocate that block of memory (same) + // * `new_len` must be greater than zero + // * `new_len`, when rounded up to the nearest multiple of `layout.align()`, must not overflow `isize` (guaranteed by the success of `Layout::from_size_align`) + _ => NonNull::new(unsafe { std::alloc::realloc(old_ptr, old_layout, new_len) }), + }; + + if let Some(ptr) = new_ptr { + self.ptr = ptr; + self.len = new_len; + self.deallocation = Deallocation::Standard(new_layout); + return Ok(()); + } + } + } + + Err(()) + } + #[inline] pub(crate) fn deallocation(&self) -> &Deallocation { &self.deallocation diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs index c563f73cf5b9..eb8e067db0be 100644 --- a/arrow-buffer/src/native.rs +++ b/arrow-buffer/src/native.rs @@ -88,30 +88,6 @@ pub trait ArrowNativeType: /// Returns `None` if [`Self`] is not an integer or conversion would result /// in truncation/overflow fn to_i64(self) -> Option; - - /// Convert native type from i32. - /// - /// Returns `None` if [`Self`] is not `i32` - #[deprecated(note = "please use `Option::Some` instead")] - fn from_i32(_: i32) -> Option { - None - } - - /// Convert native type from i64. - /// - /// Returns `None` if [`Self`] is not `i64` - #[deprecated(note = "please use `Option::Some` instead")] - fn from_i64(_: i64) -> Option { - None - } - - /// Convert native type from i128. - /// - /// Returns `None` if [`Self`] is not `i128` - #[deprecated(note = "please use `Option::Some` instead")] - fn from_i128(_: i128) -> Option { - None - } } macro_rules! native_integer { @@ -147,23 +123,15 @@ macro_rules! 
native_integer { fn usize_as(i: usize) -> Self { i as _ } - - - $( - #[inline] - fn $from(v: $t) -> Option { - Some(v) - } - )* } }; } native_integer!(i8); native_integer!(i16); -native_integer!(i32, from_i32); -native_integer!(i64, from_i64); -native_integer!(i128, from_i128); +native_integer!(i32); +native_integer!(i64); +native_integer!(i128); native_integer!(u8); native_integer!(u16); native_integer!(u32); diff --git a/arrow-cast/LICENSE.txt b/arrow-cast/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-cast/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-cast/NOTICE.txt b/arrow-cast/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-cast/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index d6b2f884f753..ba82ca9040c7 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -111,9 +111,13 @@ where O::Native::from_decimal(adjusted) }; - Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + Ok(if cast_options.safe { + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + } else { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + })? }) } @@ -137,15 +141,20 @@ where let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); - Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + Ok(if cast_options.safe { + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + } else { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + })? }) } // Only support one type of decimal cast operations pub(crate) fn cast_decimal_to_decimal_same_type( array: &PrimitiveArray, + input_precision: u8, input_scale: i8, output_precision: u8, output_scale: i8, @@ -155,20 +164,11 @@ where T: DecimalType, T::Native: DecimalCast + ArrowNativeTypeOp, { - let array: PrimitiveArray = match input_scale.cmp(&output_scale) { - Ordering::Equal => { - // the scale doesn't change, the native value don't need to be changed + let array: PrimitiveArray = + if input_scale == output_scale && input_precision <= output_precision { array.clone() - } - Ordering::Greater => convert_to_smaller_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )?, - Ordering::Less => { - // input_scale < output_scale + } else if input_scale < output_scale { + // the scale doesn't change, but precision may change and cause overflow convert_to_bigger_or_equal_scale_decimal::( array, input_scale, @@ -176,8 +176,15 @@ where output_scale, cast_options, )? - } - }; + } else { + convert_to_smaller_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? 
+ }; Ok(Arc::new(array.with_precision_and_scale( output_precision, @@ -323,8 +330,8 @@ where }) } -pub(crate) fn string_to_decimal_cast( - from: &GenericStringArray, +pub(crate) fn generic_string_to_decimal_cast<'a, T, S>( + from: &'a S, precision: u8, scale: i8, cast_options: &CastOptions, @@ -332,6 +339,7 @@ pub(crate) fn string_to_decimal_cast( where T: DecimalType, T::Native: DecimalCast + ArrowNativeTypeOp, + &'a S: StringArrayType<'a>, { if cast_options.safe { let iter = from.iter().map(|v| { @@ -375,6 +383,37 @@ where } } +pub(crate) fn string_to_decimal_cast( + from: &GenericStringArray, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result, ArrowError> +where + T: DecimalType, + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + generic_string_to_decimal_cast::>( + from, + precision, + scale, + cast_options, + ) +} + +pub(crate) fn string_view_to_decimal_cast( + from: &StringViewArray, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result, ArrowError> +where + T: DecimalType, + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + generic_string_to_decimal_cast::(from, precision, scale, cast_options) +} + /// Cast Utf8 to decimal pub(crate) fn cast_string_to_decimal( from: &dyn Array, @@ -399,14 +438,30 @@ where ))); } - Ok(Arc::new(string_to_decimal_cast::( - from.as_any() - .downcast_ref::>() - .unwrap(), - precision, - scale, - cast_options, - )?)) + let result = match from.data_type() { + DataType::Utf8View => string_view_to_decimal_cast::( + from.as_any().downcast_ref::().unwrap(), + precision, + scale, + cast_options, + )?, + DataType::Utf8 | DataType::LargeUtf8 => string_to_decimal_cast::( + from.as_any() + .downcast_ref::>() + .unwrap(), + precision, + scale, + cast_options, + )?, + other => { + return Err(ArrowError::ComputeError(format!( + "Cannot cast {:?} to decimal", + other + ))) + } + }; + + Ok(Arc::new(result)) } pub(crate) fn cast_floating_point_to_decimal128( diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index f7059be170f4..ba470635c6cd 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -182,10 +182,10 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Decimal128(_, _) | Decimal256(_, _), UInt8 | UInt16 | UInt32 | UInt64) | // decimal to signed numeric (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) => true, - // decimal to Utf8 - (Decimal128(_, _) | Decimal256(_, _), Utf8 | LargeUtf8) => true, - // Utf8 to decimal - (Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true, + // decimal to string + (Decimal128(_, _) | Decimal256(_, _), Utf8View | Utf8 | LargeUtf8) => true, + // string to decimal + (Utf8View | Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true, (Struct(from_fields), Struct(to_fields)) => { from_fields.len() == to_fields.len() && from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| { @@ -197,13 +197,18 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Struct(_), _) => false, (_, Struct(_)) => false, (_, Boolean) => { - DataType::is_integer(from_type) || - DataType::is_floating(from_type) + DataType::is_integer(from_type) + || DataType::is_floating(from_type) + || from_type == &Utf8View || from_type == &Utf8 || from_type == &LargeUtf8 } (Boolean, _) => { - DataType::is_integer(to_type) || DataType::is_floating(to_type) || to_type == &Utf8 || to_type == &LargeUtf8 + DataType::is_integer(to_type) + || DataType::is_floating(to_type) + || to_type == 
&Utf8View + || to_type == &Utf8 + || to_type == &LargeUtf8 } (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View ) => true, @@ -230,8 +235,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { ) => true, (Utf8 | LargeUtf8, Utf8View) => true, (BinaryView, Binary | LargeBinary | Utf8 | LargeUtf8 | Utf8View ) => true, - (Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16, + (Utf8View | Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16, (_, Utf8 | LargeUtf8) => from_type.is_primitive(), + (_, Utf8View) => from_type.is_numeric(), (_, Binary | LargeBinary) => from_type.is_integer(), @@ -824,18 +830,20 @@ pub fn cast_with_options( (Map(_, ordered1), Map(_, ordered2)) if ordered1 == ordered2 => { cast_map_values(array.as_map(), to_type, cast_options, ordered1.to_owned()) } - (Decimal128(_, s1), Decimal128(p2, s2)) => { + (Decimal128(p1, s1), Decimal128(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), + *p1, *s1, *p2, *s2, cast_options, ) } - (Decimal256(_, s1), Decimal256(p2, s2)) => { + (Decimal256(p1, s1), Decimal256(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), + *p1, *s1, *p2, *s2, @@ -917,6 +925,7 @@ pub fn cast_with_options( Float64 => cast_decimal_to_float::(array, |x| { x as f64 / 10_f64.powi(*scale as i32) }), + Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), LargeUtf8 => value_to_string::(array, cast_options), Null => Ok(new_null_array(to_type, array.len())), @@ -982,6 +991,7 @@ pub fn cast_with_options( Float64 => cast_decimal_to_float::(array, |x| { x.to_f64().unwrap() / 10_f64.powi(*scale as i32) }), + Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), LargeUtf8 => value_to_string::(array, cast_options), Null => Ok(new_null_array(to_type, array.len())), @@ -1061,7 +1071,7 @@ pub fn cast_with_options( *scale, cast_options, ), - Utf8 => cast_string_to_decimal::( + Utf8View | Utf8 => cast_string_to_decimal::( array, *precision, *scale, @@ -1150,7 +1160,7 @@ pub fn cast_with_options( *scale, cast_options, ), - Utf8 => cast_string_to_decimal::( + Utf8View | Utf8 => cast_string_to_decimal::( array, *precision, *scale, @@ -1197,6 +1207,7 @@ pub fn cast_with_options( Float16 => cast_numeric_to_bool::(array), Float32 => cast_numeric_to_bool::(array), Float64 => cast_numeric_to_bool::(array), + Utf8View => cast_utf8view_to_boolean(array, cast_options), Utf8 => cast_utf8_to_boolean::(array, cast_options), LargeUtf8 => cast_utf8_to_boolean::(array, cast_options), _ => Err(ArrowError::CastError(format!( @@ -1215,6 +1226,7 @@ pub fn cast_with_options( Float16 => cast_bool_to_numeric::(array, cast_options), Float32 => cast_bool_to_numeric::(array, cast_options), Float64 => cast_bool_to_numeric::(array, cast_options), + Utf8View => value_to_string_view(array, cast_options), Utf8 => value_to_string::(array, cast_options), LargeUtf8 => value_to_string::(array, cast_options), _ => Err(ArrowError::CastError(format!( @@ -1462,6 +1474,9 @@ pub fn cast_with_options( (BinaryView, _) => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), + (from_type, Utf8View) if from_type.is_primitive() => { + value_to_string_view(array, cast_options) + } (from_type, LargeUtf8) if from_type.is_primitive() => { value_to_string::(array, cast_options) } @@ -2485,12 +2500,11 @@ where #[cfg(test)] mod tests { + use super::*; use 
arrow_buffer::{Buffer, IntervalDayTime, NullBuffer}; use chrono::NaiveDate; use half::f16; - use super::*; - macro_rules! generate_cast_test_case { ($INPUT_ARRAY: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => { let output = @@ -2682,13 +2696,16 @@ mod tests { // negative test let array = vec![Some(123456), None]; let array = create_decimal_array(array, 10, 0).unwrap(); - let result = cast(&array, &DataType::Decimal128(2, 2)); - assert!(result.is_ok()); - let array = result.unwrap(); - let array: &Decimal128Array = array.as_primitive(); - let err = array.validate_decimal_precision(2); + let result_safe = cast(&array, &DataType::Decimal128(2, 2)); + assert!(result_safe.is_ok()); + let options = CastOptions { + safe: false, + ..Default::default() + }; + + let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options); assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", - err.unwrap_err().to_string()); + result_unsafe.unwrap_err().to_string()); } #[test] @@ -3637,7 +3654,7 @@ mod tests { let array = Int32Array::from(vec![5, 6, 7, 8, 9]); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), ) .unwrap(); assert_eq!(5, b.len()); @@ -3661,7 +3678,7 @@ mod tests { let array = Int32Array::from(vec![Some(5), None, Some(7), Some(8), Some(9)]); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), ) .unwrap(); assert_eq!(5, b.len()); @@ -3689,7 +3706,7 @@ mod tests { let array = array.slice(2, 4); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), ) .unwrap(); assert_eq!(4, b.len()); @@ -3708,6 +3725,55 @@ mod tests { assert_eq!(10.0, c.value(3)); } + #[test] + fn test_cast_int_to_utf8view() { + let inputs = vec![ + Arc::new(Int8Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(Int16Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(Int32Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(UInt8Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(UInt16Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(UInt32Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + Arc::new(UInt64Array::from(vec![None, Some(8), Some(9), Some(10)])) as ArrayRef, + ]; + let expected: ArrayRef = Arc::new(StringViewArray::from(vec![ + None, + Some("8"), + Some("9"), + Some("10"), + ])); + + for array in inputs { + assert!(can_cast_types(array.data_type(), &DataType::Utf8View)); + let arr = cast(&array, &DataType::Utf8View).unwrap(); + assert_eq!(expected.as_ref(), arr.as_ref()); + } + } + + #[test] + fn test_cast_float_to_utf8view() { + let inputs = vec![ + Arc::new(Float16Array::from(vec![ + Some(f16::from_f64(1.5)), + Some(f16::from_f64(2.5)), + None, + ])) as ArrayRef, + Arc::new(Float32Array::from(vec![Some(1.5), Some(2.5), None])) as ArrayRef, + Arc::new(Float64Array::from(vec![Some(1.5), Some(2.5), None])) as ArrayRef, + ]; + + let expected: ArrayRef = + Arc::new(StringViewArray::from(vec![Some("1.5"), Some("2.5"), None])); + + for array in 
inputs { + println!("type: {}", array.data_type()); + assert!(can_cast_types(array.data_type(), &DataType::Utf8View)); + let arr = cast(&array, &DataType::Utf8View).unwrap(); + assert_eq!(expected.as_ref(), arr.as_ref()); + } + } + #[test] fn test_cast_utf8_to_i32() { let array = StringArray::from(vec!["5", "6", "seven", "8", "9.1"]); @@ -3720,6 +3786,41 @@ mod tests { assert!(!c.is_valid(4)); } + #[test] + fn test_cast_utf8view_to_i32() { + let array = StringViewArray::from(vec!["5", "6", "seven", "8", "9.1"]); + let b = cast(&array, &DataType::Int32).unwrap(); + let c = b.as_primitive::(); + assert_eq!(5, c.value(0)); + assert_eq!(6, c.value(1)); + assert!(!c.is_valid(2)); + assert_eq!(8, c.value(3)); + assert!(!c.is_valid(4)); + } + + #[test] + fn test_cast_utf8view_to_f32() { + let array = StringViewArray::from(vec!["3", "4.56", "seven", "8.9"]); + let b = cast(&array, &DataType::Float32).unwrap(); + let c = b.as_primitive::(); + assert_eq!(3.0, c.value(0)); + assert_eq!(4.56, c.value(1)); + assert!(!c.is_valid(2)); + assert_eq!(8.9, c.value(3)); + } + + #[test] + fn test_cast_utf8view_to_decimal128() { + let array = StringViewArray::from(vec![None, Some("4"), Some("5.6"), Some("7.89")]); + let arr = Arc::new(array) as ArrayRef; + generate_cast_test_case!( + &arr, + Decimal128Array, + &DataType::Decimal128(4, 2), + vec![None, Some(400_i128), Some(560_i128), Some(789_i128)] + ); + } + #[test] fn test_cast_with_options_utf8_to_i32() { let array = StringArray::from(vec!["5", "6", "seven", "8", "9.1"]); @@ -3751,6 +3852,14 @@ mod tests { assert_eq!(*as_boolean_array(&casted), expected); } + #[test] + fn test_cast_utf8view_to_bool() { + let strings = StringViewArray::from(vec!["true", "false", "invalid", " Y ", ""]); + let casted = cast(&strings, &DataType::Boolean).unwrap(); + let expected = BooleanArray::from(vec![Some(true), Some(false), None, Some(true), None]); + assert_eq!(*as_boolean_array(&casted), expected); + } + #[test] fn test_cast_with_options_utf8_to_bool() { let strings = StringArray::from(vec!["true", "false", "invalid", " Y ", ""]); @@ -3782,6 +3891,16 @@ mod tests { assert!(!c.is_valid(2)); } + #[test] + fn test_cast_bool_to_utf8view() { + let array = BooleanArray::from(vec![Some(true), Some(false), None]); + let b = cast(&array, &DataType::Utf8View).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!("true", c.value(0)); + assert_eq!("false", c.value(1)); + assert!(!c.is_valid(2)); + } + #[test] fn test_cast_bool_to_utf8() { let array = BooleanArray::from(vec![Some(true), Some(false), None]); @@ -3975,7 +4094,7 @@ mod tests { // Construct a list array from the above two // [[0,0,0], [-1, -2, -1], [2, 100000000]] - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -3986,7 +4105,7 @@ mod tests { let cast_array = cast( &list_array, - &DataType::List(Arc::new(Field::new("item", DataType::UInt16, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::UInt16, true))), ) .unwrap(); @@ -4026,7 +4145,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = 
ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -4037,8 +4156,7 @@ mod tests { let actual = cast( &list_array, - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( DataType::Timestamp(TimeUnit::Microsecond, None), true, ))), @@ -4048,11 +4166,10 @@ mod tests { let expected = cast( &cast( &list_array, - &DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), ) .unwrap(), - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( DataType::Timestamp(TimeUnit::Microsecond, None), true, ))), @@ -5146,41 +5263,43 @@ mod tests { assert_eq!("2018-12-25T00:00:00", c.value(1)); } + macro_rules! assert_cast_timestamp_to_string { + ($array:expr, $datatype:expr, $output_array_type: ty, $expected:expr) => {{ + let out = cast(&$array, &$datatype).unwrap(); + let actual = out + .as_any() + .downcast_ref::<$output_array_type>() + .unwrap() + .into_iter() + .collect::>(); + assert_eq!(actual, $expected); + }}; + ($array:expr, $datatype:expr, $output_array_type: ty, $options:expr, $expected:expr) => {{ + let out = cast_with_options(&$array, &$datatype, &$options).unwrap(); + let actual = out + .as_any() + .downcast_ref::<$output_array_type>() + .unwrap() + .into_iter() + .collect::>(); + assert_eq!(actual, $expected); + }}; + } + #[test] fn test_cast_timestamp_to_strings() { // "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None let array = TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]); - let out = cast(&array, &DataType::Utf8).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - .collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19T00:00:03.005"), - Some("2018-12-25T00:00:02.001"), - None - ] - ); - let out = cast(&array, &DataType::LargeUtf8).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - .collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19T00:00:03.005"), - Some("2018-12-25T00:00:02.001"), - None - ] - ); + let expected = vec![ + Some("1997-05-19T00:00:03.005"), + Some("2018-12-25T00:00:02.001"), + None, + ]; + + assert_cast_timestamp_to_string!(array, DataType::Utf8View, StringViewArray, expected); + assert_cast_timestamp_to_string!(array, DataType::Utf8, StringArray, expected); + assert_cast_timestamp_to_string!(array, DataType::LargeUtf8, LargeStringArray, expected); } #[test] @@ -5193,73 +5312,65 @@ mod tests { .with_timestamp_format(Some(ts_format)) .with_timestamp_tz_format(Some(ts_format)), }; + // "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None let array_without_tz = TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]); - let out = cast_with_options(&array_without_tz, &DataType::Utf8, &cast_options).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - .collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19 00:00:03.005000"), - Some("2018-12-25 00:00:02.001000"), - None - ] + let expected = vec![ + Some("1997-05-19 00:00:03.005000"), + Some("2018-12-25 00:00:02.001000"), + None, + ]; + assert_cast_timestamp_to_string!( + array_without_tz, + DataType::Utf8View, + StringViewArray, + cast_options, + expected ); - let out = - cast_with_options(&array_without_tz, &DataType::LargeUtf8, &cast_options).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - 
.collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19 00:00:03.005000"), - Some("2018-12-25 00:00:02.001000"), - None - ] + assert_cast_timestamp_to_string!( + array_without_tz, + DataType::Utf8, + StringArray, + cast_options, + expected + ); + assert_cast_timestamp_to_string!( + array_without_tz, + DataType::LargeUtf8, + LargeStringArray, + cast_options, + expected ); let array_with_tz = TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]) .with_timezone(tz.to_string()); - let out = cast_with_options(&array_with_tz, &DataType::Utf8, &cast_options).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - .collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19 05:45:03.005000"), - Some("2018-12-25 05:45:02.001000"), - None - ] + let expected = vec![ + Some("1997-05-19 05:45:03.005000"), + Some("2018-12-25 05:45:02.001000"), + None, + ]; + assert_cast_timestamp_to_string!( + array_with_tz, + DataType::Utf8View, + StringViewArray, + cast_options, + expected ); - let out = cast_with_options(&array_with_tz, &DataType::LargeUtf8, &cast_options).unwrap(); - let out = out - .as_any() - .downcast_ref::() - .unwrap() - .into_iter() - .collect::>(); - assert_eq!( - out, - vec![ - Some("1997-05-19 05:45:03.005000"), - Some("2018-12-25 05:45:02.001000"), - None - ] + assert_cast_timestamp_to_string!( + array_with_tz, + DataType::Utf8, + StringArray, + cast_options, + expected + ); + assert_cast_timestamp_to_string!( + array_with_tz, + DataType::LargeUtf8, + LargeStringArray, + cast_options, + expected ); } @@ -7085,12 +7196,12 @@ mod tests { cast_from_null_to_other(&data_type); // Cast null from and to list - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); cast_from_null_to_other(&data_type); - let data_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); cast_from_null_to_other(&data_type); let data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 4); cast_from_null_to_other(&data_type); // Cast null from and to dictionary @@ -7207,11 +7318,11 @@ mod tests { assert_eq!(actual.data_type(), to_array.data_type()); let invalid_target = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Binary, true)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Binary, true)), 2); assert!(!can_cast_types(from_array.data_type(), &invalid_target)); let invalid_size = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float16, true)), 5); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Float16, true)), 5); assert!(!can_cast_types(from_array.data_type(), &invalid_size)); } @@ -7364,7 +7475,7 @@ mod tests { [(Some([Some(5)]))], 1, )) as ArrayRef; - let to_field_inner = Arc::new(Field::new("item", DataType::Float32, false)); + let to_field_inner = Arc::new(Field::new_list_field(DataType::Float32, false)); let to_field = Arc::new(Field::new( "dummy", DataType::FixedSizeList(to_field_inner.clone(), 1), @@ -7454,7 +7565,7 @@ mod tests { // 4. 
Nulls that are correctly sized (same as target list size) // Non-null case - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let values = vec![ Some(vec![Some(1), Some(2), Some(3)]), Some(vec![Some(4), Some(5), Some(6)]), @@ -7530,7 +7641,7 @@ mod tests { let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), &CastOptions { safe: false, ..Default::default() @@ -7544,7 +7655,7 @@ mod tests { // too short and truncate lists that are too long. let res = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), ) .unwrap(); let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( @@ -7566,7 +7677,7 @@ mod tests { ])) as ArrayRef; let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), &CastOptions { safe: false, ..Default::default() @@ -7591,7 +7702,7 @@ mod tests { )) as ArrayRef; let actual = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 2), ) .unwrap(); assert_eq!(expected.as_ref(), actual.as_ref()); @@ -7614,14 +7725,14 @@ mod tests { )) as ArrayRef; let actual = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2), ) .unwrap(); assert_eq!(expected.as_ref(), actual.as_ref()); let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int16, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int16, true)), 2), &CastOptions { safe: false, ..Default::default() @@ -7633,7 +7744,7 @@ mod tests { #[test] fn test_cast_list_to_fsl_empty() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let array = new_empty_array(&DataType::List(field.clone())); let target_type = DataType::FixedSizeList(field.clone(), 3); @@ -7656,7 +7767,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7680,7 +7791,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7699,7 +7810,7 @@ mod tests { .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 4); let list_data = 
ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -7717,7 +7828,7 @@ mod tests { .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 4); let list_data = ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -7979,7 +8090,7 @@ mod tests { let array1 = make_list_array().slice(1, 2); let array2 = Arc::new(make_list_array()) as ArrayRef; - let dt = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let dt = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let out1 = cast(&array1, &dt).unwrap(); let out2 = cast(&array2, &dt).unwrap(); @@ -7992,7 +8103,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); let value_data = str_array.into_data(); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -8354,7 +8465,7 @@ mod tests { let input_type = DataType::Decimal128(10, 3); let output_type = DataType::Decimal256(10, 5); assert!(can_cast_types(&input_type, &output_type)); - let array = vec![Some(i128::MAX), Some(i128::MIN)]; + let array = vec![Some(123456), Some(-123456)]; let input_decimal_array = create_decimal_array(array, 10, 3).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; @@ -8364,8 +8475,8 @@ mod tests { Decimal256Array, &output_type, vec![ - Some(i256::from_i128(i128::MAX).mul_wrapping(hundred)), - Some(i256::from_i128(i128::MIN).mul_wrapping(hundred)) + Some(i256::from_i128(123456).mul_wrapping(hundred)), + Some(i256::from_i128(-123456).mul_wrapping(hundred)) ] ); } @@ -9114,7 +9225,31 @@ mod tests { } #[test] - fn test_cast_decimal_to_utf8() { + fn test_cast_decimal_to_string() { + assert!(can_cast_types( + &DataType::Decimal128(10, 4), + &DataType::Utf8View + )); + assert!(can_cast_types( + &DataType::Decimal256(38, 10), + &DataType::Utf8View + )); + + macro_rules! 
assert_decimal_values { + ($array:expr) => { + let c = $array; + assert_eq!("1123.454", c.value(0)); + assert_eq!("2123.456", c.value(1)); + assert_eq!("-3123.453", c.value(2)); + assert_eq!("-3123.456", c.value(3)); + assert_eq!("0.000", c.value(4)); + assert_eq!("0.123", c.value(5)); + assert_eq!("1234.567", c.value(6)); + assert_eq!("-1234.567", c.value(7)); + assert!(c.is_null(8)); + }; + } + fn test_decimal_to_string( output_type: DataType, array: PrimitiveArray, @@ -9122,18 +9257,19 @@ mod tests { let b = cast(&array, &output_type).unwrap(); assert_eq!(b.data_type(), &output_type); - let c = b.as_string::(); - - assert_eq!("1123.454", c.value(0)); - assert_eq!("2123.456", c.value(1)); - assert_eq!("-3123.453", c.value(2)); - assert_eq!("-3123.456", c.value(3)); - assert_eq!("0.000", c.value(4)); - assert_eq!("0.123", c.value(5)); - assert_eq!("1234.567", c.value(6)); - assert_eq!("-1234.567", c.value(7)); - assert!(c.is_null(8)); + match b.data_type() { + DataType::Utf8View => { + let c = b.as_string_view(); + assert_decimal_values!(c); + } + DataType::Utf8 | DataType::LargeUtf8 => { + let c = b.as_string::(); + assert_decimal_values!(c); + } + _ => (), + } } + let array128: Vec> = vec![ Some(1123454), Some(2123456), @@ -9145,22 +9281,33 @@ mod tests { Some(-123456789), None, ]; + let array256: Vec> = array128 + .iter() + .map(|num| num.map(i256::from_i128)) + .collect(); - let array256: Vec> = array128.iter().map(|v| v.map(i256::from_i128)).collect(); - - test_decimal_to_string::( + test_decimal_to_string::( + DataType::Utf8View, + create_decimal_array(array128.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( DataType::Utf8, create_decimal_array(array128.clone(), 7, 3).unwrap(), ); - test_decimal_to_string::( + test_decimal_to_string::( DataType::LargeUtf8, create_decimal_array(array128, 7, 3).unwrap(), ); - test_decimal_to_string::( + + test_decimal_to_string::( + DataType::Utf8View, + create_decimal256_array(array256.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( DataType::Utf8, create_decimal256_array(array256.clone(), 7, 3).unwrap(), ); - test_decimal_to_string::( + test_decimal_to_string::( DataType::LargeUtf8, create_decimal256_array(array256, 7, 3).unwrap(), ); @@ -9793,4 +9940,76 @@ mod tests { "Cast non-nullable to non-nullable struct field returning null should fail", ); } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() { + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal128(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + + let options = CastOptions { + safe: false, + ..Default::default() + }; + let result = cast_with_options(&array, &output_type, &options); + assert_eq!(result.unwrap_err().to_string(), + "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. 
Max is 999999"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale() { + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 4); + let output_type = DataType::Decimal128(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + + let options = CastOptions { + safe: false, + ..Default::default() + }; + let result = cast_with_options(&array, &output_type, &options); + assert_eq!(result.unwrap_err().to_string(), + "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. Max is 999999"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale() { + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal128(6, 3); + assert!(can_cast_types(&input_type, &output_type)); + + let options = CastOptions { + safe: false, + ..Default::default() + }; + let result = cast_with_options(&array, &output_type, &options); + assert_eq!(result.unwrap_err().to_string(), + "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type() { + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal256(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + + let options = CastOptions { + safe: false, + ..Default::default() + }; + let result = cast_with_options(&array, &output_type, &options); + assert_eq!(result.unwrap_err().to_string(), + "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. 
Max is 999999"); + } } diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 7d0e7e21c859..7f22c4fd64de 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -38,6 +38,30 @@ pub(crate) fn value_to_string( Ok(Arc::new(builder.finish())) } +pub(crate) fn value_to_string_view( + array: &dyn Array, + options: &CastOptions, +) -> Result { + let mut builder = StringViewBuilder::with_capacity(array.len()); + let formatter = ArrayFormatter::try_new(array, &options.format_options)?; + let nulls = array.nulls(); + // buffer to avoid reallocating on each value + // TODO: replace with write to builder after https://github.com/apache/arrow-rs/issues/6373 + let mut buffer = String::new(); + for i in 0..array.len() { + match nulls.map(|x| x.is_null(i)).unwrap_or_default() { + true => builder.append_null(), + false => { + // write to buffer first and then copy into target array + buffer.clear(); + formatter.value(i).write(&mut buffer)?; + builder.append_value(&buffer) + } + } + } + Ok(Arc::new(builder.finish())) +} + /// Parse UTF-8 pub(crate) fn parse_string( array: &dyn Array, @@ -344,19 +368,14 @@ pub(crate) fn cast_binary_to_string( } } -/// Casts Utf8 to Boolean -pub(crate) fn cast_utf8_to_boolean( - from: &dyn Array, +/// Casts string to boolean +fn cast_string_to_boolean<'a, StrArray>( + array: &StrArray, cast_options: &CastOptions, ) -> Result where - OffsetSize: OffsetSizeTrait, + StrArray: StringArrayType<'a>, { - let array = from - .as_any() - .downcast_ref::>() - .unwrap(); - let output_array = array .iter() .map(|value| match value { @@ -378,3 +397,27 @@ where Ok(Arc::new(output_array)) } + +pub(crate) fn cast_utf8_to_boolean( + from: &dyn Array, + cast_options: &CastOptions, +) -> Result +where + OffsetSize: OffsetSizeTrait, +{ + let array = from + .as_any() + .downcast_ref::>() + .unwrap(); + + cast_string_to_boolean(&array, cast_options) +} + +pub(crate) fn cast_utf8view_to_boolean( + from: &dyn Array, + cast_options: &CastOptions, +) -> Result { + let array = from.as_any().downcast_ref::().unwrap(); + + cast_string_to_boolean(&array, cast_options) +} diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 4bd94c13fe8d..f4c4639c1c08 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -497,6 +497,10 @@ parser_primitive!(Int64Type); parser_primitive!(Int32Type); parser_primitive!(Int16Type); parser_primitive!(Int8Type); +parser_primitive!(DurationNanosecondType); +parser_primitive!(DurationMicrosecondType); +parser_primitive!(DurationMillisecondType); +parser_primitive!(DurationSecondType); impl Parser for TimestampNanosecondType { fn parse(string: &str) -> Option { diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 4a3cbda283a5..ad3b952c327d 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -296,7 +296,7 @@ mod tests { fn test_pretty_format_fixed_size_list() { // define a schema. 
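// Usage sketch (illustrative, not part of the patch) for the Utf8View support added
// in arrow-cast above: numeric values can now be rendered to Utf8View and Utf8View
// strings parsed to booleans, both via the public `cast` kernel.
fn utf8view_cast_sketch() {
    use std::sync::Arc;

    use arrow_array::cast::AsArray;
    use arrow_array::{ArrayRef, BooleanArray, Int32Array, StringViewArray};
    use arrow_cast::cast;
    use arrow_schema::DataType;

    // Int32 -> Utf8View
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(3)]));
    let views = cast(&ints, &DataType::Utf8View).unwrap();
    assert_eq!(views.as_string_view().value(0), "1");

    // Utf8View -> Boolean (invalid strings become null under the default safe options)
    let flags: ArrayRef = Arc::new(StringViewArray::from(vec!["true", "false", "maybe"]));
    let bools = cast(&flags, &DataType::Boolean).unwrap();
    assert_eq!(
        bools.as_boolean(),
        &BooleanArray::from(vec![Some(true), Some(false), None])
    );
}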
let field_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); let keys_builder = Int32Array::builder(3); diff --git a/arrow-csv/Cargo.toml b/arrow-csv/Cargo.toml index be213c9363c2..8823924eb55b 100644 --- a/arrow-csv/Cargo.toml +++ b/arrow-csv/Cargo.toml @@ -35,18 +35,16 @@ bench = false [dependencies] arrow-array = { workspace = true } -arrow-buffer = { workspace = true } arrow-cast = { workspace = true } -arrow-data = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } csv = { version = "1.1", default-features = false } csv-core = { version = "0.1" } lazy_static = { version = "1.4", default-features = false } -lexical-core = { version = "1.0", default-features = false } regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] } [dev-dependencies] +arrow-buffer = { workspace = true } tempfile = "3.3" futures = "0.3" tokio = { version = "1.27", default-features = false, features = ["io-util"] } diff --git a/arrow-csv/LICENSE.txt b/arrow-csv/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-csv/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-csv/NOTICE.txt b/arrow-csv/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-csv/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index c91b436f6cce..d3d518316397 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -136,7 +136,7 @@ use lazy_static::lazy_static; use regex::{Regex, RegexSet}; use std::fmt::{self, Debug}; use std::fs::File; -use std::io::{BufRead, BufReader as StdBufReader, Read, Seek, SeekFrom}; +use std::io::{BufRead, BufReader as StdBufReader, Read}; use std::sync::Arc; use crate::map_csv_error; @@ -241,7 +241,7 @@ pub struct Format { } impl Format { - /// Specify whether the CSV file has a header, defaults to `true` + /// Specify whether the CSV file has a header, defaults to `false` /// /// When `true`, the first row of the CSV file is treated as a header row pub fn with_header(mut self, has_header: bool) -> Self { @@ -399,51 +399,6 @@ impl Format { } } -/// Infer the schema of a CSV file by reading through the first n records of the file, -/// with `max_read_records` controlling the maximum number of records to read. -/// -/// If `max_read_records` is not set, the whole file is read to infer its schema. -/// -/// Return inferred schema and number of records used for inference. This function does not change -/// reader cursor offset. -/// -/// The inferred schema will always have each field set as nullable. -#[deprecated(note = "Use Format::infer_schema")] -#[allow(deprecated)] -pub fn infer_file_schema( - mut reader: R, - delimiter: u8, - max_read_records: Option, - has_header: bool, -) -> Result<(Schema, usize), ArrowError> { - let saved_offset = reader.stream_position()?; - let r = infer_reader_schema(&mut reader, delimiter, max_read_records, has_header)?; - // return the reader seek back to the start - reader.seek(SeekFrom::Start(saved_offset))?; - Ok(r) -} - -/// Infer schema of CSV records provided by struct that implements `Read` trait. -/// -/// `max_read_records` controlling the maximum number of records to read. 
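// Sketch of the `Format::infer_schema` call that replaces the deprecated
// `infer_file_schema`/`infer_reader_schema` helpers removed here (illustrative only;
// the CSV content and column names are made up).
fn infer_schema_sketch() -> Result<(), arrow_schema::ArrowError> {
    use std::io::Cursor;

    use arrow_csv::reader::Format;

    let csv = "city,population\nOslo,700000\nBergen,290000\n";
    let format = Format::default().with_header(true).with_delimiter(b',');
    let (schema, records_read) = format.infer_schema(Cursor::new(csv), None)?;
    println!("inferred {schema:?} from {records_read} records");
    Ok(())
}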
If `max_read_records` is -/// not set, all records are read to infer the schema. -/// -/// Return inferred schema and number of records used for inference. -#[deprecated(note = "Use Format::infer_schema")] -pub fn infer_reader_schema( - reader: R, - delimiter: u8, - max_read_records: Option, - has_header: bool, -) -> Result<(Schema, usize), ArrowError> { - let format = Format { - delimiter: Some(delimiter), - header: has_header, - ..Default::default() - }; - format.infer_schema(reader, max_read_records) -} - /// Infer schema from a list of CSV files by reading through first n records /// with `max_read_records` controlling the maximum number of records to read. /// @@ -824,42 +779,66 @@ fn parse( match key_type.as_ref() { DataType::Int8 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::Int16 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::Int32 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::Int64 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::UInt8 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::UInt16 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::UInt32 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), DataType::UInt64 => Ok(Arc::new( rows.iter() - .map(|row| row.get(i)) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::>(), ) as ArrayRef), _ => Err(ArrowError::ParseError(format!( @@ -1101,14 +1080,6 @@ impl ReaderBuilder { } } - /// Set whether the CSV file has headers - #[deprecated(note = "Use with_header")] - #[doc(hidden)] - pub fn has_header(mut self, has_header: bool) -> Self { - self.format.header = has_header; - self - } - /// Set whether the CSV file has a header pub fn with_header(mut self, has_header: bool) -> Self { self.format.header = has_header; @@ -1236,7 +1207,7 @@ impl ReaderBuilder { mod tests { use super::*; - use std::io::{Cursor, Write}; + use std::io::{Cursor, Seek, SeekFrom, Write}; use tempfile::NamedTempFile; use arrow_array::cast::AsArray; @@ -1528,6 +1499,40 @@ mod tests { assert_eq!(strings.value(29), "Uckfield, East Sussex, UK"); } + #[test] + fn test_csv_with_nullable_dictionary() { + let offset_type = vec![ + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + ]; + for data_type in offset_type { + let file = File::open("test/data/dictionary_nullable_test.csv").unwrap(); + let dictionary_type = + DataType::Dictionary(Box::new(data_type), Box::new(DataType::Utf8)); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("name", dictionary_type.clone(), true), + ])); + + let mut csv = 
ReaderBuilder::new(schema) + .build(file.try_clone().unwrap()) + .unwrap(); + + let batch = csv.next().unwrap().unwrap(); + assert_eq!(3, batch.num_rows()); + assert_eq!(2, batch.num_columns()); + + let names = arrow_cast::cast(batch.column(1), &dictionary_type).unwrap(); + assert!(!names.is_null(2)); + assert!(names.is_null(1)); + } + } #[test] fn test_nulls() { let schema = Arc::new(Schema::new(vec![ diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index eae2133a4623..c5a0a0b76d59 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -256,14 +256,6 @@ impl WriterBuilder { Self::default() } - /// Set whether to write headers - #[deprecated(note = "Use Self::with_header")] - #[doc(hidden)] - pub fn has_headers(mut self, has_headers: bool) -> Self { - self.has_header = has_headers; - self - } - /// Set whether to write the CSV file with a header pub fn with_header(mut self, header: bool) -> Self { self.has_header = header; @@ -397,17 +389,6 @@ impl WriterBuilder { self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE) } - /// Use RFC3339 format for date/time/timestamps (default) - #[deprecated(note = "Use WriterBuilder::default()")] - pub fn with_rfc3339(mut self) -> Self { - self.date_format = None; - self.datetime_format = None; - self.time_format = None; - self.timestamp_format = None; - self.timestamp_tz_format = None; - self - } - /// Create a new `Writer` pub fn build(self, writer: W) -> Writer { let mut builder = csv::WriterBuilder::new(); diff --git a/arrow-csv/test/data/dictionary_nullable_test.csv b/arrow-csv/test/data/dictionary_nullable_test.csv new file mode 100644 index 000000000000..c9ada5293b70 --- /dev/null +++ b/arrow-csv/test/data/dictionary_nullable_test.csv @@ -0,0 +1,3 @@ +id,name +1, +2,bob diff --git a/arrow-data/LICENSE.txt b/arrow-data/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-data/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-data/NOTICE.txt b/arrow-data/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-data/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 8af2a91cf159..a35b5e8629e9 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -30,11 +30,6 @@ use std::sync::Arc; use crate::{equal, validate_binary_view, validate_string_view}; -/// A collection of [`Buffer`] -#[doc(hidden)] -#[deprecated(note = "Use [Buffer]")] -pub type Buffers<'a> = &'a [Buffer]; - #[inline] pub(crate) fn contains_nulls( null_bit_buffer: Option<&NullBuffer>, diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index 702cb1360c2d..fbb295036a9b 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -43,11 +43,11 @@ base64 = { version = "0.22", default-features = false, features = ["std"] } bytes = { version = "1", default-features = false } futures = { version = "0.3", default-features = false, features = ["alloc"] } once_cell = { version = "1", optional = true } -paste = { version = "1.0" } +paste = { version = "1.0" , optional = true } prost = { version = "0.13.1", default-features = false, features = ["prost-derive"] } # For Timestamp type prost-types = { version = "0.13.1", default-features = false } -tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] } +tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"], 
optional = true } tonic = { version = "0.12.3", default-features = false, features = ["transport", "codegen", "prost"] } # CLI-related dependencies @@ -61,11 +61,10 @@ all-features = true [features] default = [] -flight-sql-experimental = ["arrow-arith", "arrow-data", "arrow-ord", "arrow-row", "arrow-select", "arrow-string", "once_cell"] +flight-sql-experimental = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"] tls = ["tonic/tls"] - # Enable CLI tools -cli = ["anyhow", "arrow-array/chrono-tz", "arrow-cast/prettyprint", "clap", "tracing-log", "tracing-subscriber", "tonic/tls-webpki-roots"] +cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber"] [dev-dependencies] arrow-cast = { workspace = true, features = ["prettyprint"] } @@ -75,6 +74,9 @@ http-body = "1.0.0" hyper-util = "0.1" pin-project-lite = "0.2" tempfile = "3.3" +tracing-log = { version = "0.2" } +tracing-subscriber = { version = "0.3.1", default-features = false, features = ["ansi", "env-filter", "fmt"] } +tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] } tokio-stream = { version = "0.1", features = ["net"] } tower = { version = "0.5.0", features = ["util"] } uuid = { version = "1.10.0", features = ["v4"] } diff --git a/arrow-flight/LICENSE.txt b/arrow-flight/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-flight/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-flight/NOTICE.txt b/arrow-flight/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-flight/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-flight/README.md b/arrow-flight/README.md index df74bc012a1c..661abfc58691 100644 --- a/arrow-flight/README.md +++ b/arrow-flight/README.md @@ -31,14 +31,14 @@ Add this to your Cargo.toml: ```toml [dependencies] -arrow-flight = "53.2.0" +arrow-flight = "53.3.0" ``` Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information. This crate provides a Rust implementation of the [Flight.proto](../format/Flight.proto) gRPC protocol and -[examples](https://github.com/apache/arrow-rs/tree/master/arrow-flight/examples) +[examples](https://github.com/apache/arrow-rs/tree/main/arrow-flight/examples) that demonstrate how to build a Flight server implemented with [tonic](https://docs.rs/crate/tonic/latest). 
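A minimal client sketch (illustrative only; it assumes a Flight server is already listening on `localhost:50051` and that `tokio` is available in the consuming project):

```rust
use arrow_flight::flight_service_client::FlightServiceClient;
use arrow_flight::Criteria;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Connect to a running Flight server and list the flights it exposes.
    let mut client = FlightServiceClient::connect("http://localhost:50051").await?;
    let mut stream = client.list_flights(Criteria::default()).await?.into_inner();
    while let Some(info) = stream.message().await? {
        println!("{info:?}");
    }
    Ok(())
}
```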
## Feature Flags diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml index c7fe89beb93a..6358227a8912 100644 --- a/arrow-flight/gen/Cargo.toml +++ b/arrow-flight/gen/Cargo.toml @@ -32,6 +32,5 @@ publish = false [dependencies] # Pin specific version of the tonic-build dependencies to avoid auto-generated # (and checked in) arrow.flight.protocol.rs from changing -proc-macro2 = { version = "=1.0.89", default-features = false } -prost-build = { version = "=0.13.3", default-features = false } +prost-build = { version = "=0.13.4", default-features = false } tonic-build = { version = "=0.12.3", default-features = false, features = ["transport", "prost"] } diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index ae3475c7c7d7..315b7b3cb6e5 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -535,8 +535,10 @@ fn prepare_field_for_flight( ) .with_metadata(field.metadata().clone()) } else { + #[allow(deprecated)] let dict_id = dictionary_tracker.set_dict_id(field.as_ref()); + #[allow(deprecated)] Field::new_dict( field.name(), field.data_type().clone(), @@ -583,7 +585,9 @@ fn prepare_schema_for_flight( ) .with_metadata(field.metadata().clone()) } else { + #[allow(deprecated)] let dict_id = dictionary_tracker.set_dict_id(field.as_ref()); + #[allow(deprecated)] Field::new_dict( field.name(), field.data_type().clone(), @@ -650,10 +654,12 @@ struct FlightIpcEncoder { impl FlightIpcEncoder { fn new(options: IpcWriteOptions, error_on_replacement: bool) -> Self { + #[allow(deprecated)] let preserve_dict_id = options.preserve_dict_id(); Self { options, data_gen: IpcDataGenerator::default(), + #[allow(deprecated)] dictionary_tracker: DictionaryTracker::new_with_preserve_dict_id( error_on_replacement, preserve_dict_id, @@ -934,7 +940,7 @@ mod tests { let mut decoder = FlightDataDecoder::new(encoder); let expected_schema = Schema::new(vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )]); @@ -1038,7 +1044,7 @@ mod tests { "struct", vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )], true, @@ -1218,12 +1224,16 @@ mod tests { let hydrated_struct_fields = vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )]; let hydrated_union_fields = vec![ - Field::new_list("dict_list", Field::new("item", DataType::Utf8, true), true), + Field::new_list( + "dict_list", + Field::new_list_field(DataType::Utf8, true), + true, + ), Field::new_struct("struct", hydrated_struct_fields.clone(), true), Field::new("string", DataType::Utf8, true), ]; @@ -1537,6 +1547,7 @@ mod tests { async fn verify_flight_round_trip(mut batches: Vec) { let expected_schema = batches.first().unwrap().schema(); + #[allow(deprecated)] let encoder = FlightDataEncoderBuilder::default() .with_options(IpcWriteOptions::default().with_preserve_dict_id(false)) .with_dictionary_handling(DictionaryHandling::Resend) @@ -1564,6 +1575,7 @@ mod tests { HashMap::from([("some_key".to_owned(), "some_value".to_owned())]), ); + #[allow(deprecated)] let mut dictionary_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); let got = prepare_schema_for_flight(&schema, &mut dictionary_tracker, false); @@ -1582,12 +1594,30 @@ mod tests { hydrate_dictionaries(&batch, batch.schema()).expect("failed to optimize"); } - pub fn make_flight_data( + fn make_flight_data( + 
batch: &RecordBatch, + options: &IpcWriteOptions, + ) -> (Vec, FlightData) { + flight_data_from_arrow_batch(batch, options) + } + + fn flight_data_from_arrow_batch( batch: &RecordBatch, options: &IpcWriteOptions, ) -> (Vec, FlightData) { + let data_gen = IpcDataGenerator::default(); #[allow(deprecated)] - crate::utils::flight_data_from_arrow_batch(batch, options) + let mut dictionary_tracker = + DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); + + let (encoded_dictionaries, encoded_batch) = data_gen + .encoded_batch(batch, &mut dictionary_tracker, options) + .expect("DictionaryTracker configured above to not error on replacement"); + + let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect(); + let flight_batch = encoded_batch.into(); + + (flight_dictionaries, flight_batch) } #[test] @@ -1741,7 +1771,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); - verify_encoded_split(batch, 160).await; + verify_encoded_split(batch, 48).await; } #[tokio::test] diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs index 9f18416c06ec..1dd2700794f3 100644 --- a/arrow-flight/src/lib.rs +++ b/arrow-flight/src/lib.rs @@ -38,6 +38,8 @@ //! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html #![allow(rustdoc::invalid_html_tags)] #![warn(missing_docs)] +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] use arrow_ipc::{convert, writer, writer::EncodedData, writer::IpcWriteOptions}; use arrow_schema::{ArrowError, Schema}; @@ -141,6 +143,7 @@ pub struct IpcMessage(pub Bytes); fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData { let data_gen = writer::IpcDataGenerator::default(); + #[allow(deprecated)] let mut dict_tracker = writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); data_gen.schema_to_bytes_with_dictionary_tracker(arrow_schema, &mut dict_tracker, options) diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index e45e505b2b61..a6e228737b3f 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -228,8 +228,8 @@ impl FlightSqlServiceClient { .await .map_err(status_to_arrow_error)? .unwrap(); - let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; - let result: DoPutUpdateResult = any.unpack()?.unwrap(); + let result: DoPutUpdateResult = + Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; Ok(result.record_count) } @@ -274,8 +274,8 @@ impl FlightSqlServiceClient { .await .map_err(status_to_arrow_error)? .unwrap(); - let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; - let result: DoPutUpdateResult = any.unpack()?.unwrap(); + let result: DoPutUpdateResult = + Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; Ok(result.record_count) } @@ -593,8 +593,8 @@ impl PreparedStatement { .await .map_err(status_to_arrow_error)? 
.unwrap(); - let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; - let result: DoPutUpdateResult = any.unpack()?.unwrap(); + let result: DoPutUpdateResult = + Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; Ok(result.record_count) } diff --git a/arrow-flight/src/sql/metadata/catalogs.rs b/arrow-flight/src/sql/metadata/catalogs.rs index 327fed81077b..e27c63c3932f 100644 --- a/arrow-flight/src/sql/metadata/catalogs.rs +++ b/arrow-flight/src/sql/metadata/catalogs.rs @@ -68,7 +68,8 @@ impl GetCatalogsBuilder { /// builds a `RecordBatch` with the correct schema for a /// [`CommandGetCatalogs`] response pub fn build(self) -> Result { - let Self { catalogs } = self; + let Self { mut catalogs } = self; + catalogs.sort_unstable(); let batch = RecordBatch::try_new( Arc::clone(&GET_CATALOG_SCHEMA), @@ -98,3 +99,30 @@ static GET_CATALOG_SCHEMA: Lazy = Lazy::new(|| { false, )])) }); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_catalogs_are_sorted() { + let batch = ["a_catalog", "c_catalog", "b_catalog"] + .into_iter() + .fold(GetCatalogsBuilder::new(), |mut builder, catalog| { + builder.append(catalog); + builder + }) + .build() + .unwrap(); + let catalogs = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .iter() + .flatten() + .collect::>(); + assert!(catalogs.is_sorted()); + assert_eq!(catalogs, ["a_catalog", "b_catalog", "c_catalog"]); + } +} diff --git a/arrow-flight/src/sql/metadata/sql_info.rs b/arrow-flight/src/sql/metadata/sql_info.rs index 2ea30df7fc2f..58b228530942 100644 --- a/arrow-flight/src/sql/metadata/sql_info.rs +++ b/arrow-flight/src/sql/metadata/sql_info.rs @@ -172,7 +172,7 @@ static UNION_TYPE: Lazy = Lazy::new(|| { // treat list as nullable b/c that is what the builders make Field::new( "string_list", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), Field::new( @@ -184,7 +184,7 @@ static UNION_TYPE: Lazy = Lazy::new(|| { Field::new("keys", DataType::Int32, false), Field::new( "values", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), ])), diff --git a/arrow-flight/src/sql/metadata/xdbc_info.rs b/arrow-flight/src/sql/metadata/xdbc_info.rs index 485bedaebfb0..a3a18ca10888 100644 --- a/arrow-flight/src/sql/metadata/xdbc_info.rs +++ b/arrow-flight/src/sql/metadata/xdbc_info.rs @@ -330,7 +330,7 @@ static GET_XDBC_INFO_SCHEMA: Lazy = Lazy::new(|| { Field::new("literal_suffix", DataType::Utf8, true), Field::new( "create_params", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, false))), true, ), Field::new("nullable", DataType::Int32, false), diff --git a/arrow-flight/src/sql/server.rs b/arrow-flight/src/sql/server.rs index 6b9befa63600..8ab8a16dbb50 100644 --- a/arrow-flight/src/sql/server.rs +++ b/arrow-flight/src/sql/server.rs @@ -719,7 +719,7 @@ where let record_count = self.do_put_statement_update(command, request).await?; let result = DoPutUpdateResult { record_count }; let output = futures::stream::iter(vec![Ok(PutResult { - app_metadata: result.as_any().encode_to_vec().into(), + app_metadata: result.encode_to_vec().into(), })]); Ok(Response::new(Box::pin(output))) } @@ -727,7 +727,7 @@ where let record_count = self.do_put_statement_ingest(command, request).await?; let result = 
DoPutUpdateResult { record_count }; let output = futures::stream::iter(vec![Ok(PutResult { - app_metadata: result.as_any().encode_to_vec().into(), + app_metadata: result.encode_to_vec().into(), })]); Ok(Response::new(Box::pin(output))) } @@ -744,7 +744,7 @@ where let record_count = self.do_put_substrait_plan(command, request).await?; let result = DoPutUpdateResult { record_count }; let output = futures::stream::iter(vec![Ok(PutResult { - app_metadata: result.as_any().encode_to_vec().into(), + app_metadata: result.encode_to_vec().into(), })]); Ok(Response::new(Box::pin(output))) } @@ -754,7 +754,7 @@ where .await?; let result = DoPutUpdateResult { record_count }; let output = futures::stream::iter(vec![Ok(PutResult { - app_metadata: result.as_any().encode_to_vec().into(), + app_metadata: result.encode_to_vec().into(), })]); Ok(Response::new(Box::pin(output))) } diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs index f6129ddfe248..428dde73ca6c 100644 --- a/arrow-flight/src/utils.rs +++ b/arrow-flight/src/utils.rs @@ -17,8 +17,7 @@ //! Utilities to assist with reading and writing Arrow data as Flight messages -use crate::{FlightData, IpcMessage, SchemaAsIpc, SchemaResult}; -use bytes::Bytes; +use crate::{FlightData, SchemaAsIpc}; use std::collections::HashMap; use std::sync::Arc; @@ -28,30 +27,6 @@ use arrow_ipc::convert::fb_to_schema; use arrow_ipc::{reader, root_as_message, writer, writer::IpcWriteOptions}; use arrow_schema::{ArrowError, Schema, SchemaRef}; -/// Convert a `RecordBatch` to a vector of `FlightData` representing the bytes of the dictionaries -/// and a `FlightData` representing the bytes of the batch's values -#[deprecated( - since = "30.0.0", - note = "Use IpcDataGenerator directly with DictionaryTracker to avoid re-sending dictionaries" -)] -pub fn flight_data_from_arrow_batch( - batch: &RecordBatch, - options: &IpcWriteOptions, -) -> (Vec, FlightData) { - let data_gen = writer::IpcDataGenerator::default(); - let mut dictionary_tracker = - writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); - - let (encoded_dictionaries, encoded_batch) = data_gen - .encoded_batch(batch, &mut dictionary_tracker, options) - .expect("DictionaryTracker configured above to not error on replacement"); - - let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect(); - let flight_batch = encoded_batch.into(); - - (flight_dictionaries, flight_batch) -} - /// Convert a slice of wire protocol `FlightData`s into a vector of `RecordBatch`es pub fn flight_data_to_batches(flight_data: &[FlightData]) -> Result, ArrowError> { let schema = flight_data.first().ok_or_else(|| { @@ -104,41 +79,6 @@ pub fn flight_data_to_arrow_batch( })? 
} -/// Convert a `Schema` to `SchemaResult` by converting to an IPC message -#[deprecated( - since = "4.4.0", - note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).try_into()" -)] -pub fn flight_schema_from_arrow_schema( - schema: &Schema, - options: &IpcWriteOptions, -) -> Result { - SchemaAsIpc::new(schema, options).try_into() -} - -/// Convert a `Schema` to `FlightData` by converting to an IPC message -#[deprecated( - since = "4.4.0", - note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).into()" -)] -pub fn flight_data_from_arrow_schema(schema: &Schema, options: &IpcWriteOptions) -> FlightData { - SchemaAsIpc::new(schema, options).into() -} - -/// Convert a `Schema` to bytes in the format expected in `FlightInfo.schema` -#[deprecated( - since = "4.4.0", - note = "Use TryFrom trait, e.g.: SchemaAsIpc::new(schema, options).try_into()" -)] -pub fn ipc_message_from_arrow_schema( - schema: &Schema, - options: &IpcWriteOptions, -) -> Result { - let message = SchemaAsIpc::new(schema, options).try_into()?; - let IpcMessage(vals) = message; - Ok(vals) -} - /// Convert `RecordBatch`es to wire protocol `FlightData`s pub fn batches_to_flight_data( schema: &Schema, @@ -150,6 +90,7 @@ pub fn batches_to_flight_data( let mut flight_data = vec![]; let data_gen = writer::IpcDataGenerator::default(); + #[allow(deprecated)] let mut dictionary_tracker = writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); diff --git a/arrow-integration-test/LICENSE.txt b/arrow-integration-test/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-integration-test/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-integration-test/NOTICE.txt b/arrow-integration-test/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-integration-test/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs index 32edc4165938..4b896ed391be 100644 --- a/arrow-integration-test/src/field.rs +++ b/arrow-integration-test/src/field.rs @@ -252,6 +252,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { _ => data_type, }; + #[allow(deprecated)] let mut field = Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered); field.set_metadata(metadata); Ok(field) @@ -274,17 +275,21 @@ pub fn field_to_json(field: &Field) -> serde_json::Value { }; match field.data_type() { - DataType::Dictionary(ref index_type, ref value_type) => serde_json::json!({ - "name": field.name(), - "nullable": field.is_nullable(), - "type": data_type_to_json(value_type), - "children": children, - "dictionary": { - "id": field.dict_id().unwrap(), - "indexType": data_type_to_json(index_type), - "isOrdered": field.dict_is_ordered().unwrap(), - } - }), + DataType::Dictionary(ref index_type, ref value_type) => { + #[allow(deprecated)] + let dict_id = field.dict_id().unwrap(); + serde_json::json!({ + "name": field.name(), + "nullable": field.is_nullable(), + "type": data_type_to_json(value_type), + "children": children, + "dictionary": { + "id": dict_id, + "indexType": data_type_to_json(index_type), + "isOrdered": field.dict_is_ordered().unwrap(), + } + }) + } _ => serde_json::json!({ "name": field.name(), "nullable": field.is_nullable(), diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index ea5b545f2e81..f025009c22de 100644 --- 
a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -787,6 +787,7 @@ pub fn array_from_json( Ok(Arc::new(array)) } DataType::Dictionary(key_type, value_type) => { + #[allow(deprecated)] let dict_id = field.dict_id().ok_or_else(|| { ArrowError::JsonError(format!("Unable to find dict_id for field {field:?}")) })?; @@ -930,10 +931,12 @@ pub fn dictionary_array_from_json( let null_buf = create_null_buf(&json_col); // build the key data into a buffer, then construct values separately + #[allow(deprecated)] let key_field = Field::new_dict( "key", dict_key.clone(), field.is_nullable(), + #[allow(deprecated)] field .dict_id() .expect("Dictionary fields must have a dict_id value"), @@ -1192,7 +1195,7 @@ mod tests { Field::new("utf8s", DataType::Utf8, true), Field::new( "lists", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), Field::new( @@ -1249,7 +1252,7 @@ mod tests { let value_data = Int32Array::from(vec![None, Some(2), None, None]); let value_offsets = Buffer::from_slice_ref([0, 3, 4, 4]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs index 541a1ec746ac..512f0aed8e54 100644 --- a/arrow-integration-test/src/schema.rs +++ b/arrow-integration-test/src/schema.rs @@ -150,7 +150,7 @@ mod tests { Field::new("c21", DataType::Interval(IntervalUnit::MonthDayNano), false), Field::new( "c22", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), false, ), Field::new( @@ -189,6 +189,7 @@ mod tests { Field::new("c30", DataType::Duration(TimeUnit::Millisecond), false), Field::new("c31", DataType::Duration(TimeUnit::Microsecond), false), Field::new("c32", DataType::Duration(TimeUnit::Nanosecond), false), + #[allow(deprecated)] Field::new_dict( "c33", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml index 7be56d919852..8654b4b92734 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -36,20 +36,17 @@ logging = ["tracing-subscriber"] [dependencies] arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json", "ffi"] } arrow-flight = { path = "../arrow-flight", default-features = false } -arrow-buffer = { path = "../arrow-buffer", default-features = false } arrow-integration-test = { path = "../arrow-integration-test", default-features = false } -async-trait = { version = "0.1.41", default-features = false } clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] } futures = { version = "0.3", default-features = false } -hex = { version = "0.4", default-features = false, features = ["std"] } prost = { version = "0.13", default-features = false } serde = { version = "1.0", default-features = false, features = ["rc", "derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } -tokio = { version = "1.0", default-features = false } +tokio = { version = "1.0", default-features = false, 
features = [ "rt-multi-thread"] } tonic = { version = "0.12", default-features = false } tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true } -num = { version = "0.4", default-features = false, features = ["std"] } flate2 = { version = "1", default-features = false, features = ["rust_backend"] } [dev-dependencies] +arrow-buffer = { path = "../arrow-buffer", default-features = false } tempfile = { version = "3", default-features = false } diff --git a/arrow-integration-testing/README.md b/arrow-integration-testing/README.md index dcf39c27fbc5..86c79f5030ce 100644 --- a/arrow-integration-testing/README.md +++ b/arrow-integration-testing/README.md @@ -53,7 +53,7 @@ pip install -e dev/archery[integration] ### Build the C++ binaries: -Follow the [C++ Direction](https://github.com/apache/arrow/tree/master/docs/source/developers/cpp) and build the integration test binaries with a command like this: +Follow the [C++ Direction](https://github.com/apache/arrow/tree/main/docs/source/developers/cpp) and build the integration test binaries with a command like this: ``` # build cpp binaries diff --git a/arrow-integration-testing/src/bin/arrow-file-to-stream.rs b/arrow-integration-testing/src/bin/arrow-file-to-stream.rs index 3e027faef91f..661f0a047db4 100644 --- a/arrow-integration-testing/src/bin/arrow-file-to-stream.rs +++ b/arrow-integration-testing/src/bin/arrow-file-to-stream.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] + use arrow::error::Result; use arrow::ipc::reader::FileReader; use arrow::ipc::writer::StreamWriter; diff --git a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs index cc3dd2110e36..6a901cc63bab 100644 --- a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs +++ b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] + use arrow::error::{ArrowError, Result}; use arrow::ipc::reader::FileReader; use arrow::ipc::writer::FileWriter; diff --git a/arrow-integration-testing/src/bin/arrow-stream-to-file.rs b/arrow-integration-testing/src/bin/arrow-stream-to-file.rs index 07ac5c7ddd42..8b4bb332781c 100644 --- a/arrow-integration-testing/src/bin/arrow-stream-to-file.rs +++ b/arrow-integration-testing/src/bin/arrow-stream-to-file.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] + use std::io; use arrow::error::Result; diff --git a/arrow-integration-testing/src/bin/flight-test-integration-client.rs b/arrow-integration-testing/src/bin/flight-test-integration-client.rs index b8bbb952837b..0d16fe3b403f 100644 --- a/arrow-integration-testing/src/bin/flight-test-integration-client.rs +++ b/arrow-integration-testing/src/bin/flight-test-integration-client.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
+// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] + use arrow_integration_testing::flight_client_scenarios; use clap::Parser; type Error = Box; diff --git a/arrow-integration-testing/src/bin/flight-test-integration-server.rs b/arrow-integration-testing/src/bin/flight-test-integration-server.rs index 5310d07d4f8e..94be71309799 100644 --- a/arrow-integration-testing/src/bin/flight-test-integration-server.rs +++ b/arrow-integration-testing/src/bin/flight-test-integration-server.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] + use arrow_integration_testing::flight_server_scenarios; use clap::Parser; diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs index c8289ff446a0..406419028d00 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs @@ -29,7 +29,7 @@ use arrow::{ }; use arrow_flight::{ flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient, - utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, Location, SchemaAsIpc, Ticket, + utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, IpcMessage, Location, Ticket, }; use futures::{channel::mpsc, sink::SinkExt, stream, StreamExt}; use tonic::{Request, Streaming}; @@ -72,7 +72,20 @@ async fn upload_data( let (mut upload_tx, upload_rx) = mpsc::channel(10); let options = arrow::ipc::writer::IpcWriteOptions::default(); - let mut schema_flight_data: FlightData = SchemaAsIpc::new(&schema, &options).into(); + #[allow(deprecated)] + let mut dict_tracker = + writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); + let data_gen = writer::IpcDataGenerator::default(); + let data = IpcMessage( + data_gen + .schema_to_bytes_with_dictionary_tracker(&schema, &mut dict_tracker, &options) + .ipc_message + .into(), + ); + let mut schema_flight_data = FlightData { + data_header: data.0, + ..Default::default() + }; // arrow_flight::utils::flight_data_from_arrow_schema(&schema, &options); schema_flight_data.flight_descriptor = Some(descriptor.clone()); upload_tx.send(schema_flight_data).await?; @@ -82,7 +95,14 @@ async fn upload_data( if let Some((counter, first_batch)) = original_data_iter.next() { let metadata = counter.to_string().into_bytes(); // Preload the first batch into the channel before starting the request - send_batch(&mut upload_tx, &metadata, first_batch, &options).await?; + send_batch( + &mut upload_tx, + &metadata, + first_batch, + &options, + &mut dict_tracker, + ) + .await?; let outer = client.do_put(Request::new(upload_rx)).await?; let mut inner = outer.into_inner(); @@ -97,7 +117,14 @@ async fn upload_data( // Stream the rest of the batches for (counter, batch) in original_data_iter { let metadata = counter.to_string().into_bytes(); - send_batch(&mut upload_tx, &metadata, batch, &options).await?; + send_batch( + &mut upload_tx, + &metadata, + batch, + &options, + &mut dict_tracker, + ) + .await?; let r = inner .next() @@ -124,12 +151,12 @@ async fn send_batch( metadata: &[u8], batch: &RecordBatch, options: &writer::IpcWriteOptions, + dictionary_tracker: 
&mut writer::DictionaryTracker, ) -> Result { let data_gen = writer::IpcDataGenerator::default(); - let mut dictionary_tracker = writer::DictionaryTracker::new_with_preserve_dict_id(false, true); let (encoded_dictionaries, encoded_batch) = data_gen - .encoded_batch(batch, &mut dictionary_tracker, options) + .encoded_batch(batch, dictionary_tracker, options) .expect("DictionaryTracker configured above to not error on replacement"); let dictionary_flight_data: Vec = diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs index 0f404b2ae289..92989a20393e 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs @@ -119,18 +119,32 @@ impl FlightService for FlightServiceImpl { .ok_or_else(|| Status::not_found(format!("Could not find flight. {key}")))?; let options = arrow::ipc::writer::IpcWriteOptions::default(); + #[allow(deprecated)] + let mut dictionary_tracker = + writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id()); + let data_gen = writer::IpcDataGenerator::default(); + let data = IpcMessage( + data_gen + .schema_to_bytes_with_dictionary_tracker( + &flight.schema, + &mut dictionary_tracker, + &options, + ) + .ipc_message + .into(), + ); + let schema_flight_data = FlightData { + data_header: data.0, + ..Default::default() + }; - let schema = std::iter::once(Ok(SchemaAsIpc::new(&flight.schema, &options).into())); + let schema = std::iter::once(Ok(schema_flight_data)); let batches = flight .chunks .iter() .enumerate() .flat_map(|(counter, batch)| { - let data_gen = writer::IpcDataGenerator::default(); - let mut dictionary_tracker = - writer::DictionaryTracker::new_with_preserve_dict_id(false, true); - let (encoded_dictionaries, encoded_batch) = data_gen .encoded_batch(batch, &mut dictionary_tracker, &options) .expect("DictionaryTracker configured above to not error on replacement"); diff --git a/arrow-integration-testing/src/lib.rs b/arrow-integration-testing/src/lib.rs index c8ce01e9f13b..e669690ef4f5 100644 --- a/arrow-integration-testing/src/lib.rs +++ b/arrow-integration-testing/src/lib.rs @@ -17,6 +17,8 @@ //! 
Common code used in the integration test binaries +// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets +#![allow(unused_crate_dependencies)] #![warn(missing_docs)] use serde_json::Value; diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index 94b89a55f2fb..cf91b3a3415f 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -36,7 +36,6 @@ bench = false [dependencies] arrow-array = { workspace = true } arrow-buffer = { workspace = true } -arrow-cast = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } flatbuffers = { version = "24.3.25", default-features = false } diff --git a/arrow-ipc/LICENSE.txt b/arrow-ipc/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-ipc/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-ipc/NOTICE.txt b/arrow-ipc/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-ipc/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index 18f5193bf038..37c5a19439c1 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -165,6 +165,7 @@ pub fn schema_to_fb_offset<'a>( impl From> for Field { fn from(field: crate::Field) -> Field { let arrow_field = if let Some(dictionary) = field.dictionary() { + #[allow(deprecated)] Field::new_dict( field.name().unwrap(), get_data_type(field, true), @@ -519,6 +520,7 @@ pub(crate) fn build_field<'a>( match dictionary_tracker { Some(tracker) => Some(get_fb_dictionary( index_type, + #[allow(deprecated)] tracker.set_dict_id(field), field .dict_is_ordered() @@ -527,6 +529,7 @@ pub(crate) fn build_field<'a>( )), None => Some(get_fb_dictionary( index_type, + #[allow(deprecated)] field .dict_id() .expect("Dictionary type must have a dictionary id"), @@ -1026,10 +1029,14 @@ mod tests { Field::new("utf8_view", DataType::Utf8View, false), Field::new("binary", DataType::Binary, false), Field::new("binary_view", DataType::BinaryView, false), - Field::new_list("list[u8]", Field::new("item", DataType::UInt8, false), true), + Field::new_list( + "list[u8]", + Field::new_list_field(DataType::UInt8, false), + true, + ), Field::new_fixed_size_list( "fixed_size_list[u8]", - Field::new("item", DataType::UInt8, false), + Field::new_list_field(DataType::UInt8, false), 2, true, ), @@ -1139,6 +1146,7 @@ mod tests { ), true, ), + #[allow(deprecated)] Field::new_dict( "dictionary", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), @@ -1146,6 +1154,7 @@ mod tests { 123, true, ), + #[allow(deprecated)] Field::new_dict( "dictionary", DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::UInt32)), diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index dcded32882fc..9ff4da30ed8c 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -196,6 +196,7 @@ fn create_array( let index_node = reader.next_node(field)?; let index_buffers = [reader.next_buffer()?, reader.next_buffer()?]; + #[allow(deprecated)] let dict_id = field.dict_id().ok_or_else(|| { ArrowError::ParseError(format!("Field {field} does not have dict id")) })?; @@ -617,6 +618,7 @@ fn read_dictionary_impl( } let id = batch.id(); + #[allow(deprecated)] let fields_using_this_dictionary = schema.fields_with_dict_id(id); let first_field = fields_using_this_dictionary.first().ok_or_else(|| { 
ArrowError::InvalidArgumentError(format!("dictionary id {id} not found in schema")) @@ -1407,10 +1409,10 @@ mod tests { fn create_test_projection_schema() -> Schema { // define field types - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let fixed_size_list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let union_fields = UnionFields::new( vec![0, 1], @@ -1424,7 +1426,7 @@ mod tests { let struct_fields = Fields::from(vec![ Field::new("id", DataType::Int32, false), - Field::new_list("list", Field::new("item", DataType::Int8, true), false), + Field::new_list("list", Field::new_list_field(DataType::Int8, true), false), ]); let struct_data_type = DataType::Struct(struct_fields); @@ -1725,6 +1727,7 @@ mod tests { let mut writer = crate::writer::FileWriter::try_new_with_options( &mut buf, batch.schema_ref(), + #[allow(deprecated)] IpcWriteOptions::default().with_preserve_dict_id(false), ) .unwrap(); @@ -1778,7 +1781,7 @@ mod tests { #[test] fn test_roundtrip_struct_empty_fields() { - let nulls = NullBuffer::from(&[true, true, false][..]); + let nulls = NullBuffer::from(&[true, true, false]); let rb = RecordBatch::try_from_iter([( "", Arc::new(StructArray::new_empty_fields(nulls.len(), Some(nulls))) as _, @@ -1869,6 +1872,7 @@ mod tests { let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 1, 1, 3]); let key_dict_array = DictionaryArray::new(key_dict_keys, values); + #[allow(deprecated)] let keys_field = Arc::new(Field::new_dict( "keys", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), @@ -1876,6 +1880,7 @@ mod tests { 1, false, )); + #[allow(deprecated)] let values_field = Arc::new(Field::new_dict( "values", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), @@ -1956,6 +1961,7 @@ mod tests { #[test] fn test_roundtrip_stream_dict_of_list_of_dict() { // list + #[allow(deprecated)] let list_data_type = DataType::List(Arc::new(Field::new_dict( "item", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), @@ -1967,6 +1973,7 @@ mod tests { test_roundtrip_stream_dict_of_list_of_dict_impl::(list_data_type, offsets); // large list + #[allow(deprecated)] let list_data_type = DataType::LargeList(Arc::new(Field::new_dict( "item", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), @@ -1985,6 +1992,7 @@ mod tests { let dict_array = DictionaryArray::new(keys, Arc::new(values)); let dict_data = dict_array.into_data(); + #[allow(deprecated)] let list_data_type = DataType::FixedSizeList( Arc::new(Field::new_dict( "item", @@ -2075,6 +2083,7 @@ mod tests { let key_dict_keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3]); let key_dict_array = DictionaryArray::new(key_dict_keys, utf8_view_array.clone()); + #[allow(deprecated)] let keys_field = Arc::new(Field::new_dict( "keys", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8View)), @@ -2085,6 +2094,7 @@ mod tests { let value_dict_keys = Int8Array::from_iter_values([0, 3, 0, 1, 2, 0, 1]); let value_dict_array = DictionaryArray::new(value_dict_keys, bin_view_array); + #[allow(deprecated)] let values_field = Arc::new(Field::new_dict( "values", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::BinaryView)), @@ -2150,6 +2160,7 @@ mod tests { .unwrap(); let gen = 
IpcDataGenerator {}; + #[allow(deprecated)] let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); let (_, encoded) = gen .encoded_batch(&batch, &mut dict_tracker, &Default::default()) @@ -2187,6 +2198,7 @@ mod tests { .unwrap(); let gen = IpcDataGenerator {}; + #[allow(deprecated)] let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); let (_, encoded) = gen .encoded_batch(&batch, &mut dict_tracker, &Default::default()) @@ -2326,6 +2338,7 @@ mod tests { ["a", "b"] .iter() .map(|name| { + #[allow(deprecated)] Field::new_dict( name.to_string(), DataType::Dictionary( @@ -2360,6 +2373,7 @@ mod tests { let mut writer = crate::writer::StreamWriter::try_new_with_options( &mut buf, batch.schema().as_ref(), + #[allow(deprecated)] crate::writer::IpcWriteOptions::default().with_preserve_dict_id(false), ) .expect("Failed to create StreamWriter"); diff --git a/arrow-ipc/src/reader/stream.rs b/arrow-ipc/src/reader/stream.rs index de5f5bdd629f..9b0eea9b6198 100644 --- a/arrow-ipc/src/reader/stream.rs +++ b/arrow-ipc/src/reader/stream.rs @@ -324,6 +324,7 @@ mod tests { "test1", DataType::RunEndEncoded( Arc::new(Field::new("run_ends".to_string(), DataType::Int32, false)), + #[allow(deprecated)] Arc::new(Field::new_dict( "values".to_string(), DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), @@ -353,6 +354,7 @@ mod tests { let mut writer = StreamWriter::try_new_with_options( &mut buffer, &schema, + #[allow(deprecated)] IpcWriteOptions::default().with_preserve_dict_id(false), ) .expect("Failed to create StreamWriter"); diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index e6fc9d81df67..ee5b9a54cc90 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -23,6 +23,7 @@ use std::cmp::min; use std::collections::HashMap; use std::io::{BufWriter, Write}; +use std::mem::size_of; use std::sync::Arc; use flatbuffers::FlatBufferBuilder; @@ -63,7 +64,11 @@ pub struct IpcWriteOptions { /// Flag indicating whether the writer should preserve the dictionary IDs defined in the /// schema or generate unique dictionary IDs internally during encoding. /// - /// Defaults to `true` + /// Defaults to `false` + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it." + )] preserve_dict_id: bool, } @@ -107,12 +112,13 @@ impl IpcWriteOptions { | crate::MetadataVersion::V3 => Err(ArrowError::InvalidArgumentError( "Writing IPC metadata version 3 and lower not supported".to_string(), )), + #[allow(deprecated)] crate::MetadataVersion::V4 => Ok(Self { alignment, write_legacy_ipc_format, metadata_version, batch_compression_type: None, - preserve_dict_id: true, + preserve_dict_id: false, }), crate::MetadataVersion::V5 => { if write_legacy_ipc_format { @@ -120,12 +126,13 @@ impl IpcWriteOptions { "Legacy IPC format only supported on metadata version 4".to_string(), )) } else { + #[allow(deprecated)] Ok(Self { alignment, write_legacy_ipc_format, metadata_version, batch_compression_type: None, - preserve_dict_id: true, + preserve_dict_id: false, }) } } @@ -137,7 +144,12 @@ impl IpcWriteOptions { /// Return whether the writer is configured to preserve the dictionary IDs /// defined in the schema + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it." 
+ )] pub fn preserve_dict_id(&self) -> bool { + #[allow(deprecated)] self.preserve_dict_id } @@ -148,6 +160,11 @@ impl IpcWriteOptions { /// to the dictionary batches in order to encode them correctly /// /// The default will change to `false` in future releases + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it." + )] + #[allow(deprecated)] pub fn with_preserve_dict_id(mut self, preserve_dict_id: bool) -> Self { self.preserve_dict_id = preserve_dict_id; self @@ -156,12 +173,13 @@ impl IpcWriteOptions { impl Default for IpcWriteOptions { fn default() -> Self { + #[allow(deprecated)] Self { alignment: 64, write_legacy_ipc_format: false, metadata_version: crate::MetadataVersion::V5, batch_compression_type: None, - preserve_dict_id: true, + preserve_dict_id: false, } } } @@ -419,6 +437,7 @@ impl IpcDataGenerator { // It's importnat to only take the dict_id at this point, because the dict ID // sequence is assigned depth-first, so we need to first encode children and have // them take their assigned dict IDs before we take the dict ID for this field. + #[allow(deprecated)] let dict_id = dict_id_seq .next() .or_else(|| field.dict_id()) @@ -766,6 +785,10 @@ pub struct DictionaryTracker { written: HashMap, dict_ids: Vec, error_on_replacement: bool, + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it." + )] preserve_dict_id: bool, } @@ -781,11 +804,12 @@ impl DictionaryTracker { /// the last seen dictionary ID (or using `0` if no other dictionary IDs have been /// seen) pub fn new(error_on_replacement: bool) -> Self { + #[allow(deprecated)] Self { written: HashMap::new(), dict_ids: Vec::new(), error_on_replacement, - preserve_dict_id: true, + preserve_dict_id: false, } } @@ -794,7 +818,12 @@ impl DictionaryTracker { /// If `error_on_replacement` /// is true, an error will be generated if an update to an /// existing dictionary is attempted. + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it." + )] pub fn new_with_preserve_dict_id(error_on_replacement: bool, preserve_dict_id: bool) -> Self { + #[allow(deprecated)] Self { written: HashMap::new(), dict_ids: Vec::new(), @@ -810,8 +839,14 @@ impl DictionaryTracker { /// /// If `preserve_dict_id` is false, this will return the value of the last `dict_id` assigned incremented by 1 /// or 0 in the case where no dictionary IDs have yet been assigned + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it." 
+ )] pub fn set_dict_id(&mut self, field: &Field) -> i64 { + #[allow(deprecated)] let next = if self.preserve_dict_id { + #[allow(deprecated)] field.dict_id().expect("no dict_id in field") } else { self.dict_ids @@ -935,7 +970,9 @@ impl FileWriter { writer.write_all(&super::ARROW_MAGIC)?; writer.write_all(&PADDING[..pad_len])?; // write the schema, set the written bytes to the schema + header + #[allow(deprecated)] let preserve_dict_id = write_options.preserve_dict_id; + #[allow(deprecated)] let mut dictionary_tracker = DictionaryTracker::new_with_preserve_dict_id(true, preserve_dict_id); let encoded_message = data_gen.schema_to_bytes_with_dictionary_tracker( @@ -1012,7 +1049,9 @@ impl FileWriter { let mut fbb = FlatBufferBuilder::new(); let dictionaries = fbb.create_vector(&self.dictionary_blocks); let record_batches = fbb.create_vector(&self.record_blocks); + #[allow(deprecated)] let preserve_dict_id = self.write_options.preserve_dict_id; + #[allow(deprecated)] let mut dictionary_tracker = DictionaryTracker::new_with_preserve_dict_id(true, preserve_dict_id); let schema = IpcSchemaEncoder::new() @@ -1143,7 +1182,9 @@ impl StreamWriter { write_options: IpcWriteOptions, ) -> Result { let data_gen = IpcDataGenerator::default(); + #[allow(deprecated)] let preserve_dict_id = write_options.preserve_dict_id; + #[allow(deprecated)] let mut dictionary_tracker = DictionaryTracker::new_with_preserve_dict_id(false, preserve_dict_id); @@ -1430,7 +1471,13 @@ fn reencode_offsets( let end_offset = offset_slice.last().unwrap(); let offsets = match start_offset.as_usize() { - 0 => offsets.clone(), + 0 => { + let size = size_of::(); + offsets.slice_with_length( + data.offset() * size, + (data.offset() + data.len() + 1) * size, + ) + } _ => offset_slice.iter().map(|x| *x - *start_offset).collect(), }; @@ -2025,6 +2072,7 @@ mod tests { let array = Arc::new(inner) as ArrayRef; // Dict field with id 2 + #[allow(deprecated)] let dctfield = Field::new_dict("dict", array.data_type().clone(), false, 2, false); let union_fields = [(0, Arc::new(dctfield))].into_iter().collect(); @@ -2042,6 +2090,7 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap(); let gen = IpcDataGenerator {}; + #[allow(deprecated)] let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); gen.encoded_batch(&batch, &mut dict_tracker, &Default::default()) .unwrap(); @@ -2058,6 +2107,7 @@ mod tests { let array = Arc::new(inner) as ArrayRef; // Dict field with id 2 + #[allow(deprecated)] let dctfield = Arc::new(Field::new_dict( "dict", array.data_type().clone(), @@ -2078,6 +2128,7 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![struct_array]).unwrap(); let gen = IpcDataGenerator {}; + #[allow(deprecated)] let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); gen.encoded_batch(&batch, &mut dict_tracker, &Default::default()) .unwrap(); @@ -2517,6 +2568,36 @@ mod tests { ls.finish() } + fn generate_nested_list_data_starting_at_zero() -> GenericListArray { + let mut ls = + GenericListBuilder::::new(GenericListBuilder::::new(UInt32Builder::new())); + + for _i in 0..999 { + ls.values().append(true); + ls.append(true); + } + + for j in 0..10 { + for value in [j, j, j, j] { + ls.values().values().append_value(value); + } + ls.values().append(true) + } + ls.append(true); + + for i in 0..9_000 { + for j in 0..10 { + for value in [i + j, i + j, i + j, i + j] { + ls.values().values().append_value(value); + } + ls.values().append(true) + } + ls.append(true); + } + 
+ ls.finish() + } + fn generate_map_array_data() -> MapArray { let keys_builder = UInt32Builder::new(); let values_builder = UInt32Builder::new(); @@ -2556,7 +2637,7 @@ mod tests { #[test] fn encode_lists() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::List(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2568,7 +2649,7 @@ mod tests { #[test] fn encode_empty_list() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::List(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2583,7 +2664,7 @@ mod tests { #[test] fn encode_large_lists() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::LargeList(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2597,8 +2678,8 @@ mod tests { #[test] fn encode_nested_lists() { - let inner_int = Arc::new(Field::new("item", DataType::UInt32, true)); - let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_int), true)); + let inner_int = Arc::new(Field::new_list_field(DataType::UInt32, true)); + let inner_list_field = Arc::new(Field::new_list_field(DataType::List(inner_int), true)); let list_field = Field::new("val", DataType::List(inner_list_field), true); let schema = Arc::new(Schema::new(vec![list_field])); @@ -2608,6 +2689,19 @@ mod tests { roundtrip_ensure_sliced_smaller(in_batch, 1000); } + #[test] + fn encode_nested_lists_starting_at_zero() { + let inner_int = Arc::new(Field::new("item", DataType::UInt32, true)); + let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_int), true)); + let list_field = Field::new("val", DataType::List(inner_list_field), true); + let schema = Arc::new(Schema::new(vec![list_field])); + + let values = Arc::new(generate_nested_list_data_starting_at_zero::()); + + let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap(); + roundtrip_ensure_sliced_smaller(in_batch, 1); + } + #[test] fn encode_map_array() { let keys = Arc::new(Field::new("keys", DataType::UInt32, false)); diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index 517bb03d2064..564cb9433b3d 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -48,7 +48,6 @@ chrono = { workspace = true } lexical-core = { version = "1.0", default-features = false} [dev-dependencies] -tempfile = "3.3" flate2 = { version = "1", default-features = false, features = ["rust_backend"] } serde = { version = "1.0", default-features = false, features = ["derive"] } futures = "0.3" diff --git a/arrow-json/LICENSE.txt b/arrow-json/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-json/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-json/NOTICE.txt b/arrow-json/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-json/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index bcacf6f706b8..f857e8813c7e 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -244,13 +244,6 @@ impl ReaderBuilder { Self { 
batch_size, ..self } } - /// Sets if the decoder should coerce primitive values (bool and number) into string - /// when the Schema's column is Utf8 or LargeUtf8. - #[deprecated(note = "Use with_coerce_primitive")] - pub fn coerce_primitive(self, coerce_primitive: bool) -> Self { - self.with_coerce_primitive(coerce_primitive) - } - /// Sets if the decoder should coerce primitive values (bool and number) into string /// when the Schema's column is Utf8 or LargeUtf8. pub fn with_coerce_primitive(self, coerce_primitive: bool) -> Self { @@ -691,6 +684,10 @@ fn make_decoder( DataType::Time32(TimeUnit::Millisecond) => primitive_decoder!(Time32MillisecondType, data_type), DataType::Time64(TimeUnit::Microsecond) => primitive_decoder!(Time64MicrosecondType, data_type), DataType::Time64(TimeUnit::Nanosecond) => primitive_decoder!(Time64NanosecondType, data_type), + DataType::Duration(TimeUnit::Nanosecond) => primitive_decoder!(DurationNanosecondType, data_type), + DataType::Duration(TimeUnit::Microsecond) => primitive_decoder!(DurationMicrosecondType, data_type), + DataType::Duration(TimeUnit::Millisecond) => primitive_decoder!(DurationMillisecondType, data_type), + DataType::Duration(TimeUnit::Second) => primitive_decoder!(DurationSecondType, data_type), DataType::Decimal128(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Decimal256(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Boolean => Ok(Box::::default()), @@ -1330,6 +1327,37 @@ mod tests { test_time::(); } + fn test_duration() { + let buf = r#" + {"a": 1, "b": "2"} + {"a": 3, "b": null} + "#; + + let schema = Arc::new(Schema::new(vec![ + Field::new("a", T::DATA_TYPE, true), + Field::new("b", T::DATA_TYPE, true), + ])); + + let batches = do_read(buf, 1024, true, false, schema); + assert_eq!(batches.len(), 1); + + let col_a = batches[0].column_by_name("a").unwrap().as_primitive::(); + assert_eq!(col_a.null_count(), 0); + assert_eq!(col_a.values(), &[1, 3].map(T::Native::usize_as)); + + let col2 = batches[0].column_by_name("b").unwrap().as_primitive::(); + assert_eq!(col2.null_count(), 1); + assert_eq!(col2.values(), &[2, 0].map(T::Native::usize_as)); + } + + #[test] + fn test_durations() { + test_duration::(); + test_duration::(); + test_duration::(); + test_duration::(); + } + #[test] fn test_delta_checkpoint() { let json = "{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}"; @@ -1726,12 +1754,12 @@ mod tests { assert_eq!(&DataType::Int64, a.1.data_type()); let b = schema.column_with_name("b").unwrap(); assert_eq!( - &DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), b.1.data_type() ); let c = schema.column_with_name("c").unwrap(); assert_eq!( - &DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), c.1.data_type() ); let d = schema.column_with_name("d").unwrap(); @@ -1770,7 +1798,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "items", - DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))), + DataType::List(FieldRef::new(Field::new_list_field(DataType::Null, true))), true, )])); @@ -1794,9 +1822,8 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "items", - DataType::List(FieldRef::new(Field::new( - "item", - DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))), + DataType::List(FieldRef::new(Field::new_list_field( + 
DataType::List(FieldRef::new(Field::new_list_field(DataType::Null, true))), true, ))), true, diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs index ace7b0ea5cb6..07eb40106de0 100644 --- a/arrow-json/src/reader/schema.rs +++ b/arrow-json/src/reader/schema.rs @@ -77,7 +77,7 @@ impl InferredType { /// Shorthand for building list data type of `ty` fn list_type_of(ty: DataType) -> DataType { - DataType::List(Arc::new(Field::new("item", ty, true))) + DataType::List(Arc::new(Field::new_list_field(ty, true))) } /// Coerce data type during inference diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index a37aa5ff8c2c..ee6d83a0a1f0 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -1771,7 +1771,7 @@ mod tests { #[test] fn test_writer_fixed_size_list() { let size = 3; - let field = FieldRef::new(Field::new("item", DataType::Int32, true)); + let field = FieldRef::new(Field::new_list_field(DataType::Int32, true)); let schema = SchemaRef::new(Schema::new(vec![Field::new( "list", DataType::FixedSizeList(field, size), diff --git a/arrow-ord/Cargo.toml b/arrow-ord/Cargo.toml index c9c30074fe6e..8d74d2f97d72 100644 --- a/arrow-ord/Cargo.toml +++ b/arrow-ord/Cargo.toml @@ -39,8 +39,7 @@ arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } -num = { version = "0.4", default-features = false, features = ["std"] } -half = { version = "2.1", default-features = false, features = ["num-traits"] } [dev-dependencies] +half = { version = "2.1", default-features = false, features = ["num-traits"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } diff --git a/arrow-ord/LICENSE.txt b/arrow-ord/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-ord/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-ord/NOTICE.txt b/arrow-ord/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-ord/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index f571e26c444c..2727ff996150 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -656,7 +656,10 @@ pub fn compare_byte_view( /// /// # Safety /// The left/right_idx must within range of each array -#[deprecated(note = "Use `GenericByteViewArray::compare_unchecked` instead")] +#[deprecated( + since = "52.2.0", + note = "Use `GenericByteViewArray::compare_unchecked` instead" +)] pub unsafe fn compare_byte_view_unchecked( left: &GenericByteViewArray, left_idx: usize, diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs index d60bc3b8de88..bb82f54d4918 100644 --- a/arrow-ord/src/comparison.rs +++ b/arrow-ord/src/comparison.rs @@ -821,7 +821,7 @@ mod tests { .into_data(); let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 6, 9]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs index 6430c8f0e405..55e397cd8aa4 100644 --- a/arrow-ord/src/ord.rs +++ b/arrow-ord/src/ord.rs @@ -265,7 +265,7 @@ fn compare_struct( Ok(f) } -#[deprecated(note = "Use make_comparator")] +#[deprecated(since = "52.0.0", note = "Use 
make_comparator")] #[doc(hidden)] pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result { make_comparator(left, right, SortOptions::default()) @@ -394,7 +394,7 @@ pub fn make_comparator( } #[cfg(test)] -pub mod tests { +mod tests { use super::*; use arrow_array::builder::{Int32Builder, ListBuilder}; use arrow_buffer::{i256, IntervalDayTime, OffsetBuffer}; @@ -849,7 +849,7 @@ pub mod tests { fn test_struct() { let fields = Fields::from(vec![ Field::new("a", DataType::Int32, true), - Field::new_list("b", Field::new("item", DataType::Int32, true), true), + Field::new_list("b", Field::new_list_field(DataType::Int32, true), true), ]); let a = Int32Array::from(vec![Some(1), Some(2), None, None]); diff --git a/arrow-ord/src/partition.rs b/arrow-ord/src/partition.rs index 8c87eefadbf0..ec1647393239 100644 --- a/arrow-ord/src/partition.rs +++ b/arrow-ord/src/partition.rs @@ -24,7 +24,6 @@ use arrow_buffer::BooleanBuffer; use arrow_schema::ArrowError; use crate::cmp::distinct; -use crate::sort::SortColumn; /// A computed set of partitions, see [`partition`] #[derive(Debug, Clone)] @@ -160,21 +159,6 @@ fn find_boundaries(v: &dyn Array) -> Result { Ok(distinct(&v1, &v2)?.values().clone()) } -/// Use [`partition`] instead. Given a list of already sorted columns, find -/// partition ranges that would partition lexicographically equal values across -/// columns. -/// -/// The returned vec would be of size k where k is cardinality of the sorted values; Consecutive -/// values will be connected: (a, b) and (b, c), where start = 0 and end = n for the first and last -/// range. -#[deprecated(note = "Use partition")] -pub fn lexicographical_partition_ranges( - columns: &[SortColumn], -) -> Result> + '_, ArrowError> { - let cols: Vec<_> = columns.iter().map(|x| x.values.clone()).collect(); - Ok(partition(&cols)?.ranges().into_iter()) -} - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 0834f2d13384..03d08df30959 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -34,4 +34,4 @@ crate-type = ["cdylib"] [dependencies] arrow = { path = "../arrow", features = ["pyarrow"] } -pyo3 = { version = "0.22", features = ["extension-module"] } +pyo3 = { version = "0.23", features = ["extension-module"] } diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index e12c1389e66f..d4908fff0897 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -43,7 +43,7 @@ fn to_py_err(err: ArrowError) -> PyErr { #[pyfunction] fn double(array: &Bound, py: Python) -> PyResult { // import - let array = make_array(ArrayData::from_pyarrow_bound(&array)?); + let array = make_array(ArrayData::from_pyarrow_bound(array)?); // perform some operation let array = array diff --git a/arrow-row/Cargo.toml b/arrow-row/Cargo.toml index 3754afb4dbc6..90d99684d265 100644 --- a/arrow-row/Cargo.toml +++ b/arrow-row/Cargo.toml @@ -33,12 +33,6 @@ name = "arrow_row" path = "src/lib.rs" bench = false -[target.'cfg(target_arch = "wasm32")'.dependencies] -ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] } - -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } - [dependencies] arrow-array = { workspace = true } arrow-buffer = { workspace = true } diff --git 
a/arrow-row/LICENSE.txt b/arrow-row/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-row/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-row/NOTICE.txt b/arrow-row/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-row/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 5780bdbfefb9..d0fad12210db 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -2317,7 +2317,7 @@ mod tests { let values_len = offsets.last().unwrap().to_usize().unwrap(); let values = values(values_len); let nulls = NullBuffer::from_iter((0..len).map(|_| rng.gen_bool(valid_percent))); - let field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); ListArray::new(field, offsets, values, Some(nulls)) } diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index 628d4a683cac..1e1f9fbde0e4 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -47,3 +47,8 @@ features = ["ffi"] [dev-dependencies] serde_json = "1.0" bincode = { version = "1.3.3", default-features = false } +criterion = { version = "0.5", default-features = false } + +[[bench]] +name = "ffi" +harness = false \ No newline at end of file diff --git a/arrow-schema/LICENSE.txt b/arrow-schema/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-schema/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-schema/NOTICE.txt b/arrow-schema/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-schema/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-schema/benches/ffi.rs b/arrow-schema/benches/ffi.rs new file mode 100644 index 000000000000..1285acb883ea --- /dev/null +++ b/arrow-schema/benches/ffi.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_schema::ffi::FFI_ArrowSchema; +use arrow_schema::{DataType, Field}; +use criterion::*; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let fields = vec![ + Arc::new(Field::new("c1", DataType::Utf8, false)), + Arc::new(Field::new("c2", DataType::Utf8, false)), + Arc::new(Field::new("c3", DataType::Utf8, false)), + Arc::new(Field::new("c4", DataType::Utf8, false)), + Arc::new(Field::new("c5", DataType::Utf8, false)), + ]; + let data_type = DataType::Struct(fields.into()); + c.bench_function("ffi_arrow_schema_try_from", |b| { + b.iter(|| FFI_ArrowSchema::try_from(&data_type)); + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index ff5832dfa68c..a6333c804805 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -40,7 +40,7 @@ use crate::{ArrowError, Field, FieldRef, Fields, UnionFields}; /// # use arrow_schema::{DataType, Field}; /// # use std::sync::Arc; /// // create a new list of 32-bit signed integers directly -/// let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); +/// let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); /// // Create the same list type with constructor /// let list_data_type2 = DataType::new_list(DataType::Int32, true); /// assert_eq!(list_data_type, list_data_type2); @@ -837,21 +837,21 @@ mod tests { #[test] fn test_list_datatype_equality() { // tests that list type equality is checked while ignoring list names - let list_a = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_a = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_b = DataType::List(Arc::new(Field::new("array", DataType::Int32, true))); - let list_c = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); - let list_d = DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))); + let list_c = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); + let list_d = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, true))); assert!(list_a.equals_datatype(&list_b)); assert!(!list_a.equals_datatype(&list_c)); assert!(!list_b.equals_datatype(&list_c)); assert!(!list_a.equals_datatype(&list_d)); let list_e = - DataType::FixedSizeList(Arc::new(Field::new("item", list_a.clone(), false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(list_a.clone(), false)), 3); let list_f = DataType::FixedSizeList(Arc::new(Field::new("array", list_b.clone(), false)), 3); let list_g = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::FixedSizeBinary(3), true)), + Arc::new(Field::new_list_field(DataType::FixedSizeBinary(3), true)), 3, ); assert!(list_e.equals_datatype(&list_f)); diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 4378950329f3..bf557d8941dc 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -90,8 +90,8 @@ impl<'a> Parser<'a> { self.expect_token(Token::LParen)?; let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; - Ok(DataType::List(Arc::new(Field::new( - "item", data_type, true, + Ok(DataType::List(Arc::new(Field::new_list_field( + data_type, true, )))) } @@ -100,8 +100,8 @@ impl<'a> Parser<'a> { self.expect_token(Token::LParen)?; let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; - 
Ok(DataType::LargeList(Arc::new(Field::new( - "item", data_type, true, + Ok(DataType::LargeList(Arc::new(Field::new_list_field( + data_type, true, )))) } @@ -113,7 +113,7 @@ impl<'a> Parser<'a> { let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; Ok(DataType::FixedSizeList( - Arc::new(Field::new("item", data_type, true)), + Arc::new(Field::new_list_field(data_type, true)), length, )) } diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index 70650d769cf6..96c80974982c 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -38,6 +38,7 @@ use crate::{ ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode, }; use bitflags::bitflags; +use std::borrow::Cow; use std::sync::Arc; use std::{ collections::HashMap, @@ -685,57 +686,59 @@ impl TryFrom<&DataType> for FFI_ArrowSchema { } } -fn get_format_string(dtype: &DataType) -> Result { +fn get_format_string(dtype: &DataType) -> Result, ArrowError> { match dtype { - DataType::Null => Ok("n".to_string()), - DataType::Boolean => Ok("b".to_string()), - DataType::Int8 => Ok("c".to_string()), - DataType::UInt8 => Ok("C".to_string()), - DataType::Int16 => Ok("s".to_string()), - DataType::UInt16 => Ok("S".to_string()), - DataType::Int32 => Ok("i".to_string()), - DataType::UInt32 => Ok("I".to_string()), - DataType::Int64 => Ok("l".to_string()), - DataType::UInt64 => Ok("L".to_string()), - DataType::Float16 => Ok("e".to_string()), - DataType::Float32 => Ok("f".to_string()), - DataType::Float64 => Ok("g".to_string()), - DataType::BinaryView => Ok("vz".to_string()), - DataType::Binary => Ok("z".to_string()), - DataType::LargeBinary => Ok("Z".to_string()), - DataType::Utf8View => Ok("vu".to_string()), - DataType::Utf8 => Ok("u".to_string()), - DataType::LargeUtf8 => Ok("U".to_string()), - DataType::FixedSizeBinary(num_bytes) => Ok(format!("w:{num_bytes}")), - DataType::FixedSizeList(_, num_elems) => Ok(format!("+w:{num_elems}")), - DataType::Decimal128(precision, scale) => Ok(format!("d:{precision},{scale}")), - DataType::Decimal256(precision, scale) => Ok(format!("d:{precision},{scale},256")), - DataType::Date32 => Ok("tdD".to_string()), - DataType::Date64 => Ok("tdm".to_string()), - DataType::Time32(TimeUnit::Second) => Ok("tts".to_string()), - DataType::Time32(TimeUnit::Millisecond) => Ok("ttm".to_string()), - DataType::Time64(TimeUnit::Microsecond) => Ok("ttu".to_string()), - DataType::Time64(TimeUnit::Nanosecond) => Ok("ttn".to_string()), - DataType::Timestamp(TimeUnit::Second, None) => Ok("tss:".to_string()), - DataType::Timestamp(TimeUnit::Millisecond, None) => Ok("tsm:".to_string()), - DataType::Timestamp(TimeUnit::Microsecond, None) => Ok("tsu:".to_string()), - DataType::Timestamp(TimeUnit::Nanosecond, None) => Ok("tsn:".to_string()), - DataType::Timestamp(TimeUnit::Second, Some(tz)) => Ok(format!("tss:{tz}")), - DataType::Timestamp(TimeUnit::Millisecond, Some(tz)) => Ok(format!("tsm:{tz}")), - DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) => Ok(format!("tsu:{tz}")), - DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Ok(format!("tsn:{tz}")), - DataType::Duration(TimeUnit::Second) => Ok("tDs".to_string()), - DataType::Duration(TimeUnit::Millisecond) => Ok("tDm".to_string()), - DataType::Duration(TimeUnit::Microsecond) => Ok("tDu".to_string()), - DataType::Duration(TimeUnit::Nanosecond) => Ok("tDn".to_string()), - DataType::Interval(IntervalUnit::YearMonth) => Ok("tiM".to_string()), - DataType::Interval(IntervalUnit::DayTime) => Ok("tiD".to_string()), 
- DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".to_string()), - DataType::List(_) => Ok("+l".to_string()), - DataType::LargeList(_) => Ok("+L".to_string()), - DataType::Struct(_) => Ok("+s".to_string()), - DataType::Map(_, _) => Ok("+m".to_string()), - DataType::RunEndEncoded(_, _) => Ok("+r".to_string()), + DataType::Null => Ok("n".into()), + DataType::Boolean => Ok("b".into()), + DataType::Int8 => Ok("c".into()), + DataType::UInt8 => Ok("C".into()), + DataType::Int16 => Ok("s".into()), + DataType::UInt16 => Ok("S".into()), + DataType::Int32 => Ok("i".into()), + DataType::UInt32 => Ok("I".into()), + DataType::Int64 => Ok("l".into()), + DataType::UInt64 => Ok("L".into()), + DataType::Float16 => Ok("e".into()), + DataType::Float32 => Ok("f".into()), + DataType::Float64 => Ok("g".into()), + DataType::BinaryView => Ok("vz".into()), + DataType::Binary => Ok("z".into()), + DataType::LargeBinary => Ok("Z".into()), + DataType::Utf8View => Ok("vu".into()), + DataType::Utf8 => Ok("u".into()), + DataType::LargeUtf8 => Ok("U".into()), + DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))), + DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))), + DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))), + DataType::Decimal256(precision, scale) => { + Ok(Cow::Owned(format!("d:{precision},{scale},256"))) + } + DataType::Date32 => Ok("tdD".into()), + DataType::Date64 => Ok("tdm".into()), + DataType::Time32(TimeUnit::Second) => Ok("tts".into()), + DataType::Time32(TimeUnit::Millisecond) => Ok("ttm".into()), + DataType::Time64(TimeUnit::Microsecond) => Ok("ttu".into()), + DataType::Time64(TimeUnit::Nanosecond) => Ok("ttn".into()), + DataType::Timestamp(TimeUnit::Second, None) => Ok("tss:".into()), + DataType::Timestamp(TimeUnit::Millisecond, None) => Ok("tsm:".into()), + DataType::Timestamp(TimeUnit::Microsecond, None) => Ok("tsu:".into()), + DataType::Timestamp(TimeUnit::Nanosecond, None) => Ok("tsn:".into()), + DataType::Timestamp(TimeUnit::Second, Some(tz)) => Ok(Cow::Owned(format!("tss:{tz}"))), + DataType::Timestamp(TimeUnit::Millisecond, Some(tz)) => Ok(Cow::Owned(format!("tsm:{tz}"))), + DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) => Ok(Cow::Owned(format!("tsu:{tz}"))), + DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Ok(Cow::Owned(format!("tsn:{tz}"))), + DataType::Duration(TimeUnit::Second) => Ok("tDs".into()), + DataType::Duration(TimeUnit::Millisecond) => Ok("tDm".into()), + DataType::Duration(TimeUnit::Microsecond) => Ok("tDu".into()), + DataType::Duration(TimeUnit::Nanosecond) => Ok("tDn".into()), + DataType::Interval(IntervalUnit::YearMonth) => Ok("tiM".into()), + DataType::Interval(IntervalUnit::DayTime) => Ok("tiD".into()), + DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()), + DataType::List(_) => Ok("+l".into()), + DataType::LargeList(_) => Ok("+L".into()), + DataType::Struct(_) => Ok("+s".into()), + DataType::Map(_, _) => Ok("+m".into()), + DataType::RunEndEncoded(_, _) => Ok("+r".into()), DataType::Dictionary(key_data_type, _) => get_format_string(key_data_type), DataType::Union(fields, mode) => { let formats = fields @@ -743,8 +746,8 @@ fn get_format_string(dtype: &DataType) -> Result { .map(|(t, _)| t.to_string()) .collect::>(); match mode { - UnionMode::Dense => Ok(format!("{}:{}", "+ud", formats.join(","))), - UnionMode::Sparse => Ok(format!("{}:{}", "+us", formats.join(","))), + UnionMode::Dense => Ok(Cow::Owned(format!("{}:{}", "+ud", 
formats.join(",")))), + UnionMode::Sparse => Ok(Cow::Owned(format!("{}:{}", "+us", formats.join(",")))), } } other => Err(ArrowError::CDataInterface(format!( @@ -920,6 +923,7 @@ mod tests { #[test] fn test_dictionary_ordered() { + #[allow(deprecated)] let schema = Schema::new(vec![Field::new_dict( "dict", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index b532ea8616b6..7d47c0ae1dea 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -38,6 +38,10 @@ pub struct Field { name: String, data_type: DataType, nullable: bool, + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it." + )] dict_id: i64, dict_is_ordered: bool, /// A map of key-value pairs containing additional custom meta data. @@ -117,8 +121,12 @@ impl Hash for Field { } impl Field { + /// Default list member field name + pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item"; + /// Creates a new field with the given name, type, and nullability pub fn new(name: impl Into, data_type: DataType, nullable: bool) -> Self { + #[allow(deprecated)] Field { name: name.into(), data_type, @@ -144,10 +152,14 @@ impl Field { /// ); /// ``` pub fn new_list_field(data_type: DataType, nullable: bool) -> Self { - Self::new("item", data_type, nullable) + Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable) } /// Creates a new field that has additional dictionary information + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter." + )] pub fn new_dict( name: impl Into, data_type: DataType, @@ -155,6 +167,7 @@ impl Field { dict_id: i64, dict_is_ordered: bool, ) -> Self { + #[allow(deprecated)] Field { name: name.into(), data_type, @@ -383,19 +396,30 @@ impl Field { /// Returns a vector containing all (potentially nested) `Field` instances selected by the /// dictionary ID they use #[inline] + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it." + )] pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> { self.fields() .into_iter() .filter(|&field| { - matches!(field.data_type(), DataType::Dictionary(_, _)) && field.dict_id == id + #[allow(deprecated)] + let matching_dict_id = field.dict_id == id; + matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id }) .collect() } /// Returns the dictionary ID, if this is a dictionary type. #[inline] + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it." 
+ )] pub const fn dict_id(&self) -> Option { match self.data_type { + #[allow(deprecated)] DataType::Dictionary(_, _) => Some(self.dict_id), _ => None, } @@ -425,6 +449,7 @@ impl Field { /// assert!(field.is_nullable()); /// ``` pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> { + #[allow(deprecated)] if from.dict_id != self.dict_id { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because from dict_id = {} does not match {}", @@ -567,9 +592,11 @@ impl Field { /// * self.metadata is a superset of other.metadata /// * all other fields are equal pub fn contains(&self, other: &Field) -> bool { + #[allow(deprecated)] + let matching_dict_id = self.dict_id == other.dict_id; self.name == other.name && self.data_type.contains(&other.data_type) - && self.dict_id == other.dict_id + && matching_dict_id && self.dict_is_ordered == other.dict_is_ordered // self need to be nullable or both of them are not nullable && (self.nullable || !other.nullable) @@ -618,6 +645,7 @@ mod test { fn test_new_dict_with_string() { // Fields should allow owned Strings to support reuse let s = "c1"; + #[allow(deprecated)] Field::new_dict(s, DataType::Int64, false, 4, false); } @@ -735,6 +763,7 @@ mod test { #[test] fn test_fields_with_dict_id() { + #[allow(deprecated)] let dict1 = Field::new_dict( "dict1", DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()), @@ -742,6 +771,7 @@ mod test { 10, false, ); + #[allow(deprecated)] let dict2 = Field::new_dict( "dict2", DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()), @@ -778,9 +808,11 @@ mod test { false, ); + #[allow(deprecated)] for field in field.fields_with_dict_id(10) { assert_eq!(dict1, *field); } + #[allow(deprecated)] for field in field.fields_with_dict_id(20) { assert_eq!(dict2, *field); } @@ -795,6 +827,7 @@ mod test { #[test] fn test_field_comparison_case() { // dictionary-encoding properties not used for field comparison + #[allow(deprecated)] let dict1 = Field::new_dict( "dict1", DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()), @@ -802,6 +835,7 @@ mod test { 10, false, ); + #[allow(deprecated)] let dict2 = Field::new_dict( "dict1", DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()), @@ -813,6 +847,7 @@ mod test { assert_eq!(dict1, dict2); assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2)); + #[allow(deprecated)] let dict1 = Field::new_dict( "dict0", DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()), diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs index 5b9ce2a6da61..904b933cd299 100644 --- a/arrow-schema/src/fields.rs +++ b/arrow-schema/src/fields.rs @@ -18,7 +18,7 @@ use std::ops::Deref; use std::sync::Arc; -use crate::{ArrowError, DataType, Field, FieldRef, SchemaBuilder}; +use crate::{ArrowError, DataType, Field, FieldRef}; /// A cheaply cloneable, owned slice of [`FieldRef`] /// @@ -256,33 +256,6 @@ impl Fields { .collect(); Ok(filtered) } - - /// Remove a field by index and return it. - /// - /// # Panic - /// - /// Panics if `index` is out of bounds. 
- /// - /// # Example - /// ``` - /// use arrow_schema::{DataType, Field, Fields}; - /// let mut fields = Fields::from(vec![ - /// Field::new("a", DataType::Boolean, false), - /// Field::new("b", DataType::Int8, false), - /// Field::new("c", DataType::Utf8, false), - /// ]); - /// assert_eq!(fields.len(), 3); - /// assert_eq!(fields.remove(1), Field::new("b", DataType::Int8, false).into()); - /// assert_eq!(fields.len(), 2); - /// ``` - #[deprecated(note = "Use SchemaBuilder::remove")] - #[doc(hidden)] - pub fn remove(&mut self, index: usize) -> FieldRef { - let mut builder = SchemaBuilder::from(Fields::from(&*self.0)); - let field = builder.remove(index); - *self = builder.finish().fields; - field - } } impl Default for Fields { @@ -496,7 +469,12 @@ mod tests { Field::new("floats", DataType::Struct(floats.clone()), true), true, ), - Field::new_fixed_size_list("f", Field::new("item", DataType::Int32, false), 3, false), + Field::new_fixed_size_list( + "f", + Field::new_list_field(DataType::Int32, false), + 3, + false, + ), Field::new_map( "g", "entries", diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index cc3a8a308a83..6c79da53f981 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -389,7 +389,12 @@ impl Schema { /// Returns a vector of immutable references to all [`Field`] instances selected by /// the dictionary ID they use. + #[deprecated( + since = "54.0.0", + note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it." + )] pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> { + #[allow(deprecated)] self.fields .iter() .flat_map(|f| f.fields_with_dict_id(dict_id)) @@ -434,33 +439,6 @@ impl Schema { .iter() .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()) } - - /// Remove field by index and return it. Recommend to use [`SchemaBuilder`] - /// if you are looking to remove multiple columns, as this will save allocations. - /// - /// # Panic - /// - /// Panics if `index` is out of bounds. 
- /// - /// # Example - /// - /// ``` - /// use arrow_schema::{DataType, Field, Schema}; - /// let mut schema = Schema::new(vec![ - /// Field::new("a", DataType::Boolean, false), - /// Field::new("b", DataType::Int8, false), - /// Field::new("c", DataType::Utf8, false), - /// ]); - /// assert_eq!(schema.fields.len(), 3); - /// assert_eq!(schema.remove(1), Field::new("b", DataType::Int8, false).into()); - /// assert_eq!(schema.fields.len(), 2); - /// ``` - #[deprecated(note = "Use SchemaBuilder::remove")] - #[doc(hidden)] - #[allow(deprecated)] - pub fn remove(&mut self, index: usize) -> FieldRef { - self.fields.remove(index) - } } impl fmt::Display for Schema { @@ -665,7 +643,9 @@ mod tests { assert_eq!(first_name.name(), "first_name"); assert_eq!(first_name.data_type(), &DataType::Utf8); assert!(!first_name.is_nullable()); - assert_eq!(first_name.dict_id(), None); + #[allow(deprecated)] + let dict_id = first_name.dict_id(); + assert_eq!(dict_id, None); assert_eq!(first_name.dict_is_ordered(), None); let metadata = first_name.metadata(); @@ -682,7 +662,9 @@ mod tests { interests.data_type(), &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) ); - assert_eq!(interests.dict_id(), Some(123)); + #[allow(deprecated)] + let dict_id = interests.dict_id(); + assert_eq!(dict_id, Some(123)); assert_eq!(interests.dict_is_ordered(), Some(true)); } @@ -718,6 +700,7 @@ mod tests { fn schema_field_with_dict_id() { let schema = person_schema(); + #[allow(deprecated)] let fields_dict_123: Vec<_> = schema .fields_with_dict_id(123) .iter() @@ -725,7 +708,9 @@ mod tests { .collect(); assert_eq!(fields_dict_123, vec!["interests"]); - assert!(schema.fields_with_dict_id(456).is_empty()); + #[allow(deprecated)] + let is_empty = schema.fields_with_dict_id(456).is_empty(); + assert!(is_empty); } fn person_schema() -> Schema { @@ -745,6 +730,7 @@ mod tests { ])), false, ), + #[allow(deprecated)] Field::new_dict( "interests", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), diff --git a/arrow-select/LICENSE.txt b/arrow-select/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-select/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-select/NOTICE.txt b/arrow-select/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-select/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index 4c6a5c0668f1..c91732848653 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -431,17 +431,17 @@ where R::Native: AddAssign, { let run_ends: &RunEndBuffer = array.run_ends(); - let mut values_filter = BooleanBufferBuilder::new(run_ends.len()); let mut new_run_ends = vec![R::default_value(); run_ends.len()]; let mut start = 0u64; - let mut i = 0; + let mut j = 0; let mut count = R::default_value(); let filter_values = predicate.filter.values(); + let run_ends = run_ends.inner(); - for mut end in run_ends.inner().into_iter().map(|i| (*i).into() as u64) { + let pred: BooleanArray = BooleanBuffer::collect_bool(run_ends.len(), |i| { let mut keep = false; - + let mut end = run_ends[i].into() as u64; let difference = end.saturating_sub(filter_values.len() as u64); end -= difference; @@ -450,23 +450,18 @@ where count += R::Native::from(pred); keep |= pred } - // this is to avoid branching - new_run_ends[i] = count; - i += keep as usize; + new_run_ends[j] = count; + j += keep 
as usize; - values_filter.append(keep); start = end; - } - - new_run_ends.truncate(i); + keep + }) + .into(); - if values_filter.is_empty() { - new_run_ends.clear(); - } + new_run_ends.truncate(j); let values = array.values(); - let pred = BooleanArray::new(values_filter.finish(), None); let values = filter(&values, &pred)?; let run_ends = PrimitiveArray::::new(new_run_ends.into(), None); @@ -522,14 +517,14 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer { unsafe { MutableBuffer::from_trusted_len_iter_bool(bits).into() } } IterationStrategy::SlicesIterator => { - let mut builder = BooleanBufferBuilder::new(bit_util::ceil(predicate.count, 8)); + let mut builder = BooleanBufferBuilder::new(predicate.count); for (start, end) in SlicesIterator::new(&predicate.filter) { builder.append_packed_range(start + offset..end + offset, src) } builder.into() } IterationStrategy::Slices(slices) => { - let mut builder = BooleanBufferBuilder::new(bit_util::ceil(predicate.count, 8)); + let mut builder = BooleanBufferBuilder::new(predicate.count); for (start, end) in slices { builder.append_packed_range(*start + offset..*end + offset, src) } @@ -1336,7 +1331,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8, 8]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) @@ -1360,7 +1355,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0i64, 3, 3]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, false))); let expected = ArrayData::builder(list_data_type) .len(2) .add_buffer(value_offsets) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index a0520e969a6b..4a47017b79ab 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -265,6 +265,67 @@ fn interleave_fallback( Ok(make_array(array_data.freeze())) } +/// Interleave rows by index from multiple [`RecordBatch`] instances and return a new [`RecordBatch`]. +/// +/// This function will call [`interleave`] on each array of the [`RecordBatch`] instances and assemble a new [`RecordBatch`]. 
+/// +/// # Example +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{StringArray, Int32Array, RecordBatch, UInt32Array}; +/// # use arrow_schema::{DataType, Field, Schema}; +/// # use arrow_select::interleave::interleave_record_batch; +/// +/// let schema = Arc::new(Schema::new(vec![ +/// Field::new("a", DataType::Int32, true), +/// Field::new("b", DataType::Utf8, true), +/// ])); +/// +/// let batch1 = RecordBatch::try_new( +/// schema.clone(), +/// vec![ +/// Arc::new(Int32Array::from(vec![0, 1, 2])), +/// Arc::new(StringArray::from(vec!["a", "b", "c"])), +/// ], +/// ).unwrap(); +/// +/// let batch2 = RecordBatch::try_new( +/// schema.clone(), +/// vec![ +/// Arc::new(Int32Array::from(vec![3, 4, 5])), +/// Arc::new(StringArray::from(vec!["d", "e", "f"])), +/// ], +/// ).unwrap(); +/// +/// let indices = vec![(0, 1), (1, 2), (0, 0), (1, 1)]; +/// let interleaved = interleave_record_batch(&[&batch1, &batch2], &indices).unwrap(); +/// +/// let expected = RecordBatch::try_new( +/// schema, +/// vec![ +/// Arc::new(Int32Array::from(vec![1, 5, 0, 4])), +/// Arc::new(StringArray::from(vec!["b", "f", "a", "e"])), +/// ], +/// ).unwrap(); +/// assert_eq!(interleaved, expected); +/// ``` +pub fn interleave_record_batch( + record_batches: &[&RecordBatch], + indices: &[(usize, usize)], +) -> Result { + let schema = record_batches[0].schema(); + let columns = (0..schema.fields().len()) + .map(|i| { + let column_values: Vec<&dyn Array> = record_batches + .iter() + .map(|batch| batch.column(i).as_ref()) + .collect(); + interleave(&column_values, indices) + }) + .collect::, _>>()?; + RecordBatch::try_new(schema, columns) +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 07630a49fa11..71a7c77a8f92 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -1606,7 +1606,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1672,7 +1672,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1739,7 +1739,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1904,7 +1904,8 @@ mod tests { // Construct offsets let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) 
.add_buffer(value_offsets) @@ -2222,7 +2223,7 @@ mod tests { fn test_take_fixed_size_list_null_indices() { let indices = Int32Array::from_iter([Some(0), None]); let values = Arc::new(Int32Array::from(vec![0, 1, 2, 3])); - let arr_field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let arr_field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); let values = FixedSizeListArray::try_new(arr_field, 2, values, None).unwrap(); let r = take(&values, &indices, None).unwrap(); diff --git a/arrow-string/LICENSE.txt b/arrow-string/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow-string/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow-string/NOTICE.txt b/arrow-string/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow-string/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs index 6a28d44ea7aa..49fc244e72cc 100644 --- a/arrow-string/src/length.rs +++ b/arrow-string/src/length.rs @@ -710,7 +710,7 @@ mod tests { .build() .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let nulls = NullBuffer::from(vec![true, false, true]); let list_data = ArrayData::builder(list_data_type) .len(3) diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index 0a5aa77dbb95..e30e09146c6d 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -18,13 +18,16 @@ //! Provide SQL's LIKE operators for Arrow's string arrays use crate::predicate::Predicate; + use arrow_array::cast::AsArray; use arrow_array::*; use arrow_schema::*; use arrow_select::take::take; -use iterator::ArrayIter; + use std::sync::Arc; +pub use arrow_array::StringArrayType; + #[derive(Debug)] enum Op { Like(bool), @@ -150,39 +153,6 @@ fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result: ArrayAccessor + Sized { - /// Returns true if all data within this string array is ASCII - fn is_ascii(&self) -> bool; - /// Constructs a new iterator - fn iter(&self) -> ArrayIter; -} - -impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray { - fn is_ascii(&self) -> bool { - GenericStringArray::::is_ascii(self) - } - - fn iter(&self) -> ArrayIter { - GenericStringArray::::iter(self) - } -} -impl<'a> StringArrayType<'a> for &'a StringViewArray { - fn is_ascii(&self) -> bool { - StringViewArray::is_ascii(self) - } - - fn iter(&self) -> ArrayIter { - StringViewArray::iter(self) - } -} - fn apply<'a, T: StringArrayType<'a> + 'a>( op: Op, l: T, diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs index 5ad452a17b12..d14662be7280 100644 --- a/arrow-string/src/regexp.rs +++ b/arrow-string/src/regexp.rs @@ -447,8 +447,7 @@ pub fn regexp_match( if regex.is_none() { return Ok(new_null_array( - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( array.data_type().clone(), true, ))), diff --git a/arrow-string/src/substring.rs b/arrow-string/src/substring.rs index bfdafb790f39..fa6a47147521 100644 --- a/arrow-string/src/substring.rs +++ b/arrow-string/src/substring.rs @@ -636,7 +636,7 @@ mod tests { let data = ArrayData::builder(DataType::FixedSizeBinary(5)) .len(2) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .offset(1) 
.null_bit_buffer(Some(Buffer::from(bits_v))) .build() diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index a0fd96415a1d..8860cd61c5b3 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -54,9 +54,7 @@ arrow-select = { workspace = true } arrow-string = { workspace = true } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } -pyo3 = { version = "0.22.2", default-features = false, optional = true } - -chrono = { workspace = true, optional = true } +pyo3 = { version = "0.23", default-features = false, optional = true } [package.metadata.docs.rs] features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"] @@ -72,7 +70,7 @@ prettyprint = ["arrow-cast/prettyprint"] # not the core arrow code itself. Be aware that `rand` must be kept as # an optional dependency for supporting compile to wasm32-unknown-unknown # target without assuming an environment containing JavaScript. -test_utils = ["rand", "dep:chrono"] +test_utils = ["dep:rand"] pyarrow = ["pyo3", "ffi"] # force_validate runs full data validation for all arrays that are created # this is not enabled by default as it is too computationally expensive @@ -87,7 +85,6 @@ chrono = { workspace = true } criterion = { version = "0.5", default-features = false } half = { version = "2.1", default-features = false } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } -tempfile = { version = "3", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } [build-dependencies] diff --git a/arrow/LICENSE.txt b/arrow/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/arrow/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/arrow/NOTICE.txt b/arrow/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/arrow/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/arrow/README.md b/arrow/README.md index 557a0b474e4b..a1444005ec00 100644 --- a/arrow/README.md +++ b/arrow/README.md @@ -25,7 +25,7 @@ This crate contains the official Native Rust implementation of [Apache Arrow][arrow] in memory format, governed by the Apache Software Foundation. The [API documentation](https://docs.rs/arrow/latest) contains examples and full API. -There are several [examples](https://github.com/apache/arrow-rs/tree/master/arrow/examples) to start from as well. +There are several [examples](https://github.com/apache/arrow-rs/tree/main/arrow/examples) to start from as well. The API documentation for most recent, unreleased code is available [here](https://arrow.apache.org/rust/arrow/index.html). 
@@ -57,7 +57,7 @@ The `arrow` crate provides the following features which may be enabled in your ` - `ipc` (default) - support for reading [Arrow IPC Format](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc), also used as the wire protocol in [arrow-flight](https://crates.io/crates/arrow-flight) - `ipc_compression` - Enables reading and writing compressed IPC streams (also enables `ipc`) - `prettyprint` - support for formatting record batches as textual columns - implementations of some [compute](https://github.com/apache/arrow-rs/tree/master/arrow/src/compute/kernels) + implementations of some [compute](https://github.com/apache/arrow-rs/tree/main/arrow/src/compute/kernels) - `chrono-tz` - support of parsing timezone using [chrono-tz](https://docs.rs/chrono-tz/0.6.0/chrono_tz/) - `ffi` - bindings for the Arrow C [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html) - `pyarrow` - bindings for pyo3 to call arrow-rs from python diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index ec7990d3d764..5c4fcff13dee 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -250,6 +250,9 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("cast decimal128 to decimal128 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5))) }); + c.bench_function("cast decimal128 to decimal128 512 lower precision", |b| { + b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(6, 5))) + }); c.bench_function("cast decimal128 to decimal256 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal256(50, 5))) }); diff --git a/arrow/benches/concatenate_kernel.rs b/arrow/benches/concatenate_kernel.rs index 0c553f8b3f3c..034f5f2a305c 100644 --- a/arrow/benches/concatenate_kernel.rs +++ b/arrow/benches/concatenate_kernel.rs @@ -86,14 +86,14 @@ fn add_benchmark(c: &mut Criterion) { }); let v1 = FixedSizeListArray::try_new( - Arc::new(Field::new("item", DataType::Int32, true)), + Arc::new(Field::new_list_field(DataType::Int32, true)), 1024, Arc::new(create_primitive_array::(1024 * 1024, 0.0)), None, ) .unwrap(); let v2 = FixedSizeListArray::try_new( - Arc::new(Field::new("item", DataType::Int32, true)), + Arc::new(Field::new_list_field(DataType::Int32, true)), 1024, Arc::new(create_primitive_array::(1024 * 1024, 0.0)), None, diff --git a/arrow/benches/json_reader.rs b/arrow/benches/json_reader.rs index 8f3898c51f9d..c698a93fe869 100644 --- a/arrow/benches/json_reader.rs +++ b/arrow/benches/json_reader.rs @@ -102,22 +102,22 @@ fn small_bench_list(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new( "c1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), Field::new( "c2", - DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), true, ), Field::new( "c3", - DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, true))), true, ), Field::new( "c4", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), true, ), ])); diff --git a/arrow/benches/lexsort.rs b/arrow/benches/lexsort.rs index cd952299df47..bb1c6081eaf9 100644 --- a/arrow/benches/lexsort.rs +++ 
b/arrow/benches/lexsort.rs @@ -83,7 +83,7 @@ impl Column { Column::RequiredI32List => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), true, ); create_random_array(&field, size, 0., 1.).unwrap() @@ -91,7 +91,7 @@ impl Column { Column::OptionalI32List => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ); create_random_array(&field, size, 0.2, 1.).unwrap() @@ -99,7 +99,7 @@ impl Column { Column::Required4CharStringList => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, false))), true, ); create_random_array(&field, size, 0., 1.).unwrap() @@ -107,7 +107,7 @@ impl Column { Column::Optional4CharStringList => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ); create_random_array(&field, size, 0.2, 1.).unwrap() diff --git a/arrow/examples/builders.rs b/arrow/examples/builders.rs index 5c8cd51c55a0..8043ad82fca6 100644 --- a/arrow/examples/builders.rs +++ b/arrow/examples/builders.rs @@ -76,7 +76,7 @@ fn main() { let array_data = ArrayData::builder(DataType::Utf8) .len(3) .add_buffer(Buffer::from(offsets.to_byte_slice())) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(&values)) .null_bit_buffer(Some(Buffer::from([0b00000101]))) .build() .unwrap(); @@ -97,7 +97,7 @@ fn main() { let value_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice()); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 5002e5bf181a..7fc5acdc1b19 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -336,7 +336,7 @@ //! //! If you think you have found an instance where this is possible, please file //! a ticket in our [issue tracker] and it will be triaged and fixed. For more information on -//! arrow's use of unsafe, see [here](https://github.com/apache/arrow-rs/tree/master/arrow#safety). +//! arrow's use of unsafe, see [here](https://github.com/apache/arrow-rs/tree/main/arrow#safety). //! //! # Higher-level Processing //! diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 6effe1c03e01..4ccbd0541d3f 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -111,7 +111,7 @@ impl IntoPyArrow for T { } fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { - let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?; + let pyarrow = PyModule::import(value.py(), "pyarrow")?; let class = pyarrow.getattr(expected)?; if !value.is_instance(&class)? 
{ let expected_module = class.getattr("__module__")?.extract::()?; @@ -177,7 +177,7 @@ impl ToPyArrow for DataType { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import_bound("pyarrow")?; + let module = py.import("pyarrow")?; let class = module.getattr("DataType")?; let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype.into()) @@ -213,7 +213,7 @@ impl ToPyArrow for Field { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import_bound("pyarrow")?; + let module = py.import("pyarrow")?; let class = module.getattr("Field")?; let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype.into()) @@ -249,7 +249,7 @@ impl ToPyArrow for Schema { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import_bound("pyarrow")?; + let module = py.import("pyarrow")?; let class = module.getattr("Schema")?; let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(schema.into()) @@ -309,7 +309,7 @@ impl ToPyArrow for ArrayData { let array = FFI_ArrowArray::new(self); let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?; - let module = py.import_bound("pyarrow")?; + let module = py.import("pyarrow")?; let class = module.getattr("Array")?; let array = class.call_method1( "_import_from_c", @@ -318,7 +318,7 @@ impl ToPyArrow for ArrayData { addr_of!(schema) as Py_uintptr_t, ), )?; - Ok(array.to_object(py)) + Ok(array.unbind()) } } @@ -335,7 +335,7 @@ impl ToPyArrow for Vec { .iter() .map(|v| v.to_pyarrow(py)) .collect::>>()?; - Ok(values.to_object(py)) + Ok(PyList::new(py, values)?.unbind().into()) } } @@ -451,7 +451,7 @@ impl FromPyArrow for ArrowArrayStreamReader { // make the conversion through PyArrow's private API // this changes the pointer's memory and is thus unsafe. 
// In particular, `_export_to_c` can go out of bounds - let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]); + let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t])?; value.call_method1("_export_to_c", args)?; let stream_reader = ArrowArrayStreamReader::try_new(stream) @@ -469,9 +469,9 @@ impl IntoPyArrow for Box { let mut stream = FFI_ArrowArrayStream::new(self); let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream; - let module = py.import_bound("pyarrow")?; + let module = py.import("pyarrow")?; let class = module.getattr("RecordBatchReader")?; - let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]); + let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t])?; let reader = class.call_method1("_import_from_c", args)?; Ok(PyObject::from(reader)) @@ -500,11 +500,17 @@ impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType { } } -impl IntoPy for PyArrowType { - fn into_py(self, py: Python) -> PyObject { +impl<'py, T: IntoPyArrow> IntoPyObject<'py> for PyArrowType { + type Target = PyAny; + + type Output = Bound<'py, Self::Target>; + + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { match self.0.into_pyarrow(py) { - Ok(obj) => obj, - Err(err) => err.to_object(py), + Ok(obj) => Result::Ok(obj.into_bound(py)), + Err(err) => Result::Err(err), } } } diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index 56bbdefd522d..5f63812e51c0 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -538,7 +538,7 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new( "b", - DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::LargeUtf8, false))), false, ), Field::new("a", DataType::Int32, false), @@ -569,10 +569,8 @@ mod tests { Field::new("b", DataType::Boolean, true), Field::new( "c", - DataType::LargeList(Arc::new(Field::new( - "item", - DataType::List(Arc::new(Field::new( - "item", + DataType::LargeList(Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field( DataType::FixedSizeBinary(6), true, ))), diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index 8f86cbeab717..ef5ca6041700 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -315,7 +315,7 @@ fn make_fixed_size_list_array() -> FixedSizeListArray { // Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data) @@ -325,11 +325,11 @@ fn make_fixed_size_list_array() -> FixedSizeListArray { } fn make_fixed_size_binary_array() -> FixedSizeBinaryArray { - let values: [u8; 15] = *b"hellotherearrow"; + let values: &[u8; 15] = b"hellotherearrow"; let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) .len(3) - .add_buffer(Buffer::from(&values[..])) + .add_buffer(Buffer::from(values)) .build() .unwrap(); FixedSizeBinaryArray::from(array_data) @@ -348,7 +348,7 @@ fn make_list_array() -> ListArray { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = 
ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -371,7 +371,8 @@ fn make_large_list_array() -> LargeListArray { let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -466,12 +467,12 @@ fn get_all_types() -> Vec { LargeBinary, Utf8, LargeUtf8, - List(Arc::new(Field::new("item", DataType::Int8, true))), - List(Arc::new(Field::new("item", DataType::Utf8, true))), - FixedSizeList(Arc::new(Field::new("item", DataType::Int8, true)), 10), - FixedSizeList(Arc::new(Field::new("item", DataType::Utf8, false)), 10), - LargeList(Arc::new(Field::new("item", DataType::Int8, true))), - LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + List(Arc::new(Field::new_list_field(DataType::Int8, true))), + List(Arc::new(Field::new_list_field(DataType::Utf8, true))), + FixedSizeList(Arc::new(Field::new_list_field(DataType::Int8, true)), 10), + FixedSizeList(Arc::new(Field::new_list_field(DataType::Utf8, false)), 10), + LargeList(Arc::new(Field::new_list_field(DataType::Int8, true))), + LargeList(Arc::new(Field::new_list_field(DataType::Utf8, false))), Struct(Fields::from(vec![ Field::new("f1", DataType::Int32, true), Field::new("f2", DataType::Utf8, true), diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index 7ed4dae1ed08..94fb85030bf3 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -409,8 +409,7 @@ fn test_empty_offsets_list_equal() { let values = Int32Array::from(empty); let empty_offsets: [u8; 0] = []; - let a: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let a: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -422,8 +421,7 @@ fn test_empty_offsets_list_equal() { .unwrap() .into(); - let b: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let b: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -437,8 +435,7 @@ fn test_empty_offsets_list_equal() { test_equal(&a, &b, true); - let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -475,8 +472,7 @@ fn test_list_null() { // a list where the nullness of values is determined by the list's bitmap let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]); - let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -498,8 +494,7 @@ fn test_list_null() { None, None, ]); - let d: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let d: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 08f23c200d52..c6de9f4a3417 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -600,7 +600,7 @@ fn test_list_append() { ]); let list_value_offsets = 
Buffer::from_slice_ref([0i32, 3, 5, 11, 13, 13, 15, 15, 17]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), 8, None, 0, @@ -677,7 +677,7 @@ fn test_list_nulls_append() { let list_value_offsets = Buffer::from_slice_ref([0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), 12, Some(Buffer::from(&[0b11011011, 0b1110])), 0, @@ -940,7 +940,7 @@ fn test_list_of_strings_append() { ]); let list_value_offsets = Buffer::from_slice_ref([0, 3, 5, 6, 9, 10, 13]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), 6, None, 0, @@ -1141,7 +1141,7 @@ fn test_fixed_size_list_append() { Some(12), ]); let expected_fixed_size_list_data = ArrayData::try_new( - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::UInt16, true)), 2), + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::UInt16, true)), 2), 12, Some(Buffer::from(&[0b11011101, 0b101])), 0, diff --git a/arrow/tests/shrink_to_fit.rs b/arrow/tests/shrink_to_fit.rs new file mode 100644 index 000000000000..5d7c2cf98bc9 --- /dev/null +++ b/arrow/tests/shrink_to_fit.rs @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::{ + array::{Array, ArrayRef, ListArray, PrimitiveArray}, + buffer::OffsetBuffer, + datatypes::{Field, UInt8Type}, +}; + +/// Test that `shrink_to_fit` frees memory after concatenating a large number of arrays. +#[test] +fn test_shrink_to_fit_after_concat() { + let array_len = 6_000; + let num_concats = 100; + + let primitive_array: PrimitiveArray = (0..array_len) + .map(|v| (v % 255) as u8) + .collect::>() + .into(); + let primitive_array: ArrayRef = Arc::new(primitive_array); + + let list_array: ArrayRef = Arc::new(ListArray::new( + Field::new_list_field(primitive_array.data_type().clone(), false).into(), + OffsetBuffer::from_lengths([primitive_array.len()]), + primitive_array.clone(), + None, + )); + + // Num bytes allocated globally and by this thread, respectively. + let (concatenated, _bytes_allocated_globally, bytes_allocated_by_this_thread) = + memory_use(|| { + let mut concatenated = concatenate(num_concats, list_array.clone()); + concatenated.shrink_to_fit(); // This is what we're testing! 
+ dbg!(concatenated.data_type()); + concatenated + }); + let expected_len = num_concats * array_len; + assert_eq!(bytes_used(concatenated.clone()), expected_len); + eprintln!("The concatenated array is {expected_len} B long. Amount of memory used by this thread: {bytes_allocated_by_this_thread} B"); + + assert!( + expected_len <= bytes_allocated_by_this_thread, + "We must allocate at least as much space as the concatenated array" + ); + assert!( + bytes_allocated_by_this_thread <= expected_len + expected_len / 100, + "We shouldn't have more than 1% memory overhead. In fact, we are using {bytes_allocated_by_this_thread} B of memory for {expected_len} B of data" + ); +} + +fn concatenate(num_times: usize, array: ArrayRef) -> ArrayRef { + let mut concatenated = array.clone(); + for _ in 0..num_times - 1 { + concatenated = arrow::compute::kernels::concat::concat(&[&*concatenated, &*array]).unwrap(); + } + concatenated +} + +fn bytes_used(array: ArrayRef) -> usize { + let mut array = array; + loop { + match array.data_type() { + arrow::datatypes::DataType::UInt8 => break, + arrow::datatypes::DataType::List(_) => { + let list = array.as_any().downcast_ref::().unwrap(); + array = list.values().clone(); + } + _ => unreachable!(), + } + } + + array.len() +} + +// --- Memory tracking --- + +use std::{ + alloc::Layout, + sync::{ + atomic::{AtomicUsize, Ordering::Relaxed}, + Arc, + }, +}; + +static LIVE_BYTES_GLOBAL: AtomicUsize = AtomicUsize::new(0); + +thread_local! { + static LIVE_BYTES_IN_THREAD: AtomicUsize = const { AtomicUsize::new(0) } ; +} + +pub struct TrackingAllocator { + allocator: std::alloc::System, +} + +#[global_allocator] +pub static GLOBAL_ALLOCATOR: TrackingAllocator = TrackingAllocator { + allocator: std::alloc::System, +}; + +#[allow(unsafe_code)] +// SAFETY: +// We just do book-keeping and then let another allocator do all the actual work. +unsafe impl std::alloc::GlobalAlloc for TrackingAllocator { + #[allow(clippy::let_and_return)] + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // SAFETY: + // Just deferring + let ptr = unsafe { self.allocator.alloc(layout) }; + if !ptr.is_null() { + LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_add(layout.size(), Relaxed)); + LIVE_BYTES_GLOBAL.fetch_add(layout.size(), Relaxed); + } + ptr + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_sub(layout.size(), Relaxed)); + LIVE_BYTES_GLOBAL.fetch_sub(layout.size(), Relaxed); + + // SAFETY: + // Just deferring + unsafe { self.allocator.dealloc(ptr, layout) }; + } + + // No need to override `alloc_zeroed` or `realloc`, + // since they both by default just defer to `alloc` and `dealloc`. +} + +fn live_bytes_local() -> usize { + LIVE_BYTES_IN_THREAD.with(|bytes| bytes.load(Relaxed)) +} + +fn live_bytes_global() -> usize { + LIVE_BYTES_GLOBAL.load(Relaxed) +} + +/// Returns `(num_bytes_allocated, num_bytes_allocated_by_this_thread)`. 
+fn memory_use(run: impl Fn() -> R) -> (R, usize, usize) { + let used_bytes_start_local = live_bytes_local(); + let used_bytes_start_global = live_bytes_global(); + let ret = run(); + let bytes_used_local = live_bytes_local() - used_bytes_start_local; + let bytes_used_global = live_bytes_global() - used_bytes_start_global; + (ret, bytes_used_global, bytes_used_local) +} diff --git a/dev/release/README.md b/dev/release/README.md index d2d9e48bbb6b..6e6817bffb12 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -27,7 +27,7 @@ This file documents the release process for the "Rust Arrow Crates": `arrow`, `a The Rust Arrow Crates are interconnected (e.g. `parquet` has an optional dependency on `arrow`) so we increment and release all of them together. -If any code has been merged to master that has a breaking API change, as defined +If any code has been merged to main that has a breaking API change, as defined in [Rust RFC 1105] he major version number is incremented (e.g. `9.0.2` to `10.0.2`). Otherwise the new minor version incremented (e.g. `9.0.2` to `9.1.0`). @@ -46,19 +46,19 @@ crates.io, the Rust ecosystem's package manager. We create a `CHANGELOG.md` so our users know what has been changed between releases. The CHANGELOG is created automatically using -[update_change_log.sh](https://github.com/apache/arrow-rs/blob/master/dev/release/update_change_log.sh) +[update_change_log.sh](https://github.com/apache/arrow-rs/blob/main/dev/release/update_change_log.sh) This script creates a changelog using github issues and the labels associated with them. ## Prepare CHANGELOG and version: -Now prepare a PR to update `CHANGELOG.md` and versions on `master` to reflect the planned release. +Now prepare a PR to update `CHANGELOG.md` and versions on `main` to reflect the planned release. Do this in the root of this repository. For example [#2323](https://github.com/apache/arrow-rs/pull/2323) ```bash -git checkout master +git checkout main git pull git checkout -b @@ -72,6 +72,8 @@ export ARROW_GITHUB_API_TOKEN= # manually edit ./dev/release/update_change_log.sh to reflect the release version # create the changelog ./dev/release/update_change_log.sh +# commit the initial changes +git commit -a -m 'Create changelog' # run automated script to copy labels to issues based on referenced PRs # (NOTE 1: this must be done by a committer / other who has @@ -80,14 +82,12 @@ export ARROW_GITHUB_API_TOKEN= # NOTE 2: this must be done after creating the initial CHANGELOG file python dev/release/label_issues.py -# review change log / edit issues and labels if needed, rerun -git commit -a -m 'Create changelog' - -# Manually edit ./dev/release/update_change_log.sh to reflect the release version -# Create the changelog +# review change log / edit issues and labels if needed, rerun, repeat as necessary +# note you need to revert changes to CHANGELOG-old.md if you want to rerun the script CHANGELOG_GITHUB_TOKEN= ./dev/release/update_change_log.sh -# Review change log / edit issues and labels if needed, rerun -git commit -a -m 'Create changelog' + +# Commit the changes +git commit -a -m 'Update changelog' git push ``` @@ -96,7 +96,7 @@ Note that when reviewing the change log, rather than editing the `CHANGELOG.md`, it is preferred to update the issues and their labels (e.g. add `invalid` label to exclude them from release notes) -Merge this PR to `master` prior to the next step. +Merge this PR to `main` prior to the next step.
## Prepare release candidate tarball @@ -115,7 +115,7 @@ Create and push the tag thusly: ```shell git fetch apache -git tag apache/master +git tag apache/main # push tag to apache git push apache ``` diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh index a77ddbe75701..8b92509104c8 100755 --- a/dev/release/create-tarball.sh +++ b/dev/release/create-tarball.sh @@ -109,7 +109,7 @@ The vote will be open for at least 72 hours. [1]: https://github.com/apache/arrow-rs/tree/${release_hash} [2]: ${url} [3]: https://github.com/apache/arrow-rs/blob/${release_hash}/CHANGELOG.md -[4]: https://github.com/apache/arrow-rs/blob/master/dev/release/verify-release-candidate.sh +[4]: https://github.com/apache/arrow-rs/blob/main/dev/release/verify-release-candidate.sh MAIL echo "---------------------------------------------------------" diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index ab6460659d73..d00cc498625f 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="53.1.0" -FUTURE_RELEASE="53.2.0" +SINCE_TAG="53.2.0" +FUTURE_RELEASE="53.3.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index 86d1392ebf61..bcc8e0b92243 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -23,7 +23,7 @@ license = "MIT/Apache-2.0" readme = "README.md" description = "A generic object store interface for uniformly interacting with AWS S3, Google Cloud Storage, Azure Blob Storage and local files." keywords = ["object", "storage", "cloud"] -repository = "https://github.com/apache/arrow-rs/tree/master/object_store" +repository = "https://github.com/apache/arrow-rs/tree/main/object_store" rust-version = "1.64.0" [package.metadata.docs.rs] @@ -55,13 +55,14 @@ ring = { version = "0.17", default-features = false, features = ["std"], optiona rustls-pemfile = { version = "2.0", default-features = false, features = ["std"], optional = true } tokio = { version = "1.29.0", features = ["sync", "macros", "rt", "time", "io-util"] } md-5 = { version = "0.10.6", default-features = false, optional = true } +httparse = { version = "1.8.0", default-features = false, features = ["std"], optional = true } [target.'cfg(target_family="unix")'.dev-dependencies] nix = { version = "0.29.0", features = ["fs"] } [features] cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", "base64", "rand", "ring"] -azure = ["cloud"] +azure = ["cloud", "httparse"] gcp = ["cloud", "rustls-pemfile"] aws = ["cloud", "md-5"] http = ["cloud"] @@ -75,6 +76,10 @@ hyper-util = "0.1" http-body-util = "0.1" rand = "0.8" tempfile = "3.1.0" +regex = "1.11.1" +# The "gzip" feature for reqwest is enabled for an integration test. +reqwest = { version = "0.12", features = ["gzip"] } +http = "1.1.0" [[test]] name = "get_range_file" diff --git a/object_store/dev/release/README.md b/object_store/dev/release/README.md index 4077dcad9653..912ff4cd8bac 100644 --- a/object_store/dev/release/README.md +++ b/object_store/dev/release/README.md @@ -27,7 +27,7 @@ This file documents the release process for the `object_store` crate. At the time of writing, we release a new version of `object_store` on demand rather than on a regular schedule. As we are still in an early phase, we use the 0.x version scheme. 
If any code has -been merged to master that has a breaking API change, as defined in [Rust RFC 1105] +been merged to main that has a breaking API change, as defined in [Rust RFC 1105] the minor version number is incremented changed (e.g. `0.3.0` to `0.4.0`). Otherwise the patch version is incremented (e.g. `0.3.0` to `0.3.1`). @@ -45,14 +45,14 @@ crates.io, the Rust ecosystem's package manager. We create a `CHANGELOG.md` so our users know what has been changed between releases. The CHANGELOG is created automatically using -[update_change_log.sh](https://github.com/apache/arrow-rs/blob/master/object_store/dev/release/update_change_log.sh) +[update_change_log.sh](https://github.com/apache/arrow-rs/blob/main/object_store/dev/release/update_change_log.sh) This script creates a changelog using github issues and the labels associated with them. ## Prepare CHANGELOG and version: -Now prepare a PR to update `CHANGELOG.md` and versions on `master` to reflect the planned release. +Now prepare a PR to update `CHANGELOG.md` and versions on `main` to reflect the planned release. Note this process is done in the `object_store` directory. See [#6227] for an example @@ -62,7 +62,7 @@ Note this process is done in the `object_store` directory. See [#6227] for an e # NOTE: Run commands in object_store sub directory (not main repo checkout) # cd object_store -git checkout master +git checkout main git pull git checkout -b @@ -82,7 +82,7 @@ export CHANGELOG_GITHUB_TOKEN= # Commit changes git commit -a -m 'Create changelog' -# push changes to fork and create a PR to master +# push changes to fork and create a PR to main git push ``` @@ -90,7 +90,7 @@ Note that when reviewing the change log, rather than editing the `CHANGELOG.md`, it is preferred to update the issues and their labels (e.g. add `invalid` label to exclude them from release notes) -Merge this PR to `master` prior to the next step. +Merge this PR to `main` prior to the next step. ## Prepare release candidate tarball @@ -109,7 +109,7 @@ Create and push the tag thusly: ```shell git fetch apache -git tag apache/master +git tag apache/main # push tag to apache git push apache ``` @@ -170,7 +170,7 @@ The vote will be open for at least 72 hours. [1]: https://github.com/apache/arrow-rs/tree/b945b15de9085f5961a478d4f35b0c5c3427e248 [2]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-0.11.1-rc1 [3]: https://github.com/apache/arrow-rs/blob/b945b15de9085f5961a478d4f35b0c5c3427e248/object_store/CHANGELOG.md -[4]: https://github.com/apache/arrow-rs/blob/master/object_store/dev/release/verify-release-candidate.sh +[4]: https://github.com/apache/arrow-rs/blob/main/object_store/dev/release/verify-release-candidate.sh ``` For the release to become "official" it needs at least three Apache Arrow PMC members to vote +1 on it. diff --git a/object_store/dev/release/create-tarball.sh b/object_store/dev/release/create-tarball.sh index bbffde89b043..efc26fd0ef0f 100755 --- a/object_store/dev/release/create-tarball.sh +++ b/object_store/dev/release/create-tarball.sh @@ -101,7 +101,7 @@ The vote will be open for at least 72 hours. 
[1]: https://github.com/apache/arrow-rs/tree/${release_hash} [2]: ${url} [3]: https://github.com/apache/arrow-rs/blob/${release_hash}/object_store/CHANGELOG.md -[4]: https://github.com/apache/arrow-rs/blob/master/object_store/dev/release/verify-release-candidate.sh +[4]: https://github.com/apache/arrow-rs/blob/main/object_store/dev/release/verify-release-candidate.sh MAIL echo "---------------------------------------------------------" diff --git a/object_store/src/aws/builder.rs b/object_store/src/aws/builder.rs index eb79f5e6dc28..840245a7b5d4 100644 --- a/object_store/src/aws/builder.rs +++ b/object_store/src/aws/builder.rs @@ -170,6 +170,8 @@ pub struct AmazonS3Builder { encryption_bucket_key_enabled: Option>, /// base64-encoded 256-bit customer encryption key for SSE-C. encryption_customer_key_base64: Option, + /// When set to true, charge requester for bucket operations + request_payer: ConfigValue, } /// Configuration keys for [`AmazonS3Builder`] @@ -330,6 +332,13 @@ pub enum AmazonS3ConfigKey { /// - `s3_express` S3Express, + /// Enable Support for S3 Requester Pays + /// + /// Supported keys: + /// - `aws_request_payer` + /// - `request_payer` + RequestPayer, + /// Client options Client(ClientConfigKey), @@ -358,6 +367,7 @@ impl AsRef for AmazonS3ConfigKey { Self::CopyIfNotExists => "aws_copy_if_not_exists", Self::ConditionalPut => "aws_conditional_put", Self::DisableTagging => "aws_disable_tagging", + Self::RequestPayer => "aws_request_payer", Self::Client(opt) => opt.as_ref(), Self::Encryption(opt) => opt.as_ref(), } @@ -389,6 +399,7 @@ impl FromStr for AmazonS3ConfigKey { "aws_copy_if_not_exists" | "copy_if_not_exists" => Ok(Self::CopyIfNotExists), "aws_conditional_put" | "conditional_put" => Ok(Self::ConditionalPut), "aws_disable_tagging" | "disable_tagging" => Ok(Self::DisableTagging), + "aws_request_payer" | "request_payer" => Ok(Self::RequestPayer), // Backwards compatibility "aws_allow_http" => Ok(Self::Client(ClientConfigKey::AllowHttp)), "aws_server_side_encryption" => Ok(Self::Encryption( @@ -510,6 +521,9 @@ impl AmazonS3Builder { AmazonS3ConfigKey::ConditionalPut => { self.conditional_put = Some(ConfigValue::Deferred(value.into())) } + AmazonS3ConfigKey::RequestPayer => { + self.request_payer = ConfigValue::Deferred(value.into()) + } AmazonS3ConfigKey::Encryption(key) => match key { S3EncryptionConfigKey::ServerSideEncryption => { self.encryption_type = Some(ConfigValue::Deferred(value.into())) @@ -567,6 +581,7 @@ impl AmazonS3Builder { self.conditional_put.as_ref().map(ToString::to_string) } AmazonS3ConfigKey::DisableTagging => Some(self.disable_tagging.to_string()), + AmazonS3ConfigKey::RequestPayer => Some(self.request_payer.to_string()), AmazonS3ConfigKey::Encryption(key) => match key { S3EncryptionConfigKey::ServerSideEncryption => { self.encryption_type.as_ref().map(ToString::to_string) @@ -845,6 +860,14 @@ impl AmazonS3Builder { self } + /// Set whether to charge requester for bucket operations. + /// + /// + pub fn with_request_payer(mut self, enabled: bool) -> Self { + self.request_payer = ConfigValue::Parsed(enabled); + self + } + /// Create a [`AmazonS3`] instance from the provided values, /// consuming `self`. 
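As a usage note for the requester-pays support introduced in the builder above, the following is a minimal sketch of configuring it (assumes the `aws` feature of `object_store`; the bucket name and region are placeholders, and credentials are assumed to come from the environment or instance metadata):

```rust
use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey};

fn requester_pays_store() -> object_store::Result<()> {
    // Programmatic form using the new builder method...
    let _s3 = AmazonS3Builder::new()
        .with_bucket_name("example-bucket") // placeholder
        .with_region("us-east-1")           // placeholder
        .with_request_payer(true)
        .build()?;

    // ...or the equivalent string-keyed configuration, matching the new
    // `aws_request_payer` / `request_payer` keys.
    let _s3 = AmazonS3Builder::new()
        .with_bucket_name("example-bucket")
        .with_region("us-east-1")
        .with_config(AmazonS3ConfigKey::RequestPayer, "true")
        .build()?;

    Ok(())
}
```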
pub fn build(mut self) -> Result { @@ -996,6 +1019,7 @@ impl AmazonS3Builder { copy_if_not_exists, conditional_put: put_precondition, encryption_headers, + request_payer: self.request_payer.get()?, }; let client = Arc::new(S3Client::new(config)?); diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index 895308f5880e..81015e82b39c 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -29,7 +29,7 @@ use crate::client::list::ListClient; use crate::client::retry::RetryExt; use crate::client::s3::{ CompleteMultipartUpload, CompleteMultipartUploadResult, CopyPartResult, - InitiateMultipartUploadResult, ListResponse, + InitiateMultipartUploadResult, ListResponse, PartMetadata, }; use crate::client::GetOptionsExt; use crate::multipart::PartId; @@ -62,6 +62,7 @@ use std::sync::Arc; const VERSION_HEADER: &str = "x-amz-version-id"; const SHA256_CHECKSUM: &str = "x-amz-checksum-sha256"; const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-amz-meta-"; +const ALGORITHM: &str = "x-amz-checksum-algorithm"; /// A specialized `Error` for object store-related errors #[derive(Debug, Snafu)] @@ -202,6 +203,7 @@ pub(crate) struct S3Config { pub checksum: Option, pub copy_if_not_exists: Option, pub conditional_put: Option, + pub request_payer: bool, pub(super) encryption_headers: S3EncryptionHeaders, } @@ -245,11 +247,12 @@ struct SessionCredential<'a> { config: &'a S3Config, } -impl<'a> SessionCredential<'a> { +impl SessionCredential<'_> { fn authorizer(&self) -> Option> { let mut authorizer = AwsAuthorizer::new(self.credential.as_deref()?, "s3", &self.config.region) - .with_sign_payload(self.config.sign_payload); + .with_sign_payload(self.config.sign_payload) + .with_request_payer(self.config.request_payer); if self.session_token { let token = HeaderName::from_static("x-amz-s3session-token"); @@ -288,6 +291,7 @@ pub(crate) struct Request<'a> { payload: Option, use_session_creds: bool, idempotent: bool, + retry_on_conflict: bool, retry_error_body: bool, } @@ -315,6 +319,13 @@ impl<'a> Request<'a> { Self { idempotent, ..self } } + pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self { + Self { + retry_on_conflict, + ..self + } + } + pub(crate) fn retry_error_body(self, retry_error_body: bool) -> Self { Self { retry_error_body, @@ -380,10 +391,9 @@ impl<'a> Request<'a> { let payload_sha256 = sha256.finish(); if let Some(Checksum::SHA256) = self.config.checksum { - self.builder = self.builder.header( - "x-amz-checksum-sha256", - BASE64_STANDARD.encode(payload_sha256), - ); + self.builder = self + .builder + .header(SHA256_CHECKSUM, BASE64_STANDARD.encode(payload_sha256)); } self.payload_sha256 = Some(payload_sha256); } @@ -410,6 +420,7 @@ impl<'a> Request<'a> { self.builder .with_aws_sigv4(credential.authorizer(), sha) .retryable(&self.config.retry_config) + .retry_on_conflict(self.retry_on_conflict) .idempotent(self.idempotent) .retry_error_body(self.retry_error_body) .payload(self.payload) @@ -446,6 +457,7 @@ impl S3Client { config: &self.config, use_session_creds: true, idempotent: false, + retry_on_conflict: false, retry_error_body: false, } } @@ -605,8 +617,15 @@ impl S3Client { location: &Path, opts: PutMultipartOpts, ) -> Result { - let response = self - .request(Method::POST, location) + let mut request = self.request(Method::POST, location); + if let Some(algorithm) = self.config.checksum { + match algorithm { + Checksum::SHA256 => { + request = request.header(ALGORITHM, "SHA256"); + } + } + } + let response = request 
.query(&[("uploads", "")]) .with_encryption_headers() .with_attributes(opts.attributes) @@ -657,8 +676,13 @@ impl S3Client { request = request.with_encryption_headers(); } let response = request.send().await?; + let checksum_sha256 = response + .headers() + .get(SHA256_CHECKSUM) + .and_then(|v| v.to_str().ok()) + .map(|v| v.to_string()); - let content_id = match is_copy { + let e_tag = match is_copy { false => get_etag(response.headers()).context(MetadataSnafu)?, true => { let response = response @@ -670,6 +694,17 @@ impl S3Client { response.e_tag } }; + + let content_id = if self.config.checksum == Some(Checksum::SHA256) { + let meta = PartMetadata { + e_tag, + checksum_sha256, + }; + quick_xml::se::to_string(&meta).unwrap() + } else { + e_tag + }; + Ok(PartId { content_id }) } diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs index 33972c6fa14a..ee2f8e2ec953 100644 --- a/object_store/src/aws/credential.rs +++ b/object_store/src/aws/credential.rs @@ -101,11 +101,14 @@ pub struct AwsAuthorizer<'a> { region: &'a str, token_header: Option, sign_payload: bool, + request_payer: bool, } static DATE_HEADER: HeaderName = HeaderName::from_static("x-amz-date"); static HASH_HEADER: HeaderName = HeaderName::from_static("x-amz-content-sha256"); static TOKEN_HEADER: HeaderName = HeaderName::from_static("x-amz-security-token"); +static REQUEST_PAYER_HEADER: HeaderName = HeaderName::from_static("x-amz-request-payer"); +static REQUEST_PAYER_HEADER_VALUE: HeaderValue = HeaderValue::from_static("requester"); const ALGORITHM: &str = "AWS4-HMAC-SHA256"; impl<'a> AwsAuthorizer<'a> { @@ -118,6 +121,7 @@ impl<'a> AwsAuthorizer<'a> { date: None, sign_payload: true, token_header: None, + request_payer: false, } } @@ -134,6 +138,14 @@ impl<'a> AwsAuthorizer<'a> { self } + /// Set whether to include requester pays headers + /// + /// + pub fn with_request_payer(mut self, request_payer: bool) -> Self { + self.request_payer = request_payer; + self + } + /// Authorize `request` with an optional pre-calculated SHA256 digest by attaching /// the relevant [AWS SigV4] headers /// @@ -180,6 +192,15 @@ impl<'a> AwsAuthorizer<'a> { let header_digest = HeaderValue::from_str(&digest).unwrap(); request.headers_mut().insert(&HASH_HEADER, header_digest); + if self.request_payer { + // For DELETE, GET, HEAD, POST, and PUT requests, include x-amz-request-payer : + // requester in the header + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html + request + .headers_mut() + .insert(&REQUEST_PAYER_HEADER, REQUEST_PAYER_HEADER_VALUE.clone()); + } + let (signed_headers, canonical_headers) = canonicalize_headers(request.headers()); let scope = self.scope(date); @@ -226,6 +247,13 @@ impl<'a> AwsAuthorizer<'a> { .append_pair("X-Amz-Expires", &expires_in.as_secs().to_string()) .append_pair("X-Amz-SignedHeaders", "host"); + if self.request_payer { + // For signed URLs, include x-amz-request-payer=requester in the request + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html + url.query_pairs_mut() + .append_pair("x-amz-request-payer", "requester"); + } + // For S3, you must include the X-Amz-Security-Token query parameter in the URL if // using credentials sourced from the STS service. 
if let Some(ref token) = self.credential.token { @@ -763,12 +791,53 @@ mod tests { region: "us-east-1", sign_payload: true, token_header: None, + request_payer: false, }; signer.authorize(&mut request, None); assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=a3c787a7ed37f7fdfbfd2d7056a3d7c9d85e6d52a2bfbec73793c0be6e7862d4") } + #[test] + fn test_sign_with_signed_payload_request_payer() { + let client = Client::new(); + + // Test credentials from https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + // method = 'GET' + // service = 'ec2' + // host = 'ec2.amazonaws.com' + // region = 'us-east-1' + // endpoint = 'https://ec2.amazonaws.com' + // request_parameters = '' + let date = DateTime::parse_from_rfc3339("2022-08-06T18:01:34Z") + .unwrap() + .with_timezone(&Utc); + + let mut request = client + .request(Method::GET, "https://ec2.amazon.com/") + .build() + .unwrap(); + + let signer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "ec2", + region: "us-east-1", + sign_payload: true, + token_header: None, + request_payer: true, + }; + + signer.authorize(&mut request, None); + assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-request-payer, Signature=7030625a9e9b57ed2a40e63d749f4a4b7714b6e15004cab026152f870dd8565d") + } + #[test] fn test_sign_with_unsigned_payload() { let client = Client::new(); @@ -802,6 +871,7 @@ mod tests { region: "us-east-1", token_header: None, sign_payload: false, + request_payer: false, }; authorizer.authorize(&mut request, None); @@ -828,6 +898,7 @@ mod tests { region: "us-east-1", token_header: None, sign_payload: false, + request_payer: false, }; let mut url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap(); @@ -848,6 +919,48 @@ mod tests { ); } + #[test] + fn signed_get_url_request_payer() { + // Values from https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + let date = DateTime::parse_from_rfc3339("2013-05-24T00:00:00Z") + .unwrap() + .with_timezone(&Utc); + + let authorizer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "s3", + region: "us-east-1", + token_header: None, + sign_payload: false, + request_payer: true, + }; + + let mut url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap(); + authorizer.sign(Method::GET, &mut url, Duration::from_secs(86400)); + + assert_eq!( + url, + Url::parse( + "https://examplebucket.s3.amazonaws.com/test.txt?\ + X-Amz-Algorithm=AWS4-HMAC-SHA256&\ + X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request&\ + X-Amz-Date=20130524T000000Z&\ + X-Amz-Expires=86400&\ + X-Amz-SignedHeaders=host&\ + x-amz-request-payer=requester&\ + X-Amz-Signature=9ad7c781cc30121f199b47d35ed3528473e4375b63c5d91cd87c927803e4e00a" + ) + .unwrap() + ); + } + #[test] fn test_sign_port() { let client = Client::new(); @@ -880,6 +993,7 @@ 
mod tests { region: "us-east-1", token_header: None, sign_payload: true, + request_payer: false, }; authorizer.authorize(&mut request, None); diff --git a/object_store/src/aws/dynamo.rs b/object_store/src/aws/dynamo.rs index ece3b8a357c6..6283e76c1f87 100644 --- a/object_store/src/aws/dynamo.rs +++ b/object_store/src/aws/dynamo.rs @@ -471,7 +471,7 @@ enum ReturnValues { /// This provides cheap, ordered serialization of maps struct Map<'a, K, V>(&'a [(K, V)]); -impl<'a, K: Serialize, V: Serialize> Serialize for Map<'a, K, V> { +impl Serialize for Map<'_, K, V> { fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs index b238d90eb6d7..7f449c49963c 100644 --- a/object_store/src/aws/mod.rs +++ b/object_store/src/aws/mod.rs @@ -136,7 +136,8 @@ impl Signer for AmazonS3 { /// ``` async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result { let credential = self.credentials().get_credential().await?; - let authorizer = AwsAuthorizer::new(&credential, "s3", &self.client.config.region); + let authorizer = AwsAuthorizer::new(&credential, "s3", &self.client.config.region) + .with_request_payer(self.client.config.request_payer); let path_url = self.path_url(path); let mut url = Url::parse(&path_url).map_err(|e| crate::Error::Generic { @@ -169,10 +170,7 @@ impl ObjectStore for AmazonS3 { match (opts.mode, &self.client.config.conditional_put) { (PutMode::Overwrite, _) => request.idempotent(true).do_put().await, (PutMode::Create | PutMode::Update(_), None) => Err(Error::NotImplemented), - ( - PutMode::Create, - Some(S3ConditionalPut::ETagMatch | S3ConditionalPut::ETagPutIfNotExists), - ) => { + (PutMode::Create, Some(S3ConditionalPut::ETagMatch)) => { match request.header(&IF_NONE_MATCH, "*").do_put().await { // Technically If-None-Match should return NotModified but some stores, // such as R2, instead return PreconditionFailed @@ -196,9 +194,26 @@ impl ObjectStore for AmazonS3 { source: "ETag required for conditional put".to_string().into(), })?; match put { - S3ConditionalPut::ETagPutIfNotExists => Err(Error::NotImplemented), S3ConditionalPut::ETagMatch => { - request.header(&IF_MATCH, etag.as_str()).do_put().await + match request + .header(&IF_MATCH, etag.as_str()) + // Real S3 will occasionally report 409 Conflict + // if there are concurrent `If-Match` requests + // in flight, so we need to be prepared to retry + // 409 responses. + .retry_on_conflict(true) + .do_put() + .await + { + // Real S3 reports NotFound rather than PreconditionFailed when the + // object doesn't exist. Convert to PreconditionFailed for + // consistency with R2. This also matches what the HTTP spec + // says the behavior should be. 
+ Err(Error::NotFound { path, source }) => { + Err(Error::Precondition { path, source }) + } + r => r, + } } S3ConditionalPut::Dynamo(d) => { d.conditional_op(&self.client, location, Some(&etag), move || { @@ -478,6 +493,66 @@ mod tests { const NON_EXISTENT_NAME: &str = "nonexistentname"; + #[tokio::test] + async fn write_multipart_file_with_signature() { + maybe_skip_integration!(); + + let store = AmazonS3Builder::from_env() + .with_checksum_algorithm(Checksum::SHA256) + .build() + .unwrap(); + + let str = "test.bin"; + let path = Path::parse(str).unwrap(); + let opts = PutMultipartOpts::default(); + let mut upload = store.put_multipart_opts(&path, opts).await.unwrap(); + + upload + .put_part(PutPayload::from(vec![0u8; 10_000_000])) + .await + .unwrap(); + upload + .put_part(PutPayload::from(vec![0u8; 5_000_000])) + .await + .unwrap(); + + let res = upload.complete().await.unwrap(); + assert!(res.e_tag.is_some(), "Should have valid etag"); + + store.delete(&path).await.unwrap(); + } + + #[tokio::test] + async fn write_multipart_file_with_signature_object_lock() { + maybe_skip_integration!(); + + let bucket = "test-object-lock"; + let store = AmazonS3Builder::from_env() + .with_bucket_name(bucket) + .with_checksum_algorithm(Checksum::SHA256) + .build() + .unwrap(); + + let str = "test.bin"; + let path = Path::parse(str).unwrap(); + let opts = PutMultipartOpts::default(); + let mut upload = store.put_multipart_opts(&path, opts).await.unwrap(); + + upload + .put_part(PutPayload::from(vec![0u8; 10_000_000])) + .await + .unwrap(); + upload + .put_part(PutPayload::from(vec![0u8; 5_000_000])) + .await + .unwrap(); + + let res = upload.complete().await.unwrap(); + assert!(res.e_tag.is_some(), "Should have valid etag"); + + store.delete(&path).await.unwrap(); + } + #[tokio::test] async fn s3_test() { maybe_skip_integration!(); @@ -486,6 +561,7 @@ mod tests { let integration = config.build().unwrap(); let config = &integration.client.config; let test_not_exists = config.copy_if_not_exists.is_some(); + let test_conditional_put = config.conditional_put.is_some(); put_get_delete_list(&integration).await; get_opts(&integration).await; @@ -494,6 +570,7 @@ mod tests { rename_and_copy(&integration).await; stream_get(&integration).await; multipart(&integration, &integration).await; + multipart_race_condition(&integration, true).await; signing(&integration).await; s3_encryption(&integration).await; put_get_attributes(&integration).await; @@ -516,9 +593,8 @@ mod tests { if test_not_exists { copy_if_not_exists(&integration).await; } - if let Some(conditional_put) = &config.conditional_put { - let supports_update = !matches!(conditional_put, S3ConditionalPut::ETagPutIfNotExists); - put_opts(&integration, supports_update).await; + if test_conditional_put { + put_opts(&integration, true).await; } // run integration test with unsigned payload enabled diff --git a/object_store/src/aws/precondition.rs b/object_store/src/aws/precondition.rs index e5058052790d..b261ad0dbfb1 100644 --- a/object_store/src/aws/precondition.rs +++ b/object_store/src/aws/precondition.rs @@ -138,17 +138,6 @@ pub enum S3ConditionalPut { /// [HTTP precondition]: https://datatracker.ietf.org/doc/html/rfc9110#name-preconditions ETagMatch, - /// Like `ETagMatch`, but with support for `PutMode::Create` and not - /// `PutMode::Option`. - /// - /// This is the limited form of conditional put supported by Amazon S3 - /// as of August 2024 ([announcement]). - /// - /// Encoded as `etag-put-if-not-exists` ignoring whitespace. 
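To make the conditional-put behaviour above concrete, here is a minimal compare-and-swap sketch against the public `ObjectStore` API. It is illustrative only: it assumes a store configured with conditional put support (for example the S3 `etag` mode shown in this change) and an object already present at `path`.

```rust
use object_store::{
    path::Path, Error, ObjectStore, PutMode, PutOptions, PutPayload, UpdateVersion,
};

async fn cas_update(store: &dyn ObjectStore, path: &Path) -> object_store::Result<()> {
    loop {
        // Read the current object to learn its ETag / version.
        let current = store.get(path).await?;
        let version = UpdateVersion {
            e_tag: current.meta.e_tag.clone(),
            version: current.meta.version.clone(),
        };
        let opts = PutOptions::from(PutMode::Update(version));
        match store
            .put_opts(path, PutPayload::from_static(b"updated"), opts)
            .await
        {
            Ok(_) => return Ok(()),
            // Another writer changed the object in between; re-read and retry.
            Err(Error::Precondition { .. }) => continue,
            Err(e) => return Err(e),
        }
    }
}
```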
- /// - /// [announcement]: https://aws.amazon.com/about-aws/whats-new/2024/08/amazon-s3-conditional-writes/ - ETagPutIfNotExists, - /// The name of a DynamoDB table to use for coordination /// /// Encoded as either `dynamo:` or `dynamo::` @@ -164,7 +153,6 @@ impl std::fmt::Display for S3ConditionalPut { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::ETagMatch => write!(f, "etag"), - Self::ETagPutIfNotExists => write!(f, "etag-put-if-not-exists"), Self::Dynamo(lock) => write!(f, "dynamo: {}", lock.table_name()), } } @@ -174,7 +162,6 @@ impl S3ConditionalPut { fn from_str(s: &str) -> Option { match s.trim() { "etag" => Some(Self::ETagMatch), - "etag-put-if-not-exists" => Some(Self::ETagPutIfNotExists), trimmed => match trimmed.split_once(':')? { ("dynamo", s) => Some(Self::Dynamo(DynamoCommit::from_str(s)?)), _ => None, diff --git a/object_store/src/azure/builder.rs b/object_store/src/azure/builder.rs index 1c4589ba1ec6..08c9a232393d 100644 --- a/object_store/src/azure/builder.rs +++ b/object_store/src/azure/builder.rs @@ -240,6 +240,14 @@ pub enum AzureConfigKey { /// - `authority_id` AuthorityId, + /// Authority host used in oauth flows + /// + /// Supported keys: + /// - `azure_storage_authority_host` + /// - `azure_authority_host` + /// - `authority_host` + AuthorityHost, + /// Shared access signature. /// /// The signature is expected to be percent-encoded, much like they are provided @@ -383,6 +391,7 @@ impl AsRef for AzureConfigKey { Self::ClientId => "azure_storage_client_id", Self::ClientSecret => "azure_storage_client_secret", Self::AuthorityId => "azure_storage_tenant_id", + Self::AuthorityHost => "azure_storage_authority_host", Self::SasKey => "azure_storage_sas_key", Self::Token => "azure_storage_token", Self::UseEmulator => "azure_storage_use_emulator", @@ -427,6 +436,9 @@ impl FromStr for AzureConfigKey { | "azure_authority_id" | "tenant_id" | "authority_id" => Ok(Self::AuthorityId), + "azure_storage_authority_host" | "azure_authority_host" | "authority_host" => { + Ok(Self::AuthorityHost) + } "azure_storage_sas_key" | "azure_storage_sas_token" | "sas_key" | "sas_token" => { Ok(Self::SasKey) } @@ -556,6 +568,7 @@ impl MicrosoftAzureBuilder { AzureConfigKey::ClientId => self.client_id = Some(value.into()), AzureConfigKey::ClientSecret => self.client_secret = Some(value.into()), AzureConfigKey::AuthorityId => self.tenant_id = Some(value.into()), + AzureConfigKey::AuthorityHost => self.authority_host = Some(value.into()), AzureConfigKey::SasKey => self.sas_key = Some(value.into()), AzureConfigKey::Token => self.bearer_token = Some(value.into()), AzureConfigKey::MsiEndpoint => self.msi_endpoint = Some(value.into()), @@ -602,6 +615,7 @@ impl MicrosoftAzureBuilder { AzureConfigKey::ClientId => self.client_id.clone(), AzureConfigKey::ClientSecret => self.client_secret.clone(), AzureConfigKey::AuthorityId => self.tenant_id.clone(), + AzureConfigKey::AuthorityHost => self.authority_host.clone(), AzureConfigKey::SasKey => self.sas_key.clone(), AzureConfigKey::Token => self.bearer_token.clone(), AzureConfigKey::UseEmulator => Some(self.use_emulator.to_string()), diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index e78f8db7a8c8..69ff39526bef 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -31,13 +31,14 @@ use crate::{ PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, RetryConfig, TagSet, }; use async_trait::async_trait; -use base64::prelude::BASE64_STANDARD; +use 
base64::prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD}; use base64::Engine; use bytes::{Buf, Bytes}; use chrono::{DateTime, Utc}; use hyper::http::HeaderName; +use rand::Rng as _; use reqwest::{ - header::{HeaderValue, CONTENT_LENGTH, IF_MATCH, IF_NONE_MATCH}, + header::{HeaderMap, HeaderValue, CONTENT_LENGTH, CONTENT_TYPE, IF_MATCH, IF_NONE_MATCH}, Client as ReqwestClient, Method, RequestBuilder, Response, }; use serde::{Deserialize, Serialize}; @@ -79,6 +80,34 @@ pub(crate) enum Error { path: String, }, + #[snafu(display("Error performing bulk delete request: {}", source))] + BulkDeleteRequest { source: crate::client::retry::Error }, + + #[snafu(display("Error receiving bulk delete request body: {}", source))] + BulkDeleteRequestBody { source: reqwest::Error }, + + #[snafu(display( + "Bulk delete request failed due to invalid input: {} (code: {})", + reason, + code + ))] + BulkDeleteRequestInvalidInput { code: String, reason: String }, + + #[snafu(display("Got invalid bulk delete response: {}", reason))] + InvalidBulkDeleteResponse { reason: String }, + + #[snafu(display( + "Bulk delete request failed for key {}: {} (code: {})", + path, + reason, + code + ))] + DeleteFailed { + path: String, + code: String, + reason: String, + }, + #[snafu(display("Error performing list request: {}", source))] ListRequest { source: crate::client::retry::Error }, @@ -247,6 +276,223 @@ impl<'a> PutRequest<'a> { } } +#[inline] +fn extend(dst: &mut Vec, data: &[u8]) { + dst.extend_from_slice(data); +} + +// Write header names as title case. The header name is assumed to be ASCII. +// We need it because Azure is not always treating headers as case insensitive. +fn title_case(dst: &mut Vec, name: &[u8]) { + dst.reserve(name.len()); + + // Ensure first character is uppercased + let mut prev = b'-'; + for &(mut c) in name { + if prev == b'-' { + c.make_ascii_uppercase(); + } + dst.push(c); + prev = c; + } +} + +fn write_headers(headers: &HeaderMap, dst: &mut Vec) { + for (name, value) in headers { + // We need special case handling here otherwise Azure returns 400 + // due to `Content-Id` instead of `Content-ID` + if name == "content-id" { + extend(dst, b"Content-ID"); + } else { + title_case(dst, name.as_str().as_bytes()); + } + extend(dst, b": "); + extend(dst, value.as_bytes()); + extend(dst, b"\r\n"); + } +} + +// https://docs.oasis-open.org/odata/odata/v4.0/errata02/os/complete/part1-protocol/odata-v4.0-errata02-os-part1-protocol-complete.html#_Toc406398359 +fn serialize_part_delete_request( + dst: &mut Vec, + boundary: &str, + idx: usize, + request: reqwest::Request, + relative_url: String, +) { + // Encode start marker for part + extend(dst, b"--"); + extend(dst, boundary.as_bytes()); + extend(dst, b"\r\n"); + + // Encode part headers + let mut part_headers = HeaderMap::new(); + part_headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/http")); + part_headers.insert( + "Content-Transfer-Encoding", + HeaderValue::from_static("binary"), + ); + // Azure returns 400 if we send `Content-Id` instead of `Content-ID` + part_headers.insert("Content-ID", HeaderValue::from(idx)); + write_headers(&part_headers, dst); + extend(dst, b"\r\n"); + + // Encode the subrequest request-line + extend(dst, b"DELETE "); + extend(dst, format!("/{} ", relative_url).as_bytes()); + extend(dst, b"HTTP/1.1"); + extend(dst, b"\r\n"); + + // Encode subrequest headers + write_headers(request.headers(), dst); + extend(dst, b"\r\n"); + extend(dst, b"\r\n"); +} + +fn parse_multipart_response_boundary(response: &Response) 
-> Result { + let invalid_response = |msg: &str| Error::InvalidBulkDeleteResponse { + reason: msg.to_string(), + }; + + let content_type = response + .headers() + .get(CONTENT_TYPE) + .ok_or_else(|| invalid_response("missing Content-Type"))?; + + let boundary = content_type + .as_ref() + .strip_prefix(b"multipart/mixed; boundary=") + .ok_or_else(|| invalid_response("invalid Content-Type value"))? + .to_vec(); + + let boundary = + String::from_utf8(boundary).map_err(|_| invalid_response("invalid multipart boundary"))?; + + Ok(boundary) +} + +fn invalid_response(msg: &str) -> Error { + Error::InvalidBulkDeleteResponse { + reason: msg.to_string(), + } +} + +#[derive(Debug)] +struct MultipartField { + headers: HeaderMap, + content: Bytes, +} + +fn parse_multipart_body_fields(body: Bytes, boundary: &[u8]) -> Result> { + let start_marker = [b"--", boundary, b"\r\n"].concat(); + let next_marker = &start_marker[..start_marker.len() - 2]; + let end_marker = [b"--", boundary, b"--\r\n"].concat(); + + // There should be at most 256 responses per batch + let mut fields = Vec::with_capacity(256); + let mut remaining: &[u8] = body.as_ref(); + loop { + remaining = remaining + .strip_prefix(start_marker.as_slice()) + .ok_or_else(|| invalid_response("missing start marker for field"))?; + + // The documentation only mentions two headers for fields, we leave some extra margin + let mut scratch = [httparse::EMPTY_HEADER; 10]; + let mut headers = HeaderMap::new(); + match httparse::parse_headers(remaining, &mut scratch) { + Ok(httparse::Status::Complete((pos, headers_slice))) => { + remaining = &remaining[pos..]; + for header in headers_slice { + headers.insert( + HeaderName::from_bytes(header.name.as_bytes()).expect("valid"), + HeaderValue::from_bytes(header.value).expect("valid"), + ); + } + } + _ => return Err(invalid_response("unable to parse field headers").into()), + }; + + let next_pos = remaining + .windows(next_marker.len()) + .position(|window| window == next_marker) + .ok_or_else(|| invalid_response("early EOF while seeking to next boundary"))?; + + fields.push(MultipartField { + headers, + content: body.slice_ref(&remaining[..next_pos]), + }); + + remaining = &remaining[next_pos..]; + + // Support missing final CRLF + if remaining == end_marker || remaining == &end_marker[..end_marker.len() - 2] { + break; + } + } + Ok(fields) +} + +async fn parse_blob_batch_delete_body( + batch_body: Bytes, + boundary: String, + paths: &[Path], +) -> Result>> { + let mut results: Vec> = paths.iter().cloned().map(Ok).collect(); + + for field in parse_multipart_body_fields(batch_body, boundary.as_bytes())? 
{ + let id = field + .headers + .get("content-id") + .and_then(|v| std::str::from_utf8(v.as_bytes()).ok()) + .and_then(|v| v.parse::().ok()); + + // Parse part response headers + // Documentation mentions 5 headers and states that other standard HTTP headers + // may be provided, in order to not incurr in more complexity to support an arbitrary + // amount of headers we chose a conservative amount and error otherwise + // https://learn.microsoft.com/en-us/rest/api/storageservices/delete-blob?tabs=microsoft-entra-id#response-headers + let mut headers = [httparse::EMPTY_HEADER; 48]; + let mut part_response = httparse::Response::new(&mut headers); + match part_response.parse(&field.content) { + Ok(httparse::Status::Complete(_)) => {} + _ => return Err(invalid_response("unable to parse response").into()), + }; + + match (id, part_response.code) { + (Some(_id), Some(code)) if (200..300).contains(&code) => {} + (Some(id), Some(404)) => { + results[id] = Err(crate::Error::NotFound { + path: paths[id].as_ref().to_string(), + source: Error::DeleteFailed { + path: paths[id].as_ref().to_string(), + code: 404.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into(), + }); + } + (Some(id), Some(code)) => { + results[id] = Err(Error::DeleteFailed { + path: paths[id].as_ref().to_string(), + code: code.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into()); + } + (None, Some(code)) => { + return Err(Error::BulkDeleteRequestInvalidInput { + code: code.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into()) + } + _ => return Err(invalid_response("missing part response status code").into()), + } + } + + Ok(results) +} + #[derive(Debug)] pub(crate) struct AzureClient { config: AzureConfig, @@ -311,10 +557,11 @@ impl AzureClient { pub(crate) async fn put_block( &self, path: &Path, - part_idx: usize, + _part_idx: usize, payload: PutPayload, ) -> Result { - let content_id = format!("{part_idx:20}"); + let part_idx = u128::from_be_bytes(rand::thread_rng().gen()); + let content_id = format!("{part_idx:032x}"); let block_id = BASE64_STANDARD.encode(&content_id); self.put_request(path, payload) @@ -380,6 +627,86 @@ impl AzureClient { Ok(()) } + fn build_bulk_delete_body( + &self, + boundary: &str, + paths: &[Path], + credential: &Option>, + ) -> Vec { + let mut body_bytes = Vec::with_capacity(paths.len() * 2048); + + for (idx, path) in paths.iter().enumerate() { + let url = self.config.path_url(path); + + // Build subrequest with proper authorization + let request = self + .client + .request(Method::DELETE, url) + .header(CONTENT_LENGTH, HeaderValue::from(0)) + // Each subrequest must be authorized individually [1] and we use + // the CredentialExt for this. 
+ // [1]: https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch?tabs=microsoft-entra-id#request-body + .with_azure_authorization(credential, &self.config.account) + .build() + .unwrap(); + + // Url for part requests must be relative and without base + let relative_url = self.config.service.make_relative(request.url()).unwrap(); + + serialize_part_delete_request(&mut body_bytes, boundary, idx, request, relative_url) + } + + // Encode end marker + extend(&mut body_bytes, b"--"); + extend(&mut body_bytes, boundary.as_bytes()); + extend(&mut body_bytes, b"--"); + extend(&mut body_bytes, b"\r\n"); + body_bytes + } + + pub(crate) async fn bulk_delete_request(&self, paths: Vec) -> Result>> { + if paths.is_empty() { + return Ok(Vec::new()); + } + + let credential = self.get_credential().await?; + + // https://www.ietf.org/rfc/rfc2046 + let random_bytes = rand::random::<[u8; 16]>(); // 128 bits + let boundary = format!("batch_{}", BASE64_STANDARD_NO_PAD.encode(random_bytes)); + + let body_bytes = self.build_bulk_delete_body(&boundary, &paths, &credential); + + // Send multipart request + let url = self.config.path_url(&Path::from("/")); + let batch_response = self + .client + .request(Method::POST, url) + .query(&[("restype", "container"), ("comp", "batch")]) + .header( + CONTENT_TYPE, + HeaderValue::from_str(format!("multipart/mixed; boundary={}", boundary).as_str()) + .unwrap(), + ) + .header(CONTENT_LENGTH, HeaderValue::from(body_bytes.len())) + .body(body_bytes) + .with_azure_authorization(&credential, &self.config.account) + .send_retry(&self.config.retry_config) + .await + .context(BulkDeleteRequestSnafu {})?; + + let boundary = parse_multipart_response_boundary(&batch_response)?; + + let batch_body = batch_response + .bytes() + .await + .context(BulkDeleteRequestBodySnafu {})?; + + let results = parse_blob_batch_delete_body(batch_body, boundary, &paths).await?; + + Ok(results) + } + /// Make an Azure Copy request pub(crate) async fn copy_request(&self, from: &Path, to: &Path, overwrite: bool) -> Result<()> { let credential = self.get_credential().await?; @@ -814,8 +1141,10 @@ pub(crate) struct UserDelegationKey { #[cfg(test)] mod tests { use bytes::Bytes; + use regex::bytes::Regex; use super::*; + use crate::StaticCredentialProvider; #[test] fn deserde_azure() { @@ -1005,4 +1334,159 @@ mod tests { let _delegated_key_response_internal: UserDelegationKey = quick_xml::de::from_str(S).unwrap(); } + + #[tokio::test] + async fn test_build_bulk_delete_body() { + let credential_provider = Arc::new(StaticCredentialProvider::new( + AzureCredential::BearerToken("static-token".to_string()), + )); + + let config = AzureConfig { + account: "testaccount".to_string(), + container: "testcontainer".to_string(), + credentials: credential_provider, + service: "http://example.com".try_into().unwrap(), + retry_config: Default::default(), + is_emulator: false, + skip_signature: false, + disable_tagging: false, + client_options: Default::default(), + }; + + let client = AzureClient::new(config).unwrap(); + + let credential = client.get_credential().await.unwrap(); + let paths = &[Path::from("a"), Path::from("b"), Path::from("c")]; + + let boundary = "batch_statictestboundary".to_string(); + + let body_bytes = client.build_bulk_delete_body(&boundary, paths, &credential); + + // Replace Date header value with a static date + let re = Regex::new("Date:[^\r]+").unwrap(); + let body_bytes = re + .replace_all(&body_bytes, b"Date: Tue, 05 Nov 2024 15:01:15 GMT") + .to_vec(); + + let expected_body = 
b"--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 0\r +\r +DELETE /testcontainer/a HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 1\r +\r +DELETE /testcontainer/b HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 2\r +\r +DELETE /testcontainer/c HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary--\r\n" + .to_vec(); + + assert_eq!(expected_body, body_bytes); + } + + #[tokio::test] + async fn test_parse_blob_batch_delete_body() { + let response_body = b"--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 0\r +\r +HTTP/1.1 202 Accepted\r +x-ms-delete-type-permanent: true\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e284f\r +x-ms-version: 2018-11-09\r +\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 1\r +\r +HTTP/1.1 202 Accepted\r +x-ms-delete-type-permanent: true\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e2851\r +x-ms-version: 2018-11-09\r +\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 2\r +\r +HTTP/1.1 404 The specified blob does not exist.\r +x-ms-error-code: BlobNotFound\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e2852\r +x-ms-version: 2018-11-09\r +Content-Length: 216\r +Content-Type: application/xml\r +\r + +BlobNotFoundThe specified blob does not exist. +RequestId:778fdc83-801e-0000-62ff-0334671e2852 +Time:2018-06-14T16:46:54.6040685Z\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed--\r\n"; + + let response: reqwest::Response = http::Response::builder() + .status(202) + .header("Transfer-Encoding", "chunked") + .header( + "Content-Type", + "multipart/mixed; boundary=batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed", + ) + .header("x-ms-request-id", "778fdc83-801e-0000-62ff-033467000000") + .header("x-ms-version", "2018-11-09") + .body(Bytes::from(response_body.as_slice())) + .unwrap() + .into(); + + let boundary = parse_multipart_response_boundary(&response).unwrap(); + let body = response.bytes().await.unwrap(); + + let paths = &[Path::from("a"), Path::from("b"), Path::from("c")]; + + let results = parse_blob_batch_delete_body(body, boundary, paths) + .await + .unwrap(); + + assert!(results[0].is_ok()); + assert_eq!(&paths[0], results[0].as_ref().unwrap()); + + assert!(results[1].is_ok()); + assert_eq!(&paths[1], results[1].as_ref().unwrap()); + + assert!(results[2].is_err()); + let err = results[2].as_ref().unwrap_err(); + let crate::Error::NotFound { source, .. 
} = err else { + unreachable!("must be not found") + }; + let Some(Error::DeleteFailed { path, code, reason }) = source.downcast_ref::() + else { + unreachable!("must be client error") + }; + + assert_eq!(paths[2].as_ref(), path); + assert_eq!("404", code); + assert_eq!("The specified blob does not exist.", reason); + } } diff --git a/object_store/src/azure/mod.rs b/object_store/src/azure/mod.rs index f89a184f9523..81b6667bc058 100644 --- a/object_store/src/azure/mod.rs +++ b/object_store/src/azure/mod.rs @@ -30,7 +30,7 @@ use crate::{ PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, UploadPart, }; use async_trait::async_trait; -use futures::stream::BoxStream; +use futures::stream::{BoxStream, StreamExt, TryStreamExt}; use reqwest::Method; use std::fmt::Debug; use std::sync::Arc; @@ -119,6 +119,26 @@ impl ObjectStore for MicrosoftAzure { self.client.delete_request(location, &()).await } + fn delete_stream<'a>( + &'a self, + locations: BoxStream<'a, Result>, + ) -> BoxStream<'a, Result> { + locations + .try_chunks(256) + .map(move |locations| async { + // Early return the error. We ignore the paths that have already been + // collected into the chunk. + let locations = locations.map_err(|e| e.1)?; + self.client + .bulk_delete_request(locations) + .await + .map(futures::stream::iter) + }) + .buffered(20) + .try_flatten() + .boxed() + } + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { self.client.list(prefix) } @@ -294,6 +314,7 @@ mod tests { stream_get(&integration).await; put_opts(&integration, true).await; multipart(&integration, &integration).await; + multipart_race_condition(&integration, false).await; signing(&integration).await; let validate = !integration.client.config().disable_tagging; diff --git a/object_store/src/client/mod.rs b/object_store/src/client/mod.rs index 76d1c1f22f58..1b7ce5aa7a78 100644 --- a/object_store/src/client/mod.rs +++ b/object_store/src/client/mod.rs @@ -671,6 +671,10 @@ impl ClientOptions { builder = builder.danger_accept_invalid_certs(true) } + // Reqwest will remove the `Content-Length` header if it is configured to + // transparently decompress the body via the non-default `gzip` feature. + builder = builder.no_gzip(); + builder .https_only(!self.allow_http.get()?) .build() diff --git a/object_store/src/client/retry.rs b/object_store/src/client/retry.rs index 601bffdec158..a8a8e58de4d0 100644 --- a/object_store/src/client/retry.rs +++ b/object_store/src/client/retry.rs @@ -200,6 +200,7 @@ pub(crate) struct RetryableRequest { sensitive: bool, idempotent: Option, + retry_on_conflict: bool, payload: Option, retry_error_body: bool, @@ -217,6 +218,15 @@ impl RetryableRequest { } } + /// Set whether this request should be retried on a 409 Conflict response. 
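As a usage note for the Azure bulk-delete path wired up through `delete_stream` above, here is a minimal sketch of deleting many paths through the public streaming API. It is illustrative only; `store` can be any `ObjectStore`, and the Azure implementation now services it with batch requests of up to 256 paths, 20 in flight at a time.

```rust
use futures::{stream, StreamExt, TryStreamExt};
use object_store::{path::Path, ObjectStore};

async fn delete_many(store: &dyn ObjectStore, paths: Vec<Path>) -> object_store::Result<()> {
    // Feed the paths in as a stream; failed deletes surface as per-path errors.
    let locations = stream::iter(paths.into_iter().map(Ok)).boxed();
    let deleted: Vec<Path> = store.delete_stream(locations).try_collect().await?;
    println!("deleted {} objects", deleted.len());
    Ok(())
}
```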
+ #[cfg(feature = "aws")] + pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self { + Self { + retry_on_conflict, + ..self + } + } + /// Set whether this request contains sensitive data /// /// This will avoid printing out the URL in error messages @@ -340,7 +350,8 @@ impl RetryableRequest { let status = r.status(); if retries == max_retries || now.elapsed() > retry_timeout - || !status.is_server_error() + || !(status.is_server_error() + || (self.retry_on_conflict && status == StatusCode::CONFLICT)) { return Err(match status.is_client_error() { true => match r.text().await { @@ -467,6 +478,7 @@ impl RetryExt for reqwest::RequestBuilder { idempotent: None, payload: None, sensitive: false, + retry_on_conflict: false, retry_error_body: false, } } diff --git a/object_store/src/client/s3.rs b/object_store/src/client/s3.rs index dba752cb1251..7fe956b2376e 100644 --- a/object_store/src/client/s3.rs +++ b/object_store/src/client/s3.rs @@ -106,14 +106,32 @@ pub(crate) struct CompleteMultipartUpload { pub part: Vec, } +#[derive(Serialize, Deserialize)] +pub(crate) struct PartMetadata { + pub e_tag: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub checksum_sha256: Option, +} + impl From> for CompleteMultipartUpload { fn from(value: Vec) -> Self { let part = value .into_iter() .enumerate() - .map(|(part_number, part)| MultipartPart { - e_tag: part.content_id, - part_number: part_number + 1, + .map(|(part_idx, part)| { + let md = match quick_xml::de::from_str::(&part.content_id) { + Ok(md) => md, + // fallback to old way + Err(_) => PartMetadata { + e_tag: part.content_id.clone(), + checksum_sha256: None, + }, + }; + MultipartPart { + e_tag: md.e_tag, + part_number: part_idx + 1, + checksum_sha256: md.checksum_sha256, + } }) .collect(); Self { part } @@ -126,6 +144,9 @@ pub(crate) struct MultipartPart { pub e_tag: String, #[serde(rename = "PartNumber")] pub part_number: usize, + #[serde(rename = "ChecksumSHA256")] + #[serde(skip_serializing_if = "Option::is_none")] + pub checksum_sha256: Option, } #[derive(Debug, Deserialize)] diff --git a/object_store/src/gcp/mod.rs b/object_store/src/gcp/mod.rs index 039ec46b68c2..5199135ba6b0 100644 --- a/object_store/src/gcp/mod.rs +++ b/object_store/src/gcp/mod.rs @@ -297,6 +297,7 @@ mod test { // https://github.com/fsouza/fake-gcs-server/issues/852 stream_get(&integration).await; multipart(&integration, &integration).await; + multipart_race_condition(&integration, true).await; // Fake GCS server doesn't currently honor preconditions get_opts(&integration).await; put_opts(&integration, true).await; diff --git a/object_store/src/integration.rs b/object_store/src/integration.rs index 30177878306f..20e95fddc478 100644 --- a/object_store/src/integration.rs +++ b/object_store/src/integration.rs @@ -24,6 +24,8 @@ //! //! They are intended solely for testing purposes. 
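Before the integration-test additions below, it is worth spelling out how the `PartMetadata` change above carries the optional SHA-256 checksum through the string slot that previously held only the part ETag. A minimal round-trip sketch, assuming `quick_xml` with its serde support enabled (the `serialize` feature, which is what the diff itself relies on) and `serde` with derive:

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct PartMetadata {
    e_tag: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    checksum_sha256: Option<String>,
}

fn main() {
    let meta = PartMetadata {
        e_tag: "\"9b2cf535f27731c974343645a3985328\"".to_string(),
        checksum_sha256: Some("base64-encoded-digest==".to_string()),
    };

    // Encode the metadata into the string previously holding only the ETag...
    let content_id = quick_xml::se::to_string(&meta).unwrap();

    // ...and decode it again when building CompleteMultipartUpload; anything that
    // fails to parse is treated as a plain ETag, preserving the old behaviour.
    let decoded: PartMetadata = quick_xml::de::from_str(&content_id).unwrap();
    assert_eq!(decoded, meta);
}
```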
+use core::str; + use crate::multipart::MultipartStore; use crate::path::Path; use crate::{ @@ -1109,3 +1111,88 @@ async fn delete_fixtures(storage: &DynObjectStore) { .await .unwrap(); } + +/// Tests a race condition where 2 threads are performing multipart writes to the same path +pub async fn multipart_race_condition(storage: &dyn ObjectStore, last_writer_wins: bool) { + let path = Path::from("test_multipart_race_condition"); + + let mut multipart_upload_1 = storage.put_multipart(&path).await.unwrap(); + let mut multipart_upload_2 = storage.put_multipart(&path).await.unwrap(); + + multipart_upload_1 + .put_part(Bytes::from(format!("1:{:05300000},", 0)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(format!("2:{:05300000},", 0)).into()) + .await + .unwrap(); + + multipart_upload_2 + .put_part(Bytes::from(format!("2:{:05300000},", 1)).into()) + .await + .unwrap(); + multipart_upload_1 + .put_part(Bytes::from(format!("1:{:05300000},", 1)).into()) + .await + .unwrap(); + + multipart_upload_1 + .put_part(Bytes::from(format!("1:{:05300000},", 2)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(format!("2:{:05300000},", 2)).into()) + .await + .unwrap(); + + multipart_upload_2 + .put_part(Bytes::from(format!("2:{:05300000},", 3)).into()) + .await + .unwrap(); + multipart_upload_1 + .put_part(Bytes::from(format!("1:{:05300000},", 3)).into()) + .await + .unwrap(); + + multipart_upload_1 + .put_part(Bytes::from(format!("1:{:05300000},", 4)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(format!("2:{:05300000},", 4)).into()) + .await + .unwrap(); + + multipart_upload_1.complete().await.unwrap(); + + if last_writer_wins { + multipart_upload_2.complete().await.unwrap(); + } else { + let err = multipart_upload_2.complete().await.unwrap_err(); + + assert!(matches!(err, crate::Error::Generic { .. 
}), "{err}"); + } + + let get_result = storage.get(&path).await.unwrap(); + let bytes = get_result.bytes().await.unwrap(); + let string_contents = str::from_utf8(&bytes).unwrap(); + + if last_writer_wins { + assert!(string_contents.starts_with( + format!( + "2:{:05300000},2:{:05300000},2:{:05300000},2:{:05300000},2:{:05300000},", + 0, 1, 2, 3, 4 + ) + .as_str() + )); + } else { + assert!(string_contents.starts_with( + format!( + "1:{:05300000},1:{:05300000},1:{:05300000},1:{:05300000},1:{:05300000},", + 0, 1, 2, 3, 4 + ) + .as_str() + )); + } +} diff --git a/object_store/src/local.rs b/object_store/src/local.rs index 11324b1e5b92..78fce9c26224 100644 --- a/object_store/src/local.rs +++ b/object_store/src/local.rs @@ -1004,7 +1004,7 @@ fn get_inode(metadata: &Metadata) -> u64 { #[cfg(not(unix))] /// On platforms where an inode isn't available, fallback to just relying on size and mtime -fn get_inode(metadata: &Metadata) -> u64 { +fn get_inode(_metadata: &Metadata) -> u64 { 0 } @@ -1060,7 +1060,10 @@ mod tests { use std::fs; use futures::TryStreamExt; - use tempfile::{NamedTempFile, TempDir}; + use tempfile::TempDir; + + #[cfg(target_family = "unix")] + use tempfile::NamedTempFile; use crate::integration::*; @@ -1248,6 +1251,7 @@ mod tests { fs.list_with_delimiter(None).await.unwrap(); } + #[cfg(target_family = "unix")] async fn check_list(integration: &LocalFileSystem, prefix: Option<&Path>, expected: &[&str]) { let result: Vec<_> = integration.list(prefix).try_collect().await.unwrap(); diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index b458bdddfbf5..a467e3b88a26 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -468,12 +468,6 @@ impl InMemory { Self { storage } } - /// Creates a clone of the store - #[deprecated(note = "Use fork() instead")] - pub async fn clone(&self) -> Self { - self.fork() - } - async fn entry(&self, location: &Path) -> Result { let storage = self.storage.read(); let value = storage diff --git a/object_store/src/path/parts.rs b/object_store/src/path/parts.rs index df7097cbe9db..de2e1a75c955 100644 --- a/object_store/src/path/parts.rs +++ b/object_store/src/path/parts.rs @@ -126,7 +126,7 @@ impl From for PathPart<'static> { } } -impl<'a> AsRef for PathPart<'a> { +impl AsRef for PathPart<'_> { fn as_ref(&self) -> &str { self.raw.as_ref() } diff --git a/object_store/src/prefix.rs b/object_store/src/prefix.rs index 9b10fea5e0bb..227887d78fd7 100644 --- a/object_store/src/prefix.rs +++ b/object_store/src/prefix.rs @@ -26,10 +26,6 @@ use crate::{ PutOptions, PutPayload, PutResult, Result, }; -#[doc(hidden)] -#[deprecated(note = "Use PrefixStore")] -pub type PrefixObjectStore = PrefixStore; - /// Store wrapper that applies a constant prefix to all paths handled by the store. #[derive(Debug, Clone)] pub struct PrefixStore { diff --git a/object_store/tests/http.rs b/object_store/tests/http.rs new file mode 100644 index 000000000000..a9b3145bb660 --- /dev/null +++ b/object_store/tests/http.rs @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests the HTTP store implementation + +#[cfg(feature = "http")] +use object_store::{http::HttpBuilder, path::Path, GetOptions, GetRange, ObjectStore}; + +/// Tests that even when reqwest has the `gzip` feature enabled, the HTTP store +/// does not error on a missing `Content-Length` header. +#[tokio::test] +#[cfg(feature = "http")] +async fn test_http_store_gzip() { + let http_store = HttpBuilder::new() + .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") + .build() + .unwrap(); + + let _ = http_store + .get_opts( + &Path::parse("LICENSE.txt").unwrap(), + GetOptions { + range: Some(GetRange::Bounded(0..100)), + ..Default::default() + }, + ) + .await + .unwrap(); +} diff --git a/parquet-testing b/parquet-testing index 550368ca77b9..4439a223a315 160000 --- a/parquet-testing +++ b/parquet-testing @@ -1 +1 @@ -Subproject commit 550368ca77b97231efead39251a96bd6f8f08c6e +Subproject commit 4439a223a315cf874746d3b5da25e6a6b2a2b16e diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 4064baba0947..19f890710778 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -67,7 +67,7 @@ hashbrown = { version = "0.15", default-features = false } twox-hash = { version = "1.6", default-features = false } paste = { version = "1.0" } half = { version = "2.1", default-features = false, features = ["num-traits"] } -sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] } +sysinfo = { version = "0.33.0", optional = true, default-features = false, features = ["system"] } crc32fast = { version = "1.4.2", optional = true, default-features = false } [dev-dependencies] diff --git a/parquet/LICENSE.txt b/parquet/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ b/parquet/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/parquet/NOTICE.txt b/parquet/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/parquet/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/parquet/README.md b/parquet/README.md index a0441ee6026d..e9f52ff279d5 100644 --- a/parquet/README.md +++ b/parquet/README.md @@ -59,7 +59,7 @@ The `parquet` crate provides the following features which may be enabled in your - `lz4` (default) - support for parquet using `lz4` compression - `zstd` (default) - support for parquet using `zstd` compression - `snap` (default) - support for parquet using `snappy` compression -- `cli` - parquet [CLI tools](https://github.com/apache/arrow-rs/tree/master/parquet/src/bin) +- `cli` - parquet [CLI tools](https://github.com/apache/arrow-rs/tree/main/parquet/src/bin) - `crc` - enables functionality to automatically verify checksums of each page (if present) when decoding - `experimental` - Experimental APIs which may change, even between minor releases diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index c424d000694a..e5165fee212c 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -680,7 +680,7 @@ fn create_string_list_reader( 
column_desc: ColumnDescPtr, ) -> Box { let items = create_byte_array_reader(page_iterator, column_desc); - let field = Field::new("item", DataType::Utf8, true); + let field = Field::new_list_field(DataType::Utf8, true); let data_type = DataType::List(Arc::new(field)); Box::new(ListArrayReader::::new(items, data_type, 2, 1, true)) } diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs index cf39ee66f31a..bfa333db722c 100644 --- a/parquet/benches/arrow_writer.rs +++ b/parquet/benches/arrow_writer.rs @@ -189,17 +189,17 @@ fn create_list_primitive_bench_batch( let fields = vec![ Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), Field::new( "_2", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), true, ), Field::new( "_3", - DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), ]; @@ -220,17 +220,17 @@ fn create_list_primitive_bench_batch_non_null( let fields = vec![ Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), false, ), Field::new( "_2", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, false))), false, ), Field::new( "_3", - DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, false))), false, ), ]; @@ -274,10 +274,8 @@ fn _create_nested_bench_batch( ), Field::new( "_2", - DataType::LargeList(Arc::new(Field::new( - "item", - DataType::List(Arc::new(Field::new( - "item", + DataType::LargeList(Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field( DataType::Struct(Fields::from(vec![ Field::new( "_1", diff --git a/parquet/examples/async_read_parquet.rs b/parquet/examples/async_read_parquet.rs index e59cad8055cb..0a2e9ba994dd 100644 --- a/parquet/examples/async_read_parquet.rs +++ b/parquet/examples/async_read_parquet.rs @@ -45,7 +45,7 @@ async fn main() -> Result<()> { builder = builder.with_projection(mask); // Highlight: set `RowFilter`, it'll push down filter predicates to skip IO and decode. - // For more specific usage: please refer to https://github.com/apache/arrow-datafusion/blob/master/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs. + // For more specific usage: please refer to https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs. 
let scalar = Int32Array::from(vec![1]); let filter = ArrowPredicateFn::new( ProjectionMask::roots(file_metadata.schema_descr(), [0]), diff --git a/parquet/examples/write_parquet.rs b/parquet/examples/write_parquet.rs index 1b51d40c8134..ebdd9527b6f1 100644 --- a/parquet/examples/write_parquet.rs +++ b/parquet/examples/write_parquet.rs @@ -28,7 +28,7 @@ use parquet::arrow::ArrowWriter as ParquetWriter; use parquet::basic::Encoding; use parquet::errors::Result; use parquet::file::properties::{BloomFilterPosition, WriterProperties}; -use sysinfo::{MemoryRefreshKind, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; +use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; #[derive(ValueEnum, Clone)] enum BloomFilterPositionArg { @@ -97,8 +97,7 @@ fn main() -> Result<()> { let file = File::create(args.path).unwrap(); let mut writer = ParquetWriter::try_new(file, schema.clone(), Some(properties))?; - let mut system = - System::new_with_specifics(RefreshKind::new().with_memory(MemoryRefreshKind::everything())); + let mut system = System::new_with_specifics(RefreshKind::everything()); eprintln!( "{} Writing {} batches of {} rows. RSS = {}", now(), diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index 4be07ed68f1d..6b437be943d4 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -508,8 +508,7 @@ mod tests { ); // [[], [1], [2, 3], null, [4], null, [6, 7, 8]] - let data = ArrayDataBuilder::new(ArrowType::List(Arc::new(Field::new( - "item", + let data = ArrayDataBuilder::new(ArrowType::List(Arc::new(Field::new_list_field( decimals.data_type().clone(), false, )))) diff --git a/parquet/src/arrow/array_reader/fixed_size_list_array.rs b/parquet/src/arrow/array_reader/fixed_size_list_array.rs index 75099d018fc9..43a9037d4a74 100644 --- a/parquet/src/arrow/array_reader/fixed_size_list_array.rs +++ b/parquet/src/arrow/array_reader/fixed_size_list_array.rs @@ -277,7 +277,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 3, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 3), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 3), 2, 1, true, @@ -323,7 +323,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 2, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 2), 1, 1, false, @@ -347,9 +347,9 @@ mod tests { // [[null, null]], // ] let l2_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 2); let l1_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", l2_type.clone(), false)), 1); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(l2_type.clone(), false)), 1); let array = PrimitiveArray::::from(vec![ None, @@ -436,7 +436,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 0, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 0), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 0), 2, 1, true, @@ -481,9 +481,9 @@ mod tests { None, ])); - let inner_type = 
ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true))); + let inner_type = ArrowType::List(Arc::new(Field::new_list_field(ArrowType::Int32, true))); let list_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", inner_type.clone(), true)), 2); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(inner_type.clone(), true)), 2); let item_array_reader = InMemoryArrayReader::new( ArrowType::Int32, @@ -534,7 +534,10 @@ mod tests { let schema = Arc::new(Schema::new(vec![ Field::new( "list", - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4), + ArrowType::FixedSizeList( + Arc::new(Field::new_list_field(ArrowType::Int32, true)), + 4, + ), true, ), Field::new("primitive", ArrowType::Int32, true), @@ -599,7 +602,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "list", - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 4), true, )])); diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index ebff3286bed5..6e583ed00c19 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -265,7 +265,7 @@ mod tests { data_type: ArrowType, item_nullable: bool, ) -> ArrowType { - let field = Arc::new(Field::new("item", data_type, item_nullable)); + let field = Arc::new(Field::new_list_field(data_type, item_nullable)); GenericListArray::::DATA_TYPE_CONSTRUCTOR(field) } diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 010e9c2eed3f..a952e00e12ef 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -208,10 +208,10 @@ where // As there is not always a 1:1 mapping between Arrow and Parquet, there // are datatypes which we must convert explicitly. // These are: - // - date64: we should cast int32 to date32, then date32 to date64. - // - decimal: cast in32 to decimal, int64 to decimal + // - date64: cast int32 to date32, then date32 to date64. + // - decimal: cast int32 to decimal, int64 to decimal let array = match target_type { - ArrowType::Date64 => { + ArrowType::Date64 if *(array.data_type()) == ArrowType::Int32 => { // this is cheap as it internally reinterprets the data let a = arrow_cast::cast(&array, &ArrowType::Date32)?; arrow_cast::cast(&a, target_type)? 
@@ -305,9 +305,9 @@ mod tests { use crate::util::test_common::rand_gen::make_pages; use crate::util::InMemoryPageIterator; use arrow::datatypes::ArrowPrimitiveType; - use arrow_array::{Array, PrimitiveArray}; + use arrow_array::{Array, Date32Array, PrimitiveArray}; - use arrow::datatypes::DataType::Decimal128; + use arrow::datatypes::DataType::{Date32, Decimal128}; use rand::distributions::uniform::SampleUniform; use std::collections::VecDeque; @@ -783,4 +783,54 @@ mod tests { assert_ne!(array, &data_decimal_array) } } + + #[test] + fn test_primitive_array_reader_date32_type() { + // parquet `INT32` to date + let message_type = " + message test_schema { + REQUIRED INT32 date1 (DATE); + } + "; + let schema = parse_message_type(message_type) + .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t)))) + .unwrap(); + let column_desc = schema.column(0); + + // create the array reader + { + let mut data = Vec::new(); + let mut page_lists = Vec::new(); + make_column_chunks::( + column_desc.clone(), + Encoding::PLAIN, + 100, + -99999999, + 99999999, + &mut Vec::new(), + &mut Vec::new(), + &mut data, + &mut page_lists, + true, + 2, + ); + let page_iterator = InMemoryPageIterator::new(page_lists); + + let mut array_reader = + PrimitiveArrayReader::::new(Box::new(page_iterator), column_desc, None) + .unwrap(); + + // read data from the reader + // the data type is date + let array = array_reader.next_batch(50).unwrap(); + assert_eq!(array.data_type(), &Date32); + let array = array.as_any().downcast_ref::().unwrap(); + let data_date_array = data[0..50] + .iter() + .copied() + .map(Some) + .collect::(); + assert_eq!(array, &data_date_array); + } + } } diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index d3709c03e99a..378884a1c430 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -932,12 +932,12 @@ mod tests { use arrow_array::builder::*; use arrow_array::cast::AsArray; use arrow_array::types::{ - Decimal128Type, Decimal256Type, DecimalType, Float16Type, Float32Type, Float64Type, - Time32MillisecondType, Time64MicrosecondType, + Date32Type, Date64Type, Decimal128Type, Decimal256Type, DecimalType, Float16Type, + Float32Type, Float64Type, Time32MillisecondType, Time64MicrosecondType, }; use arrow_array::*; use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime}; - use arrow_data::ArrayDataBuilder; + use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ ArrowError, DataType as ArrowDataType, Field, Fields, Schema, SchemaRef, TimeUnit, }; @@ -1272,6 +1272,117 @@ mod tests { Ok(()) } + #[test] + fn test_date32_roundtrip() -> Result<()> { + use arrow_array::Date32Array; + + let schema = Arc::new(Schema::new(vec![Field::new( + "date32", + ArrowDataType::Date32, + false, + )])); + + let mut buf = Vec::with_capacity(1024); + + let mut writer = ArrowWriter::try_new(&mut buf, schema.clone(), None)?; + + let original = RecordBatch::try_new( + schema, + vec![Arc::new(Date32Array::from(vec![ + -1_000_000, -100_000, -10_000, -1_000, 0, 1_000, 10_000, 100_000, 1_000_000, + ]))], + )?; + + writer.write(&original)?; + writer.close()?; + + let mut reader = ParquetRecordBatchReader::try_new(Bytes::from(buf), 1024)?; + let ret = reader.next().unwrap()?; + assert_eq!(ret, original); + + // Ensure can be downcast to the correct type + ret.column(0).as_primitive::(); + + Ok(()) + } + + #[test] + fn test_date64_roundtrip() -> Result<()> { + use arrow_array::Date64Array; + + let schema = Arc::new(Schema::new(vec![ + 
Field::new("small-date64", ArrowDataType::Date64, false), + Field::new("big-date64", ArrowDataType::Date64, false), + Field::new("invalid-date64", ArrowDataType::Date64, false), + ])); + + let mut default_buf = Vec::with_capacity(1024); + let mut coerce_buf = Vec::with_capacity(1024); + + let coerce_props = WriterProperties::builder().set_coerce_types(true).build(); + + let mut default_writer = ArrowWriter::try_new(&mut default_buf, schema.clone(), None)?; + let mut coerce_writer = + ArrowWriter::try_new(&mut coerce_buf, schema.clone(), Some(coerce_props))?; + + static NUM_MILLISECONDS_IN_DAY: i64 = 1000 * 60 * 60 * 24; + + let original = RecordBatch::try_new( + schema, + vec![ + // small-date64 + Arc::new(Date64Array::from(vec![ + -1_000_000 * NUM_MILLISECONDS_IN_DAY, + -1_000 * NUM_MILLISECONDS_IN_DAY, + 0, + 1_000 * NUM_MILLISECONDS_IN_DAY, + 1_000_000 * NUM_MILLISECONDS_IN_DAY, + ])), + // big-date64 + Arc::new(Date64Array::from(vec![ + -10_000_000_000 * NUM_MILLISECONDS_IN_DAY, + -1_000_000_000 * NUM_MILLISECONDS_IN_DAY, + 0, + 1_000_000_000 * NUM_MILLISECONDS_IN_DAY, + 10_000_000_000 * NUM_MILLISECONDS_IN_DAY, + ])), + // invalid-date64 + Arc::new(Date64Array::from(vec![ + -1_000_000 * NUM_MILLISECONDS_IN_DAY + 1, + -1_000 * NUM_MILLISECONDS_IN_DAY + 1, + 1, + 1_000 * NUM_MILLISECONDS_IN_DAY + 1, + 1_000_000 * NUM_MILLISECONDS_IN_DAY + 1, + ])), + ], + )?; + + default_writer.write(&original)?; + coerce_writer.write(&original)?; + + default_writer.close()?; + coerce_writer.close()?; + + let mut default_reader = ParquetRecordBatchReader::try_new(Bytes::from(default_buf), 1024)?; + let mut coerce_reader = ParquetRecordBatchReader::try_new(Bytes::from(coerce_buf), 1024)?; + + let default_ret = default_reader.next().unwrap()?; + let coerce_ret = coerce_reader.next().unwrap()?; + + // Roundtrip should be successful when default writer used + assert_eq!(default_ret, original); + + // Only small-date64 should roundtrip successfully when coerce_types writer is used + assert_eq!(coerce_ret.column(0), original.column(0)); + assert_ne!(coerce_ret.column(1), original.column(1)); + assert_ne!(coerce_ret.column(2), original.column(2)); + + // Ensure both can be downcast to the correct type + default_ret.column(0).as_primitive::(); + coerce_ret.column(0).as_primitive::(); + + Ok(()) + } struct RandFixedLenGen {} impl RandGen for RandFixedLenGen { @@ -1542,8 +1653,7 @@ mod tests { let decimals = Decimal128Array::from_iter_values([1, 2, 3, 4, 5, 6, 7, 8]); // [[], [1], [2, 3], null, [4], null, [6, 7, 8]] - let data = ArrayDataBuilder::new(ArrowDataType::List(Arc::new(Field::new( - "item", + let data = ArrayDataBuilder::new(ArrowDataType::List(Arc::new(Field::new_list_field( decimals.data_type().clone(), false, )))) @@ -2874,7 +2984,7 @@ mod tests { let arrow_field = Field::new( "emptylist", - ArrowDataType::List(Arc::new(Field::new("item", ArrowDataType::Null, true))), + ArrowDataType::List(Arc::new(Field::new_list_field(ArrowDataType::Null, true))), true, ); @@ -3346,7 +3456,7 @@ mod tests { fn test_row_group_batch(row_group_size: usize, batch_size: usize) { let schema = Arc::new(Schema::new(vec![Field::new( "list", - ArrowDataType::List(Arc::new(Field::new("item", ArrowDataType::Int32, true))), + ArrowDataType::List(Arc::new(Field::new_list_field(ArrowDataType::Int32, true))), true, )])); @@ -3584,9 +3694,7 @@ mod tests { .unwrap(); // Although `Vec>` of each row group is empty, // we should read the file successfully. 
- // FIXME: this test will fail when metadata parsing returns `None` for missing page - // indexes. https://github.com/apache/arrow-rs/issues/6447 - assert!(builder.metadata().offset_index().unwrap()[0].is_empty()); + assert!(builder.metadata().offset_index().is_none()); let reader = builder.build().unwrap(); let batches = reader.collect::, _>>().unwrap(); assert_eq!(batches.len(), 1); @@ -3905,7 +4013,7 @@ mod tests { fn test_list_selection() { let schema = Arc::new(Schema::new(vec![Field::new_list( "list", - Field::new("item", ArrowDataType::Utf8, true), + Field::new_list_field(ArrowDataType::Utf8, true), false, )])); let mut buf = Vec::with_capacity(1024); @@ -3961,7 +4069,11 @@ mod tests { let mut rng = thread_rng(); let schema = Arc::new(Schema::new(vec![Field::new_list( "list", - Field::new_list("item", Field::new("item", ArrowDataType::Int32, true), true), + Field::new_list( + Field::LIST_FIELD_DEFAULT_NAME, + Field::new_list_field(ArrowDataType::Int32, true), + true, + ), true, )])); let mut buf = Vec::with_capacity(1024); @@ -4065,4 +4177,93 @@ mod tests { } } } + + #[test] + fn test_read_old_nested_list() { + use arrow::datatypes::DataType; + use arrow::datatypes::ToByteSlice; + + let testdata = arrow::util::test_util::parquet_test_data(); + // message my_record { + // REQUIRED group a (LIST) { + // REPEATED group array (LIST) { + // REPEATED INT32 array; + // } + // } + // } + // should be read as list> + let path = format!("{testdata}/old_list_structure.parquet"); + let test_file = File::open(path).unwrap(); + + // create expected ListArray + let a_values = Int32Array::from(vec![1, 2, 3, 4]); + + // Construct a buffer for value offsets, for the nested array: [[1, 2], [3, 4]] + let a_value_offsets = arrow::buffer::Buffer::from([0, 2, 4].to_byte_slice()); + + // Construct a list array from the above two + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( + "array", + DataType::Int32, + false, + )))) + .len(2) + .add_buffer(a_value_offsets) + .add_child_data(a_values.into_data()) + .build() + .unwrap(); + let a = ListArray::from(a_list_data); + + let builder = ParquetRecordBatchReaderBuilder::try_new(test_file).unwrap(); + let mut reader = builder.build().unwrap(); + let out = reader.next().unwrap().unwrap(); + assert_eq!(out.num_rows(), 1); + assert_eq!(out.num_columns(), 1); + // grab first column + let c0 = out.column(0); + let c0arr = c0.as_any().downcast_ref::().unwrap(); + // get first row: [[1, 2], [3, 4]] + let r0 = c0arr.value(0); + let r0arr = r0.as_any().downcast_ref::().unwrap(); + assert_eq!(r0arr, &a); + } + + #[test] + fn test_map_no_value() { + // File schema: + // message schema { + // required group my_map (MAP) { + // repeated group key_value { + // required int32 key; + // optional int32 value; + // } + // } + // required group my_map_no_v (MAP) { + // repeated group key_value { + // required int32 key; + // } + // } + // required group my_list (LIST) { + // repeated group list { + // required int32 element; + // } + // } + // } + let testdata = arrow::util::test_util::parquet_test_data(); + let path = format!("{testdata}/map_no_value.parquet"); + let file = File::open(path).unwrap(); + + let mut reader = ParquetRecordBatchReaderBuilder::try_new(file) + .unwrap() + .build() + .unwrap(); + let out = reader.next().unwrap().unwrap(); + assert_eq!(out.num_rows(), 3); + assert_eq!(out.num_columns(), 3); + // my_map_no_v and my_list columns should now be equivalent + let c0 = out.column(1).as_list::(); + let c1 = out.column(2).as_list::(); + 
assert_eq!(c0.len(), c1.len()); + c0.iter().zip(c1.iter()).for_each(|(l, r)| assert_eq!(l, r)); + } } diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs index 8a7511be2afe..09f8ec7cc274 100644 --- a/parquet/src/arrow/arrow_reader/statistics.rs +++ b/parquet/src/arrow/arrow_reader/statistics.rs @@ -21,6 +21,7 @@ /// `arrow-rs/parquet/tests/arrow_reader/statistics.rs`. use crate::arrow::buffer::bit_util::sign_extend_be; use crate::arrow::parquet_column; +use crate::basic::Type as PhysicalType; use crate::data_type::{ByteArray, FixedLenByteArray}; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData}; @@ -318,7 +319,7 @@ make_decimal_stats_iterator!( /// data_type: The data type of the statistics (e.g. `DataType::Int32`) /// iterator: The iterator of [`ParquetStatistics`] to extract the statistics from. macro_rules! get_statistics { - ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => { + ($stat_type_prefix: ident, $data_type: ident, $iterator: ident, $physical_type: ident) => { paste! { match $data_type { DataType::Boolean => Ok(Arc::new(BooleanArray::from_iter( @@ -370,10 +371,11 @@ macro_rules! get_statistics { DataType::Date32 => Ok(Arc::new(Date32Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| x.copied()), ))), - DataType::Date64 => Ok(Arc::new(Date64Array::from_iter( + DataType::Date64 if $physical_type == Some(PhysicalType::INT32) => Ok(Arc::new(Date64Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator) - .map(|x| x.map(|x| i64::from(*x) * 24 * 60 * 60 * 1000)), - ))), + .map(|x| x.map(|x| i64::from(*x) * 24 * 60 * 60 * 1000))))), + DataType::Date64 if $physical_type == Some(PhysicalType::INT64) => Ok(Arc::new(Date64Array::from_iter( + [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()),))), DataType::Timestamp(unit, timezone) =>{ let iter = [<$stat_type_prefix Int64StatsIterator>]::new($iterator).map(|x| x.copied()); Ok(match unit { @@ -487,7 +489,7 @@ macro_rules! get_statistics { Ok(Arc::new(arr)) }, DataType::Dictionary(_, value_type) => { - [<$stat_type_prefix:lower _ statistics>](value_type, $iterator) + [<$stat_type_prefix:lower _ statistics>](value_type, $iterator, $physical_type) }, DataType::Utf8View => { let iterator = [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator); @@ -524,6 +526,7 @@ macro_rules! get_statistics { DataType::Map(_,_) | DataType::Duration(_) | DataType::Interval(_) | + DataType::Date64 | // required to cover $physical_type match guard DataType::Null | DataType::List(_) | DataType::ListView(_) | @@ -790,7 +793,7 @@ get_decimal_page_stats_iterator!( ); macro_rules! get_data_page_statistics { - ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => { + ($stat_type_prefix: ident, $data_type: ident, $iterator: ident, $physical_type: ident) => { paste! { match $data_type { DataType::Boolean => { @@ -929,7 +932,7 @@ macro_rules! get_data_page_statistics { Ok(Arc::new(builder.finish())) }, DataType::Dictionary(_, value_type) => { - [<$stat_type_prefix:lower _ page_statistics>](value_type, $iterator) + [<$stat_type_prefix:lower _ page_statistics>](value_type, $iterator, $physical_type) }, DataType::Timestamp(unit, timezone) => { let iter = [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(); @@ -941,7 +944,7 @@ macro_rules! 
get_data_page_statistics { }) }, DataType::Date32 => Ok(Arc::new(Date32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))), - DataType::Date64 => Ok( + DataType::Date64 if $physical_type == Some(PhysicalType::INT32)=> Ok( Arc::new( Date64Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) .map(|x| { @@ -954,6 +957,7 @@ macro_rules! get_data_page_statistics { ) ) ), + DataType::Date64 if $physical_type == Some(PhysicalType::INT64) => Ok(Arc::new(Date64Array::from_iter([<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten()))), DataType::Decimal128(precision, scale) => Ok(Arc::new( Decimal128Array::from_iter([<$stat_type_prefix Decimal128DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), DataType::Decimal256(precision, scale) => Ok(Arc::new( @@ -1040,6 +1044,7 @@ macro_rules! get_data_page_statistics { } Ok(Arc::new(builder.finish())) }, + DataType::Date64 | // required to cover $physical_type match guard DataType::Null | DataType::Duration(_) | DataType::Interval(_) | @@ -1067,8 +1072,9 @@ macro_rules! get_data_page_statistics { fn min_statistics<'a, I: Iterator>>( data_type: &DataType, iterator: I, + physical_type: Option, ) -> Result { - get_statistics!(Min, data_type, iterator) + get_statistics!(Min, data_type, iterator, physical_type) } /// Extracts the max statistics from an iterator of [`ParquetStatistics`] to an [`ArrayRef`] @@ -1077,26 +1083,35 @@ fn min_statistics<'a, I: Iterator>>( fn max_statistics<'a, I: Iterator>>( data_type: &DataType, iterator: I, + physical_type: Option, ) -> Result { - get_statistics!(Max, data_type, iterator) + get_statistics!(Max, data_type, iterator, physical_type) } /// Extracts the min statistics from an iterator /// of parquet page [`Index`]'es to an [`ArrayRef`] -pub(crate) fn min_page_statistics<'a, I>(data_type: &DataType, iterator: I) -> Result +pub(crate) fn min_page_statistics<'a, I>( + data_type: &DataType, + iterator: I, + physical_type: Option, +) -> Result where I: Iterator, { - get_data_page_statistics!(Min, data_type, iterator) + get_data_page_statistics!(Min, data_type, iterator, physical_type) } /// Extracts the max statistics from an iterator /// of parquet page [`Index`]'es to an [`ArrayRef`] -pub(crate) fn max_page_statistics<'a, I>(data_type: &DataType, iterator: I) -> Result +pub(crate) fn max_page_statistics<'a, I>( + data_type: &DataType, + iterator: I, + physical_type: Option, +) -> Result where I: Iterator, { - get_data_page_statistics!(Max, data_type, iterator) + get_data_page_statistics!(Max, data_type, iterator, physical_type) } /// Extracts the null count statistics from an iterator @@ -1177,6 +1192,8 @@ pub struct StatisticsConverter<'a> { arrow_field: &'a Field, /// treat missing null_counts as 0 nulls missing_null_counts_as_zero: bool, + /// The physical type of the matched column in the Parquet schema + physical_type: Option, } impl<'a> StatisticsConverter<'a> { @@ -1304,6 +1321,7 @@ impl<'a> StatisticsConverter<'a> { parquet_column_index: parquet_index, arrow_field, missing_null_counts_as_zero: true, + physical_type: parquet_index.map(|idx| parquet_schema.column(idx).physical_type()), }) } @@ -1346,7 +1364,7 @@ impl<'a> StatisticsConverter<'a> { /// // get the minimum value for the column "foo" in the parquet file /// let min_values: ArrayRef = converter /// .row_group_mins(metadata.row_groups().iter()) - /// .unwrap(); + /// .unwrap(); /// // if "foo" is a Float64 value, the 
returned array will contain Float64 values /// assert_eq!(min_values, Arc::new(Float64Array::from(vec![Some(1.0), Some(2.0)])) as _); /// ``` @@ -1363,7 +1381,7 @@ impl<'a> StatisticsConverter<'a> { let iter = metadatas .into_iter() .map(|x| x.column(parquet_index).statistics()); - min_statistics(data_type, iter) + min_statistics(data_type, iter, self.physical_type) } /// Extract the maximum values from row group statistics in [`RowGroupMetaData`] @@ -1382,7 +1400,7 @@ impl<'a> StatisticsConverter<'a> { let iter = metadatas .into_iter() .map(|x| x.column(parquet_index).statistics()); - max_statistics(data_type, iter) + max_statistics(data_type, iter, self.physical_type) } /// Extract the null counts from row group statistics in [`RowGroupMetaData`] @@ -1490,7 +1508,7 @@ impl<'a> StatisticsConverter<'a> { (*num_data_pages, column_page_index_per_row_group_per_column) }); - min_page_statistics(data_type, iter) + min_page_statistics(data_type, iter, self.physical_type) } /// Extract the maximum values from Data Page statistics. @@ -1521,7 +1539,7 @@ impl<'a> StatisticsConverter<'a> { (*num_data_pages, column_page_index_per_row_group_per_column) }); - max_page_statistics(data_type, iter) + max_page_statistics(data_type, iter, self.physical_type) } /// Returns a [`UInt64Array`] with null counts for each data page. diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 3e828bbddd17..e4662b8f316c 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -632,7 +632,7 @@ mod tests { // based on the example at https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html // [[a, b, c], [d, e, f, g]], [[h], [i,j]] - let leaf_type = Field::new("item", DataType::Int32, false); + let leaf_type = Field::new_list_field(DataType::Int32, false); let inner_type = DataType::List(Arc::new(leaf_type)); let inner_field = Field::new("l2", inner_type.clone(), false); let outer_type = DataType::List(Arc::new(inner_field)); @@ -676,7 +676,7 @@ mod tests { fn test_calculate_one_level_1() { // This test calculates the levels for a non-null primitive array let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef; - let field = Field::new("item", DataType::Int32, false); + let field = Field::new_list_field(DataType::Int32, false); let levels = calculate_array_levels(&array, &field).unwrap(); assert_eq!(levels.len(), 1); @@ -702,7 +702,7 @@ mod tests { Some(0), None, ])) as ArrayRef; - let field = Field::new("item", DataType::Int32, true); + let field = Field::new_list_field(DataType::Int32, true); let levels = calculate_array_levels(&array, &field).unwrap(); assert_eq!(levels.len(), 1); @@ -720,7 +720,7 @@ mod tests { #[test] fn test_calculate_array_levels_1() { - let leaf_field = Field::new("item", DataType::Int32, false); + let leaf_field = Field::new_list_field(DataType::Int32, false); let list_type = DataType::List(Arc::new(leaf_field)); // if all array values are defined (e.g. 
batch>) @@ -1046,7 +1046,7 @@ mod tests { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let a_value_offsets = arrow::buffer::Buffer::from_iter([0_i32, 1, 3, 3, 6, 10]); - let a_list_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let a_list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let a_list_data = ArrayData::builder(a_list_type.clone()) .len(5) .add_buffer(a_value_offsets) @@ -1059,7 +1059,7 @@ mod tests { let a = ListArray::from(a_list_data); - let item_field = Field::new("item", a_list_type, true); + let item_field = Field::new_list_field(a_list_type, true); let mut builder = levels(&item_field, a); builder.write(2..4); let levels = builder.finish(); @@ -1334,7 +1334,7 @@ mod tests { // define schema let int_field = Field::new("a", DataType::Int32, true); let fields = Fields::from([Arc::new(int_field)]); - let item_field = Field::new("item", DataType::Struct(fields.clone()), true); + let item_field = Field::new_list_field(DataType::Struct(fields.clone()), true); let list_field = Field::new("list", DataType::List(Arc::new(item_field)), true); let int_builder = Int32Builder::with_capacity(10); @@ -1568,7 +1568,7 @@ mod tests { let a = builder.finish(); let values = a.values().clone(); - let item_field = Field::new("item", a.data_type().clone(), true); + let item_field = Field::new_list_field(a.data_type().clone(), true); let mut builder = levels(&item_field, a); builder.write(1..4); let levels = builder.finish(); @@ -1594,7 +1594,7 @@ mod tests { let field_a = Field::new("a", DataType::Int32, true); let field_b = Field::new("b", DataType::Int64, false); let fields = Fields::from([Arc::new(field_a), Arc::new(field_b)]); - let item_field = Field::new("item", DataType::Struct(fields.clone()), true); + let item_field = Field::new_list_field(DataType::Struct(fields.clone()), true); let list_field = Field::new( "list", DataType::FixedSizeList(Arc::new(item_field), 2), @@ -1758,7 +1758,7 @@ mod tests { let array = builder.finish(); let values = array.values().clone(); - let item_field = Field::new("item", array.data_type().clone(), true); + let item_field = Field::new_list_field(array.data_type().clone(), true); let mut builder = levels(&item_field, array); builder.write(0..3); let levels = builder.finish(); @@ -1797,7 +1797,7 @@ mod tests { let a = builder.finish(); let values = a.values().as_list::().values().clone(); - let item_field = Field::new("item", a.data_type().clone(), true); + let item_field = Field::new_list_field(a.data_type().clone(), true); let mut builder = levels(&item_field, a); builder.write(0..4); let levels = builder.finish(); @@ -1827,7 +1827,7 @@ mod tests { // [NULL, NULL, 3, 0] let dict = DictionaryArray::new(keys, Arc::new(values)); - let item_field = Field::new("item", dict.data_type().clone(), true); + let item_field = Field::new_list_field(dict.data_type().clone(), true); let mut builder = levels(&item_field, dict.clone()); builder.write(0..4); @@ -1846,7 +1846,7 @@ mod tests { #[test] fn mismatched_types() { let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef; - let field = Field::new("item", DataType::Float64, false); + let field = Field::new_list_field(DataType::Float64, false); let err = LevelInfoBuilder::try_new(&field, Default::default(), &array) .unwrap_err() diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 99d54eef3bb5..871b140768cb 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ 
b/parquet/src/arrow/arrow_writer/mod.rs @@ -30,12 +30,10 @@ use arrow_array::types::*; use arrow_array::{ArrayRef, RecordBatch, RecordBatchWriter}; use arrow_schema::{ArrowError, DataType as ArrowDataType, Field, IntervalUnit, SchemaRef}; -use super::schema::{ - add_encoded_arrow_schema_to_metadata, arrow_to_parquet_schema, - arrow_to_parquet_schema_with_root, decimal_length_from_precision, -}; +use super::schema::{add_encoded_arrow_schema_to_metadata, decimal_length_from_precision}; use crate::arrow::arrow_writer::byte_array::ByteArrayEncoder; +use crate::arrow::ArrowSchemaConverter; use crate::column::page::{CompressedPage, PageWriteSpec, PageWriter}; use crate::column::writer::encoder::ColumnValueEncoder; use crate::column::writer::{ @@ -180,11 +178,12 @@ impl ArrowWriter { arrow_schema: SchemaRef, options: ArrowWriterOptions, ) -> Result { - let schema = match options.schema_root { - Some(s) => arrow_to_parquet_schema_with_root(&arrow_schema, &s)?, - None => arrow_to_parquet_schema(&arrow_schema)?, - }; let mut props = options.properties; + let mut converter = ArrowSchemaConverter::new().with_coerce_types(props.coerce_types()); + if let Some(schema_root) = &options.schema_root { + converter = converter.schema_root(schema_root); + } + let schema = converter.convert(&arrow_schema)?; if !options.skip_arrow_metadata { // add serialized arrow schema add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props); @@ -390,9 +389,9 @@ impl ArrowWriterOptions { } /// Set the name of the root parquet schema element (defaults to `"arrow_schema"`) - pub fn with_schema_root(self, name: String) -> Self { + pub fn with_schema_root(self, schema_root: String) -> Self { Self { - schema_root: Some(name), + schema_root: Some(schema_root), ..self } } @@ -538,7 +537,7 @@ impl ArrowColumnChunk { /// # use std::sync::Arc; /// # use arrow_array::*; /// # use arrow_schema::*; -/// # use parquet::arrow::arrow_to_parquet_schema; +/// # use parquet::arrow::ArrowSchemaConverter; /// # use parquet::arrow::arrow_writer::{ArrowLeafColumn, compute_leaves, get_column_writers}; /// # use parquet::file::properties::WriterProperties; /// # use parquet::file::writer::SerializedFileWriter; @@ -549,8 +548,11 @@ impl ArrowColumnChunk { /// ])); /// /// // Compute the parquet schema -/// let parquet_schema = arrow_to_parquet_schema(schema.as_ref()).unwrap(); /// let props = Arc::new(WriterProperties::default()); +/// let parquet_schema = ArrowSchemaConverter::new() +/// .with_coerce_types(props.coerce_types()) +/// .convert(&schema) +/// .unwrap(); /// /// // Create writers for each of the leaf columns /// let col_writers = get_column_writers(&parquet_schema, &props, &schema).unwrap(); @@ -858,6 +860,12 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result { match column.data_type() { + ArrowDataType::Date64 => { + let array = arrow_cast::cast(column, &ArrowDataType::Int64)?; + + let array = array.as_primitive::(); + write_primitive(typed, array.values(), levels) + } ArrowDataType::Int64 => { let array = column.as_primitive::(); write_primitive(typed, array.values(), levels) @@ -1082,6 +1090,7 @@ mod tests { use arrow::datatypes::ToByteSlice; use arrow::datatypes::{DataType, Schema}; use arrow::error::Result as ArrowResult; + use arrow::util::data_gen::create_random_array; use arrow::util::pretty::pretty_format_batches; use arrow::{array::*, buffer::Buffer}; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer}; @@ -1194,7 +1203,7 @@ mod tests { // define schema let schema = 
Schema::new(vec![Field::new( "a", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), true, )]); @@ -1206,8 +1215,7 @@ mod tests { let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); // Construct a list array from the above two - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) @@ -1234,7 +1242,7 @@ mod tests { // define schema let schema = Schema::new(vec![Field::new( "a", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), false, )]); @@ -1246,8 +1254,7 @@ mod tests { let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); // Construct a list array from the above two - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) @@ -1365,12 +1372,12 @@ mod tests { let struct_field_f = Arc::new(Field::new("f", DataType::Float32, true)); let struct_field_g = Arc::new(Field::new_list( "g", - Field::new("item", DataType::Int16, true), + Field::new_list_field(DataType::Int16, true), false, )); let struct_field_h = Arc::new(Field::new_list( "h", - Field::new("item", DataType::Int16, false), + Field::new_list_field(DataType::Int16, false), true, )); let struct_field_e = Arc::new(Field::new_struct( @@ -1743,7 +1750,7 @@ mod tests { "Expected a dictionary page" ); - let offset_indexes = read_offset_indexes(&file, column).unwrap(); + let offset_indexes = read_offset_indexes(&file, column).unwrap().unwrap(); let page_locations = offset_indexes[0].page_locations.clone(); @@ -2377,7 +2384,7 @@ mod tests { #[test] fn null_list_single_column() { - let null_field = Field::new("item", DataType::Null, true); + let null_field = Field::new_list_field(DataType::Null, true); let list_field = Field::new("emptylist", DataType::List(Arc::new(null_field)), true); let schema = Schema::new(vec![list_field]); @@ -2385,8 +2392,7 @@ mod tests { // Build [[], null, [null, null]] let a_values = NullArray::new(2); let a_value_offsets = arrow::buffer::Buffer::from([0, 0, 0, 2].to_byte_slice()); - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Null, true, )))) @@ -2415,8 +2421,7 @@ mod tests { fn list_single_column() { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) @@ -2489,6 +2494,56 @@ mod tests { one_column_roundtrip(values, false); } + #[test] + fn list_and_map_coerced_names() { + // Create map and list with non-Parquet naming + let list_field = + Field::new_list("my_list", Field::new("item", DataType::Int32, false), false); + let map_field = Field::new_map( + "my_map", + "entries", + Field::new("keys", DataType::Int32, false), + Field::new("values", DataType::Int32, true), + false, + true, + ); + + let list_array = create_random_array(&list_field, 
100, 0.0, 0.0).unwrap(); + let map_array = create_random_array(&map_field, 100, 0.0, 0.0).unwrap(); + + let arrow_schema = Arc::new(Schema::new(vec![list_field, map_field])); + + // Write data to Parquet but coerce names to match spec + let props = Some(WriterProperties::builder().set_coerce_types(true).build()); + let file = tempfile::tempfile().unwrap(); + let mut writer = + ArrowWriter::try_new(file.try_clone().unwrap(), arrow_schema.clone(), props).unwrap(); + + let batch = RecordBatch::try_new(arrow_schema, vec![list_array, map_array]).unwrap(); + writer.write(&batch).unwrap(); + let file_metadata = writer.close().unwrap(); + + // Coerced name of "item" should be "element" + assert_eq!(file_metadata.schema[3].name, "element"); + // Coerced name of "entries" should be "key_value" + assert_eq!(file_metadata.schema[5].name, "key_value"); + // Coerced name of "keys" should be "key" + assert_eq!(file_metadata.schema[6].name, "key"); + // Coerced name of "values" should be "value" + assert_eq!(file_metadata.schema[7].name, "value"); + + // Double check schema after reading from the file + let reader = SerializedFileReader::new(file).unwrap(); + let file_schema = reader.metadata().file_metadata().schema(); + let fields = file_schema.get_fields(); + let list_field = &fields[0].get_fields()[0]; + assert_eq!(list_field.get_fields()[0].name(), "element"); + let map_field = &fields[1].get_fields()[0]; + assert_eq!(map_field.name(), "key_value"); + assert_eq!(map_field.get_fields()[0].name(), "key"); + assert_eq!(map_field.get_fields()[1].name(), "value"); + } + #[test] fn fallback_flush_data_page() { //tests if the Fallback::flush_data_page clears all buffers correctly @@ -2534,6 +2589,7 @@ mod tests { #[test] fn arrow_writer_string_dictionary() { // define schema + #[allow(deprecated)] let schema = Arc::new(Schema::new(vec![Field::new_dict( "dictionary", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), @@ -2555,6 +2611,7 @@ mod tests { #[test] fn arrow_writer_primitive_dictionary() { // define schema + #[allow(deprecated)] let schema = Arc::new(Schema::new(vec![Field::new_dict( "dictionary", DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::UInt32)), @@ -2577,6 +2634,7 @@ mod tests { #[test] fn arrow_writer_string_dictionary_unsigned_index() { // define schema + #[allow(deprecated)] let schema = Arc::new(Schema::new(vec![Field::new_dict( "dictionary", DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)), diff --git a/parquet/src/arrow/async_reader/metadata.rs b/parquet/src/arrow/async_reader/metadata.rs index b19f9830a7c9..526818845b5c 100644 --- a/parquet/src/arrow/async_reader/metadata.rs +++ b/parquet/src/arrow/async_reader/metadata.rs @@ -119,7 +119,7 @@ impl MetadataLoader { return Err(ParquetError::EOF(format!( "file size of {} is less than footer + metadata {}", file_size, - length + 8 + length + FOOTER_SIZE ))); } diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 8b315cc9f784..c408456df147 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -158,7 +158,8 @@ pub trait AsyncFileReader: Send { fn get_metadata(&mut self) -> BoxFuture<'_, Result>>; } -impl AsyncFileReader for Box { +/// This allows Box to be used as an AsyncFileReader, +impl AsyncFileReader for Box { fn get_bytes(&mut self, range: Range) -> BoxFuture<'_, Result> { self.as_mut().get_bytes(range) } @@ -927,7 +928,6 @@ mod tests { use 
crate::arrow::schema::parquet_to_arrow_schema_and_fields; use crate::arrow::ArrowWriter; use crate::file::metadata::ParquetMetaDataReader; - use crate::file::page_index::index_reader; use crate::file::properties::WriterProperties; use arrow::compute::kernels::cmp::eq; use arrow::error::Result as ArrowResult; @@ -1565,12 +1565,11 @@ mod tests { let data = Bytes::from(std::fs::read(path).unwrap()); let metadata = ParquetMetaDataReader::new() + .with_page_indexes(true) .parse_and_finish(&data) .unwrap(); - let offset_index = - index_reader::read_offset_indexes(&data, metadata.row_group(0).columns()) - .expect("reading offset index"); + let offset_index = metadata.offset_index().expect("reading offset index")[0].clone(); let mut metadata_builder = metadata.into_builder(); let mut row_groups = metadata_builder.take_row_groups(); @@ -1870,7 +1869,7 @@ mod tests { async fn test_nested_skip() { let schema = Arc::new(Schema::new(vec![ Field::new("col_1", DataType::UInt64, false), - Field::new_list("col_2", Field::new("item", DataType::Utf8, true), true), + Field::new_list("col_2", Field::new_list_field(DataType::Utf8, true), true), ])); // Default writer properties diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs index 8155b57d9ac6..c04d5710a971 100644 --- a/parquet/src/arrow/async_writer/mod.rs +++ b/parquet/src/arrow/async_writer/mod.rs @@ -89,7 +89,7 @@ pub trait AsyncFileWriter: Send { fn complete(&mut self) -> BoxFuture<'_, Result<()>>; } -impl AsyncFileWriter for Box { +impl AsyncFileWriter for Box { fn write(&mut self, bs: Bytes) -> BoxFuture<'_, Result<()>> { self.as_mut().write(bs) } diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs index 2256f4877d68..fd7d6c213f04 100644 --- a/parquet/src/arrow/buffer/view_buffer.rs +++ b/parquet/src/arrow/buffer/view_buffer.rs @@ -130,7 +130,7 @@ mod tests { #[test] fn test_view_buffer_append_view() { let mut buffer = ViewBuffer::default(); - let string_buffer = Buffer::from(&b"0123456789long string to test string view"[..]); + let string_buffer = Buffer::from(b"0123456789long string to test string view"); let block_id = buffer.append_block(string_buffer); unsafe { @@ -157,7 +157,7 @@ mod tests { #[test] fn test_view_buffer_pad_null() { let mut buffer = ViewBuffer::default(); - let string_buffer = Buffer::from(&b"0123456789long string to test string view"[..]); + let string_buffer = Buffer::from(b"0123456789long string to test string view"); let block_id = buffer.append_block(string_buffer); unsafe { diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index 2d09cd19203f..d77436bc1ff7 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -116,9 +116,13 @@ pub use self::async_writer::AsyncArrowWriter; use crate::schema::types::SchemaDescriptor; use arrow_schema::{FieldRef, Schema}; +// continue to export deprecated methods until they are removed +#[allow(deprecated)] +pub use self::schema::arrow_to_parquet_schema; + pub use self::schema::{ - arrow_to_parquet_schema, parquet_to_arrow_field_levels, parquet_to_arrow_schema, - parquet_to_arrow_schema_by_columns, FieldLevels, + parquet_to_arrow_field_levels, parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, + ArrowSchemaConverter, FieldLevels, }; /// Schema metadata key used to store serialized Arrow IPC schema diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs index e487feabb848..16d46bd852dc 100644 --- a/parquet/src/arrow/schema/complex.rs +++ 
b/parquet/src/arrow/schema/complex.rs @@ -271,8 +271,13 @@ impl Visitor { return Err(arrow_err!("Child of map field must be repeated")); } + // According to the specification the values are optional (#1642). + // In this case, return the keys as a list. + if map_key_value.get_fields().len() == 1 { + return self.visit_list(map_type, context); + } + if map_key_value.get_fields().len() != 2 { - // According to the specification the values are optional (#1642) return Err(arrow_err!( "Child of map field must have two children, found {}", map_key_value.get_fields().len() @@ -448,15 +453,21 @@ impl Visitor { }; } + // test to see if the repeated field is a struct or one-tuple let items = repeated_field.get_fields(); if items.len() != 1 - || repeated_field.name() == "array" - || repeated_field.name() == format!("{}_tuple", list_type.name()) + || (!repeated_field.is_list() + && !repeated_field.has_single_repeated_child() + && (repeated_field.name() == "array" + || repeated_field.name() == format!("{}_tuple", list_type.name()))) { - // If the repeated field is a group with multiple fields, then its type is the element type and elements are required. + // If the repeated field is a group with multiple fields, then its type is the element + // type and elements are required. // - // If the repeated field is a group with one field and is named either array or uses the LIST-annotated group's name - // with _tuple appended then the repeated type is the element type and elements are required. + // If the repeated field is a group with one field and is named either array or uses + // the LIST-annotated group's name with _tuple appended then the repeated type is the + // element type and elements are required. But this rule only applies if the + // repeated field is not annotated, and the single child field is not `repeated`. let context = VisitorContext { rep_level: context.rep_level, def_level, @@ -541,8 +552,11 @@ fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<& match arrow_hint { Some(hint) => { // If the inferred type is a dictionary, preserve dictionary metadata + #[allow(deprecated)] let field = match (&data_type, hint.dict_id(), hint.dict_is_ordered()) { - (DataType::Dictionary(_, _), Some(id), Some(ordered)) => { + (DataType::Dictionary(_, _), Some(id), Some(ordered)) => + { + #[allow(deprecated)] Field::new_dict(name, data_type, nullable, id, ordered) } _ => Field::new(name, data_type, nullable), diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 3ed3bd24e0a8..5d3d7b2a6541 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -15,13 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Provides API for converting parquet schema to arrow schema and vice versa. -//! -//! The main interfaces for converting parquet schema to arrow schema are -//! `parquet_to_arrow_schema`, `parquet_to_arrow_schema_by_columns` and -//! `parquet_to_arrow_field`. -//! -//! The interfaces for converting arrow schema to parquet schema is coming. +//! 
Converting Parquet schema <--> Arrow schema: [`ArrowSchemaConverter`] and [parquet_to_arrow_schema] use base64::prelude::BASE64_STANDARD; use base64::Engine; @@ -178,6 +172,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result { /// Encodes the Arrow schema into the IPC format, and base64 encodes it fn encode_arrow_schema(schema: &Schema) -> String { let options = writer::IpcWriteOptions::default(); + #[allow(deprecated)] let mut dictionary_tracker = writer::DictionaryTracker::new_with_preserve_dict_id(true, options.preserve_dict_id()); let data_gen = writer::IpcDataGenerator::default(); @@ -225,23 +220,134 @@ pub(crate) fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut } } +/// Converter for Arrow schema to Parquet schema +/// +/// Example: +/// ``` +/// # use std::sync::Arc; +/// # use arrow_schema::{Field, Schema, DataType}; +/// # use parquet::arrow::ArrowSchemaConverter; +/// use parquet::schema::types::{SchemaDescriptor, Type}; +/// use parquet::basic; // note there are two `Type`s in the following example +/// // create an Arrow Schema +/// let arrow_schema = Schema::new(vec![ +/// Field::new("a", DataType::Int64, true), +/// Field::new("b", DataType::Date32, true), +/// ]); +/// // convert the Arrow schema to a Parquet schema +/// let parquet_schema = ArrowSchemaConverter::new() +/// .convert(&arrow_schema) +/// .unwrap(); +/// +/// let expected_parquet_schema = SchemaDescriptor::new( +/// Arc::new( +/// Type::group_type_builder("arrow_schema") +/// .with_fields(vec![ +/// Arc::new( +/// Type::primitive_type_builder("a", basic::Type::INT64) +/// .build().unwrap() +/// ), +/// Arc::new( +/// Type::primitive_type_builder("b", basic::Type::INT32) +/// .with_converted_type(basic::ConvertedType::DATE) +/// .with_logical_type(Some(basic::LogicalType::Date)) +/// .build().unwrap() +/// ), +/// ]) +/// .build().unwrap() +/// ) +/// ); +/// assert_eq!(parquet_schema, expected_parquet_schema); +/// ``` +#[derive(Debug)] +pub struct ArrowSchemaConverter<'a> { + /// Name of the root schema in Parquet + schema_root: &'a str, + /// Should we coerce Arrow types to compatible Parquet types? + /// + /// See docs on [Self::with_coerce_types]` + coerce_types: bool, +} + +impl Default for ArrowSchemaConverter<'_> { + fn default() -> Self { + Self::new() + } +} + +impl<'a> ArrowSchemaConverter<'a> { + /// Create a new converter + pub fn new() -> Self { + Self { + schema_root: "arrow_schema", + coerce_types: false, + } + } + + /// Should Arrow types be coerced into Parquet native types (default `false`). + /// + /// Setting this option to `true` will result in Parquet files that can be + /// read by more readers, but may lose precision for Arrow types such as + /// [`DataType::Date64`] which have no direct [corresponding Parquet type]. + /// + /// By default, this converter does not coerce to native Parquet types. Enabling type + /// coercion allows for meaningful representations that do not require + /// downstream readers to consider the embedded Arrow schema, and can allow + /// for greater compatibility with other Parquet implementations. However, + /// type coercion also prevents data from being losslessly round-tripped. + /// + /// # Discussion + /// + /// Some Arrow types such as `Date64`, `Timestamp` and `Interval` have no + /// corresponding Parquet logical type. Thus, they can not be losslessly + /// round-tripped when stored using the appropriate Parquet logical type. 
+ /// For example, some Date64 values may be truncated when stored with + /// parquet's native 32 bit date type. + /// + /// For [`List`] and [`Map`] types, some Parquet readers expect certain + /// schema elements to have specific names (earlier versions of the spec + /// were somewhat ambiguous on this point). Type coercion will use the names + /// prescribed by the Parquet specification, potentially losing naming + /// metadata from the Arrow schema. + /// + /// [`List`]: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + /// [`Map`]: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps + /// [corresponding Parquet type]: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date + /// + pub fn with_coerce_types(mut self, coerce_types: bool) -> Self { + self.coerce_types = coerce_types; + self + } + + /// Set the root schema element name (defaults to `"arrow_schema"`). + pub fn schema_root(mut self, schema_root: &'a str) -> Self { + self.schema_root = schema_root; + self + } + + /// Convert the specified Arrow [`Schema`] to the desired Parquet [`SchemaDescriptor`] + /// + /// See example in [`ArrowSchemaConverter`] + pub fn convert(&self, schema: &Schema) -> Result { + let fields = schema + .fields() + .iter() + .map(|field| arrow_to_parquet_type(field, self.coerce_types).map(Arc::new)) + .collect::>()?; + let group = Type::group_type_builder(self.schema_root) + .with_fields(fields) + .build()?; + Ok(SchemaDescriptor::new(Arc::new(group))) + } +} + /// Convert arrow schema to parquet schema /// /// The name of the root schema element defaults to `"arrow_schema"`, this can be -/// overridden with [`arrow_to_parquet_schema_with_root`] +/// overridden with [`ArrowSchemaConverter`] +#[deprecated(since = "54.0.0", note = "Use `ArrowSchemaConverter` instead")] pub fn arrow_to_parquet_schema(schema: &Schema) -> Result { - arrow_to_parquet_schema_with_root(schema, "arrow_schema") -} - -/// Convert arrow schema to parquet schema specifying the name of the root schema element -pub fn arrow_to_parquet_schema_with_root(schema: &Schema, root: &str) -> Result { - let fields = schema - .fields() - .iter() - .map(|field| arrow_to_parquet_type(field).map(Arc::new)) - .collect::>()?; - let group = Type::group_type_builder(root).with_fields(fields).build()?; - Ok(SchemaDescriptor::new(Arc::new(group))) + ArrowSchemaConverter::new().convert(schema) } fn parse_key_value_metadata( @@ -298,7 +404,12 @@ pub fn decimal_length_from_precision(precision: u8) -> usize { } /// Convert an arrow field to a parquet `Type` -fn arrow_to_parquet_type(field: &Field) -> Result { +fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { + const PARQUET_LIST_ELEMENT_NAME: &str = "element"; + const PARQUET_MAP_STRUCT_NAME: &str = "key_value"; + const PARQUET_KEY_FIELD_NAME: &str = "key"; + const PARQUET_VALUE_FIELD_NAME: &str = "value"; + let name = field.name().as_str(); let repetition = if field.is_nullable() { Repetition::OPTIONAL @@ -415,12 +526,20 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_repetition(repetition) .with_id(id) .build(), - // date64 is cast to date32 (#1666) - DataType::Date64 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Date)) - .with_repetition(repetition) - .with_id(id) - .build(), + DataType::Date64 => { + if coerce_types { + Type::primitive_type_builder(name, PhysicalType::INT32) + .with_logical_type(Some(LogicalType::Date)) + .with_repetition(repetition) + 
.with_id(id) + .build() + } else { + Type::primitive_type_builder(name, PhysicalType::INT64) + .with_repetition(repetition) + .with_id(id) + .build() + } + } DataType::Time32(TimeUnit::Second) => { // Cannot represent seconds in LogicalType Type::primitive_type_builder(name, PhysicalType::INT32) @@ -515,10 +634,18 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_id(id) .build(), DataType::List(f) | DataType::FixedSizeList(f, _) | DataType::LargeList(f) => { + let field_ref = if coerce_types && f.name() != PARQUET_LIST_ELEMENT_NAME { + // Ensure proper naming per the Parquet specification + let ff = f.as_ref().clone().with_name(PARQUET_LIST_ELEMENT_NAME); + Arc::new(arrow_to_parquet_type(&ff, coerce_types)?) + } else { + Arc::new(arrow_to_parquet_type(f, coerce_types)?) + }; + Type::group_type_builder(name) .with_fields(vec![Arc::new( Type::group_type_builder("list") - .with_fields(vec![Arc::new(arrow_to_parquet_type(f)?)]) + .with_fields(vec![field_ref]) .with_repetition(Repetition::REPEATED) .build()?, )]) @@ -537,7 +664,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { // recursively convert children to types/nodes let fields = fields .iter() - .map(|f| arrow_to_parquet_type(f).map(Arc::new)) + .map(|f| arrow_to_parquet_type(f, coerce_types).map(Arc::new)) .collect::>()?; Type::group_type_builder(name) .with_fields(fields) @@ -547,13 +674,29 @@ fn arrow_to_parquet_type(field: &Field) -> Result { } DataType::Map(field, _) => { if let DataType::Struct(struct_fields) = field.data_type() { + // If coercing then set inner struct name to "key_value" + let map_struct_name = if coerce_types { + PARQUET_MAP_STRUCT_NAME + } else { + field.name() + }; + + // If coercing then ensure struct fields are named "key" and "value" + let fix_map_field = |name: &str, fld: &Arc| -> Result> { + if coerce_types && fld.name() != name { + let f = fld.as_ref().clone().with_name(name); + Ok(Arc::new(arrow_to_parquet_type(&f, coerce_types)?)) + } else { + Ok(Arc::new(arrow_to_parquet_type(fld, coerce_types)?)) + } + }; + let key_field = fix_map_field(PARQUET_KEY_FIELD_NAME, &struct_fields[0])?; + let val_field = fix_map_field(PARQUET_VALUE_FIELD_NAME, &struct_fields[1])?; + Type::group_type_builder(name) .with_fields(vec![Arc::new( - Type::group_type_builder(field.name()) - .with_fields(vec![ - Arc::new(arrow_to_parquet_type(&struct_fields[0])?), - Arc::new(arrow_to_parquet_type(&struct_fields[1])?), - ]) + Type::group_type_builder(map_struct_name) + .with_fields(vec![key_field, val_field]) .with_repetition(Repetition::REPEATED) .build()?, )]) @@ -571,7 +714,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { DataType::Dictionary(_, ref value) => { // Dictionary encoding not handled at the schema level let dict_field = field.clone().with_data_type(value.as_ref().clone()); - arrow_to_parquet_type(&dict_field) + arrow_to_parquet_type(&dict_field, coerce_types) } DataType::RunEndEncoded(_, _) => Err(arrow_err!( "Converting RunEndEncodedType to parquet not supported", @@ -1408,6 +1551,81 @@ mod tests { assert_eq!(arrow_fields, converted_arrow_fields); } + #[test] + fn test_coerced_map_list() { + // Create Arrow schema with non-Parquet naming + let arrow_fields = vec![ + Field::new_list( + "my_list", + Field::new("item", DataType::Boolean, true), + false, + ), + Field::new_map( + "my_map", + "entries", + Field::new("keys", DataType::Utf8, false), + Field::new("values", DataType::Int32, true), + false, + true, + ), + ]; + let arrow_schema = Schema::new(arrow_fields); + + // Create Parquet schema 
with coerced names + let message_type = " + message parquet_schema { + REQUIRED GROUP my_list (LIST) { + REPEATED GROUP list { + OPTIONAL BOOLEAN element; + } + } + OPTIONAL GROUP my_map (MAP) { + REPEATED GROUP key_value { + REQUIRED BINARY key (STRING); + OPTIONAL INT32 value; + } + } + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + let converted_arrow_schema = ArrowSchemaConverter::new() + .with_coerce_types(true) + .convert(&arrow_schema) + .unwrap(); + assert_eq!( + parquet_schema.columns().len(), + converted_arrow_schema.columns().len() + ); + + // Create Parquet schema without coerced names + let message_type = " + message parquet_schema { + REQUIRED GROUP my_list (LIST) { + REPEATED GROUP list { + OPTIONAL BOOLEAN item; + } + } + OPTIONAL GROUP my_map (MAP) { + REPEATED GROUP entries { + REQUIRED BINARY keys (STRING); + OPTIONAL INT32 values; + } + } + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + let converted_arrow_schema = ArrowSchemaConverter::new() + .with_coerce_types(false) + .convert(&arrow_schema) + .unwrap(); + assert_eq!( + parquet_schema.columns().len(), + converted_arrow_schema.columns().len() + ); + } + #[test] fn test_field_to_column_desc() { let message_type = " @@ -1557,7 +1775,7 @@ mod tests { Field::new("decimal256", DataType::Decimal256(39, 2), false), ]; let arrow_schema = Schema::new(arrow_fields); - let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema).unwrap(); + let converted_arrow_schema = ArrowSchemaConverter::new().convert(&arrow_schema).unwrap(); assert_eq!( parquet_schema.columns().len(), @@ -1594,9 +1812,10 @@ mod tests { false, )]; let arrow_schema = Schema::new(arrow_fields); - let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema); + let converted_arrow_schema = ArrowSchemaConverter::new() + .with_coerce_types(true) + .convert(&arrow_schema); - assert!(converted_arrow_schema.is_err()); converted_arrow_schema.unwrap(); } @@ -1665,7 +1884,7 @@ mod tests { Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false), Field::new_list( "c21", - Field::new("item", DataType::Boolean, true) + Field::new_list_field(DataType::Boolean, true) .with_metadata(meta(&[("Key", "Bar"), (PARQUET_FIELD_ID_META_KEY, "5")])), false, ) @@ -1673,7 +1892,7 @@ mod tests { Field::new( "c22", DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Boolean, true)), + Arc::new(Field::new_list_field(DataType::Boolean, true)), 5, ), false, @@ -1682,8 +1901,7 @@ mod tests { "c23", Field::new_large_list( "inner", - Field::new( - "item", + Field::new_list_field( DataType::Struct( vec![ Field::new("a", DataType::Int16, true), @@ -1714,6 +1932,7 @@ mod tests { // Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false), // Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false), // Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false), + #[allow(deprecated)] Field::new_dict( "c31", DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), @@ -1728,8 +1947,7 @@ mod tests { "c34", Field::new_list( "inner", - Field::new( - "item", + Field::new_list_field( DataType::Struct( vec![ Field::new("a", DataType::Int16, true), @@ -1762,7 +1980,7 @@ mod tests { .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "8")])), Field::new_list( "my_value", - Field::new("item", DataType::Utf8, 
true) + Field::new_list_field(DataType::Utf8, true) .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "10")])), true, ) @@ -1777,7 +1995,7 @@ mod tests { Field::new("my_key", DataType::Utf8, false), Field::new_list( "my_value", - Field::new("item", DataType::Utf8, true) + Field::new_list_field(DataType::Utf8, true) .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "11")])), true, ), @@ -1868,7 +2086,9 @@ mod tests { // don't pass metadata so field ids are read from Parquet and not from serialized Arrow schema let arrow_schema = crate::arrow::parquet_to_arrow_schema(&schema_descriptor, None)?; - let parq_schema_descr = crate::arrow::arrow_to_parquet_schema(&arrow_schema)?; + let parq_schema_descr = ArrowSchemaConverter::new() + .with_coerce_types(true) + .convert(&arrow_schema)?; let parq_fields = parq_schema_descr.root_schema().get_fields(); assert_eq!(parq_fields.len(), 2); assert_eq!(parq_fields[0].get_basic_info().id(), 1); diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 1926b87623bf..97e8c22f1b2f 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -302,6 +302,7 @@ pub enum Encoding { /// /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead. #[deprecated( + since = "51.0.0", note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead" )] BIT_PACKED, diff --git a/parquet/src/bin/parquet-rewrite.rs b/parquet/src/bin/parquet-rewrite.rs index ad0f7ae0df7d..5a1ec94d5502 100644 --- a/parquet/src/bin/parquet-rewrite.rs +++ b/parquet/src/bin/parquet-rewrite.rs @@ -199,6 +199,10 @@ struct Args { /// Sets writer version. #[clap(long)] writer_version: Option, + + /// Sets whether to coerce Arrow types to match Parquet specification + #[clap(long)] + coerce_types: Option, } fn main() { @@ -238,6 +242,7 @@ fn main() { if let Some(value) = args.dictionary_page_size_limit { writer_properties_builder = writer_properties_builder.set_dictionary_page_size_limit(value); } + #[allow(deprecated)] if let Some(value) = args.max_statistics_size { writer_properties_builder = writer_properties_builder.set_max_statistics_size(value); } @@ -262,6 +267,9 @@ fn main() { if let Some(value) = args.writer_version { writer_properties_builder = writer_properties_builder.set_writer_version(value.into()); } + if let Some(value) = args.coerce_types { + writer_properties_builder = writer_properties_builder.set_coerce_types(value); + } let writer_properties = writer_properties_builder.build(); let mut parquet_writer = ArrowWriter::try_new( File::create(&args.output).expect("Unable to open output file"), diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs index 2b43b4c3e45c..953dc057d7a3 100644 --- a/parquet/src/column/reader.rs +++ b/parquet/src/column/reader.rs @@ -185,31 +185,6 @@ where } } - /// Reads a batch of values of at most `batch_size`, returning a tuple containing the - /// actual number of non-null values read, followed by the corresponding number of levels, - /// i.e, the total number of values including nulls, empty lists, etc... - /// - /// If the max definition level is 0, `def_levels` will be ignored, otherwise it will be - /// populated with the number of levels read, with an error returned if it is `None`. - /// - /// If the max repetition level is 0, `rep_levels` will be ignored, otherwise it will be - /// populated with the number of levels read, with an error returned if it is `None`. - /// - /// `values` will be contiguously populated with the non-null values. 
Note that if the column - /// is not required, this may be less than either `batch_size` or the number of levels read - #[deprecated(note = "Use read_records")] - pub fn read_batch( - &mut self, - batch_size: usize, - def_levels: Option<&mut D::Buffer>, - rep_levels: Option<&mut R::Buffer>, - values: &mut V::Buffer, - ) -> Result<(usize, usize)> { - let (_, values, levels) = self.read_records(batch_size, def_levels, rep_levels, values)?; - - Ok((values, levels)) - } - /// Read up to `max_records` whole records, returning the number of complete /// records, non-null values and levels decoded. All levels for a given record /// will be read, i.e. the next repetition level, if any, will be 0 diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 9bd79840f760..8dc1d0db4476 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -347,7 +347,7 @@ pub struct GenericColumnWriter<'a, E: ColumnValueEncoder> { data_pages: VecDeque, // column index and offset index column_index_builder: ColumnIndexBuilder, - offset_index_builder: OffsetIndexBuilder, + offset_index_builder: Option, // Below fields used to incrementally check boundary order across data pages. // We assume they are ascending/descending until proven wrong. @@ -394,6 +394,12 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { column_index_builder.to_invalid() } + // Disable offset_index_builder if requested by user. + let offset_index_builder = match props.offset_index_disabled() { + false => Some(OffsetIndexBuilder::new()), + _ => None, + }; + Self { descr, props, @@ -408,7 +414,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { page_metrics, column_metrics, column_index_builder, - offset_index_builder: OffsetIndexBuilder::new(), + offset_index_builder, encodings, data_page_boundary_ascending: true, data_page_boundary_descending: true, @@ -568,7 +574,11 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { /// anticipated encoded size. #[cfg(feature = "arrow")] pub(crate) fn get_estimated_total_bytes(&self) -> u64 { - self.column_metrics.total_bytes_written + self.data_pages + .iter() + .map(|page| page.data().len() as u64) + .sum::() + + self.column_metrics.total_bytes_written + self.encoder.estimated_data_page_size() as u64 + self.encoder.estimated_dict_page_size().unwrap_or_default() as u64 } @@ -613,7 +623,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { .column_index_builder .valid() .then(|| self.column_index_builder.build_to_thrift()); - let offset_index = Some(self.offset_index_builder.build_to_thrift()); + + let offset_index = self.offset_index_builder.map(|b| b.build_to_thrift()); Ok(ColumnCloseResult { bytes_written: self.column_metrics.total_bytes_written, @@ -841,11 +852,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { ); // Update the offset index - self.offset_index_builder - .append_row_count(self.page_metrics.num_buffered_rows as i64); - - self.offset_index_builder - .append_unencoded_byte_array_data_bytes(page_variable_length_bytes); + if let Some(builder) = self.offset_index_builder.as_mut() { + builder.append_row_count(self.page_metrics.num_buffered_rows as i64); + builder.append_unencoded_byte_array_data_bytes(page_variable_length_bytes); + } } /// Determine if we should allow truncating min/max values for this column's statistics @@ -868,24 +878,67 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { } } + /// Returns `true` if this column's logical type is a UTF-8 string. 
+    fn is_utf8(&self) -> bool {
+        self.get_descriptor().logical_type() == Some(LogicalType::String)
+            || self.get_descriptor().converted_type() == ConvertedType::UTF8
+    }
+
+    /// Truncates a binary statistic to at most `truncation_length` bytes.
+    ///
+    /// If truncation is not possible, returns `data`.
+    ///
+    /// The `bool` in the returned tuple indicates whether truncation occurred or not.
+    ///
+    /// UTF-8 Note:
+    /// If the column type indicates UTF-8, and `data` contains valid UTF-8, then the result will
+    /// also remain valid UTF-8, but may be less than `truncation_length` bytes to avoid splitting
+    /// on non-character boundaries.
     fn truncate_min_value(&self, truncation_length: Option<usize>, data: &[u8]) -> (Vec<u8>, bool) {
         truncation_length
             .filter(|l| data.len() > *l)
-            .and_then(|l| match str::from_utf8(data) {
-                Ok(str_data) => truncate_utf8(str_data, l),
-                Err(_) => Some(data[..l].to_vec()),
-            })
+            .and_then(|l|
+                // don't do extra work if this column isn't UTF-8
+                if self.is_utf8() {
+                    match str::from_utf8(data) {
+                        Ok(str_data) => truncate_utf8(str_data, l),
+                        Err(_) => Some(data[..l].to_vec()),
+                    }
+                } else {
+                    Some(data[..l].to_vec())
+                }
+            )
             .map(|truncated| (truncated, true))
             .unwrap_or_else(|| (data.to_vec(), false))
     }
 
+    /// Truncates a binary statistic to at most `truncation_length` bytes, and then increments the
+    /// final byte(s) to yield a valid upper bound. The result may be shorter than
+    /// `truncation_length` bytes if the last byte(s) overflow.
+    ///
+    /// If truncation is not possible, returns `data`.
+    ///
+    /// The `bool` in the returned tuple indicates whether truncation occurred or not.
+    ///
+    /// UTF-8 Note:
+    /// If the column type indicates UTF-8, and `data` contains valid UTF-8, then the result will
+    /// also remain valid UTF-8 (but again may be less than `truncation_length` bytes). If `data`
+    /// does not contain valid UTF-8, then truncation will occur as if the column is non-string
+    /// binary.
    fn truncate_max_value(&self, truncation_length: Option<usize>, data: &[u8]) -> (Vec<u8>, bool) {
        truncation_length
            .filter(|l| data.len() > *l)
-            .and_then(|l| match str::from_utf8(data) {
-                Ok(str_data) => truncate_utf8(str_data, l).and_then(increment_utf8),
-                Err(_) => increment(data[..l].to_vec()),
-            })
+            .and_then(|l|
+                // don't do extra work if this column isn't UTF-8
+                if self.is_utf8() {
+                    match str::from_utf8(data) {
+                        Ok(str_data) => truncate_and_increment_utf8(str_data, l),
+                        Err(_) => increment(data[..l].to_vec()),
+                    }
+                } else {
+                    increment(data[..l].to_vec())
+                }
+            )
            .map(|truncated| (truncated, true))
            .unwrap_or_else(|| (data.to_vec(), false))
    }
@@ -1174,8 +1227,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
        let page_spec = self.page_writer.write_page(page)?;
        // update offset index
        // compressed_size = header_size + compressed_data_size
-        self.offset_index_builder
-            .append_offset_and_size(page_spec.offset as i64, page_spec.compressed_size as i32);
+        if let Some(builder) = self.offset_index_builder.as_mut() {
+            builder
+                .append_offset_and_size(page_spec.offset as i64, page_spec.compressed_size as i32)
+        }
        self.update_metrics_for_page(page_spec);
        Ok(())
    }
@@ -1406,13 +1461,50 @@ fn compare_greater_byte_array_decimals(a: &[u8], b: &[u8]) -> bool {
    (a[1..]) > (b[1..])
}

-/// Truncate a UTF8 slice to the longest prefix that is still a valid UTF8 string,
-/// while being less than `length` bytes and non-empty
+/// Truncate a UTF-8 slice to the longest prefix that is still a valid UTF-8 string,
+/// while being less than `length` bytes and non-empty. Returns `None` if truncation
+/// is not possible within those constraints.
+///
+/// The caller guarantees that data.len() > length.
fn truncate_utf8(data: &str, length: usize) -> Option<Vec<u8>> {
    let split = (1..=length).rfind(|x| data.is_char_boundary(*x))?;
    Some(data.as_bytes()[..split].to_vec())
}

+/// Truncate a UTF-8 slice and increment its final character. The returned value is the
+/// longest such slice that is still a valid UTF-8 string while being less than `length`
+/// bytes and non-empty. Returns `None` if no such transformation is possible.
+///
+/// The caller guarantees that data.len() > length.
+fn truncate_and_increment_utf8(data: &str, length: usize) -> Option<Vec<u8>> {
+    // UTF-8 is max 4 bytes, so start search 3 back from desired length
+    let lower_bound = length.saturating_sub(3);
+    let split = (lower_bound..=length).rfind(|x| data.is_char_boundary(*x))?;
+    increment_utf8(data.get(..split)?)
+}
+
+/// Increment the final character in a UTF-8 string in such a way that the returned result
+/// is still a valid UTF-8 string. The returned string may be shorter than the input if the
+/// last character(s) cannot be incremented (due to overflow or producing invalid code points).
+/// Returns `None` if the string cannot be incremented.
+///
+/// Note that this implementation will not promote an N-byte code point to (N+1) bytes.
+fn increment_utf8(data: &str) -> Option<Vec<u8>> {
+    for (idx, original_char) in data.char_indices().rev() {
+        let original_len = original_char.len_utf8();
+        if let Some(next_char) = char::from_u32(original_char as u32 + 1) {
+            // do not allow increasing byte width of incremented char
+            if next_char.len_utf8() == original_len {
+                let mut result = data.as_bytes()[..idx + original_len].to_vec();
+                next_char.encode_utf8(&mut result[idx..]);
+                return Some(result);
+            }
+        }
+    }
+
+    None
+}
+
/// Try and increment the bytes from right to left.
///
/// Returns `None` if all bytes are set to `u8::MAX`.
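// A minimal standalone sketch (not part of the patch) of the upper-bound strategy the
// hunk above introduces: truncate a UTF-8 string on a character boundary, then bump the
// last character that can be incremented without changing its encoded width, so the
// result is still valid UTF-8 and still an upper bound. The function and variable names
// here are illustrative only; the patch's real helpers are `truncate_and_increment_utf8`
// and `increment_utf8` shown above.
fn truncated_upper_bound(data: &str, length: usize) -> Option<Vec<u8>> {
    // Largest char boundary that does not exceed the byte budget.
    let split = (1..=length.min(data.len())).rfind(|i| data.is_char_boundary(*i))?;
    let prefix = &data[..split];
    // Walk backwards, incrementing the first character whose successor has the same width.
    for (idx, ch) in prefix.char_indices().rev() {
        if let Some(next) = char::from_u32(ch as u32 + 1) {
            if next.len_utf8() == ch.len_utf8() {
                let mut out = prefix.as_bytes()[..idx + ch.len_utf8()].to_vec();
                next.encode_utf8(&mut out[idx..]);
                return Some(out);
            }
        }
    }
    None
}

fn main() {
    // "ééééé" is 10 bytes; a 7 byte budget keeps three characters and bumps the last one,
    // matching the `truncate_and_increment_utf8("ééééé", 7)` case in the tests below.
    assert_eq!(truncated_upper_bound("ééééé", 7).unwrap(), "ééê".as_bytes());
    // A lone U+10FFFF cannot be incremented, so no upper bound is produced.
    assert!(truncated_upper_bound("\u{10ffff}", 4).is_none());
}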
@@ -1429,29 +1521,15 @@ fn increment(mut data: Vec) -> Option> { None } -/// Try and increment the the string's bytes from right to left, returning when the result -/// is a valid UTF8 string. Returns `None` when it can't increment any byte. -fn increment_utf8(mut data: Vec) -> Option> { - for idx in (0..data.len()).rev() { - let original = data[idx]; - let (byte, overflow) = original.overflowing_add(1); - if !overflow { - data[idx] = byte; - if str::from_utf8(&data).is_ok() { - return Some(data); - } - data[idx] = original; - } - } - - None -} - #[cfg(test)] mod tests { - use crate::file::properties::DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH; + use crate::{ + file::{properties::DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, writer::SerializedFileWriter}, + schema::parser::parse_message_type, + }; + use core::str; use rand::distributions::uniform::SampleUniform; - use std::sync::Arc; + use std::{fs::File, sync::Arc}; use crate::column::{ page::PageReader, @@ -3128,39 +3206,69 @@ mod tests { #[test] fn test_increment_utf8() { + let test_inc = |o: &str, expected: &str| { + if let Ok(v) = String::from_utf8(increment_utf8(o).unwrap()) { + // Got the expected result... + assert_eq!(v, expected); + // and it's greater than the original string + assert!(*v > *o); + // Also show that BinaryArray level comparison works here + let mut greater = ByteArray::new(); + greater.set_data(Bytes::from(v)); + let mut original = ByteArray::new(); + original.set_data(Bytes::from(o.as_bytes().to_vec())); + assert!(greater > original); + } else { + panic!("Expected incremented UTF8 string to also be valid."); + } + }; + // Basic ASCII case - let v = increment_utf8("hello".as_bytes().to_vec()).unwrap(); - assert_eq!(&v, "hellp".as_bytes()); + test_inc("hello", "hellp"); - // Also show that BinaryArray level comparison works here - let mut greater = ByteArray::new(); - greater.set_data(Bytes::from(v)); - let mut original = ByteArray::new(); - original.set_data(Bytes::from("hello".as_bytes().to_vec())); - assert!(greater > original); + // 1-byte ending in max 1-byte + test_inc("a\u{7f}", "b"); + + // 1-byte max should not truncate as it would need 2-byte code points + assert!(increment_utf8("\u{7f}\u{7f}").is_none()); // UTF8 string - let s = "❤️🧡💛💚💙💜"; - let v = increment_utf8(s.as_bytes().to_vec()).unwrap(); + test_inc("❤️🧡💛💚💙💜", "❤️🧡💛💚💙💝"); - if let Ok(new) = String::from_utf8(v) { - assert_ne!(&new, s); - assert_eq!(new, "❤️🧡💛💚💙💝"); - assert!(new.as_bytes().last().unwrap() > s.as_bytes().last().unwrap()); - } else { - panic!("Expected incremented UTF8 string to also be valid.") - } + // 2-byte without overflow + test_inc("éééé", "éééê"); - // Max UTF8 character - should be a No-Op - let s = char::MAX.to_string(); - assert_eq!(s.len(), 4); - let v = increment_utf8(s.as_bytes().to_vec()); - assert!(v.is_none()); + // 2-byte that overflows lowest byte + test_inc("\u{ff}\u{ff}", "\u{ff}\u{100}"); + + // 2-byte ending in max 2-byte + test_inc("a\u{7ff}", "b"); + + // Max 2-byte should not truncate as it would need 3-byte code points + assert!(increment_utf8("\u{7ff}\u{7ff}").is_none()); + + // 3-byte without overflow [U+800, U+800] -> [U+800, U+801] (note that these + // characters should render right to left). 
+ test_inc("ࠀࠀ", "ࠀࠁ"); + + // 3-byte ending in max 3-byte + test_inc("a\u{ffff}", "b"); + + // Max 3-byte should not truncate as it would need 4-byte code points + assert!(increment_utf8("\u{ffff}\u{ffff}").is_none()); + + // 4-byte without overflow + test_inc("𐀀𐀀", "𐀀𐀁"); + + // 4-byte ending in max unicode + test_inc("a\u{10ffff}", "b"); - // Handle multi-byte UTF8 characters - let s = "a\u{10ffff}"; - let v = increment_utf8(s.as_bytes().to_vec()); - assert_eq!(&v.unwrap(), "b\u{10ffff}".as_bytes()); + // Max 4-byte should not truncate + assert!(increment_utf8("\u{10ffff}\u{10ffff}").is_none()); + + // Skip over surrogate pair range (0xD800..=0xDFFF) + //test_inc("a\u{D7FF}", "a\u{e000}"); + test_inc("a\u{D7FF}", "b"); } #[test] @@ -3170,7 +3278,6 @@ mod tests { let r = truncate_utf8(data, data.as_bytes().len()).unwrap(); assert_eq!(r.len(), data.as_bytes().len()); assert_eq!(&r, data.as_bytes()); - println!("len is {}", data.len()); // We slice it away from the UTF8 boundary let r = truncate_utf8(data, 13).unwrap(); @@ -3180,6 +3287,90 @@ mod tests { // One multi-byte code point, and a length shorter than it, so we can't slice it let r = truncate_utf8("\u{0836}", 1); assert!(r.is_none()); + + // Test truncate and increment for max bounds on UTF-8 statistics + // 7-bit (i.e. ASCII) + let r = truncate_and_increment_utf8("yyyyyyyyy", 8).unwrap(); + assert_eq!(&r, "yyyyyyyz".as_bytes()); + + // 2-byte without overflow + let r = truncate_and_increment_utf8("ééééé", 7).unwrap(); + assert_eq!(&r, "ééê".as_bytes()); + + // 2-byte that overflows lowest byte + let r = truncate_and_increment_utf8("\u{ff}\u{ff}\u{ff}\u{ff}\u{ff}", 8).unwrap(); + assert_eq!(&r, "\u{ff}\u{ff}\u{ff}\u{100}".as_bytes()); + + // max 2-byte should not truncate as it would need 3-byte code points + let r = truncate_and_increment_utf8("߿߿߿߿߿", 8); + assert!(r.is_none()); + + // 3-byte without overflow [U+800, U+800, U+800] -> [U+800, U+801] (note that these + // characters should render right to left). + let r = truncate_and_increment_utf8("ࠀࠀࠀࠀ", 8).unwrap(); + assert_eq!(&r, "ࠀࠁ".as_bytes()); + + // max 3-byte should not truncate as it would need 4-byte code points + let r = truncate_and_increment_utf8("\u{ffff}\u{ffff}\u{ffff}", 8); + assert!(r.is_none()); + + // 4-byte without overflow + let r = truncate_and_increment_utf8("𐀀𐀀𐀀𐀀", 9).unwrap(); + assert_eq!(&r, "𐀀𐀁".as_bytes()); + + // max 4-byte should not truncate + let r = truncate_and_increment_utf8("\u{10ffff}\u{10ffff}", 8); + assert!(r.is_none()); + } + + #[test] + // Check fallback truncation of statistics that should be UTF-8, but aren't + // (see https://github.com/apache/arrow-rs/pull/6870). 
+ fn test_byte_array_truncate_invalid_utf8_statistics() { + let message_type = " + message test_schema { + OPTIONAL BYTE_ARRAY a (UTF8); + } + "; + let schema = Arc::new(parse_message_type(message_type).unwrap()); + + // Create Vec containing non-UTF8 bytes + let data = vec![ByteArray::from(vec![128u8; 32]); 7]; + let def_levels = [1, 1, 1, 1, 0, 1, 0, 1, 0, 1]; + let file: File = tempfile::tempfile().unwrap(); + let props = Arc::new( + WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::Chunk) + .set_statistics_truncate_length(Some(8)) + .build(), + ); + + let mut writer = SerializedFileWriter::new(&file, schema, props).unwrap(); + let mut row_group_writer = writer.next_row_group().unwrap(); + + let mut col_writer = row_group_writer.next_column().unwrap().unwrap(); + col_writer + .typed::() + .write_batch(&data, Some(&def_levels), None) + .unwrap(); + col_writer.close().unwrap(); + row_group_writer.close().unwrap(); + let file_metadata = writer.close().unwrap(); + assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); + let stats = file_metadata.row_groups[0].columns[0] + .meta_data + .as_ref() + .unwrap() + .statistics + .as_ref() + .unwrap(); + assert!(!stats.is_max_value_exact.unwrap()); + // Truncation of invalid UTF-8 should fall back to binary truncation, so last byte should + // be incremented by 1. + assert_eq!( + stats.max_value, + Some([128, 128, 128, 128, 128, 128, 128, 129].to_vec()) + ); } #[test] @@ -3215,6 +3406,52 @@ mod tests { assert!(column_close_result.column_index.is_none()); } + #[test] + fn test_no_offset_index_when_disabled() { + // Test that offset indexes can be disabled + let descr = Arc::new(get_test_column_descr::(1, 0)); + let props = Arc::new( + WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::None) + .set_offset_index_disabled(true) + .build(), + ); + let column_writer = get_column_writer(descr, props, get_test_page_writer()); + let mut writer = get_typed_column_writer::(column_writer); + + let data = Vec::new(); + let def_levels = vec![0; 10]; + writer.write_batch(&data, Some(&def_levels), None).unwrap(); + writer.flush_data_pages().unwrap(); + + let column_close_result = writer.close().unwrap(); + assert!(column_close_result.offset_index.is_none()); + assert!(column_close_result.column_index.is_none()); + } + + #[test] + fn test_offset_index_overridden() { + // Test that offset indexes are not disabled when gathering page statistics + let descr = Arc::new(get_test_column_descr::(1, 0)); + let props = Arc::new( + WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::Page) + .set_offset_index_disabled(true) + .build(), + ); + let column_writer = get_column_writer(descr, props, get_test_page_writer()); + let mut writer = get_typed_column_writer::(column_writer); + + let data = Vec::new(); + let def_levels = vec![0; 10]; + writer.write_batch(&data, Some(&def_levels), None).unwrap(); + writer.flush_data_pages().unwrap(); + + let column_close_result = writer.close().unwrap(); + assert!(column_close_result.offset_index.is_some()); + assert!(column_close_result.column_index.is_some()); + } + #[test] fn test_boundary_order() -> Result<()> { let descr = Arc::new(get_test_column_descr::(1, 0)); @@ -3368,6 +3605,26 @@ mod tests { assert!(stats.max_bytes_opt().is_none()); } + #[test] + #[cfg(feature = "arrow")] + fn test_column_writer_get_estimated_total_bytes() { + let page_writer = get_test_page_writer(); + let props = Default::default(); + let mut writer = 
get_test_column_writer::(page_writer, 0, 0, props); + assert_eq!(writer.get_estimated_total_bytes(), 0); + + writer.write_batch(&[1, 2, 3, 4], None, None).unwrap(); + writer.add_data_page().unwrap(); + let size_with_one_page = writer.get_estimated_total_bytes(); + assert_eq!(size_with_one_page, 20); + + writer.write_batch(&[5, 6, 7, 8], None, None).unwrap(); + writer.add_data_page().unwrap(); + let size_with_two_pages = writer.get_estimated_total_bytes(); + // different pages have different compressed lengths + assert_eq!(size_with_two_pages, 20 + 21); + } + fn write_multiple_pages( column_descr: &Arc, pages: &[&[Option]], diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs index 0c708c126503..d089ba7836e1 100644 --- a/parquet/src/encodings/rle.rs +++ b/parquet/src/encodings/rle.rs @@ -369,17 +369,17 @@ impl RleDecoder { } #[inline(never)] - pub fn get_batch(&mut self, buffer: &mut [T]) -> Result { + pub fn get_batch(&mut self, buffer: &mut [T]) -> Result { assert!(size_of::() <= 8); let mut values_read = 0; while values_read < buffer.len() { if self.rle_left > 0 { let num_values = cmp::min(buffer.len() - values_read, self.rle_left as usize); + let repeated_value = + T::try_from_le_slice(&self.current_value.as_mut().unwrap().to_ne_bytes())?; for i in 0..num_values { - let repeated_value = - T::try_from_le_slice(&self.current_value.as_mut().unwrap().to_ne_bytes())?; - buffer[values_read + i] = repeated_value; + buffer[values_read + i] = repeated_value.clone(); } self.rle_left -= num_values as u32; values_read += num_values; diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs index f7fb1ead0ccc..d749287bba62 100644 --- a/parquet/src/errors.rs +++ b/parquet/src/errors.rs @@ -28,6 +28,7 @@ use arrow_schema::ArrowError; // Note: we don't implement PartialEq as the semantics for the // external variant are not well defined (#4469) #[derive(Debug)] +#[non_exhaustive] pub enum ParquetError { /// General Parquet error. /// Returned when code violates normal workflow of working with Parquet files. @@ -48,6 +49,9 @@ pub enum ParquetError { IndexOutOfBound(usize, usize), /// An external error variant External(Box), + /// Returned when a function needs more data to complete properly. The `usize` field indicates + /// the total number of bytes required, not the number of additional bytes. + NeedMoreData(usize), } impl std::fmt::Display for ParquetError { @@ -64,6 +68,7 @@ impl std::fmt::Display for ParquetError { write!(fmt, "Index {index} out of bound: {bound}") } ParquetError::External(e) => write!(fmt, "External: {e}"), + ParquetError::NeedMoreData(needed) => write!(fmt, "NeedMoreData: {needed}"), } } } diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 32b985710023..252cb99f3f36 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -190,7 +190,7 @@ impl ParquetMetaData { /// Creates Parquet metadata from file metadata, a list of row /// group metadata, and the column index structures. - #[deprecated(note = "Use ParquetMetaDataBuilder")] + #[deprecated(since = "53.1.0", note = "Use ParquetMetaDataBuilder")] pub fn new_with_page_index( file_metadata: FileMetaData, row_groups: Vec, @@ -230,12 +230,6 @@ impl ParquetMetaData { &self.row_groups } - /// Returns page indexes in this file. 
- #[deprecated(note = "Use Self::column_index")] - pub fn page_indexes(&self) -> Option<&ParquetColumnIndex> { - self.column_index.as_ref() - } - /// Returns the column index for this file if loaded /// /// Returns `None` if the parquet file does not have a `ColumnIndex` or @@ -246,12 +240,6 @@ impl ParquetMetaData { self.column_index.as_ref() } - /// Returns the offset index for this file if loaded - #[deprecated(note = "Use Self::offset_index")] - pub fn offset_indexes(&self) -> Option<&ParquetOffsetIndex> { - self.offset_index.as_ref() - } - /// Returns offset indexes in this file, if loaded /// /// Returns `None` if the parquet file does not have a `OffsetIndex` or diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 1a9957f00f1e..c6715a33b5ae 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -178,8 +178,10 @@ impl ParquetMetaDataReader { /// /// # Errors /// - /// This function will return [`ParquetError::IndexOutOfBound`] in the event `reader` does not - /// provide enough data to fully parse the metadata (see example below). + /// This function will return [`ParquetError::NeedMoreData`] in the event `reader` does not + /// provide enough data to fully parse the metadata (see example below). The returned error + /// will be populated with a `usize` field indicating the number of bytes required from the + /// tail of the file to completely parse the requested metadata. /// /// Other errors returned include [`ParquetError::General`] and [`ParquetError::EOF`]. /// @@ -192,11 +194,13 @@ impl ParquetMetaDataReader { /// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); } /// let file = open_parquet_file("some_path.parquet"); /// let len = file.len() as usize; - /// let bytes = get_bytes(&file, 1000..len); + /// // Speculatively read 1 kilobyte from the end of the file + /// let bytes = get_bytes(&file, len - 1024..len); /// let mut reader = ParquetMetaDataReader::new().with_page_indexes(true); /// match reader.try_parse_sized(&bytes, len) { /// Ok(_) => (), - /// Err(ParquetError::IndexOutOfBound(needed, _)) => { + /// Err(ParquetError::NeedMoreData(needed)) => { + /// // Read the needed number of bytes from the end of the file /// let bytes = get_bytes(&file, len - needed..len); /// reader.try_parse_sized(&bytes, len).unwrap(); /// } @@ -204,15 +208,44 @@ impl ParquetMetaDataReader { /// } /// let metadata = reader.finish().unwrap(); /// ``` + /// + /// Note that it is possible for the file metadata to be completely read, but there are + /// insufficient bytes available to read the page indexes. [`Self::has_metadata()`] can be used + /// to test for this. In the event the file metadata is present, re-parsing of the file + /// metadata can be skipped by using [`Self::read_page_indexes_sized()`], as shown below. 
+ /// ```no_run + /// # use parquet::file::metadata::ParquetMetaDataReader; + /// # use parquet::errors::ParquetError; + /// # use crate::parquet::file::reader::Length; + /// # fn get_bytes(file: &std::fs::File, range: std::ops::Range) -> bytes::Bytes { unimplemented!(); } + /// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); } + /// let file = open_parquet_file("some_path.parquet"); + /// let len = file.len() as usize; + /// // Speculatively read 1 kilobyte from the end of the file + /// let mut bytes = get_bytes(&file, len - 1024..len); + /// let mut reader = ParquetMetaDataReader::new().with_page_indexes(true); + /// // Loop until `bytes` is large enough + /// loop { + /// match reader.try_parse_sized(&bytes, len) { + /// Ok(_) => break, + /// Err(ParquetError::NeedMoreData(needed)) => { + /// // Read the needed number of bytes from the end of the file + /// bytes = get_bytes(&file, len - needed..len); + /// // If file metadata was read only read page indexes, otherwise continue loop + /// if reader.has_metadata() { + /// reader.read_page_indexes_sized(&bytes, len); + /// break; + /// } + /// } + /// _ => panic!("unexpected error") + /// } + /// } + /// let metadata = reader.finish().unwrap(); + /// ``` pub fn try_parse_sized(&mut self, reader: &R, file_size: usize) -> Result<()> { self.metadata = match self.parse_metadata(reader) { Ok(metadata) => Some(metadata), - // FIXME: throughout this module ParquetError::IndexOutOfBound is used to indicate the - // need for more data. This is not it's intended use. The plan is to add a NeedMoreData - // value to the enum, but this would be a breaking change. This will be done as - // 54.0.0 draws nearer. - // https://github.com/apache/arrow-rs/issues/6447 - Err(ParquetError::IndexOutOfBound(needed, _)) => { + Err(ParquetError::NeedMoreData(needed)) => { // If reader is the same length as `file_size` then presumably there is no more to // read, so return an EOF error. if file_size == reader.len() as usize || needed > file_size { @@ -223,7 +256,7 @@ impl ParquetMetaDataReader { )); } else { // Ask for a larger buffer - return Err(ParquetError::IndexOutOfBound(needed, file_size)); + return Err(ParquetError::NeedMoreData(needed)); } } Err(e) => return Err(e), @@ -246,7 +279,8 @@ impl ParquetMetaDataReader { /// Read the page index structures when a [`ParquetMetaData`] has already been obtained. /// This variant is used when `reader` cannot access the entire Parquet file (e.g. it is /// a [`Bytes`] struct containing the tail of the file). - /// See [`Self::new_with_metadata()`] and [`Self::has_metadata()`]. + /// See [`Self::new_with_metadata()`] and [`Self::has_metadata()`]. Like + /// [`Self::try_parse_sized()`] this function may return [`ParquetError::NeedMoreData`]. pub fn read_page_indexes_sized( &mut self, reader: &R, @@ -269,7 +303,6 @@ impl ParquetMetaDataReader { // Get bounds needed for page indexes (if any are present in the file). 
let Some(range) = self.range_for_page_index() else { - self.empty_page_indexes(); return Ok(()); }; @@ -285,10 +318,7 @@ impl ParquetMetaDataReader { )); } else { // Ask for a larger buffer - return Err(ParquetError::IndexOutOfBound( - file_size - range.start, - file_size, - )); + return Err(ParquetError::NeedMoreData(file_size - range.start)); } } @@ -446,20 +476,6 @@ impl ParquetMetaDataReader { Ok(()) } - /// Set the column_index and offset_indexes to empty `Vec` for backwards compatibility - /// - /// See for details - fn empty_page_indexes(&mut self) { - let metadata = self.metadata.as_mut().unwrap(); - let num_row_groups = metadata.num_row_groups(); - if self.column_index { - metadata.set_column_index(Some(vec![vec![]; num_row_groups])); - } - if self.offset_index { - metadata.set_offset_index(Some(vec![vec![]; num_row_groups])); - } - } - fn range_for_page_index(&self) -> Option> { // sanity check self.metadata.as_ref()?; @@ -484,10 +500,7 @@ impl ParquetMetaDataReader { // check file is large enough to hold footer let file_size = chunk_reader.len(); if file_size < (FOOTER_SIZE as u64) { - return Err(ParquetError::IndexOutOfBound( - FOOTER_SIZE, - file_size as usize, - )); + return Err(ParquetError::NeedMoreData(FOOTER_SIZE)); } let mut footer = [0_u8; 8]; @@ -500,10 +513,7 @@ impl ParquetMetaDataReader { self.metadata_size = Some(footer_metadata_len); if footer_metadata_len > file_size as usize { - return Err(ParquetError::IndexOutOfBound( - footer_metadata_len, - file_size as usize, - )); + return Err(ParquetError::NeedMoreData(footer_metadata_len)); } let start = file_size - footer_metadata_len as u64; @@ -681,7 +691,7 @@ mod tests { let err = ParquetMetaDataReader::new() .parse_metadata(&test_file) .unwrap_err(); - assert!(matches!(err, ParquetError::IndexOutOfBound(8, _))); + assert!(matches!(err, ParquetError::NeedMoreData(8))); } #[test] @@ -700,7 +710,7 @@ mod tests { let err = ParquetMetaDataReader::new() .parse_metadata(&test_file) .unwrap_err(); - assert!(matches!(err, ParquetError::IndexOutOfBound(263, _))); + assert!(matches!(err, ParquetError::NeedMoreData(263))); } #[test] @@ -794,7 +804,7 @@ mod tests { // should fail match reader.try_parse_sized(&bytes, len).unwrap_err() { // expected error, try again with provided bounds - ParquetError::IndexOutOfBound(needed, _) => { + ParquetError::NeedMoreData(needed) => { let bytes = bytes_for_range(len - needed..len); reader.try_parse_sized(&bytes, len).unwrap(); let metadata = reader.finish().unwrap(); @@ -804,6 +814,26 @@ mod tests { _ => panic!("unexpected error"), }; + // not enough for file metadata, but keep trying until page indexes are read + let mut reader = ParquetMetaDataReader::new().with_page_indexes(true); + let mut bytes = bytes_for_range(452505..len); + loop { + match reader.try_parse_sized(&bytes, len) { + Ok(_) => break, + Err(ParquetError::NeedMoreData(needed)) => { + bytes = bytes_for_range(len - needed..len); + if reader.has_metadata() { + reader.read_page_indexes_sized(&bytes, len).unwrap(); + break; + } + } + _ => panic!("unexpected error"), + } + } + let metadata = reader.finish().unwrap(); + assert!(metadata.column_index.is_some()); + assert!(metadata.offset_index.is_some()); + // not enough for page index but lie about file size let bytes = bytes_for_range(323584..len); let reader_result = reader.try_parse_sized(&bytes, len - 323584).unwrap_err(); @@ -818,7 +848,7 @@ mod tests { // should fail match reader.try_parse_sized(&bytes, len).unwrap_err() { // expected error, try again with provided bounds - 
ParquetError::IndexOutOfBound(needed, _) => { + ParquetError::NeedMoreData(needed) => { let bytes = bytes_for_range(len - needed..len); reader.try_parse_sized(&bytes, len).unwrap(); reader.finish().unwrap(); diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index 395e9afe122c..fd3639ac3069 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -43,8 +43,7 @@ pub(crate) fn acc_range(a: Option>, b: Option>) -> Opt /// /// Returns a vector of `index[column_number]`. /// -/// Returns an empty vector if this row group does not contain a -/// [`ColumnIndex`]. +/// Returns `None` if this row group does not contain a [`ColumnIndex`]. /// /// See [Page Index Documentation] for more details. /// @@ -52,26 +51,29 @@ pub(crate) fn acc_range(a: Option>, b: Option>) -> Opt pub fn read_columns_indexes( reader: &R, chunks: &[ColumnChunkMetaData], -) -> Result, ParquetError> { +) -> Result>, ParquetError> { let fetch = chunks .iter() .fold(None, |range, c| acc_range(range, c.column_index_range())); let fetch = match fetch { Some(r) => r, - None => return Ok(vec![Index::NONE; chunks.len()]), + None => return Ok(None), }; let bytes = reader.get_bytes(fetch.start as _, fetch.end - fetch.start)?; let get = |r: Range| &bytes[(r.start - fetch.start)..(r.end - fetch.start)]; - chunks - .iter() - .map(|c| match c.column_index_range() { - Some(r) => decode_column_index(get(r), c.column_type()), - None => Ok(Index::NONE), - }) - .collect() + Some( + chunks + .iter() + .map(|c| match c.column_index_range() { + Some(r) => decode_column_index(get(r), c.column_type()), + None => Ok(Index::NONE), + }) + .collect(), + ) + .transpose() } /// Reads [`OffsetIndex`], per-page [`PageLocation`] for all columns of a row @@ -116,8 +118,7 @@ pub fn read_pages_locations( /// /// Returns a vector of `offset_index[column_number]`. /// -/// Returns an empty vector if this row group does not contain an -/// [`OffsetIndex`]. +/// Returns `None` if this row group does not contain an [`OffsetIndex`]. /// /// See [Page Index Documentation] for more details. /// @@ -125,26 +126,29 @@ pub fn read_pages_locations( pub fn read_offset_indexes( reader: &R, chunks: &[ColumnChunkMetaData], -) -> Result, ParquetError> { +) -> Result>, ParquetError> { let fetch = chunks .iter() .fold(None, |range, c| acc_range(range, c.offset_index_range())); let fetch = match fetch { Some(r) => r, - None => return Ok(vec![]), + None => return Ok(None), }; let bytes = reader.get_bytes(fetch.start as _, fetch.end - fetch.start)?; let get = |r: Range| &bytes[(r.start - fetch.start)..(r.end - fetch.start)]; - chunks - .iter() - .map(|c| match c.offset_index_range() { - Some(r) => decode_offset_index(get(r)), - None => Err(general_err!("missing offset index")), - }) - .collect() + Some( + chunks + .iter() + .map(|c| match c.offset_index_range() { + Some(r) => decode_offset_index(get(r)), + None => Err(general_err!("missing offset index")), + }) + .collect(), + ) + .transpose() } pub(crate) fn decode_offset_index(data: &[u8]) -> Result { diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index efcb63258f99..dc918f6b5634 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -16,14 +16,13 @@ // under the License. //! 
Configuration via [`WriterProperties`] and [`ReaderProperties`] -use std::str::FromStr; -use std::{collections::HashMap, sync::Arc}; - use crate::basic::{Compression, Encoding}; use crate::compression::{CodecOptions, CodecOptionsBuilder}; use crate::file::metadata::KeyValue; use crate::format::SortingColumn; use crate::schema::types::ColumnPath; +use std::str::FromStr; +use std::{collections::HashMap, sync::Arc}; /// Default value for [`WriterProperties::data_page_size_limit`] pub const DEFAULT_PAGE_SIZE: usize = 1024 * 1024; @@ -42,6 +41,7 @@ pub const DEFAULT_DATA_PAGE_ROW_COUNT_LIMIT: usize = 20_000; /// Default value for [`WriterProperties::statistics_enabled`] pub const DEFAULT_STATISTICS_ENABLED: EnabledStatistics = EnabledStatistics::Page; /// Default value for [`WriterProperties::max_statistics_size`] +#[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")] pub const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096; /// Default value for [`WriterProperties::max_row_group_size`] pub const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 1024 * 1024; @@ -57,6 +57,10 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05; pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64; /// Default values for [`WriterProperties::statistics_truncate_length`] pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option = None; +/// Default value for [`WriterProperties::offset_index_disabled`] +pub const DEFAULT_OFFSET_INDEX_DISABLED: bool = false; +/// Default values for [`WriterProperties::coerce_types`] +pub const DEFAULT_COERCE_TYPES: bool = false; /// Parquet writer version. /// @@ -157,12 +161,14 @@ pub struct WriterProperties { bloom_filter_position: BloomFilterPosition, writer_version: WriterVersion, created_by: String, + offset_index_disabled: bool, pub(crate) key_value_metadata: Option>, default_column_properties: ColumnProperties, column_properties: HashMap, sorting_columns: Option>, column_index_truncate_length: Option, statistics_truncate_length: Option, + coerce_types: bool, } impl Default for WriterProperties { @@ -185,14 +191,6 @@ impl WriterProperties { WriterPropertiesBuilder::with_defaults() } - /// Returns data page size limit. - /// - /// Note: this is a best effort limit based on the write batch size - #[deprecated(since = "41.0.0", note = "Use data_page_size_limit")] - pub fn data_pagesize_limit(&self) -> usize { - self.data_page_size_limit - } - /// Returns data page size limit. /// /// Note: this is a best effort limit based on the write batch size @@ -202,14 +200,6 @@ impl WriterProperties { self.data_page_size_limit } - /// Returns dictionary page size limit. - /// - /// Note: this is a best effort limit based on the write batch size - #[deprecated(since = "41.0.0", note = "Use dictionary_page_size_limit")] - pub fn dictionary_pagesize_limit(&self) -> usize { - self.dictionary_page_size_limit - } - /// Returns dictionary page size limit. /// /// Note: this is a best effort limit based on the write batch size @@ -257,6 +247,22 @@ impl WriterProperties { &self.created_by } + /// Returns `true` if offset index writing is disabled. + pub fn offset_index_disabled(&self) -> bool { + // If page statistics are to be collected, then do not disable the offset indexes. 
+ let default_page_stats_enabled = + self.default_column_properties.statistics_enabled() == Some(EnabledStatistics::Page); + let column_page_stats_enabled = self + .column_properties + .iter() + .any(|path_props| path_props.1.statistics_enabled() == Some(EnabledStatistics::Page)); + if default_page_stats_enabled || column_page_stats_enabled { + return false; + } + + self.offset_index_disabled + } + /// Returns `key_value_metadata` KeyValue pairs. pub fn key_value_metadata(&self) -> Option<&Vec> { self.key_value_metadata.as_ref() @@ -281,6 +287,11 @@ impl WriterProperties { self.statistics_truncate_length } + /// Returns `true` if type coercion is enabled. + pub fn coerce_types(&self) -> bool { + self.coerce_types + } + /// Returns encoding for a data page, when dictionary encoding is enabled. /// This is not configurable. #[inline] @@ -340,7 +351,9 @@ impl WriterProperties { /// Returns max size for statistics. /// Only applicable if statistics are enabled. + #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")] pub fn max_statistics_size(&self, col: &ColumnPath) -> usize { + #[allow(deprecated)] self.column_properties .get(col) .and_then(|c| c.max_statistics_size()) @@ -371,12 +384,14 @@ pub struct WriterPropertiesBuilder { bloom_filter_position: BloomFilterPosition, writer_version: WriterVersion, created_by: String, + offset_index_disabled: bool, key_value_metadata: Option>, default_column_properties: ColumnProperties, column_properties: HashMap, sorting_columns: Option>, column_index_truncate_length: Option, statistics_truncate_length: Option, + coerce_types: bool, } impl WriterPropertiesBuilder { @@ -391,12 +406,14 @@ impl WriterPropertiesBuilder { bloom_filter_position: DEFAULT_BLOOM_FILTER_POSITION, writer_version: DEFAULT_WRITER_VERSION, created_by: DEFAULT_CREATED_BY.to_string(), + offset_index_disabled: DEFAULT_OFFSET_INDEX_DISABLED, key_value_metadata: None, default_column_properties: Default::default(), column_properties: HashMap::new(), sorting_columns: None, column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH, + coerce_types: DEFAULT_COERCE_TYPES, } } @@ -411,12 +428,14 @@ impl WriterPropertiesBuilder { bloom_filter_position: self.bloom_filter_position, writer_version: self.writer_version, created_by: self.created_by, + offset_index_disabled: self.offset_index_disabled, key_value_metadata: self.key_value_metadata, default_column_properties: self.default_column_properties, column_properties: self.column_properties, sorting_columns: self.sorting_columns, column_index_truncate_length: self.column_index_truncate_length, statistics_truncate_length: self.statistics_truncate_length, + coerce_types: self.coerce_types, } } @@ -433,16 +452,6 @@ impl WriterPropertiesBuilder { self } - /// Sets best effort maximum size of a data page in bytes. - /// - /// Note: this is a best effort limit based on value of - /// [`set_write_batch_size`](Self::set_write_batch_size). - #[deprecated(since = "41.0.0", note = "Use set_data_page_size_limit")] - pub fn set_data_pagesize_limit(mut self, value: usize) -> Self { - self.data_page_size_limit = value; - self - } - /// Sets best effort maximum size of a data page in bytes (defaults to `1024 * 1024`). /// /// The parquet writer will attempt to limit the sizes of each @@ -471,16 +480,6 @@ impl WriterPropertiesBuilder { self } - /// Sets best effort maximum dictionary page size, in bytes. 
- /// - /// Note: this is a best effort limit based on value of - /// [`set_write_batch_size`](Self::set_write_batch_size). - #[deprecated(since = "41.0.0", note = "Use set_dictionary_page_size_limit")] - pub fn set_dictionary_pagesize_limit(mut self, value: usize) -> Self { - self.dictionary_page_size_limit = value; - self - } - /// Sets best effort maximum dictionary page size, in bytes (defaults to `1024 * 1024`). /// /// The parquet writer will attempt to limit the size of each @@ -532,6 +531,21 @@ impl WriterPropertiesBuilder { self } + /// Sets whether the writing of offset indexes is disabled (defaults to `false`). + /// + /// If statistics level is set to [`Page`] this setting will be overridden with `false`. + /// + /// Note: As the offset indexes are useful for accessing data by row number, + /// they are always written by default, regardless of whether other statistics + /// are enabled. Disabling this metadata may result in a degradation in read + /// performance, so use this option with care. + /// + /// [`Page`]: EnabledStatistics::Page + pub fn set_offset_index_disabled(mut self, value: bool) -> Self { + self.offset_index_disabled = value; + self + } + /// Sets "key_value_metadata" property (defaults to `None`). pub fn set_key_value_metadata(mut self, value: Option>) -> Self { self.key_value_metadata = value; @@ -590,7 +604,9 @@ impl WriterPropertiesBuilder { /// Sets default max statistics size for all columns (defaults to `4096`). /// /// Applicable only if statistics are enabled. + #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")] pub fn set_max_statistics_size(mut self, value: usize) -> Self { + #[allow(deprecated)] self.default_column_properties .set_max_statistics_size(value); self @@ -695,7 +711,9 @@ impl WriterPropertiesBuilder { /// Sets max size for statistics for a specific column. /// /// Takes precedence over [`Self::set_max_statistics_size`]. + #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")] pub fn set_column_max_statistics_size(mut self, col: ColumnPath, value: usize) -> Self { + #[allow(deprecated)] self.get_mut_props(col).set_max_statistics_size(value); self } @@ -767,6 +785,29 @@ impl WriterPropertiesBuilder { self.statistics_truncate_length = max_length; self } + + /// Should the writer coerce types to parquet native types (defaults to `false`). + /// + /// Leaving this option the default `false` will ensure the exact same data + /// written to parquet using this library will be read. + /// + /// Setting this option to `true` will result in parquet files that can be + /// read by more readers, but potentially lose information in the process. + /// + /// * Types such as [`DataType::Date64`], which have no direct corresponding + /// Parquet type, may be stored with lower precision. + /// + /// * The internal field names of `List` and `Map` types will be renamed if + /// necessary to match what is required by the newest Parquet specification. 
+    ///
+    /// See [`ArrowSchemaConverter::with_coerce_types`] for more details
+    ///
+    /// [`DataType::Date64`]: arrow_schema::DataType::Date64
+    /// [`ArrowSchemaConverter::with_coerce_types`]: crate::arrow::ArrowSchemaConverter::with_coerce_types
+    pub fn set_coerce_types(mut self, coerce_types: bool) -> Self {
+        self.coerce_types = coerce_types;
+        self
+    }
 }

 /// Controls the level of statistics to be computed by the writer and stored in
 /// the parquet file.
@@ -862,6 +903,7 @@ struct ColumnProperties {
     codec: Option<Compression>,
     dictionary_enabled: Option<bool>,
     statistics_enabled: Option<EnabledStatistics>,
+    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
     max_statistics_size: Option<usize>,
     /// bloom filter related properties
     bloom_filter_properties: Option<BloomFilterProperties>,
@@ -894,12 +936,14 @@ impl ColumnProperties {
         self.dictionary_enabled = Some(enabled);
     }

-    /// Sets whether or not statistics are enabled for this column.
+    /// Sets the statistics level for this column.
     fn set_statistics_enabled(&mut self, enabled: EnabledStatistics) {
         self.statistics_enabled = Some(enabled);
     }

     /// Sets max size for statistics for this column.
+    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
+    #[allow(deprecated)]
     fn set_max_statistics_size(&mut self, value: usize) {
         self.max_statistics_size = Some(value);
     }
@@ -957,14 +1001,16 @@ impl ColumnProperties {
         self.dictionary_enabled
     }

-    /// Returns `Some(true)` if statistics are enabled for this column, if disabled then
-    /// returns `Some(false)`. If result is `None`, then no setting has been provided.
+    /// Returns optional statistics level requested for this column. If result is `None`,
+    /// then no setting has been provided.
     fn statistics_enabled(&self) -> Option<EnabledStatistics> {
         self.statistics_enabled
     }

     /// Returns optional max size in bytes for statistics.
+ #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")] fn max_statistics_size(&self) -> Option { + #[allow(deprecated)] self.max_statistics_size } @@ -1108,10 +1154,6 @@ mod tests { props.statistics_enabled(&ColumnPath::from("col")), DEFAULT_STATISTICS_ENABLED ); - assert_eq!( - props.max_statistics_size(&ColumnPath::from("col")), - DEFAULT_MAX_STATISTICS_SIZE - ); assert!(props .bloom_filter_properties(&ColumnPath::from("col")) .is_none()); @@ -1188,13 +1230,11 @@ mod tests { .set_compression(Compression::GZIP(Default::default())) .set_dictionary_enabled(false) .set_statistics_enabled(EnabledStatistics::None) - .set_max_statistics_size(50) // specific column settings .set_column_encoding(ColumnPath::from("col"), Encoding::RLE) .set_column_compression(ColumnPath::from("col"), Compression::SNAPPY) .set_column_dictionary_enabled(ColumnPath::from("col"), true) .set_column_statistics_enabled(ColumnPath::from("col"), EnabledStatistics::Chunk) - .set_column_max_statistics_size(ColumnPath::from("col"), 123) .set_column_bloom_filter_enabled(ColumnPath::from("col"), true) .set_column_bloom_filter_ndv(ColumnPath::from("col"), 100_u64) .set_column_bloom_filter_fpp(ColumnPath::from("col"), 0.1) @@ -1226,7 +1266,6 @@ mod tests { props.statistics_enabled(&ColumnPath::from("a")), EnabledStatistics::None ); - assert_eq!(props.max_statistics_size(&ColumnPath::from("a")), 50); assert_eq!( props.encoding(&ColumnPath::from("col")), @@ -1241,7 +1280,6 @@ mod tests { props.statistics_enabled(&ColumnPath::from("col")), EnabledStatistics::Chunk ); - assert_eq!(props.max_statistics_size(&ColumnPath::from("col")), 123); assert_eq!( props.bloom_filter_properties(&ColumnPath::from("col")), Some(&BloomFilterProperties { fpp: 0.1, ndv: 100 }) diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index f3ac13797a03..a942481f7e4d 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -1262,8 +1262,8 @@ mod tests { let reader = SerializedFileReader::new_with_options(test_file, read_options)?; let metadata = reader.metadata(); assert_eq!(metadata.num_row_groups(), 0); - assert_eq!(metadata.column_index().unwrap().len(), 0); - assert_eq!(metadata.offset_index().unwrap().len(), 0); + assert!(metadata.column_index().is_none()); + assert!(metadata.offset_index().is_none()); // false, true predicate let test_file = get_test_file("alltypes_tiny_pages.parquet"); @@ -1275,8 +1275,8 @@ mod tests { let reader = SerializedFileReader::new_with_options(test_file, read_options)?; let metadata = reader.metadata(); assert_eq!(metadata.num_row_groups(), 0); - assert_eq!(metadata.column_index().unwrap().len(), 0); - assert_eq!(metadata.offset_index().unwrap().len(), 0); + assert!(metadata.column_index().is_none()); + assert!(metadata.offset_index().is_none()); // false, false predicate let test_file = get_test_file("alltypes_tiny_pages.parquet"); @@ -1288,8 +1288,8 @@ mod tests { let reader = SerializedFileReader::new_with_options(test_file, read_options)?; let metadata = reader.metadata(); assert_eq!(metadata.num_row_groups(), 0); - assert_eq!(metadata.column_index().unwrap().len(), 0); - assert_eq!(metadata.offset_index().unwrap().len(), 0); + assert!(metadata.column_index().is_none()); + assert!(metadata.offset_index().is_none()); Ok(()) } @@ -1379,13 +1379,15 @@ mod tests { let columns = metadata.row_group(0).columns(); let reversed: Vec<_> = columns.iter().cloned().rev().collect(); - let a = read_columns_indexes(&test_file, 
diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs
index f3ac13797a03..a942481f7e4d 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -1262,8 +1262,8 @@ mod tests {
         let reader = SerializedFileReader::new_with_options(test_file, read_options)?;
         let metadata = reader.metadata();
         assert_eq!(metadata.num_row_groups(), 0);
-        assert_eq!(metadata.column_index().unwrap().len(), 0);
-        assert_eq!(metadata.offset_index().unwrap().len(), 0);
+        assert!(metadata.column_index().is_none());
+        assert!(metadata.offset_index().is_none());
 
         // false, true predicate
         let test_file = get_test_file("alltypes_tiny_pages.parquet");
@@ -1275,8 +1275,8 @@ mod tests {
         let reader = SerializedFileReader::new_with_options(test_file, read_options)?;
         let metadata = reader.metadata();
         assert_eq!(metadata.num_row_groups(), 0);
-        assert_eq!(metadata.column_index().unwrap().len(), 0);
-        assert_eq!(metadata.offset_index().unwrap().len(), 0);
+        assert!(metadata.column_index().is_none());
+        assert!(metadata.offset_index().is_none());
 
         // false, false predicate
         let test_file = get_test_file("alltypes_tiny_pages.parquet");
@@ -1288,8 +1288,8 @@ mod tests {
         let reader = SerializedFileReader::new_with_options(test_file, read_options)?;
         let metadata = reader.metadata();
         assert_eq!(metadata.num_row_groups(), 0);
-        assert_eq!(metadata.column_index().unwrap().len(), 0);
-        assert_eq!(metadata.offset_index().unwrap().len(), 0);
+        assert!(metadata.column_index().is_none());
+        assert!(metadata.offset_index().is_none());
 
         Ok(())
     }
@@ -1379,13 +1379,15 @@ mod tests {
         let columns = metadata.row_group(0).columns();
         let reversed: Vec<_> = columns.iter().cloned().rev().collect();
 
-        let a = read_columns_indexes(&test_file, columns).unwrap();
-        let mut b = read_columns_indexes(&test_file, &reversed).unwrap();
+        let a = read_columns_indexes(&test_file, columns).unwrap().unwrap();
+        let mut b = read_columns_indexes(&test_file, &reversed)
+            .unwrap()
+            .unwrap();
         b.reverse();
         assert_eq!(a, b);
 
-        let a = read_offset_indexes(&test_file, columns).unwrap();
-        let mut b = read_offset_indexes(&test_file, &reversed).unwrap();
+        let a = read_offset_indexes(&test_file, columns).unwrap().unwrap();
+        let mut b = read_offset_indexes(&test_file, &reversed).unwrap().unwrap();
         b.reverse();
         assert_eq!(a, b);
     }
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index b84c57a60e19..6b7707f03cd9 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -1742,6 +1742,7 @@ mod tests {
         let props = WriterProperties::builder()
             .set_statistics_enabled(EnabledStatistics::None)
             .set_column_statistics_enabled("a".into(), EnabledStatistics::Page)
+            .set_offset_index_disabled(true) // this should be ignored because of the line above
             .build();
         let mut file = Vec::with_capacity(1024);
         let mut file_writer =
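Since a missing page index is now reported as `None` rather than as an empty `Vec`, readers that opt into the page index should match on the `Option` returned by `column_index()` / `offset_index()`. A sketch of that pattern; the path "data.parquet" is a placeholder for any local file:

```rust
use std::fs::File;

use parquet::file::reader::FileReader;
use parquet::file::serialized_reader::{ReadOptionsBuilder, SerializedFileReader};

fn main() {
    // Placeholder input; any Parquet file will do.
    let file = File::open("data.parquet").unwrap();
    let options = ReadOptionsBuilder::new().with_page_index().build();
    let reader = SerializedFileReader::new_with_options(file, options).unwrap();

    // `None` now means the index was not written or every row group was
    // filtered out, which previously surfaced as an empty Vec.
    match reader.metadata().column_index() {
        Some(index) => println!("column index covers {} row groups", index.len()),
        None => println!("no column index present"),
    }
}
```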
("a".to_string(), Field::Null), ("b".to_string(), Field::Bool(false)), ("c".to_string(), Field::Byte(3)), @@ -1619,10 +1618,10 @@ mod tests { #[test] fn test_row_complex_accessors() { - let row = make_row(vec![ + let row = Row::new(vec![ ( "a".to_string(), - Field::Group(make_row(vec![ + Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ])), @@ -1653,10 +1652,10 @@ mod tests { #[test] fn test_row_complex_invalid_accessors() { - let row = make_row(vec![ + let row = Row::new(vec![ ( "a".to_string(), - Field::Group(make_row(vec![ + Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ])), @@ -1802,7 +1801,7 @@ mod tests { #[test] fn test_list_complex_accessors() { - let list = make_list(vec![Field::Group(make_row(vec![ + let list = make_list(vec![Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ]))]); @@ -1826,7 +1825,7 @@ mod tests { #[test] fn test_list_complex_invalid_accessors() { - let list = make_list(vec![Field::Group(make_row(vec![ + let list = make_list(vec![Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ]))]); @@ -1961,7 +1960,7 @@ mod tests { ("Y".to_string(), Field::Double(2.2)), ("Z".to_string(), Field::Str("abc".to_string())), ]; - let row = Field::Group(make_row(fields)); + let row = Field::Group(Row::new(fields)); assert_eq!( row.to_json_value(), serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"}) @@ -1990,14 +1989,14 @@ mod tests { #[cfg(test)] #[allow(clippy::many_single_char_names)] mod api_tests { - use super::{make_list, make_map, make_row}; + use super::{make_list, make_map, Row}; use crate::record::Field; #[test] fn test_field_visibility() { - let row = make_row(vec![( + let row = Row::new(vec![( "a".to_string(), - Field::Group(make_row(vec![ + Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ])), @@ -2009,7 +2008,7 @@ mod api_tests { match column.1 { Field::Group(r) => { assert_eq!( - &make_row(vec![ + &Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ]), @@ -2027,7 +2026,7 @@ mod api_tests { fn test_list_element_access() { let expected = vec![ Field::Int(1), - Field::Group(make_row(vec![ + Field::Group(Row::new(vec![ ("x".to_string(), Field::Null), ("Y".to_string(), Field::Int(2)), ])), diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs index fd6ca7cdd57a..9e70f7a980db 100644 --- a/parquet/src/record/reader.rs +++ b/parquet/src/record/reader.rs @@ -24,7 +24,7 @@ use crate::basic::{ConvertedType, Repetition}; use crate::errors::{ParquetError, Result}; use crate::file::reader::{FileReader, RowGroupReader}; use crate::record::{ - api::{make_list, make_map, make_row, Field, Row}, + api::{make_list, make_map, Field, Row}, triplet::TripletIter, }; use crate::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr}; @@ -217,11 +217,15 @@ impl TreeBuilder { Repetition::REPEATED, "Invalid map type: {field:?}" ); - assert_eq!( - key_value_type.get_fields().len(), - 2, - "Invalid map type: {field:?}" - ); + // Parquet spec allows no value. In that case treat as a list. 
diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs
index fd6ca7cdd57a..9e70f7a980db 100644
--- a/parquet/src/record/reader.rs
+++ b/parquet/src/record/reader.rs
@@ -24,7 +24,7 @@ use crate::basic::{ConvertedType, Repetition};
 use crate::errors::{ParquetError, Result};
 use crate::file::reader::{FileReader, RowGroupReader};
 use crate::record::{
-    api::{make_list, make_map, make_row, Field, Row},
+    api::{make_list, make_map, Field, Row},
     triplet::TripletIter,
 };
 use crate::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr};
@@ -217,11 +217,15 @@ impl TreeBuilder {
                     Repetition::REPEATED,
                     "Invalid map type: {field:?}"
                 );
-                assert_eq!(
-                    key_value_type.get_fields().len(),
-                    2,
-                    "Invalid map type: {field:?}"
-                );
+                // Parquet spec allows no value. In that case treat as a list. #1642
+                if key_value_type.get_fields().len() != 1 {
+                    // If not a list, then there can only be 2 fields in the struct
+                    assert_eq!(
+                        key_value_type.get_fields().len(),
+                        2,
+                        "Invalid map type: {field:?}"
+                    );
+                }
 
                 path.push(String::from(key_value_type.name()));
 
@@ -239,25 +243,35 @@ impl TreeBuilder {
                     row_group_reader,
                 )?;
 
-                let value_type = &key_value_type.get_fields()[1];
-                let value_reader = self.reader_tree(
-                    value_type.clone(),
-                    path,
-                    curr_def_level + 1,
-                    curr_rep_level + 1,
-                    paths,
-                    row_group_reader,
-                )?;
+                if key_value_type.get_fields().len() == 1 {
+                    path.pop();
+                    Reader::RepeatedReader(
+                        field,
+                        curr_def_level,
+                        curr_rep_level,
+                        Box::new(key_reader),
+                    )
+                } else {
+                    let value_type = &key_value_type.get_fields()[1];
+                    let value_reader = self.reader_tree(
+                        value_type.clone(),
+                        path,
+                        curr_def_level + 1,
+                        curr_rep_level + 1,
+                        paths,
+                        row_group_reader,
+                    )?;
 
-                path.pop();
+                    path.pop();
 
-                Reader::KeyValueReader(
-                    field,
-                    curr_def_level,
-                    curr_rep_level,
-                    Box::new(key_reader),
-                    Box::new(value_reader),
-                )
+                    Reader::KeyValueReader(
+                        field,
+                        curr_def_level,
+                        curr_rep_level,
+                        Box::new(key_reader),
+                        Box::new(value_reader),
+                    )
+                }
             }
             // A repeated field that is neither contained by a `LIST`- or
             // `MAP`-annotated group nor annotated by `LIST` or `MAP`
@@ -345,6 +359,19 @@ impl Reader {
     ///
     /// #backward-compatibility-rules
     fn is_element_type(repeated_type: &Type) -> bool {
+        // For legacy 2-level list types whose element type is a 2-level list
+        //
+        // // ARRAY<ARRAY<INT>> (nullable list, non-null elements)
+        // optional group my_list (LIST) {
+        //    repeated group array (LIST) {
+        //        repeated int32 array;
+        //    };
+        // }
+        //
+        if repeated_type.is_list() || repeated_type.has_single_repeated_child() {
+            return false;
+        }
+
         // For legacy 2-level list types with primitive element type, e.g.:
         //
         //   // ARRAY<INT> (nullable list, non-null elements)
@@ -399,7 +426,7 @@ impl Reader {
                 for reader in readers {
                     fields.push((String::from(reader.field_name()), reader.read_field()?));
                 }
-                Ok(make_row(fields))
+                Ok(Row::new(fields))
             }
             _ => panic!("Cannot call read() on {self}"),
         }
@@ -434,7 +461,7 @@ impl Reader {
                         fields.push((String::from(reader.field_name()), Field::Null));
                     }
                 }
-                let row = make_row(fields);
+                let row = Row::new(fields);
                 Field::Group(row)
             }
             Reader::RepeatedReader(_, def_level, rep_level, ref mut reader) => {
@@ -826,7 +853,7 @@ mod tests {
     macro_rules! row {
         ($($e:tt)*) => {
             {
-                make_row(vec![$($e)*])
+                Row::new(vec![$($e)*])
             }
         }
     }
@@ -1459,8 +1486,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Invalid map type")]
-    fn test_file_reader_rows_invalid_map_type() {
+    fn test_file_reader_rows_nested_map_type() {
         let schema = "
             message spark_schema {
                 OPTIONAL group a (MAP) {
@@ -1823,6 +1849,36 @@ mod tests {
         assert_eq!(rows, expected_rows);
     }
 
+    #[test]
+    fn test_map_no_value() {
+        // File schema:
+        //   message schema {
+        //     required group my_map (MAP) {
+        //       repeated group key_value {
+        //         required int32 key;
+        //         optional int32 value;
+        //       }
+        //     }
+        //     required group my_map_no_v (MAP) {
+        //       repeated group key_value {
+        //         required int32 key;
+        //       }
+        //     }
+        //     required group my_list (LIST) {
+        //       repeated group list {
+        //         required int32 element;
+        //       }
+        //     }
+        //   }
+        let rows = test_file_reader_rows("map_no_value.parquet", None).unwrap();
+
+        // the my_map_no_v and my_list columns should be equivalent lists by this point
+        for row in rows {
+            let cols = row.into_columns();
+            assert_eq!(cols[1].1, cols[2].1);
+        }
+    }
+
     fn test_file_reader_rows(file_name: &str, schema: Option<Type>) -> Result<Vec<Row>> {
         let file = get_test_file(file_name);
         let file_reader: Box<dyn FileReader> = Box::new(SerializedFileReader::new(file)?);
@@ -1839,4 +1895,21 @@ mod tests {
         let iter = row_group_reader.get_row_iter(schema)?;
         Ok(iter.map(|row| row.unwrap()).collect())
     }
+
+    #[test]
+    fn test_read_old_nested_list() {
+        let rows = test_file_reader_rows("old_list_structure.parquet", None).unwrap();
+        let expected_rows = vec![row![(
+            "a".to_string(),
+            Field::ListInternal(make_list(
+                [
+                    make_list([1, 2].map(Field::Int).to_vec()),
+                    make_list([3, 4].map(Field::Int).to_vec())
+                ]
+                .map(Field::ListInternal)
+                .to_vec()
+            ))
+        ),]];
+        assert_eq!(rows, expected_rows);
+    }
 }
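With the reader changes above, a `MAP` group whose `key_value` struct omits the value field is read as a list of keys instead of panicking with "Invalid map type", and legacy two-level lists of lists are recognized. A sketch of consuming such a file through the public row API, assuming the `map_no_value.parquet` fixture named in the test is available locally:

```rust
use std::fs::File;

use parquet::file::reader::FileReader;
use parquet::file::serialized_reader::SerializedFileReader;

fn main() {
    // Fixture referenced by test_map_no_value; substitute any file whose
    // MAP groups have no value field.
    let file = File::open("map_no_value.parquet").unwrap();
    let reader = SerializedFileReader::new(file).unwrap();

    // Key-only maps now come back as lists of keys rather than panicking.
    for row in reader.get_row_iter(None).unwrap() {
        let row = row.unwrap();
        for (name, field) in row.into_columns() {
            println!("{name}: {field}");
        }
    }
}
```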
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 8aff612dba5d..0347f7da46d6 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -202,6 +202,29 @@ impl Type {
         self.get_basic_info().has_repetition()
             && self.get_basic_info().repetition() != Repetition::REQUIRED
     }
+
+    /// Returns `true` if this type is annotated as a list.
+    pub(crate) fn is_list(&self) -> bool {
+        if self.is_group() {
+            let basic_info = self.get_basic_info();
+            if let Some(logical_type) = basic_info.logical_type() {
+                return logical_type == LogicalType::List;
+            }
+            return basic_info.converted_type() == ConvertedType::LIST;
+        }
+        false
+    }
+
+    /// Returns `true` if this type is a group with a single child field that is `repeated`.
+    pub(crate) fn has_single_repeated_child(&self) -> bool {
+        if self.is_group() {
+            let children = self.get_fields();
+            return children.len() == 1
+                && children[0].get_basic_info().has_repetition()
+                && children[0].get_basic_info().repetition() == Repetition::REPEATED;
+        }
+        false
+    }
 }
 
 /// A builder for primitive types. All attributes are optional
@@ -927,6 +950,32 @@ impl ColumnDescriptor {
 
 ///
 /// Encapsulates the file's schema ([`Type`]) and [`ColumnDescriptor`]s for
 /// each primitive (leaf) column.
+///
+/// # Example
+/// ```
+/// # use std::sync::Arc;
+/// use parquet::schema::types::{SchemaDescriptor, Type};
+/// use parquet::basic; // note there are two `Type`s that are different
+/// // Schema for a table with two columns: "a" (int64) and "b" (int32, stored as a date)
+/// let descriptor = SchemaDescriptor::new(
+///   Arc::new(
+///     Type::group_type_builder("my_schema")
+///       .with_fields(vec![
+///         Arc::new(
+///           Type::primitive_type_builder("a", basic::Type::INT64)
+///             .build().unwrap()
+///         ),
+///         Arc::new(
+///           Type::primitive_type_builder("b", basic::Type::INT32)
+///             .with_converted_type(basic::ConvertedType::DATE)
+///             .with_logical_type(Some(basic::LogicalType::Date))
+///             .build().unwrap()
+///         ),
+///       ])
+///       .build().unwrap()
+///   )
+/// );
+/// ```
 #[derive(PartialEq)]
 pub struct SchemaDescriptor {
     /// The top-level logical schema (the "message" type).
diff --git a/parquet_derive/LICENSE.txt b/parquet_derive/LICENSE.txt
new file mode 120000
index 000000000000..4ab43736a839
--- /dev/null
+++ b/parquet_derive/LICENSE.txt
@@ -0,0 +1 @@
+../LICENSE.txt
\ No newline at end of file
diff --git a/parquet_derive/NOTICE.txt b/parquet_derive/NOTICE.txt
new file mode 120000
index 000000000000..eb9f24e040b5
--- /dev/null
+++ b/parquet_derive/NOTICE.txt
@@ -0,0 +1 @@
+../NOTICE.txt
\ No newline at end of file
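The new `SchemaDescriptor` doc example builds a two-column schema; the descriptor can then be used to enumerate the leaf columns it describes. A sketch extending that example with the existing `num_columns` / `column` accessors (the printed format is arbitrary):

```rust
use std::sync::Arc;

use parquet::basic;
use parquet::schema::types::{SchemaDescriptor, Type};

fn main() {
    // Same shape as the doc example: "a" (int64) and "b" (int32 date).
    let schema = Type::group_type_builder("my_schema")
        .with_fields(vec![
            Arc::new(
                Type::primitive_type_builder("a", basic::Type::INT64)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("b", basic::Type::INT32)
                    .with_converted_type(basic::ConvertedType::DATE)
                    .with_logical_type(Some(basic::LogicalType::Date))
                    .build()
                    .unwrap(),
            ),
        ])
        .build()
        .unwrap();
    let descriptor = SchemaDescriptor::new(Arc::new(schema));

    // Walk the primitive (leaf) columns the descriptor exposes.
    assert_eq!(descriptor.num_columns(), 2);
    for i in 0..descriptor.num_columns() {
        let column = descriptor.column(i);
        println!("{} ({})", column.path(), column.physical_type());
    }
}
```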