diff --git a/Cargo.toml b/Cargo.toml index 77e3c6038ea71..1a2f4a84af38e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,9 +54,41 @@ arrow-array = { version = "48.0.0", default-features = false, features = ["chron arrow-buffer = { version = "48.0.0", default-features = false } arrow-flight = { version = "48.0.0", features = ["flight-sql-experimental"] } arrow-schema = { version = "48.0.0", default-features = false } +async-trait = "0.1.73" +bigdecimal = "0.4.1" +bytes = "1.4" +ctor = "0.2.0" +datafusion = { path = "datafusion/core" } +datafusion-common = { path = "datafusion/common" } +datafusion-expr = { path = "datafusion/expr" } +datafusion-sql = { path = "datafusion/sql" } +datafusion-optimizer = { path = "datafusion/optimizer" } +datafusion-physical-expr = { path = "datafusion/physical-expr" } +datafusion-physical-plan = { path = "datafusion/physical-plan" } +datafusion-execution = { path = "datafusion/execution" } +datafusion-proto = { path = "datafusion/proto" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest" } +datafusion-substrait = { path = "datafusion/substrait" } +dashmap = "5.4.0" +doc-comment = "0.3" +env_logger = "0.10" +futures = "0.3" +half = "2.2.1" +indexmap = "2.0.0" +itertools = "0.11" +log = "^0.4" +num_cpus = "1.13.0" +object_store = "0.7.0" +parking_lot = "0.12" parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] } -sqlparser = { version = "0.38.0", features = ["visitor"] } +rand = "0.8" +rstest = "0.18.0" +serde_json = "1" +sqlparser = { version = "0.39.0", features = ["visitor"] } +tempfile = "3" +thiserror = "1.0.44" chrono = { version = "0.4.31", default-features = false } +url = "2.2" [profile.release] codegen-units = 1 @@ -75,3 +107,4 @@ opt-level = 3 overflow-checks = false panic = 'unwind' rpath = false + diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 0def335521ce7..ce0a4267fc285 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -36,14 +36,14 @@ snmalloc = ["snmalloc-rs"] arrow = { workspace = true } datafusion = { path = "../datafusion/core", version = "32.0.0" } datafusion-common = { path = "../datafusion/common", version = "32.0.0" } -env_logger = "0.10" -futures = "0.3" -log = "^0.4" +env_logger = { workspace = true } +futures = { workspace = true } +log = { workspace = true } mimalloc = { version = "0.1", optional = true, default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } parquet = { workspace = true } serde = { version = "1.0.136", features = ["derive"] } -serde_json = "1.0.78" +serde_json = { workspace = true } snmalloc-rs = { version = "0.3", optional = true } structopt = { version = "0.3", default-features = false } test-utils = { path = "../test-utils/", version = "0.1.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b83088f94c579..dc828f018fd5d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -25,15 +25,16 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", "const-random", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -106,8 +107,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum 0.25.0", - "strum_macros 0.25.2", + 
"strum", + "strum_macros", "thiserror", "typed-builder", "uuid", @@ -177,7 +178,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "num", ] @@ -302,7 +303,7 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -358,9 +359,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" dependencies = [ "bzip2", "flate2", @@ -370,15 +371,15 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.12.4", - "zstd-safe 6.0.6", + "zstd 0.13.0", + "zstd-safe 7.0.0", ] [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", @@ -709,9 +710,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.4" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" [[package]] name = "base64-simd" @@ -731,9 +732,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "blake2" @@ -779,9 +780,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da74e2b81409b1b743f8f0c62cc6254afefb8b8e50bbfe3735550f7aeefa3448" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -878,9 +879,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" +checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" dependencies = [ "chrono", "chrono-tz-build", @@ -889,9 +890,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" dependencies = [ "parse-zoneinfo", "phf", @@ -950,34 +951,32 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.0.1" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", + "strum", + "strum_macros", "unicode-width", ] 
[[package]] name = "const-random" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +checksum = "11df32a13d7892ec42d51d3d175faba5211ffe13ed25d4fb348ac9e9ce835593" dependencies = [ "const-random-macro", - "proc-macro-hack", ] [[package]] name = "const-random-macro" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ "getrandom", "once_cell", - "proc-macro-hack", "tiny-keccak", ] @@ -1014,9 +1013,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -1090,7 +1089,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lock_api", "once_cell", "parking_lot_core", @@ -1122,7 +1121,7 @@ dependencies = [ "futures", "glob", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools", "log", @@ -1131,7 +1130,6 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "percent-encoding", "pin-project-lite", "rand", "sqlparser", @@ -1197,7 +1195,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "log", "object_store", "parking_lot", @@ -1215,8 +1213,8 @@ dependencies = [ "arrow-array", "datafusion-common", "sqlparser", - "strum 0.25.0", - "strum_macros 0.25.2", + "strum", + "strum_macros", ] [[package]] @@ -1229,7 +1227,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "itertools", "log", "regex-syntax", @@ -1251,7 +1249,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "hex", "indexmap 2.0.2", "itertools", @@ -1284,7 +1282,7 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools", "log", @@ -1310,9 +1308,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] [[package]] name = "difflib" @@ -1482,9 +1483,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -1516,9 +1517,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" dependencies = [ "futures-channel", "futures-core", @@ -1531,9 +1532,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" dependencies = [ "futures-core", "futures-sink", @@ -1541,15 +1542,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" dependencies = [ "futures-core", "futures-task", @@ -1558,15 +1559,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", @@ -1575,15 +1576,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" [[package]] name = "futures-timer" @@ -1593,9 +1594,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" dependencies = [ "futures-channel", "futures-core", @@ -1689,9 +1690,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" dependencies = [ "ahash", "allocator-api2", @@ -1790,7 +1791,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 
0.4.10", "tokio", "tower-service", "tracing", @@ -1814,30 +1815,30 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http", "hyper", - "rustls 0.21.7", + "rustls 0.21.8", "tokio", "tokio-rustls 0.24.1", ] [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -1876,7 +1877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -1896,9 +1897,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "itertools" @@ -2057,9 +2058,9 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -2133,9 +2134,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "wasi", @@ -2317,9 +2318,9 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.5.1" +version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "outref" @@ -2339,13 +2340,13 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -2370,7 +2371,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lz4_flex", "num", "num-bigint", @@ -2491,6 +2492,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -2552,12 +2559,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" version = "1.0.69" @@ -2643,9 +2644,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] @@ -2663,9 +2664,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", @@ -2675,9 +2676,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", @@ -2686,15 +2687,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" [[package]] name = "regex-syntax" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" @@ -2711,7 +2712,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls 0.24.1", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2719,7 +2720,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.7", + "rustls 0.21.8", "rustls-pemfile", "serde", "serde_json", @@ -2755,9 +2756,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.3" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" dependencies = [ "cc", "getrandom", @@ -2816,11 +2817,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.18" +version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -2841,12 +2842,12 @@ 
dependencies = [ [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" dependencies = [ "log", - "ring 0.16.20", + "ring 0.17.5", "rustls-webpki", "sct", ] @@ -2874,12 +2875,12 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.6" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7d5dece342910d9ba34d259310cae3e0154b873b35408b787b59bce53d34fe" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2943,12 +2944,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2988,18 +2989,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", @@ -3008,9 +3009,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -3091,9 +3092,9 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", @@ -3101,9 +3102,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", "windows-sys", @@ -3123,9 +3124,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", "sqlparser_derive", @@ -3160,39 +3161,20 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - [[package]] name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.2", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", + "strum_macros", ] [[package]] name = "strum_macros" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", @@ -3252,13 +3234,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand 2.0.1", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "rustix", "windows-sys", ] @@ -3286,18 +3268,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", @@ -3317,11 +3299,12 @@ dependencies = [ [[package]] name = "time" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", + "powerfmt", "serde", "time-core", "time-macros", @@ -3379,7 +3362,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "socket2 0.5.4", + "socket2 0.5.5", "tokio-macros", "windows-sys", ] @@ -3412,7 +3395,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.7", + "rustls 0.21.8", "tokio", ] @@ -3429,9 +3412,9 @@ dependencies = [ [[package]] 
name = "tokio-util" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -3471,11 +3454,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -3484,9 +3466,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", @@ -3495,9 +3477,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -3614,9 +3596,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" dependencies = [ "getrandom", "serde", @@ -3763,7 +3745,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.3", + "ring 0.17.5", "untrusted 0.9.0", ] @@ -3805,10 +3787,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ "windows-targets", ] @@ -3904,6 +3886,26 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "zerocopy" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd66a62464e3ffd4e37bd09950c2b9dd6c4f8767380fabba0d523f9a775bc85a" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "255c4596d41e6916ced49cfafea18727b24d67878fa180ddfd69b9df34fd1726" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "zeroize" version = "1.6.0" diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 8d504f834bc5d..57691520a401b 100644 --- a/datafusion-examples/Cargo.toml +++ 
b/datafusion-examples/Cargo.toml @@ -20,9 +20,9 @@ name = "datafusion-examples" description = "DataFusion usage examples" keywords = ["arrow", "query", "sql"] publish = false +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -33,26 +33,26 @@ rust-version = { workspace = true } arrow = { workspace = true } arrow-flight = { workspace = true } arrow-schema = { workspace = true } -async-trait = "0.1.41" -bytes = "1.4" -dashmap = "5.4" +async-trait = { workspace = true } +bytes = { workspace = true } +dashmap = { workspace = true } datafusion = { path = "../datafusion/core", features = ["avro"] } datafusion-common = { path = "../datafusion/common" } datafusion-expr = { path = "../datafusion/expr" } datafusion-optimizer = { path = "../datafusion/optimizer" } datafusion-sql = { path = "../datafusion/sql" } -env_logger = "0.10" -futures = "0.3" -log = "0.4" +env_logger = { workspace = true } +futures = { workspace = true } +log = { workspace = true } mimalloc = { version = "0.1", default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } object_store = { version = "0.7.0", features = ["aws", "http"] } prost = { version = "0.12", default-features = false } prost-derive = { version = "0.11", default-features = false } serde = { version = "1.0.136", features = ["derive"] } -serde_json = "1.0.82" -tempfile = "3" +serde_json = { workspace = true } +tempfile = { workspace = true } tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } tonic = "0.10" -url = "2.2" +url = { workspace = true } uuid = "1.2" diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 26fddcd226a98..ea01c53b1c624 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -18,7 +18,9 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::error::Result; use datafusion::prelude::*; -use std::fs; +use std::fs::File; +use std::io::Write; +use tempfile::tempdir; /// This example demonstrates executing a simple query against an Arrow data source (Parquet) and /// fetching results, using the DataFrame trait @@ -41,12 +43,19 @@ async fn main() -> Result<()> { // print the results df.show().await?; + // create a csv file waiting to be written + let dir = tempdir()?; + let file_path = dir.path().join("example.csv"); + let file = File::create(&file_path)?; + write_csv_file(file); + // Reading CSV file with inferred schema example - let csv_df = example_read_csv_file_with_inferred_schema().await; + let csv_df = + example_read_csv_file_with_inferred_schema(file_path.to_str().unwrap()).await; csv_df.show().await?; // Reading CSV file with defined schema - let csv_df = example_read_csv_file_with_schema().await; + let csv_df = example_read_csv_file_with_schema(file_path.to_str().unwrap()).await; csv_df.show().await?; // Reading PARQUET file and print describe @@ -59,31 +68,28 @@ async fn main() -> Result<()> { } // Function to create an test CSV file -fn create_csv_file(path: String) { +fn write_csv_file(mut file: File) { // Create the data to put into the csv file with headers let content = r#"id,time,vote,unixtime,rating a1,"10 6, 2013",3,1381017600,5.0 a2,"08 9, 2013",2,1376006400,4.5"#; // write the data - fs::write(path, content).expect("Problem with writing file!"); + 
file.write_all(content.as_ref()) + .expect("Problem with writing file!"); } // Example to read data from a csv file with inferred schema -async fn example_read_csv_file_with_inferred_schema() -> DataFrame { - let path = "example.csv"; - // Create a csv file using the predefined function - create_csv_file(path.to_string()); +async fn example_read_csv_file_with_inferred_schema(file_path: &str) -> DataFrame { // Create a session context let ctx = SessionContext::new(); // Register a lazy DataFrame using the context - ctx.read_csv(path, CsvReadOptions::default()).await.unwrap() + ctx.read_csv(file_path, CsvReadOptions::default()) + .await + .unwrap() } // Example to read csv file with a defined schema for the csv file -async fn example_read_csv_file_with_schema() -> DataFrame { - let path = "example.csv"; - // Create a csv file using the predefined function - create_csv_file(path.to_string()); +async fn example_read_csv_file_with_schema(file_path: &str) -> DataFrame { // Create a session context let ctx = SessionContext::new(); // Define the schema @@ -101,5 +107,5 @@ async fn example_read_csv_file_with_schema() -> DataFrame { ..Default::default() }; // Register a lazy DataFrame by using the context and option provider - ctx.read_csv(path, csv_read_option).await.unwrap() + ctx.read_csv(file_path, csv_read_option).await.unwrap() } diff --git a/datafusion-examples/examples/dataframe_subquery.rs b/datafusion-examples/examples/dataframe_subquery.rs index 94049e59b3ab8..9fb61008b9f69 100644 --- a/datafusion-examples/examples/dataframe_subquery.rs +++ b/datafusion-examples/examples/dataframe_subquery.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use arrow_schema::DataType; use std::sync::Arc; use datafusion::error::Result; @@ -38,7 +39,7 @@ async fn main() -> Result<()> { Ok(()) } -//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 10; +//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 3; async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? @@ -46,7 +47,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { scalar_subquery(Arc::new( ctx.table("t2") .await? - .filter(col("t1.c1").eq(col("t2.c1")))? + .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))? .aggregate(vec![], vec![avg(col("t2.c2"))])? .select(vec![avg(col("t2.c2"))])? .into_unoptimized_plan(), @@ -60,7 +61,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { Ok(()) } -//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 10 +//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 3; async fn where_in_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? @@ -82,14 +83,14 @@ async fn where_in_subquery(ctx: &SessionContext) -> Result<()> { Ok(()) } -//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 10 +//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 3; async fn where_exist_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? .filter(exists(Arc::new( ctx.table("t2") .await? - .filter(col("t1.c1").eq(col("t2.c1")))? + .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))? .select(vec![col("t2.c2")])? .into_unoptimized_plan(), )))? 
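Commentary on the `out_ref_col` changes above (a hedged sketch, not part of the diff): inside a subquery plan, a column that belongs to the enclosing query must be represented as an `Expr::OuterReferenceColumn` carrying its data type, which is what `out_ref_col` builds; a plain `col("t1.c1")` would be resolved against `t2`'s schema and fail, and the optimizer could not decorrelate the subquery. The sketch below re-derives the `where_scalar_subquery` example under the same assumed tables `t1`/`t2` with Utf8 `c1` columns; the `.gt(lit(0u8))` comparison and the function name are illustrative, not taken from the hunk.

```rust
use std::sync::Arc;

use arrow_schema::DataType;
use datafusion::error::Result;
use datafusion::prelude::*;

// Sketch of a correlated scalar subquery built with the DataFrame API:
// select c1, c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1) > 0 limit 3;
async fn where_scalar_subquery_sketch(ctx: &SessionContext) -> Result<DataFrame> {
    let subquery = Arc::new(
        ctx.table("t2")
            .await?
            // `t1.c1` is a column of the *outer* query, so mark it as an
            // outer reference and tell the planner its data type.
            .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))?
            .aggregate(vec![], vec![avg(col("t2.c2"))])?
            .select(vec![avg(col("t2.c2"))])?
            .into_unoptimized_plan(),
    );
    ctx.table("t1")
        .await?
        .filter(scalar_subquery(subquery).gt(lit(0u8)))?
        .select(vec![col("t1.c1"), col("t1.c2")])?
        .limit(0, Some(3))
}
```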
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 490fbeacad859..d04db86b78301 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-common" description = "Common functionality for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -46,7 +46,7 @@ arrow-buffer = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } half = { version = "2.1", default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } object_store = { version = "0.7.0", default-features = false, optional = true } parquet = { workspace = true, optional = true } pyo3 = { version = "0.20.0", optional = true } diff --git a/datafusion/common/README.md b/datafusion/common/README.md index 9bccf3f18b7f4..524ab4420d2a8 100644 --- a/datafusion/common/README.md +++ b/datafusion/common/README.md @@ -19,7 +19,7 @@ # DataFusion Common -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides common data types and utilities. diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index be24e2b933b5f..b3c11740abf9e 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -600,117 +600,6 @@ macro_rules! typed_cast { }}; } -macro_rules! build_timestamp_list { - ($TIME_UNIT:expr, $TIME_ZONE:expr, $VALUES:expr, $SIZE:expr) => {{ - match $VALUES { - // the return on the macro is necessary, to short-circuit and return ArrayRef - None => { - return new_null_array( - &DataType::List(Arc::new(Field::new( - "item", - DataType::Timestamp($TIME_UNIT, $TIME_ZONE), - true, - ))), - $SIZE, - ) - } - Some(values) => match $TIME_UNIT { - TimeUnit::Second => { - build_values_list_tz!( - TimestampSecondBuilder, - TimestampSecond, - values, - $SIZE, - $TIME_ZONE - ) - } - TimeUnit::Millisecond => build_values_list_tz!( - TimestampMillisecondBuilder, - TimestampMillisecond, - values, - $SIZE, - $TIME_ZONE - ), - TimeUnit::Microsecond => build_values_list_tz!( - TimestampMicrosecondBuilder, - TimestampMicrosecond, - values, - $SIZE, - $TIME_ZONE - ), - TimeUnit::Nanosecond => build_values_list_tz!( - TimestampNanosecondBuilder, - TimestampNanosecond, - values, - $SIZE, - $TIME_ZONE - ), - }, - } - }}; -} - -macro_rules! new_builder { - (StringBuilder, $len:expr) => { - StringBuilder::new() - }; - (LargeStringBuilder, $len:expr) => { - LargeStringBuilder::new() - }; - ($el:ident, $len:expr) => {{ - <$el>::with_capacity($len) - }}; -} - -macro_rules! 
build_values_list { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr) => {{ - let builder = new_builder!($VALUE_BUILDER_TY, $VALUES.len()); - let mut builder = ListBuilder::new(builder); - - for _ in 0..$SIZE { - for scalar_value in $VALUES { - match scalar_value { - ScalarValue::$SCALAR_TY(Some(v)) => { - builder.values().append_value(v.clone()); - } - ScalarValue::$SCALAR_TY(None) => { - builder.values().append_null(); - } - _ => panic!("Incompatible ScalarValue for list"), - }; - } - builder.append(true); - } - - builder.finish() - }}; -} - -macro_rules! build_values_list_tz { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr, $TIME_ZONE:expr) => {{ - let mut builder = ListBuilder::new( - $VALUE_BUILDER_TY::with_capacity($VALUES.len()).with_timezone_opt($TIME_ZONE), - ); - - for _ in 0..$SIZE { - for scalar_value in $VALUES { - match scalar_value { - ScalarValue::$SCALAR_TY(Some(v), _) => { - builder.values().append_value(v.clone()); - } - ScalarValue::$SCALAR_TY(None, _) => { - builder.values().append_null(); - } - _ => panic!("Incompatible ScalarValue for list"), - }; - } - builder.append(true); - } - - builder.finish() - }}; -} - macro_rules! build_array_from_option { ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{ match $EXPR { @@ -1198,7 +1087,8 @@ impl ScalarValue { } /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`] - /// corresponding to those values. For example, + /// corresponding to those values. For example, an iterator of + /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`]. /// /// Returns an error if the iterator is empty or if the /// [`ScalarValue`]s are not all the same type @@ -1312,10 +1202,11 @@ impl ScalarValue { Arc::new(ListArray::from_iter_primitive::<$ARRAY_TY, _, _>( scalars.into_iter().map(|x| match x { ScalarValue::List(arr) => { - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + if list_arr.is_null(0) { None } else { - let list_arr = as_list_array(&arr); let primitive_arr = list_arr.values().as_primitive::<$ARRAY_TY>(); Some( @@ -1339,12 +1230,14 @@ impl ScalarValue { for scalar in scalars.into_iter() { match scalar { ScalarValue::List(arr) => { - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + + if list_arr.is_null(0) { builder.append(false); continue; } - let list_arr = as_list_array(&arr); let string_arr = $STRING_ARRAY(list_arr.values()); for v in string_arr.iter() { @@ -1654,41 +1547,6 @@ impl ScalarValue { Ok(array) } - /// This function does not contains nulls but empty array instead. - fn iter_to_array_list_without_nulls( - values: &[ScalarValue], - data_type: &DataType, - ) -> Result> { - let mut elements: Vec = vec![]; - let mut offsets = vec![]; - - if values.is_empty() { - offsets.push(0); - } else { - let arr = ScalarValue::iter_to_array(values.to_vec())?; - offsets.push(arr.len()); - elements.push(arr); - } - - // Concatenate element arrays to create single flat array - let flat_array = if elements.is_empty() { - new_empty_array(data_type) - } else { - let element_arrays: Vec<&dyn Array> = - elements.iter().map(|a| a.as_ref()).collect(); - arrow::compute::concat(&element_arrays)? 
- }; - - let list_array = ListArray::new( - Arc::new(Field::new("item", flat_array.data_type().to_owned(), true)), - OffsetBuffer::::from_lengths(offsets), - flat_array, - None, - ); - - Ok(list_array) - } - /// This function build with nulls with nulls buffer. fn iter_to_array_list( scalars: impl IntoIterator, @@ -1699,15 +1557,16 @@ impl ScalarValue { for scalar in scalars { if let ScalarValue::List(arr) = scalar { - // i.e. NullArray(1) - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + + if list_arr.is_null(0) { // Repeat previous offset index offsets.push(0); // Element is null valid.append(false); } else { - let list_arr = as_list_array(&arr); let arr = list_arr.values().to_owned(); offsets.push(arr.len()); elements.push(arr); @@ -1776,7 +1635,8 @@ impl ScalarValue { .unwrap() } - /// Converts `Vec` to ListArray, simplified version of ScalarValue::to_array + /// Converts `Vec` where each element has type corresponding to + /// `data_type`, to a [`ListArray`]. /// /// Example /// ``` @@ -1802,52 +1662,12 @@ impl ScalarValue { /// assert_eq!(result, &expected); /// ``` pub fn new_list(values: &[ScalarValue], data_type: &DataType) -> ArrayRef { - Arc::new(match data_type { - DataType::Boolean => build_values_list!(BooleanBuilder, Boolean, values, 1), - DataType::Int8 => build_values_list!(Int8Builder, Int8, values, 1), - DataType::Int16 => build_values_list!(Int16Builder, Int16, values, 1), - DataType::Int32 => build_values_list!(Int32Builder, Int32, values, 1), - DataType::Int64 => build_values_list!(Int64Builder, Int64, values, 1), - DataType::UInt8 => build_values_list!(UInt8Builder, UInt8, values, 1), - DataType::UInt16 => build_values_list!(UInt16Builder, UInt16, values, 1), - DataType::UInt32 => build_values_list!(UInt32Builder, UInt32, values, 1), - DataType::UInt64 => build_values_list!(UInt64Builder, UInt64, values, 1), - DataType::Utf8 => build_values_list!(StringBuilder, Utf8, values, 1), - DataType::LargeUtf8 => { - build_values_list!(LargeStringBuilder, LargeUtf8, values, 1) - } - DataType::Float32 => build_values_list!(Float32Builder, Float32, values, 1), - DataType::Float64 => build_values_list!(Float64Builder, Float64, values, 1), - DataType::Timestamp(unit, tz) => { - let values = Some(values); - build_timestamp_list!(unit.clone(), tz.clone(), values, 1) - } - DataType::List(_) | DataType::Struct(_) => { - ScalarValue::iter_to_array_list_without_nulls(values, data_type).unwrap() - } - DataType::Decimal128(precision, scale) => { - let mut vals = vec![]; - for value in values.iter() { - if let ScalarValue::Decimal128(v, _, _) = value { - vals.push(v.to_owned()) - } - } - - let arr = Decimal128Array::from(vals) - .with_precision_and_scale(*precision, *scale) - .unwrap(); - wrap_into_list_array(Arc::new(arr)) - } - - DataType::Null => { - let arr = new_null_array(&DataType::Null, values.len()); - wrap_into_list_array(arr) - } - _ => panic!( - "Unsupported data type {:?} for ScalarValue::list_to_array", - data_type - ), - }) + let values = if values.is_empty() { + new_empty_array(data_type) + } else { + Self::iter_to_array(values.iter().cloned()).unwrap() + }; + Arc::new(wrap_into_list_array(values)) } /// Converts a scalar value into an array of `size` rows. 
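A hedged usage sketch (commentary, not part of the diff) of the invariant the scalar.rs hunks above encode: a `ScalarValue::List` always wraps a one-element `ListArray`, and a NULL list is that same shape with its single entry marked null, which is why the rewritten code tests `list_arr.is_null(0)` instead of downcasting to `NullArray`. The sketch assumes `datafusion_common` and `arrow` as dependencies and mirrors the `scalar_try_from_list` test added later in this patch.

```rust
use std::sync::Arc;

use arrow::array::Array;
use arrow::datatypes::{DataType, Field};
use datafusion_common::ScalarValue;

fn main() {
    // `new_list` now funnels every supported type through `iter_to_array`
    // plus `wrap_into_list_array`: [1, 2] becomes a ListArray of length 1
    // whose child array holds the two values.
    let arr = ScalarValue::new_list(
        &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))],
        &DataType::Int32,
    );
    assert_eq!(arr.len(), 1);

    // A null list scalar built via `TryFrom<&DataType>` is a one-element
    // ListArray whose single entry is null (no more zero-length NullArray).
    let dt = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
    let scalar: ScalarValue = (&dt).try_into().unwrap();
    if let ScalarValue::List(arr) = scalar {
        assert_eq!(arr.len(), 1);
        assert!(arr.is_null(0));
    }
}
```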
@@ -2234,28 +2054,20 @@ impl ScalarValue { } DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8), DataType::LargeUtf8 => typed_cast!(array, index, LargeStringArray, LargeUtf8), - DataType::List(nested_type) => { + DataType::List(_) => { let list_array = as_list_array(array); - let arr = match list_array.is_null(index) { - true => new_null_array(nested_type.data_type(), 0), - false => { - let nested_array = list_array.value(index); - Arc::new(wrap_into_list_array(nested_array)) - } - }; + let nested_array = list_array.value(index); + // Produces a single element `ListArray` with the value at `index`. + let arr = Arc::new(wrap_into_list_array(nested_array)); ScalarValue::List(arr) } // TODO: There is no test for FixedSizeList now, add it later - DataType::FixedSizeList(nested_type, _len) => { + DataType::FixedSizeList(_, _) => { let list_array = as_fixed_size_list_array(array)?; - let arr = match list_array.is_null(index) { - true => new_null_array(nested_type.data_type(), 0), - false => { - let nested_array = list_array.value(index); - Arc::new(wrap_into_list_array(nested_array)) - } - }; + let nested_array = list_array.value(index); + // Produces a single element `ListArray` with the value at `index`. + let arr = Arc::new(wrap_into_list_array(nested_array)); ScalarValue::List(arr) } @@ -2944,8 +2756,15 @@ impl TryFrom<&DataType> for ScalarValue { index_type.clone(), Box::new(value_type.as_ref().try_into()?), ), - DataType::List(_) => ScalarValue::List(new_null_array(&DataType::Null, 0)), - + // `ScalaValue::List` contains single element `ListArray`. + DataType::List(field) => ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + ))), + 1, + )), DataType::Struct(fields) => ScalarValue::Struct(None, fields.clone()), DataType::Null => ScalarValue::Null, _ => { @@ -3885,6 +3704,78 @@ mod tests { ); } + #[test] + fn scalar_try_from_array_list_array_null() { + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2)]), + None, + ]); + + let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap(); + let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap(); + + let data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + + assert_eq!(non_null_list_scalar.data_type(), data_type.clone()); + assert_eq!(null_list_scalar.data_type(), data_type); + } + + #[test] + fn scalar_try_from_list() { + let data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = &data_type; + let scalar: ScalarValue = data_type.try_into().unwrap(); + + let expected = ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + 1, + )); + + assert_eq!(expected, scalar) + } + + #[test] + fn scalar_try_from_list_of_list() { + let data_type = DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))); + let data_type = &data_type; + let scalar: ScalarValue = data_type.try_into().unwrap(); + + let expected = ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))), + 1, + )); + + assert_eq!(expected, scalar) + } + + #[test] + fn scalar_try_from_not_equal_list_nested_list() { + let list_data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = &list_data_type; + let 
list_scalar: ScalarValue = data_type.try_into().unwrap(); + + let nested_list_data_type = DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))); + let data_type = &nested_list_data_type; + let nested_list_scalar: ScalarValue = data_type.try_into().unwrap(); + + assert_ne!(list_scalar, nested_list_scalar); + } + #[test] fn scalar_try_from_dict_datatype() { let data_type = diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 5f9d28bd620bb..4015ba439e67d 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -20,9 +20,9 @@ name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" keywords = ["arrow", "query", "sql"] include = ["benches/*.rs", "src/**/*.rs", "Cargo.toml"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -57,62 +57,61 @@ arrow = { workspace = true } arrow-array = { workspace = true } arrow-schema = { workspace = true } async-compression = { version = "0.4.0", features = ["bzip2", "gzip", "xz", "zstd", "futures-io", "tokio"], optional = true } -async-trait = "0.1.73" -bytes = "1.4" +async-trait = { workspace = true } +bytes = { workspace = true } bzip2 = { version = "0.4.3", optional = true } chrono = { workspace = true } -dashmap = "5.4.0" +dashmap = { workspace = true } datafusion-common = { path = "../common", version = "32.0.0", features = ["object_store"], default-features = false } -datafusion-execution = { path = "../execution", version = "32.0.0" } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } datafusion-optimizer = { path = "../optimizer", version = "32.0.0", default-features = false } datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0", default-features = false } -datafusion-physical-plan = { path = "../physical-plan", version = "32.0.0", default-features = false } -datafusion-sql = { path = "../sql", version = "32.0.0" } +datafusion-physical-plan = { workspace = true } +datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } -futures = "0.3" +futures = { workspace = true } glob = "0.3.0" half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -indexmap = "2.0.0" -itertools = "0.11" -log = "^0.4" +indexmap = { workspace = true } +itertools = { workspace = true } +log = { workspace = true } num-traits = { version = "0.2", optional = true } -num_cpus = "1.13.0" -object_store = "0.7.0" -parking_lot = "0.12" +num_cpus = { workspace = true } +object_store = { workspace = true } +parking_lot = { workspace = true } parquet = { workspace = true, optional = true } -percent-encoding = "2.2.0" pin-project-lite = "^0.2.7" -rand = "0.8" +rand = { workspace = true } sqlparser = { workspace = true } -tempfile = "3" +tempfile = { workspace = true } tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-util = { version = "0.7.4", features = ["io"] } -url = "2.2" +url = { workspace = true } uuid = { version = "1.0", features = ["v4"] } xz2 = { version = "0.1", optional = true } zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] -async-trait = "0.1.53" 
-bigdecimal = "0.4.1" +async-trait = { workspace = true } +bigdecimal = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } csv = "1.1.6" -ctor = "0.2.0" -doc-comment = "0.3" -env_logger = "0.10" -half = "2.2.1" +ctor = { workspace = true } +doc-comment = { workspace = true } +env_logger = { workspace = true } +half = { workspace = true } postgres-protocol = "0.6.4" postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] } rand = { version = "0.8", features = ["small_rng"] } rand_distr = "0.4.3" regex = "1.5.4" -rstest = "0.18.0" +rstest = { workspace = true } rust_decimal = { version = "1.27.0", features = ["tokio-pg"] } -serde_json = "1" +serde_json = { workspace = true } test-utils = { path = "../../test-utils" } -thiserror = "1.0.37" +thiserror = { workspace = true } tokio-postgres = "0.7.7" [target.'cfg(not(target_os = "windows"))'.dev-dependencies] nix = { version = "0.27.1", features = ["fs"] } diff --git a/datafusion/core/README.md b/datafusion/core/README.md new file mode 100644 index 0000000000000..5a9493d086cd1 --- /dev/null +++ b/datafusion/core/README.md @@ -0,0 +1,26 @@ + + +# DataFusion Common + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate contains the main entrypoints and high level DataFusion APIs such as SessionContext, and DataFrame and ListingTable. + +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index fe5bdc0ec6a9e..ce27d57da00d8 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -93,12 +93,6 @@ impl CatalogList for MemoryCatalogList { } } -impl Default for MemoryCatalogProvider { - fn default() -> Self { - Self::new() - } -} - /// Represents a catalog, comprising a number of named schemas. pub trait CatalogProvider: Sync + Send { /// Returns the catalog provider as [`Any`] @@ -161,6 +155,12 @@ impl MemoryCatalogProvider { } } +impl Default for MemoryCatalogProvider { + fn default() -> Self { + Self::new() + } +} + impl CatalogProvider for MemoryCatalogProvider { fn as_any(&self) -> &dyn Any { self diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 4d1ca4853a735..9197e37adbd5d 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -27,7 +27,6 @@ use itertools::Itertools; use log::debug; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; -use percent_encoding; use std::sync::Arc; use url::Url; @@ -46,6 +45,16 @@ pub struct ListingTableUrl { impl ListingTableUrl { /// Parse a provided string as a `ListingTableUrl` /// + /// # URL Encoding + /// + /// URL paths are expected to be URL-encoded. That is, the URL for a file named `bar%2Efoo` + /// would be `file:///bar%252Efoo`, as per the [URL] specification. + /// + /// It should be noted that some tools, such as the AWS CLI, take a different approach and + /// instead interpret the URL path verbatim. For example the object `bar%2Efoo` would be + /// addressed as `s3://BUCKET/bar%252Efoo` using [`ListingTableUrl`] but `s3://BUCKET/bar%2Efoo` + /// when using the aws-cli. 
+    ///
     /// # Paths without a Scheme
     ///
     /// If no scheme is provided, or the string is an absolute filesystem path
@@ -77,6 +86,7 @@ impl ListingTableUrl {
     /// filter when listing files from object storage
     ///
     /// [file URI]: https://en.wikipedia.org/wiki/File_URI_scheme
+    /// [URL]: https://url.spec.whatwg.org/
     pub fn parse(s: impl AsRef<str>) -> Result<Self> {
         let s = s.as_ref();
@@ -86,7 +96,7 @@ impl ListingTableUrl {
         }

         match Url::parse(s) {
-            Ok(url) => Ok(Self::new(url, None)),
+            Ok(url) => Self::try_new(url, None),
             Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s),
             Err(e) => Err(DataFusionError::External(Box::new(e))),
         }
@@ -138,15 +148,13 @@ impl ListingTableUrl {
             .map_err(|_| DataFusionError::Internal(format!("Can not open path: {s}")))?;
         // TODO: Currently we do not have an IO-related error variant that accepts ()
         // or a string. Once we have such a variant, change the error type above.
-        Ok(Self::new(url, glob))
+        Self::try_new(url, glob)
     }

     /// Creates a new [`ListingTableUrl`] from a url and optional glob expression
-    fn new(url: Url, glob: Option<Pattern>) -> Self {
-        let decoded_path =
-            percent_encoding::percent_decode_str(url.path()).decode_utf8_lossy();
-        let prefix = Path::from(decoded_path.as_ref());
-        Self { url, prefix, glob }
+    fn try_new(url: Url, glob: Option<Pattern>) -> Result<Self> {
+        let prefix = Path::from_url_path(url.path())?;
+        Ok(Self { url, prefix, glob })
     }

     /// Returns the URL scheme
@@ -286,6 +294,7 @@ fn split_glob_expression(path: &str) -> Option<(&str, &str)> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use tempfile::tempdir;

     #[test]
     fn test_prefix_path() {
@@ -317,8 +326,24 @@ mod tests {
         let url = ListingTableUrl::parse("file:///foo/bar?").unwrap();
         assert_eq!(url.prefix.as_ref(), "foo/bar");

-        let url = ListingTableUrl::parse("file:///foo/😺").unwrap();
-        assert_eq!(url.prefix.as_ref(), "foo/%F0%9F%98%BA");
+        let err = ListingTableUrl::parse("file:///foo/😺").unwrap_err();
+        assert_eq!(err.to_string(), "Object Store error: Encountered object with invalid path: Error parsing Path \"/foo/😺\": Encountered illegal character sequence \"😺\" whilst parsing path segment \"😺\"");
+
+        let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/bar.foo");
+
+        let url = ListingTableUrl::parse("file:///foo/bar%252Ffoo").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/bar%2Ffoo");
+
+        let url = ListingTableUrl::parse("file:///foo/a%252Fb.txt").unwrap();
+        assert_eq!(url.prefix.as_ref(), "foo/a%2Fb.txt");
+
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("bar%2Ffoo");
+        std::fs::File::create(&path).unwrap();
+
+        let url = ListingTableUrl::parse(path.to_str().unwrap()).unwrap();
+        assert!(url.prefix.as_ref().ends_with("bar%2Ffoo"), "{}", url.prefix);
     }

     #[test]
diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs
index 8117e101ea99d..82163da64af80 100644
--- a/datafusion/core/src/datasource/physical_plan/csv.rs
+++ b/datafusion/core/src/datasource/physical_plan/csv.rs
@@ -46,6 +46,7 @@ use datafusion_physical_expr::{
 };

 use bytes::{Buf, Bytes};
+use datafusion_common::config::ConfigOptions;
 use futures::{ready, StreamExt, TryStreamExt};
 use object_store::{GetOptions, GetResultPayload, ObjectStore};
 use tokio::io::AsyncWriteExt;
@@ -117,34 +118,6 @@ impl CsvExec {
     pub fn escape(&self) -> Option<u8> {
         self.escape
     }
-
-    /// Redistribute files
across partitions according to their size - /// See comments on `repartition_file_groups()` for more detail. - /// - /// Return `None` if can't get repartitioned(empty/compressed file). - pub fn get_repartitioned( - &self, - target_partitions: usize, - repartition_file_min_size: usize, - ) -> Option { - // Parallel execution on compressed CSV file is not supported yet. - if self.file_compression_type.is_compressed() { - return None; - } - - let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( - self.base_config.file_groups.clone(), - target_partitions, - repartition_file_min_size, - ); - - if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { - let mut new_plan = self.clone(); - new_plan.base_config.file_groups = repartitioned_file_groups; - return Some(new_plan); - } - None - } } impl DisplayAs for CsvExec { @@ -205,6 +178,35 @@ impl ExecutionPlan for CsvExec { Ok(self) } + /// Redistribute files across partitions according to their size + /// See comments on `repartition_file_groups()` for more detail. + /// + /// Return `None` if can't get repartitioned(empty/compressed file). + fn repartitioned( + &self, + target_partitions: usize, + config: &ConfigOptions, + ) -> Result>> { + let repartition_file_min_size = config.optimizer.repartition_file_min_size; + // Parallel execution on compressed CSV file is not supported yet. + if self.file_compression_type.is_compressed() { + return Ok(None); + } + + let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( + self.base_config.file_groups.clone(), + target_partitions, + repartition_file_min_size, + ); + + if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { + let mut new_plan = self.clone(); + new_plan.base_config.file_groups = repartitioned_file_groups; + return Ok(Some(Arc::new(new_plan))); + } + Ok(None) + } + fn execute( &self, partition: usize, diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 6643e4127dbdc..ea0a9698ff5ca 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -527,6 +527,7 @@ mod tests { }; use arrow_schema::Field; use chrono::Utc; + use datafusion_common::config::ConfigOptions; use crate::physical_plan::{DefaultDisplay, VerboseDisplay}; @@ -828,11 +829,7 @@ mod tests { None, ); - let partitioned_file = parquet_exec - .get_repartitioned(4, 0) - .base_config() - .file_groups - .clone(); + let partitioned_file = repartition_with_size(&parquet_exec, 4, 0); assert!(partitioned_file[0][0].range.is_none()); } @@ -893,13 +890,8 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(n_partition, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = + repartition_with_size_to_vec(&parquet_exec, n_partition, 10); assert_eq!(expected, &actual); } @@ -927,13 +919,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(4, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 4, 10); let expected = vec![ (0, "a".to_string(), 0, 31), (1, "a".to_string(), 31, 62), @@ -964,13 +950,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(96, 5) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 96, 5); let expected = vec![ (0, "a".to_string(), 0, 1), (1, 
"a".to_string(), 1, 2), @@ -1007,13 +987,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(3, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 3, 10); let expected = vec![ (0, "a".to_string(), 0, 34), (1, "a".to_string(), 34, 40), @@ -1046,13 +1020,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(2, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 2, 10); let expected = vec![ (0, "a".to_string(), 0, 40), (0, "b".to_string(), 0, 10), @@ -1086,11 +1054,7 @@ mod tests { None, ); - let actual = parquet_exec - .get_repartitioned(65, 10) - .base_config() - .file_groups - .clone(); + let actual = repartition_with_size(&parquet_exec, 65, 10); assert_eq!(2, actual.len()); } @@ -1115,17 +1079,47 @@ mod tests { None, ); - let actual = parquet_exec - .get_repartitioned(65, 500) + let actual = repartition_with_size(&parquet_exec, 65, 500); + assert_eq!(1, actual.len()); + } + + /// Calls `ParquetExec.repartitioned` with the specified + /// `target_partitions` and `repartition_file_min_size`, returning the + /// resulting `PartitionedFile`s + fn repartition_with_size( + parquet_exec: &ParquetExec, + target_partitions: usize, + repartition_file_min_size: usize, + ) -> Vec> { + let mut config = ConfigOptions::new(); + config.optimizer.repartition_file_min_size = repartition_file_min_size; + + parquet_exec + .repartitioned(target_partitions, &config) + .unwrap() // unwrap Result + .unwrap() // unwrap Option + .as_any() + .downcast_ref::() + .unwrap() .base_config() .file_groups - .clone(); - assert_eq!(1, actual.len()); + .clone() } - fn file_groups_to_vec( - file_groups: Vec>, + /// Calls `repartition_with_size` and returns a tuple for each output `PartitionedFile`: + /// + /// `(partition index, file path, start, end)` + fn repartition_with_size_to_vec( + parquet_exec: &ParquetExec, + target_partitions: usize, + repartition_file_min_size: usize, ) -> Vec<(usize, String, i64, i64)> { + let file_groups = repartition_with_size( + parquet_exec, + target_partitions, + repartition_file_min_size, + ); + file_groups .iter() .enumerate() diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 3a2459bec817e..f6e999f60249d 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -259,26 +259,6 @@ impl ParquetExec { self.enable_bloom_filter .unwrap_or(config_options.execution.parquet.bloom_filter_enabled) } - - /// Redistribute files across partitions according to their size - /// See comments on `get_file_groups_repartitioned()` for more detail. 
- pub fn get_repartitioned( - &self, - target_partitions: usize, - repartition_file_min_size: usize, - ) -> Self { - let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( - self.base_config.file_groups.clone(), - target_partitions, - repartition_file_min_size, - ); - - let mut new_plan = self.clone(); - if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { - new_plan.base_config.file_groups = repartitioned_file_groups; - } - new_plan - } } impl DisplayAs for ParquetExec { @@ -349,6 +329,27 @@ impl ExecutionPlan for ParquetExec { Ok(self) } + /// Redistribute files across partitions according to their size + /// See comments on `get_file_groups_repartitioned()` for more detail. + fn repartitioned( + &self, + target_partitions: usize, + config: &ConfigOptions, + ) -> Result>> { + let repartition_file_min_size = config.optimizer.repartition_file_min_size; + let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( + self.base_config.file_groups.clone(), + target_partitions, + repartition_file_min_size, + ); + + let mut new_plan = self.clone(); + if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { + new_plan.base_config.file_groups = repartitioned_file_groups; + } + Ok(Some(Arc::new(new_plan))) + } + fn execute( &self, partition_index: usize, diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 838ae613683e8..2c4e929788df9 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -93,7 +93,7 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { input_agg_exec.filter_expr().to_vec(), input_agg_exec.order_by_expr().to_vec(), input_agg_exec.input().clone(), - input_agg_exec.input_schema().clone(), + input_agg_exec.input_schema(), ) .ok() .map(Arc::new) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 7b91dce32aa94..6de39db7d52af 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -26,9 +26,6 @@ use std::fmt::Formatter; use std::sync::Arc; use crate::config::ConfigOptions; -use crate::datasource::physical_plan::CsvExec; -#[cfg(feature = "parquet")] -use crate::datasource::physical_plan::ParquetExec; use crate::error::Result; use crate::physical_optimizer::utils::{ add_sort_above, get_children_exectrees, get_plan_string, is_coalesce_partitions, @@ -554,7 +551,7 @@ fn reorder_aggregate_keys( agg_exec.filter_expr().to_vec(), agg_exec.order_by_expr().to_vec(), partial_agg, - agg_exec.input_schema().clone(), + agg_exec.input_schema(), )?); // Need to create a new projection to change the expr ordering back @@ -1188,7 +1185,6 @@ fn ensure_distribution( // When `false`, round robin repartition will not be added to increase parallelism let enable_round_robin = config.optimizer.enable_round_robin_repartition; let repartition_file_scans = config.optimizer.repartition_file_scans; - let repartition_file_min_size = config.optimizer.repartition_file_min_size; let batch_size = config.execution.batch_size; let is_unbounded = unbounded_output(&dist_context.plan); // Use order preserving variants either of the conditions true @@ -1265,25 +1261,13 @@ fn ensure_distribution( // Unless partitioning doesn't increase the 
partition count, it is not beneficial: && child.output_partitioning().partition_count() < target_partitions { - // When `repartition_file_scans` is set, leverage source operators - // (`ParquetExec`, `CsvExec` etc.) to increase parallelism at the source. + // When `repartition_file_scans` is set, attempt to increase + // parallelism at the source. if repartition_file_scans { - #[cfg(feature = "parquet")] - if let Some(parquet_exec) = - child.as_any().downcast_ref::() + if let Some(new_child) = + child.repartitioned(target_partitions, config)? { - child = Arc::new(parquet_exec.get_repartitioned( - target_partitions, - repartition_file_min_size, - )); - } - if let Some(csv_exec) = child.as_any().downcast_ref::() { - if let Some(csv_exec) = csv_exec.get_repartitioned( - target_partitions, - repartition_file_min_size, - ) { - child = Arc::new(csv_exec); - } + child = new_child; } } // Increase parallelism by adding round-robin repartitioning @@ -1644,8 +1628,8 @@ mod tests { use crate::datasource::file_format::file_compression_type::FileCompressionType; use crate::datasource::listing::PartitionedFile; use crate::datasource::object_store::ObjectStoreUrl; - use crate::datasource::physical_plan::FileScanConfig; use crate::datasource::physical_plan::ParquetExec; + use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; use crate::physical_optimizer::enforce_sorting::EnforceSorting; use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_plan::aggregates::{ diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 572e796a8ba73..e0a8da82e35fc 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -75,7 +75,7 @@ impl TopKAggregation { aggr.filter_expr().to_vec(), aggr.order_by_expr().to_vec(), aggr.input().clone(), - aggr.input_schema().clone(), + aggr.input_schema(), ) .expect("Unable to copy Aggregate!") .with_limit(Some(limit)); diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 6ae8bccdae38f..e9bb87e9f8ac3 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-execution" description = "Execution configuration support for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -35,14 +35,14 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } chrono = { version = "0.4", default-features = false } -dashmap = "5.4.0" -datafusion-common = { path = "../common", version = "32.0.0" } -datafusion-expr = { path = "../expr", version = "32.0.0" } -futures = "0.3" +dashmap = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +futures = { workspace = true } hashbrown = { version = "0.14", features = ["raw"] } -log = "^0.4" -object_store = "0.7.0" -parking_lot = "0.12" -rand = "0.8" -tempfile = "3" -url = "2.2" +log = { workspace = true } +object_store = { workspace = true } +parking_lot = { workspace = true } +rand = { workspace = true } +tempfile = { workspace = true } +url = { workspace = true } diff --git a/datafusion/execution/README.md b/datafusion/execution/README.md new file mode 100644 index 
0000000000000..67aac6be82b3f
--- /dev/null
+++ b/datafusion/execution/README.md
@@ -0,0 +1,26 @@
+
+
+# DataFusion Execution
+
+[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+
+This crate is a submodule of DataFusion that provides execution runtime support such as memory pools and the disk manager.
+
+[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs
index bbd8f4be4f1cf..71483e200af60 100644
--- a/datafusion/execution/src/memory_pool/mod.rs
+++ b/datafusion/execution/src/memory_pool/mod.rs
@@ -157,6 +157,11 @@ impl MemoryReservation {
         self.size
     }

+    /// Returns the [`MemoryConsumer`] for this [`MemoryReservation`]
+    pub fn consumer(&self) -> &MemoryConsumer {
+        &self.registration.consumer
+    }
+
     /// Frees all bytes from this reservation back to the underlying
     /// pool, returning the number of bytes freed.
     pub fn free(&mut self) -> usize {
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index c5cf6a1ac11f2..5b1b421538772 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -19,9 +19,9 @@
 name = "datafusion-expr"
 description = "Logical plan and expression representation for DataFusion query engine"
 keywords = ["datafusion", "logical", "plan", "expressions"]
+readme = "README.md"
 version = { workspace = true }
 edition = { workspace = true }
-readme = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
 license = { workspace = true }
@@ -38,11 +38,11 @@ path = "src/lib.rs"
 ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
-datafusion-common = { path = "../common", version = "32.0.0", default-features = false }
+datafusion-common = { workspace = true }
 sqlparser = { workspace = true }
 strum = { version = "0.25.0", features = ["derive"] }
 strum_macros = "0.25.0"

 [dev-dependencies]
-ctor = "0.2.0"
-env_logger = "0.10"
+ctor = { workspace = true }
+env_logger = { workspace = true }
diff --git a/datafusion/expr/README.md b/datafusion/expr/README.md
index bcce30be39d95..b086f930e871b 100644
--- a/datafusion/expr/README.md
+++ b/datafusion/expr/README.md
@@ -19,7 +19,7 @@

 # DataFusion Logical Plan and Expressions

-[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.

 This crate is a submodule of DataFusion that provides data types and utilities for logical plans and expressions.
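Reviewer note on the new `MemoryReservation::consumer` accessor added above: a minimal usage sketch, assuming only the existing `MemoryConsumer`/`GreedyMemoryPool` APIs of this crate (the pool size and consumer name below are illustrative, not part of this patch):

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};

fn main() -> Result<()> {
    // A pool with a 1 MiB hard limit.
    let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(1024 * 1024));
    let mut reservation = MemoryConsumer::new("ExampleConsumer").register(&pool);
    reservation.try_grow(4096)?;

    // The new accessor lets code that holds only the reservation recover the
    // owning consumer, e.g. to label spill files or error messages.
    println!(
        "{} holds {} bytes",
        reservation.consumer().name(),
        reservation.size()
    );
    Ok(())
}
```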
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index bf786686f474f..797dd17a26b5d 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-optimizer" description = "DataFusion Query Optimizer" keywords = [ "datafusion", "query", "optimizer" ] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -40,17 +40,17 @@ unicode_expressions = ["datafusion-physical-expr/unicode_expressions"] [dependencies] arrow = { workspace = true } -async-trait = "0.1.41" +async-trait = { workspace = true } chrono = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -itertools = "0.11" -log = "^0.4" +itertools = { workspace = true } +log = { workspace = true } regex-syntax = "0.8.0" [dev-dependencies] -ctor = "0.2.0" +ctor = { workspace = true } datafusion-sql = { path = "../sql", version = "32.0.0" } env_logger = "0.10.0" diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index c8baae03efa25..b8e5b93e6692c 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -19,7 +19,7 @@ # DataFusion Query Optimizer -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. DataFusion has modular design, allowing individual crates to be re-used in other projects. 
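Reviewer note on the `ensure_distribution` change earlier in this diff, which replaces the per-operator downcasts to `ParquetExec`/`CsvExec` with the new trait hook: a hypothetical helper showing the calling convention of `ExecutionPlan::repartitioned` (the helper name is illustrative, not part of this patch):

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_physical_plan::ExecutionPlan;

/// Ask `plan` to repartition itself to `target_partitions`, keeping the
/// original plan when the operator does not support repartitioning (the
/// default implementation of `repartitioned` returns `Ok(None)`).
fn maybe_repartition(
    plan: Arc<dyn ExecutionPlan>,
    target_partitions: usize,
    config: &ConfigOptions,
) -> Result<Arc<dyn ExecutionPlan>> {
    Ok(plan
        .repartitioned(target_partitions, config)?
        .unwrap_or(plan))
}
```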
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 6269f27310a69..4be625e384b97 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -49,19 +49,19 @@ base64 = { version = "0.21", optional = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } hex = { version = "0.4", optional = true } -indexmap = "2.0.0" +indexmap = { workspace = true } itertools = { version = "0.11", features = ["use_std"] } libc = "0.2.140" -log = "^0.4" +log = { workspace = true } md-5 = { version = "^0.10.0", optional = true } paste = "^1.0" petgraph = "0.6.2" -rand = "0.8" +rand = { workspace = true } regex = { version = "1.8", optional = true } sha2 = { version = "^0.10.1", optional = true } unicode-segmentation = { version = "^1.7.1", optional = true } @@ -69,8 +69,8 @@ uuid = { version = "^1.2", features = ["v4"] } [dev-dependencies] criterion = "0.5" -rand = "0.8" -rstest = "0.18.0" +rand = { workspace = true } +rstest = { workspace = true } [[bench]] harness = false diff --git a/datafusion/physical-expr/README.md b/datafusion/physical-expr/README.md index a887d3eb29fe3..424256c77e7e2 100644 --- a/datafusion/physical-expr/README.md +++ b/datafusion/physical-expr/README.md @@ -19,7 +19,7 @@ # DataFusion Physical Expressions -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides data types and utilities for physical expressions. 
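Reviewer note, returning to the `ListingTableUrl` percent-decoding change earlier in this diff: a minimal sketch of the documented encoding rules using only the public `parse` API, mirroring the new unit tests (the paths are illustrative):

```rust
use datafusion::datasource::listing::ListingTableUrl;

fn main() {
    // `%2E` decodes to `.`: this URL refers to the file `/foo/bar.foo`.
    ListingTableUrl::parse("file:///foo/bar%2Efoo").expect("decoded path is valid");

    // To address a file literally named `bar%2Efoo`, the `%` itself must be
    // encoded as `%25`.
    ListingTableUrl::parse("file:///foo/bar%252Efoo").expect("double-encoded path is valid");

    // Unencoded characters that are not valid in an object store path are now
    // rejected instead of being silently re-encoded.
    ListingTableUrl::parse("file:///foo/😺").expect_err("invalid path segment");
}
```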
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 7077f8b598604..84fd301b84de0 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -811,7 +811,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { } } // Assume all arrays have the same data type - let data_type = list_arrays[0].value_type().clone(); + let data_type = list_arrays[0].value_type(); let buffer = valid.finish(); let elements = arrays diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index b7b5895db6d31..62da8ff9ed44e 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -28,7 +28,6 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use datafusion_common::plan_err; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; @@ -176,7 +175,7 @@ impl PhysicalExpr for UnKnownColumn { /// Evaluate the expression fn evaluate(&self, _batch: &RecordBatch) -> Result { - plan_err!("UnKnownColumn::evaluate() should not be called") + internal_err!("UnKnownColumn::evaluate() should not be called") } fn children(&self) -> Vec> { diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs index 497fb42fe4dff..95e6879a6c2d9 100644 --- a/datafusion/physical-expr/src/expressions/no_op.rs +++ b/datafusion/physical-expr/src/expressions/no_op.rs @@ -28,7 +28,7 @@ use arrow::{ use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; /// A place holder expression, can not be evaluated. 
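Reviewer note on the error reclassification in `column.rs` above (and in `no_op.rs` here and `sort_expr.rs` below): a minimal sketch of the intent of the three `datafusion_common` error macros; the error strings are illustrative:

```rust
use datafusion_common::{exec_err, internal_err, plan_err, Result};

fn classify(kind: &str) -> Result<()> {
    match kind {
        // The user's query itself is invalid: surfaced at planning time.
        "plan" => plan_err!("ORDER BY column not found"),
        // A valid plan hit a problem while executing.
        "exec" => exec_err!("Sort operation is not applicable to scalar value"),
        // An invariant was violated: reaching this indicates a bug, which is
        // why `evaluate()` on placeholder expressions now uses this variant.
        "internal" => internal_err!("evaluate() should not be called"),
        _ => Ok(()),
    }
}
```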
@@ -65,7 +65,7 @@ impl PhysicalExpr for NoOp { } fn evaluate(&self, _batch: &RecordBatch) -> Result { - plan_err!("NoOp::evaluate() should not be called") + internal_err!("NoOp::evaluate() should not be called") } fn children(&self) -> Vec> { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 8422862043aeb..b66bac41014da 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -357,6 +357,8 @@ where ColumnarValue::Array(a) => Some(a.len()), }); + let is_scalar = len.is_none(); + let inferred_length = len.unwrap_or(1); let args = args .iter() @@ -373,7 +375,14 @@ where .collect::>(); let result = (inner)(&args); - result.map(ColumnarValue::Array) + + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } }) } diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 9a74c2ca64d17..64c1d0be04558 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -448,3 +448,37 @@ pub fn create_physical_expr( } } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{ArrayRef, BooleanArray, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::{col, left, Literal}; + + #[test] + fn test_create_physical_expr_scalar_input_output() -> Result<()> { + let expr = col("letter").eq(left("APACHE".lit(), 1i64.lit())); + + let schema = Schema::new(vec![Field::new("letter", DataType::Utf8, false)]); + let df_schema = DFSchema::try_from_qualified_schema("data", &schema)?; + let p = create_physical_expr(&expr, &df_schema, &schema, &ExecutionProps::new())?; + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(StringArray::from_iter_values(vec![ + "A", "B", "C", "D", + ]))], + )?; + let result = p.evaluate(&batch)?; + let result = result.into_array(4); + + assert_eq!( + &result, + &(Arc::new(BooleanArray::from(vec![true, false, false, false,])) as ArrayRef) + ); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/sort_expr.rs b/datafusion/physical-expr/src/sort_expr.rs index 74179ba5947cc..3b3221289d31e 100644 --- a/datafusion/physical-expr/src/sort_expr.rs +++ b/datafusion/physical-expr/src/sort_expr.rs @@ -25,7 +25,7 @@ use crate::PhysicalExpr; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::record_batch::RecordBatch; -use datafusion_common::plan_err; +use datafusion_common::exec_err; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; @@ -66,7 +66,7 @@ impl PhysicalSortExpr { let array_to_sort = match value_to_sort { ColumnarValue::Array(array) => array, ColumnarValue::Scalar(scalar) => { - return plan_err!( + return exec_err!( "Sort operation is not applicable to scalar value {scalar}" ); } diff --git a/datafusion/physical-expr/src/sort_properties.rs b/datafusion/physical-expr/src/sort_properties.rs index 097f491cb9794..8ae3379218fbd 100644 --- a/datafusion/physical-expr/src/sort_properties.rs +++ b/datafusion/physical-expr/src/sort_properties.rs @@ -155,37 +155,36 @@ impl Neg for SortProperties { #[derive(Debug)] pub struct ExprOrdering { pub expr: Arc, - pub state: Option, - pub children_states: Option>, + pub state: SortProperties, + pub children_states: Vec, } impl 
ExprOrdering { + /// Creates a new [`ExprOrdering`] with [`SortProperties::Unordered`] states + /// for `expr` and its children. pub fn new(expr: Arc) -> Self { + let size = expr.children().len(); Self { expr, - state: None, - children_states: None, + state: SortProperties::Unordered, + children_states: vec![SortProperties::Unordered; size], } } - pub fn children(&self) -> Vec { + /// Updates this [`ExprOrdering`]'s children states with the given states. + pub fn with_new_children(mut self, children_states: Vec) -> Self { + self.children_states = children_states; + self + } + + /// Creates new [`ExprOrdering`] objects for each child of the expression. + pub fn children_expr_orderings(&self) -> Vec { self.expr .children() .into_iter() .map(ExprOrdering::new) .collect() } - - pub fn new_with_children( - children_states: Vec, - parent_expr: Arc, - ) -> Self { - Self { - expr: parent_expr, - state: None, - children_states: Some(children_states), - } - } } impl TreeNode for ExprOrdering { @@ -193,7 +192,7 @@ impl TreeNode for ExprOrdering { where F: FnMut(&Self) -> Result, { - for child in self.children() { + for child in self.children_expr_orderings() { match op(&child)? { VisitRecursion::Continue => {} VisitRecursion::Skip => return Ok(VisitRecursion::Continue), @@ -207,17 +206,20 @@ impl TreeNode for ExprOrdering { where F: FnMut(Self) -> Result, { - let children = self.children(); - if children.is_empty() { + if self.children_states.is_empty() { Ok(self) } else { - Ok(ExprOrdering::new_with_children( - children + let child_expr_orderings = self.children_expr_orderings(); + // After mapping over the children, the function `F` applies to the + // current object and updates its state. + Ok(self.with_new_children( + child_expr_orderings .into_iter() + // Update children states after this transformation: .map(transform) - .map_ok(|c| c.state.unwrap_or(SortProperties::Unordered)) + // Extract the state (i.e. sort properties) information: + .map_ok(|c| c.state) .collect::>>()?, - self.expr, )) } } @@ -248,13 +250,13 @@ pub fn update_ordering( // a BinaryExpr like a + b), and there is an ordering equivalence of // it (let's say like c + d), we actually can find it at this step. 
if sort_expr.expr.eq(&node.expr) { - node.state = Some(SortProperties::Ordered(sort_expr.options)); + node.state = SortProperties::Ordered(sort_expr.options); return Ok(Transformed::Yes(node)); } - if let Some(children_sort_options) = &node.children_states { + if !node.expr.children().is_empty() { // We have an intermediate (non-leaf) node, account for its children: - node.state = Some(node.expr.get_ordering(children_sort_options)); + node.state = node.expr.get_ordering(&node.children_states); } else if let Some(column) = node.expr.as_any().downcast_ref::() { // We have a Column, which is one of the two possible leaf node types: node.state = get_indices_of_matching_sort_exprs_with_order_eq( @@ -268,10 +270,11 @@ pub fn update_ordering( descending: sort_options[0].descending, nulls_first: sort_options[0].nulls_first, }) - }); + }) + .unwrap_or(SortProperties::Unordered); } else { // We have a Literal, which is the other possible leaf node type: - node.state = Some(node.expr.get_ordering(&[])); + node.state = node.expr.get_ordering(&[]); } Ok(Transformed::Yes(node)) } diff --git a/datafusion/physical-expr/src/utils.rs b/datafusion/physical-expr/src/utils.rs index b2a6bb5ca6d21..b38117d206cc4 100644 --- a/datafusion/physical-expr/src/utils.rs +++ b/datafusion/physical-expr/src/utils.rs @@ -773,7 +773,7 @@ pub fn find_orderings_of_exprs( &input_ordering_equal_properties, ) })?; - if let Some(SortProperties::Ordered(sort_options)) = transformed.state { + if let SortProperties::Ordered(sort_options) = transformed.state { orderings.push(Some(PhysicalSortExpr { expr: Arc::new(Column::new(name, index)), options: sort_options, @@ -1836,4 +1836,92 @@ mod tests { Ok(()) } + + #[test] + fn test_find_orderings_of_exprs() -> Result<()> { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + ]); + + let mut eq = EquivalenceProperties::new(Arc::new(schema.clone())); + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + let col_d = &col("d", &schema)?; + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + // b=a (e.g they are aliases) + eq.add_equal_conditions((&Column::new("b", 1), &Column::new("a", 0))); + let mut oeq = OrderingEquivalenceProperties::new(Arc::new(schema.clone())); + // [b ASC], [d ASC] + oeq.add_equal_conditions(( + &vec![PhysicalSortExpr { + expr: col_b.clone(), + options: option_asc, + }], + &vec![PhysicalSortExpr { + expr: col_d.clone(), + options: option_asc, + }], + )); + + let orderings = find_orderings_of_exprs( + &[ + // d + b + ( + Arc::new(BinaryExpr::new( + col_d.clone(), + Operator::Plus, + col_b.clone(), + )), + "d+b".to_string(), + ), + // b as b_new + (col_b.clone(), "b_new".to_string()), + // a as a_new + (col_a.clone(), "a_new".to_string()), + // a + c + ( + Arc::new(BinaryExpr::new( + col_a.clone(), + Operator::Plus, + col_c.clone(), + )), + "a+c".to_string(), + ), + ], + Some(&[PhysicalSortExpr { + expr: col_b.clone(), + options: option_asc, + }]), + eq, + oeq, + )?; + + assert_eq!( + vec![ + Some(PhysicalSortExpr { + expr: Arc::new(Column::new("d+b", 0)), + options: option_asc, + }), + Some(PhysicalSortExpr { + expr: Arc::new(Column::new("b_new", 1)), + options: option_asc, + }), + Some(PhysicalSortExpr { + expr: Arc::new(Column::new("a_new", 2)), + options: option_asc, + }), + None, + ], + orderings + ); + + Ok(()) + } } diff --git 
a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml
index 2dfcf12e350a0..82c8f49a764fa 100644
--- a/datafusion/physical-plan/Cargo.toml
+++ b/datafusion/physical-plan/Cargo.toml
@@ -19,9 +19,9 @@
 name = "datafusion-physical-plan"
 description = "Physical (ExecutionPlan) implementations for DataFusion query engine"
 keywords = ["arrow", "query", "sql"]
+readme = "README.md"
 version = { workspace = true }
 edition = { workspace = true }
-readme = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
 license = { workspace = true }
@@ -38,26 +38,26 @@ arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-schema = { workspace = true }
-async-trait = "0.1.41"
+async-trait = { workspace = true }
 chrono = { version = "0.4.23", default-features = false }
-datafusion-common = { path = "../common", version = "32.0.0", default-features = false }
-datafusion-execution = { path = "../execution", version = "32.0.0" }
-datafusion-expr = { path = "../expr", version = "32.0.0" }
-datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0" }
-futures = "0.3"
+datafusion-common = { workspace = true }
+datafusion-execution = { workspace = true }
+datafusion-expr = { workspace = true }
+datafusion-physical-expr = { workspace = true }
+futures = { workspace = true }
 half = { version = "2.1", default-features = false }
 hashbrown = { version = "0.14", features = ["raw"] }
-indexmap = "2.0.0"
+indexmap = { workspace = true }
 itertools = { version = "0.11", features = ["use_std"] }
-log = "^0.4"
+log = { workspace = true }
 once_cell = "1.18.0"
-parking_lot = "0.12"
+parking_lot = { workspace = true }
 pin-project-lite = "^0.2.7"
-rand = "0.8"
+rand = { workspace = true }
 tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] }
 uuid = { version = "^1.2", features = ["v4"] }

 [dev-dependencies]
-rstest = "0.18.0"
+rstest = { workspace = true }
 termtree = "0.4.1"
 tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
diff --git a/datafusion/physical-plan/README.md b/datafusion/physical-plan/README.md
new file mode 100644
index 0000000000000..366a6b555150e
--- /dev/null
+++ b/datafusion/physical-plan/README.md
@@ -0,0 +1,27 @@
+
+
+# DataFusion Physical Plan
+
+[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+
+This crate is a submodule of DataFusion that contains the `ExecutionPlan` trait and the various implementations of that
+trait for built-in operators such as filters, projections, joins, aggregations, etc.
+
+[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 4c612223178c9..da152a6264afa 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -2155,7 +2155,7 @@ mod tests {
         spill: bool,
     ) -> Result<()> {
         let task_ctx = if spill {
-            new_spill_ctx(2, 2812)
+            new_spill_ctx(2, 2886)
         } else {
             Arc::new(TaskContext::default())
         };
diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
index d773533ad6a32..7b660885845b2 100644
--- a/datafusion/physical-plan/src/aggregates/row_hash.rs
+++ b/datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -673,7 +673,16 @@ impl GroupedHashAggregateStream {
         let spillfile = self.runtime.disk_manager.create_tmp_file("HashAggSpill")?;
         let mut writer = IPCWriter::new(spillfile.path(), &emit.schema())?;
         // TODO: slice large `sorted` and write to multiple files in parallel
-        writer.write(&sorted)?;
+        let mut offset = 0;
+        let total_rows = sorted.num_rows();
+
+        while offset < total_rows {
+            let length = std::cmp::min(total_rows - offset, self.batch_size);
+            let batch = sorted.slice(offset, length);
+            offset += batch.num_rows();
+            writer.write(&batch)?;
+        }
+
         writer.finish()?;
         self.spill_state.spills.push(spillfile);
         Ok(())
diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs
index bdff46c498530..3487476f08d6a 100644
--- a/datafusion/physical-plan/src/joins/hash_join.rs
+++ b/datafusion/physical-plan/src/joins/hash_join.rs
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.

-//! Defines the join plan for executing partitions in parallel and then joining the results
-//! into a set of partitions.
+//! [`HashJoinExec`] Partitioned Hash Join Operator

 use std::fmt;
 use std::mem::size_of;
@@ -81,29 +80,140 @@
 type JoinLeftData = (JoinHashMap, RecordBatch, MemoryReservation);

 /// Tuple representing last matched probe-build side indices for partial join output
 type MatchedIndicesPair = Option<(usize, usize)>;

-/// Join execution plan executes partitions in parallel and combines them into a set of
-/// partitions.
+/// Join execution plan: Evaluates equijoin predicates in parallel on multiple
+/// partitions using a hash table and an optional filter list to apply post
+/// join.
+///
+/// # Join Expressions
+///
+/// This implementation is optimized for evaluating equijoin predicates
+/// (`<col1> = <col2>`) expressions, which are represented as a list of `Columns`
+/// in [`Self::on`].
+///
+/// Non-equality predicates, which cannot be pushed down to the join inputs (e.g.
+/// `<col1> != <col2>`) are known as "filter expressions" and are evaluated
+/// after the equijoin predicates.
+///
+/// # "Build Side" vs "Probe Side"
+///
+/// HashJoin takes two inputs, which are referred to as the "build" and the
+/// "probe". The build side is the first child, and the probe side is the second
+/// child.
+///
+/// The two inputs are treated differently and it is VERY important that the
+/// *smaller* input is placed on the build side to minimize the work of creating
+/// the hash table.
+/// +/// ```text +/// ┌───────────┐ +/// │ HashJoin │ +/// │ │ +/// └───────────┘ +/// │ │ +/// ┌─────┘ └─────┐ +/// ▼ ▼ +/// ┌────────────┐ ┌─────────────┐ +/// │ Input │ │ Input │ +/// │ [0] │ │ [1] │ +/// └────────────┘ └─────────────┘ +/// +/// "build side" "probe side" +/// ``` +/// +/// Execution proceeds in 2 stages: /// -/// Filter expression expected to contain non-equality predicates that can not be pushed -/// down to any of join inputs. -/// In case of outer join, filter applied to only matched rows. +/// 1. the **build phase** where a hash table is created from the tuples of the +/// build side. +/// +/// 2. the **probe phase** where the tuples of the probe side are streamed +/// through, checking for matches of the join keys in the hash table. +/// +/// ```text +/// ┌────────────────┐ ┌────────────────┐ +/// │ ┌─────────┐ │ │ ┌─────────┐ │ +/// │ │ Hash │ │ │ │ Hash │ │ +/// │ │ Table │ │ │ │ Table │ │ +/// │ │(keys are│ │ │ │(keys are│ │ +/// │ │equi join│ │ │ │equi join│ │ Stage 2: batches from +/// Stage 1: the │ │columns) │ │ │ │columns) │ │ the probe side are +/// *entire* build │ │ │ │ │ │ │ │ streamed through, and +/// side is read │ └─────────┘ │ │ └─────────┘ │ checked against the +/// into the hash │ ▲ │ │ ▲ │ contents of the hash +/// table │ HashJoin │ │ HashJoin │ table +/// └──────┼─────────┘ └──────────┼─────┘ +/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// │ │ +/// +/// │ │ +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// ... ... +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// +/// build side probe side +/// +/// ``` +/// +/// # Example "Optimal" Plans +/// +/// The differences in the inputs means that for classic "Star Schema Query", +/// the optimal plan will be a **"Right Deep Tree"** . A Star Schema Query is +/// one where there is one large table and several smaller "dimension" tables, +/// joined on `Foreign Key = Primary Key` predicates. 
+///
+/// A "Right Deep Tree" looks like this, with the large table as the probe side
+/// on the lowest join:
+///
+/// ```text
+///             ┌───────────┐
+///             │ HashJoin  │
+///             │           │
+///             └───────────┘
+///                 │   │
+///         ┌───────┘   └──────────┐
+///         ▼                      ▼
+/// ┌───────────────┐        ┌───────────┐
+/// │ small table 1 │        │ HashJoin  │
+/// │  "dimension"  │        │           │
+/// └───────────────┘        └───┬───┬───┘
+///                   ┌──────────┘   └───────┐
+///                   │                      │
+///                   ▼                      ▼
+///           ┌───────────────┐        ┌───────────┐
+///           │ small table 2 │        │ HashJoin  │
+///           │  "dimension"  │        │           │
+///           └───────────────┘        └───┬───┬───┘
+///                               ┌────────┘   └────────┐
+///                               │                     │
+///                               ▼                     ▼
+///                       ┌───────────────┐     ┌───────────────┐
+///                       │ small table 3 │     │  large table  │
+///                       │  "dimension"  │     │    "fact"     │
+///                       └───────────────┘     └───────────────┘
+/// ```
 #[derive(Debug)]
 pub struct HashJoinExec {
     /// left (build) side which gets hashed
     pub left: Arc<dyn ExecutionPlan>,
     /// right (probe) side which are filtered by the hash table
     pub right: Arc<dyn ExecutionPlan>,
-    /// Set of common columns used to join on
+    /// Set of equijoin columns from the relations: `(left_col, right_col)`
     pub on: Vec<(Column, Column)>,
     /// Filters which are applied while finding matching rows
     pub filter: Option<JoinFilter>,
-    /// How the join is performed
+    /// How the join is performed (`OUTER`, `INNER`, etc)
     pub join_type: JoinType,
-    /// The schema once the join is applied
+    /// The output schema for the join
     schema: SchemaRef,
     /// Build-side data
     left_fut: OnceAsync<JoinLeftData>,
     /// Shares the `RandomState` for the hashing algorithm
     random_state: RandomState,
     /// Output order
     output_order: Option<Vec<PhysicalSortExpr>>,
@@ -113,12 +223,16 @@ pub struct HashJoinExec {
     metrics: ExecutionPlanMetricsSet,
     /// Information of index and left / right placement of columns
     column_indices: Vec<ColumnIndex>,
-    /// If null_equals_null is true, null == null else null != null
+    /// Null matching behavior: If `null_equals_null` is true, rows that have
+    /// `null`s in both left and right equijoin columns will be matched.
+    /// Otherwise, rows that have `null`s in the join columns will not be
+    /// matched and thus will not appear in the output.
     pub null_equals_null: bool,
 }

 impl HashJoinExec {
     /// Tries to create a new [HashJoinExec].
+    ///
     /// # Error
     /// This function errors when it is not possible to join the left and right sides on keys `on`.
     pub fn try_new(
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index b2f81579f8e83..8ae2a86866745 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -76,6 +76,7 @@ pub use crate::metrics::Metric;
 pub use crate::topk::TopK;
 pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor};

+use datafusion_common::config::ConfigOptions;
 pub use datafusion_common::hash_utils;
 pub use datafusion_common::utils::project_schema;
 pub use datafusion_common::{internal_err, ColumnStatistics, Statistics};
@@ -209,7 +210,136 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
         children: Vec<Arc<dyn ExecutionPlan>>,
     ) -> Result<Arc<dyn ExecutionPlan>>;

-    /// creates an iterator
+    /// If supported, attempt to increase the partitioning of this `ExecutionPlan` to
+    /// produce `target_partitions` partitions.
+    ///
+    /// If the `ExecutionPlan` does not support changing its partitioning,
+    /// returns `Ok(None)` (the default).
+    ///
+    /// If the `ExecutionPlan` can increase its partitioning, but not up to the
+    /// `target_partitions`, it may return an `ExecutionPlan` with fewer
+    /// partitions. 
This might happen, for example, if each new partition would + /// be too small to be efficiently processed individually. + /// + /// The DataFusion optimizer attempts to use as many threads as possible by + /// repartitioning its inputs to match the target number of threads + /// available (`target_partitions`). Some data sources, such as the built in + /// CSV and Parquet readers, implement this method as they are able to read + /// from their input files in parallel, regardless of how the source data is + /// split amongst files. + fn repartitioned( + &self, + _target_partitions: usize, + _config: &ConfigOptions, + ) -> Result>> { + Ok(None) + } + + /// Begin execution of `partition`, returning a stream of [`RecordBatch`]es. + /// + /// # Implementation Examples + /// + /// ## Return Precomputed Batch + /// + /// We can return a precomputed batch as a stream + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// batch: RecordBatch, + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// let fut = futures::future::ready(Ok(self.batch.clone())); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream))) + /// } + /// } + /// ``` + /// + /// ## Async Compute Batch + /// + /// We can also lazily compute a RecordBatch when the returned stream is polled + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// async fn get_batch() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// let fut = get_batch(); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` + /// + /// ## Async Compute Batch Stream + /// + /// We can lazily compute a RecordBatch stream when the returned stream is polled + /// flattening the result into a single stream + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use futures::TryStreamExt; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// async fn get_batch_stream() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// // A future that yields a stream + /// let fut = get_batch_stream(); + /// // Use TryStreamExt::try_flatten to flatten the stream of streams + /// let stream = 
futures::stream::once(fut).try_flatten(); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` + /// + /// See [`futures::stream::StreamExt`] and [`futures::stream::TryStreamExt`] for further + /// combinators that can be used with streams fn execute( &self, partition: usize, @@ -217,7 +347,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { ) -> Result; /// Return a snapshot of the set of [`Metric`]s for this - /// [`ExecutionPlan`]. + /// [`ExecutionPlan`]. If no `Metric`s are available, return None. /// /// While the values of the metrics in the returned /// [`MetricsSet`]s may change as execution progresses, the diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index c7d676493f04a..08fa2c25d792c 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -735,7 +735,13 @@ impl SortExec { self } - /// Whether this `SortExec` preserves partitioning of the children + /// Modify how many rows to include in the result + /// + /// If None, then all rows will be returned, in sorted order. + /// If Some, then only the top `fetch` rows will be returned. + /// This can reduce the memory pressure required by the sort + /// operation since rows that are not going to be included + /// can be dropped. pub fn with_fetch(mut self, fetch: Option) -> Self { self.fetch = fetch; self diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 32e10e58a7d7d..72a4df66ebd7b 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -44,14 +44,14 @@ parquet = ["datafusion/parquet", "datafusion-common/parquet"] arrow = { workspace = true } chrono = { workspace = true } datafusion = { path = "../core", version = "32.0.0" } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } object_store = { version = "0.7.0" } pbjson = { version = "0.5", optional = true } prost = "0.12.0" serde = { version = "1.0", optional = true } -serde_json = { version = "1.0", optional = true } +serde_json = { workspace = true, optional = true } [dev-dependencies] -doc-comment = "0.3" +doc-comment = { workspace = true } tokio = "1.18" diff --git a/datafusion/proto/README.md b/datafusion/proto/README.md index fd66d54aa2de9..171aadb744d69 100644 --- a/datafusion/proto/README.md +++ b/datafusion/proto/README.md @@ -19,7 +19,7 @@ # DataFusion Proto -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides a protocol buffer format for representing query plans and expressions. 
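Reviewer note, stepping back to the clarified `SortExec::with_fetch` documentation above: a short sketch of a top-K sort; `input` and `sort_exprs` here are placeholders supplied by the caller, not part of this patch:

```rust
use std::sync::Arc;

use datafusion_physical_expr::PhysicalSortExpr;
use datafusion_physical_plan::sorts::sort::SortExec;
use datafusion_physical_plan::ExecutionPlan;

/// Sort `input` by `sort_exprs`, but keep only the top 10 rows: the fetch
/// limit lets the operator discard non-qualifying rows early instead of
/// buffering the entire sorted input.
fn top_10(
    input: Arc<dyn ExecutionPlan>,
    sort_exprs: Vec<PhysicalSortExpr>,
) -> SortExec {
    SortExec::new(sort_exprs, input).with_fetch(Some(10))
}
```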
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index a00a7f0213520..b91a2ac1fbd7e 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-sql" description = "DataFusion SQL Query Planner" keywords = ["datafusion", "sql", "parser", "planner"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -39,13 +39,13 @@ unicode_expressions = [] [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } -log = "^0.4" +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +log = { workspace = true } sqlparser = { workspace = true } [dev-dependencies] -ctor = "0.2.0" -env_logger = "0.10" +ctor = { workspace = true } +env_logger = { workspace = true } paste = "^1.0" rstest = "0.18" diff --git a/datafusion/sql/README.md b/datafusion/sql/README.md index 2ad994e4eba5c..256fa774b4105 100644 --- a/datafusion/sql/README.md +++ b/datafusion/sql/README.md @@ -20,7 +20,7 @@ # DataFusion SQL Query Planner This crate provides a general purpose SQL query planner that can parse SQL and translate queries into logical -plans. Although this crate is used by the [DataFusion](df) query engine, it was designed to be easily usable from any +plans. Although this crate is used by the [DataFusion][df] query engine, it was designed to be easily usable from any project that requires a SQL query planner and does not make any assumptions about how the resulting logical plan will be translated to a physical plan. For example, there is no concept of row-based versus columnar execution in the logical plan. diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 373388277351e..c58b8319ceb72 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -36,44 +36,57 @@ use super::arrow_cast::ARROW_CAST_NAME; impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(super) fn sql_function_to_expr( &self, - mut function: SQLFunction, + function: SQLFunction, schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let name = if function.name.0.len() > 1 { + let SQLFunction { + name, + args, + over, + distinct, + filter, + null_treatment, + special: _, // true if not called with trailing parens + order_by, + } = function; + + if let Some(null_treatment) = null_treatment { + return not_impl_err!("Null treatment in aggregate functions is not supported: {null_treatment}"); + } + + let name = if name.0.len() > 1 { // DF doesn't handle compound identifiers // (e.g. 
"foo.bar") for function names yet - function.name.to_string() + name.to_string() } else { - crate::utils::normalize_ident(function.name.0[0].clone()) + crate::utils::normalize_ident(name.0[0].clone()) }; // user-defined function (UDF) should have precedence in case it has the same name as a scalar built-in function if let Some(fm) = self.context_provider.get_function_meta(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::ScalarUDF(ScalarUDF::new(fm, args))); } // next, scalar built-in if let Ok(fun) = BuiltinScalarFunction::from_str(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))); }; // If function is a window function (it has an OVER clause), // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. - let is_function_window = function.over.is_some(); - if !function.order_by.is_empty() && is_function_window { + let is_function_window = over.is_some(); + if !order_by.is_empty() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" ); } // then, window function - if let Some(WindowType::WindowSpec(window)) = function.over.take() { + if let Some(WindowType::WindowSpec(window)) = over { let partition_by = window .partition_by .into_iter() @@ -97,11 +110,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if let Ok(fun) = self.find_window_func(&name) { let expr = match fun { WindowFunction::AggregateFunction(aggregate_fun) => { - let args = self.function_args_to_expr( - function.args, - schema, - planner_context, - )?; + let args = + self.function_args_to_expr(args, schema, planner_context)?; Expr::WindowFunction(expr::WindowFunction::new( WindowFunction::AggregateFunction(aggregate_fun), @@ -113,11 +123,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } _ => Expr::WindowFunction(expr::WindowFunction::new( fun, - self.function_args_to_expr( - function.args, - schema, - planner_context, - )?, + self.function_args_to_expr(args, schema, planner_context)?, partition_by, order_by, window_frame, @@ -128,8 +134,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } else { // User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function if let Some(fm) = self.context_provider.get_aggregate_meta(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::AggregateUDF(expr::AggregateUDF::new( fm, args, None, None, ))); @@ -137,25 +142,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // next, aggregate built-ins if let Ok(fun) = AggregateFunction::from_str(&name) { - let distinct = function.distinct; - let order_by = self.order_by_to_sort_expr( - &function.order_by, - schema, - planner_context, - )?; + let order_by = + self.order_by_to_sort_expr(&order_by, schema, planner_context)?; let order_by = (!order_by.is_empty()).then_some(order_by); - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; + let filter: Option> = filter + .map(|e| self.sql_expr_to_logical_expr(*e, schema, planner_context)) + .transpose()? 
+ .map(Box::new); return Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, args, distinct, None, order_by, + fun, args, distinct, filter, order_by, ))); }; // Special case arrow_cast (as its type is dependent on its argument value) if name == ARROW_CAST_NAME { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return super::arrow_cast::create_arrow_cast(args, schema); } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 26184834874a6..1cf0fc133f040 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -222,7 +222,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { planner_context, ), - SQLExpr::Cast { expr, data_type } => Ok(Expr::Cast(Cast::new( + SQLExpr::Cast { + expr, data_type, .. + } => Ok(Expr::Cast(Cast::new( Box::new(self.sql_expr_to_logical_expr( *expr, schema, @@ -231,7 +233,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.convert_data_type(&data_type)?, ))), - SQLExpr::TryCast { expr, data_type } => Ok(Expr::TryCast(TryCast::new( + SQLExpr::TryCast { + expr, data_type, .. + } => Ok(Expr::TryCast(TryCast::new( Box::new(self.sql_expr_to_logical_expr( *expr, schema, @@ -412,6 +416,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { expr, trim_where, trim_what, + .. } => self.sql_trim_to_expr( *expr, trim_where, @@ -477,10 +482,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.parse_array_agg(array_agg, schema, planner_context) } + SQLExpr::Struct { values, fields } => { + self.parse_struct(values, fields, schema, planner_context) + } + _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } + fn parse_struct( + &self, + values: Vec<SQLExpr>, + fields: Vec<StructField>, + input_schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result<Expr> { + if !fields.is_empty() { + return not_impl_err!("Struct fields are not supported yet"); + } + let args = values + .into_iter() + .map(|value| { + self.sql_expr_to_logical_expr(value, input_schema, planner_context) + }) + .collect::<Result<Vec<_>>>()?; + Ok(Expr::ScalarFunction(ScalarFunction::new( + BuiltinScalarFunction::Struct, + args, + ))) + } + fn parse_array_agg( &self, array_agg: ArrayAgg, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f7d8307d33a05..ca5e260aee050 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,8 +24,8 @@ use arrow_schema::*; use datafusion_common::field_not_found; use datafusion_common::internal_err; use datafusion_expr::WindowUDF; -use sqlparser::ast::ExactNumberInfo; use sqlparser::ast::TimezoneInfo; +use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; @@ -297,14 +297,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> { match sql_type { - SQLDataType::Array(Some(inner_sql_type)) => { + SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) + | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type)) => { let data_type = self.convert_simple_data_type(inner_sql_type)?; Ok(DataType::List(Arc::new(Field::new( "field", data_type, true, )))) } - SQLDataType::Array(None) => { + SQLDataType::Array(ArrayElemTypeDef::None) => { not_impl_err!("Arrays with unspecified type is not supported") } other =>
self.convert_simple_data_type(other), @@ -330,7 +331,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Char(_) | SQLDataType::Varchar(_) | SQLDataType::Text - | SQLDataType::String => Ok(DataType::Utf8), + | SQLDataType::String(_) => Ok(DataType::Utf8), SQLDataType::Timestamp(None, tz_info) => { let tz = if matches!(tz_info, TimezoneInfo::Tz) || matches!(tz_info, TimezoneInfo::WithTimeZone) @@ -400,7 +401,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Dec(_) | SQLDataType::BigNumeric(_) | SQLDataType::BigDecimal(_) - | SQLDataType::Clob(_) => not_impl_err!( + | SQLDataType::Clob(_) + | SQLDataType::Bytes(_) + | SQLDataType::Int64 + | SQLDataType::Float64 + | SQLDataType::Struct(_) + => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), } diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index e771a5ba3de46..7300d49be0f55 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -44,6 +44,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SetQuantifier::AllByName => { return not_impl_err!("UNION ALL BY NAME not implemented") } + SetQuantifier::DistinctByName => { + return not_impl_err!("UNION DISTINCT BY NAME not implemented") + } }; let left_plan = self.set_expr_to_plan(*left, planner_context)?; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 80a27db6e63d0..9d9c55361a5e9 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -430,6 +430,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { table, on, returning, + ignore, } => { if or.is_some() { plan_err!("Inserts with or clauses not supported")?; } @@ -449,6 +450,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if returning.is_some() { plan_err!("Insert-returning clause not supported")?; } + if ignore { + plan_err!("Insert-ignore clause not supported")?; + } let _ = into; // optional keyword doesn't change behavior self.insert_to_plan(table_name, columns, source, overwrite) } @@ -471,6 +475,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { selection, returning, from, + order_by, + limit, } => { if !tables.is_empty() { plan_err!("DELETE not supported")?; } @@ -483,6 +489,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if returning.is_some() { plan_err!("Delete-returning clause not yet supported")?; } + + if !order_by.is_empty() { + plan_err!("Delete-order-by clause not yet supported")?; + } + + if limit.is_some() { + plan_err!("Delete-limit clause not yet supported")?; + } + let table_name = self.get_delete_target(from)?; self.delete_to_plan(table_name, selection) } @@ -963,10 +978,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Do a table lookup to verify the table exists let table_name = self.object_name_to_table_reference(table_name)?; let table_source = self.context_provider.get_table_source(table_name.clone())?; - let arrow_schema = (*table_source.schema()).clone(); let table_schema = Arc::new(DFSchema::try_from_qualified_schema( table_name.clone(), - &arrow_schema, + &table_source.schema(), )?); // Overwrite with assignment expressions @@ -985,21 +999,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) .collect::<Result<HashMap<String, ast::Expr>>>()?; - let values_and_types = table_schema - .fields() - .iter() - .map(|f| { - let col_name = f.name(); - let val = assign_map.remove(col_name).unwrap_or_else(|| { - ast::Expr::Identifier(ast::Ident::from(col_name.as_str())) - }); - (col_name, val, f.data_type()) - }) - .collect::<Vec<_>>(); - - // Build scan - let from = from.unwrap_or(table); - let scan =
self.plan_from_tables(vec![from], &mut planner_context)?; + // Build scan, join with from table if it exists. + let mut input_tables = vec![table]; + input_tables.extend(from); + let scan = self.plan_from_tables(input_tables, &mut planner_context)?; // Filter let source = match predicate_expr { @@ -1007,33 +1010,49 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(predicate_expr) => { let filter_expr = self.sql_to_expr( predicate_expr, - &table_schema, + scan.schema(), &mut planner_context, )?; let mut using_columns = HashSet::new(); expr_to_columns(&filter_expr, &mut using_columns)?; let filter_expr = normalize_col_with_schemas_and_ambiguity_check( filter_expr, - &[&[&table_schema]], + &[&[&scan.schema()]], &[using_columns], )?; LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(scan))?) } }; - // Projection - let mut exprs = vec![]; - for (col_name, expr, dt) in values_and_types.into_iter() { - let mut expr = self.sql_to_expr(expr, &table_schema, &mut planner_context)?; - // Update placeholder's datatype to the type of the target column - if let datafusion_expr::Expr::Placeholder(placeholder) = &mut expr { - placeholder.data_type = - placeholder.data_type.take().or_else(|| Some(dt.clone())); - } - // Cast to target column type, if necessary - let expr = expr.cast_to(dt, source.schema())?.alias(col_name); - exprs.push(expr); - } + // Build updated values for each column, using the previous value if not modified + let exprs = table_schema + .fields() + .iter() + .map(|field| { + let expr = match assign_map.remove(field.name()) { + Some(new_value) => { + let mut expr = self.sql_to_expr( + new_value, + source.schema(), + &mut planner_context, + )?; + // Update placeholder's datatype to the type of the target column + if let datafusion_expr::Expr::Placeholder(placeholder) = &mut expr + { + placeholder.data_type = placeholder + .data_type + .take() + .or_else(|| Some(field.data_type().clone())); + } + // Cast to target column type, if necessary + expr.cast_to(field.data_type(), source.schema())? 
+ } + None => datafusion_expr::Expr::Column(field.qualified_column()), + }; + Ok(expr.alias(field.name())) + }) + .collect::<Result<Vec<_>>>()?; + let source = project(source, exprs)?; let plan = LogicalPlan::Dml(DmlStatement { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 2446ee0a58418..ff6dca7eef2a8 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1287,6 +1287,16 @@ fn select_simple_aggregate_repeated_aggregate_with_unique_aliases() { ); } +#[test] +fn select_simple_aggregate_respect_nulls() { + let sql = "SELECT MIN(age) RESPECT NULLS FROM person"; + let err = logical_plan(sql).expect_err("query should have failed"); + + assert_contains!( + err.strip_backtrace(), + "This feature is not implemented: Null treatment in aggregate functions is not supported: RESPECT NULLS" + ); +} #[test] fn select_from_typed_string_values() { quick_test( diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 454f99942f52c..07debf179529f 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -21,7 +21,7 @@ edition = { workspace = true } homepage = { workspace = true } license = { workspace = true } name = "datafusion-sqllogictest" -readme = { workspace = true } +readme = "README.md" repository = { workspace = true } rust-version = { workspace = true } version = { workspace = true } @@ -32,24 +32,24 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } -async-trait = "0.1.41" -bigdecimal = "0.4.1" +async-trait = { workspace = true } +bigdecimal = { workspace = true } bytes = { version = "1.4.0", optional = true } chrono = { workspace = true, optional = true } datafusion = { path = "../core", version = "32.0.0" } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } +datafusion-common = { workspace = true } futures = { version = "0.3.28" } -half = "2.2.1" -itertools = "0.11" -log = "^0.4" -object_store = "0.7.0" +half = { workspace = true } +itertools = { workspace = true } +log = { workspace = true } +object_store = { workspace = true } postgres-protocol = { version = "0.6.4", optional = true } postgres-types = { version = "0.2.4", optional = true } rust_decimal = { version = "1.27.0" } sqllogictest = "0.17.0" sqlparser = { workspace = true } -tempfile = "3" -thiserror = "1.0.44" +tempfile = { workspace = true } +thiserror = { workspace = true } tokio = { version = "1.0" } tokio-postgres = { version = "0.7.7", optional = true } @@ -58,8 +58,8 @@ avro = ["datafusion/avro"] postgres = ["bytes", "chrono", "tokio-postgres", "postgres-types", "postgres-protocol"] [dev-dependencies] -env_logger = { workspace = true } -num_cpus = "1.13.0" +env_logger = { workspace = true } +num_cpus = { workspace = true } [[test]] harness = false diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index 3e94859d35a79..0349ed852f468 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -17,19 +17,26 @@ under the License. --> -#### Overview +# DataFusion sqllogictest -This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We -use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files -in [`test_files`](test_files). +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
-#### Testing setup +This crate is a submodule of DataFusion that contains an implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). + +[df]: https://crates.io/crates/datafusion + +## Overview + +This crate uses [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) to parse and run `.slt` files in the +[`test_files`](test_files) directory of this crate. + +## Testing setup 1. `rustup update stable` DataFusion uses the latest stable release of rust 2. `git submodule init` 3. `git submodule update` -#### Running tests: TLDR Examples +## Running tests: TLDR Examples ```shell # Run all tests @@ -56,7 +63,7 @@ cargo test --test sqllogictests -- ddl --complete RUST_LOG=debug cargo test --test sqllogictests -- ddl ``` -#### Cookbook: Adding Tests +## Cookbook: Adding Tests 1. Add queries @@ -95,11 +102,11 @@ SELECT * from foo; Assuming it looks good, check it in! -#### Reference +# Reference -#### Running tests: Validation Mode +## Running tests: Validation Mode -In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the +In this mode, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run. For example, to run all test suites in validation mode ```shell cargo test --test sqllogictests ``` sqllogictests also supports `cargo test` style substring matches on file names to restrict which tests to run ```shell cargo test --test sqllogictests -- information ``` -#### Running tests: Postgres compatibility +## Running tests: Postgres compatibility Test files that start with prefix `pg_compat_` verify compatibility with Postgres by running the same script files both with DataFusion and with Postgres In order to run the sqllogictests against a previously running Postgres instance, do: @@ -145,7 +152,7 @@ docker run \ postgres ``` -#### Running Tests: `tpch` +## Running Tests: `tpch` Test files in the `tpch` directory run against the `TPCH` data set (SF = 0.1), which must be generated before running. You can use the following command to generate the `TPCH` data set, for example: @@ -165,7 +172,7 @@ Then you need to add `INCLUDE_TPCH=true` to run tpch tests: ```shell INCLUDE_TPCH=true cargo test --test sqllogictests ``` -#### Updating tests: Completion Mode +## Updating tests: Completion Mode In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is a full script that is a copy of the prototype script with the results inserted. You can update the tests / generate expected output by passing the `--complete` argument: ```shell cargo test --test sqllogictests -- ddl --complete ``` -#### Running tests: `scratchdir` +## Running tests: `scratchdir` The DataFusion sqllogictest runner automatically creates a directory named `test_files/scratch/<filename>`, creating it if needed and @@ -190,7 +197,7 @@ Tests that need to write temporary files should write (only) to this directory to ensure they do not interfere with other concurrently running tests. -#### `.slt` file format +## `.slt` file format [`sqllogictest`] was originally written for SQLite to verify the correctness of SQL queries against the SQLite engine. The format is designed @@ -247,7 +254,7 @@ query > :warning: It is encouraged to either apply `order by`, or use `rowsort` for queries without explicit `order by` > clauses.
-##### Example +### Example ```sql # group_by_distinct diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 777b634e93b1f..6217f12279a94 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -2020,14 +2020,6 @@ statement ok drop table t; - - -statement error DataFusion error: Execution error: Table 't_source' doesn't exist\. -drop table t_source; - -statement error DataFusion error: Execution error: Table 't' doesn't exist\. -drop table t; - query I select median(a) from (select 1 as a where 1=0); ---- @@ -2199,6 +2191,26 @@ NULL 1 10.1 10.1 10.1 10.1 0 NULL statement ok set datafusion.sql_parser.dialect = 'Generic'; +## Multiple distinct aggregates and dictionaries +statement ok +create table dict_test as values (1, arrow_cast('foo', 'Dictionary(Int32, Utf8)')), (2, arrow_cast('bar', 'Dictionary(Int32, Utf8)')); + +query I? +select * from dict_test; +---- +1 foo +2 bar + +query II +select count(distinct column1), count(distinct column2) from dict_test group by column1; +---- +1 1 +1 1 + +statement ok +drop table dict_test; + + # Prepare the table with dictionary values for testing statement ok CREATE TABLE value(x bigint) AS VALUES (1), (2), (3), (1), (3), (4), (5), (2); @@ -2282,6 +2294,13 @@ select max(x_dict) from value_dict group by x_dict % 2 order by max(x_dict); 4 5 +statement ok +drop table value + +statement ok +drop table value_dict + + # bool aggregation statement ok CREATE TABLE value_bool(x boolean, g int) AS VALUES (NULL, 0), (false, 0), (true, 0), (false, 1), (true, 2), (NULL, 3); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 621cb4a8f4c04..b5601a22226c0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -209,6 +209,17 @@ AS VALUES (make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) ; +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; +---- +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + # arrays table query ??? 
select column1, column2, column3 from arrays; diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index e5c1a828492a1..ecb7fe13fcf4c 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1878,3 +1878,51 @@ query T SELECT CONCAT('Hello', 'World') ---- HelloWorld + +statement ok +CREATE TABLE simple_string( + letter STRING, + letter2 STRING +) as VALUES + ('A', 'APACHE'), + ('B', 'APACHE'), + ('C', 'APACHE'), + ('D', 'APACHE') +; + +query TT +EXPLAIN SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string; +---- +logical_plan +Projection: simple_string.letter, simple_string.letter = Utf8("A") AS simple_string.letter = left(Utf8("APACHE"),Int64(1)) +--TableScan: simple_string projection=[letter] +physical_plan +ProjectionExec: expr=[letter@0 as letter, letter@0 = A as simple_string.letter = left(Utf8("APACHE"),Int64(1))] +--MemoryExec: partitions=1, partition_sizes=[1] + +query TB +SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string; +---- +A true +B false +C false +D false + +query TT +EXPLAIN SELECT letter, letter = LEFT(letter2, 1) FROM simple_string; +---- +logical_plan +Projection: simple_string.letter, simple_string.letter = left(simple_string.letter2, Int64(1)) +--TableScan: simple_string projection=[letter, letter2] +physical_plan +ProjectionExec: expr=[letter@0 as letter, letter@0 = left(letter2@1, 1) as simple_string.letter = left(simple_string.letter2,Int64(1))] +--MemoryExec: partitions=1, partition_sizes=[1] + +query TB +SELECT letter, letter = LEFT(letter2, 1) FROM simple_string; +---- +A true +B false +C false +D false diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index 4542a262390cd..cb8c6a4fac28a 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -41,3 +41,39 @@ logical_plan Dml: op=[Update] table=[t1] --Projection: CAST(t1.c + CAST(Int64(1) AS Float64) AS Int32) AS a, CAST(t1.a AS Utf8) AS b, t1.c + Float64(1) AS c, CAST(t1.b AS Int32) AS d ----TableScan: t1 + +statement ok +create table t2(a int, b varchar, c double, d int); + +## set from subquery +query TT +explain update t1 set b = (select max(b) from t2 where t1.a = t2.a) +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: t1.a AS a, (<subquery>) AS b, t1.c AS c, t1.d AS d +----Subquery: +------Projection: MAX(t2.b) +--------Aggregate: groupBy=[[]], aggr=[[MAX(t2.b)]] +----------Filter: outer_ref(t1.a) = t2.a +------------TableScan: t2 +----TableScan: t1 + +# set from other table +query TT +explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d +----Filter: t1.a = t2.a AND t1.b > Utf8("foo") AND t2.c > Float64(1) +------CrossJoin: +--------TableScan: t1 +--------TableScan: t2 + +statement ok +create table t3(a int, b varchar, c double, d int); + +# set from multiple tables, sqlparser only supports from one table +query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: ,"\) +explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; \ No newline at end of file diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 7c4ff868cfcd3..585cb6e43d153 100644 ---
a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -18,9 +18,9 @@ [package] name = "datafusion-substrait" description = "DataFusion Substrait Producer and Consumer" +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -30,9 +30,9 @@ rust-version = "1.70" [dependencies] async-recursion = "1.0" chrono = { workspace = true } -datafusion = { version = "32.0.0", path = "../core" } -itertools = "0.11" -object_store = "0.7.0" +datafusion = { workspace = true } +itertools = { workspace = true } +object_store = { workspace = true } prost = "0.12" prost-types = "0.12" substrait = "0.18.0" diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 757bddf9fe582..e3c6f94d43d58 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -326,7 +326,7 @@ pub fn to_substrait_rel( left: Some(left), right: Some(right), r#type: join_type as i32, - expression: join_expr.clone(), + expression: join_expr, post_join_filter: None, advanced_extension: None, }))), diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index e1a9a5d41a5ac..882b02bcc84b6 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -18,9 +18,9 @@ [package] name = "datafusion-wasmtest" description = "Test library to compile datafusion crates to wasm" +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -38,11 +38,11 @@ crate-type = ["cdylib", "rlib",] # code size when deploying. console_error_panic_hook = { version = "0.1.1", optional = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr" } -datafusion-optimizer = { path = "../optimizer" } -datafusion-physical-expr = { path = "../physical-expr" } -datafusion-sql = { path = "../sql" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-optimizer = { workspace = true } +datafusion-physical-expr = { workspace = true } +datafusion-sql = { workspace = true } # getrandom must be compiled with js feature getrandom = { version = "0.2.8", features = ["js"] } diff --git a/datafusion/wasmtest/README.md b/datafusion/wasmtest/README.md index 5dc7bb2de45d4..d26369a18ab9e 100644 --- a/datafusion/wasmtest/README.md +++ b/datafusion/wasmtest/README.md @@ -17,9 +17,16 @@ under the License. --> -## wasmtest +# DataFusion wasmtest + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate is a submodule of DataFusion used to verify that various DataFusion crates compile successfully to the +`wasm32-unknown-unknown` target with wasm-pack. -Library crate to verify that various DataFusion crates compile successfully to the `wasm32-unknown-unknown` target with wasm-pack. +[df]: https://crates.io/crates/datafusion + +## wasmtest Some of DataFusion's downstream projects compile to WASM to run in the browser. Doing so requires special care that certain library dependencies are not included in DataFusion. 
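To make the wasm-pack check described above concrete, a minimal smoke test in the spirit of this crate might look like the sketch below. This is an illustration, not the crate's actual test code: the function name `basic_parse` and the SQL string are invented for the example, and it assumes `wasm-bindgen` is available as a dependency (which any `#[wasm_bindgen]` export requires).

```rust
// datafusion-sql re-exports sqlparser, so the parser can be exercised
// without pulling in the full datafusion core crate.
use datafusion_sql::sqlparser::dialect::GenericDialect;
use datafusion_sql::sqlparser::parser::Parser;
use wasm_bindgen::prelude::*;

// Hypothetical smoke test: if this compiles for wasm32-unknown-unknown and
// runs under wasm-pack, the SQL parser pulled in no wasm-incompatible
// dependencies (no threads, no filesystem, no native randomness).
#[wasm_bindgen]
pub fn basic_parse() -> Result<(), JsValue> {
    let sql = "SELECT a, b FROM t WHERE a > 10";
    Parser::parse_sql(&GenericDialect {}, sql)
        .map(|_statements| ()) // we only care that parsing succeeds
        .map_err(|e| JsValue::from_str(&e.to_string()))
}
```

The `getrandom = { version = "0.2.8", features = ["js"] }` entry in the Cargo.toml above is the typical example of the "special care" the README mentions: without the `js` feature, any transitive use of randomness fails to build on the wasm target.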
diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 5ab10e42cf685..b9c4db17c0981 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -26,4 +26,4 @@ edition = { workspace = true } arrow = { workspace = true } datafusion-common = { path = "../datafusion/common" } env_logger = "0.10.0" -rand = "0.8" +rand = { workspace = true }
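This final hunk completes the pattern the PR applies across the repository: a dependency's version is declared once in the root `[workspace.dependencies]` table, and member crates opt in with `{ workspace = true }`. A minimal sketch of the two halves, using `rand` as the example (the member manifest shown is generic, not a specific crate from this repository):

```toml
# Root Cargo.toml: the single source of truth for the version.
[workspace.dependencies]
rand = "0.8"

# A member crate's Cargo.toml: inherit the version from the workspace.
# Extra features can still be layered on locally, e.g.
#   rand = { workspace = true, features = ["small_rng"] }
[dependencies]
rand = { workspace = true }
```

With this in place, bumping `rand` for the whole project becomes a one-line edit at the root, which is why this diff touches so many Cargo.toml files while changing essentially no behavior.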